relion-3.1.3/.github/ISSUE_TEMPLATE/bug_report.md

---
name: Bug report
about: Report a problem
title: ''
labels: ''
assignees: ''

---

This is a template for reporting bugs. Please fill in as much information as you can.

**Describe your problem**

Please write a clear description of what the problem is. Data processing questions should be posted to [the CCPEM mailing list](https://www.jiscmail.ac.uk/cgi-bin/webadmin?A0=CCPEM), not here. **DO NOT** cross post the same question to multiple issues and/or many mailing lists (CCPEM, 3DEM, etc).

**Environment:**
- OS: [e.g. Ubuntu 16.04 LTS]
- MPI runtime: [e.g. OpenMPI 2.0.1]
- RELION version: [e.g. RELION-3.1-devel-commit-6ba935 (please see the title bar of the GUI)]
- Memory: [e.g. 128 GB]
- GPU: [e.g. GTX 1080Ti]

**Dataset:**
- Box size: [e.g. 256 px]
- Pixel size: [e.g. 0.9 Å/px]
- Number of particles: [e.g. 150,000]
- Description: [e.g. A tetrameric protein of about 400 kDa in total]

**Job options:**
- Type of job: [e.g. Refine3D]
- Number of MPI processes: [e.g. 4]
- Number of threads: [e.g. 6]
- Full command (see `note.txt` in the job directory):
```
`which relion_refine_mpi` --o Refine3D/job019/run --auto_refine --split_random_halves --i CtfRefine/job018/particles_ctf_refine.star --ref PostProcess/job001/postprocess.mrc --firstiter_cc --ini_high 12 --dont_combine_weights_via_disc --scratch_dir /ssd --pool 3 --pad 2 --ctf --ctf_corrected_ref --particle_diameter 142 --flatten_solvent --zero_mask --solvent_mask Result-by-Rado/run_class001_mask_th0.01_ns3_ngs7_box400.mrc --solvent_correct_fsc --oversampling 1 --healpix_order 3 --auto_local_healpix_order 4 --offset_range 5 --offset_step 2 --sym O --low_resol_join_halves 40 --norm --scale --j 8 --gpu "" --keep_scratch --pipeline_control Refine3D/job019/
```

**Error message:**

Please cite the *full* error message as the example below.
```
A line in the STAR file contains fewer columns than the number of labels.
Expected = 3 Found = 2
Error in line: 0 0.0
in: /prog/relion-devel-lmb/src/metadata_table.cpp, line 966
=== Backtrace ===
/prog/relion-devel-lmb/bin/relion_motion_refine_mpi(_ZN11RelionErrorC1ERKSsS1_l+0x41) [0x42e981]
/prog/relion-devel-lmb/bin/relion_motion_refine_mpi(_ZN13MetaDataTable12readStarLoopERSt14basic_ifstreamIcSt11char_traitsIcEEPSt6vectorI8EMDLabelSaIS6_EESsb+0xedd) [0x4361ad]
/prog/relion-devel-lmb/bin/relion_motion_refine_mpi(_ZN13MetaDataTable8readStarERSt14basic_ifstreamIcSt11char_traitsIcEERKSsPSt6vectorI8EMDLabelSaIS8_EESsb+0x580) [0x436f10]
/prog/relion-devel-lmb/bin/relion_motion_refine_mpi(_ZN10Micrograph4readE8FileNameb+0x5a3) [0x454bb3]
/prog/relion-devel-lmb/bin/relion_motion_refine_mpi(_ZN10MicrographC2E8FileNameS0_d+0x2e3) [0x4568b3]
/prog/relion-devel-lmb/bin/relion_motion_refine_mpi(_ZN17MicrographHandler14isMoviePresentERK13MetaDataTableb+0x180) [0x568280]
/prog/relion-devel-lmb/bin/relion_motion_refine_mpi(_ZN17MicrographHandler17cullMissingMoviesERKSt6vectorI13MetaDataTableSaIS1_EEi+0xe6) [0x568dc6]
/prog/relion-devel-lmb/bin/relion_motion_refine_mpi(_ZN13MotionRefiner4initEv+0x56f) [0x49e1ff]
/prog/relion-devel-lmb/bin/relion_motion_refine_mpi(main+0x31) [0x42a5e1]
/lib64/libc.so.6(__libc_start_main+0xf5) [0x2b7ac026e495]
/prog/relion-devel-lmb/bin/relion_motion_refine_mpi() [0x42b3cf]
==================
```

relion-3.1.3/.gitignore

build
build-*
cmake-build-debug
cmake-build-release
external
.idea
#Eclipse files
.cproject
.project
Debug
*.pyc

relion-3.1.3/AUTHORS

This program is developed in the group of

Sjors H.W. Scheres at the MRC Laboratory of Molecular Biology,
- Sjors H.W. Scheres
- Shaoda He
- Takanori Nakane
- Jasenko Zivanov
- Liyi Dong

and Erik Lindahl at Stockholm University.
- Erik Lindahl
- Björn O. Forsberg
- Dari Kimanius

However, it also contains pieces of code from the following packages:
- XMIPP by COS Sorzano et al: http://xmipp.cnb.csic.es
- BSOFT by Heymann JB et al: http://lsbr.niams.nih.gov/bsoft/
- HEALPIX by NASA Jet Propulsion Laboratory: http://healpix.jpl.nasa.gov/
- libLBFGS by Naoaki Okazaki: http://www.chokkan.org/software/liblbfgs/
- Numerical diagonalization of 3x3 matrices by Joachim Kopp: https://www.mpi-hd.mpg.de/personalhomes/globes/3x3/index.html
- Gravis by Graphics and Vision Research Group at University of Basel: https://gravis.dmi.unibas.ch/index.html
- CPlot2D by Attila Michael Zsaki: http://www.amzsaki.com/?page_id=2073
- cmake-modules by Ryan A. Pavlik: https://github.com/rpavlik/cmake-modules
- Many contributions (especially RelionIt GUI) from Colin Palmer
- Symmetry relaxation patch by Vahid Abrishami

Original disclaimers in the code of these external packages have been maintained as much as possible. Please contact Sjors Scheres (scheres@mrc-lmb.cam.ac.uk) if you feel this has not been done correctly.

We also thank those who reported bugs and suggested fixes. They are acknowledged in the Git commit messages.
relion-3.1.3/CMakeLists.txt

cmake_minimum_required(VERSION 2.8 FATAL_ERROR) #if(POLICY CMP0048) # cmake_policy(SET CMP0048 NEW) #endif() project(Relion) # Use new policy for OS X @rpath if(POLICY CMP0042) cmake_policy(SET CMP0042 NEW) endif() # Add the path to the additional Find.cmake files # which are included with the distributed RELION code list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake) add_definitions(-DINSTALL_LIBRARY_DIR=${CMAKE_INSTALL_PREFIX}/lib/) add_definitions(-DSOURCE_DIR=${CMAKE_SOURCE_DIR}/src/) if(MDT_TYPE_CHECK) message(" DEVELOPER MODE: MetaDataTable type check is enabled.") add_definitions(-DMETADATA_TABLE_TYPE_CHECK) endif() # message(STATUS "INSTALL_LIBRARY_DIR set to ${CMAKE_INSTALL_PREFIX}/lib/") # message(STATUS "SOURCE_DIR set to ${CMAKE_SOURCE_DIR}/src/") # ------------------------------------------------------------------RPATH SETTINGS-- if(NOT APPLE) # use, i.e. don't skip the full RPATH for the build tree SET(CMAKE_SKIP_BUILD_RPATH FALSE) # when building, don't use the install RPATH already # (but later on when installing) SET(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE) SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib") # add the automatically determined parts of the RPATH # which point to directories outside the build tree to the install RPATH SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) # the RPATH to be used when installing, but only if it's not a system directory LIST(FIND CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES "${CMAKE_INSTALL_PREFIX}/lib" isSystemDir) IF("${isSystemDir}" STREQUAL "-1") SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib") ENDIF("${isSystemDir}" STREQUAL "-1") endif(NOT APPLE) # ---------------------------------------------------------SET SPECIFIC BUILD TYPE-- if(NOT ${CMAKE_BUILD_TYPE} STREQUAL "") string( TOLOWER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_LOWER ) if( ( NOT ${CMAKE_BUILD_TYPE_LOWER} STREQUAL "none" ) AND ( NOT ${CMAKE_BUILD_TYPE_LOWER} STREQUAL "release" ) AND ( NOT ${CMAKE_BUILD_TYPE_LOWER} STREQUAL "debug" ) AND ( NOT ${CMAKE_BUILD_TYPE_LOWER} STREQUAL "relwithdebinfo" ) AND ( NOT ${CMAKE_BUILD_TYPE_LOWER} STREQUAL "profiling" ) AND ( NOT ${CMAKE_BUILD_TYPE_LOWER} STREQUAL "benchmarking" ) ) message( FATAL_ERROR "CMAKE_BUILD_TYPE : '${CMAKE_BUILD_TYPE}' is not a valid build type. " "Valid options are: 'None', 'Release', 'Debug', 'RelWithDebInfo', and 'Profiling'."
) endif() message(STATUS "BUILD TYPE set to '${CMAKE_BUILD_TYPE}'") SET(CMAKE_BUILD_TYPE ${CMAKE_BUILD_TYPE} CACHE STRING "Choose the type of build, options are: 'None', 'Release', 'Debug', 'RelWithDebInfo', and 'Profiling'.") else() SET(CMAKE_BUILD_TYPE "Release") message(STATUS "BUILD TYPE set to the default type: '${CMAKE_BUILD_TYPE}'") string( TOLOWER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_LOWER ) endif() # ------------------OPTIONS WHICH ARE NEEDED TO SET BUILD-TYPES (COMPILATION FLAGS)-- # ------------------------------------------------------------------------CUDA-ARCH-- if(NOT DEFINED CUDA_ARCH) message(STATUS "Setting fallback CUDA_ARCH=35") set(CUDARCH "-arch=sm_35") else(NOT DEFINED CUDA_ARCH) message(STATUS "Using provided CUDA_ARCH=${CUDA_ARCH}") set(CUDARCH "-arch=sm_${CUDA_ARCH}") endif(NOT DEFINED CUDA_ARCH) # -------------------------------------------------------------------FURTHER OPTIONS-- # CUDA on by default, so check for CPU-accelration request and possible conflicting dual-request option(ALTCPU "Enable Accelerated CPU version" OFF) if(ALTCPU) option(CUDA "Enable CUDA GPU acceleration" OFF) if(ALTCPU AND CUDA) message(FATAL_ERROR "You cannot build with both CUDA=ON and ALTCPU=ON. Please choose one and rerun CMAKE") endif() else() option(CUDA "Enable CUDA GPU acceleration" ON) endif() option(DoublePrec_CPU "DoublePrec_CPU" ON) option(DoublePrec_ACC "Accelerated Code use double-precision" OFF) option(MKLFFT "Use MKL rather than FFTW for FFT" OFF) option(CudaTexture "CudaTexture" ON) option(ALLOW_CTF_IN_SAGD "Allow CTF-modulation in SAGD, as specified in Claim 1 of patent US10,282,513B2" ON) if(ALLOW_CTF_IN_SAGD) message(STATUS "ALLOW_CTF_IN_SAGD enabled - This build of RELION allows modulation of particle images by a contrast transfer function inside stochastic average gradient descent, as specified in Claim 1 of patent US10,282,513B2") else() message(STATUS "ALLOW_CTF_IN_SAGD disabled - This build of RELION does not allow modulation of particle images by a contrast transfer function inside stochastic average gradient descent, as specified in Claim 1 of patent US10,282,513B2") endif() if(ALTCPU) message(STATUS "ALTCPU enabled - Building CPU-accelerated version of RELION") endif() if(CUDA) message(STATUS "CUDA enabled - Building CUDA-accelerated version of RELION") endif() if(CUDA OR ALTCPU) add_definitions(-DACC_CUDA=2 -DACC_CPU=1) endif() # -----------------------------------------------DOUBLE PRECISION (CUDA-CODE) OR NOT-- if(DoublePrec_CPU) message(STATUS "Setting cpu precision to double") else(DoublePrec_CPU) message(STATUS "Setting cpu precision to single") add_definitions(-DRELION_SINGLE_PRECISION) endif(DoublePrec_CPU) if(DoublePrec_ACC) message(STATUS "Setting accelerated code precision to double") add_definitions(-DACC_DOUBLE_PRECISION) set(CudaTexture FALSE) else(DoublePrec_ACC) message(STATUS "Setting accelerated code precision to single") endif(DoublePrec_ACC) # ----------------------------------------------------------INCLUDE ALL BUILD TYPES-- #This *has* to be AFTER project() include(${CMAKE_SOURCE_DIR}/cmake/BuildTypes.cmake) if(CUDA) # -----------------------------------------------------------------------------CUDA-- # DOC: http://www.cmake.org/cmake/help/v3.0/module/FindCUDA.html FIND_PACKAGE(CUDA) endif() if(CUDA_FOUND) message(STATUS "Using cuda wrapper to compile....") if( (NOT ${CUDA_VERSION} VERSION_LESS "7.5") AND (NOT DoublePrec_ACC) ) message(STATUS "Cuda version is >= 7.5 and single-precision build, enable double usage warning.") set(WARN_DBL 
"--ptxas-options=-warn-double-usage") # cuda>=7.5 elseif( ${CUDA_VERSION} VERSION_LESS "7.0") message(WARNING "Cuda version is less than 7.0, so relion will be compiled without GPU support.") set(CUDA OFF) endif() if(CUDA) add_definitions(-DCUDA) endif() else(CUDA_FOUND) message(STATUS "Using non-cuda compilation....") endif(CUDA_FOUND) # ------------------------------------------------------------------ALLOCATOR CHOICE-- option(CachedAlloc "CachedAlloc" ON) if(NOT CachedAlloc) add_definitions(-DCUDA_NO_CUSTOM_ALLOCATION) message(STATUS "Cached allocation is disabled.") endif(NOT CachedAlloc) option(CustomAllocMemGuards "CustomAllocMemGuards" OFF) if(CustomAllocMemGuards) add_definitions(-DCUSTOM_ALLOCATOR_MEMGUARD) message(STATUS "Abort on out of bound write.") endif(CustomAllocMemGuards) # -------------------------------------------------------------FORCE USE OF STL-LIBS-- option(CudaForceSTL "CudaForceSTL" OFF) if(CudaForceSTL) add_definitions(-DCUDA_FORCESTL) message(STATUS "Building cuda files wusing stl-libs for sort, min and max.") endif(CudaForceSTL) # ------------------------------------------------------------------------GUI OR NOT-- # Skip FLTK/X11-dependent binaries or not option(GUI "GUI" ON) if(NOT GUI) message(STATUS "Omitting GUI targets as per your request") endif() # ---------------------------------------------------------------------------TBB -- option(FORCE_OWN_TBB "FORCE_OWN_TBB" OFF) if (ALTCPU) if (FORCE_OWN_TBB) message(STATUS "Will ignore any potentially installed system TBB lib, as per your request.") include(${CMAKE_SOURCE_DIR}/cmake/BuildTBB.cmake) set(INSTALL_OWN_TBB 1) else(FORCE_OWN_TBB) find_package(TBB) if(TBB_FOUND) include_directories("${TBB_INCLUDE_DIRS}") message(STATUS "TBB_FOUND : ${TBB_FOUND}") message(STATUS "TBB_INCLUDE_DIRS : ${TBB_INCLUDE_DIRS}") message(STATUS "TBB_VERSION : ${TBB_VERSION}") message(STATUS "TBB_LIBRARIES : ${TBB_LIBRARIES}") else(TBB_FOUND) include(${CMAKE_SOURCE_DIR}/cmake/BuildTBB.cmake) set(INSTALL_OWN_TBB 1) endif(TBB_FOUND) endif(FORCE_OWN_TBB) endif(ALTCPU) # -------------------------------------------------------------------------------MPI-- find_package(MPI REQUIRED) if ("${MPI_CXX_INCLUDE_DIRS}" STREQUAL "") include_directories("${MPI_CXX_INCLUDE_PATH}") else() include_directories("${MPI_CXX_INCLUDE_DIRS}") endif() message(STATUS "MPI_INCLUDE_PATH : ${MPI_INCLUDE_PATH}") message(STATUS "MPI_LIBRARIES : ${MPI_LIBRARIES}") message(STATUS "MPI_CXX_INCLUDE_PATH : ${MPI_CXX_INCLUDE_PATH}") message(STATUS "MPI_CXX_LIBRARIES : ${MPI_CXX_LIBRARIES}") message(STATUS "CMAKE_C_COMPILER : ${CMAKE_C_COMPILER}") message(STATUS "CMAKE_CXX_COMPILER : ${CMAKE_CXX_COMPILER}") message(STATUS "MPI_C_COMPILER : ${MPI_C_COMPILER}") message(STATUS "MPI_CXX_COMPILER : ${MPI_CXX_COMPILER}") SET(CMAKE_C_COMPILER ${MPI_C_COMPILER}) SET(CMAKE_CXX_COMPILER ${MPI_CXX_COMPILER}) set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) # ----------------------------------------------------------Intel Compiler support -- # ---------------------------------- and build flags including MKL and TBB --------- message(STATUS "CMAKE_CXX_COMPILER_ID : ${CMAKE_CXX_COMPILER_ID}") if(MKLFFT) if (NOT "$ENV{MKLROOT}" STREQUAL "") include_directories("$ENV{MKLROOT}/include/fftw") message(STATUS "MKL FFTW wrapper header files: $ENV{MKLROOT}/include/fftw") else() message("COMPILATION MAY FAIL since no MKL FFTW wrapper header files could be found. 
Please make sure the MKLROOT environmental variable is set.") endif() add_definitions(-DMKLFFT) endif(MKLFFT) if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Intel" OR "${CMAKE_CXX_COMPILER}" MATCHES "icpx") # "Intel" is for classic Intel compiler and "IntelLLVM" is for oneAPI compiler which is supported from CMake 3.20 if(MKLFFT) if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel") SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -qopenmp -mkl=parallel -limf ") else() # Intel oneAPI compiler SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fiopenmp -mkl=parallel -limf ") endif() endif(MKLFFT) if(ALTCPU) add_definitions(-DFAST_CENTERFFT) endif() else() if(MKLFFT) # For the time being, let's use the sequential version (as with FFTW) link_directories("$ENV{MKLROOT}/lib/intel64") SET(FFTW_LIBRARIES mkl_intel_lp64 mkl_sequential mkl_core) endif(MKLFFT) endif() if(ALTCPU) add_definitions(-DALTCPU) endif() if(ALLOW_CTF_IN_SAGD) add_definitions(-DALLOW_CTF_IN_SGD) endif() # ---------------------------------------------------------------USE TEXTURES OR NOT-- if(NOT CudaTexture OR ALTCPU) add_definitions(-DPROJECTOR_NO_TEXTURES) message(STATUS "Texture interpolation is omitted.") endif(NOT CudaTexture OR ALTCPU) # --------------------------------------------------------------------------X11/FLTK-- option(FORCE_OWN_FLTK "FORCE_OWN_FLTK" OFF) FIND_PACKAGE(X11) if(GUI) if(X11_FOUND) set(FLTK_SKIP_OPENGL TRUE) #OpenGL is not required for relion if(NOT FORCE_OWN_FLTK) FIND_PACKAGE(FLTK) if(FLTK_FOUND) message(STATUS "X11 and FLTK were found") message(STATUS "FLTK_LIBRARIES: ${FLTK_LIBRARIES}") else() message(STATUS "No FLTK installation was found") endif() endif(NOT FORCE_OWN_FLTK) if(NOT FLTK_FOUND) include(${CMAKE_SOURCE_DIR}/cmake/BuildFLTK.cmake) set(INSTALL_OWN_FLTK 1) endif(NOT FLTK_FOUND) else(X11_FOUND) message( STATUS "\n-- ------------------ YOU HAVE NO X11-LIBS ------------------") message( STATUS "CCmake found no X11-libs on your system, which are required for the GUI.") message( STATUS " You CAN add the flag -DGUI=OFF to avoid using X11" ) message(FATAL_ERROR "X11 is required for GUI.") endif(X11_FOUND) endif(GUI) # -------------------------------------------------------------------------------FFT-- if(NOT MKLFFT) option(FORCE_OWN_FFTW "FORCE_OWN_FFTW" OFF) option(AMDFFTW "Use AMD optimized version of FFTW. This needs a new version of GCC (>= 8.3 recommended)." 
OFF) set(FFTW_DOUBLE_REQUIRED TRUE) set(FFTW_SINGLE_REQUIRED TRUE) if(AMDFFTW) set(FORCE_OWN_FFTW ON) endif() if(NOT FORCE_OWN_FFTW) FIND_PACKAGE(FFTW COMPONENTS SINGLE DOUBLE) endif(NOT FORCE_OWN_FFTW) if(NOT FFTW_FOUND) include(${CMAKE_SOURCE_DIR}/cmake/BuildFFTW.cmake) endif(NOT FFTW_FOUND) endif(NOT MKLFFT) # ---------------------------------------------------------------------------SIN/COS-- include(CheckCXXSymbolExists) check_cxx_symbol_exists(sincos math.h HAVE_SINCOS) check_cxx_symbol_exists(__sincos math.h HAVE___SINCOS) if(HAVE_SINCOS) add_definitions(-DHAVE_SINCOS) endif() if(HAVE___SINCOS) add_definitions(-DHAVE___SINCOS) endif() # ------------------------------------------------------------------------------TIFF-- find_package(TIFF REQUIRED) if(TIFF_FOUND) add_definitions(-DHAVE_TIFF) endif() find_package(ZLIB) find_package(PNG) if(PNG_FOUND) add_definitions(-DHAVE_PNG) endif() # ----------------------------------------------------------------------COPY SCRIPTS-- if(FORCE_OWN_FFTW) install(DIRECTORY external/fftw/lib/ DESTINATION lib FILES_MATCHING PATTERN "*") endif() list(APPEND RELION_SCRIPT_FILES star_printtable star_plottable star_loopheader star_datablock_stack star_datablock_singlefiles star_datablock_ctfdat qsub.csh) add_custom_target(copy_scripts ALL) foreach (SCRIPT_FILE ${RELION_SCRIPT_FILES}) add_custom_command(TARGET copy_scripts POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/scripts/${SCRIPT_FILE} ${CMAKE_BINARY_DIR}/bin/relion_${SCRIPT_FILE} ) endforeach() install( DIRECTORY ${CMAKE_BINARY_DIR}/bin DESTINATION ${CMAKE_INSTALL_PREFIX} USE_SOURCE_PERMISSIONS FILES_MATCHING PATTERN "*") # install fltk if we built our own version if(INSTALL_OWN_FLTK) install(DIRECTORY external/fltk/lib/ DESTINATION lib FILES_MATCHING PATTERN "*") endif() # -----------------------------------------------------------------RELION COMPONENTS-- option(BUILD_SHARED_LIBS "BUILD_SHARED_LIBS" OFF) message("BUILD_SHARED_LIBS = ${BUILD_SHARED_LIBS}") if(BUILD_SHARED_LIBS) message(STATUS "Building shared libs (smaller build size and binaries)") else() message(STATUS "Building static libs (larger build size and binaries)") endif() ADD_SUBDIRECTORY(src/apps) #message(STATUS "CUDA option = ${CUDA}") #message(STATUS "ALTCPU option = ${ALTCPU}") #message(STATUS "ALLOW_CTF_IN_SAGD option = ${ALLOW_CTF_IN_SAGD}") #message(STATUS "DoublePrec_CPU option = ${DoublePrec_CPU}") #message(STATUS "DoublePrec_ACC option = ${DoublePrec_ACC}") #message(STATUS "MKLFFT option = ${MKLFFT}") #message(STATUS "CudaTexture option = ${CudaTexture}") #get_directory_property( DirDefs COMPILE_DEFINITIONS ) #message(STATUS "COMPILE_DEFINITIONS = ${DirDefs}" ) #message(STATUS "CMAKE_C_FLAGS : ${CMAKE_C_FLAGS}") #message(STATUS "CMAKE_CXX_FLAGS : ${CMAKE_CXX_FLAGS}") #message(STATUS "CMAKE_C_COMPILER : ${CMAKE_C_COMPILER}") #message(STATUS "CMAKE_CXX_COMPILER : ${CMAKE_CXX_COMPILER}") #message(STATUS "MPI_C_COMPILER : ${MPI_C_COMPILER}") #message(STATUS "MPI_CXX_COMPILER : ${MPI_CXX_COMPILER}") #message(STATUS "CMAKE_EXE_LINKER_FLAGS : ${CMAKE_EXE_LINKER_FLAGS}") # -----------------------------------------------------------------------------TESTS-- # Include testing flag(s) as precomiler # definitions and include test directives #enable_testing() #include(${CMAKE_SOURCE_DIR}/tests/RelionTests.cmake) option(BUILD_TESTS "Build and configure tests" OFF) if(BUILD_TESTS) include(CTest) add_subdirectory(tests) endif() # ----------------------------------------------------------PRINT OUT ALL CMAKE VARS-- 
#get_cmake_property(_variableNames VARIABLES) #foreach (_variableName ${_variableNames}) # message(STATUS "${_variableName}=${${_variableName}}") #endforeach() relion-3.1.3/COPYING000066400000000000000000000431221411340063500140400ustar00rootroot00000000000000 GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Library General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. 
The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. 
In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. 
You are not responsible for enforcing compliance by third parties to this License. 7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. 
BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Also add information on how to contact you by electronic and paper mail. If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. 
, 1 April 1989 Ty Coon, President of Vice This General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Library General Public License instead of this License. relion-3.1.3/LICENSE000066400000000000000000000450171411340063500140170ustar00rootroot00000000000000 GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Library General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. 
The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. 
In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. 
You are not responsible for enforcing compliance by third parties to this License. 7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. This version of RELION uses a stochastic average gradient descent (SAGD) method in conjunction with a modulation by a contrast transfer function of the particle images for ab-initio single-particle reconstruction. Should you wish to use RELION with the SAGD method in conjunction with a modulation by a contrast transfer function of the particle images for ab-initio single-particle reconstruction, please note that this functionality may be covered in the USA by US Patent No. 10,282,513. Non-profit academic research uses of the patented IP are freely licensed under the terms of the license agreement available at https://cryosparc.com/patent-faqs. Should you wish to use RELION without modulation by a contrast transfer function of the particle images in the SAGD method for ab-initio single-particle reconstruction, you may disable this functionality using -DALLOW_CTF_IN_SAGD=OFF at the stage of configuration, i.e. before compilation of the code. 9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. 
Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Also add information on how to contact you by electronic and paper mail. 
If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. , 1 April 1989 Ty Coon, President of Vice This General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Library General Public License instead of this License.

relion-3.1.3/README.md

RELION 3.1.1
=============

RELION (for REgularised LIkelihood OptimisatioN) is a stand-alone computer program for Maximum A Posteriori refinement of (multiple) 3D reconstructions or 2D class averages in cryo-electron microscopy. It is developed in the research group of Sjors Scheres at the MRC Laboratory of Molecular Biology.

The underlying theory of MAP refinement is given in a [scientific publication](https://www.ncbi.nlm.nih.gov/pubmed/22100448). If RELION is useful in your work, please cite this paper.

The more comprehensive documentation of RELION is stored on the [Wiki](http://www2.mrc-lmb.cam.ac.uk/relion).

For EER data processing, please read the instructions in [our wiki](https://www3.mrc-lmb.cam.ac.uk/relion/index.php/Image_compression#Falcon4_EER) carefully.

## Installation

More extensive options and configurations are available [here](http://www2.mrc-lmb.cam.ac.uk/relion/index.php/Download_%26_install), but cloning and installing RELION for typical use is made easy through [cmake](https://en.wikipedia.org/wiki/CMake).

On Debian or Ubuntu machines, installing cmake, the compiler, and additional dependencies (MPI, FFTW) is as easy as:

```
sudo apt install cmake git build-essential mpi-default-bin mpi-default-dev libfftw3-dev libtiff-dev
```

On other systems it is typically just as easy; you simply have to change "apt" to the appropriate package manager (e.g. yum).

Once git and cmake are installed, RELION can be easily installed through:

```
git clone https://github.com/3dem/relion.git
cd relion
git checkout master

mkdir build
cd build
cmake ..
make
```

By performing `git checkout ver3.1` instead of `git checkout master`, you can access the latest (developmental) updates for RELION 3.1.x. The code there is not tested as thoroughly as that in the master branch and is not generally recommended.

The binaries will be produced in the `build/bin` directory. If you want to copy the binaries somewhere else, run `cmake` with `-DCMAKE_INSTALL_PREFIX=/where/to/install/` and perform `make install` as the final step.
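For example, a full out-of-source configure-and-install sequence with a custom prefix could look like the following sketch (the `/where/to/install/` prefix and the `-j 4` parallelism are only illustrative placeholders, not required values):

```
mkdir build
cd build
cmake -DCMAKE_INSTALL_PREFIX=/where/to/install/ ..
make -j 4
make install
```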
Do not specify the build directory itself as `CMAKE_INSTALL_PREFIX`! This will not work.

Also note that the MPI library used for compilation must be the one you intend to use RELION with. Compiling RELION with one version of MPI and running the resulting binary with mpirun from another version can cause crashes. See our wiki below for details.

In any case, you have to make sure your PATH environment variable points to the directory containing the relion binaries. Launching RELION as `/path/to/relion` is NOT the right way; this starts the right GUI, but the GUI might invoke other versions of RELION in the PATH.

If FLTK related errors are reported, please add `-DFORCE_OWN_FLTK=ON` to `cmake`. For FFTW related errors, try `-DFORCE_OWN_FFTW=ON`.

RELION also requires libtiff. Most Linux distributions have packages like `libtiff-dev` or `libtiff-devel`. Note that you need a developer package. You need version 4.0.x to read BigTIFF files. If you installed libtiff in a non-standard location, specify the location by `-DTIFF_INCLUDE_DIR=/path/to/include -DTIFF_LIBRARY=/path/to/libtiff.so.5`.

See [our wiki](http://www2.mrc-lmb.cam.ac.uk/relion/index.php/Download_%26_install) for more options, troubleshooting and useful environment variables (especially in HPC clusters).

## Updating

RELION is intermittently updated, with both minor and major features. To update an existing installation, simply use the following commands:

```
cd relion
git pull
cd build
make
make install # Only when you have specified CMAKE_INSTALL_PREFIX in the cmake step
```

If something went wrong, remove the `build` directory and try again from `cmake`.

## Options for accelerated versions

Parts of the cryo-EM processing pipeline can be very computationally demanding, and in some cases special hardware can be used to make these faster. There are two such cases at the moment:

* GPU acceleration: RELION only supports CUDA-capable GPUs of compute capability 3.5 or higher.
* Vectorized CPU code path: RELION only supports GCC and ICC 2018.3 or later.

Note that you cannot have both types of acceleration in the same binary at the moment. There are more benefits than speed; the accelerated versions also have a decreased memory footprint. Details about how to enable either of these options are listed below.

## GPU-acceleration

Tools that are GPU-accelerated:

* relion\_refine (i.e. Class2D, Class3D, Refine3D, Multibody refinement)
* relion\_autopick

Classification without alignment is not accelerated. When the CUDA SDK is available, GPU support is automatically compiled.

### Use

If you run relion\_refine with the "`--gpu`" flag, you will run the accelerated CUDA version of the kernels. If you leave out the "`--gpu`" flag, it will run the original CPU version.

## CPU-acceleration

Tools that are CPU-accelerated (vectorized):

* relion\_refine (i.e. Class2D, Class3D, Refine3D, Multibody refinement)

Classification without alignment is not accelerated. To build with support for CPU-accelerated kernels in addition to the original CPU version, set `ALTCPU=ON`:

```
cd build
rm -r *
cmake -DALTCPU=ON ..
make
make install
```

This will require the Intel TBB (Threading Building Blocks) library. RELION will look for TBB, and fetch and install it when it is missing on your system. You can force this behaviour (and make sure you are using the latest version) by adding:

```
-DFORCE_OWN_TBB=ON
```

In addition, you can make use of the Intel Math Kernel Library (Intel MKL). This is optional (but will scale better with increased threads).
Add this by:

```
-DMKLFFT=ON
```

### Use

If you run relion\_refine with the "`--cpu`" flag, you will run the accelerated version. If you leave it out, the original CPU version will be run. You should use this flag if you can, unless you want to verify old runs or behaviour.

For details on how to compile with Intel compilers and optimal runtime configurations, please look at our [wiki](https://www3.mrc-lmb.cam.ac.uk/relion/index.php/Benchmarks_%26_computer_hardware#Accelerated_RELION.2C_using_GPUs_or_CPU-vectorization).
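Putting the two CPU-related options together, a possible configure sequence for a vectorised build that also uses MKL is sketched below (assuming a clean `build` directory; the flags are the `ALTCPU` and `MKLFFT` options described above):

```
cd build
rm -r *
cmake -DALTCPU=ON -DMKLFFT=ON ..
make
make install
```

At run time you then only add `--cpu` to your relion\_refine commands, as described in the Use section above.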
relion-3.1.3/betaGuide.pdf: [binary PDF stream data omitted. The file is a LaTeX-generated guide whose PDF bookmarks are: Introduction (READ THIS!; New features on the GUI; New features on the command line; CPU acceleration); Download and install (Getting the code; Updating; Installing; Compiling the CPU-accelerated code, with subsections Supported configurations, Building with the Intel(R) Compiler, and Hardware-specific vectorisation using the Intel(R) Compiler); Providing feedback (Reporting bugs; Asking questions).]
relion-3.1.3/cmake/000077500000000000000000000000001411340063500140635ustar00rootroot00000000000000relion-3.1.3/cmake/BuildFFTW.cmake000066400000000000000000000121521411340063500166140ustar00rootroot00000000000000set(FFTW_EXTERNAL_PATH "${CMAKE_SOURCE_DIR}/external/fftw") if (NOT DEFINED TARGET_X86) try_compile(TARGET_X86 ${CMAKE_BINARY_DIR} 
"${CMAKE_SOURCE_DIR}/cmake/TestX86.c") endif() find_path( OWN_FFTW_INCLUDES NAMES fftw3.h PATHS ${FFTW_EXTERNAL_PATH}/include NO_DEFAULT_PATH) find_library(OWN_FFTW_SINGLE NAMES fftw3f PATHS ${FFTW_EXTERNAL_PATH}/lib NO_DEFAULT_PATH) find_library(OWN_FFTW_DOUBLE NAMES fftw3 PATHS ${FFTW_EXTERNAL_PATH}/lib NO_DEFAULT_PATH) if(OWN_FFTW_INCLUDES AND (OWN_FFTW_SINGLE OR NOT FFTW_SINGLE_REQUIRED) AND (OWN_FFTW_DOUBLE OR NOT FFTW_DOUBLE_REQUIRED)) if (OWN_FFTW_SINGLE AND FFTW_SINGLE_REQUIRED) message(STATUS "Found previously built non-system single precision FFTW libraries that will be used.") #message(STATUS "OWN_FFTW_SINGLE: ${OWN_FFTW_SINGLE}") endif() if (OWN_FFTW_DOUBLE AND FFTW_DOUBLE_REQUIRED) message(STATUS "Found previously built non-system double precision FFTW libraries that will be used.") #message(STATUS "OWN_FFTW_DOUBLE: ${OWN_FFTW_DOUBLE}") endif() set(FFTW_FOUND TRUE) set(BUILD_OWN_FFTW FALSE) set(BUILD_OWN_FFTWF FALSE) else() message(STATUS "--------------------------------------------------------") message(STATUS "------ REQUIRED FFTW LIBRARIES WHERE NOT FOUND. --------") message(STATUS "-------------- FFTW WILL BE DOWNLOADED AND -------------") message(STATUS "--------------- BUILT DURING COMPILE-TIME. -------------") message(STATUS "--------------------------------------------------------") message(STATUS "---- A WORKING INTERNET CONNECTION WILL BE REQUIRED. ---") message(STATUS "--------------------------------------------------------") set(FFTW_FOUND FALSE) set(ext_conf_flags_fft --enable-shared --prefix=${FFTW_EXTERNAL_PATH}) if(TARGET_X86) if (AMDFFTW) set(ext_conf_flags_fft ${ext_conf_flags_fft} --enable-sse2 --enable-avx --enable-avx2 --enable-amd-opt) else() set(ext_conf_flags_fft ${ext_conf_flags_fft} --enable-avx) endif() endif() set(OWN_FFTW_SINGLE ${FFTW_EXTERNAL_PATH}/lib/${CMAKE_SHARED_LIBRARY_PREFIX}fftw3${CMAKE_SHARED_LIBRARY_SUFFIX}) set(OWN_FFTW_DOUBLE ${FFTW_EXTERNAL_PATH}/lib/${CMAKE_SHARED_LIBRARY_PREFIX}fftw3f${CMAKE_SHARED_LIBRARY_SUFFIX}) set(OWN_FFTW_INCLUDES "${FFTW_EXTERNAL_PATH}/include" ) set(FFTW_PATH ${FFTW_PATH} ${FFTW_EXTERNAL_PATH}) set(FFTW_EXTERNAL_LIBS_TAR_DIRECTORY ${FFTW_EXTERNAL_PATH}) set(FFTW_EXTERNAL_LIBS_EXTRACT_TARGET ${FFTW_EXTERNAL_LIBS_TAR_DIRECTORY}) set(FFTW_FFTW3_TAR_FILE http://fftw.org/fftw-3.3.8.tar.gz) set(FFTW_MD5 8aac833c943d8e90d51b697b27d4384d) if (AMDFFTW) set(FFTW_FFTW3_TAR_FILE https://github.com/amd/amd-fftw/archive/2.2.zip) set(FFTW_MD5 2e9c59ad80ec5bd75ce04c7970c9f47a) endif() set(FFTW_FFTW3_LIB_DIR ${FFTW_EXTERNAL_LIBS_EXTRACT_TARGET}/fftw3) set(FFTW_FFTW3_BUILD_DIR ${FFTW_EXTERNAL_LIBS_EXTRACT_TARGET}/fftw3-build) include(ExternalProject) if (FFTW_SINGLE_REQUIRED) set(BUILD_OWN_FFTW TRUE) endif() if (FFTW_DOUBLE_REQUIRED) set(BUILD_OWN_FFTWF TRUE) endif() # Rather messy logic: # We build double prec here but if double prec is not required, build single prec if (NOT FFTW_DOUBLE_REQUIRED) set(ext_conf_flags_fft ${ext_conf_flags_fft} --enable-float --enable-sse) endif() externalproject_add(own_fftw_lib URL ${FFTW_FFTW3_TAR_FILE} URL_MD5 ${FFTW_MD5} DOWNLOAD_DIR ${FFTW_EXTERNAL_LIBS_TAR_DIRECTORY} SOURCE_DIR ${FFTW_FFTW3_LIB_DIR} CONFIGURE_COMMAND /configure ${ext_conf_flags_fft} INSTALL_DIR ${FFTW_EXTERNAL_PATH}/fftw3 BINARY_DIR ${FFTW_EXTERNAL_PATH}/fftw3 BUILD_COMMAND ${MAKE} # LOG_CONFIGURE # LOG_BUILD LOG_INSTALL) add_custom_command( COMMAND ${CMAKE_COMMAND} -E echo "Registering own FFTW byproducts" OUTPUT "${FFTW_EXTERNAL_PATH}/lib/libfftw3.so" DEPENDS own_fftw_lib) add_custom_target(own_fftw_lib_byproducts 
DEPENDS "${FFTW_EXTERNAL_PATH}/lib/libfftw3.so") # When both double and single prec are required, build single later. if (FFTW_DOUBLE_REQUIRED AND FFTW_SINGLE_REQUIRED) externalproject_add(own_fftwf_lib URL ${FFTW_FFTW3_TAR_FILE} URL_MD5 ${FFTW_MD5} DOWNLOAD_DIR ${FFTW_EXTERNAL_LIBS_TAR_DIRECTORY} SOURCE_DIR ${FFTW_FFTW3_LIB_DIR} CONFIGURE_COMMAND /configure ${ext_conf_flags_fft} --enable-float --enable-sse INSTALL_DIR ${FFTW_EXTERNAL_PATH}/fftw3 BINARY_DIR ${FFTW_EXTERNAL_PATH}/fftw3 BUILD_COMMAND ${MAKE} # LOG_CONFIGURE # LOG_BUILD LOG_INSTALL) add_dependencies(own_fftwf_lib own_fftw_lib) add_custom_command( COMMAND ${CMAKE_COMMAND} -E echo "Registering own FFTWf byproducts" OUTPUT "${FFTW_EXTERNAL_PATH}/lib/libfftw3f.so" DEPENDS own_fftwf_lib) add_custom_target(own_fftwf_lib_byproducts DEPENDS "${FFTW_EXTERNAL_PATH}/lib/libfftw3f.so") endif() endif() if (FFTW_SINGLE_REQUIRED) set(FFTW_LIBRARIES ${OWN_FFTW_SINGLE} ${FFTW_LIBRARIES}) endif() if (FFTW_DOUBLE_REQUIRED) set(FFTW_LIBRARIES ${OWN_FFTW_DOUBLE} ${FFTW_LIBRARIES}) endif() if (FFTW_INCLUDES) set(FFTW_INCLUDES ${OWN_FFTW_INCLUDES} ${FFTW_INCLUDES}) else() set(FFTW_INCLUDES ${OWN_FFTW_INCLUDES}) endif() #message(STATUS "FFTW_INCLUDES: ${FFTW_INCLUDES}") #message(STATUS "FFTW_LIBRARIES: ${FFTW_LIBRARIES}") relion-3.1.3/cmake/BuildFLTK.cmake000066400000000000000000000054601411340063500166120ustar00rootroot00000000000000 set(FLTK_EXTERNAL_PATH "${CMAKE_SOURCE_DIR}/external/fltk") set(ext_conf_flags_fltk --enable-shared --prefix=${FLTK_EXTERNAL_PATH}) ## ------------------------------------------------------------- PREVIOUS EXT LIBS? -- find_library(FLTK_LIBRARIES NAMES fltk PATHS "${FLTK_EXTERNAL_PATH}/lib" NO_DEFAULT_PATH) find_path(FLTK_INCLUDE_DIR NAMES FL/Fl.H PATHS "${FLTK_EXTERNAL_PATH}/include" NO_DEFAULT_PATH) find_path(FLTK_INCLUDES NAMES FL/Fl.H PATHS "${FLTK_EXTERNAL_PATH}/include" NO_DEFAULT_PATH) if(FLTK_INCLUDE_DIR AND FLTK_LIBRARIES) set(FLTK_FOUND TRUE) message(STATUS "Found previously built non-system FLTK libraries that will be used.") else() set(FLTK_FOUND FALSE) message(STATUS "--------------------------------------------------------") message(STATUS "-------- NO EXISTING FLTK LIBRARIES WHERE FOUND. -------") message(STATUS "-------------- FLTK WILL BE DOWNLOADED AND -------------") message(STATUS "--------------- BUILT DURING COMPILE-TIME. -------------") message(STATUS "--------------------------------------------------------") message(STATUS "---- A WORKING INTERNET CONNECTION WILL BE REQUIRED. ---") message(STATUS "--------------------------------------------------------") endif() ## ----------------------------------------------------------------- NEW EXT LIBS? 
-- if(NOT FLTK_FOUND) set(FLTK_LIBRARIES "${FLTK_EXTERNAL_PATH}/lib/${CMAKE_SHARED_LIBRARY_PREFIX}fltk${CMAKE_SHARED_LIBRARY_SUFFIX}" ) set(FLTK_INCLUDE_DIR "${FLTK_EXTERNAL_PATH}/include" ) include(ExternalProject) set(FLTK_EXTERNAL_LIBS_TAR_DIRECTORY ${FLTK_EXTERNAL_PATH}) set(FLTK_EXTERNAL_LIBS_EXTRACT_TARGET ${FLTK_EXTERNAL_LIBS_TAR_DIRECTORY}) message(STATUS "no previous fltk found, the following paths are set for libs/headers TO BE built") set(FLTK_TAR_FILE ftp://ftp.mrc-lmb.cam.ac.uk/pub/scheres/fltk-1.3.5-source.tar.gz) # FLTK 1.3.5 set(FLTK_HASH e85017defd5a03ae82e634311db87bbf) set(FLTK_TAR_NAME fltk-1.3.5-source.tar.gz) set(FLTK_LIB_DIR ${FLTK_EXTERNAL_LIBS_EXTRACT_TARGET}/fltk) set(FLTK_BUILD_DIR ${FLTK_EXTERNAL_LIBS_EXTRACT_TARGET}/fltk-build) #set(CMAKE_INSTALL_PREFIX ${FLTK_EXTERNAL_PATH}) externalproject_add(OWN_FLTK URL ${FLTK_TAR_FILE} # TIMEOUT 15 URL_MD5 ${FLTK_HASH} DOWNLOAD_DIR ${FLTK_EXTERNAL_LIBS_TAR_DIRECTORY} DOWNLOAD_NAME ${FLTK_TAR_NAME} SOURCE_DIR ${FLTK_LIB_DIR} CONFIGURE_COMMAND /configure ${ext_conf_flags_fltk} INSTALL_DIR ${FLTK_EXTERNAL_PATH}/fltk BINARY_DIR ${FLTK_EXTERNAL_PATH}/fltk BUILD_COMMAND ${MAKE} # LOG_CONFIGURE # LOG_BUILD LOG_INSTALL) set(BUILD_OWN_FLTK TRUE) else() set(BUILD_OWN_FLTK FALSE) endif() message(STATUS "FLTK_INCLUDE_DIR: ${FLTK_INCLUDE_DIR}") message(STATUS "FLTK_LIBRARIES: ${FLTK_LIBRARIES}") relion-3.1.3/cmake/BuildTBB.cmake000066400000000000000000000053631411340063500164630ustar00rootroot00000000000000 set(TBB_PREFIX tbb2018U3) set(TBB_EXTERNAL_PATH "${CMAKE_SOURCE_DIR}/external/tbb") ## ------------------------------------------------------------- PREVIOUS EXT LIBS? -- find_library(TBB_TEST_LIB NAMES tbb PATHS "${TBB_EXTERNAL_PATH}/${TBB_PREFIX}/build/${TBB_PREFIX}_release" NO_DEFAULT_PATH) find_path(TBB_TEST_INCLUDES NAMES tbb/tbb.h PATHS "${TBB_EXTERNAL_PATH}/${TBB_PREFIX}/include" NO_DEFAULT_PATH) if(TBB_TEST_LIB AND TBB_TEST_INCLUDES) set(TBB_FOUND TRUE) message(STATUS "Found previously built non-system TBB libraries that will be used.") else() message(STATUS "TBB_TEST_LIB: ${TBB_TEST_LIB}") message(STATUS "TBB_TEST_INCLUDES: ${TBB_TEST_INCLUDES}") set(TBB_FOUND FALSE) message(STATUS "--------------------------------------------------------") message(STATUS "-------- NO EXISTING TBB LIBRARIES WHERE FOUND. -------") message(STATUS "-------------- TBB WILL BE DOWNLOADED AND -------------") message(STATUS "--------------- BUILT DURING COMPILE-TIME. -------------") message(STATUS "--------------------------------------------------------") message(STATUS "---- A WORKING INTERNET CONNECTION WILL BE REQUIRED. ---") message(STATUS "--------------------------------------------------------") endif() ## ----------------------------------------------------------------- NEW EXT LIBS? 
-- if(NOT TBB_FOUND) message(STATUS "no previous tbb found, the following paths are set for libs/headers TO BE built") include(ExternalProject) set(TBB_URL https://github.com/oneapi-src/oneTBB/archive/2018_U3.tar.gz) # TBB 2018 U3 set(TBB_URL_MD5 6a5b327fc86e2cd259f43af9322fdf42) set(TBB_TAR_NAME tbb-2018_U3.tar.gz) ExternalProject_Add(OWN_TBB URL ${TBB_URL} URL_MD5 ${TBB_URL_MD5} DOWNLOAD_DIR ${TBB_EXTERNAL_PATH} DOWNLOAD_NAME ${TBB_TAR_NAME} SOURCE_DIR "${TBB_EXTERNAL_PATH}/${TBB_PREFIX}" CONFIGURE_COMMAND "" BUILD_COMMAND make tbb_build_prefix=${TBB_PREFIX} BUILD_IN_SOURCE 1 INSTALL_COMMAND "" LOG_DOWNLOAD 1 LOG_BUILD 1 ) set(BUILD_OWN_TBB TRUE) else(NOT TBB_FOUND) set(BUILD_OWN_TBB FALSE) endif(NOT TBB_FOUND) set(TBB_INCLUDE_DIRS "${TBB_EXTERNAL_PATH}/${TBB_PREFIX}/include" ) include_directories("${TBB_INCLUDE_DIRS}") # in release mode set(TBB_LIBRARY_DIRS ${TBB_EXTERNAL_PATH}/${TBB_PREFIX}/build/${TBB_PREFIX}_release) link_directories(${TBB_LIBRARY_DIRS}) set(TBB_LIBRARIES tbb tbbmalloc) # in debug mode #set(TBB_LIBRARY_DIRS ${TBB_EXTERNAL_PATH}/build/${TBB_PREFIX}_debug) #link_directories(${TBB_LIBRARY_DIR}) #set(TBB_LIBRARIES tbb_debug tbbmalloc_debug) install(DIRECTORY ${TBB_LIBRARY_DIRS}/ DESTINATION lib USE_SOURCE_PERMISSIONS FILES_MATCHING PATTERN "*.so*") message(STATUS "TBB_INCLUDE_DIRS: ${TBB_INCLUDE_DIRS}") message(STATUS "TBB_LIBRARY_DIRS: ${TBB_LIBRARY_DIRS}") relion-3.1.3/cmake/BuildTypes.cmake000066400000000000000000000226711411340063500171610ustar00rootroot00000000000000# Extra flags defined on each build type (this file is all optional to include) # # Because gcc is compliant with a float128 type, fftw has become as well. nvcc is NOT. # So -D__INTEL_COMPILER just manages to avoid compiling float128-targets (see fftw3.h, for instance). # Add -G to allow cuda-gdb to break inside kernels. set(EXTRA_NVCC_FLAGS "-D__INTEL_COMPILER --default-stream per-thread") if(MDT_TYPE_CHECK) # Unfortunately -std=c++0x is not supported. -Xcompiler=-std=c++0x also does not work. # This flag is only for developers, so we don't have to worry about old compilers. set(EXTRA_NVCC_FLAGS "${EXTRA_NVCC_FLAGS} -std=c++11") endif() #if(OPENMP_FOUND) # set(EXTRA_NVCC_FLAGS "${EXTRA_NVCC_FLAGS} -fopenmp") #endif() set(RELION_NVCC_FLAGS "${CUDARCH} ${WARN_DBL} ${EXTRA_NVCC_FLAGS}" CACHE STRING "" FORCE) #message(STATUS "RELION_NVCC_FLAGS: ${RELION_NVCC_FLAGS}") # -------------------------- # Debug BUILD # -------------------------- # Additional useful nvcc-flags for debugging # # -keep Keep all intermediate files that are generated during internal compilation steps. # --resource-usage how resource usage such as registers and memeory of the GPU code. This option implies # --nvlink-options=--verbose when --relocatable-device-code=true is set. Otherwise, # it implies --ptxas-options=--verbose. 
# -- Compiler flags ------------------------------------------------- set(RELION_FLAGS_DEBUG "-O0" CACHE STRING "") set(RELION_NVCC_FLAGS_DEBUG "${RELION_NVCC_FLAGS}" CACHE STRING "") # -- Linker flags --------------------------------------------------- set(RELION_LINKER_FLAGS_DEBUG " ") # -- Append compiler and linker flags ------------------------------- set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${RELION_FLAGS_DEBUG}" CACHE STRING "") set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} ${RELION_FLAGS_DEBUG}" CACHE STRING "") set(CMAKE_EXE_LINKER_FLAGS_DEBUG "${CMAKE_EXE_LINKER_FLAGS_DEBUG} ${RELION_LINKER_FLAGS_DEBUG}" CACHE STRING "") set(CUDA_NVCC_FLAGS_DEBUG "${RELION_NVCC_FLAGS_DEBUG}" CACHE STRING "") # -- Add preprocessor defintions ------------------------------------ set(RELION_DEFINITIONS_DEBUG "-DDEBUG_CUDA") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${RELION_DEFINITIONS_DEBUG}") #message(STATUS "Set the extra flags for Debug build type") #message(STATUS "RELION_NVCC_FLAGS_DEBUG : ${RELION_NVCC_FLAGS_DEBUG}") #message(STATUS "CUDA_NVCC_FLAGS_DEBUG : ${CUDA_NVCC_FLAGS_DEBUG}") #message(STATUS "CMAKE_CXX_FLAGS_DEBUG : ${CMAKE_CXX_FLAGS_DEBUG}") #-------------------------------------------------------------------- # -------------------------- # RELWITHDEBINFO BUILD # -------------------------- # -- Compiler flags ------------------------------------------------- set(RELION_NVCC_FLAGS_RELWITHDEBINFO "${RELION_NVCC_FLAGS}" CACHE STRING "") # -- Linker flags --------------------------------------------------- set(RELION_LINKER_FLAGS_RELWITHDEBINFO " ") # -- Append compiler and linker flags ------------------------------- set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} ${RELION_FLAGS_RELWITHDEBINFO}" CACHE STRING "") set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} ${RELION_FLAGS_RELWITHDEBINFO}" CACHE STRING "") set(CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO} ${RELION_LINKER_FLAGS_RELWITHDEBINFO}" CACHE STRING "") set(CUDA_NVCC_FLAGS_RELWITHDEBINFO "${RELION_NVCC_FLAGS_RELWITHDEBINFO}" CACHE STRING "") # -- Add preprocessor defintions ------------------------------------ set(RELION_DEFINITIONS_RELWITHDEBINFO "-DDEBUG_CUDA") #message(STATUS "Set the extra flags for RELWITHDEBINFO build type") #message(STATUS "RELION_NVCC_FLAGS_RELWITHDEBINFO : ${RELION_NVCC_FLAGS_RELWITHDEBINFO}") #message(STATUS "CUDA_NVCC_FLAGS_RELWITHDEBINFO : ${CUDA_NVCC_FLAGS_RELWITHDEBINFO}") #message(STATUS "CMAKE_CXX_FLAGS_RELWITHDEBINFO : ${CMAKE_CXX_FLAGS_RELWITHDEBINFO}") #-------------------------------------------------------------------- # -------------------------- # Release BUILD # -------------------------- # Additional useful nvcc-flags for optimization # # --use_fast_math # --prec-div This option controls single-precision floating-point division and reciprocals. # --prec-div=true enables the IEEE round-to-nearest mode and --prec-div=false enables # the fast approximation mode. --use_fast_math implies --prec-div=false. # --prec-sqrt -||- sqrt # --fmad This option enables (disables) the contraction of floating-point multiplies and # adds/subtracts into floating-point multiply-add operations (FMAD, FFMA, or DFMA). # --use_fast_math implies --fmad=true. # --restrict Programmer assertion that all kernel pointer parameters are restrict pointers. 
# -- Compiler flags ------------------------------------------------- set(RELION_FLAGS_RELEASE "" CACHE STRING "") set(RELION_NVCC_FLAGS_RELEASE "${RELION_NVCC_FLAGS} --disable-warnings" CACHE STRING "") # -- Linker flags --------------------------------------------------- set(RELION_LINKER_FLAGS_RELEASE "") # -- Append compiler and linker flags ------------------------------- #message(STATUS "CCF_RELEASE : ${CMAKE_CXX_FLAGS_RELEASE}") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${RELION_FLAGS_RELEASE}" CACHE STRING "") set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} ${RELION_FLAGS_RELEASE}" CACHE STRING "") set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} ${RELION_LINKER_FLAGS_RELEASE}" CACHE STRING "") set(CUDA_NVCC_FLAGS_RELEASE "${RELION_NVCC_FLAGS_RELEASE}" CACHE STRING "") # -- Add preprocessor defintions ------------------------------------ set(RELION_DEFINITIONS_RELEASE "") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${RELION_DEFINITIONS_RELEASE}") #message(STATUS "RELION_FLAGS_PROFILING : ${RELION_FLAGS_PROFILING}") #message(STATUS "CMAKE_CXX_FLAGS_PROFILING : ${CMAKE_CXX_FLAGS_PROFILING}") #-------------------------------------------------------------------- # ---------------------------------- # NVIDIA Profiling BUILD # (Release for nvprof) # ---------------------------------- # ** NOTE: this will not have overall Release perf. ** # Additional useful nvcc-flags for profiling # # -pg gprof profiling output (needs linker flag) # --resource-usage how resource usage such as registers and memeory of the GPU code. This option implies # --nvlink-options=--verbose when --relocatable-device-code=true is set. Otherwise, # it implies --ptxas-options=--verbose# # -- Compiler flags ------------------------------------------------- set(RELION_FLAGS_PROFILING "" CACHE STRING "") set(RELION_NVCC_FLAGS_PROFILING "${RELION_NVCC_FLAGS} -lineinfo" CACHE STRING "") # -- Linker flags --------------------------------------------------- set(RELION_LINKER_FLAGS_PROFILING "") # -- Append compiler and linker flags ------------------------------- set(CMAKE_CXX_FLAGS_PROFILING "${CMAKE_CXX_FLAGS_RELEASE} ${RELION_FLAGS_PROFILING}" CACHE STRING "") set(CMAKE_C_FLAGS_PROFILING "${CMAKE_C_FLAGS_RELEASE} ${RELION_FALAGS_PROFILING}" CACHE STRING "") set(CMAKE_EXE_LINKER_FLAGS_PROFILING "${CMAKE_EXE_LINKER_FLAGS_RELEASE} ${RELION_LINKER_FLAGS_PROFILING}" CACHE STRING "") set(CUDA_NVCC_FLAGS_PROFILING "${RELION_NVCC_FLAGS_PROFILING}" CACHE STRING "") # -- Add preprocessor defintions ------------------------------------ set(RELION_DEFINITIONS_PROFILING "-DCUDA_PROFILING") set(CMAKE_CXX_FLAGS_PROFILING "${CMAKE_CXX_FLAGS_PROFILING} ${RELION_DEFINITIONS_PROFILING}") #message(STATUS "RELION_FLAGS_PROFILING : ${RELION_FLAGS_PROFILING}") #message(STATUS "CMAKE_CXX_FLAGS_PROFILING : ${CMAKE_CXX_FLAGS_PROFILING}") #-------------------------------------------------------------------- # ---------------------------------- # Benchmarking BUILD # (Release with profiling output) # ---------------------------------- # -- Compiler flags ------------------------------------------------- set(RELION_FLAGS_BENCHMARKING "" CACHE STRING "") set(RELION_NVCC_FLAGS_BENCHMARKING "${RELION_NVCC_FLAGS} " CACHE STRING "") # -- Linker flags --------------------------------------------------- set(RELION_LINKER_FLAGS_BENCHMARKING "") # -- Append compiler and linker flags ------------------------------- set(CMAKE_CXX_FLAGS_BENCHMARKING "${CMAKE_CXX_FLAGS_RELEASE} ${RELION_FLAGS_BENCHMARKING}" CACHE 
STRING "" FORCE) set(CMAKE_C_FLAGS_BENCHMARKING "${CMAKE_C_FLAGS_RELEASE} ${RELION_FLAGS_BENCHMARKING}" CACHE STRING "" FORCE) set(CMAKE_EXE_LINKER_FLAGS_BENCHMARKING "${CMAKE_EXE_LINKER_FLAGS_RELEASE} ${RELION_LINKER_FLAGS_BENCHMARKING}" CACHE STRING "" FORCE) set(CUDA_NVCC_FLAGS_BENCHMARKING "${RELION_NVCC_FLAGS_BENCHMARKING}" CACHE STRING "" FORCE) # -- Add preprocessor defintions ------------------------------------ set(RELION_DEFINITIONS_BENCHMARKING "-DCUDA_BENCHMARK -DTIMING") set(CMAKE_CXX_FLAGS_BENCHMARKING "${CMAKE_CXX_FLAGS_BENCHMARKING} ${RELION_DEFINITIONS_BENCHMARKING}") #-------------------------------------------------------------------- relion-3.1.3/cmake/FindFFTW.cmake000066400000000000000000000037701411340063500164430ustar00rootroot00000000000000# CMake helper to locate the needed libraries and headers # for compilation of RELION binaries. # set(LIB_PATHFFT $ENV{FFTW_LIB}) set(INC_PATHFFT $ENV{FFTW_INCLUDE}) unset(FFTW_PATH CACHE) unset(FFTW_INCLUDES CACHE) unset(FFTW_LIBRARIES CACHE) if(DEFINED ENV{FFTW_INCLUDE}) find_path(FFTW_PATH NAMES fftw3.h PATHS ${INC_PATHFFT} ) find_path(FFTW_INCLUDES NAMES fftw3.h PATHS ${INC_PATHFFT} ) else() find_path(FFTW_PATH NAMES fftw3.h ) find_path(FFTW_INCLUDES NAMES fftw3.h ) endif() find_library(_FFTW_SINGLE NAMES fftw3f PATHS ${LIB_PATHFFT} $ENV{FFTW_LIB} $ENV{FFTW_HOME} ) find_library(_FFTW_DOUBLE NAMES fftw3 PATHS ${LIB_PATHFFT} $ENV{FFTW_LIB} $ENV{FFTW_HOME} ) if (FFTW_PATH AND FFTW_INCLUDES AND (_FFTW_SINGLE OR NOT FFTW_FIND_REQUIRED_SINGLE) AND (_FFTW_DOUBLE OR NOT FFTW_FIND_REQUIRED_DOUBLE)) set(FFTW_FOUND TRUE) if (_FFTW_SINGLE) set(FFTW_LIBRARIES ${FFTW_LIBRARIES} ${_FFTW_SINGLE}) endif() if (_FFTW_DOUBLE) set(FFTW_LIBRARIES ${FFTW_LIBRARIES} ${_FFTW_DOUBLE}) endif() message(STATUS "Found FFTW") message(STATUS "FFTW_PATH: ${FFTW_PATH}") message(STATUS "FFTW_INCLUDES: ${FFTW_INCLUDES}") message(STATUS "FFTW_LIBRARIES: ${FFTW_LIBRARIES}") else() set(FFTW_FOUND FALSE) #message(STATUS "FFTW_PATH: ${FFTW_PATH}") #message(STATUS "FFTW_INCLUDES: ${FFTW_INCLUDES}") #message(STATUS "_FFTW_SINGLE: ${_FFTW_SINGLE}") #message(STATUS "FFTW_FIND_REQUIRED_SINGLE: ${FFTW_FIND_REQUIRED_SINGLE}") #message(STATUS "_FFTW_DOUBLE: ${_FFTW_DOUBLE}") #message(STATUS "FFTW_FIND_REQUIRED_DOUBLE: ${FFTW_FIND_REQUIRED_DOUBLE}") if(NOT _FFTW_DOUBLE AND FFTW_FIND_REQUIRED_SINGLE) message(STATUS "Single-precision FFTW was required but NOT found") endif() if(NOT _FFTW_SINGLE AND FFTW_FIND_REQUIRED_DOUBLE) message(STATUS "Double-precision FFTW was required but NOT found") endif() endif() if(FFTW_FIND_REQUIRED AND NOT FFTW_FOUND) message( FATAL_ERROR "The required FFTW libraries were not found." ) endif(FFTW_FIND_REQUIRED AND NOT FFTW_FOUND) relion-3.1.3/cmake/FindTBB.cmake000066400000000000000000000275541411340063500163120ustar00rootroot00000000000000# The MIT License (MIT) # # Copyright (c) 2015 Justus Calvin # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. 
# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. # # FindTBB # ------- # # Find TBB include directories and libraries. # # Usage: # # find_package(TBB [major[.minor]] [EXACT] # [QUIET] [REQUIRED] # [[COMPONENTS] [components...]] # [OPTIONAL_COMPONENTS components...]) # # where the allowed components are tbbmalloc and tbb_preview. Users may modify # the behavior of this module with the following variables: # # * TBB_ROOT_DIR - The base directory the of TBB installation. # * TBB_INCLUDE_DIR - The directory that contains the TBB headers files. # * TBB_LIBRARY - The directory that contains the TBB library files. # * TBB__LIBRARY - The path of the TBB the corresponding TBB library. # These libraries, if specified, override the # corresponding library search results, where # may be tbb, tbb_debug, tbbmalloc, tbbmalloc_debug, # tbb_preview, or tbb_preview_debug. # * TBB_USE_DEBUG_BUILD - The debug version of tbb libraries, if present, will # be used instead of the release version. # # Users may modify the behavior of this module with the following environment # variables: # # * TBB_INSTALL_DIR # * TBBROOT # * LIBRARY_PATH # # This module will set the following variables: # # * TBB_FOUND - Set to false, or undefined, if we haven’t found, or # don’t want to use TBB. # * TBB__FOUND - If False, optional part of TBB sytem is # not available. # * TBB_VERSION - The full version string # * TBB_VERSION_MAJOR - The major version # * TBB_VERSION_MINOR - The minor version # * TBB_INTERFACE_VERSION - The interface version number defined in # tbb/tbb_stddef.h until oneAPI, or in # tbb/version.h under oneAPI # * TBB__LIBRARY_RELEASE - The path of the TBB release version of # , where may be tbb, tbb_debug, # tbbmalloc, tbbmalloc_debug, tbb_preview, or # tbb_preview_debug. # * TBB__LIBRARY_DEGUG - The path of the TBB release version of # , where may be tbb, tbb_debug, # tbbmalloc, tbbmalloc_debug, tbb_preview, or # tbb_preview_debug. # # The following varibles should be used to build and link with TBB: # # * TBB_INCLUDE_DIRS - The include directory for TBB. # * TBB_LIBRARIES - The libraries to link against to use TBB. # * TBB_LIBRARIES_RELEASE - The release libraries to link against to use TBB. # * TBB_LIBRARIES_DEBUG - The debug libraries to link against to use TBB. # * TBB_DEFINITIONS - Definitions to use when compiling code that uses # TBB. # * TBB_DEFINITIONS_RELEASE - Definitions to use when compiling release code that # uses TBB. # * TBB_DEFINITIONS_DEBUG - Definitions to use when compiling debug code that # uses TBB. # # This module will also create the "tbb" target that may be used when building # executables and libraries. 
include(FindPackageHandleStandardArgs) if(NOT TBB_FOUND) ################################## # Check the build type ################################## if(NOT DEFINED TBB_USE_DEBUG_BUILD) if(CMAKE_BUILD_TYPE MATCHES "(Debug|DEBUG|debug|RelWithDebInfo|RELWITHDEBINFO|relwithdebinfo)") set(TBB_BUILD_TYPE DEBUG) else() set(TBB_BUILD_TYPE RELEASE) endif() elseif(TBB_USE_DEBUG_BUILD) set(TBB_BUILD_TYPE DEBUG) else() set(TBB_BUILD_TYPE RELEASE) endif() ################################## # Set the TBB search directories ################################## # Define search paths based on user input and environment variables set(TBB_SEARCH_DIR ${TBB_ROOT_DIR} $ENV{TBB_INSTALL_DIR} $ENV{TBBROOT}) # Define the search directories based on the current platform if(CMAKE_SYSTEM_NAME STREQUAL "Windows") set(TBB_DEFAULT_SEARCH_DIR "C:/Program Files/Intel/TBB" "C:/Program Files (x86)/Intel/TBB") # Set the target architecture if(CMAKE_SIZEOF_VOID_P EQUAL 8) set(TBB_ARCHITECTURE "intel64") else() set(TBB_ARCHITECTURE "ia32") endif() # Set the TBB search library path search suffix based on the version of VC if(WINDOWS_STORE) set(TBB_LIB_PATH_SUFFIX "lib/${TBB_ARCHITECTURE}/vc11_ui") elseif(MSVC14) set(TBB_LIB_PATH_SUFFIX "lib/${TBB_ARCHITECTURE}/vc14") elseif(MSVC12) set(TBB_LIB_PATH_SUFFIX "lib/${TBB_ARCHITECTURE}/vc12") elseif(MSVC11) set(TBB_LIB_PATH_SUFFIX "lib/${TBB_ARCHITECTURE}/vc11") elseif(MSVC10) set(TBB_LIB_PATH_SUFFIX "lib/${TBB_ARCHITECTURE}/vc10") endif() # Add the library path search suffix for the VC independent version of TBB list(APPEND TBB_LIB_PATH_SUFFIX "lib/${TBB_ARCHITECTURE}/vc_mt") elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin") # OS X set(TBB_DEFAULT_SEARCH_DIR "/opt/intel/tbb") # TODO: Check to see which C++ library is being used by the compiler. if(NOT ${CMAKE_SYSTEM_VERSION} VERSION_LESS 13.0) # The default C++ library on OS X 10.9 and later is libc++ set(TBB_LIB_PATH_SUFFIX "lib/libc++" "lib") else() set(TBB_LIB_PATH_SUFFIX "lib") endif() elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux") # Linux set(TBB_DEFAULT_SEARCH_DIR "/opt/intel/tbb" "/usr/include/tbb") # TODO: Check compiler version to see the suffix should be /gcc4.1 or # /gcc4.1. For now, assume that the compiler is more recent than # gcc 4.4.x or later. 
# if(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") # set(TBB_LIB_PATH_SUFFIX "lib/intel64/gcc4.4") # elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^i.86$") # set(TBB_LIB_PATH_SUFFIX "lib/ia32/gcc4.4") # endif() endif() ################################## # Find the TBB include dir ################################## find_path(TBB_INCLUDE_DIRS tbb/tbb.h HINTS ${TBB_INCLUDE_DIR} ${TBB_SEARCH_DIR} PATHS ${TBB_DEFAULT_SEARCH_DIR} PATH_SUFFIXES include) ################################## # Set version strings ################################## if(TBB_INCLUDE_DIRS) if(EXISTS "${TBB_INCLUDE_DIRS}/tbb/tbb_stddef.h") file(READ "${TBB_INCLUDE_DIRS}/tbb/tbb_stddef.h" _tbb_version_file) else() file(READ "${TBB_INCLUDE_DIRS}/oneapi/tbb/version.h" _tbb_version_file) endif() string(REGEX REPLACE ".*#define TBB_VERSION_MAJOR ([0-9]+).*" "\\1" TBB_VERSION_MAJOR "${_tbb_version_file}") string(REGEX REPLACE ".*#define TBB_VERSION_MINOR ([0-9]+).*" "\\1" TBB_VERSION_MINOR "${_tbb_version_file}") string(REGEX REPLACE ".*#define TBB_INTERFACE_VERSION ([0-9]+).*" "\\1" TBB_INTERFACE_VERSION "${_tbb_version_file}") set(TBB_VERSION "${TBB_VERSION_MAJOR}.${TBB_VERSION_MINOR}") endif() ################################## # Find TBB components ################################## if(TBB_VERSION VERSION_LESS 4.3) set(TBB_SEARCH_COMPOMPONENTS tbb_preview tbbmalloc tbb) else() set(TBB_SEARCH_COMPOMPONENTS tbb_preview tbbmalloc_proxy tbbmalloc tbb) endif() # Find each component foreach(_comp ${TBB_SEARCH_COMPOMPONENTS}) if(";${TBB_FIND_COMPONENTS};tbb;" MATCHES ";${_comp};") # Search for the libraries find_library(TBB_${_comp}_LIBRARY_RELEASE ${_comp} HINTS ${TBB_LIBRARY} ${TBB_SEARCH_DIR} PATHS ${TBB_DEFAULT_SEARCH_DIR} ENV LIBRARY_PATH PATH_SUFFIXES ${TBB_LIB_PATH_SUFFIX}) find_library(TBB_${_comp}_LIBRARY_DEBUG ${_comp}_debug HINTS ${TBB_LIBRARY} ${TBB_SEARCH_DIR} PATHS ${TBB_DEFAULT_SEARCH_DIR} ENV LIBRARY_PATH PATH_SUFFIXES ${TBB_LIB_PATH_SUFFIX}) if(TBB_${_comp}_LIBRARY_DEBUG) list(APPEND TBB_LIBRARIES_DEBUG "${TBB_${_comp}_LIBRARY_DEBUG}") endif() if(TBB_${_comp}_LIBRARY_RELEASE) list(APPEND TBB_LIBRARIES_RELEASE "${TBB_${_comp}_LIBRARY_RELEASE}") endif() if(TBB_${_comp}_LIBRARY_${TBB_BUILD_TYPE} AND NOT TBB_${_comp}_LIBRARY) set(TBB_${_comp}_LIBRARY "${TBB_${_comp}_LIBRARY_${TBB_BUILD_TYPE}}") endif() if(TBB_${_comp}_LIBRARY AND EXISTS "${TBB_${_comp}_LIBRARY}") set(TBB_${_comp}_FOUND TRUE) else() set(TBB_${_comp}_FOUND FALSE) endif() # Mark internal variables as advanced mark_as_advanced(TBB_${_comp}_LIBRARY_RELEASE) mark_as_advanced(TBB_${_comp}_LIBRARY_DEBUG) mark_as_advanced(TBB_${_comp}_LIBRARY) endif() endforeach() ################################## # Set compile flags and libraries ################################## set(TBB_DEFINITIONS_RELEASE "") set(TBB_DEFINITIONS_DEBUG "-DTBB_USE_DEBUG=1") if(TBB_LIBRARIES_${TBB_BUILD_TYPE}) set(TBB_DEFINITIONS "${TBB_DEFINITIONS_${TBB_BUILD_TYPE}}") set(TBB_LIBRARIES "${TBB_LIBRARIES_${TBB_BUILD_TYPE}}") elseif(TBB_LIBRARIES_RELEASE) set(TBB_DEFINITIONS "${TBB_DEFINITIONS_RELEASE}") set(TBB_LIBRARIES "${TBB_LIBRARIES_RELEASE}") elseif(TBB_LIBRARIES_DEBUG) set(TBB_DEFINITIONS "${TBB_DEFINITIONS_DEBUG}") set(TBB_LIBRARIES "${TBB_LIBRARIES_DEBUG}") endif() find_package_handle_standard_args(TBB REQUIRED_VARS TBB_INCLUDE_DIRS TBB_LIBRARIES HANDLE_COMPONENTS VERSION_VAR TBB_VERSION) ################################## # Create targets ################################## if(NOT CMAKE_VERSION VERSION_LESS 3.0 AND TBB_FOUND) add_library(tbb SHARED IMPORTED) set_target_properties(tbb PROPERTIES 
INTERFACE_INCLUDE_DIRECTORIES ${TBB_INCLUDE_DIRS} IMPORTED_LOCATION ${TBB_LIBRARIES}) if(TBB_LIBRARIES_RELEASE AND TBB_LIBRARIES_DEBUG) set_target_properties(tbb PROPERTIES INTERFACE_COMPILE_DEFINITIONS "$<$<OR:$<CONFIG:Debug>,$<CONFIG:RelWithDebInfo>>:TBB_USE_DEBUG=1>" IMPORTED_LOCATION_DEBUG ${TBB_LIBRARIES_DEBUG} IMPORTED_LOCATION_RELWITHDEBINFO ${TBB_LIBRARIES_DEBUG} IMPORTED_LOCATION_RELEASE ${TBB_LIBRARIES_RELEASE} IMPORTED_LOCATION_MINSIZEREL ${TBB_LIBRARIES_RELEASE} ) elseif(TBB_LIBRARIES_RELEASE) set_target_properties(tbb PROPERTIES IMPORTED_LOCATION ${TBB_LIBRARIES_RELEASE}) else() set_target_properties(tbb PROPERTIES INTERFACE_COMPILE_DEFINITIONS "${TBB_DEFINITIONS_DEBUG}" IMPORTED_LOCATION ${TBB_LIBRARIES_DEBUG} ) endif() endif() mark_as_advanced(TBB_INCLUDE_DIRS TBB_LIBRARIES) unset(TBB_ARCHITECTURE) unset(TBB_BUILD_TYPE) unset(TBB_LIB_PATH_SUFFIX) unset(TBB_DEFAULT_SEARCH_DIR) endif() relion-3.1.3/cmake/GetGitRevisionDescription.cmake000066400000000000000000000115431411340063500221770ustar00rootroot00000000000000# - Returns a version string from Git # # These functions force a re-configure on each git commit so that you can # trust the values of the variables in your build system. # # get_git_head_revision(<refspecvar> <hashvar> [<additional arguments to git describe> ...]) # # Returns the refspec and sha hash of the current head revision # # git_describe(<var> [<additional arguments to git describe> ...]) # # Returns the results of git describe on the source tree, adjusting # the output so that it tests false if an error occurs. # # git_get_exact_tag(<var> [<additional arguments to git describe> ...]) # # Returns the results of git describe --exact-match on the source tree, # adjusting the output so that it tests false if there was no exact # matching tag. # # git_local_changes(<var>) # # Returns either "CLEAN" or "DIRTY" with respect to uncommitted changes. # Uses the return code of "git diff-index --quiet HEAD --". # Does not regard untracked files. # # Requires CMake 2.6 or newer (uses the 'function' command) # # Original Author: # 2009-2010 Ryan Pavlik # http://academic.cleardefinition.com # Iowa State University HCI Graduate Program/VRAC # # Copyright Iowa State University 2009-2010. # Distributed under the Boost Software License, Version 1.0.
# (See accompanying file LICENSE_1_0.txt or copy at # http://www.boost.org/LICENSE_1_0.txt) if(__get_git_revision_description) return() endif() set(__get_git_revision_description YES) # We must run the following at "include" time, not at function call time, # to find the path to this module rather than the path to a calling list file get_filename_component(_gitdescmoddir ${CMAKE_CURRENT_LIST_FILE} PATH) function(get_git_head_revision _refspecvar _hashvar) set(GIT_PARENT_DIR "${CMAKE_CURRENT_SOURCE_DIR}") set(GIT_DIR "${GIT_PARENT_DIR}/.git") while(NOT EXISTS "${GIT_DIR}") # .git dir not found, search parent directories set(GIT_PREVIOUS_PARENT "${GIT_PARENT_DIR}") get_filename_component(GIT_PARENT_DIR ${GIT_PARENT_DIR} PATH) if(GIT_PARENT_DIR STREQUAL GIT_PREVIOUS_PARENT) # We have reached the root directory, we are not in git set(${_refspecvar} "GITDIR-NOTFOUND" PARENT_SCOPE) set(${_hashvar} "GITDIR-NOTFOUND" PARENT_SCOPE) return() endif() set(GIT_DIR "${GIT_PARENT_DIR}/.git") endwhile() # check if this is a submodule if(NOT IS_DIRECTORY ${GIT_DIR}) file(READ ${GIT_DIR} submodule) string(REGEX REPLACE "gitdir: (.*)\n$" "\\1" GIT_DIR_RELATIVE ${submodule}) get_filename_component(SUBMODULE_DIR ${GIT_DIR} PATH) get_filename_component(GIT_DIR ${SUBMODULE_DIR}/${GIT_DIR_RELATIVE} ABSOLUTE) endif() set(GIT_DATA "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/git-data") if(NOT EXISTS "${GIT_DATA}") file(MAKE_DIRECTORY "${GIT_DATA}") endif() if(NOT EXISTS "${GIT_DIR}/HEAD") return() endif() set(HEAD_FILE "${GIT_DATA}/HEAD") configure_file("${GIT_DIR}/HEAD" "${HEAD_FILE}" COPYONLY) configure_file("${_gitdescmoddir}/GetGitRevisionDescription.cmake.in" "${GIT_DATA}/grabRef.cmake" @ONLY) include("${GIT_DATA}/grabRef.cmake") set(${_refspecvar} "${HEAD_REF}" PARENT_SCOPE) set(${_hashvar} "${HEAD_HASH}" PARENT_SCOPE) endfunction() function(git_describe _var) if(NOT GIT_FOUND) find_package(Git QUIET) endif() get_git_head_revision(refspec hash) if(NOT GIT_FOUND) set(${_var} "GIT-NOTFOUND" PARENT_SCOPE) return() endif() if(NOT hash) set(${_var} "HEAD-HASH-NOTFOUND" PARENT_SCOPE) return() endif() # TODO sanitize #if((${ARGN}" MATCHES "&&") OR # (ARGN MATCHES "||") OR # (ARGN MATCHES "\\;")) # message("Please report the following error to the project!") # message(FATAL_ERROR "Looks like someone's doing something nefarious with git_describe! 
Passed arguments ${ARGN}") #endif() #message(STATUS "Arguments to execute_process: ${ARGN}") execute_process(COMMAND "${GIT_EXECUTABLE}" describe ${hash} ${ARGN} WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" RESULT_VARIABLE res OUTPUT_VARIABLE out ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) if(NOT res EQUAL 0) set(out "${out}-${res}-NOTFOUND") endif() set(${_var} "${out}" PARENT_SCOPE) endfunction() function(git_get_exact_tag _var) git_describe(out --exact-match ${ARGN}) set(${_var} "${out}" PARENT_SCOPE) endfunction() function(git_local_changes _var) if(NOT GIT_FOUND) find_package(Git QUIET) endif() get_git_head_revision(refspec hash) if(NOT GIT_FOUND) set(${_var} "GIT-NOTFOUND" PARENT_SCOPE) return() endif() if(NOT hash) set(${_var} "HEAD-HASH-NOTFOUND" PARENT_SCOPE) return() endif() execute_process(COMMAND "${GIT_EXECUTABLE}" diff-index --quiet HEAD -- WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" RESULT_VARIABLE res OUTPUT_VARIABLE out ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) if(res EQUAL 0) set(${_var} "CLEAN" PARENT_SCOPE) else() set(${_var} "DIRTY" PARENT_SCOPE) endif() endfunction() relion-3.1.3/cmake/GetGitRevisionDescription.cmake.in000066400000000000000000000024031411340063500225770ustar00rootroot00000000000000# # Internal file for GetGitRevisionDescription.cmake # # Requires CMake 2.6 or newer (uses the 'function' command) # # Original Author: # 2009-2010 Ryan Pavlik # http://academic.cleardefinition.com # Iowa State University HCI Graduate Program/VRAC # # Copyright Iowa State University 2009-2010. # Distributed under the Boost Software License, Version 1.0. # (See accompanying file LICENSE_1_0.txt or copy at # http://www.boost.org/LICENSE_1_0.txt) set(HEAD_HASH) file(READ "@HEAD_FILE@" HEAD_CONTENTS LIMIT 1024) string(STRIP "${HEAD_CONTENTS}" HEAD_CONTENTS) if(HEAD_CONTENTS MATCHES "ref") # named branch string(REPLACE "ref: " "" HEAD_REF "${HEAD_CONTENTS}") if(EXISTS "@GIT_DIR@/${HEAD_REF}") configure_file("@GIT_DIR@/${HEAD_REF}" "@GIT_DATA@/head-ref" COPYONLY) else() configure_file("@GIT_DIR@/packed-refs" "@GIT_DATA@/packed-refs" COPYONLY) file(READ "@GIT_DATA@/packed-refs" PACKED_REFS) if(${PACKED_REFS} MATCHES "([0-9a-z]*) ${HEAD_REF}") set(HEAD_HASH "${CMAKE_MATCH_1}") endif() endif() else() # detached HEAD configure_file("@GIT_DIR@/HEAD" "@GIT_DATA@/head-ref" COPYONLY) endif() if(NOT HEAD_HASH) file(READ "@GIT_DATA@/head-ref" HEAD_HASH LIMIT 1024) string(STRIP "${HEAD_HASH}" HEAD_HASH) endif() relion-3.1.3/cmake/TestX86.c000066400000000000000000000002311411340063500154500ustar00rootroot00000000000000int main() { #if defined (__i386__) || defined (__x86_64__) || defined (_M_IX86) || defined (_M_X64) return 0; #else #error This is not x86 #endif } relion-3.1.3/data/000077500000000000000000000000001411340063500137145ustar00rootroot00000000000000relion-3.1.3/data/mtf_de20_300kV.star000066400000000000000000000215201411340063500170720ustar00rootroot00000000000000data_mtf_de20_300kv loop_ _rlnResolutionInversePixel _rlnMtfValue 0.0000 1.0000 0.0008 0.9999 0.0016 0.9995 0.0023 0.9989 0.0031 0.9980 0.0039 0.9969 0.0047 0.9956 0.0055 0.9941 0.0063 0.9925 0.0070 0.9907 0.0078 0.9887 0.0086 0.9867 0.0094 0.9846 0.0102 0.9824 0.0109 0.9802 0.0117 0.9780 0.0125 0.9758 0.0133 0.9736 0.0141 0.9715 0.0148 0.9694 0.0156 0.9673 0.0164 0.9653 0.0172 0.9634 0.0180 0.9616 0.0188 0.9598 0.0195 0.9581 0.0203 0.9565 0.0211 0.9550 0.0219 0.9535 0.0227 0.9521 0.0234 0.9507 0.0242 0.9494 0.0250 0.9481 0.0258 0.9469 0.0266 0.9457 0.0273 0.9446 0.0281 0.9434 0.0289 0.9423 0.0297 0.9412 
0.0305 0.9401 0.0313 0.9391 0.0320 0.9380 0.0328 0.9369 0.0336 0.9359 0.0344 0.9348 0.0352 0.9337 0.0359 0.9327 0.0367 0.9316 0.0375 0.9305 0.0383 0.9294 0.0391 0.9283 0.0398 0.9272 0.0406 0.9261 0.0414 0.9250 0.0422 0.9238 0.0430 0.9227 0.0438 0.9215 0.0445 0.9204 0.0453 0.9192 0.0461 0.9180 0.0469 0.9168 0.0477 0.9156 0.0484 0.9144 0.0492 0.9132 0.0500 0.9120 0.0508 0.9108 0.0516 0.9095 0.0523 0.9083 0.0531 0.9070 0.0539 0.9058 0.0547 0.9045 0.0555 0.9033 0.0563 0.9020 0.0570 0.9007 0.0578 0.8994 0.0586 0.8981 0.0594 0.8968 0.0602 0.8955 0.0609 0.8942 0.0617 0.8929 0.0625 0.8916 0.0633 0.8903 0.0641 0.8889 0.0648 0.8876 0.0656 0.8863 0.0664 0.8850 0.0672 0.8836 0.0680 0.8823 0.0688 0.8809 0.0695 0.8796 0.0703 0.8783 0.0711 0.8769 0.0719 0.8756 0.0727 0.8742 0.0734 0.8728 0.0742 0.8715 0.0750 0.8701 0.0758 0.8688 0.0766 0.8674 0.0773 0.8660 0.0781 0.8647 0.0789 0.8633 0.0797 0.8619 0.0805 0.8606 0.0813 0.8592 0.0820 0.8578 0.0828 0.8564 0.0836 0.8551 0.0844 0.8537 0.0852 0.8523 0.0859 0.8509 0.0867 0.8495 0.0875 0.8482 0.0883 0.8468 0.0891 0.8454 0.0898 0.8440 0.0906 0.8426 0.0914 0.8412 0.0922 0.8399 0.0930 0.8385 0.0938 0.8371 0.0945 0.8357 0.0953 0.8343 0.0961 0.8329 0.0969 0.8315 0.0977 0.8301 0.0984 0.8288 0.0992 0.8274 0.1000 0.8260 0.1008 0.8246 0.1016 0.8232 0.1023 0.8218 0.1031 0.8204 0.1039 0.8190 0.1047 0.8176 0.1055 0.8162 0.1063 0.8148 0.1070 0.8134 0.1078 0.8120 0.1086 0.8106 0.1094 0.8093 0.1102 0.8079 0.1109 0.8065 0.1117 0.8051 0.1125 0.8037 0.1133 0.8023 0.1141 0.8009 0.1148 0.7995 0.1156 0.7981 0.1164 0.7967 0.1172 0.7953 0.1180 0.7939 0.1188 0.7925 0.1195 0.7911 0.1203 0.7897 0.1211 0.7883 0.1219 0.7870 0.1227 0.7856 0.1234 0.7842 0.1242 0.7828 0.1250 0.7814 0.1258 0.7800 0.1266 0.7786 0.1273 0.7772 0.1281 0.7758 0.1289 0.7744 0.1297 0.7731 0.1305 0.7717 0.1313 0.7703 0.1320 0.7689 0.1328 0.7675 0.1336 0.7661 0.1344 0.7647 0.1352 0.7634 0.1359 0.7620 0.1367 0.7606 0.1375 0.7592 0.1383 0.7578 0.1391 0.7564 0.1398 0.7551 0.1406 0.7537 0.1414 0.7523 0.1422 0.7509 0.1430 0.7496 0.1438 0.7482 0.1445 0.7468 0.1453 0.7454 0.1461 0.7441 0.1469 0.7427 0.1477 0.7413 0.1484 0.7400 0.1492 0.7386 0.1500 0.7372 0.1508 0.7359 0.1516 0.7345 0.1523 0.7331 0.1531 0.7318 0.1539 0.7304 0.1547 0.7290 0.1555 0.7277 0.1563 0.7263 0.1570 0.7250 0.1578 0.7236 0.1586 0.7223 0.1594 0.7209 0.1602 0.7196 0.1609 0.7182 0.1617 0.7168 0.1625 0.7155 0.1633 0.7142 0.1641 0.7128 0.1648 0.7115 0.1656 0.7101 0.1664 0.7088 0.1672 0.7074 0.1680 0.7061 0.1688 0.7047 0.1695 0.7034 0.1703 0.7021 0.1711 0.7007 0.1719 0.6994 0.1727 0.6981 0.1734 0.6967 0.1742 0.6954 0.1750 0.6941 0.1758 0.6927 0.1766 0.6914 0.1773 0.6901 0.1781 0.6887 0.1789 0.6874 0.1797 0.6861 0.1805 0.6848 0.1813 0.6834 0.1820 0.6821 0.1828 0.6808 0.1836 0.6795 0.1844 0.6782 0.1852 0.6768 0.1859 0.6755 0.1867 0.6742 0.1875 0.6729 0.1883 0.6716 0.1891 0.6703 0.1898 0.6689 0.1906 0.6676 0.1914 0.6663 0.1922 0.6650 0.1930 0.6637 0.1938 0.6624 0.1945 0.6611 0.1953 0.6598 0.1961 0.6585 0.1969 0.6572 0.1977 0.6559 0.1984 0.6546 0.1992 0.6533 0.2000 0.6520 0.2008 0.6507 0.2016 0.6494 0.2023 0.6480 0.2031 0.6467 0.2039 0.6455 0.2047 0.6442 0.2055 0.6429 0.2063 0.6416 0.2070 0.6403 0.2078 0.6390 0.2086 0.6377 0.2094 0.6364 0.2102 0.6351 0.2109 0.6338 0.2117 0.6325 0.2125 0.6312 0.2133 0.6299 0.2141 0.6286 0.2148 0.6273 0.2156 0.6260 0.2164 0.6247 0.2172 0.6235 0.2180 0.6222 0.2188 0.6209 0.2195 0.6196 0.2203 0.6183 0.2211 0.6170 0.2219 0.6157 0.2227 0.6144 0.2234 0.6132 0.2242 0.6119 0.2250 0.6106 0.2258 0.6093 0.2266 0.6080 0.2273 0.6067 0.2281 
0.6054 0.2289 0.6042 0.2297 0.6029 0.2305 0.6016 0.2313 0.6003 0.2320 0.5990 0.2328 0.5977 0.2336 0.5965 0.2344 0.5952 0.2352 0.5939 0.2359 0.5926 0.2367 0.5913 0.2375 0.5901 0.2383 0.5888 0.2391 0.5875 0.2398 0.5862 0.2406 0.5849 0.2414 0.5836 0.2422 0.5824 0.2430 0.5811 0.2438 0.5798 0.2445 0.5785 0.2453 0.5773 0.2461 0.5760 0.2469 0.5747 0.2477 0.5734 0.2484 0.5721 0.2492 0.5709 0.2500 0.5696 0.2508 0.5683 0.2516 0.5670 0.2523 0.5657 0.2531 0.5645 0.2539 0.5632 0.2547 0.5619 0.2555 0.5606 0.2563 0.5594 0.2570 0.5581 0.2578 0.5568 0.2586 0.5555 0.2594 0.5543 0.2602 0.5530 0.2609 0.5517 0.2617 0.5504 0.2625 0.5492 0.2633 0.5479 0.2641 0.5466 0.2648 0.5453 0.2656 0.5441 0.2664 0.5428 0.2672 0.5415 0.2680 0.5402 0.2688 0.5390 0.2695 0.5377 0.2703 0.5364 0.2711 0.5351 0.2719 0.5339 0.2727 0.5326 0.2734 0.5313 0.2742 0.5300 0.2750 0.5288 0.2758 0.5275 0.2766 0.5262 0.2773 0.5249 0.2781 0.5237 0.2789 0.5224 0.2797 0.5211 0.2805 0.5199 0.2813 0.5186 0.2820 0.5173 0.2828 0.5161 0.2836 0.5148 0.2844 0.5135 0.2852 0.5122 0.2859 0.5110 0.2867 0.5097 0.2875 0.5084 0.2883 0.5072 0.2891 0.5059 0.2898 0.5046 0.2906 0.5034 0.2914 0.5021 0.2922 0.5008 0.2930 0.4996 0.2938 0.4983 0.2945 0.4970 0.2953 0.4958 0.2961 0.4945 0.2969 0.4932 0.2977 0.4920 0.2984 0.4907 0.2992 0.4894 0.3000 0.4882 0.3008 0.4869 0.3016 0.4857 0.3023 0.4844 0.3031 0.4831 0.3039 0.4819 0.3047 0.4806 0.3055 0.4793 0.3063 0.4781 0.3070 0.4768 0.3078 0.4756 0.3086 0.4743 0.3094 0.4731 0.3102 0.4718 0.3109 0.4705 0.3117 0.4693 0.3125 0.4680 0.3133 0.4668 0.3141 0.4655 0.3148 0.4643 0.3156 0.4630 0.3164 0.4618 0.3172 0.4605 0.3180 0.4593 0.3188 0.4580 0.3195 0.4568 0.3203 0.4555 0.3211 0.4543 0.3219 0.4530 0.3227 0.4518 0.3234 0.4505 0.3242 0.4493 0.3250 0.4480 0.3258 0.4468 0.3266 0.4455 0.3273 0.4443 0.3281 0.4431 0.3289 0.4418 0.3297 0.4406 0.3305 0.4393 0.3313 0.4381 0.3320 0.4369 0.3328 0.4356 0.3336 0.4344 0.3344 0.4331 0.3352 0.4319 0.3359 0.4307 0.3367 0.4294 0.3375 0.4282 0.3383 0.4270 0.3391 0.4257 0.3398 0.4245 0.3406 0.4233 0.3414 0.4221 0.3422 0.4208 0.3430 0.4196 0.3438 0.4184 0.3445 0.4172 0.3453 0.4159 0.3461 0.4147 0.3469 0.4135 0.3477 0.4123 0.3484 0.4111 0.3492 0.4098 0.3500 0.4086 0.3508 0.4074 0.3516 0.4062 0.3523 0.4050 0.3531 0.4038 0.3539 0.4026 0.3547 0.4013 0.3555 0.4001 0.3563 0.3989 0.3570 0.3977 0.3578 0.3965 0.3586 0.3953 0.3594 0.3941 0.3602 0.3929 0.3609 0.3917 0.3617 0.3905 0.3625 0.3893 0.3633 0.3881 0.3641 0.3869 0.3648 0.3857 0.3656 0.3845 0.3664 0.3833 0.3672 0.3821 0.3680 0.3809 0.3688 0.3798 0.3695 0.3786 0.3703 0.3774 0.3711 0.3762 0.3719 0.3750 0.3727 0.3738 0.3734 0.3726 0.3742 0.3715 0.3750 0.3703 0.3758 0.3691 0.3766 0.3679 0.3773 0.3668 0.3781 0.3656 0.3789 0.3644 0.3797 0.3632 0.3805 0.3621 0.3813 0.3609 0.3820 0.3597 0.3828 0.3586 0.3836 0.3574 0.3844 0.3563 0.3852 0.3551 0.3859 0.3539 0.3867 0.3528 0.3875 0.3516 0.3883 0.3505 0.3891 0.3493 0.3898 0.3482 0.3906 0.3470 0.3914 0.3459 0.3922 0.3447 0.3930 0.3436 0.3938 0.3424 0.3945 0.3413 0.3953 0.3402 0.3961 0.3390 0.3969 0.3379 0.3977 0.3367 0.3984 0.3356 0.3992 0.3345 0.4000 0.3333 0.4008 0.3322 0.4016 0.3311 0.4023 0.3300 0.4031 0.3288 0.4039 0.3277 0.4047 0.3266 0.4055 0.3255 0.4063 0.3244 0.4070 0.3232 0.4078 0.3221 0.4086 0.3210 0.4094 0.3199 0.4102 0.3188 0.4109 0.3177 0.4117 0.3166 0.4125 0.3155 0.4133 0.3144 0.4141 0.3133 0.4148 0.3122 0.4156 0.3111 0.4164 0.3100 0.4172 0.3089 0.4180 0.3078 0.4188 0.3067 0.4195 0.3056 0.4203 0.3045 0.4211 0.3035 0.4219 0.3024 0.4227 0.3013 0.4234 0.3002 0.4242 0.2991 0.4250 0.2981 0.4258 0.2970 
0.4266 0.2959 0.4273 0.2949 0.4281 0.2938 0.4289 0.2927 0.4297 0.2917 0.4305 0.2906 0.4313 0.2895 0.4320 0.2885 0.4328 0.2874 0.4336 0.2864 0.4344 0.2853 0.4352 0.2843 0.4359 0.2832 0.4367 0.2822 0.4375 0.2811 0.4383 0.2801 0.4391 0.2790 0.4398 0.2780 0.4406 0.2770 0.4414 0.2759 0.4422 0.2749 0.4430 0.2739 0.4438 0.2728 0.4445 0.2718 0.4453 0.2708 0.4461 0.2698 0.4469 0.2687 0.4477 0.2677 0.4484 0.2667 0.4492 0.2657 0.4500 0.2647 0.4508 0.2637 0.4516 0.2627 0.4523 0.2617 0.4531 0.2607 0.4539 0.2596 0.4547 0.2586 0.4555 0.2576 0.4563 0.2567 0.4570 0.2557 0.4578 0.2547 0.4586 0.2537 0.4594 0.2527 0.4602 0.2517 0.4609 0.2507 0.4617 0.2497 0.4625 0.2488 0.4633 0.2478 0.4641 0.2468 0.4648 0.2458 0.4656 0.2449 0.4664 0.2439 0.4672 0.2429 0.4680 0.2420 0.4688 0.2410 0.4695 0.2400 0.4703 0.2391 0.4711 0.2381 0.4719 0.2372 0.4727 0.2362 0.4734 0.2353 0.4742 0.2343 0.4750 0.2334 0.4758 0.2324 0.4766 0.2315 0.4773 0.2305 0.4781 0.2296 0.4789 0.2287 0.4797 0.2277 0.4805 0.2268 0.4813 0.2259 0.4820 0.2250 0.4828 0.2240 0.4836 0.2231 0.4844 0.2222 0.4852 0.2213 0.4859 0.2204 0.4867 0.2194 0.4875 0.2185 0.4883 0.2176 0.4891 0.2167 0.4898 0.2158 0.4906 0.2149 0.4914 0.2140 0.4922 0.2131 0.4930 0.2122 0.4938 0.2113 0.4945 0.2104 0.4953 0.2095 0.4961 0.2086 0.4969 0.2078 0.4977 0.2069 0.4984 0.2060 0.4992 0.2051 0.5000 0.2042 relion-3.1.3/data/mtf_falcon2_300kV.star000066400000000000000000000362451411340063500176760ustar00rootroot00000000000000data_mtf_falcon2_300kv loop_ _rlnResolutionInversePixel _rlnMtfValue 0.0 1.000 0.0005 0.999939 0.001 0.999755 0.0015 0.999450 0.002 0.999025 0.0025 0.998480 0.003 0.997819 0.0035 0.997044 0.004 0.996157 0.0045 0.995162 0.005 0.994062 0.0055 0.992861 0.006 0.991562 0.0065 0.990171 0.007 0.988691 0.0075 0.987126 0.008 0.985481 0.0085 0.983761 0.009 0.981969 0.0095 0.980111 0.01 0.978191 0.0105 0.976213 0.011 0.974180 0.0115 0.972098 0.012 0.969970 0.0125 0.967799 0.013 0.965589 0.0135 0.963344 0.014 0.961066 0.0145 0.958759 0.015 0.956425 0.0155 0.954066 0.016 0.951686 0.0165 0.949287 0.017 0.946870 0.0175 0.944438 0.018 0.941993 0.0185 0.939536 0.019 0.937070 0.0195 0.934595 0.02 0.932113 0.0205 0.929626 0.021 0.927135 0.0215 0.924642 0.022 0.922147 0.0225 0.919651 0.023 0.917157 0.0235 0.914665 0.024 0.912176 0.0245 0.909691 0.025 0.907211 0.0255 0.904738 0.026 0.902271 0.0265 0.899812 0.027 0.897363 0.0275 0.894922 0.028 0.892492 0.0285 0.890073 0.029 0.887666 0.0295 0.885271 0.03 0.882888 0.0305 0.880519 0.031 0.878164 0.0315 0.875823 0.032 0.873497 0.0325 0.871185 0.033 0.868889 0.0335 0.866607 0.034 0.864342 0.0345 0.862091 0.035 0.859857 0.0355 0.857638 0.036 0.855434 0.0365 0.853246 0.037 0.851073 0.0375 0.848915 0.038 0.846772 0.0385 0.844644 0.039 0.842531 0.0395 0.840431 0.04 0.838345 0.0405 0.836272 0.041 0.834213 0.0415 0.832166 0.042 0.830131 0.0425 0.828107 0.043 0.826095 0.0435 0.824094 0.044 0.822103 0.0445 0.820122 0.045 0.818150 0.0455 0.816186 0.046 0.814231 0.0465 0.812284 0.047 0.810344 0.0475 0.808410 0.048 0.806483 0.0485 0.804562 0.049 0.802646 0.0495 0.800734 0.05 0.798827 0.0505 0.796924 0.051 0.795024 0.0515 0.793127 0.052 0.791233 0.0525 0.789340 0.053 0.787450 0.0535 0.785561 0.054 0.783672 0.0545 0.781785 0.055 0.779897 0.0555 0.778010 0.056 0.776122 0.0565 0.774233 0.057 0.772344 0.0575 0.770453 0.058 0.768561 0.0585 0.766667 0.059 0.764772 0.0595 0.762874 0.06 0.760974 0.0605 0.759071 0.061 0.757166 0.0615 0.755258 0.062 0.753347 0.0625 0.751434 0.063 0.749517 0.0635 0.747596 0.064 0.745673 0.0645 0.743746 0.065 0.741816 0.0655 0.739882 
0.066 0.737945 0.0665 0.736004 0.067 0.734060 0.0675 0.732112 0.068 0.730160 0.0685 0.728205 0.069 0.726247 0.0695 0.724284 0.07 0.722319 0.0705 0.720350 0.071 0.718377 0.0715 0.716401 0.072 0.714422 0.0725 0.712440 0.073 0.710454 0.0735 0.708465 0.074 0.706473 0.0745 0.704479 0.075 0.702481 0.0755 0.700480 0.076 0.698477 0.0765 0.696471 0.077 0.694463 0.0775 0.692452 0.078 0.690439 0.0785 0.688424 0.079 0.686407 0.0795 0.684387 0.08 0.682366 0.0805 0.680342 0.081 0.678318 0.0815 0.676291 0.082 0.674263 0.0825 0.672234 0.083 0.670203 0.0835 0.668171 0.084 0.666138 0.0845 0.664105 0.085 0.662070 0.0855 0.660035 0.086 0.657999 0.0865 0.655963 0.087 0.653926 0.0875 0.651889 0.088 0.649852 0.0885 0.647815 0.089 0.645778 0.0895 0.643741 0.09 0.641705 0.0905 0.639669 0.091 0.637633 0.0915 0.635598 0.092 0.633564 0.0925 0.631531 0.093 0.629498 0.0935 0.627467 0.094 0.625437 0.0945 0.623408 0.095 0.621381 0.0955 0.619354 0.096 0.617330 0.0965 0.615307 0.097 0.613286 0.0975 0.611267 0.098 0.609250 0.0985 0.607235 0.099 0.605222 0.0995 0.603211 0.1 0.601203 0.1005 0.599197 0.101 0.597193 0.1015 0.595193 0.102 0.593194 0.1025 0.591199 0.103 0.589207 0.1035 0.587217 0.104 0.585231 0.1045 0.583247 0.105 0.581267 0.1055 0.579290 0.106 0.577317 0.1065 0.575347 0.107 0.573380 0.1075 0.571417 0.108 0.569458 0.1085 0.567503 0.109 0.565551 0.1095 0.563603 0.11 0.561659 0.1105 0.559719 0.111 0.557783 0.1115 0.555852 0.112 0.553924 0.1125 0.552001 0.113 0.550082 0.1135 0.548168 0.114 0.546258 0.1145 0.544352 0.115 0.542451 0.1155 0.540555 0.116 0.538663 0.1165 0.536776 0.117 0.534894 0.1175 0.533017 0.118 0.531145 0.1185 0.529278 0.119 0.527415 0.1195 0.525558 0.12 0.523706 0.1205 0.521858 0.121 0.520017 0.1215 0.518180 0.122 0.516348 0.1225 0.514522 0.123 0.512701 0.1235 0.510886 0.124 0.509076 0.1245 0.507271 0.125 0.505472 0.1255 0.503679 0.126 0.501891 0.1265 0.500109 0.127 0.498332 0.1275 0.496561 0.128 0.494795 0.1285 0.493036 0.129 0.491282 0.1295 0.489533 0.13 0.487791 0.1305 0.486054 0.131 0.484324 0.1315 0.482599 0.132 0.480880 0.1325 0.479166 0.133 0.477459 0.1335 0.475758 0.134 0.474063 0.1345 0.472373 0.135 0.470690 0.1355 0.469013 0.136 0.467341 0.1365 0.465676 0.137 0.464017 0.1375 0.462363 0.138 0.460716 0.1385 0.459075 0.139 0.457440 0.1395 0.455811 0.14 0.454189 0.1405 0.452572 0.141 0.450961 0.1415 0.449357 0.142 0.447759 0.1425 0.446166 0.143 0.444580 0.1435 0.443000 0.144 0.441427 0.1445 0.439859 0.145 0.438298 0.1455 0.436742 0.146 0.435193 0.1465 0.433650 0.147 0.432113 0.1475 0.430582 0.148 0.429057 0.1485 0.427539 0.149 0.426026 0.1495 0.424520 0.15 0.423019 0.1505 0.421525 0.151 0.420037 0.1515 0.418555 0.152 0.417079 0.1525 0.415609 0.153 0.414145 0.1535 0.412687 0.154 0.411236 0.1545 0.409790 0.155 0.408350 0.1555 0.406916 0.156 0.405488 0.1565 0.404066 0.157 0.402651 0.1575 0.401241 0.158 0.399837 0.1585 0.398438 0.159 0.397046 0.1595 0.395660 0.16 0.394279 0.1605 0.392905 0.161 0.391536 0.1615 0.390173 0.162 0.388816 0.1625 0.387464 0.163 0.386119 0.1635 0.384779 0.164 0.383444 0.1645 0.382116 0.165 0.380793 0.1655 0.379476 0.166 0.378164 0.1665 0.376858 0.167 0.375558 0.1675 0.374263 0.168 0.372974 0.1685 0.371690 0.169 0.370412 0.1695 0.369139 0.17 0.367872 0.1705 0.366610 0.171 0.365354 0.1715 0.364103 0.172 0.362857 0.1725 0.361617 0.173 0.360382 0.1735 0.359152 0.174 0.357928 0.1745 0.356709 0.175 0.355495 0.1755 0.354286 0.176 0.353083 0.1765 0.351884 0.177 0.350691 0.1775 0.349503 0.178 0.348320 0.1785 0.347142 0.179 0.345969 0.1795 0.344801 0.18 0.343638 0.1805 0.342480 
0.181 0.341327 0.1815 0.340179 0.182 0.339035 0.1825 0.337897 0.183 0.336763 0.1835 0.335634 0.184 0.334510 0.1845 0.333391 0.185 0.332276 0.1855 0.331166 0.186 0.330061 0.1865 0.328960 0.187 0.327864 0.1875 0.326773 0.188 0.325686 0.1885 0.324603 0.189 0.323526 0.1895 0.322452 0.19 0.321383 0.1905 0.320319 0.191 0.319259 0.1915 0.318203 0.192 0.317152 0.1925 0.316105 0.193 0.315062 0.1935 0.314024 0.194 0.312989 0.1945 0.311960 0.195 0.310934 0.1955 0.309912 0.196 0.308895 0.1965 0.307882 0.197 0.306872 0.1975 0.305867 0.198 0.304866 0.1985 0.303869 0.199 0.302876 0.1995 0.301887 0.2 0.300902 0.2005 0.299921 0.201 0.298943 0.2015 0.297970 0.202 0.297000 0.2025 0.296035 0.203 0.295073 0.2035 0.294115 0.204 0.293160 0.2045 0.292210 0.205 0.291263 0.2055 0.290320 0.206 0.289380 0.2065 0.288444 0.207 0.287512 0.2075 0.286583 0.208 0.285658 0.2085 0.284736 0.209 0.283818 0.2095 0.282904 0.21 0.281993 0.2105 0.281085 0.211 0.280181 0.2115 0.279280 0.212 0.278383 0.2125 0.277489 0.213 0.276598 0.2135 0.275711 0.214 0.274827 0.2145 0.273946 0.215 0.273069 0.2155 0.272195 0.216 0.271324 0.2165 0.270456 0.217 0.269592 0.2175 0.268730 0.218 0.267872 0.2185 0.267017 0.219 0.266165 0.2195 0.265316 0.22 0.264471 0.2205 0.263628 0.221 0.262788 0.2215 0.261952 0.222 0.261118 0.2225 0.260287 0.223 0.259460 0.2235 0.258635 0.224 0.257813 0.2245 0.256994 0.225 0.256178 0.2255 0.255365 0.226 0.254555 0.2265 0.253748 0.227 0.252943 0.2275 0.252142 0.228 0.251343 0.2285 0.250546 0.229 0.249753 0.2295 0.248962 0.23 0.248175 0.2305 0.247389 0.231 0.246607 0.2315 0.245827 0.232 0.245050 0.2325 0.244276 0.233 0.243504 0.2335 0.242735 0.234 0.241968 0.2345 0.241204 0.235 0.240443 0.2355 0.239684 0.236 0.238928 0.2365 0.238174 0.237 0.237423 0.2375 0.236674 0.238 0.235928 0.2385 0.235185 0.239 0.234443 0.2395 0.233705 0.24 0.232968 0.2405 0.232235 0.241 0.231503 0.2415 0.230774 0.242 0.230048 0.2425 0.229324 0.243 0.228602 0.2435 0.227883 0.244 0.227166 0.2445 0.226451 0.245 0.225739 0.2455 0.225029 0.246 0.224321 0.2465 0.223615 0.247 0.222912 0.2475 0.222212 0.248 0.221513 0.2485 0.220817 0.249 0.220123 0.2495 0.219431 0.25 0.218741 0.2505 0.218054 0.251 0.217369 0.2515 0.216686 0.252 0.216005 0.2525 0.215327 0.253 0.214650 0.2535 0.213976 0.254 0.213304 0.2545 0.212634 0.255 0.211967 0.2555 0.211301 0.256 0.210637 0.2565 0.209976 0.257 0.209317 0.2575 0.208660 0.258 0.208005 0.2585 0.207352 0.259 0.206701 0.2595 0.206052 0.26 0.205405 0.2605 0.204760 0.261 0.204117 0.2615 0.203477 0.262 0.202838 0.2625 0.202201 0.263 0.201567 0.2635 0.200934 0.264 0.200304 0.2645 0.199675 0.265 0.199048 0.2655 0.198424 0.266 0.197801 0.2665 0.197180 0.267 0.196561 0.2675 0.195945 0.268 0.195330 0.2685 0.194717 0.269 0.194106 0.2695 0.193497 0.27 0.192890 0.2705 0.192284 0.271 0.191681 0.2715 0.191080 0.272 0.190480 0.2725 0.189882 0.273 0.189287 0.2735 0.188693 0.274 0.188101 0.2745 0.187511 0.275 0.186922 0.2755 0.186336 0.276 0.185751 0.2765 0.185169 0.277 0.184588 0.2775 0.184009 0.278 0.183431 0.2785 0.182856 0.279 0.182282 0.2795 0.181711 0.28 0.181141 0.2805 0.180573 0.281 0.180006 0.2815 0.179442 0.282 0.178879 0.2825 0.178318 0.283 0.177759 0.2835 0.177201 0.284 0.176646 0.2845 0.176092 0.285 0.175540 0.2855 0.174989 0.286 0.174441 0.2865 0.173894 0.287 0.173349 0.2875 0.172805 0.288 0.172264 0.2885 0.171724 0.289 0.171185 0.2895 0.170649 0.29 0.170114 0.2905 0.169581 0.291 0.169050 0.2915 0.168520 0.292 0.167992 0.2925 0.167466 0.293 0.166942 0.2935 0.166419 0.294 0.165898 0.2945 0.165378 0.295 0.164860 0.2955 0.164344 
0.296 0.163830 0.2965 0.163317 0.297 0.162806 0.2975 0.162297 0.298 0.161789 0.2985 0.161283 0.299 0.160778 0.2995 0.160275 0.3 0.159774 0.3005 0.159274 0.301 0.158776 0.3015 0.158280 0.302 0.157785 0.3025 0.157292 0.303 0.156801 0.3035 0.156311 0.304 0.155823 0.3045 0.155336 0.305 0.154851 0.3055 0.154368 0.306 0.153886 0.3065 0.153406 0.307 0.152927 0.3075 0.152450 0.308 0.151974 0.3085 0.151501 0.309 0.151028 0.3095 0.150557 0.31 0.150088 0.3105 0.149621 0.311 0.149154 0.3115 0.148690 0.312 0.148227 0.3125 0.147765 0.313 0.147306 0.3135 0.146847 0.314 0.146390 0.3145 0.145935 0.315 0.145481 0.3155 0.145029 0.316 0.144579 0.3165 0.144129 0.317 0.143682 0.3175 0.143236 0.318 0.142791 0.3185 0.142348 0.319 0.141906 0.3195 0.141466 0.32 0.141027 0.3205 0.140590 0.321 0.140155 0.3215 0.139720 0.322 0.139288 0.3225 0.138857 0.323 0.138427 0.3235 0.137999 0.324 0.137572 0.3245 0.137146 0.325 0.136723 0.3255 0.136300 0.326 0.135879 0.3265 0.135460 0.327 0.135042 0.3275 0.134625 0.328 0.134210 0.3285 0.133796 0.329 0.133384 0.3295 0.132973 0.33 0.132563 0.3305 0.132155 0.331 0.131749 0.3315 0.131343 0.332 0.130940 0.3325 0.130537 0.333 0.130136 0.3335 0.129737 0.334 0.129338 0.3345 0.128942 0.335 0.128546 0.3355 0.128152 0.336 0.127760 0.3365 0.127368 0.337 0.126978 0.3375 0.126590 0.338 0.126203 0.3385 0.125817 0.339 0.125432 0.3395 0.125049 0.34 0.124668 0.3405 0.124287 0.341 0.123908 0.3415 0.123530 0.342 0.123154 0.3425 0.122779 0.343 0.122405 0.3435 0.122033 0.344 0.121662 0.3445 0.121292 0.345 0.120924 0.3455 0.120557 0.346 0.120191 0.3465 0.119827 0.347 0.119463 0.3475 0.119101 0.348 0.118741 0.3485 0.118382 0.349 0.118024 0.3495 0.117667 0.35 0.117311 0.3505 0.116957 0.351 0.116604 0.3515 0.116253 0.352 0.115902 0.3525 0.115553 0.353 0.115205 0.3535 0.114859 0.354 0.114513 0.3545 0.114169 0.355 0.113826 0.3555 0.113485 0.356 0.113144 0.3565 0.112805 0.357 0.112467 0.3575 0.112131 0.358 0.111795 0.3585 0.111461 0.359 0.111128 0.3595 0.110796 0.36 0.110465 0.3605 0.110136 0.361 0.109807 0.3615 0.109480 0.362 0.109154 0.3625 0.108830 0.363 0.108506 0.3635 0.108184 0.364 0.107863 0.3645 0.107543 0.365 0.107224 0.3655 0.106906 0.366 0.106590 0.3665 0.106274 0.367 0.105960 0.3675 0.105647 0.368 0.105335 0.3685 0.105024 0.369 0.104715 0.3695 0.104406 0.37 0.104099 0.3705 0.103792 0.371 0.103487 0.3715 0.103183 0.372 0.102880 0.3725 0.102578 0.373 0.102278 0.3735 0.101978 0.374 0.101679 0.3745 0.101382 0.375 0.101086 0.3755 0.100790 0.376 0.100496 0.3765 0.100203 0.377 0.099911 0.3775 0.099620 0.378 0.099330 0.3785 0.099041 0.379 0.098754 0.3795 0.098467 0.38 0.098181 0.3805 0.097897 0.381 0.097613 0.3815 0.097331 0.382 0.097049 0.3825 0.096769 0.383 0.096489 0.3835 0.096211 0.384 0.095933 0.3845 0.095657 0.385 0.095381 0.3855 0.095107 0.386 0.094834 0.3865 0.094561 0.387 0.094290 0.3875 0.094020 0.388 0.093750 0.3885 0.093482 0.389 0.093214 0.3895 0.092948 0.39 0.092682 0.3905 0.092418 0.391 0.092154 0.3915 0.091892 0.392 0.091630 0.3925 0.091369 0.393 0.091110 0.3935 0.090851 0.394 0.090593 0.3945 0.090336 0.395 0.090080 0.3955 0.089825 0.396 0.089571 0.3965 0.089318 0.397 0.089066 0.3975 0.088815 0.398 0.088564 0.3985 0.088315 0.399 0.088066 0.3995 0.087818 0.4 0.087572 0.4005 0.087326 0.401 0.087081 0.4015 0.086837 0.402 0.086593 0.4025 0.086351 0.403 0.086109 0.4035 0.085869 0.404 0.085629 0.4045 0.085390 0.405 0.085152 0.4055 0.084915 0.406 0.084679 0.4065 0.084443 0.407 0.084209 0.4075 0.083975 0.408 0.083742 0.4085 0.083510 0.409 0.083279 0.4095 0.083048 0.41 0.082819 0.4105 0.082590 
0.411 0.082362 0.4115 0.082135 0.412 0.081909 0.4125 0.081683 0.413 0.081458 0.4135 0.081235 0.414 0.081011 0.4145 0.080789 0.415 0.080568 0.4155 0.080347 0.416 0.080127 0.4165 0.079908 0.417 0.079689 0.4175 0.079472 0.418 0.079255 0.4185 0.079039 0.419 0.078824 0.4195 0.078609 0.42 0.078395 0.4205 0.078182 0.421 0.077970 0.4215 0.077758 0.422 0.077548 0.4225 0.077337 0.423 0.077128 0.4235 0.076920 0.424 0.076712 0.4245 0.076505 0.425 0.076298 0.4255 0.076092 0.426 0.075887 0.4265 0.075683 0.427 0.075480 0.4275 0.075277 0.428 0.075075 0.4285 0.074873 0.429 0.074672 0.4295 0.074472 0.43 0.074273 0.4305 0.074074 0.431 0.073876 0.4315 0.073679 0.432 0.073482 0.4325 0.073286 0.433 0.073091 0.4335 0.072896 0.434 0.072702 0.4345 0.072509 0.435 0.072316 0.4355 0.072124 0.436 0.071933 0.4365 0.071742 0.437 0.071552 0.4375 0.071363 0.438 0.071174 0.4385 0.070986 0.439 0.070799 0.4395 0.070612 0.44 0.070425 0.4405 0.070240 0.441 0.070055 0.4415 0.069871 0.442 0.069687 0.4425 0.069504 0.443 0.069321 0.4435 0.069139 0.444 0.068958 0.4445 0.068777 0.445 0.068597 0.4455 0.068417 0.446 0.068239 0.4465 0.068060 0.447 0.067882 0.4475 0.067705 0.448 0.067529 0.4485 0.067353 0.449 0.067177 0.4495 0.067002 0.45 0.066828 0.4505 0.066654 0.451 0.066481 0.4515 0.066309 0.452 0.066136 0.4525 0.065965 0.453 0.065794 0.4535 0.065624 0.454 0.065454 0.4545 0.065284 0.455 0.065116 0.4555 0.064948 0.456 0.064780 0.4565 0.064613 0.457 0.064446 0.4575 0.064280 0.458 0.064114 0.4585 0.063949 0.459 0.063785 0.4595 0.063621 0.46 0.063457 0.4605 0.063294 0.461 0.063132 0.4615 0.062970 0.462 0.062809 0.4625 0.062648 0.463 0.062487 0.4635 0.062327 0.464 0.062168 0.4645 0.062009 0.465 0.061850 0.4655 0.061692 0.466 0.061535 0.4665 0.061378 0.467 0.061221 0.4675 0.061065 0.468 0.060910 0.4685 0.060755 0.469 0.060600 0.4695 0.060446 0.47 0.060292 0.4705 0.060139 0.471 0.059986 0.4715 0.059834 0.472 0.059682 0.4725 0.059530 0.473 0.059380 0.4735 0.059229 0.474 0.059079 0.4745 0.058929 0.475 0.058780 0.4755 0.058631 0.476 0.058483 0.4765 0.058335 0.477 0.058188 0.4775 0.058041 0.478 0.057894 0.4785 0.057748 0.479 0.057603 0.4795 0.057457 0.48 0.057312 0.4805 0.057168 0.481 0.057024 0.4815 0.056880 0.482 0.056737 0.4825 0.056594 0.483 0.056452 0.4835 0.056310 0.484 0.056169 0.4845 0.056027 0.485 0.055887 0.4855 0.055746 0.486 0.055606 0.4865 0.055467 0.487 0.055328 0.4875 0.055189 0.488 0.055050 0.4885 0.054912 0.489 0.054775 0.4895 0.054638 0.49 0.054501 0.4905 0.054364 0.491 0.054228 0.4915 0.054092 0.492 0.053957 0.4925 0.053822 0.493 0.053687 0.4935 0.053553 0.494 0.053419 0.4945 0.053286 0.495 0.053153 0.4955 0.053020 0.496 0.052887 0.4965 0.052755 0.497 0.052624 0.4975 0.052492 0.498 0.052361 0.4985 0.052231 0.499 0.052100 0.4995 0.051970 0.5 0.051841 relion-3.1.3/data/mtf_falcon3EC_200kV.star000066400000000000000000000026241411340063500201000ustar00rootroot00000000000000data_mtf_f3ec_200kv loop_ _rlnResolutionInversePixel _rlnMtfValue 0 1 0.0078125 0.9965995836 0.015625 0.9925965222 0.0234375 0.9877767088 0.03125 0.9826158006 0.0390625 0.973230544 0.046875 0.9628059415 0.0546875 0.9514492453 0.0625 0.9393189064 0.0703125 0.9264497262 0.078125 0.9128093952 0.0859375 0.8989773586 0.09375 0.8856018102 0.101562 0.8731967977 0.109375 0.8606716371 0.117188 0.8470417512 0.125 0.8320004733 0.132812 0.8162277582 0.140625 0.8003397901 0.148438 0.7843893832 0.15625 0.7678728253 0.164062 0.7500509419 0.171875 0.731119514 0.179688 0.712057037 0.1875 0.6929270334 0.195312 0.6751743913 0.203125 0.6575674458 0.210938 0.638938447 0.21875 
0.6202184216 0.226562 0.6022803573 0.234375 0.5858632383 0.242188 0.5695459872 0.25 0.552001403 0.257812 0.5353377238 0.265625 0.5190810502 0.273438 0.5026498543 0.28125 0.4841325857 0.289062 0.4637873005 0.296875 0.4441714491 0.304688 0.4249340186 0.3125 0.4064081369 0.320312 0.3872083841 0.328125 0.3678557365 0.335938 0.3495579495 0.34375 0.3333669666 0.351562 0.3208098327 0.359375 0.3091536233 0.367188 0.2965952757 0.375 0.2835832674 0.382812 0.2693826996 0.390625 0.2561915974 0.398438 0.2441277906 0.40625 0.231465507 0.414062 0.2179244748 0.421875 0.2043799634 0.429688 0.192607817 0.4375 0.1816710396 0.445312 0.1703538314 0.453125 0.1598712022 0.460938 0.1513502544 0.46875 0.1467784596 0.476562 0.1424061923 0.484375 0.1384942927 0.492188 0.134713809 relion-3.1.3/data/mtf_falcon3EC_300kV.star000066400000000000000000000026161411340063500201020ustar00rootroot00000000000000data_mtf_f3ec_300kv loop_ _rlnResolutionInversePixel _rlnMtfValue 0 1 0.0078125 0.9964851529 0.015625 0.9935477645 0.0234375 0.9908861019 0.03125 0.9880337235 0.0390625 0.9814730437 0.046875 0.973590513 0.0546875 0.9648087389 0.0625 0.9554497043 0.0703125 0.9455722965 0.078125 0.9349830869 0.0859375 0.9234127068 0.09375 0.9107339924 0.101562 0.8970834075 0.109375 0.882816709 0.117188 0.86832079 0.125 0.853789483 0.132812 0.8391130803 0.140625 0.8239667126 0.148438 0.8080357402 0.15625 0.7912221696 0.164062 0.7737090404 0.171875 0.755859011 0.179688 0.7380137468 0.1875 0.720315353 0.195312 0.7026623656 0.203125 0.6848208758 0.210938 0.6665986007 0.21875 0.6479621604 0.226562 0.6290412613 0.234375 0.6100404732 0.242188 0.5911270856 0.25 0.5723743125 0.257812 0.5537992 0.265625 0.5354536931 0.273438 0.5174750916 0.28125 0.5000367775 0.289062 0.483227546 0.296875 0.4669472011 0.304688 0.4508987469 0.3125 0.4346962785 0.320312 0.4180348611 0.328125 0.4008286407 0.335938 0.3832389573 0.34375 0.3655779587 0.351562 0.3481510778 0.359375 0.331139073 0.367188 0.3145842216 0.375 0.298471077 0.382812 0.2828388979 0.390625 0.2678464558 0.398438 0.2537284049 0.40625 0.2406577445 0.414062 0.2286300856 0.421875 0.2174726552 0.429688 0.2069493639 0.4375 0.1968663323 0.445312 0.1871215361 0.453125 0.1776948039 0.460938 0.1685941212 0.46875 0.164686055 0.476562 0.1607952981 0.484375 0.1565754705 0.492188 0.1517875288 relion-3.1.3/data/mtf_falcon4EC_200kV.star000066400000000000000000000025511411340063500201000ustar00rootroot00000000000000data_ loop_ _rlnResolutionInversePixel _rlnMtfValue 0 0.999616224 0.0078125 0.995158661 0.015625 0.98887104 0.0234375 0.983366571 0.03125 0.975232027 0.0390625 0.961867582 0.046875 0.946684649 0.0546875 0.933235127 0.0625 0.919362103 0.0703125 0.902274135 0.078125 0.884878334 0.0859375 0.871142298 0.09375 0.858466071 0.1015625 0.84125776 0.109375 0.820159088 0.1171875 0.801428667 0.125 0.787326269 0.1328125 0.77296602 0.140625 0.754078328 0.1484375 0.732544945 0.15625 0.712165774 0.1640625 0.692827445 0.171875 0.67215755 0.1796875 0.650609876 0.1875 0.631319837 0.1953125 0.615367361 0.203125 0.599714111 0.2109375 0.580337233 0.21875 0.556305406 0.2265625 0.530867734 0.234375 0.508807593 0.2421875 0.491621838 0.25 0.475396925 0.2578125 0.455229698 0.265625 0.431473823 0.2734375 0.409665617 0.28125 0.393265162 0.2890625 0.379034773 0.296875 0.36203301 0.3046875 0.34214037 0.3125 0.322942267 0.3203125 0.306309837 0.328125 0.291348888 0.3359375 0.277252415 0.34375 0.263644751 0.3515625 0.249266196 0.359375 0.233507733 0.3671875 0.218161942 0.375 0.204797022 0.3828125 0.192193849 0.390625 0.1795398 0.3984375 
0.169224015 0.40625 0.162112968 0.4140625 0.153788349 0.421875 0.140833814 0.4296875 0.12728583 0.4375 0.118772761 0.4453125 0.112341917 0.453125 0.101470404 0.4609375 0.088408075 0.46875 0.082460402 0.4765625 0.083598335 0.484375 0.08129187 0.4921875 0.071874225 relion-3.1.3/data/mtf_falcon4EC_300kV.star000066400000000000000000000025511411340063500201010ustar00rootroot00000000000000data_ loop_ _rlnResolutionInversePixel _rlnMtfValue 0 0.999517475 0.0078125 0.996579739 0.015625 0.992900244 0.0234375 0.989832553 0.03125 0.983908091 0.0390625 0.973875682 0.046875 0.964275326 0.0546875 0.957904422 0.0625 0.950608532 0.0703125 0.938585295 0.078125 0.9252223 0.0859375 0.914892764 0.09375 0.904637227 0.1015625 0.888937434 0.109375 0.869150145 0.1171875 0.851057431 0.125 0.835400035 0.1328125 0.818407124 0.140625 0.800461011 0.1484375 0.785998414 0.15625 0.774531419 0.1640625 0.759799356 0.171875 0.739380588 0.1796875 0.718132477 0.1875 0.69972289 0.1953125 0.68080891 0.203125 0.657253144 0.2109375 0.63102172 0.21875 0.6071277 0.2265625 0.587275043 0.234375 0.569886863 0.2421875 0.553118179 0.25 0.534835784 0.2578125 0.513181064 0.265625 0.490032199 0.2734375 0.470366628 0.28125 0.455116175 0.2890625 0.439029629 0.296875 0.418877436 0.3046875 0.39868344 0.3125 0.382950624 0.3203125 0.369185037 0.328125 0.352881923 0.3359375 0.335059925 0.34375 0.318572787 0.3515625 0.301170215 0.359375 0.279851324 0.3671875 0.259431463 0.375 0.247846734 0.3828125 0.242640992 0.390625 0.233246077 0.3984375 0.21627062 0.40625 0.198794363 0.4140625 0.186885279 0.421875 0.177479822 0.4296875 0.16614912 0.4375 0.154657026 0.4453125 0.145793776 0.453125 0.137494976 0.4609375 0.127398312 0.46875 0.117608892 0.4765625 0.110049302 0.484375 0.102166512 0.4921875 0.092640118 relion-3.1.3/data/mtf_k2_300kV.star000066400000000000000000000360561411340063500166660ustar00rootroot00000000000000data_mtf_k2_300kv loop_ _rlnResolutionInversePixel _rlnMtfValue 0.0000 1.00000 0.0005 0.988714 0.001 0.985642 0.0015 0.985445 0.002 0.98521 0.0025 0.98491 0.003 0.984545 0.0035 0.984117 0.004 0.983628 0.0045 0.98308 0.005 0.982474 0.0055 0.981813 0.006 0.981099 0.0065 0.980335 0.007 0.979524 0.0075 0.978669 0.008 0.977772 0.0085 0.976838 0.009 0.975868 0.0095 0.974866 0.01 0.973836 0.0105 0.972781 0.011 0.971704 0.0115 0.970609 0.012 0.969498 0.0125 0.968375 0.013 0.967244 0.0135 0.966107 0.014 0.964967 0.0145 0.963827 0.015 0.96269 0.0155 0.961559 0.016 0.960436 0.0165 0.959323 0.017 0.958223 0.0175 0.957138 0.018 0.95607 0.0185 0.95502 0.019 0.953991 0.0195 0.952982 0.02 0.951997 0.0205 0.951035 0.021 0.950098 0.0215 0.949187 0.022 0.948302 0.0225 0.947443 0.023 0.946612 0.0235 0.945808 0.024 0.945031 0.0245 0.944282 0.025 0.94356 0.0255 0.942865 0.026 0.942196 0.0265 0.941554 0.027 0.940937 0.0275 0.940345 0.028 0.939777 0.0285 0.939234 0.029 0.938713 0.0295 0.938214 0.03 0.937737 0.0305 0.93728 0.031 0.936843 0.0315 0.936424 0.032 0.936024 0.0325 0.93564 0.033 0.935272 0.0335 0.934919 0.034 0.934581 0.0345 0.934256 0.035 0.933943 0.0355 0.933642 0.036 0.933352 0.0365 0.933072 0.037 0.932801 0.0375 0.932539 0.038 0.932285 0.0385 0.932038 0.039 0.931797 0.0395 0.931563 0.04 0.931334 0.0405 0.93111 0.041 0.93089 0.0415 0.930673 0.042 0.930461 0.0425 0.930251 0.043 0.930044 0.0435 0.929838 0.044 0.929635 0.0445 0.929433 0.045 0.929233 0.0455 0.929033 0.046 0.928834 0.0465 0.928635 0.047 0.928437 0.0475 0.928238 0.048 0.928039 0.0485 0.92784 0.049 0.927641 0.0495 0.92744 0.05 0.927239 0.0505 0.927037 0.051 0.926835 0.0515 0.926631 0.052 
0.926425 0.0525 0.926219 0.053 0.926011 0.0535 0.925802 0.054 0.925592 0.0545 0.92538 0.055 0.925166 0.0555 0.924951 0.056 0.924734 0.0565 0.924516 0.057 0.924296 0.0575 0.924074 0.058 0.923851 0.0585 0.923626 0.059 0.923399 0.0595 0.923171 0.06 0.92294 0.0605 0.922708 0.061 0.922475 0.0615 0.922239 0.062 0.922002 0.0625 0.921763 0.063 0.921522 0.0635 0.921279 0.064 0.921035 0.0645 0.920789 0.065 0.920541 0.0655 0.920291 0.066 0.920039 0.0665 0.919786 0.067 0.919531 0.0675 0.919274 0.068 0.919015 0.0685 0.918754 0.069 0.918492 0.0695 0.918228 0.07 0.917962 0.0705 0.917694 0.071 0.917425 0.0715 0.917154 0.072 0.916881 0.0725 0.916606 0.073 0.916329 0.0735 0.916051 0.074 0.915771 0.0745 0.915489 0.075 0.915206 0.0755 0.91492 0.076 0.914633 0.0765 0.914344 0.077 0.914054 0.0775 0.913762 0.078 0.913468 0.0785 0.913172 0.079 0.912874 0.0795 0.912575 0.08 0.912274 0.0805 0.911971 0.081 0.911667 0.0815 0.911361 0.082 0.911053 0.0825 0.910743 0.083 0.910432 0.0835 0.910119 0.084 0.909804 0.0845 0.909487 0.085 0.909169 0.0855 0.908849 0.086 0.908528 0.0865 0.908205 0.087 0.90788 0.0875 0.907553 0.088 0.907225 0.0885 0.906895 0.089 0.906563 0.0895 0.90623 0.09 0.905895 0.0905 0.905558 0.091 0.90522 0.0915 0.90488 0.092 0.904538 0.0925 0.904195 0.093 0.90385 0.0935 0.903503 0.094 0.903155 0.0945 0.902805 0.095 0.902453 0.0955 0.9021 0.096 0.901745 0.0965 0.901389 0.097 0.901031 0.0975 0.900671 0.098 0.90031 0.0985 0.899947 0.099 0.899582 0.0995 0.899216 0.1 0.898849 0.1005 0.898479 0.101 0.898108 0.1015 0.897736 0.102 0.897361 0.1025 0.896986 0.103 0.896608 0.1035 0.896229 0.104 0.895849 0.1045 0.895467 0.105 0.895083 0.1055 0.894698 0.106 0.894311 0.1065 0.893923 0.107 0.893533 0.1075 0.893141 0.108 0.892748 0.1085 0.892354 0.109 0.891958 0.1095 0.89156 0.11 0.891161 0.1105 0.89076 0.111 0.890358 0.1115 0.889954 0.112 0.889549 0.1125 0.889142 0.113 0.888734 0.1135 0.888324 0.114 0.887912 0.1145 0.887499 0.115 0.887085 0.1155 0.886669 0.116 0.886252 0.1165 0.885833 0.117 0.885413 0.1175 0.884991 0.118 0.884567 0.1185 0.884143 0.119 0.883716 0.1195 0.883289 0.12 0.882859 0.1205 0.882429 0.121 0.881997 0.1215 0.881563 0.122 0.881128 0.1225 0.880691 0.123 0.880253 0.1235 0.879814 0.124 0.879373 0.1245 0.878931 0.125 0.878487 0.1255 0.878042 0.126 0.877596 0.1265 0.877148 0.127 0.876699 0.1275 0.876248 0.128 0.875796 0.1285 0.875342 0.129 0.874887 0.1295 0.874431 0.13 0.873973 0.1305 0.873514 0.131 0.873053 0.1315 0.872592 0.132 0.872128 0.1325 0.871664 0.133 0.871198 0.1335 0.87073 0.134 0.870262 0.1345 0.869792 0.135 0.86932 0.1355 0.868847 0.136 0.868373 0.1365 0.867898 0.137 0.867421 0.1375 0.866943 0.138 0.866463 0.1385 0.865983 0.139 0.8655 0.1395 0.865017 0.14 0.864532 0.1405 0.864046 0.141 0.863559 0.1415 0.86307 0.142 0.86258 0.1425 0.862089 0.143 0.861597 0.1435 0.861103 0.144 0.860608 0.1445 0.860111 0.145 0.859614 0.1455 0.859115 0.146 0.858615 0.1465 0.858113 0.147 0.857611 0.1475 0.857107 0.148 0.856602 0.1485 0.856095 0.149 0.855588 0.1495 0.855079 0.15 0.854569 0.1505 0.854057 0.151 0.853545 0.1515 0.853031 0.152 0.852516 0.1525 0.852 0.153 0.851482 0.1535 0.850963 0.154 0.850444 0.1545 0.849923 0.155 0.8494 0.1555 0.848877 0.156 0.848352 0.1565 0.847827 0.157 0.8473 0.1575 0.846772 0.158 0.846242 0.1585 0.845712 0.159 0.84518 0.1595 0.844647 0.16 0.844114 0.1605 0.843578 0.161 0.843042 0.1615 0.842505 0.162 0.841966 0.1625 0.841427 0.163 0.840886 0.1635 0.840344 0.164 0.839801 0.1645 0.839257 0.165 0.838712 0.1655 0.838166 0.166 0.837618 0.1665 0.83707 0.167 0.83652 0.1675 0.83597 0.168 
0.835418 0.1685 0.834865 0.169 0.834311 0.1695 0.833756 0.17 0.8332 0.1705 0.832643 0.171 0.832085 0.1715 0.831525 0.172 0.830965 0.1725 0.830404 0.173 0.829841 0.1735 0.829278 0.174 0.828713 0.1745 0.828148 0.175 0.827581 0.1755 0.827014 0.176 0.826445 0.1765 0.825876 0.177 0.825305 0.1775 0.824733 0.178 0.824161 0.1785 0.823587 0.179 0.823013 0.1795 0.822437 0.18 0.82186 0.1805 0.821283 0.181 0.820704 0.1815 0.820125 0.182 0.819544 0.1825 0.818963 0.183 0.81838 0.1835 0.817797 0.184 0.817213 0.1845 0.816627 0.185 0.816041 0.1855 0.815454 0.186 0.814866 0.1865 0.814277 0.187 0.813687 0.1875 0.813096 0.188 0.812504 0.1885 0.811912 0.189 0.811318 0.1895 0.810723 0.19 0.810128 0.1905 0.809531 0.191 0.808934 0.1915 0.808336 0.192 0.807737 0.1925 0.807137 0.193 0.806536 0.1935 0.805935 0.194 0.805332 0.1945 0.804729 0.195 0.804124 0.1955 0.803519 0.196 0.802913 0.1965 0.802306 0.197 0.801698 0.1975 0.80109 0.198 0.800481 0.1985 0.79987 0.199 0.799259 0.1995 0.798647 0.2 0.798035 0.2005 0.797421 0.201 0.796807 0.2015 0.796191 0.202 0.795575 0.2025 0.794959 0.203 0.794341 0.2035 0.793723 0.204 0.793103 0.2045 0.792483 0.205 0.791863 0.2055 0.791241 0.206 0.790619 0.2065 0.789996 0.207 0.789372 0.2075 0.788747 0.208 0.788122 0.2085 0.787496 0.209 0.786869 0.2095 0.786241 0.21 0.785613 0.2105 0.784984 0.211 0.784354 0.2115 0.783723 0.212 0.783092 0.2125 0.78246 0.213 0.781827 0.2135 0.781193 0.214 0.780559 0.2145 0.779924 0.215 0.779288 0.2155 0.778652 0.216 0.778015 0.2165 0.777377 0.217 0.776739 0.2175 0.7761 0.218 0.77546 0.2185 0.774819 0.219 0.774178 0.2195 0.773536 0.22 0.772894 0.2205 0.772251 0.221 0.771607 0.2215 0.770962 0.222 0.770317 0.2225 0.769671 0.223 0.769025 0.2235 0.768378 0.224 0.76773 0.2245 0.767081 0.225 0.766432 0.2255 0.765783 0.226 0.765133 0.2265 0.764482 0.227 0.76383 0.2275 0.763178 0.228 0.762525 0.2285 0.761872 0.229 0.761218 0.2295 0.760563 0.23 0.759908 0.2305 0.759253 0.231 0.758596 0.2315 0.757939 0.232 0.757282 0.2325 0.756624 0.233 0.755965 0.2335 0.755306 0.234 0.754646 0.2345 0.753986 0.235 0.753325 0.2355 0.752664 0.236 0.752002 0.2365 0.751339 0.237 0.750676 0.2375 0.750012 0.238 0.749348 0.2385 0.748683 0.239 0.748018 0.2395 0.747352 0.24 0.746686 0.2405 0.746019 0.241 0.745352 0.2415 0.744684 0.242 0.744016 0.2425 0.743347 0.243 0.742678 0.2435 0.742008 0.244 0.741337 0.2445 0.740667 0.245 0.739995 0.2455 0.739324 0.246 0.738651 0.2465 0.737978 0.247 0.737305 0.2475 0.736632 0.248 0.735957 0.2485 0.735283 0.249 0.734608 0.2495 0.733932 0.25 0.733256 0.2505 0.73258 0.251 0.731903 0.2515 0.731225 0.252 0.730547 0.2525 0.729869 0.253 0.72919 0.2535 0.728511 0.254 0.727832 0.2545 0.727152 0.255 0.726471 0.2555 0.725791 0.256 0.725109 0.2565 0.724428 0.257 0.723746 0.2575 0.723063 0.258 0.722381 0.2585 0.721697 0.259 0.721014 0.2595 0.72033 0.26 0.719645 0.2605 0.71896 0.261 0.718275 0.2615 0.71759 0.262 0.716904 0.2625 0.716217 0.263 0.715531 0.2635 0.714844 0.264 0.714156 0.2645 0.713469 0.265 0.71278 0.2655 0.712092 0.266 0.711403 0.2665 0.710714 0.267 0.710024 0.2675 0.709334 0.268 0.708644 0.2685 0.707954 0.269 0.707263 0.2695 0.706572 0.27 0.70588 0.2705 0.705188 0.271 0.704496 0.2715 0.703804 0.272 0.703111 0.2725 0.702418 0.273 0.701724 0.2735 0.701031 0.274 0.700337 0.2745 0.699642 0.275 0.698948 0.2755 0.698253 0.276 0.697557 0.2765 0.696862 0.277 0.696166 0.2775 0.69547 0.278 0.694774 0.2785 0.694077 0.279 0.69338 0.2795 0.692683 0.28 0.691986 0.2805 0.691288 0.281 0.69059 0.2815 0.689892 0.282 0.689193 0.2825 0.688495 0.283 0.687796 0.2835 0.687096 
0.284 0.686397 0.2845 0.685697 0.285 0.684997 0.2855 0.684297 0.286 0.683597 0.2865 0.682896 0.287 0.682195 0.2875 0.681494 0.288 0.680793 0.2885 0.680091 0.289 0.67939 0.2895 0.678688 0.29 0.677985 0.2905 0.677283 0.291 0.67658 0.2915 0.675878 0.292 0.675175 0.2925 0.674471 0.293 0.673768 0.2935 0.673064 0.294 0.672361 0.2945 0.671657 0.295 0.670953 0.2955 0.670248 0.296 0.669544 0.2965 0.668839 0.297 0.668134 0.2975 0.667429 0.298 0.666724 0.2985 0.666019 0.299 0.665313 0.2995 0.664607 0.3 0.663902 0.3005 0.663196 0.301 0.662489 0.3015 0.661783 0.302 0.661077 0.3025 0.66037 0.303 0.659663 0.3035 0.658956 0.304 0.658249 0.3045 0.657542 0.305 0.656835 0.3055 0.656127 0.306 0.65542 0.3065 0.654712 0.307 0.654004 0.3075 0.653296 0.308 0.652588 0.3085 0.65188 0.309 0.651172 0.3095 0.650463 0.31 0.649755 0.3105 0.649046 0.311 0.648338 0.3115 0.647629 0.312 0.64692 0.3125 0.646211 0.313 0.645502 0.3135 0.644792 0.314 0.644083 0.3145 0.643374 0.315 0.642664 0.3155 0.641955 0.316 0.641245 0.3165 0.640535 0.317 0.639825 0.3175 0.639115 0.318 0.638405 0.3185 0.637695 0.319 0.636985 0.3195 0.636275 0.32 0.635565 0.3205 0.634855 0.321 0.634144 0.3215 0.633434 0.322 0.632723 0.3225 0.632013 0.323 0.631302 0.3235 0.630591 0.324 0.629881 0.3245 0.62917 0.325 0.628459 0.3255 0.627748 0.326 0.627038 0.3265 0.626327 0.327 0.625616 0.3275 0.624905 0.328 0.624194 0.3285 0.623483 0.329 0.622772 0.3295 0.622061 0.33 0.62135 0.3305 0.620638 0.331 0.619927 0.3315 0.619216 0.332 0.618505 0.3325 0.617794 0.333 0.617083 0.3335 0.616371 0.334 0.61566 0.3345 0.614949 0.335 0.614238 0.3355 0.613527 0.336 0.612815 0.3365 0.612104 0.337 0.611393 0.3375 0.610682 0.338 0.60997 0.3385 0.609259 0.339 0.608548 0.3395 0.607837 0.34 0.607126 0.3405 0.606415 0.341 0.605704 0.3415 0.604993 0.342 0.604281 0.3425 0.60357 0.343 0.602859 0.3435 0.602148 0.344 0.601438 0.3445 0.600727 0.345 0.600016 0.3455 0.599305 0.346 0.598594 0.3465 0.597883 0.347 0.597173 0.3475 0.596462 0.348 0.595751 0.3485 0.595041 0.349 0.59433 0.3495 0.59362 0.35 0.592909 0.3505 0.592199 0.351 0.591489 0.3515 0.590778 0.352 0.590068 0.3525 0.589358 0.353 0.588648 0.3535 0.587938 0.354 0.587228 0.3545 0.586518 0.355 0.585808 0.3555 0.585098 0.356 0.584389 0.3565 0.583679 0.357 0.58297 0.3575 0.58226 0.358 0.581551 0.3585 0.580841 0.359 0.580132 0.3595 0.579423 0.36 0.578714 0.3605 0.578005 0.361 0.577296 0.3615 0.576587 0.362 0.575879 0.3625 0.57517 0.363 0.574462 0.3635 0.573753 0.364 0.573045 0.3645 0.572337 0.365 0.571629 0.3655 0.570921 0.366 0.570213 0.3665 0.569505 0.367 0.568797 0.3675 0.56809 0.368 0.567382 0.3685 0.566675 0.369 0.565968 0.3695 0.565261 0.37 0.564554 0.3705 0.563847 0.371 0.56314 0.3715 0.562433 0.372 0.561727 0.3725 0.56102 0.373 0.560314 0.3735 0.559608 0.374 0.558902 0.3745 0.558196 0.375 0.55749 0.3755 0.556785 0.376 0.556079 0.3765 0.555374 0.377 0.554669 0.3775 0.553963 0.378 0.553259 0.3785 0.552554 0.379 0.551849 0.3795 0.551145 0.38 0.55044 0.3805 0.549736 0.381 0.549032 0.3815 0.548328 0.382 0.547624 0.3825 0.54692 0.383 0.546217 0.3835 0.545514 0.384 0.54481 0.3845 0.544107 0.385 0.543405 0.3855 0.542702 0.386 0.541999 0.3865 0.541297 0.387 0.540595 0.3875 0.539893 0.388 0.539191 0.3885 0.538489 0.389 0.537787 0.3895 0.537086 0.39 0.536385 0.3905 0.535684 0.391 0.534983 0.3915 0.534282 0.392 0.533582 0.3925 0.532881 0.393 0.532181 0.3935 0.531481 0.394 0.530781 0.3945 0.530082 0.395 0.529382 0.3955 0.528683 0.396 0.527984 0.3965 0.527285 0.397 0.526586 0.3975 0.525888 0.398 0.52519 0.3985 0.524491 0.399 0.523793 0.3995 
0.523096 0.4 0.522398 0.4005 0.521701 0.401 0.521004 0.4015 0.520307 0.402 0.51961 0.4025 0.518913 0.403 0.518217 0.4035 0.517521 0.404 0.516825 0.4045 0.516129 0.405 0.515434 0.4055 0.514738 0.406 0.514043 0.4065 0.513348 0.407 0.512653 0.4075 0.511959 0.408 0.511265 0.4085 0.510571 0.409 0.509877 0.4095 0.509183 0.41 0.50849 0.4105 0.507796 0.411 0.507103 0.4115 0.506411 0.412 0.505718 0.4125 0.505026 0.413 0.504334 0.4135 0.503642 0.414 0.50295 0.4145 0.502259 0.415 0.501567 0.4155 0.500876 0.416 0.500186 0.4165 0.499495 0.417 0.498805 0.4175 0.498115 0.418 0.497425 0.4185 0.496735 0.419 0.496046 0.4195 0.495357 0.42 0.494668 0.4205 0.493979 0.421 0.493291 0.4215 0.492603 0.422 0.491915 0.4225 0.491227 0.423 0.49054 0.4235 0.489853 0.424 0.489166 0.4245 0.488479 0.425 0.487793 0.4255 0.487106 0.426 0.48642 0.4265 0.485735 0.427 0.485049 0.4275 0.484364 0.428 0.483679 0.4285 0.482994 0.429 0.48231 0.4295 0.481626 0.43 0.480942 0.4305 0.480258 0.431 0.479575 0.4315 0.478892 0.432 0.478209 0.4325 0.477526 0.433 0.476844 0.4335 0.476162 0.434 0.47548 0.4345 0.474798 0.435 0.474117 0.4355 0.473436 0.436 0.472755 0.4365 0.472074 0.437 0.471394 0.4375 0.470714 0.438 0.470035 0.4385 0.469355 0.439 0.468676 0.4395 0.467997 0.44 0.467318 0.4405 0.46664 0.441 0.465962 0.4415 0.465284 0.442 0.464607 0.4425 0.46393 0.443 0.463253 0.4435 0.462576 0.444 0.4619 0.4445 0.461223 0.445 0.460548 0.4455 0.459872 0.446 0.459197 0.4465 0.458522 0.447 0.457847 0.4475 0.457173 0.448 0.456499 0.4485 0.455825 0.449 0.455151 0.4495 0.454478 0.45 0.453805 0.4505 0.453133 0.451 0.45246 0.4515 0.451788 0.452 0.451116 0.4525 0.450445 0.453 0.449774 0.4535 0.449103 0.454 0.448432 0.4545 0.447762 0.455 0.447092 0.4555 0.446422 0.456 0.445753 0.4565 0.445084 0.457 0.444415 0.4575 0.443747 0.458 0.443078 0.4585 0.44241 0.459 0.441743 0.4595 0.441076 0.46 0.440409 0.4605 0.439742 0.461 0.439076 0.4615 0.43841 0.462 0.437744 0.4625 0.437079 0.463 0.436414 0.4635 0.435749 0.464 0.435084 0.4645 0.43442 0.465 0.433756 0.4655 0.433093 0.466 0.43243 0.4665 0.431767 0.467 0.431104 0.4675 0.430442 0.468 0.42978 0.4685 0.429118 0.469 0.428457 0.4695 0.427796 0.47 0.427136 0.4705 0.426475 0.471 0.425815 0.4715 0.425156 0.472 0.424496 0.4725 0.423837 0.473 0.423179 0.4735 0.42252 0.474 0.421862 0.4745 0.421204 0.475 0.420547 0.4755 0.41989 0.476 0.419233 0.4765 0.418577 0.477 0.417921 0.4775 0.417265 0.478 0.41661 0.4785 0.415955 0.479 0.4153 0.4795 0.414646 0.48 0.413992 0.4805 0.413338 0.481 0.412685 0.4815 0.412032 0.482 0.411379 0.4825 0.410727 0.483 0.410075 0.4835 0.409423 0.484 0.408772 0.4845 0.408121 0.485 0.40747 0.4855 0.40682 0.486 0.40617 0.4865 0.40552 0.487 0.404871 0.4875 0.404222 0.488 0.403574 0.4885 0.402925 0.489 0.402278 0.4895 0.40163 0.49 0.400983 0.4905 0.400336 0.491 0.39969 0.4915 0.399044 0.492 0.398398 0.4925 0.397752 0.493 0.397107 0.4935 0.396463 0.494 0.395818 0.4945 0.395175 0.495 0.394531 0.4955 0.393888 0.496 0.393245 0.4965 0.392602 0.497 0.39196 0.4975 0.391318 0.498 0.390677 0.4985 0.390036 0.499 0.389395 0.4995 0.388755 0.5 0.388115 relion-3.1.3/relion.h000066400000000000000000000030421411340063500144430ustar00rootroot00000000000000#ifndef RELION_H_ #define RELION_H_ //This is a main header - it includes everything else. 
#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #endif // RELION_H_ relion-3.1.3/scripts/000077500000000000000000000000001411340063500144725ustar00rootroot00000000000000relion-3.1.3/scripts/bfactor_plot.py000077500000000000000000000516511411340063500175350ustar00rootroot00000000000000#!/usr/bin/env python """ bfactor_plot --------- Pipeline setup script for automated processing with RELION 3. Authors: Sjors H.W. Scheres, Takanori Nakane & Colin Palmer Call this from the intended location of the RELION project directory, and provide the name of a file containing options if needed. See the relion_it_options.py file for an example. Usage: /path/to/relion_it.py [options_file...] """ from __future__ import print_function import collections import os import runpy import sys import time import glob from math import log, sqrt # Constants PIPELINE_STAR = 'default_pipeline.star' RUNNING_FILE = 'RUNNING' # prefix is appended in main() SETUP_CHECK_FILE = 'SUBMITTED_JOBS' # prefix is appended in main() class RelionItOptions(object): """ Options for the relion_it pipeline setup script. When initialised, this contains default values for all options. Call ``update_from()`` to override the defaults with a dictionary of new values. """ ############################################################################# # Change the parameters below to reflect your experiment # ############################################################################# # job prefix prefix = 'BFACTOR_PLOT_' # If program crahses saying "'utf-8' codec can't decode byte 0xXX in position YY", # most likely run.job file in the job directory contains garbage bytes. # Refine3D job with all particles # This must be a job from RELION 3.1, not 3.0. input_refine3d_job = 'Refine3D/job040/' # PostProcess job for resolution assessment input_postprocess_job = 'PostProcess/job083/' # Minimum number of particles minimum_nr_particles = 100 # Maximum number of particles maximum_nr_particles = 9999999 #### relion_refine paremeters # Initial low-pass filter for the refinements refine_ini_lowpass = 40 # Read all particles in one batch into memory? refine_preread_images = False # Or copy particles to scratch disk? refine_scratch_disk = '' # Number of pooled particles? refine_nr_pool = 10 # Use GPU-acceleration? refine_do_gpu = True # Which GPU to use (different from GPU used for pre-processing?) refine_gpu = '' # How many MPI processes to use refine_mpi = 5 # How many threads to use refine_threads = 6 # Skip padding? refine_skip_padding = False # Submit jobs to the cluster? 
refine_submit_to_queue = False ### Cluster submission settings # Name of the queue to which to submit the job queue_name = 'openmpi' # Name of the command used to submit scripts to the queue queue_submit_command = 'qsub -l gpu=4' # The template for your standard queue job submission script queue_submission_template = '/public/EM/RELION/relion/bin/qsub.csh' # Minimum number of dedicated cores that need to be requested on each node queue_minimum_dedicated = 32 ####################################################################### ############ typically no need to change anything below this line ####################################################################### def update_from(self, other): """ Update this RelionItOptions object from a dictionary. Special values (with names like '__xxx__') are removed, allowing this method to be given a dictionary containing the namespace from a script run with ``runpy``. """ while len(other) > 0: key, value = other.popitem() if not (key.startswith('__') and key.endswith('__')): # exclude __name__, __builtins__ etc. if hasattr(self, key): setattr(self, key, value) else: print('Unrecognised option {}'.format(key)) def load_star(filename): from collections import OrderedDict datasets = OrderedDict() current_data = None current_colnames = None in_loop = 0 # 0: outside 1: reading colnames 2: reading data for line in open(filename): line = line.strip() # remove comments comment_pos = line.find('#') if comment_pos > 0: line = line[:comment_pos] if line == "": if in_loop == 2: in_loop = 0 continue if line.startswith("data_"): in_loop = 0 data_name = line[5:] current_data = OrderedDict() datasets[data_name] = current_data elif line.startswith("loop_"): current_colnames = [] in_loop = 1 elif line.startswith("_"): if in_loop == 2: in_loop = 0 elems = line[1:].split() if in_loop == 1: current_colnames.append(elems[0]) current_data[elems[0]] = [] else: current_data[elems[0]] = elems[1] elif in_loop > 0: in_loop = 2 elems = line.split() assert len(elems) == len(current_colnames) for idx, e in enumerate(elems): current_data[current_colnames[idx]].append(e) return datasets def getJobName(name_in_script, done_file): jobname = None # See if we've done this job before, i.e. whether it is in the done_file if (os.path.isfile(done_file)): f = open(done_file,'r') for line in f: elems = line.split() if len(elems) < 3: continue if elems[0] == name_in_script: jobname = elems[2] break f.close() return jobname def addJob(jobtype, name_in_script, done_file, options, template=None, alias=None): jobname = getJobName(name_in_script, done_file) # If we hadn't done it before, add it now if (jobname is not None): already_had_it = True else: already_had_it = False optionstring = '' for opt in options[:]: optionstring += opt + ';' command = 'relion_pipeliner' if template is None: command += ' --addJob ' + jobtype else: command += ' --addJobFromStar ' + template command += ' --addJobOptions "' + optionstring + '"' if alias is not None: command += ' --setJobAlias "' + alias + '"' #print("Debug: addJob executes " + command) os.system(command) pipeline = load_star(PIPELINE_STAR) jobname = pipeline['pipeline_processes']['rlnPipeLineProcessName'][-1] # Now add the jobname to the done_file f = open(done_file,'a') f.write(name_in_script + ' = ' + jobname + '\n') f.close() # return the name of the job in the RELION pipeline, e.g. 
'Import/job001/' return jobname, already_had_it def RunJobs(jobs, repeat, wait, schedulename): runjobsstring = '' for job in jobs[:]: runjobsstring += job + ' ' command = 'relion_pipeliner --schedule ' + schedulename + ' --repeat ' + str(repeat) + ' --min_wait ' + str(wait) + ' --RunJobs "' + runjobsstring + '" &' #print("Debug: RunJobs executes " + command) os.system(command) def CheckForExit(): if not os.path.isfile(RUNNING_FILE): print(" RELION_IT:", RUNNING_FILE, "file no longer exists, exiting now ...") exit(0) def WaitForJob(wait_for_this_job, seconds_wait): time.sleep(seconds_wait) print(" RELION_IT: waiting for job to finish in", wait_for_this_job) while True: pipeline = load_star(PIPELINE_STAR) myjobnr = -1 for jobnr in range(0,len(pipeline['pipeline_processes']['rlnPipeLineProcessName'])): jobname = pipeline['pipeline_processes']['rlnPipeLineProcessName'][jobnr] if jobname == wait_for_this_job: myjobnr = jobnr if myjobnr < 0: print(" ERROR: cannot find ", wait_for_this_job, " in ", PIPELINE_STAR) exit(1) status = int(pipeline['pipeline_processes']['rlnPipeLineProcessStatus'][myjobnr]) if status == 2: print(" RELION_IT: job in", wait_for_this_job, "has finished now") return else: CheckForExit() time.sleep(seconds_wait) def find_split_job_output(prefix, n, max_digits=6): import os.path for i in range(max_digits): filename = prefix + str(n).rjust(i, '0') + '.star' if os.path.isfile(filename): return filename return None def line_fit(xs, ys): n = len(xs) assert n == len(ys) mean_x = 0.0 mean_y = 0.0 for x, y in zip(xs, ys): mean_x += x mean_y += y mean_x /= n mean_y /= n var_x = 0.0 cov_xy = 0.0 for x, y in zip(xs, ys): var_x += (x - mean_x) ** 2 cov_xy += (x - mean_x) * (y - mean_y) slope = cov_xy / var_x intercept = mean_y - slope * mean_x return slope, intercept def get_postprocess_result(post_star): result = load_star(post_star)['general'] resolution = float(result['rlnFinalResolution']) pp_bfactor = float(result['rlnBfactorUsedForSharpening']) return resolution, pp_bfactor def run_pipeline(opts): """ Configure and run the RELION 3 pipeline with the given options. Args: opts: options for the pipeline, as a RelionItOptions object. """ # Write RUNNING_RELION_IT file, when deleted, this script will stop with open(RUNNING_FILE, 'w'): pass ### Prepare the list of queue arguments for later use queue_options = ['Submit to queue? 
== Yes', 'Queue name: == {}'.format(opts.queue_name), 'Queue submit command: == {}'.format(opts.queue_submit_command), 'Standard submission script: == {}'.format(opts.queue_submission_template), 'Minimum dedicated cores per node: == {}'.format(opts.queue_minimum_dedicated)] # Get the original STAR file refine3d_run_file = opts.input_refine3d_job+'job.star' all_particles_star_file = None if os.path.exists(refine3d_run_file): for line in open(refine3d_run_file,'r'): if 'fn_img' in line: all_particles_star_file = line.split()[1].replace('\n','') break else: refine3d_run_file = opts.input_refine3d_job+'run.job' # old style for line in open(refine3d_run_file,'r'): if 'Input images STAR file' in line: all_particles_star_file = line.split(' == ')[1].replace('\n','') break if all_particles_star_file is None: print(' ERROR: cannot find input STAR file in', refine3d_run_file) exit(1) all_particles = load_star(all_particles_star_file) all_nr_particles = len(all_particles['particles']['rlnImageName']) all_particles_resolution, all_particles_bfactor = get_postprocess_result(opts.input_postprocess_job + 'postprocess.star') nr_particles = [] resolutions = [] pp_bfactors = [] current_nr_particles = opts.minimum_nr_particles while current_nr_particles <= opts.maximum_nr_particles and current_nr_particles < all_nr_particles: schedule_name = 'batch_' + str(current_nr_particles) # A. Split the STAR file split_options = ['OR select from particles.star: == {}'.format(all_particles_star_file), 'OR: split into subsets? == Yes', 'Subset size: == {}'.format(current_nr_particles), 'Randomise order before making subsets?: == Yes', 'OR: number of subsets: == 1'] split_job_name = 'split_job_' + str(current_nr_particles) split_alias = opts.prefix + 'split_' + str(current_nr_particles) split_job, already_had_it = addJob('Select', split_job_name, SETUP_CHECK_FILE, split_options, None, split_alias) if not already_had_it: RunJobs([split_job], 1, 0, schedule_name) WaitForJob(split_job, 30) # B. Run Refine3D split_filename = find_split_job_output('{}particles_split'.format(split_job), 1) assert split_filename is not None refine_options = ['Input images STAR file: == {}'.format(split_filename), 'Number of pooled particles: == {}'.format(opts.refine_nr_pool), 'Which GPUs to use: == {}'.format(opts.refine_gpu), 'Number of MPI procs: == {}'.format(opts.refine_mpi), 'Initial low-pass filter (A): == {}'.format(opts.refine_ini_lowpass), 'Number of threads: == {}'.format(opts.refine_threads)] if opts.refine_skip_padding: refine_options.append('Skip padding? == Yes') else: refine_options.append('Skip padding? == No') if opts.refine_do_gpu: refine_options.append('Use GPU acceleration? == Yes') else: refine_options.append('Use GPU acceleration? == No') if opts.refine_preread_images: refine_options.append('Pre-read all particles into RAM? == Yes') refine_options.append('Copy particles to scratch directory: == ') else: refine_options.append('Pre-read all particles into RAM? == No') refine_options.append('Copy particles to scratch directory: == {}'.format(opts.refine_scratch_disk)) if opts.refine_submit_to_queue: refine_options.extend(queue_options) else: refine_options.append('Submit to queue? 
== No') refine_job_name = 'refine_job_' + str(current_nr_particles) refine_alias = opts.prefix + str(current_nr_particles) refine_job, already_had_it = addJob('Refine3D', refine_job_name, SETUP_CHECK_FILE, refine_options, refine3d_run_file, refine_alias) if not already_had_it: RunJobs([refine_job], 1, 0, schedule_name) WaitForJob(refine_job, 30) halfmap_filename = None try: job_star = load_star(refine_job + "job_pipeline.star") for output_file in job_star["pipeline_output_edges"]['rlnPipeLineEdgeToNode']: if output_file.endswith("half1_class001_unfil.mrc"): halfmap_filename = output_file break assert halfmap_filename != None except: print(" RELION_IT: Refinement job " + refine_job + " does not contain expected output maps.") print(" RELION_IT: This job should have finished, but you may continue it from the GUI.") print(" RELION_IT: For now, making the plot without this job.") if halfmap_filename is not None: # C. Run PostProcess postprocess_run_file = opts.input_postprocess_job+'job.star' if not os.path.exists(postprocess_run_file): postprocess_run_file = opts.input_postprocess_job+'run.job' post_options = ['One of the 2 unfiltered half-maps: == {}'.format(halfmap_filename)] post_job_name = 'post_job_' + str(current_nr_particles) post_alias = opts.prefix + str(current_nr_particles) post_job, already_had_it = addJob('PostProcess', post_job_name, SETUP_CHECK_FILE, post_options, postprocess_run_file, post_alias) if not already_had_it: RunJobs([post_job], 1, 0, schedule_name) WaitForJob(post_job, 30) # Get resolution from post_star = post_job + 'postprocess.star' try: resolution, pp_bfactor = get_postprocess_result(post_star) nr_particles.append(current_nr_particles) resolutions.append(resolution) pp_bfactors.append(pp_bfactor) except: print(' RELION_IT: WARNING: Failed to get post-processed resolution for {} particles'.format(current_nr_particles)) # Update the current number of particles current_nr_particles = 2 * current_nr_particles # Also include the result from the original PostProcessing job if all_nr_particles <= opts.maximum_nr_particles: nr_particles.append(all_nr_particles) resolutions.append(all_particles_resolution) pp_bfactors.append(all_particles_bfactor) # Now already make preliminary plots here, e.g print() print('NrParticles Ln(NrParticles) Resolution(A) 1/Resolution^2 PostProcessBfactor') xs = [] ys = [] for n_particles, resolution, pp_bfactor in zip(nr_particles, resolutions, pp_bfactors): log_n_particles = log(n_particles) inv_d2 = 1.0 / (resolution * resolution) print('{0:11d} {1:15.3f} {2:13.2f} {3:14.4f} {4:18.2f}'.format(n_particles,log_n_particles, resolution, inv_d2, -pp_bfactor)) xs.append(log_n_particles) ys.append(inv_d2) slope, intercept = line_fit(xs, ys) b_factor = 2.0 / slope print() print(" RELION_IT: ESTIMATED B-FACTOR from {0:d} points is {1:.2f}".format(len(xs), b_factor)) print(" RELION_IT: The fitted line is: Resolution = 1 / Sqrt(2 / {0:.3f} * Log_e(#Particles) + {1:.3f})".format(b_factor, intercept)) print(" RELION_IT: IF this trend holds, you will get:") for x in (1.5, 2, 4, 8): current_nr_particles = int(all_nr_particles * x) resolution = 1 / sqrt(slope * log(current_nr_particles) + intercept) print(" RELION_IT: {0:.2f} A from {1:d} particles ({2:d} % of the current number of particles)".format(resolution, current_nr_particles, int(x * 100))) if True:#try: # Try plotting import matplotlib as mpl mpl.use('pdf') import matplotlib.pyplot as plt import numpy as np fitted = [] for x in xs: fitted.append(x * slope + intercept) fig = plt.figure() ax1 = 
fig.add_subplot(111) ax1.plot(xs, ys, '.') ax1.plot(xs, fitted) ax1.set_xlabel("ln(#particles)") ax1.set_ylabel("1/Resolution$^2$ in 1/$\AA^2$") ax1.set_title("Rosenthal & Henderson plot: B = 2.0 / slope = {:.1f}".format(b_factor)); ax2 = ax1.twiny() ax2.xaxis.set_ticks_position("bottom") ax2.xaxis.set_label_position("bottom") ax2.set_xlim(ax1.get_xlim()) ax2.spines["bottom"].set_position(("axes", -0.15)) # In matplotlib 1.2, the order seems to matter ax2.set_xlabel("#particles") ax2.set_xticklabels(np.exp(ax1.get_xticks()).astype(np.int)) ax3 = ax1.twinx() ax3.set_ylabel("Resolution in $\AA$") ax3.set_ylim(ax1.get_ylim()) ax3.yaxis.set_ticks_position("right") ax3.yaxis.set_label_position("right") yticks = ax1.get_yticks() yticks[yticks <= 0] = 1.0 / (999 * 999) # to avoid zero division and negative sqrt ndigits = 1 if np.max(yticks) > 0.25: ndigits = 2 ax3.set_yticklabels(np.sqrt(1 / yticks).round(ndigits)) output_name = opts.prefix + "rosenthal-henderson-plot.pdf" plt.savefig(output_name, bbox_inches='tight') print(" RELION_IT: Plot written to " + output_name) else:#except: print('WARNING: Failed to plot. Probably matplotlib and/or numpy is missing.') if os.path.isfile(RUNNING_FILE): os.remove(RUNNING_FILE) print(' RELION_IT: exiting now... ') def main(): """ Run the RELION 3 pipeline. Options files given as command line arguments will be opened in order and used to update the default options. """ global RUNNING_FILE global SETUP_CHECK_FILE opts = RelionItOptions() for user_opt_file in sys.argv[1:]: print(' RELION_IT: reading options from {}'.format(user_opt_file)) user_opts = runpy.run_path(user_opt_file) opts.update_from(user_opts) SETUP_CHECK_FILE = opts.prefix + SETUP_CHECK_FILE RUNNING_FILE = opts.prefix + RUNNING_FILE # Make sure no other version of this script are running... if os.path.isfile(RUNNING_FILE): print(" RELION_IT: ERROR:", RUNNING_FILE, "is already present: delete this file and make sure no other copy of this script is running. Exiting now ...") exit(0) print(' RELION_IT: -------------------------------------------------------------------------------------------------------------------') print(' RELION_IT: Script for automated Bfactor-plot generation in RELION (>= 3.1)') print(' RELION_IT: Authors: Sjors H.W. Scheres & Takanori Nakane') print(' RELION_IT: ') print(' RELION_IT: Usage: ./bfactor_plot.py [extra_options.py ...]') print(' RELION_IT: ') print(' RELION_IT: This script keeps track of already submitted jobs in a filed called', SETUP_CHECK_FILE) print(' RELION_IT: upon a restart, jobs present in this file will be ignored.') print(' RELION_IT: If you would like to re-do a specific job from scratch (e.g. 
because you changed its parameters)') print(' RELION_IT: remove that job, and those that depend on it, from the', SETUP_CHECK_FILE) print(' RELION_IT: -------------------------------------------------------------------------------------------------------------------') print(' RELION_IT: ') run_pipeline(opts) if __name__ == "__main__": main() relion-3.1.3/scripts/eer_trajectory_handler.py000066400000000000000000000212671411340063500215720ustar00rootroot00000000000000#!/bin/env python3 import argparse from collections import OrderedDict from math import floor import numpy as np import os import sys def load_star(filename): datasets = OrderedDict() current_data = None current_colnames = None in_loop = 0 # 0: outside 1: reading colnames 2: reading data for line in open(filename): line = line.strip() # remove comments comment_pos = line.find('#') if comment_pos > 0: line = line[:comment_pos] if line == "": if in_loop == 2: in_loop = 0 continue if line.startswith("data_"): in_loop = 0 data_name = line[5:] current_data = OrderedDict() datasets[data_name] = current_data elif line.startswith("loop_"): current_colnames = [] in_loop = 1 elif line.startswith("_"): if in_loop == 2: in_loop = 0 elems = line[1:].split() if in_loop == 1: current_colnames.append(elems[0]) current_data[elems[0]] = [] else: current_data[elems[0]] = elems[1] elif in_loop > 0: in_loop = 2 elems = line.split() assert len(elems) == len(current_colnames) for idx, e in enumerate(elems): current_data[current_colnames[idx]].append(e) return datasets def write_star(filename, datasets): f = open(filename, "w") for data_name, data in datasets.items(): f.write( "\ndata_" + data_name + "\n\n") col_names = list(data.keys()) need_loop = isinstance(data[col_names[0]], list) if need_loop: f.write("loop_\n") for idx, col_name in enumerate(col_names): f.write("_%s #%d\n" % (col_name, idx + 1)) nrow = len(data[col_names[0]]) for row in range(nrow): f.write("\t".join([data[x][row] for x in col_names])) f.write("\n") else: for col_name, value in data.items(): f.write("_%s\t%s\n" % (col_name, value)) f.write("\n") f.close() def interpolate_trajectory(traj_star, eer_grouping, old_grouping): nz = int(traj_star['general']['rlnImageSizeZ']) if (old_grouping <= 0): if 'rlnEERGrouping' not in traj_star['general']: sys.stderr.write("ERROR: The trajectory STAR file does not contain rlnEERGrouping. You have to specify the old grouping as --old_group.\n") sys.exit(-1) old_grouping = float(traj_star['general']['rlnEERGrouping']) new_nz = int(floor(nz * old_grouping / eer_grouping)) scale = eer_grouping / old_grouping traj_star['general']['rlnImageSizeZ'] = str(new_nz) traj_star['general']['rlnMicrographDoseRate'] = str(float(traj_star['general']['rlnMicrographDoseRate']) * scale) traj_star['general']['rlnEERGrouping'] = eer_grouping xs = np.array(traj_star['global_shift']['rlnMicrographShiftX'], dtype=np.float) ys = np.array(traj_star['global_shift']['rlnMicrographShiftY'], dtype=np.float) new_xs = np.zeros(new_nz) new_ys = np.zeros(new_nz) # This interpolation is not very accurate. We should take # the MIDDLE, not the start of a range, as an observation point. # However, such small error should be corrected in Polish anyway. 
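    # A small worked example of the mapping below (the numbers are purely
    # illustrative and not taken from any particular data set): with
    # old_grouping = 20 and eer_grouping = 32, scale = 32 / 20 = 1.6, so the
    # output fraction i = 3 maps to src = 4.8, giving src1 = 4, src2 = 5 and
    # frac = 0.8; the interpolated shift is then 0.2 * xs[4] + 0.8 * xs[5]
    # (and likewise for ys).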
for i in range(new_nz): src = i * scale src1 = int(floor(src)) src2 = src1 + 1 frac = src - src1 #print(i, src, src1, src2) if src2 >= nz: # be lazy; don't extrapolate new_xs[i] = xs[nz - 1] new_ys[i] = ys[nz - 1] else: new_xs[i] = xs[src1] * (1 - frac) + xs[src2] * frac new_ys[i] = ys[src1] * (1 - frac) + ys[src2] * frac traj_star['global_shift']['rlnMicrographFrameNumber'] = list(np.linspace(1, new_nz, num=new_nz).astype(np.int).astype(np.str0)) traj_star['global_shift']['rlnMicrographShiftX'] = list(new_xs.astype(np.str0)) traj_star['global_shift']['rlnMicrographShiftY'] = list(new_ys.astype(np.str0)) # z is not normalized, so have to be patched. if "local_motion_model" in traj_star: coeffs = np.array(traj_star['local_motion_model']['rlnMotionModelCoeff'], dtype=np.float) coeffs *= scale # 1st-order in time(z) coeffs[1::3] *= scale # 2nd-order coeffs[2::3] *= scale # 3rd-order traj_star['local_motion_model']['rlnMotionModelCoeff'] = list(coeffs.astype(np.str0)) return traj_star def resample_image(traj_star, eer_upsampling): orig_size = int(traj_star['general']['rlnImageSizeX']) assert orig_size == int(traj_star['general']['rlnImageSizeY']) if (orig_size == 4096 and eer_upsampling == 2): scale = 2.0 elif (orig_size == 8192 and eer_upsampling == 1): scale = 0.5 else: raise "Illegal eer_upsampling" traj_star['general']['rlnImageSizeX'] = str(int(orig_size * scale)) traj_star['general']['rlnImageSizeY'] = str(int(orig_size * scale)) traj_star['general']['rlnMicrographBinning'] = str(eer_upsampling) traj_star['general']['rlnEERUpsampling'] = str(eer_upsampling) traj_star['general']['rlnMicrographOriginalPixelSize'] = str(float(traj_star['general']['rlnMicrographOriginalPixelSize']) / scale) xs = np.array(traj_star['global_shift']['rlnMicrographShiftX'], dtype=np.float) * scale ys = np.array(traj_star['global_shift']['rlnMicrographShiftY'], dtype=np.float) * scale traj_star['global_shift']['rlnMicrographShiftX'] = list(xs.astype(np.str0)) traj_star['global_shift']['rlnMicrographShiftY'] = list(ys.astype(np.str0)) # Hot pixels if 'hot_pixels' in traj_star: hot_xs = np.array(traj_star['hot_pixels']['rlnCoordinateX'], dtype=np.float) hot_ys = np.array(traj_star['hot_pixels']['rlnCoordinateY'], dtype=np.float) if scale == 2: hot_xs = np.hstack([2 * hot_xs, 2 * hot_xs, 2 * hot_xs + 1, 2 * hot_xs + 1]) hot_ys = np.hstack([2 * hot_ys, 2 * hot_ys + 1, 2 * hot_ys, 2 * hot_ys + 1]) elif scale == 0.5: tmp = np.floor(np.vstack([hot_xs, hot_ys]) / 2.0).astype(np.int) tmp = np.unique(tmp, axis = 1) hot_xs = tmp[0, :] hot_ys = tmp[1, :] traj_star['hot_pixels']['rlnCoordinateX'] = list(hot_xs.astype(np.str0)) traj_star['hot_pixels']['rlnCoordinateY'] = list(hot_ys.astype(np.str0)) return traj_star def add_suffix(filename, suffix): tmp = os.path.splitext(filename) return "%s_%s%s" % (tmp[0], suffix, tmp[1]) parser = argparse.ArgumentParser(description='Tweak motion trajectory STAR files for EER movies') parser.add_argument('--i', type=str, nargs='?', metavar='corrected_micrographs.star', required=True, help='Motion correction STAR file') parser.add_argument('--o', type=str, nargs='?', metavar='suffix', required=True, help='Suffix for output files') parser.add_argument('--old_group', type=int, nargs='?', metavar='group', default=0, help='Old EER grouping (must be specified when not recorded in the STAR file)') parser.add_argument('--regroup', type=int, nargs='?', metavar='group', default=0, help='Regroup to this number of physical frames / fraction') parser.add_argument('--resample', type=int, nargs='?', 
metavar='sampling', default=0, help='Resample to this level. 1=4K, 2=8K (super-res)') args = parser.parse_args() #print(args) fn_motioncorr_star = args.i suffix = args.o if (args.resample == 0 and args.regroup == 0): sys.stderr.write("Error: Nothing to do. Please specify --resample and/or --regroup.\n") sys.exit(-1) motioncorr_star = load_star(fn_motioncorr_star) print("Read %s" % fn_motioncorr_star) print("Found %d movies" % len(motioncorr_star['micrographs']['rlnMicrographMetadata'])) for idx, fn_traj in enumerate(motioncorr_star['micrographs']['rlnMicrographMetadata']): fn_out = add_suffix(fn_traj, suffix) motioncorr_star['micrographs']['rlnMicrographMetadata'][idx] = fn_out print("Processing %s => %s" % (fn_traj, fn_out)) traj_star = load_star(fn_traj) if (args.regroup > 0): interpolate_trajectory(traj_star, args.regroup, args.old_group) if (args.resample > 0): resample_image(traj_star, args.resample) # local_shift table is not updated, because it is not used by Polish. # To avoid confusion, delete it. if 'local_shift' in traj_star: del traj_star['local_shift'] write_star(fn_out, traj_star) #break fn_out = add_suffix(fn_motioncorr_star, suffix) write_star(fn_out, motioncorr_star) print("Written %s" % fn_out) relion-3.1.3/scripts/qsub.csh000077500000000000000000000003441411340063500161470ustar00rootroot00000000000000#!/bin/tcsh #$ -pe XXXqueueXXX XXXnodesXXX #$ -l dedicated=XXXdedicatedXXX #$ -e XXXerrfileXXX #$ -o XXXoutfileXXX #$ -A Relion #$ -cwd #$ -S /bin/tcsh mpiexec -mca orte_forward_job_control 1 -n XXXmpinodesXXX XXXcommandXXX relion-3.1.3/scripts/relion_it.py000066400000000000000000003330331411340063500170350ustar00rootroot00000000000000#!/usr/bin/env python2.7 """ relion_it.py ============ Script for automated, on-the-fly single-particle analysis in RELION 3 Authors: Sjors H.W. Scheres, Takanori Nakane & Colin M. Palmer Usage: relion_it.py [extra_options.py [extra_options2.py ....] ] [--gui] [--continue] To get started, go to the intended location of your RELION project directory and make sure your micrographs are accessible from within it (e.g. in a subdirectory called `Movies/' - use a symlink if necessary). Then run this script, providing the names of files containing options if needed. (To call the script, you'll need to enter the full path to it, put the directory containing it on your PATH environment variable, or put a copy of the script in the current directory.) Run with the `--gui' option to launch a simple GUI which will set up a run from a few basic options. (The GUI can also be used to save a complete options file that you can then edit as required.) Once the script is running, open a normal RELION GUI to see what's happening and visualise the results. See below for full instructions including how to handle errors. If you have any problems, please edit the script as needed, call on your local Python expert or email the CCP-EM mailing list (https://www.jiscmail.ac.uk/ccpem). Overview -------- relion_it.py creates a number of RELION jobs and then runs one or more `relion_pipeliner' processes to schedule them (exactly like using the "Schedule" button in the RELION GUI). Instructions and information are printed to the terminal by relion_it.py as it runs. relion_it.py uses a large number of options to control how the jobs are run. It's designed to be very flexible and so these options can be changed in a number of ways: - The easiest way is to use the simple GUI (enabled by passing the `--gui' argument), which allows you to set a few simple options. 
These are then used to calculate appropriate values for the complete set of options. (See "Using the GUI" below for more information on this.) - For more control, options can be put into one or more Python files (with a simple "option_name = value" format or with more complicated calculations - see "Options files" below for more information). The names of these options files can passed as command line arguments to relion_it.py. - For maximum control, you can make your own copy of this script and change the option values and the code itself however you want. Before running relion_it.py, you need to make sure you're in your intended RELION project directory, and that your movie files are accessible by relative paths within that directory (as usual for a RELION project). You could do this by moving the files from the microscope straight into the project directory, using a symlink from your project directory to the real location of the data, or running a script to create a new symlink to each micrograph as it is collected. Options files ------------- relion_it.py uses a large number of options for controlling both the flow of the script and the parameters for individual jobs. These options can be read from Python script files when relion_it.py is started. The options are all listed the body of the script below, with a comment to explain each option. One way to use this script is to copy it in its entirety into your project directory, edit the options directly in the script and then run it (with no command line arguments). However, it's often better to keep the script in the RELION source directory (where it can be updated easily) and use options files to configure it. An example of a simple options file is: angpix = 1.06 This would override the default pixel size value, but leave all other options at their defaults. The options files are read and interpreted as Python scripts. A simple list of "option_name = value" lines is all that is needed, though you can also use any Python commands you like to do more complex calculations. To generate an example file containing all of the options, run "relion_it.py --gui" and then click the "Save options" button, which will save all the current options to a file called `relion_it_options.py' in the working directory. The options are named descriptively so you can probably understand what most of them do quite easily. For more help on any particular option, look at the comment above its definition in this script, or search the script's code to see how it is used. Options files can be useful as templates. As an example, at Diamond Light Source's eBIC facility, we have a template file called `dls_cluster_options.py' that contains the necessary settings to make relion_it.py submit most of its jobs to run on the DLS GPU cluster. You could also set up standard templates for a particular microscope (say, voltage and Cs settings) or for a particular project or computer configuration. When relion_it.py starts, it reads all options files in the order they are given on the command line. Subsequent files will override earlier ones, so the last value given for any particular option will be the value that is used. If you start relion_it.py with the `--continue' argument, it will automatically add `relion_it_options.py' to the end of the list of options files. 
This means that if you are in a project directory where the relion_it.py GUI has previously been used, all options will be defined in the relion_it_options.py file and they will override any other options files given on the command line. (This is very useful for restarting the script after a problem, but it would be pointless to combine `--continue' with any options template files.) Note that if relion_it.py finds option names that it doesn't recognise while it's reading an options file, it will print a warning (but continue anyway). If you've been editing options files by hand, you should check the output from relion_it.py when it starts to make sure there are no typos in the options you wanted to set. (If you're using local variables for intermediate Python calculations in an options file, it's a good idea to use names starting with a leading underscore so you can immediately tell them apart from warnings about genuine spelling mistakes.) Using the GUI ------------- The GUI provides a simple way to start new projects with relion_it.py. If you want to use it, prepare your project directory as described above, then start the GUI with "relion_it.py --gui". (If you're using any template options files, you can give those too, for example "relion_it.py /path/to/site/options.py --gui".) The window that appears should be self-explanatory. Fill in the options as needed for your project, and use the check boxes on the right to control what processing steps will be done. When you're ready, click either "Save options" or "Save & run". The program will check the values you've entered and then use them to calculate a few extra options for relion_it.py. The options will then be saved to a file called `relion_it_options.py', and if you clicked "Save & run" the processing run will start immediately. If any of the entered values are invalid (for example, if there are letters in a field which should be a number), the GUI will display a message box with an error when you click one of the buttons. It will also display a warning if any values appear to be incorrect (but you can choose to ignore the warning by clicking "OK"). The GUI will try to calculate some extra options from the values you enter using the following rules: 1. If a 3D reference is given, use a single pass with reference-based autopicking, minimum distance between particles of 0.7 times the particle size, and a batch size of 100,000 particles. 2. If no 3D reference is given, run a first pass with reference-free LoG autopicking and a batch size of 10,000, and then a second pass with reference-based autopicking and a batch size of 100,000. These options should be sensible in many cases, but if you'd like to change them, save the options from the GUI using the "Save options" button, close the GUI, and edit the `relion_it_options.py' file to change the option values as needed. You can then start the processing run with "relion_it.py --continue". Running the pipelines --------------------- relion_it.py uses several different scheduling pipelines to run its jobs. While each one is running, a file is created in the project directory called `RUNNING_PIPELINER_'. A log of the jobs run by that pipeline is stored in `pipeline_.log'. If you want to stop one of the pipelines for any reason, delete its `RUNNING_' file and within a minute or two the pipeliner will notice that the file has been removed and stop. 
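For example (the exact file names depend on the schedule names used for your run, so treat
these as illustrative), you could list the active pipelines and stop the first-pass
preprocessing pipeline like this:

    ls RUNNING_PIPELINER_*                    # to see which pipelines are currently running
    rm RUNNING_PIPELINER_default_PREPROCESS   # to make the preprocessing pipeliner stop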
relion_it.py itself uses a similar file called `RUNNING_RELION_IT', and you can delete this to stop the script (which will not affect any pipelines that are already running). It keeps a list of all of the jobs it has submitted in a file called `RELION_IT_SUBMITTED_JOBS'. This file can be edited manually if necessary (but not while the script is running!) Most of the jobs are run by the `preprocessing' pipeline. This will do the following: 1. Import movies 2. Motion correction 3. CTF estimation 4. Particle auto-picking 5. Particle extraction 6. Batch selection After a number of particles have been extracted (1,000 by default), a 2D classification job will be run to provide feedback on the quality of the data collection and particle picking. Particles are split into batches of a fixed size (default 10,000 for the first pass with no reference, or 100,000 otherwise). The first batch is special: as it grows, the 2D classification job is re-run repeatedly to provide early feedback on the quality of the data. For subsequent batches, the script waits for each batch to be complete before running 2D classification on it. You can provide reference structures for auto-picking and 3D classification. (If you provide a 3D reference in the GUI it will automatically be used for both tasks.) If you do not provide a reference for auto-picking, reference-free LoG picking will be used. If you do not provide a reference for classification, relion_it.py will run the preprocessing pipeline twice. In the first pass, an initial model will be generated, and then a second pass of preprocessing will be done using the initial model as a reference for auto-picking and classification. relion_it.py makes an effort to try to identify a suitable reference to use from the classes produced by the InitialModel job, but if it selects an inappropriate reference, you can change it by stopping the pipelines and script ("rm RUNNING_*"), updating the reference filename stored in the file named `RELION_IT_2NDPASS_3DREF', deleting the relevant jobs (`autopick2_job' and those following) from the `RELION_IT_SUBMITTED_JOBS' file, then restarting the pipeline with "relion_it.py --continue". Fixing problems --------------- One-off job failure ``````````````````` Occasionally, a single job can fail with an isolated error, for example if there are temporary network problems while working on a remote filesystem. If this happens, RELION will wait forever for the files to appear that would indicate the job has finished. In the meantime, no new jobs will be run, which can cause a backlog of micrographs to build up. To fix this (for a preprocessing job), you can just try to re-run the job from the RELION GUI. Select the job in the "Running jobs" list, then click "Job actions" -> "Mark as finished". Select the job again in the "Finished jobs" list, then click "Continue!" to re-start the job. That approach should work for preprocessing jobs, but probably won't work for classification or inital model generation jobs, since those cannot be continued and must instead be restarted from the beginning. The best way to do that is to restart the job manually, outside the RELION GUI, and then when the job finishes RELION should continue as if the job had never failed. 
For example, with a failed local job: ps -e | grep relion # to check if the job is still active kill # to stop the job # now re-run the commands from the job's `note.txt' file or with a job that was submitted to an SGE cluster queue: qstat # to check if the job is still active in the queue qdel # to remove the job from the queue qsub job_type/job_directory/run_submit.script # to re-submit the job The other option is to just run a new job from the RELION GUI in the normal way (select the job you want to "copy" in the jobs list, make a "new" job by clicking on the job type in the list in the top-left of the GUI, then click "Run!"). However, if you do this, relion_it.py will not know about the new job and will not run any further downstream processing based on it. In this situation, you can either continue to process your data manually in RELION, or you could edit the `RELION_IT_SUBMITTED_JOBS' file to replace the failed job with the manual one, and delete the jobs that followed the original one. After that, if you re-run the script it should continue as normal from that job onwards. Repeated job failure ```````````````````` If a job fails repeatedly, it usually indicates that there is some problem with the job parameters or the files that the job needs to access. In favourable cases, it's possible you could fix the problem by selecting the job in the RELION GUI, changing one of the parameters that is not greyed out, then clicking "Continue!". Often, though, the problem will be with one of the parameters that can't be changed for a job that already exists, so the job will need to be deleted and recreated with a different set of parameters. To handle this situation, stop all of the pipelines and the relion_it.py script ("rm RUNNING_*"), then identify and fix the problem. Often, the problem will be an error in one of the job parameters, which can usually be fixed by changing one of the script options (for example by changing the settings in `relion_it_options.py', if you originally used the GUI to start the run). If the problem is caused by missing files from an upstream job, you might need to check the output of previous jobs and look in the job directories to figure out what the problem is. Again, if it's an error in the parameters for a job, you can probably fix it by editing `relion_it_options.py'. After changing any script options, you'll need to use the RELION GUI to delete the affected job and all jobs downstream of it, and also remove them from the list in the `RELION_IT_SUBMITTED_JOBS' file. Then you should be able to restart the pipelines by running "relion_it.py --continue". If you still can't get a particular job to run without errors, you can at least continue to run the upstream jobs that are working properly. You can do this either by changing the options for relion_it.py (there are options to switch off 2D or 3D classification, or to stop after CTF estimation), or by manually scheduling the jobs you want using the RELION GUI. Remember that after running relion_it.py, you have a normal RELION project, so if the script can't do what you want, you can simply stop it and then use all of RELION's normal job management and scheduling abilities. Advanced usage -------------- It's possible to customise many aspects of the way relion_it.py works, but the details go beyond the scope of this introduction. Simple customisation can be done by setting appropriate option values (see "Option files" above). 
For more substantial changes, you might need to edit the script's Python code to get the behaviour you want. Most of the important logic is in the `run_pipeline()' function so that's a good place to start. Good luck! """ from __future__ import print_function from __future__ import division # always use float division import argparse import glob import inspect import math import os import runpy import time import traceback try: import Tkinter as tk import tkMessageBox import tkFileDialog except ImportError: # The GUI is optional. If the user requests it, it will fail when it tries # to open so we can ignore the error for now. pass # Constants PIPELINE_STAR = 'default_pipeline.star' RUNNING_FILE = 'RUNNING_RELION_IT' SECONDPASS_REF3D_FILE = 'RELION_IT_2NDPASS_3DREF' SETUP_CHECK_FILE = 'RELION_IT_SUBMITTED_JOBS' PREPROCESS_SCHEDULE_PASS1 = 'PREPROCESS' PREPROCESS_SCHEDULE_PASS2 = 'PREPROCESS_PASS2' OPTIONS_FILE = 'relion_it_options.py' class RelionItOptions(object): """ Options for the relion_it pipeline setup script. When initialised, this contains default values for all options. Call ``update_from()`` to override the defaults with a dictionary of new values. """ ############################################################################# # Change the parameters below to reflect your experiment # # Current defaults reflect cryo-ARM betagal data set of RELION-3.0 tutorial # ############################################################################# ### General parameters # Pixel size in Angstroms in the input movies angpix = 0.885 # Acceleration voltage (in kV) voltage = 200 # Polara = 2.0; Talos/Krios = 2.7; some Cryo-ARM = 1.4 Cs = 1.4 ### Import images (Linux wild card; movies as *.mrc, *.mrcs, *.tiff or *.tif; single-frame micrographs as *.mrc) import_images = 'Movies/*.tiff' # Are these multi-frame movies? Set to False for single-frame micrographs (and motion-correction will be skipped) images_are_movies = True ### MotionCorrection parameters # Dose in electrons per squared Angstrom per fraction motioncor_doseperframe = 1.277 # Gain-reference image in MRC format (only necessary if input movies are not yet gain-corrected, e.g. compressed TIFFs from K2) motioncor_gainreference = 'Movies/gain.mrc' # EER upsampling (1 = 4K, 2 = 8K). If you use 8K rendering, the pixel size (angpix) MUST be the half of the physical pixel size and the motioncor_binning should be 2. eer_upsampling = 1 # EER fractionation. The dose rate (motioncor_doseperframe) is e/A2/fraction after this fractionation. eer_grouping = 20 ### CTF estimation parameters # Most cases won't need changes here... ### Autopick parameters # Use reference-free Laplacian-of-Gaussian picking (otherwise use reference-based template matching instead) autopick_do_LoG = True # Minimum and maximum diameter in Angstrom for the LoG filter autopick_LoG_diam_min = 150 autopick_LoG_diam_max = 180 # Use positive values (0-1) to pick fewer particles; use negative values (-1-0) to pick more particles autopick_LoG_adjust_threshold = 0.0 autopick_LoG_upper_threshold = 999.0 # # OR: # # References for reference-based picking (when autopick_do_LoG = False) autopick_2dreferences = '' # OR: provide a 3D references for reference-based picking (when autopick_do_LoG = False) autopick_3dreference = '' # Threshold for reference-based autopicking (threshold 0 will pick too many particles. Default of 0.4 is hopefully better. Ultimately, just hope classification will sort it all out...) 
autopick_refs_threshold = 0.4 # Minimum inter-particle distance for reference-based picking (~70% of particle diameter often works well) autopick_refs_min_distance = 120 # # For both LoG and refs: # # Use this to remove false positives from carbon edges (useful range: 1.0-1.2, -1 to switch off) autopick_stddev_noise = -1 # Use this to remove false positives from carbon edges (useful range: -0.5-0.0; -999 to switch off) autopick_avg_noise = -999 ### Extract parameters # Box size of particles in the averaged micrographs (in pixels) extract_boxsize = 256 # Down-scale the particles upon extraction? extract_downscale = False # Box size of the down-scaled particles (in pixels) extract_small_boxsize = 64 # In second pass, down-scale the particles upon extraction? extract2_downscale = False # In second pass, box size of the down-scaled particles (in pixels) extract2_small_boxsize = 128 ### Now perform 2D and/or 3D classification with the extracted particles? do_class2d = True # And/or perform 3D classification? do_class3d = True # Repeat 2D and/or 3D-classification for batches of this many particles batch_size = 10000 # Number of 2D classes to use class2d_nr_classes = 50 # Diameter of the mask used for 2D/3D classification (in Angstrom) mask_diameter = 190 # Symmetry group (when using SGD for initial model generation, C1 may work best) symmetry = 'C1' # ### 3D-classification parameters # Number of 3D classes to use class3d_nr_classes = 4 # Have initial 3D model? If not, calculate one using SGD initial model generation have_3d_reference = False # Initial reference model class3d_reference = '' # Is reference on correct greyscale? class3d_ref_is_correct_greyscale = False # Has the initial reference been CTF-corrected? class3d_ref_is_ctf_corrected = True # Initial lowpass filter on reference class3d_ini_lowpass = 40 ### Use the largest 3D class from the first batch as a 3D reference for a second pass of autopicking? (only when do_class3d is True) do_second_pass = True # Only move on to template-based autopicking if the 3D references achieves this resolution (in A) minimum_resolution_3dref_2ndpass = 20 # In the second pass, perform 2D classification? do_class2d_pass2 = True # In the second pass, perform 3D classification? do_class3d_pass2 = False # Batch size in the second pass batch_size_pass2 = 100000 ################################################################################### ############ Often the parameters below can be kept the same for a given set-up ################################################################################### ### Repeat settings for entire pipeline # Repeat the pre-processing runs this many times (or until RUNNING_PIPELINER_default_PREPROCESS file is deleted) preprocess_repeat_times = 999 # Wait at least this many minutes between each repeat cycle preprocess_repeat_wait = 1 ### Stop after CTF estimation? I.e., skip autopicking, extraction, 2D/3D classification, etc? stop_after_ctf_estimation = False # Check every this many minutes if enough particles have been extracted for a new batch of 2D-classification batch_repeat_time = 1 ### MotionCorrection parameters # Use RELION's own implementation of motion-correction (CPU-only) instead of the UCSF implementation? 
motioncor_do_own = True # The number of threads (only for RELION's own implementation) is optimal when nr_movie_frames/nr_threads = integer motioncor_threads = 6 # Exectutable of UCSF MotionCor2 motioncor_exe = '/public/EM/MOTIONCOR2/MotionCor2' # On which GPU(s) to execute UCSF MotionCor2 motioncor_gpu = '0' # How many MPI processes to use for running motion correction? motioncor_mpi = 4 # Local motion-estimation patches for MotionCor2 motioncor_patches_x = 4 motioncor_patches_y = 4 # B-factor in A^2 for downweighting of high-spatial frequencies motioncor_bfactor = 150 # Use binning=2 for super-resolution movies motioncor_binning = 1 # Provide a defect file for your camera if you have one motioncor_defectfile = '' # orientation of the gain-reference w.r.t your movies (if input movies are not yet gain-corrected, e.g. TIFFs) motioncor_gainflip = 'No flipping (0)' motioncor_gainrot = 'No rotation (0)' # Other arguments for MotionCor2 motioncor_other_args = '' # Submit motion correction job to the cluster? motioncor_submit_to_queue = False ### CTF estimation parameters # Amplitude contrast (Q0) ampl_contrast = 0.1 # CTFFIND-defined parameters ctffind_boxsize = 512 ctffind_astigmatism = 100 ctffind_maxres = 5 ctffind_minres = 30 ctffind_defocus_max = 50000 ctffind_defocus_min = 5000 ctffind_defocus_step = 500 # For Gctf: ignore parameters on the 'Searches' tab? ctffind_do_ignore_search_params = True # For Gctf: perform equi-phase averaging? ctffind_do_EPA = True # Also estimate phase shifts (for VPP data) ctffind_do_phaseshift = False # Executable to Kai Zhang's Gctf gctf_exe = '/public/EM/Gctf/bin/Gctf' # On which GPU(s) to execute Gctf gctf_gpu = '0' # Use Alexis Rohou's CTFFIND4 (CPU-only) instead? use_ctffind_instead = True # Executable for Alexis Rohou's CTFFIND4 ctffind4_exe = '/public/EM/ctffind/ctffind.exe' # How many MPI processes to use for running CTF estimation? ctffind_mpi = 8 # Submit CTF estimation job to the cluster? ctffind_submit_to_queue = False ### Autopick parameters # Use GPU-acceleration for autopicking? autopick_do_gpu = True # Which GPU(s) to use for autopicking autopick_gpu = '0' # Low-pass filter for auto-picking the micrographs autopick_lowpass = 20 # Shrink factor for faster picking (0 = fastest; 1 = slowest) autopick_shrink_factor = 0 # How many MPI processes to use for running auto-picking? autopick_mpi = 1 # Additional arguments for autopicking autopick_other_args = '' # Submit Autopick job to the cluster? autopick_submit_to_queue = False # Are the references CTF-corrected? autopick_refs_are_ctf_corrected = True # Do the references have inverted contrast wrt the micrographs? autopick_refs_have_inverted_contrast = True # Ignore CTFs until the first peak autopick_refs_ignore_ctf1stpeak = False # Diameter of mask for the references (in A; negative value for automated detection of mask diameter) autopick_refs_mask_diam = -1 # In-plane angular sampling interval autopick_inplane_sampling = 10 # Symmetry of the 3D reference for autopicking autopick_3dref_symmetry = 'C1' # 3D angular sampling for generating projections of the 3D reference for autopicking (30 degrees is usually enough) autopick_3dref_sampling = '30 degrees' # Pixel size in the provided 2D/3D references (negative for same as in motion-corrected movies) autopick_ref_angpix = -1 ### Extract parameters # Diameter for background normalisation (in pixels; negative value: default is 75% box size) extract_bg_diameter = -1 # How many MPI processes to use for running particle extraction? 
extract_mpi = 1 # Submit Extract job to the cluster? extract_submit_to_queue = False ## Discard particles based on average/stddev values? (this may be important for SGD initial model generation) do_discard_on_image_statistics = False # Discard images that have average/stddev values that are more than this many sigma away from the ensemble average discard_sigma = 4 # Submit discard job to the cluster? discard_submit_to_queue = False #### Common relion_refine paremeters used for 2D/3D classification and initial model generation # Read all particles in one batch into memory? refine_preread_images = False # Or copy particles to scratch disk? refine_scratch_disk = '' # Number of pooled particles? refine_nr_pool = 10 # Use GPU-acceleration? refine_do_gpu = True # Which GPU to use (different from GPU used for pre-processing?) refine_gpu = '1' # How many MPI processes to use refine_mpi = 1 # How many threads to use refine_threads = 6 # Skip padding? refine_skip_padding = False # Submit jobs to the cluster? refine_submit_to_queue = False # Use fast subsets in 2D/3D classification when batch_size is bigger than this refine_batchsize_for_fast_subsets = 10000 ### 2D classification parameters # Wait with the first 2D classification batch until at least this many particles are extracted minimum_batch_size = 10000 # Number of iterations to perform in 2D classification # Must be at least 20 for fast subsets class2d_nr_iter = 20 # Rotational search step (in degrees) class2d_angle_step = 6 # Offset search range (in pixels) class2d_offset_range = 5 # Offset search step (in pixels) class2d_offset_step = 1 # Option to ignore the CTFs until their first peak (try this if all particles go into very few classes) class2d_ctf_ign1stpeak = False # Additional arguments to pass to relion-refine class2d_other_args = '' ### 3D classification parameters # Number of iterations to perform in 3D classification # Must be at least 20 for fast subsets class3d_nr_iter = 20 # Reference mask class3d_reference_mask = '' # Option to ignore the CTFs until their first peak (try this if all particles go into very few classes) class3d_ctf_ign1stpeak = False # Regularisation parameter (T) class3d_T_value = 4 # Angular sampling step class3d_angle_step = '7.5 degrees' # Offset search range (in pixels) class3d_offset_range = 5 # Offset search step (in pixels) class3d_offset_step = 1 # Additional arguments to pass to relion-refine class3d_other_args = '' ## SGD initial model generation # Number of models to generate simulatenously (K>1 may be useful for getting rid of outliers in the particle images) inimodel_nr_classes = 4 # Ignore CTFs until first peak? inimodel_ctf_ign1stpeak = False # Enforce non-negative solvent? inimodel_solvent_flatten = True # Initial angular sampling inimodel_angle_step = '15 degrees' # Initial search range (in pixels) inimodel_offset_range = 6 # Initial offset search step (in pixels) inimodel_offset_step = 2 # Number of initial iterations inimodel_nr_iter_initial = 50 # Number of in-between iterations inimodel_nr_iter_inbetween = 200 # Number of final iterations inimodel_nr_iter_final = 50 # Frequency to write out information inimodel_freq_writeout = 10 # Initial resolution (in A) inimodel_resol_ini = 35 # Final resolution (in A) inimodel_resol_final = 15 # Initial mini-batch size inimodel_batchsize_ini = 100 # Final mini-batch size inimodel_batchsize_final = 500 # Increased noise variance half-life (off, i.e. 
-1, by default; values of ~1000 have been observed to be useful in difficult cases) inimodel_sigmafudge_halflife = -1 # Additional arguments to pass to relion_refine (skip annealing to get rid of outlier particles) inimodel_other_args = ' --sgd_skip_anneal ' ### Cluster submission settings # Name of the queue to which to submit the job queue_name = 'openmpi' # Name of the command used to submit scripts to the queue queue_submit_command = 'qsub' # The template for your standard queue job submission script queue_submission_template = '/public/EM/RELION/relion/bin/qsub.csh' # Minimum number of dedicated cores that need to be requested on each node queue_minimum_dedicated = 1 ### End of options ####################################################################### ############ typically no need to change anything below this line ####################################################################### def update_from(self, other): """ Update this RelionItOptions object from a dictionary. Special values (with names like '__xxx__') are removed, allowing this method to be given a dictionary containing the namespace from a script run with ``runpy``. """ while len(other) > 0: key, value = other.popitem() if not (key.startswith('__') and key.endswith('__')): # exclude __name__, __builtins__ etc. if hasattr(self, key): setattr(self, key, value) else: print(" RELION_IT: Unrecognised option '{}'".format(key)) def print_options(self, out_file=None): """ Print the current options. This method prints the options in the same format as they are read, allowing options to be written to a file and re-used. Args: out_file: A file object (optional). If supplied, options will be written to this file, otherwise they will be printed to sys.stdout. Raises: ValueError: If there is a problem printing the options. """ out_file.write("# Options file for relion_it.py\n\n") seen_start = False option_names = [key for key in dir(self) if (not (key.startswith('__') and key.endswith('__')) and not callable(getattr(self, key)))] # Parse the source code for this class, and write out all comments along with option lines containing new values for line in inspect.getsourcelines(RelionItOptions)[0]: line = line.strip() if not seen_start: if line != "### General parameters": # Ignore lines until this one continue seen_start = True if line == "### End of options": # Stop here break if line.startswith('#') or len(line) == 0: # Print comments or blank lines as-is out_file.write(line + "\n"); else: # Assume all other lines define an option name and value. Replace with new value. equals_index = line.find('=') if equals_index > 0: option_name = line[:equals_index].strip() if option_name in option_names: out_file.write('{} = {}\n'.format(option_name, repr(getattr(self, option_name)))) option_names.remove(option_name) else: # This error should not occur. If it does, there is probably a programming error. raise ValueError("Unrecognised option name '{}'".format(option_name)) if len(option_names) > 0: # This error should not occur. If it does, there is probably a programming error. 
raise ValueError("Some options were not written to the output file: {}".format(option_names)) class RelionItGui(object): def __init__(self, main_window, options): self.main_window = main_window self.options = options # Convenience function for making file browser buttons def new_browse_button(master, var_to_set, filetypes=(('MRC file', '*.mrc'), ('All files', '*'))): def browse_command(): chosen_file = tkFileDialog.askopenfilename(filetypes=filetypes) if chosen_file is not None: # Make path relative if it's in the current directory if chosen_file.startswith(os.getcwd()): chosen_file = os.path.relpath(chosen_file) var_to_set.set(chosen_file) return tk.Button(master, text="Browse...", command=browse_command) ### Create GUI main_frame = tk.Frame(main_window) main_frame.pack(fill=tk.BOTH, expand=1) left_frame = tk.Frame(main_frame) left_frame.pack(side=tk.LEFT, anchor=tk.N, fill=tk.X, expand=1) right_frame = tk.Frame(main_frame) right_frame.pack(side=tk.LEFT, anchor=tk.N, fill=tk.X, expand=1) ### expt_frame = tk.LabelFrame(left_frame, text="Experimental details", padx=5, pady=5) expt_frame.pack(padx=5, pady=5, fill=tk.X, expand=1) tk.Grid.columnconfigure(expt_frame, 1, weight=1) row = 0 tk.Label(expt_frame, text="Voltage (kV):").grid(row=row, sticky=tk.W) self.voltage_entry = tk.Entry(expt_frame) self.voltage_entry.grid(row=row, column=1, sticky=tk.W+tk.E) self.voltage_entry.insert(0, str(options.voltage)) row += 1 tk.Label(expt_frame, text="Cs (mm):").grid(row=row, sticky=tk.W) self.cs_entry = tk.Entry(expt_frame) self.cs_entry.grid(row=row, column=1, sticky=tk.W+tk.E) self.cs_entry.insert(0, str(options.Cs)) row += 1 tk.Label(expt_frame, text="Phase plate?").grid(row=row, sticky=tk.W) self.phaseplate_var = tk.IntVar() phaseplate_button = tk.Checkbutton(expt_frame, var=self.phaseplate_var) phaseplate_button.grid(row=row, column=1, sticky=tk.W) if options.ctffind_do_phaseshift: phaseplate_button.select() row += 1 tk.Label(expt_frame, text=u"Pixel size (\u212B):").grid(row=row, sticky=tk.W) self.angpix_var = tk.StringVar() # for data binding self.angpix_entry = tk.Entry(expt_frame, textvariable=self.angpix_var) self.angpix_entry.grid(row=row, column=1, sticky=tk.W+tk.E) self.angpix_entry.insert(0, str(options.angpix)) row += 1 tk.Label(expt_frame, text=u"Exposure rate (e\u207B / \u212B\u00B2 / frame):").grid(row=row, sticky=tk.W) self.exposure_entry = tk.Entry(expt_frame) self.exposure_entry.grid(row=row, column=1, sticky=tk.W + tk.E) self.exposure_entry.insert(0, str(options.motioncor_doseperframe)) ### particle_frame = tk.LabelFrame(left_frame, text="Particle details", padx=5, pady=5) particle_frame.pack(padx=5, pady=5, fill=tk.X, expand=1) tk.Grid.columnconfigure(particle_frame, 1, weight=1) row = 0 tk.Label(particle_frame, text=u"Longest diameter (\u212B):").grid(row=row, sticky=tk.W) self.particle_max_diam_var = tk.StringVar() # for data binding self.particle_max_diam_entry = tk.Entry(particle_frame, textvariable=self.particle_max_diam_var) self.particle_max_diam_entry.grid(row=row, column=1, sticky=tk.W+tk.E, columnspan=2) self.particle_max_diam_entry.insert(0, str(options.autopick_LoG_diam_max)) row += 1 tk.Label(particle_frame, text=u"Shortest diameter (\u212B):").grid(row=row, sticky=tk.W) self.particle_min_diam_entry = tk.Entry(particle_frame) self.particle_min_diam_entry.grid(row=row, column=1, sticky=tk.W+tk.E, columnspan=2) self.particle_min_diam_entry.insert(0, str(options.autopick_LoG_diam_min)) row += 1 tk.Label(particle_frame, text="3D reference (optional):").grid(row=row, 
sticky=tk.W) self.ref_3d_var = tk.StringVar() # for data binding self.ref_3d_entry = tk.Entry(particle_frame, textvariable=self.ref_3d_var) self.ref_3d_entry.grid(row=row, column=1, sticky=tk.W+tk.E) self.ref_3d_entry.insert(0, str(options.autopick_3dreference)) new_browse_button(particle_frame, self.ref_3d_var).grid(row=row, column=2) row += 1 tk.Label(particle_frame, text=u"Mask diameter (\u212B):").grid(row=row, sticky=tk.W) self.mask_diameter_var = tk.StringVar() # for data binding self.mask_diameter_entry = tk.Entry(particle_frame, textvariable=self.mask_diameter_var) self.mask_diameter_entry.grid(row=row, column=1, sticky=tk.W+tk.E) self.mask_diameter_entry.insert(0, str(options.mask_diameter)) self.mask_diameter_px = tk.Label(particle_frame, text="= NNN px") self.mask_diameter_px.grid(row=row, column=2,sticky=tk.W) row += 1 tk.Label(particle_frame, text="Box size (px):").grid(row=row, sticky=tk.W) self.box_size_var = tk.StringVar() # for data binding self.box_size_entry = tk.Entry(particle_frame, textvariable=self.box_size_var) self.box_size_entry.grid(row=row, column=1, sticky=tk.W+tk.E) self.box_size_entry.insert(0, str(options.extract_boxsize)) self.box_size_in_angstrom = tk.Label(particle_frame, text=u"= NNN \u212B") self.box_size_in_angstrom.grid(row=row, column=2,sticky=tk.W) row += 1 tk.Label(particle_frame, text="Down-sample to (px):").grid(row=row, sticky=tk.W) self.extract_small_boxsize_var = tk.StringVar() # for data binding self.extract_small_boxsize_entry = tk.Entry(particle_frame, textvariable=self.extract_small_boxsize_var) self.extract_small_boxsize_entry.grid(row=row, column=1, sticky=tk.W+tk.E) self.extract_small_boxsize_entry.insert(0, str(options.extract_small_boxsize)) self.extract_angpix = tk.Label(particle_frame, text=u"= NNN \u212B/px") self.extract_angpix.grid(row=row, column=2,sticky=tk.W) row += 1 tk.Label(particle_frame, text="Calculate for me:").grid(row=row, sticky=tk.W) self.auto_boxsize_var = tk.IntVar() auto_boxsize_button = tk.Checkbutton(particle_frame, var=self.auto_boxsize_var) auto_boxsize_button.grid(row=row, column=1, sticky=tk.W) auto_boxsize_button.select() ### project_frame = tk.LabelFrame(right_frame, text="Project details", padx=5, pady=5) project_frame.pack(padx=5, pady=5, fill=tk.X, expand=1) tk.Grid.columnconfigure(project_frame, 1, weight=1) row = 0 tk.Label(project_frame, text="Project directory:").grid(row=row, sticky=tk.W) tk.Label(project_frame, text=os.getcwd(), anchor=tk.W).grid(row=row, column=1, sticky=tk.W, columnspan=2) row += 1 tk.Label(project_frame, text="Pattern for movies:").grid(row=row, sticky=tk.W) self.import_images_var = tk.StringVar() # for data binding self.import_images_entry = tk.Entry(project_frame, textvariable=self.import_images_var) self.import_images_entry.grid(row=row, column=1, sticky=tk.W+tk.E) self.import_images_entry.insert(0, self.options.import_images) import_button = new_browse_button(project_frame, self.import_images_var, filetypes=(('Image file', '{*.mrc, *.mrcs, *.tif, *.tiff}'), ('All files', '*'))) import_button.grid(row=row, column=2) row += 1 tk.Label(project_frame, text="Gain reference (optional):").grid(row=row, sticky=tk.W) self.gainref_var = tk.StringVar() # for data binding self.gainref_entry = tk.Entry(project_frame, textvariable=self.gainref_var) self.gainref_entry.grid(row=row, column=1, sticky=tk.W+tk.E) self.gainref_entry.insert(0, self.options.motioncor_gainreference) new_browse_button(project_frame, self.gainref_var).grid(row=row, column=2) ### pipeline_frame = 
tk.LabelFrame(right_frame, text="Pipeline control", padx=5, pady=5) pipeline_frame.pack(padx=5, pady=5, fill=tk.X, expand=1) tk.Grid.columnconfigure(expt_frame, 1, weight=1) row = 0 tk.Label(pipeline_frame, text="Stop after CTF estimation?").grid(row=row, sticky=tk.W) self.stop_after_ctf_var = tk.IntVar() stop_after_ctf_button = tk.Checkbutton(pipeline_frame, var=self.stop_after_ctf_var) stop_after_ctf_button.grid(row=row, column=1, sticky=tk.W) if options.stop_after_ctf_estimation: stop_after_ctf_button.select() row += 1 tk.Label(pipeline_frame, text="Do 2D classification?").grid(row=row, sticky=tk.W) self.class2d_var = tk.IntVar() class2d_button = tk.Checkbutton(pipeline_frame, var=self.class2d_var) class2d_button.grid(row=row, column=1, sticky=tk.W) if options.do_class2d: class2d_button.select() row += 1 tk.Label(pipeline_frame, text="Do 3D classification?").grid(row=row, sticky=tk.W) self.class3d_var = tk.IntVar() class3d_button = tk.Checkbutton(pipeline_frame, var=self.class3d_var) class3d_button.grid(row=row, column=1, sticky=tk.W) if options.do_class3d: class3d_button.select() row += 1 tk.Label(pipeline_frame, text="Do second pass? (only if no 3D ref)").grid(row=row, sticky=tk.W) self.second_pass_var = tk.IntVar() second_pass_button = tk.Checkbutton(pipeline_frame, var=self.second_pass_var) second_pass_button.grid(row=row, column=1, sticky=tk.W) if options.do_second_pass: second_pass_button.select() row += 1 tk.Label(pipeline_frame, text="Do 2D classification (2nd pass)?").grid(row=row, sticky=tk.W) self.class2d_pass2_var = tk.IntVar() class2d_pass2_button = tk.Checkbutton(pipeline_frame, var=self.class2d_pass2_var) class2d_pass2_button.grid(row=row, column=1, sticky=tk.W) class2d_pass2_button.select() if options.do_class2d_pass2: class2d_pass2_button.select() row += 1 tk.Label(pipeline_frame, text="Do 3D classification (2nd pass)?").grid(row=row, sticky=tk.W) self.class3d_pass2_var = tk.IntVar() class3d_pass2_button = tk.Checkbutton(pipeline_frame, var=self.class3d_pass2_var) class3d_pass2_button.grid(row=row, column=1, sticky=tk.W) if options.do_class3d_pass2: class3d_pass2_button.select() ### Add logic to the box size boxes def calculate_box_size(particle_size_pixels): # Use box 20% larger than particle and ensure size is even box_size_exact = 1.2 * particle_size_pixels box_size_int = int(math.ceil(box_size_exact)) return box_size_int + box_size_int % 2 def calculate_downscaled_box_size(box_size_pix, angpix): for small_box_pix in (48, 64, 96, 128, 160, 192, 256, 288, 300, 320, 360, 384, 400, 420, 450, 480, 512, 640, 768, 896, 1024): # Don't go larger than the original box if small_box_pix > box_size_pix: return box_size_pix # If Nyquist freq. is better than 8.5 A, use this downscaled box, otherwise continue to next size up small_box_angpix = angpix * box_size_pix / small_box_pix if small_box_angpix < 4.25: return small_box_pix # Fall back to a warning message return "Box size is too large!" 
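# --- Editor's note (not part of the original script): the two helpers above drive the
# "Calculate for me" box-size logic. A minimal worked example, assuming a hypothetical
# 200 A particle imaged at 1.1 A/px (values chosen purely for illustration):
#     mask_diameter        = 1.1 * 200          -> 220 A
#     particle_size_pixels = 200 / 1.1          -> ~181.8 px
#     box_size             = ceil(1.2 * 181.8)  -> 219, rounded up to the next even number = 220 px
#     downscaled box       = first candidate size (48, 64, 96, ...) whose rescaled pixel size
#                            1.1 * 220 / small_box stays below 4.25 A/px (i.e. Nyquist better
#                            than 8.5 A); 48 px gives 5.04 A/px (rejected), 64 px gives
#                            3.78 A/px, so 64 px would be used.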
def update_box_size_labels(*args_ignored, **kwargs_ignored): try: angpix = float(self.angpix_entry.get()) except ValueError: # Can't update any of the labels without angpix self.mask_diameter_px.config(text="= NNN px") self.box_size_in_angstrom.config(text=u"= NNN \u212B") self.extract_angpix.config(text=u"= NNN \u212B/px") return try: mask_diameter = float(self.mask_diameter_entry.get()) mask_diameter_px = mask_diameter / angpix self.mask_diameter_px.config(text="= {:.1f} px".format(mask_diameter_px)) except (ValueError, ZeroDivisionError): self.mask_diameter_px.config(text="= NNN px") # Don't return - an error here doesn't stop us calculating the other labels try: box_size = float(self.box_size_entry.get()) box_angpix = angpix * box_size self.box_size_in_angstrom.config(text=u"= {:.1f} \u212B".format(box_angpix)) except ValueError: # Can't update these without the box size self.box_size_in_angstrom.config(text=u"= NNN \u212B") self.extract_angpix.config(text=u"= NNN \u212B/px") return try: extract_small_boxsize = float(self.extract_small_boxsize_entry.get()) small_box_angpix = box_angpix / extract_small_boxsize self.extract_angpix.config(text=u"= {:.3f} \u212B/px".format(small_box_angpix)) except (ValueError, ZeroDivisionError): # Can't update the downscaled pixel size unless the downscaled box size is valid self.extract_angpix.config(text=u"= NNN \u212B/px") def update_box_sizes(*args_ignored, **kwargs_ignored): # Always activate entry boxes - either we're activating them anyway, or we need to edit the text. # For text editing we need to activate the box first then deactivate again afterwards. self.mask_diameter_entry.config(state=tk.NORMAL) self.box_size_entry.config(state=tk.NORMAL) self.extract_small_boxsize_entry.config(state=tk.NORMAL) if self.get_var_as_bool(self.auto_boxsize_var): try: particle_size_angstroms = float(self.particle_max_diam_entry.get()) mask_diameter = 1.1 * particle_size_angstroms self.mask_diameter_entry.delete(0, tk.END) self.mask_diameter_entry.insert(0, str(mask_diameter)) angpix = float(self.angpix_entry.get()) particle_size_pixels = particle_size_angstroms / angpix box_size = calculate_box_size(particle_size_pixels) self.box_size_entry.delete(0, tk.END) self.box_size_entry.insert(0, str(box_size)) small_boxsize = calculate_downscaled_box_size(int(box_size), angpix) self.extract_small_boxsize_entry.delete(0, tk.END) self.extract_small_boxsize_entry.insert(0, str(small_boxsize)) except: # Ignore errors - they will be picked up if the user tries to save the options pass self.mask_diameter_entry.config(state=tk.DISABLED) self.box_size_entry.config(state=tk.DISABLED) self.extract_small_boxsize_entry.config(state=tk.DISABLED) update_box_size_labels() self.box_size_var.trace('w', update_box_size_labels) self.extract_small_boxsize_var.trace('w', update_box_size_labels) self.angpix_var.trace('w', update_box_sizes) self.particle_max_diam_var.trace('w', update_box_sizes) auto_boxsize_button.config(command=update_box_sizes) ### Add logic to the check boxes def update_pipeline_control_state(*args_ignored, **kwargs_ignored): new_state = tk.DISABLED if self.stop_after_ctf_var.get() else tk.NORMAL class2d_button.config(state=new_state) class3d_button.config(state=new_state) self.particle_max_diam_entry.config(state=new_state) self.particle_min_diam_entry.config(state=new_state) self.ref_3d_entry.config(state=new_state) # Update the box size controls with care to avoid activating them when we shouldn't auto_boxsize_button.config(state=new_state) if new_state == tk.DISABLED: 
self.mask_diameter_entry.config(state=new_state) self.box_size_entry.config(state=new_state) self.extract_small_boxsize_entry.config(state=new_state) else: update_box_sizes() can_do_second_pass = (self.class3d_var.get() and len(self.ref_3d_var.get()) == 0 and not self.stop_after_ctf_var.get()) second_pass_button.config(state=tk.NORMAL if can_do_second_pass else tk.DISABLED) will_do_second_pass = can_do_second_pass and self.second_pass_var.get() class2d_pass2_button.config(state=tk.NORMAL if will_do_second_pass else tk.DISABLED) class3d_pass2_button.config(state=tk.NORMAL if will_do_second_pass else tk.DISABLED) stop_after_ctf_button.config(command=update_pipeline_control_state) class3d_button.config(command=update_pipeline_control_state) second_pass_button.config(command=update_pipeline_control_state) self.ref_3d_var.trace('w', update_pipeline_control_state) ### button_frame = tk.Frame(right_frame) button_frame.pack(padx=5, pady=5, fill=tk.X, expand=1) self.run_button = tk.Button(button_frame, text="Save & run", command=self.run_pipeline) self.run_button.pack(padx=5, pady=5, side=tk.RIGHT) self.save_button = tk.Button(button_frame, text="Save options", command=self.save_options) self.save_button.pack(padx=5, pady=5, side=tk.RIGHT) # Show initial pixel sizes update_box_sizes() def get_var_as_bool(self, var): """Helper function to convert a Tk IntVar (linked to a checkbox) to a boolean value""" return True if var.get() == 1 else False def fetch_options_from_gui(self): """ Fetch the current values from the GUI widgets and store them in the options object. Returns: A list of warning messages about possible incorrect option values. Raises: ValueError: If an option value is invalid. """ opts = self.options warnings = [] opts.stop_after_ctf_estimation = self.get_var_as_bool(self.stop_after_ctf_var) opts.do_class2d = self.get_var_as_bool(self.class2d_var) opts.do_class3d = self.get_var_as_bool(self.class3d_var) opts.do_second_pass = self.get_var_as_bool(self.second_pass_var) opts.do_class2d_pass2 = self.get_var_as_bool(self.class2d_pass2_var) opts.do_class3d_pass2 = self.get_var_as_bool(self.class3d_pass2_var) try: opts.voltage = float(self.voltage_entry.get()) except ValueError: raise ValueError("Voltage must be a number") if opts.voltage <= 0.0: warnings.append("- Voltage should be a positive number") try: opts.Cs = float(self.cs_entry.get()) except ValueError: raise ValueError("Cs must be a number") opts.ctffind_do_phaseshift = self.get_var_as_bool(self.phaseplate_var) try: opts.angpix = float(self.angpix_entry.get()) except ValueError: raise ValueError("Pixel size must be a number") if opts.angpix <= 0.0: warnings.append("- Pixel size should be a positive number") try: opts.motioncor_doseperframe = float(self.exposure_entry.get()) except ValueError: raise ValueError("Exposure rate must be a number") if opts.motioncor_doseperframe <= 0.0: warnings.append("- Exposure rate should be a positive number") try: opts.autopick_LoG_diam_max = float(self.particle_max_diam_entry.get()) except ValueError: if len(self.particle_max_diam_entry.get()) == 0 and opts.stop_after_ctf_estimation: # This was left blank and won't be used, set to zero to avoid errors in calculations later opts.autopick_LoG_diam_max = 0.0 else: raise ValueError("Particle longest diameter must be a number") try: opts.autopick_LoG_diam_min = float(self.particle_min_diam_entry.get()) except ValueError: if len(self.particle_min_diam_entry.get()) == 0 and opts.stop_after_ctf_estimation: # This was left blank and won't be used, set to zero 
to avoid errors in calculations later opts.autopick_LoG_diam_min = 0.0 else: raise ValueError("Particle shortest diameter must be a number") opts.autopick_3dreference = self.ref_3d_entry.get() if len(opts.autopick_3dreference) > 0 and not os.path.isfile(opts.autopick_3dreference): warnings.append("- 3D reference file '{}' does not exist".format(opts.autopick_3dreference)) try: opts.mask_diameter = float(self.mask_diameter_entry.get()) except ValueError: raise ValueError("Mask diameter must be a number") if opts.mask_diameter <= 0: warnings.append("- Mask diameter should be a positive number") try: opts.extract_boxsize = int(self.box_size_entry.get()) except ValueError: raise ValueError("Box size must be a number") if opts.extract_boxsize <= 0: warnings.append("- Box size should be a positive number") try: opts.extract_small_boxsize = int(self.extract_small_boxsize_entry.get()) opts.extract2_small_boxsize = opts.extract_small_boxsize opts.extract_downscale = True opts.extract2_downscale = True except ValueError: raise ValueError("Down-sampled box size must be a number") if opts.extract_small_boxsize <= 0: warnings.append("- Down-sampled box size should be a positive number") opts.import_images = self.import_images_entry.get() if opts.import_images.startswith(('/', '..')): warnings.append("- Movies should be located inside the project directory") if '*' not in opts.import_images: warnings.append("- Pattern for input movies should normally contain a '*' to select more than one file") opts.motioncor_gainreference = self.gainref_entry.get() if len(opts.motioncor_gainreference) > 0 and not os.path.isfile(opts.motioncor_gainreference): warnings.append("- Gain reference file '{}' does not exist".format(opts.motioncor_gainreference)) return warnings def calculate_full_options(self): """ Update the options from the values that have been fetched from the GUI. This method uses the values that the user has set in the GUI to calculate a number of other options for the script. """ opts = self.options # If we have a 3D reference, do a single pass with a large batch size if len(opts.autopick_3dreference) > 0: opts.autopick_do_LoG = False opts.autopick_refs_min_distance = opts.autopick_LoG_diam_max * 0.7 opts.class3d_reference = opts.autopick_3dreference opts.do_second_pass = False else: # No 3D reference - do LoG autopicking in the first pass opts.autopick_do_LoG = True opts.class3d_reference = '' # Now set a sensible batch size (leaving batch_size_pass2 at its default 100,000) if opts.do_second_pass: opts.batch_size = 10000 else: opts.batch_size = 100000 def save_options(self): """ Update the full set of options from the values in the GUI, and save them to a file. Returns: True if the options were valid and saved successfully, otherwise False. """ try: warnings = self.fetch_options_from_gui() if len(warnings) == 0 or tkMessageBox.askokcancel("Warning", "\n".join(warnings), icon='warning', default=tkMessageBox.CANCEL): self.calculate_full_options() print(" RELION_IT: Writing all options to {}".format(OPTIONS_FILE)) if os.path.isfile(OPTIONS_FILE): print(" RELION_IT: File {0} already exists; renaming old copy to {0}~".format(OPTIONS_FILE)) os.rename(OPTIONS_FILE, OPTIONS_FILE + '~') with open(OPTIONS_FILE, 'w') as optfile: self.options.print_options(optfile) return True except Exception as ex: tkMessageBox.showerror("Error", ex.message) traceback.print_exc() return False def run_pipeline(self): """ Update the full set of options from the values in the GUI, close the GUI and run the pipeline. 
""" if self.save_options(): self.main_window.destroy() run_pipeline(self.options) def safe_load_star(filename, max_try=5, wait=10, expected=[]): for _ in range(max_try): try: star = load_star(filename) entry = star # make sure the expected key is present for key in expected: entry = entry[key] return star except: print("safe_load_star is retrying to read: ", filename, ", expected key:", expected) import time time.sleep(wait) assert False, "Failed to read a star file: " + filename def load_star(filename): from collections import OrderedDict datasets = OrderedDict() current_data = None current_colnames = None in_loop = 0 # 0: outside 1: reading colnames 2: reading data for line in open(filename): line = line.strip() # remove comments comment_pos = line.find('#') if comment_pos > 0: line = line[:comment_pos] if line == "": if in_loop == 2: in_loop = 0 continue if line.startswith("data_"): in_loop = 0 data_name = line[5:] current_data = OrderedDict() datasets[data_name] = current_data elif line.startswith("loop_"): current_colnames = [] in_loop = 1 elif line.startswith("_"): if in_loop == 2: in_loop = 0 elems = line[1:].split() if in_loop == 1: current_colnames.append(elems[0]) current_data[elems[0]] = [] else: current_data[elems[0]] = elems[1] elif in_loop > 0: in_loop = 2 elems = line.split() assert len(elems) == len(current_colnames), ("Error in STAR file {}, number of elements in {} does not match number of column names {}" .format(filename, elems, current_colnames)) for idx, e in enumerate(elems): current_data[current_colnames[idx]].append(e) return datasets # Don't get stuck in infinite while True loops.... def CheckForExit(): if not os.path.isfile(RUNNING_FILE): print(" RELION_IT:", RUNNING_FILE, "file no longer exists, exiting now ...") exit(0) # Allow direct progressing to the second pass def getSecondPassReference(): if os.path.isfile(SECONDPASS_REF3D_FILE): with open(SECONDPASS_REF3D_FILE, 'r') as myfile: filename, angpix = myfile.readlines() else: filename = '' angpix = '0' return filename.replace('\n',''), angpix.replace('\n','') def getJobName(name_in_script, done_file): jobname = None # See if we've done this job before, i.e. whether it is in the done_file if (os.path.isfile(done_file)): f = open(done_file,'r') for line in f: elems = line.split() if len(elems) < 3: continue if elems[0] == name_in_script: jobname = elems[2] break f.close() return jobname def addJob(jobtype, name_in_script, done_file, options, alias=None): jobname = getJobName(name_in_script, done_file) # If we hadn't done it before, add it now if (jobname is not None): already_had_it = True else: already_had_it = False optionstring = '' for opt in options[:]: optionstring += opt + ';' command = 'relion_pipeliner --addJob ' + jobtype + ' --addJobOptions "' + optionstring + '"' if alias is not None: command += ' --setJobAlias "' + alias + '"' #print("DEBUG: Running " + command) os.system(command) pipeline = safe_load_star(PIPELINE_STAR, expected=['pipeline_processes', 'rlnPipeLineProcessName']) jobname = pipeline['pipeline_processes']['rlnPipeLineProcessName'][-1] # Now add the jobname to the done_file f = open(done_file,'a') f.write(name_in_script + ' = ' + jobname + '\n') f.close() # return the name of the job in the RELION pipeline, e.g. 
'Import/job001/' return jobname, already_had_it def RunJobs(jobs, repeat, wait, schedulename): runjobsstring = '' for job in jobs[:]: runjobsstring += job + ' ' command = 'relion_pipeliner --schedule ' + schedulename + ' --repeat ' + str(repeat) + ' --min_wait ' + str(wait) + ' --RunJobs "' + runjobsstring + '" &' os.system(command) def WaitForJob(wait_for_this_job, seconds_wait): time.sleep(seconds_wait) print(" RELION_IT: waiting for job to finish in", wait_for_this_job) while True: pipeline = safe_load_star(PIPELINE_STAR, expected=['pipeline_processes', 'rlnPipeLineProcessName']) myjobnr = -1 for jobnr in range(0,len(pipeline['pipeline_processes']['rlnPipeLineProcessName'])): jobname = pipeline['pipeline_processes']['rlnPipeLineProcessName'][jobnr] if jobname == wait_for_this_job: myjobnr = jobnr if myjobnr < 0: print(" ERROR: cannot find ", wait_for_this_job, " in ", PIPELINE_STAR) exit(1) status = int(pipeline['pipeline_processes']['rlnPipeLineProcessStatus'][myjobnr]) if status == 2: print(" RELION_IT: job in", wait_for_this_job, "has finished now") return else: CheckForExit() time.sleep(seconds_wait) def find_split_job_output(prefix, n, max_digits=6): import os.path for i in range(max_digits): filename = prefix + str(n).rjust(i, '0') + '.star' if os.path.isfile(filename): return filename return None def writeManualPickingGuiFile(my_part_diam): if not os.path.isfile('.gui_manualpickrun.job'): with open('.gui_manualpickrun.job', 'w') as g: g.write("""job_type == 3 Pixel size (A) == -1 Black value: == 0 Blue value: == 0 MetaDataLabel for color: == rlnParticleSelectZScore Scale for CTF image: == 1 Particle diameter (A): == {} Blue<>red color particles? == No Highpass filter (A) == -1 Lowpass filter (A) == 20 Scale for micrographs: == 0.2 Red value: == 2 Sigma contrast: == 3 White value: == 0 """.format(my_part_diam)) return def findBestClass(model_star_file, use_resol=True): model_star = safe_load_star(model_star_file) best_resol = 999 best_size = 0 best_class = 0 for iclass in range(0, len(model_star['model_classes']['rlnReferenceImage'])): mysize = float(model_star['model_classes']['rlnClassDistribution'][iclass]) myresol = float(model_star['model_classes']['rlnEstimatedResolution'][iclass]) if (not use_resol and (mysize > best_size or (mysize == best_size and myresol < best_resol))) \ or (use_resol and (myresol < best_resol or (myresol == best_resol and mysize > best_size))): best_size = mysize best_class = model_star['model_classes']['rlnReferenceImage'][iclass] best_resol = myresol print(" RELION_IT: found best class:",best_class,"with class size of",best_size,"and resolution of",best_resol) return best_class, best_resol, model_star['model_general']['rlnPixelSize'] def findOutputModelStar(job_dir): found = None try: job_star = safe_load_star(job_dir + "job_pipeline.star", expected=['pipeline_output_edges', 'rlnPipeLineEdgeToNode']) for output_file in job_star["pipeline_output_edges"]['rlnPipeLineEdgeToNode']: if output_file.endswith("_model.star"): found = output_file break except: pass return found def run_pipeline(opts): """ Configure and run the RELION 3 pipeline with the given options. Args: opts: options for the pipeline, as a RelionItOptions object. """ # if this really necessary? dont think so... 
if (os.path.isfile(PIPELINE_STAR) == False): g = open(PIPELINE_STAR,'w') g.write('data_pipeline_general\n') g.write('_rlnPipeLineJobCounter 1\n') g.close() # Write RUNNING_RELION_IT file, when deleted, this script will stop with open(RUNNING_FILE, 'w'): pass # Write mainGUI project file, so GUI won't ask to set up a project with open('.gui_projectdir', 'w'): pass #### Set up GUI file for Manualpick job to allow easy viewing of autopick results if opts.autopick_do_LoG: my_part_diam = opts.autopick_LoG_diam_min else: my_part_diam = opts.autopick_refs_min_distance writeManualPickingGuiFile(my_part_diam) ### Prepare the list of queue arguments for later use queue_options = ['Submit to queue? == Yes', 'Queue name: == {}'.format(opts.queue_name), 'Queue submit command: == {}'.format(opts.queue_submit_command), 'Standard submission script: == {}'.format(opts.queue_submission_template), 'Minimum dedicated cores per node: == {}'.format(opts.queue_minimum_dedicated)] # If we're only doing motioncorr and ctf estimation, then forget about the second pass and the batch processing if opts.stop_after_ctf_estimation: opts.do_class2d = False opts.do_class3d = False opts.do_second_pass = False if opts.do_second_pass: nr_passes = 2 else: nr_passes = 1 # if SECONDPASS_REF3D_FILE exists, go straight into the second pass first_pass = 0 if opts.do_second_pass: secondpass_ref3d, secondpass_ref3d_angpix = getSecondPassReference() if not secondpass_ref3d == '': print(' RELION_IT: found', secondpass_ref3d,'with angpix=',secondpass_ref3d_angpix,'as a 3D reference for second pass in file',SECONDPASS_REF3D_FILE) print(' RELION_IT: if the automatic selection of the reference turned out to be unsatisfactory,') print(' RELION_IT: you can re-run the second pass with another reference by:') print(' RELION_IT: stopping the pipeline by deleting RUNNING_*') print(' RELION_IT: updating the reference filename in',SECONDPASS_REF3D_FILE) print(' RELION_IT: deleting relevant jobs (autopick2_job and followings) in',SETUP_CHECK_FILE) print(' RELION_IT: and restarting the pipeline.') first_pass = 1 opts.autopick_3dreference = secondpass_ref3d opts.autopick_ref_angpix = secondpass_ref3d_angpix opts.autopick_2dreferences = '' opts.autopick_do_LoG = False opts.class3d_reference = secondpass_ref3d opts.have_3d_reference = True # Allow to perform two passes through the entire pipeline (PREPROCESS and CLASS2D/3D batches) # The second pass, a 3D reference generated in the first pass will be used for template-based autopicking for ipass in range(first_pass, nr_passes): #### Set up the Import job import_options = ['Raw input files: == {}'.format(opts.import_images), 'Import raw movies/micrographs? == Yes', 'Pixel size (Angstrom): == {}'.format(opts.angpix), 'Voltage (kV): == {}'.format(opts.voltage), 'Spherical aberration (mm): == {}'.format(opts.Cs), 'Amplitude contrast: == {}'.format(opts.ampl_contrast)] if opts.images_are_movies: import_options.append('Are these multi-frame movies? == Yes') else: import_options.append('Are these multi-frame movies? 
== No') import_job, already_had_it = addJob('Import','import_job', SETUP_CHECK_FILE, import_options) if opts.images_are_movies: #### Set up the MotionCor job motioncorr_options = ['Input movies STAR file: == {}movies.star'.format(import_job), 'MOTIONCOR2 executable: == {}'.format(opts.motioncor_exe), 'Defect file: == {}'.format(opts.motioncor_defectfile), 'Gain-reference image: == {}'.format(opts.motioncor_gainreference), 'Gain flip: == {}'.format(opts.motioncor_gainflip), 'Gain rotation: == {}'.format(opts.motioncor_gainrot), 'Do dose-weighting? == Yes', 'Dose per frame (e/A2): == {}'.format(opts.motioncor_doseperframe), 'Number of patches X: == {}'.format(opts.motioncor_patches_x), 'Number of patches Y: == {}'.format(opts.motioncor_patches_y), 'Bfactor: == {}'.format(opts.motioncor_bfactor), 'Binning factor: == {}'.format(opts.motioncor_binning), 'Which GPUs to use: == {}'.format(opts.motioncor_gpu), 'Other MOTIONCOR2 arguments == {}'.format(opts.motioncor_other_args), 'Number of threads: == {}'.format(opts.motioncor_threads), 'Number of MPI procs: == {}'.format(opts.motioncor_mpi), 'Additional arguments: == --eer_upsampling {} --eer_grouping {}'.format(opts.eer_upsampling, opts.eer_grouping)] if (opts.motioncor_do_own): motioncorr_options.append('Use RELION\'s own implementation? == Yes') if opts.use_ctffind_instead: motioncorr_options.append('Save sum of power spectra? == Yes') else: motioncorr_options.append('Save sum of power spectra? == No') else: motioncorr_options.append('Use RELION\'s own implementation? == No') if opts.motioncor_submit_to_queue: motioncorr_options.extend(queue_options) motioncorr_job, already_had_it = addJob('MotionCorr', 'motioncorr_job', SETUP_CHECK_FILE, motioncorr_options) #### Set up the CtfFind job ctffind_options = ['Amount of astigmatism (A): == {}'.format(opts.ctffind_astigmatism), 'FFT box size (pix): == {}'.format(opts.ctffind_boxsize), 'Maximum defocus value (A): == {}'.format(opts.ctffind_defocus_max), 'Minimum defocus value (A): == {}'.format(opts.ctffind_defocus_min), 'Defocus step size (A): == {}'.format(opts.ctffind_defocus_step), 'Maximum resolution (A): == {}'.format(opts.ctffind_maxres), 'Minimum resolution (A): == {}'.format(opts.ctffind_minres), 'Gctf executable: == {}'.format(opts.gctf_exe), 'Which GPUs to use: == {}'.format(opts.gctf_gpu), 'CTFFIND-4.1 executable: == {}'.format(opts.ctffind4_exe), 'Number of MPI procs: == {}'.format(opts.ctffind_mpi)] if opts.images_are_movies: ctffind_options.append('Input micrographs STAR file: == {}{}'.format(motioncorr_job, 'corrected_micrographs.star')) else: ctffind_options.append('Input micrographs STAR file: == {}{}'.format(import_job, 'micrographs.star')) if opts.use_ctffind_instead: ctffind_options.append('Use CTFFIND-4.1? == Yes') ctffind_options.append('Use Gctf instead? == No') ctffind_options.append('Use power spectra from MotionCorr job? == Yes') else: ctffind_options.append('Use CTFFIND-4.1? == No') ctffind_options.append('Use Gctf instead? == Yes') ctffind_options.append('Use power spectra from MotionCorr job? == No') if (opts.ctffind_do_ignore_search_params): ctffind_options.append('Ignore \'Searches\' parameters? == Yes') else: ctffind_options.append('Ignore \'Searches\' parameters? == No') if (opts.ctffind_do_EPA): ctffind_options.append('Perform equi-phase averaging? == Yes') else: ctffind_options.append('Perform equi-phase averaging? == No') if opts.ctffind_do_phaseshift: ctffind_options.append('Estimate phase shifts? == Yes') else: ctffind_options.append('Estimate phase shifts? 
== No') if opts.ctffind_submit_to_queue: ctffind_options.extend(queue_options) ctffind_job, already_had_it = addJob('CtfFind', 'ctffind_job', SETUP_CHECK_FILE, ctffind_options) runjobs = [import_job] if opts.images_are_movies: runjobs.append(motioncorr_job) runjobs.append(ctffind_job) # There is an option to stop on-the-fly processing after CTF estimation if not opts.stop_after_ctf_estimation: autopick_options = ['Input micrographs for autopick: == {}micrographs_ctf.star'.format(ctffind_job), 'Min. diameter for LoG filter (A) == {}'.format(opts.autopick_LoG_diam_min), 'Max. diameter for LoG filter (A) == {}'.format(opts.autopick_LoG_diam_max), 'Maximum resolution to consider (A) == {}'.format(opts.autopick_lowpass), 'Adjust default threshold (stddev): == {}'.format(opts.autopick_LoG_adjust_threshold), 'Upper threshold (stddev): == {}'.format(opts.autopick_LoG_upper_threshold), '2D references: == {}'.format(opts.autopick_2dreferences), '3D reference: == {}'.format(opts.autopick_3dreference), 'Symmetry: == {}'.format(opts.autopick_3dref_symmetry), 'Pixel size in references (A) == {}'.format(opts.autopick_ref_angpix), '3D angular sampling: == {}'.format(opts.autopick_3dref_sampling), 'In-plane angular sampling (deg) == {}'.format(opts.autopick_inplane_sampling), 'Picking threshold: == {}'.format(opts.autopick_refs_threshold), 'Minimum inter-particle distance (A): == {}'.format(opts.autopick_refs_min_distance), 'Mask diameter (A) == {}'.format(opts.autopick_refs_mask_diam), 'Maximum stddev noise: == {}'.format(opts.autopick_stddev_noise), 'Minimum avg noise: == {}'.format(opts.autopick_avg_noise), 'Shrink factor: == {}'.format(opts.autopick_shrink_factor), 'Which GPUs to use: == {}'.format(opts.autopick_gpu), 'Additional arguments: == {}'.format(opts.autopick_other_args), 'Number of MPI procs: == {}'.format(opts.autopick_mpi)] if not opts.autopick_3dreference == '': autopick_options.append('OR: provide a 3D reference? == Yes') else: autopick_options.append('OR: provide a 3D reference? == No') if opts.autopick_do_LoG: autopick_options.append('OR: use Laplacian-of-Gaussian? == Yes') else: autopick_options.append('OR: use Laplacian-of-Gaussian? == No') if opts.autopick_refs_are_ctf_corrected: autopick_options.append('Are References CTF corrected? == Yes') else: autopick_options.append('Are References CTF corrected? == No') if opts.autopick_refs_have_inverted_contrast: autopick_options.append('References have inverted contrast? == Yes') else: autopick_options.append('References have inverted contrast? == No') if opts.autopick_refs_ignore_ctf1stpeak: autopick_options.append('Ignore CTFs until first peak? == Yes') else: autopick_options.append('Ignore CTFs until first peak? == No') if opts.autopick_do_gpu and (not opts.autopick_do_LoG): autopick_options.append('Use GPU acceleration? == Yes') else: autopick_options.append('Use GPU acceleration? 
== No') if opts.autopick_submit_to_queue: autopick_options.extend(queue_options) if ipass == 0: autopick_job_name = 'autopick_job' autopick_alias = 'pass 1' else: autopick_job_name = 'autopick2_job' autopick_alias = 'pass 2' autopick_job, already_had_it = addJob('AutoPick', autopick_job_name, SETUP_CHECK_FILE, autopick_options, alias=autopick_alias) runjobs.append(autopick_job) #### Set up the Extract job extract_options = ['Input coordinates: == {}coords_suffix_autopick.star'.format(autopick_job), 'micrograph STAR file: == {}micrographs_ctf.star'.format(ctffind_job), 'Diameter background circle (pix): == {}'.format(opts.extract_bg_diameter), 'Particle box size (pix): == {}'.format(opts.extract_boxsize), 'Number of MPI procs: == {}'.format(opts.extract_mpi)] if ipass == 0: if opts.extract_downscale: extract_options.append('Rescale particles? == Yes') extract_options.append('Re-scaled size (pixels): == {}'.format(opts.extract_small_boxsize)) else: if opts.extract2_downscale: extract_options.append('Rescale particles? == Yes') extract_options.append('Re-scaled size (pixels): == {}'.format(opts.extract2_small_boxsize)) if opts.extract_submit_to_queue: extract_options.extend(queue_options) if ipass == 0: extract_job_name = 'extract_job' extract_alias = 'pass 1' else: extract_job_name = 'extract2_job' extract_alias = 'pass 2' extract_job, already_had_it = addJob('Extract', extract_job_name, SETUP_CHECK_FILE, extract_options, alias=extract_alias) runjobs.append(extract_job) if (ipass == 0 and (opts.do_class2d or opts.do_class3d)) or (ipass == 1 and (opts.do_class2d_pass2 or opts.do_class3d_pass2)): #### Set up the Select job to split the particle STAR file into batches split_options = ['OR select from particles.star: == {}particles.star'.format(extract_job), 'OR: split into subsets? == Yes', 'OR: number of subsets: == -1'] if ipass == 0: split_job_name = 'split_job' split_options.append('Subset size: == {}'.format(opts.batch_size)) split_alias = 'into {}'.format(opts.batch_size) else: split_job_name = 'split2_job' split_options.append('Subset size: == {}'.format(opts.batch_size_pass2)) split_alias = 'into {}'.format(opts.batch_size_pass2) split_job, already_had_it = addJob('Select', split_job_name, SETUP_CHECK_FILE, split_options, alias=split_alias) # Now start running stuff runjobs.append(split_job) # Now execute the entire preprocessing pipeliner if ipass == 0: preprocess_schedule_name = PREPROCESS_SCHEDULE_PASS1 else: preprocess_schedule_name = PREPROCESS_SCHEDULE_PASS2 RunJobs(runjobs, opts.preprocess_repeat_times, opts.preprocess_repeat_wait, preprocess_schedule_name) print(' RELION_IT: submitted',preprocess_schedule_name,'pipeliner with', opts.preprocess_repeat_times,'repeats of the preprocessing jobs') print(' RELION_IT: this pipeliner will run in the background of your shell. You can stop it by deleting the file RUNNING_PIPELINER_'+preprocess_schedule_name) ########## From now on, process extracted particles in batches for 2D or 3D classification, only perform SGD inimodel for first batch and if no 3D reference is available # There is again an option to stop here... if (ipass == 0 and (opts.do_class2d or opts.do_class3d)) or (ipass == 1 and (opts.do_class2d_pass2 or opts.do_class3d_pass2)): ### If necessary, rescale the 3D reference in the second pass! # TODO: rescale initial reference if different from movies? 
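# --- Editor's note (not part of the original script): the block below rescales the 3D
# reference by shelling out to relion_image_handler with exactly the flags assembled in
# 'command'. With hypothetical values (reference at 1.5 A/px, particles re-extracted at
# 3.0 A/px in a 64 px box) the generated call would look roughly like:
#     relion_image_handler --i ref.mrc --o ref_rescaled.mrc --angpix 1.5 --rescale_angpix 3.0 --new_box 64
# where ref.mrc / ref_rescaled.mrc stand in for opts.autopick_3dreference and opts.class3d_reference.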
if ipass == 1 and (opts.extract_downscale or opts.extract2_downscale): particles_angpix = opts.angpix if opts.images_are_movies: particles_angpix = particles_angpix * opts.motioncor_binning if opts.extract2_downscale: particles_angpix = particles_angpix * opts.extract_boxsize / opts.extract2_small_boxsize particles_boxsize = opts.extract2_small_boxsize else: particles_boxsize = opts.extract_boxsize if abs(float(particles_angpix) - float(opts.autopick_ref_angpix)) > 0.01: # Now rescale the reference for 3D classification opts.class3d_reference = opts.autopick_3dreference.replace('.mrc','_rescaled.mrc') print(' RELION_IT: rescaling the 3D reference from pixel size',opts.autopick_ref_angpix,'to',particles_angpix,'and saving the new reference as',opts.class3d_reference) command = 'relion_image_handler --i ' + opts.autopick_3dreference + ' --o ' + opts.class3d_reference + ' --angpix ' + str(opts.autopick_ref_angpix) + ' --rescale_angpix ' + str(particles_angpix) + ' --new_box ' + str(particles_boxsize) os.system(command) print(' RELION_IT: now entering an infinite loop for batch-processing of particles. You can stop this loop by deleting the file', RUNNING_FILE) # It could be that this is a restart, so check previous_batch1_size in the output directory. # Also check the presence of class2d_job_batch_001 in case the first job was not submitted yet. first_split_file = find_split_job_output(split_job + 'particles_split', 1) if getJobName("class2d_job_batch_001", SETUP_CHECK_FILE) is not None and \ first_split_file is not None: batch1 = safe_load_star(first_split_file, expected=['particles', 'rlnMicrographName']) previous_batch1_size = len(batch1['particles']['rlnMicrographName']) else: previous_batch1_size = 0 continue_this_pass = True while continue_this_pass: have_new_batch = False nr_batches = len(glob.glob(split_job + "particles_split*.star")) for ibatch in range(0, nr_batches): iibatch = ibatch + 1 batch_name = find_split_job_output(split_job + "particles_split", iibatch) batch = safe_load_star(batch_name, expected=['particles', 'rlnMicrographName']) batch_size = len(batch['particles']['rlnMicrographName']) rerun_batch1 = False if ( iibatch == 1 and batch_size > previous_batch1_size and batch_size > opts.minimum_batch_size ): previous_batch1_size = batch_size rerun_batch1 = True particles_star_file = batch_name # The first batch is special: perform 2D classification with smaller batch size (but at least minimum_batch_size) and keep overwriting in the same output directory if ( rerun_batch1 or batch_size == opts.batch_size): # Discard particles with odd average/stddev values if opts.do_discard_on_image_statistics: #### Run a Select job to get rid of particles with outlier average/stddev values... discard_options = ['OR select from particles.star: == {}'.format(batch_name), 'OR: select on image statistics? == Yes', 'Sigma-value for discarding images: == {}'.format(opts.discard_sigma), 'Metadata label for images: == rlnImageName'] if ipass == 0: discard_job_name = 'discard_job' else: discard_job_name = 'discard2_job' if opts.discard_submit_to_queue: discard_options.extend(queue_options) discard_job, already_had_it = addJob('Select', discard_job_name, SETUP_CHECK_FILE, discard_options) if ((not already_had_it) or rerun_batch1): have_new_batch = True RunJobs([discard_job], 1, 1, 'DISCARD') print(" RELION_IT: submitted job to discard based on image statistics for", batch_size ,"particles in", batch_name) # Wait here until this Discard job is finished. 
Check every thirty seconds WaitForJob(discard_job, 30) particles_star_file = discard_job + 'particles.star' # 2D classification if (ipass == 0 and opts.do_class2d) or (ipass == 1 and opts.do_class2d_pass2): class2d_options = ['Input images STAR file: == {}'.format(particles_star_file), 'Number of classes: == {}'.format(opts.class2d_nr_classes), 'Mask diameter (A): == {}'.format(opts.mask_diameter), 'Number of iterations: == {}'.format(opts.class2d_nr_iter), 'Angular search range - psi (deg): == {}'.format(opts.class2d_angle_step), 'Offset search range (pix): == {}'.format(opts.class2d_offset_range), 'Offset search step (pix): == {}'.format(opts.class2d_offset_step), 'Number of pooled particles: == {}'.format(opts.refine_nr_pool), 'Which GPUs to use: == {}'.format(opts.refine_gpu), 'Number of MPI procs: == {}'.format(opts.refine_mpi), 'Number of threads: == {}'.format(opts.refine_threads), 'Copy particles to scratch directory: == {}'.format(opts.refine_scratch_disk), 'Additional arguments: == {}'.format(opts.class2d_other_args)] if batch_size > opts.refine_batchsize_for_fast_subsets: class2d_options.append('Use fast subsets (for large data sets)? == Yes') else: class2d_options.append('Use fast subsets (for large data sets)? == No') if opts.refine_do_gpu: class2d_options.append('Use GPU acceleration? == Yes') else: class2d_options.append('Use GPU acceleration? == No') if opts.class2d_ctf_ign1stpeak: class2d_options.append('Ignore CTFs until first peak? == Yes') else: class2d_options.append('Ignore CTFs until first peak? == No') if opts.refine_preread_images: class2d_options.append('Pre-read all particles into RAM? == Yes') else: class2d_options.append('Pre-read all particles into RAM? == No') if opts.refine_submit_to_queue: class2d_options.extend(queue_options) if ipass == 0: jobname = 'class2d_job_batch_{:03d}'.format(iibatch) alias = 'pass1_batch_{:03d}'.format(iibatch) else: jobname = 'class2d_pass2_job_batch_{:03d}'.format(iibatch) alias = 'pass2_batch_{:03d}'.format(iibatch) class2d_job, already_had_it = addJob('Class2D', jobname, SETUP_CHECK_FILE, class2d_options, alias=alias) if ((not already_had_it) or rerun_batch1): have_new_batch = True RunJobs([class2d_job], 1, 1, 'CLASS2D') print(" RELION_IT: submitted 2D classification with", batch_size ,"particles in", class2d_job) # Wait here until this Class2D job is finished. 
Check every thirty seconds WaitForJob(class2d_job, 30) # Perform 3D classification if (ipass == 0 and opts.do_class3d) or (ipass == 1 and opts.do_class3d_pass2): # Do SGD initial model generation only in the first pass, when no reference is provided AND only for the first (complete) batch, for subsequent batches use that model if (not opts.have_3d_reference) and ipass == 0 and iibatch == 1 and batch_size == opts.batch_size: inimodel_options = ['Input images STAR file: == {}'.format(particles_star_file), 'Symmetry: == {}'.format(opts.symmetry), 'Mask diameter (A): == {}'.format(opts.mask_diameter), 'Number of classes: == {}'.format(opts.inimodel_nr_classes), 'Initial angular sampling: == {}'.format(opts.inimodel_angle_step), 'Offset search range (pix): == {}'.format(opts.inimodel_offset_range), 'Offset search step (pix): == {}'.format(opts.inimodel_offset_step), 'Number of initial iterations: == {}'.format(opts.inimodel_nr_iter_initial), 'Number of in-between iterations: == {}'.format(opts.inimodel_nr_iter_inbetween), 'Number of final iterations: == {}'.format(opts.inimodel_nr_iter_final), 'Write-out frequency (iter): == {}'.format(opts.inimodel_freq_writeout), 'Initial resolution (A): == {}'.format(opts.inimodel_resol_ini), 'Final resolution (A): == {}'.format(opts.inimodel_resol_final), 'Initial mini-batch size: == {}'.format(opts.inimodel_batchsize_ini), 'Final mini-batch size: == {}'.format(opts.inimodel_batchsize_final), 'Increased noise variance half-life: == {}'.format(opts.inimodel_sigmafudge_halflife), 'Number of pooled particles: == 1', 'Which GPUs to use: == {}'.format(opts.refine_gpu), 'Number of MPI procs: == {}'.format(opts.refine_mpi), 'Number of threads: == {}'.format(opts.refine_threads), 'Copy particles to scratch directory: == {}'.format(opts.refine_scratch_disk), 'Additional arguments: == {}'.format(opts.inimodel_other_args)] if opts.inimodel_solvent_flatten: inimodel_options.append('Flatten and enforce non-negative solvent? == Yes') else: inimodel_options.append('Flatten and enforce non-negative solvent? == No') if opts.refine_skip_padding: inimodel_options.append('Skip padding? == Yes') else: inimodel_options.append('Skip padding? == No') if opts.refine_do_gpu: inimodel_options.append('Use GPU acceleration? == Yes') else: inimodel_options.append('Use GPU acceleration? == No') if opts.inimodel_ctf_ign1stpeak: inimodel_options.append('Ignore CTFs until first peak? == Yes') else: inimodel_options.append('Ignore CTFs until first peak? == No') if opts.refine_preread_images: inimodel_options.append('Pre-read all particles into RAM? == Yes') else: inimodel_options.append('Pre-read all particles into RAM? == No') if opts.refine_submit_to_queue: inimodel_options.extend(queue_options) inimodel_job, already_had_it = addJob('InitialModel', 'inimodel', SETUP_CHECK_FILE, inimodel_options) if (not already_had_it): have_new_batch = True RunJobs([inimodel_job], 1, 1, 'INIMODEL') print(" RELION_IT: submitted initial model generation with", batch_size ,"particles in", inimodel_job) # Wait here until this inimodel job is finished. Check every thirty seconds WaitForJob(inimodel_job, 30) sgd_model_star = findOutputModelStar(inimodel_job) if sgd_model_star is None: print(" RELION_IT: Initial model generation " + inimodel_job + " does not contain expected output maps.") print(" RELION_IT: This job should have finished, but you may continue it from the GUI.") raise Exception("ERROR!! 
quitting the pipeline.") # TODO: MAKE MORE ROBUST # Use the model of the largest class for the 3D classification below total_iter = opts.inimodel_nr_iter_initial + opts.inimodel_nr_iter_inbetween + opts.inimodel_nr_iter_final best_inimodel_class, best_inimodel_resol, best_inimodel_angpix = findBestClass(sgd_model_star, use_resol=True) opts.class3d_reference = best_inimodel_class opts.class3d_ref_is_correct_greyscale = True opts.class3d_ref_is_ctf_corrected = True opts.have_3d_reference = True if opts.have_3d_reference: # Now perform the actual 3D classification class3d_options = ['Input images STAR file: == {}'.format(particles_star_file), 'Reference map: == {}'.format(opts.class3d_reference), 'Initial low-pass filter (A): == {}'.format(opts.class3d_ini_lowpass), 'Symmetry: == {}'.format(opts.symmetry), 'Regularisation parameter T: == {}'.format(opts.class3d_T_value), 'Reference mask (optional): == {}'.format(opts.class3d_reference_mask), 'Number of classes: == {}'.format(opts.class3d_nr_classes), 'Mask diameter (A): == {}'.format(opts.mask_diameter), 'Number of iterations: == {}'.format(opts.class3d_nr_iter), 'Angular sampling interval: == {}'.format(opts.class3d_angle_step), 'Offset search range (pix): == {}'.format(opts.class3d_offset_range), 'Offset search step (pix): == {}'.format(opts.class3d_offset_step), 'Number of pooled particles: == {}'.format(opts.refine_nr_pool), 'Which GPUs to use: == {}'.format(opts.refine_gpu), 'Number of MPI procs: == {}'.format(opts.refine_mpi), 'Number of threads: == {}'.format(opts.refine_threads), 'Copy particles to scratch directory: == {}'.format(opts.refine_scratch_disk), 'Additional arguments: == {}'.format(opts.class3d_other_args)] if batch_size > opts.refine_batchsize_for_fast_subsets: class3d_options.append('Use fast subsets (for large data sets)? == Yes') else: class3d_options.append('Use fast subsets (for large data sets)? == No') if opts.class3d_ref_is_correct_greyscale: class3d_options.append('Ref. map is on absolute greyscale? == Yes') else: class3d_options.append('Ref. map is on absolute greyscale? == No') if opts.class3d_ref_is_ctf_corrected: class3d_options.append('Has reference been CTF-corrected? == Yes') else: class3d_options.append('Has reference been CTF-corrected? == No') if opts.refine_skip_padding: class3d_options.append('Skip padding? == Yes') else: class3d_options.append('Skip padding? == No') if opts.refine_do_gpu: class3d_options.append('Use GPU acceleration? == Yes') else: class3d_options.append('Use GPU acceleration? == No') if opts.class3d_ctf_ign1stpeak: class3d_options.append('Ignore CTFs until first peak? == Yes') else: class3d_options.append('Ignore CTFs until first peak? == No') if opts.refine_preread_images: class3d_options.append('Pre-read all particles into RAM? == Yes') else: class3d_options.append('Pre-read all particles into RAM? == No') if opts.refine_submit_to_queue: class3d_options.extend(queue_options) if ipass == 0: jobname = 'class3d_job_batch_{:03d}'.format(iibatch) alias = 'pass1_batch_{:03d}'.format(iibatch) else: jobname = 'class3d2_job_batch_{:03d}'.format(iibatch) alias = 'pass2_batch_{:03d}'.format(iibatch) class3d_job, already_had_it = addJob('Class3D', jobname, SETUP_CHECK_FILE, class3d_options, alias=alias) if ((not already_had_it) or rerun_batch1): have_new_batch = True RunJobs([class3d_job], 1, 1, 'CLASS3D') print(' RELION_IT: submitted 3D classification with', batch_size ,'particles in', class3d_job) # Wait here until this Class2D job is finished. 
Check every thirty seconds WaitForJob(class3d_job, 30) class3d_model_star = findOutputModelStar(class3d_job) if class3d_model_star is None: print(" RELION_IT: 3D Classification " + class3d_job + " does not contain expected output maps.") print(" RELION_IT: This job should have finished, but you may continue it from the GUI.") raise Exception("ERROR!! quitting the pipeline.") # TODO: MAKE MORE ROBUST best_class3d_class, best_class3d_resol, best_class3d_angpix = findBestClass(class3d_model_star, use_resol=True) # Once the first batch in the first pass is completed: move on to the second pass if (ipass == 0 and opts.do_second_pass and iibatch == 1 and best_class3d_resol < opts.minimum_resolution_3dref_2ndpass): opts.autopick_3dreference = best_class3d_class opts.autopick_ref_angpix = best_class3d_angpix opts.autopick_2dreferences = '' opts.autopick_do_LoG = False opts.class3d_reference = best_class3d_class opts.have_3d_reference = True opts.autopick_3dref_symmetry = opts.symmetry # Stop the PREPROCESS pipeliner of the first pass by removing its RUNNING file filename_to_remove = 'RUNNING_PIPELINER_'+preprocess_schedule_name if os.path.isfile(filename_to_remove): print(' RELION_IT: removing file',filename_to_remove,'to stop the pipeliner from the first pass') os.remove(filename_to_remove) # Generate a file to indicate we're in the second pass, so that restarts of the python script will be smooth g = open(SECONDPASS_REF3D_FILE,'w') g.write(str(best_class3d_class)+'\n'+str(best_class3d_angpix)+'\n') g.close() # Move out of this ipass of the passes loop.... ibatch = nr_batches+1 continue_this_pass = False print(' RELION_IT: moving on to the second pass using',opts.autopick_3dreference,'for template-based autopicking') # break out of the for-loop over the batches break if not have_new_batch: CheckForExit() # The following prevents checking the particles.star file too often time.sleep(60*opts.batch_repeat_time) def main(): """ Run the RELION 3 pipeline. Options files given as command line arguments will be opened in order and used to update the default options. """ # Start by parsing arguments # (If --help is given, the program will print a usage message and exit) parser = argparse.ArgumentParser() parser.add_argument("extra_options", nargs="*", metavar="extra_options.py", help="Python files containing options for relion_it.py") parser.add_argument("--gui", action="store_true", help="launch a simple GUI to set options") parser.add_argument("--continue", action="store_true", dest="continue_", help="continue a previous run by loading options from ./relion_it_options.py") args = parser.parse_args() print(' RELION_IT: -------------------------------------------------------------------------------------------------------------------') print(' RELION_IT: script for automated, on-the-fly single-particle analysis in RELION (>= 3.1)') print(' RELION_IT: authors: Sjors H.W. Scheres, Takanori Nakane & Colin M. Palmer') print(' RELION_IT: ') print(' RELION_IT: usage: ./relion_it.py [extra_options.py [extra_options2.py ....] 
] [--gui] [--continue]') print(' RELION_IT: ') print(' RELION_IT: this script will check whether processes are still running using files with names starting with RUNNING') print(' RELION_IT: you can restart this script after stopping previous processes by deleting all RUNNING files') print(' RELION_IT: this script keeps track of already submitted jobs in a filed called',SETUP_CHECK_FILE) print(' RELION_IT: upon a restart, jobs present in this file will be continued (for preprocessing), or ignored when already finished') print(' RELION_IT: if you would like to re-do a specific job from scratch (e.g. because you changed its parameters)') print(' RELION_IT: remove that job, and those that depend on it, from the',SETUP_CHECK_FILE) print(' RELION_IT: -------------------------------------------------------------------------------------------------------------------') print(' RELION_IT: ') # Make sure no other version of this script are running... if os.path.isfile(RUNNING_FILE): print(" RELION_IT: ERROR:", RUNNING_FILE, "is already present: delete this file and make sure no other copy of this script is running. Exiting now ...") exit(0) # Also make sure the preprocessing pipeliners are stopped before re-starting this script for checkfile in ('RUNNING_PIPELINER_'+PREPROCESS_SCHEDULE_PASS1, 'RUNNING_PIPELINER_'+PREPROCESS_SCHEDULE_PASS2): if os.path.isfile(checkfile): print(" RELION_IT: ERROR:", checkfile, "is already present: delete this file and make sure no relion_pipeliner job is still running. Exiting now ...") exit(0) if args.continue_: print(' RELION_IT: continuing a previous run. Options will be loaded from ./relion_it_options.py') args.extra_options.append(OPTIONS_FILE) opts = RelionItOptions() for user_opt_file in args.extra_options: print(' RELION_IT: reading options from {}'.format(user_opt_file)) user_opts = runpy.run_path(user_opt_file) opts.update_from(user_opts) if args.gui: print(' RELION_IT: launching GUI...') tk_root = tk.Tk() tk_root.title("relion_it.py setup") RelionItGui(tk_root, opts) tk_root.mainloop() else: run_pipeline(opts) if __name__ == "__main__": main() relion-3.1.3/scripts/star_datablock_ctfdat000077500000000000000000000020371411340063500207240ustar00rootroot00000000000000#!/bin/bash if [ $# != 0 ]; then while read line; do # Get image name from first column image=`echo $line | awk '{print $1}'` # Get ctfparam name from second column ctfparam=`echo $line | awk '{print $2}'` # Strip directories from ctfparam and remove "_Periodogramavg.ctfparam" to get micrograph name micro=`echo $ctfparam | awk -F"/" '{print $NF}' | sed 's|_Periodogramavg.ctfparam||'` # Get metadata from the ctfparam, reverse sign for defocusU, defocusV and Q0 metadata=`awk '{if ($1~"defocus" || $1~"Q0") {printf "%s%s", -$2," "} else if ($1~"sampling" || $1~"K=") {} else {printf "%s%s", $2," "} } END {printf "\n"}' <$ctfparam ` echo $image $micro $metadata done < $1 else echo " === Usage: === " echo " ${0} " echo " " echo " === Purpose: === " echo " This (bash) script generates the STAR datablock for all images in an xmipp-format CTFDAT file" echo " Note that the sign for XMIPP's defocusU, defocusV and Q0 values is reversed " echo " " echo " === Example: ===" echo " ${0} all_images.ctfdat " fi relion-3.1.3/scripts/star_datablock_singlefiles000077500000000000000000000013121411340063500217560ustar00rootroot00000000000000#!/bin/bash if [ $# != 0 ]; then for i in ${1} do echo -n ${i}" " for (( c=2; c<=$#; c++ )) do echo -n ${!c}" " done echo "" done else echo " === Usage: === " echo " ${0} \"*.spi\" 
..." echo " " echo " === Purpose: === " echo " This (bash) script generates the datablock for all images represented by the wildcard in the first argument" echo " Other (optional) data values are in value1, value2, etc. " echo " " echo " === Example: ===" echo " ${0} \"tmp/*\" 10000 10500 0.0 200 2 0.1" echo " yields: " echo " tmp/t1.spi 10000 10500 0.0 200 2 0.1" echo " tmp/t2.spi 10000 10500 0.0 200 2 0.1" echo " tmp/t3.spi 10000 10500 0.0 200 2 0.1" fi relion-3.1.3/scripts/star_datablock_stack000077500000000000000000000013771411340063500205720ustar00rootroot00000000000000#!/bin/bash if [ $# != 0 ]; then for i in `seq 1 ${1}` do printf "%06i%1s%s%s" ${i} "@" ${2} " " for (( c=3; c<=$#; c++ )) do echo -n ${!c}" " done echo "" done else echo " === Usage: === " echo " ${0} ..." echo " " echo " === Purpose: === " echo " This (bash) script generates the datablock for N images in a stack named stackname" echo " Other (optional) data values are in value1, value2, etc. " echo " " echo " === Example: ===" echo " ${0} 3 my_images.mrcs 10000 10500 0.0 200 2 0.1" echo " yields: " echo " 000001@my_images.mrcs 10000 10500 0.0 200 2 0.1" echo " 000002@my_images.mrcs 10000 10500 0.0 200 2 0.1" echo " 000003@my_images.mrcs 10000 10500 0.0 200 2 0.1" fi relion-3.1.3/scripts/star_loopheader000077500000000000000000000012431411340063500175730ustar00rootroot00000000000000#!/bin/bash -f if [ $# != 0 ]; then echo "data_" echo "loop_" for i; do echo "_"${i} done; else echo " === Usage: === " echo " ${0} ..." echo " " echo " === Purpose: === " echo " This (bash) script generates the header of STAR-file with the given labels" echo " " echo " === Example: ===" echo " ${0} rlnImageName rlnDefocusU rlnDefocusV rlnDefocusAngle rlnVoltage rlnSphericalAberration rlnAmplitudeContrast" echo " yields: " echo " data_" echo " loop_" echo " _rlnImageName" echo " _rlnDefocusU" echo " _rlnDefocusV" echo " _rlnDefocusAngle" echo " _rlnVoltage" echo " _rlnSphericalAberration" echo " _rlnAmplitudeContrast" fi relion-3.1.3/scripts/star_plottable000077500000000000000000000023011411340063500174330ustar00rootroot00000000000000#!/bin/bash if [ $# != 0 ]; then echo " set title '${2}'" > gnuplot.plt echo " set ylabel '${3}'" >> gnuplot.plt if [ $# == 4 ]; then relion_star_printtable ${1} ${2} ${4} ${3} > ${1}-${2}-${3}.dat echo " set xlabel '${4}'" >> gnuplot.plt else relion_star_printtable ${1} ${2} ${3} > ${1}-${2}-${3}.dat echo " set xlabel 'index'" >> gnuplot.plt fi echo " plot '${1}-${2}-${3}.dat' w l " >> gnuplot.plt echo " ** Written datafile: " ${1}-${2}-${3}.dat echo " ** Running: gnuplot -persist gnuplot.plt " echo " ** Alternatively, inside an interactive gnuplot session type: load \"gnuplot.plt\"" gnuplot -persist gnuplot.plt else echo " === Usage: === " echo " ${0} " echo " " echo " === Purpose: === " echo " This (bash) script uses gnuplot to plot content from a datablock (with name ) in " echo " It will make a plot of the values given for against those of " echo " If is not given, the values of will be plotted linearly" echo " " echo " === Example: ===" echo " ${0} run3_it024_model.star run3_it024_model.star data_model_class_1 rlnSsnrMap rlnResolution" fi relion-3.1.3/scripts/star_printtable000077500000000000000000000043271411340063500176230ustar00rootroot00000000000000#!/usr/bin/env bash if [ $# -ge 2 ]; then tmpdir=$(mktemp -d -t relion_print_star_temp.XXXXXX) # Get the desired data block awk -v"tab=${2}" 'BEGIN {if (tab!~/^data_/) tab="data_"tab} (a==0 && $1==tab) {a=22; next} (a==22) {if ($1=="loop_") a+=1; else if 
($1~/^data_/) exit;} (a==23) {if (length($0)<2 || $1~/^data_/) exit; else print $0;}' ${1} | grep -v loop_ > $tmpdir/tmp.dat if [ $# == 2 ]; then # print all columns awk '$1!~/^_/ {print;}' < $tmpdir/tmp.dat else # print selected columns awk '$1!~/^_/ {j++; print j}' < $tmpdir/tmp.dat > $tmpdir/tmp1.dat for (( c=3; c<=$#; c++ )); do awk -v"label=${!c}" 'BEGIN {if (label!~/^_/) label="_"label} {if ($1~/^_/) {i++; if ($1==label) col=i;} else {j++; print j, $col}}' < $tmpdir/tmp.dat > $tmpdir/tmp2.dat join $tmpdir/tmp1.dat $tmpdir/tmp2.dat > $tmpdir/tmp3.dat mv -f $tmpdir/tmp3.dat $tmpdir/tmp1.dat done awk '{for (i=2; i<=NF;i++) printf("%s%s", $i, (i==NF)?"\n":OFS)}' < $tmpdir/tmp1.dat fi rm -fr $tmpdir else echo " === Usage: === " echo " ${0} [ ...]" echo " " echo " === Purpose: === " echo " This (bash) script prints the contents of a datablock (with name tablename) from a starfile" echo " If any labels are given, then only those will be printed " echo " " echo " === Example: === " echo " ${0} run3_it024_model.star data_model_class_1 rlnResolution rlnSsnrMap" echo " (NOTE: not _rlnResolution)" echo " " echo " === Limitations: === " echo " This program makes a temporary directory under \$TMPDIR. This folder must be writable and have sufficient space." echo " " echo " This program does not perform any error checks." echo " When specified table and/or column(s) are absent in the input, the program might give incorrect results." echo " In older versions, table names and column names could match only partially. For example, rlnFourierShellCorrelationCorrected matched rlnFourierShellCorrelation. This was dangerous and the match is exact now." echo " " echo " To address these issues, this program will be completely re-written in the next major update (RELION 3.2)." echo " In the new version, the errors are handled more strictly. Please update your scripts to prepare for transition." echo "" fi relion-3.1.3/src/000077500000000000000000000000001411340063500135725ustar00rootroot00000000000000relion-3.1.3/src/CPlot2D.cpp000066400000000000000000001075711411340063500155200ustar00rootroot00000000000000// // CPlot2D.cpp // // A simple class for plotting xy values and // generating a PostScript output, aimed at // easy integration into C/C++ programs. // // You are granted use of the code, but please // be a nice guy and acknowledge where you got // it from by giving credit! // // If you have any comments, questions, bug // reports, you can contact me at // attila AT amzsaki.com // // Created by Attila Michael Zsaki on 14-03-17. // Copyright (c) 2014 AMZs. All rights reserved. // #include #include #include #include #include #include #include "CPlot2D.h" void joinMultipleEPSIntoSinglePDF(FileName fn_pdf, std::vector fn_eps) { FileName fn_list = fn_pdf + ".lst"; std::string command = "gs -sDEVICE=pdfwrite -dNOPAUSE -dBATCH -dSAFER -dDEVICEWIDTHPOINTS=800 -dDEVICEHEIGHTPOINTS=800 -sOutputFile="; command += fn_pdf + " @" + fn_list; std::ofstream filelist(fn_pdf + ".lst"); bool have_at_least_one = false; for (int i = 0; i < fn_eps.size(); i++) { // fn_eps[i] could be a Linux wildcard... 
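// (globFiles() below expands any such wildcard into the matching file names;
//  entries that do not resolve to an existing file are skipped by the exists() check)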
std::vector all_eps_files; fn_eps[i].globFiles(all_eps_files); for (long int j= 0; j < all_eps_files.size(); j++) { if (exists(all_eps_files[j])) { filelist << all_eps_files[j] << "\n"; have_at_least_one = true; } } } filelist.close(); bool have_error_in_gs = false; if (have_at_least_one) { command += " > /dev/null"; if (system(command.c_str())) { std::cerr << " ERROR in executing: " << command << "\n"; have_error_in_gs = true; } } else { std::cerr << " Did not find any of the expected EPS files to generate a PDF file" << "\n"; } // std::remove(fn_list.c_str()); // don't know why but Ghostscript fails with this line. // system() should wait the termination of the program, so this is very strange... if (!have_at_least_one || have_error_in_gs) { std::cerr << " + Will make an empty PDF-file in " << fn_pdf << "\n"; touch(fn_pdf); } } CPlot2D::CPlot2D(std::string title) { //m_dXAxisSize=809.0; // Golden Ratio m_dXAxisSize=600.0; // Golden Ratio m_dYAxisSize=500.0; m_dBottomFrameSize=90.0; m_dRightFrameSize=40.0; m_dTopFrameSize=40.0; m_dLeftFrameSize=75.0; m_dXTotalSize=m_dLeftFrameSize+m_dXAxisSize+m_dRightFrameSize; m_dYTotalSize=m_dBottomFrameSize+m_dYAxisSize+m_dTopFrameSize; m_dDiagonalSize=sqrt(m_dXAxisSize*m_dXAxisSize+m_dYAxisSize*m_dYAxisSize); m_dFrameLineWidth=1.0; m_dFrameColor[0]=0.0; m_dFrameColor[1]=0.0; m_dFrameColor[2]=0.0; m_dGridLineWidth=1.0; m_dGridColor[0]=0.8; m_dGridColor[1]=0.8; m_dGridColor[2]=0.8; m_dLineDotSpacing=0.0; m_bDrawXAxisTickMarks=true; m_bDrawYAxisTickMarks=true; m_dXAxisNumbersSpacing=1.0; m_dYAxisNumbersSpacing=1.0; m_iXAxisNumberOfTicks=10; m_iYAxisNumberOfTicks=10; m_bDrawXAxisGridLines=true; m_bDrawYAxisGridLines=true; m_bDrawGridLinesDashed=true; m_bFlipY = false; m_dFlipYOffset = 0; // Sjors Scheres 22mar2016: changed all fonts to Times m_strXAxisLabelFont="Times"; m_dXAxisLabelFontSize=12.0; m_strYAxisLabelFont="Times"; m_dYAxisLabelFontSize=12.0; m_strXAxisTitleFont="Times"; m_dXAxisTitleFontSize=16.0; m_strYAxisTitleFont="Times"; m_dYAxisTitleFontSize=16.0; m_dXAxisTitleColor[0]=0.0; m_dXAxisTitleColor[1]=0.0; m_dXAxisTitleColor[2]=0.0; m_dYAxisTitleColor[0]=0.0; m_dYAxisTitleColor[1]=0.0; m_dYAxisTitleColor[2]=0.0; m_strXAxisTitle=""; m_strYAxisTitle=""; // Sjors Scheres 22mar2016: insert title of plot here m_strPlotTitle=title; m_dXAxisLabelColor[0]=0.0; m_dXAxisLabelColor[1]=0.0; m_dXAxisLabelColor[2]=0.0; m_dYAxisLabelColor[0]=0.0; m_dYAxisLabelColor[1]=0.0; m_dYAxisLabelColor[2]=0.0; m_bDrawLegend=true; } CPlot2D::~CPlot2D() { } void CPlot2D::OutputPostScriptPlot(std::string fileName) { outputFile.open(fileName.c_str()); // precompute plot dimensions PrecomputeDimensions(); // header outputFile << "%!PS-Adobe-2.0 EPSF-1.2" << "\n"; outputFile << "%%BoundingBox: 0 0 " << (int)m_dXTotalSize << " " << (int)m_dYTotalSize << "\n"; outputFile << "%%Pages: 1" << "\n"; outputFile << "%%EndComments" << "\n"; // grid lines if (m_bDrawXAxisGridLines) { DrawXAxisGridLinesPostScript(); } if (m_bDrawYAxisGridLines) { DrawYAxisGridLinesPostScript(); } // draw plot frame DrawFramePostScript(); // draw axis tick marks if (m_bDrawXAxisTickMarks) { DrawXAxisTickMarksPostScript(); } if (m_bDrawYAxisTickMarks) { DrawYAxisTickMarksPostScript(); } // draw axis labels // might be separate from drawing tick marks one day... 
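// (note: at present the axis labels are only drawn when the corresponding tick marks are enabled)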
if (m_bDrawXAxisTickMarks) { DrawXAxisLabelsPostScript(); } if (m_bDrawYAxisTickMarks) { DrawYAxisLabelsPostScript(); } // draw axis titles DrawXAxisTitlePostScript(); DrawYAxisTitlePostScript(); // draw data DrawDataPostScript(); // draw legend if (m_bDrawLegend) { DrawLegendPostScript(); } outputFile.close(); } void CPlot2D::DrawFramePostScript() { outputFile << "newpath" << "\n"; outputFile << m_dLeftFrameSize << " " << m_dBottomFrameSize << " moveto" << "\n"; outputFile << m_dXAxisSize << " " << 0 << " rlineto" << "\n"; outputFile << 0 << " " << m_dYAxisSize << " rlineto" << "\n"; outputFile << -m_dXAxisSize << " " << 0 << " rlineto" << "\n"; outputFile << "closepath" << "\n"; outputFile << m_dFrameLineWidth << " setlinewidth" << "\n"; outputFile << m_dFrameColor[0] << " " << m_dFrameColor[1] << " " << m_dFrameColor[2] << " setrgbcolor" << "\n"; outputFile << "stroke" << "\n"; double labelXCoordinate,labelYCoordinate; labelXCoordinate=m_dLeftFrameSize+m_dXAxisSize*0.5; labelYCoordinate=m_dBottomFrameSize + m_dYAxisSize + 10; outputFile << "/" << m_strXAxisTitleFont << " findfont" << "\n"; outputFile << m_dYAxisTitleFontSize << " scalefont" << "\n"; outputFile << "setfont" << "\n"; outputFile << labelXCoordinate << " " << labelYCoordinate << " moveto" << "\n"; outputFile << m_dXAxisTitleColor[0] << " " << m_dXAxisTitleColor[1] << " " << m_dXAxisTitleColor[2] << " setrgbcolor" << "\n"; // let PostScript handle the final adjustment based on the width of the string outputFile << "(" << m_strPlotTitle << ")" << " dup stringwidth pop 2 div neg 0 rmoveto show" << "\n"; } void CPlot2D::PrecomputeDimensions() { m_dXTotalSize=m_dXAxisSize+m_dRightFrameSize+m_dLeftFrameSize; m_dYTotalSize=m_dYAxisSize+m_dTopFrameSize+m_dBottomFrameSize; m_dMaxXExtent=0.0; m_dMaxYExtent=0.0; m_dMinXStartPoint=DBL_MAX; m_dMinYStartPoint=DBL_MAX; m_dMaxXEndPoint=-DBL_MAX; m_dMaxYEndPoint=-DBL_MAX; // for all data sets for (int i=0;im_dataSets[i].GetXMinValue()) { m_dMinXStartPoint=m_dataSets[i].GetXMinValue(); } if (m_dMinYStartPoint>m_dataSets[i].GetYMinValue()) { m_dMinYStartPoint=m_dataSets[i].GetYMinValue(); } if (m_dMaxXEndPoint xValues, std::vector yValues) { CDataSet dataSet; if (m_dataSets.size() == 0) dataSet.SetDatasetColor(1., 0., 0.); else if (m_dataSets.size() == 1) dataSet.SetDatasetColor(0., 1., 0.); else dataSet.SetDatasetColor(0., 0., 1.); dataSet.SetDrawMarker(false); if (xValues.size() != yValues.size()) { REPORT_ERROR("ERROR: xValues and yValues vectors do not have identical sizes."); } for (long int i = 0; i < yValues.size(); i++) { CDataPoint point=CDataPoint(xValues[i],yValues[i]); dataSet.AddDataPoint(point); } m_dataSets.push_back(dataSet); } void CPlot2D::AddDataSet(std::vector yValues) { CDataSet dataSet; if (m_dataSets.size() == 0) dataSet.SetDatasetColor(1., 0., 0.); else if (m_dataSets.size() == 1) dataSet.SetDatasetColor(0., 1., 0.); else dataSet.SetDatasetColor(0., 0., 1.); dataSet.SetDrawMarker(false); for (long int i = 0; i < yValues.size(); i++) { CDataPoint point=CDataPoint(i+1,yValues[i]); dataSet.AddDataPoint(point); } m_dataSets.push_back(dataSet); } void CPlot2D::DrawMarker(std::string symbol, double size, bool filled, double xLocation, double yLocation, int dataSet) { double r,g,b; m_dataSets[dataSet].GetDatasetColor(&r,&g,&b); if (symbol=="o") { outputFile << xLocation << " " << yLocation << " moveto" << "\n"; outputFile << xLocation << " " << yLocation << " " << size*0.5 << " 0 360 arc closepath" << "\n"; outputFile << r << " " << g << " " << b << " setrgbcolor" << "\n"; if 
(filled) { outputFile << "fill" << "\n"; } outputFile << "stroke" << "\n"; } if (symbol=="x" || symbol=="*") { double halfSize=0.5*size; outputFile << xLocation-halfSize << " " << yLocation-halfSize << " moveto" << "\n"; outputFile << size << " " << size << " rlineto" << "\n"; outputFile << -size << " " << 0 << " rmoveto" << "\n"; outputFile << size << " " << -size << " rlineto" << "\n"; outputFile << r << " " << g << " " << b << " setrgbcolor" << "\n"; outputFile << "stroke" << "\n"; } if (symbol=="+" || symbol=="*") { double halfSize=0.5*size; outputFile << xLocation-halfSize << " " << yLocation << " moveto" << "\n"; outputFile << size << " " << 0 << " rlineto" << "\n"; outputFile << -halfSize << " " << halfSize << " rmoveto" << "\n"; outputFile << 0 << " " << -size << " rlineto" << "\n"; outputFile << r << " " << g << " " << b << " setrgbcolor" << "\n"; outputFile << "stroke" << "\n"; } if (symbol=="diamond") { double halfSize=0.5*size; outputFile << "newpath" << "\n"; outputFile << xLocation << " " << yLocation-halfSize << " moveto" << "\n"; outputFile << halfSize << " " << halfSize << " rlineto" << "\n"; outputFile << -halfSize << " " << halfSize << " rlineto" << "\n"; outputFile << -halfSize << " " << -halfSize << " rlineto" << "\n"; outputFile << "closepath" << "\n"; outputFile << r << " " << g << " " << b << " setrgbcolor" << "\n"; if (filled) { outputFile << "fill" << "\n"; } outputFile << "stroke" << "\n"; } if (symbol=="square") { double halfSize=0.5*size; outputFile << "newpath" << "\n"; outputFile << xLocation-halfSize << " " << yLocation-halfSize << " moveto" << "\n"; outputFile << size << " " << 0 << " rlineto" << "\n"; outputFile << 0 << " " << size << " rlineto" << "\n"; outputFile << -size << " " << 0 << " rlineto" << "\n"; outputFile << "closepath" << "\n"; outputFile << r << " " << g << " " << b << " setrgbcolor" << "\n"; if (filled) { outputFile << "fill" << "\n"; } outputFile << "stroke" << "\n"; } if (symbol=="triangle") { double halfSize=0.5*size; double halfEdgeLength=1.5/sqrt(3.0)*halfSize; outputFile << "newpath" << "\n"; outputFile << xLocation-halfEdgeLength << " " << yLocation-halfSize*0.5 << " moveto" << "\n"; outputFile << halfEdgeLength*2.0 << " " << 0 << " rlineto" << "\n"; outputFile << -halfEdgeLength << " " << 1.5*halfSize << " rlineto" << "\n"; outputFile << "closepath" << "\n"; outputFile << r << " " << g << " " << b << " setrgbcolor" << "\n"; if (filled) { outputFile << "fill" << "\n"; } outputFile << "stroke" << "\n"; } } void CPlot2D::DrawXAxisTickMarksPostScript() { for (int i=0;imaxNumberOfCharactersInDatasetTitles) { maxNumberOfCharactersInDatasetTitles=(int)m_dataSets[i].GetDatasetTitle().size(); } } double maxFontHeight=widthPerLegendText/(maxNumberOfCharactersInDatasetTitles*0.75); if (maxFontHeight>14.0) { maxFontHeight=14.0; } if (maxFontHeight<6.0) { maxFontHeight=6.0; } double legendXCoordinate,legendYCoordinate; for (int i=0;i #include #include #include #include #include #include #include "src/filename.h" /* SHWS: join multiple eps files into a single pdf * */ void joinMultipleEPSIntoSinglePDF(FileName fn_pdf, std::vector fn_eps); /* EL: Including all of the std namespace on the global level both in this and * every single file that includes this header will lead to ambiguous * definitions with Cuda. Fixed by adding std:: prefixes to string, vector, * and ofstream. */ // using namespace std; /*! A simple container class to hold a data point, comprised of an x and y value stored in a double. 
Simple accessors are implemented to set and get the data stored. */ class CDataPoint { public: /*! Constructor for the class. The x and y values are initialized to zero. */ CDataPoint() { m_dDataX=0.0; m_dDataY=0.0; }; /*! Nothing really to destruct... */ ~CDataPoint() {}; /*! Constructor for the class with initialization of x and y. */ CDataPoint(double x, double y) { m_dDataX=x; m_dDataY=y; }; /*! Another way to set the x and y values. */ void SetValues(double x, double y) { m_dDataX=x; m_dDataY=y; }; /*! Get the x and y values as a pair. */ void GetValues(double *x, double *y) { *x=m_dDataX; *y=m_dDataY; }; /*! Get the x value individually. */ double GetX() { return (m_dDataX); }; /*! Get the y value individually. */ double GetY() { return (m_dDataY); }; protected: double m_dDataX; /*!< Storage for the x value of a data point. */ double m_dDataY; /*!< Storage for the y value of a data point. */ }; /*! A container class to hold a dataset. This class stores all the data points in a vector along with attributes specific to representing a dataset, such as the color of the line and markers or the title of the dataset. */ class CDataSet { public: /*! Constructor for the class. The member variables are initialized to common values resulting in a, perhaps, pleasing representation of the data without the need to set anything up. */ CDataSet() { m_dLineWidth=1.0; m_dColor[0]=0.0; m_dColor[1]=0.0; m_dColor[2]=0.0; m_strMarker="o"; m_dMarkerSize=7.0; m_bDrawLine=true; m_bDrawMarker=true; m_bDrawMarkerFilled=true; m_bDashedLine=false; m_iDashedLinePattern="dash"; m_strDatasetTitle=""; m_strDatasetLegendFont="Times"; }; /*! Nothing really to destruct... */ ~CDataSet() {}; /*! Set a data point at the index location. This function neccessitates the presence of the data location pointed to by the index variable. Generally, the preferred method to set or add data is to use the AddDataPoint() function, which appends the new data point to the dataset. */ void SetDataPoint(int index, CDataPoint point) { m_dDataPoints[index]=point; }; /*! Get the data point at the location indicated by the variable index. */ CDataPoint GetDataPoint(int index) { return (m_dDataPoints[index]); }; /*! Add a new data point by appending it to the end of the data vector. */ void AddDataPoint(CDataPoint point) { m_dDataPoints.push_back(point); }; /*! Returns the number of data points comprisig the data set. */ int GetNumberOfDataPointsInSet() { return ((int)m_dDataPoints.size()); }; /*! Returns the minimum value of the x variable in the dataset by iterating over all data points in the set. */ double GetXMinValue() { double min=DBL_MAX; for (int i=0;imax) { max=m_dDataPoints[i].GetX(); } } return (max); }; /*! Returns the minimum value of the y variable in the dataset by iterating over all data points in the set. */ double GetYMinValue() { double min=DBL_MAX; for (int i=0;imax) { max=m_dDataPoints[i].GetY(); } } return (max); }; /*! Returns the extent (maximum minus minimum) of the x values in the dataset. */ double GetXExtent() { return (GetXMaxValue()-GetXMinValue()); }; /*! Returns the extent (maximum minus minimum) of the y values in the dataset. */ double GetYExtent() { return (GetYMaxValue()-GetYMinValue()); }; /*! Sets the line width that will be used in drawing the line representing the dataset. */ void SetLineWidth(double lineWidth) { m_dLineWidth=lineWidth; }; /*! Returns the line width that will be used in drawing the line representing the dataset. */ double GetLineWidth() { return (m_dLineWidth); }; /*! 
Sets the RGB color used for drawing the dataset. The range is 0.0-1.0. */ void SetDatasetColor(double r, double g, double b) { m_dColor[0]=r; m_dColor[1]=g; m_dColor[2]=b; }; /*! Gets the RGB color used for drawing the dataset. */ void GetDatasetColor(double *r, double *g, double *b) { *r=m_dColor[0]; *g=m_dColor[1]; *b=m_dColor[2]; }; /*! Sets the marker symbol used in drawing the dataset. See the declaration of variable for the available types. */ void SetMarkerSymbol(std::string symbol) { m_strMarker=symbol; }; /*! Sets the marker symbol size used in drawing the dataset. */ void SetMarkerSize(double size) { m_dMarkerSize=size; }; /*! A flag to enable/disable drawing a line connecting the data points. */ void SetDrawLine(bool flag) { m_bDrawLine=flag; }; /*! A flag to enable/disable drawing the marker symbol. */ void SetDrawMarker(bool flag) { m_bDrawMarker=flag; }; /*! A flag to enable/disable filling the interior of the marker symbol. */ void SetDrawMarkerFilled(bool flag) { m_bDrawMarkerFilled=flag; }; /*! Returns a string describing the marker symbol. */ std::string GetMarkerSymbol() { return (m_strMarker); }; /*! Returns the size of the marker symbol. */ double GetMarkerSize() { return (m_dMarkerSize); }; /*! Returns a boolean representing if the line spanning the data points will be drawn or not. */ bool GetDrawLine() { return (m_bDrawLine); }; /*! Returns a boolean representing if the marker symbol will be drawn or not. */ bool GetDrawMarker() { return (m_bDrawMarker); }; /*! Returns a boolean representing if filling the interior of the marker symbol is enabled or disabled. */ bool GetDrawMarkerFilled() { return (m_bDrawMarkerFilled); }; /*! Sets the dased line pattern style used in drawing the line spanning data points. For the availabel styles see the declaration of the variable. */ void SetDashedLinePattern(std::string pattern) { m_iDashedLinePattern=pattern; }; /*! Gets the dased line pattern style used in drawing the line spanning data points. */ std::string GetDashedLinePattern() { return (m_iDashedLinePattern); }; /*! Sets the drawing style for the lines spanning data points to be dashed. */ void SetDashedLine(bool dashed) { m_bDashedLine=dashed; }; /*! Gets a boolean representing if the dashed drawing style for the lines spanning data points is active or not. */ bool GetDashedLine() { return (m_bDashedLine); }; /*! Sets the string used in the plot legend denoting this dataset. */ void SetDatasetTitle(std::string title) { m_strDatasetTitle=title; }; /*! Gets the string used in the plot legend denoting this dataset. */ std::string GetDatasetTitle() { return (m_strDatasetTitle); }; /*! Sets the font used in the plot legend denoting this dataset. */ void SetDatasetLegendFont(std::string font) { m_strDatasetLegendFont=font; }; /*! Gets the font used in the plot legend denoting this dataset. */ std::string GetDatasetLegendFont() { return (m_strDatasetLegendFont); } protected: std::string m_strDatasetTitle; /*!< Storage for the dataset's title. */ std::string m_strDatasetLegendFont; /*!< Size of the font used to show the dataset's title in the legend. */ double m_dLineWidth; /*!< The width of the line drawn to connect data points. */ double m_dColor[3]; /*!< The color of the line drawn to connect data points. */ std::string m_iDashedLinePattern; /*!< The dashed line pattern drawn to connect data points, the possibilites are (strings): "dot", "dash" or "dash_dot". 
*/ std::string m_strMarker; /*!< The style for the marker drawn at each data point, possibilities are (strings): "x", "o", "*", "diamond", "square", "triangle". */ double m_dMarkerSize; /*!< The size of the marker drawn at each data point. */ // flags bool m_bDrawLine; /*!< Boolean flag to enable/disable drawing of the line connecting data points. */ bool m_bDrawMarker; /*!< Boolean flag to enable/disable drawing of the marker at the location of data points. */ bool m_bDrawMarkerFilled; /*!< Boolean flag to enable/disable infilling of the marker at the location of data points. */ bool m_bDashedLine; /*!< Boolean flag to enable/disable drawing of a dashed line connecting data points. */ // data storage std::vector m_dDataPoints; /*!< A vector storage for the data points. */ }; /*! The class responsible for storing and rendering all the data sets it contains. Although the currently implemented method for rendering of a plot is into a PostScript file, it is envisioned that other rendering methods, such as OpenGL can be implemented as well. The accessors are created for both setting and retrieving plot parameters. */ class CPlot2D { public: /*! The constructor for the class. Member variables are initialized to a set of values, which results in a, hopefully, pleasing plot without changing any of them. */ CPlot2D(std::string title = ""); /*! Nothing really to destruct... */ ~CPlot2D(); // accessors /*! Sets the total size of the plot in the x dimension. */ void SetTitle(std::string); /*! Sets the total size of the plot in the x dimension. */ void SetXTotalSize(double value); /*! Gets the total size of the plot in the x dimension. */ double GetXTotalSize(); /*! Sets the total size of the plot in the y dimension. */ void SetYTotalSize(double value); /*! Gets the total size of the plot in the y dimension. */ double GetYTotalSize(); /*! Sets the x axis size of the plot. */ void SetXAxisSize(double value); /*! Gets the x axis size of the plot. */ double GetXAxisSize(); /*! Sets the y axis size of the plot. */ void SetYAxisSize(double value); /*! Gets the y axis size of the plot. */ double GetYAxisSize(); /*! Sets the bottom frame of the plot (e.g. the distance from the bottom of the image to the plot frame. */ void SetBottomFrameSize(double value); /*! Gets the bottom frame of the plot (e.g. the distance from the bottom of the image to the plot frame. */ double GetBottomFrameSize(); /*! Sets the right frame of the plot (e.g. the distance from the right of the image to the plot frame. */ void SetRightFrameSize(double value); /*! Gets the right frame of the plot (e.g. the distance from the right of the image to the plot frame. */ double GetRightFrameSize(); /*! Sets the top frame of the plot (e.g. the distance from the top of the image to the plot frame. */ void SetTopFrameSize(double value); /*! Gets the top frame of the plot (e.g. the distance from the top of the image to the plot frame. */ double GetTopFrameSize(); /*! Sets the left frame of the plot (e.g. the distance from the left of the image to the plot frame. */ void SetLeftFrameSize(double value); /*! Gets the left frame of the plot (e.g. the distance from the left of the image to the plot frame. */ double GetLeftFrameSize(); /*! Sets the thickness of the line that the plot frame is drawn with. */ void SetFrameLineWidth(double value); /*! Gets the thickness of the line that the plot frame is drawn with. */ double GetFrameLineWidth(); /*! Sets the thickness of the line that the grid is drawn with. */ void SetGridLineWidth(double value); /*! 
Gets the thickness of the line that the grid is drawn with. */ double GetGridLineWidth(); /*! Sets the color of the frame as an RGB triplet. */ void SetFrameColor(double r, double g, double b); /*! Gets the color of the frame as an RGB triplet. */ void GetFrameColor(double *r, double *g, double *b); /*! Sets the color of the grid as an RGB triplet. */ void SetGridColor(double r, double g, double b); /*! Gets the color of the grid as an RGB triplet. */ void GetGridColor(double *r, double *g, double *b); /*! Sets the spacing of tick marks and labels along the x axis. */ void SetXAxisNumbersSpacing(double spacing); /*! Gets the spacing of tick marks and labels along the x axis. */ double GetXAxisNumbersSpacing(); /*! Sets the spacing of tick marks and labels along the y axis. */ void SetYAxisNumbersSpacing(double spacing); /*! Gets the spacing of tick marks and labels along the y axis. */ double GetYAxisNumbersSpacing(); /*! Sets a flag to enable/disable drawing the x axis labels. */ void SetDrawXAxisTickMarks(bool flag); /*! Gets the state of a flag which enables/disables drawing the x axis labels. */ bool GetDrawXAxisTickMarks(); /*! Sets a flag to enable/disable drawing the y axis labels. */ void SetDrawYAxisTickMarks(bool flag); /*! Gets the state of a flag which enables/disables drawing the y axis labels. */ bool GetDrawYAxisTickMarks(); /*! Sets the number of tick marks and labels along the x axis. */ void SetXAxisNumberOfTicks(int number); /*! Gets the number of tick marks and labels along the x axis. */ int GetXAxisNumberOfTicks(); /*! Sets the number of tick marks and labels along the y axis. */ void SetYAxisNumberOfTicks(int number); /*! Gets the number of tick marks and labels along the y axis. */ int GetYAxisNumberOfTicks(); /*! Sets a flag that enables/disables the drawing of grid lines for the x axis. */ void SetDrawXAxisGridLines(bool flag); /*! Gets the state of a flag that enables/disables the drawing of grid lines for the x axis. */ bool GetDrawXAxisGridLines(); /*! Sets a flag that enables/disables the drawing of grid lines for the y axis. */ void SetDrawYAxisGridLines(bool flag); /*! Gets the state of a flag that enables/disables the drawing of grid lines for the y axis. */ bool GetDrawYAxisGridLines(); /*! Sets a flag that enables/disables the drawing of dashed grid lines. */ void SetDrawGridLinesDashed(bool flag); /*! Gets the state of a flag that enables/disables the drawing of dashed grid lines. */ bool GetDrawGridLinesDashed(); /*! Sets the font (as a string) that is used for the labels on the x axis. */ void SetXAxisLabelFont(std::string font); /*! Gets the font (as a string) that is used for the labels on the x axis. */ std::string GetXAxisLabelFont(); /*! Sets the font size for the x axis labels. */ void SetXAxisLabelFontSize(double value); /*! Gets the font size for the x axis labels. */ double GetXAxisLabelFontSize(); /*! Sets the font (as a string) that is used for the labels on the y axis. */ void SetYAxisLabelFont(std::string font); /*! Gets the font (as a string) that is used for the labels on the y axis. */ std::string GetYAxisLabelFont(); /*! Set the font size for the y axis labels. */ void SetYAxisLabelFontSize(double value); /*! Gets the font size for the y axis labels. */ double GetYAxisLabelFontSize(); /*! Sets the font (as a string) that is used for the title on the legend for the x axis. */ void SetXAxisTitleFont(std::string font); /*! Gets the font (as a string) that is used for the title on the legend for the x axis. 
*/ std::string GetXAxisTitleFont(); /*! Sets the font size that is used for the title on the legend for the x axis. */ void SetXAxisTitleFontSize(double value); /*! Gets the font size that is used for the title on the legend for the x axis. */ double GetXAxisTitleFontSize(); /*! Sets the font (as a string) that is used for the title on the legend for the y axis. */ void SetYAxisTitleFont(std::string font); /*! Gets the font (as a string) that is used for the title on the legend for the x axis. */ std::string GetYAxisTitleFont(); /*! Sets the font size that is used for the title on the legend for the y axis. */ void SetYAxisTitleFontSize(double value); /*! Gets the font size that is used for the title on the legend for the y axis. */ double GetYAxisTitleFontSize(); /*! Sets the title for the x axis. Usually this is where the units or quantity represented by the data should be displayed goes. */ void SetXAxisTitle(std::string title); /*! Gets the title for the x axis. */ std::string GetXAxisTitle(); /*! Sets the title for the y axis. Usually this is where the units or quantity represented by the data should be displayed goes. */ void SetYAxisTitle(std::string title); /*! Gets the title for the y axis. */ std::string GetYAxisTitle(); /*! Sets the color, as an RGB triplet, for the x axis title. */ void SetXAxisTitleColor(double r, double g, double b); /*! Gets the color, as an RGB triplet, for the x axis title. */ void GetXAxisTitleColor(double *r, double *g, double *b); /*! Sets the color, as an RGB triplet, for the y axis title. */ void SetYAxisTitleColor(double r, double g, double b); /*! Gets the color, as an RGB triplet, for the y axis title. */ void GetYAxisTitleColor(double *r, double *g, double *b); /*! Sets the color, as an RGB triplet, for the x axis labels. */ void SetXAxisLabelColor(double r, double g, double b); /*! Gets the color, as an RGB triplet, for the x axis labels. */ void GetXAxisLabelColor(double *r, double *g, double *b); /*! Sets the color, as an RGB triplet, for the y axis labels. */ void SetYAxisLabelColor(double r, double g, double b); /*! Gets the color, as an RGB triplet, for the y axis labels. */ void GetYAxisLabelColor(double *r, double *g, double *b); /*! Set a flag to enable/disable drawing the legend. */ void SetDrawLegend(bool flag); /*! Get the state of a flag that enables/disables drawing the legend. */ bool GetDrawLegend(); /*! Get and Set a flag that flips the orientation of the Y axis. */ bool GetFlipY(); void SetFlipY(bool flag); // outputs /*! The function, which is responsible for generating the PostScript output of the plot. */ void OutputPostScriptPlot(std::string fileName); // data set functions /*! Adds a new dataset to the plot. */ void AddDataSet(CDataSet dataSet); /*! Adds a new dataset to the plot as two arrays comprised of the x and y values. The length of the array is supplied as well. */ void AddDataSet(int numPoints, double *xValues, double *yValues); /*! Adds a new dataset to the plot as two vectors comprised of the x and y values */ void AddDataSet(std::vector xValues, std::vector yValues); void AddDataSet(std::vector yValues); protected: /*! A function, which precomputes parameters of the plot, such as the overal size of the plot, the spacing of tick marks and labels, the length of the dashes in a dashed line. */ void PrecomputeDimensions(); /*! A function to compute label spacing, see the comments at the head of the implementation regarding the source of the algorithm. 
*/ void ComputeLabelTickSpacing(double dataMin, double dataMax, double *plotMin, double *plotMax, double *tickSpacing, int numTicks, std::string axis); /*! A function to aid the computation of label spacing, see the comments at the head of the implementation regarding the source of the algorithm. */ double NiceNum(double x, int round); // outputs /*! A function to draw the frame surrounding the plot in a PostScript format. */ void DrawFramePostScript(); /*! A function to draw the data sets in a PostScript format. */ void DrawDataPostScript(); /*! A function to draw a single marker in a PostScript format. */ void DrawMarker(std::string symbol, double size, bool filled, double xLocation, double yLocation, int dataSet); /*! A function to draw the x axis tick marks in a PostScript format. */ void DrawXAxisTickMarksPostScript(); /*! A function to draw the y axis tick marks in a PostScript format. */ void DrawYAxisTickMarksPostScript(); /*! A function to draw the x axis grid lines in a PostScript format. */ void DrawXAxisGridLinesPostScript(); /*! A function to draw the y axis grid lines in a PostScript format. */ void DrawYAxisGridLinesPostScript(); /*! A function to draw the x axis labels in a PostScript format. */ void DrawXAxisLabelsPostScript(); /*! A function to draw the y axis labels in a PostScript format. */ void DrawYAxisLabelsPostScript(); /*! A function to draw the x axis title in a PostScript format. */ void DrawXAxisTitlePostScript(); /*! A function to draw the y axis title in a PostScript format. */ void DrawYAxisTitlePostScript(); /*! A function to draw the legend in a PostScript format. */ void DrawLegendPostScript(); protected: // general plot sizes double m_dXTotalSize; /*!< Total size of the plot in the x direction. Including the frames around it. */ double m_dYTotalSize; /*!< Total size of the plot in the y direction. Including the frames around it.*/ double m_dXAxisSize; /*!< The size of the plot in the x direction. Not including the frames around it. */ double m_dYAxisSize; /*!< The size of the plot in the y direction. Not including the frames around it. */ double m_dDiagonalSize; /*!< The size of the plot along its diagonal direction. */ double m_dFlipYOffset; /*!< Used to shift the Y origin when flipping the Y axis. */ double m_dBottomFrameSize; /*!< The size of the bottom frame. */ double m_dRightFrameSize; /*!< The size of the right frame. */ double m_dTopFrameSize; /*!< The size of the top frame. */ double m_dLeftFrameSize; /*!< The size of the left frame. */ int m_iXAxisNumberOfTicks; /*!< Number of tick marks along the x axis. */ int m_iYAxisNumberOfTicks; /*!< Number of tick marks along the y axis. */ double m_dXAxisNumbersSpacing; /*!< Spacing between tick marks along the x axis. */ double m_dYAxisNumbersSpacing; /*!< Spacing between tick marks along the y axis. */ char m_cXAxisLabelFormat[10]; /*!< Format of labels along the x axis. */ char m_cYAxisLabelFormat[10]; /*!< Format of labels along the y axis. */ std::vector m_strXAxisLabels; /*!< Labels along the x axis. */ std::vector m_strYAxisLabels; /*!< Labels along the y axis. */ //Sjors Scheres 22mar2016: insert PlotTitle std::string m_strPlotTitle; /*!< Title of plot. */ std::string m_strXAxisTitle; /*!< Title of x axis. */ std::string m_strYAxisTitle; /*!< Title of y axis. */ int m_iXAxisNumberOfLabels; /*!< Number of labels along the x axis. */ int m_iYAxisNumberOfLabels; /*!< Number of labels along the y axis. */ double m_dXScale; /*!< Scale along x axis that converts dataset values to the plot space. 
*/ double m_dYScale; /*!< Scale along yx axis that converts dataset values to the plot spac. */ double m_dMaxXExtent; /*!< Maximum extent of x axis. */ double m_dMaxYExtent; /*!< Maximum extent of y axis. */ double m_dMinXStartPoint; /*!< Minimum starting point along x axis over all data sets. */ double m_dMinYStartPoint; /*!< Minimum starting point along y axis over all data sets. */ double m_dMaxXEndPoint; /*!< Maximum end point along x axis over all data sets. */ double m_dMaxYEndPoint; /*!< Maximum end point along y axis over all data sets. */ // line widths double m_dFrameLineWidth; /*!< Width (thickness) of line used for the frame. */ double m_dGridLineWidth; /*!< Width (thickness) of line used for the grid. */ // colors double m_dFrameColor[3]; /*!< Frame color as an RGB triplet. */ double m_dGridColor[3]; /*!< Grid color as an RGB triplet. */ double m_dXAxisTitleColor[3]; /*!< X axis title color as an RGB triplet. */ double m_dYAxisTitleColor[3]; /*!< Y axis title color as an RGB triplet. */ double m_dXAxisLabelColor[3]; /*!< X axis label color as an RGB triplet. */ double m_dYAxisLabelColor[3]; /*!< Y axis label color as an RGB triplet. */ // pleasing dot spacing, tick mark length double m_dLineDotSpacing; /*!< Spacing of lines and dots for a dashed line. */ double m_dTickMarkLength; /*!< Length of tick marks. */ // fonts std::string m_strXAxisLabelFont; /*!< Font for x axis labels. */ double m_dXAxisLabelFontSize; /*!< Font size for x axis label. */ std::string m_strYAxisLabelFont; /*!< Font for yx axis label. */ double m_dYAxisLabelFontSize; /*!< Font size for y axis label. */ std::string m_strXAxisTitleFont; /*!< Font for x axis title. */ double m_dXAxisTitleFontSize; /*!< Font size for x axis title. */ std::string m_strYAxisTitleFont; /*!< Font for y axis title. */ double m_dYAxisTitleFontSize; /*!< Font size for y axis title. */ // flags bool m_bDrawXAxisTickMarks; /*!< Flag for enabling/disabling the drawing of x axis tick marks. */ bool m_bDrawYAxisTickMarks; /*!< Flag for enabling/disabling the drawing of y axis tick marks. */ bool m_bDrawXAxisGridLines; /*!< Flag for enabling/disabling the drawing of x axis grid lines. */ bool m_bDrawYAxisGridLines; /*!< Flag for enabling/disabling the drawing of y axis grid lines. */ bool m_bDrawGridLinesDashed; /*!< Flag for enabling/disabling the drawing dashed grid lines. */ bool m_bDrawLegend; /*!< Flag for enabling/disabling the drawing of the legend. */ bool m_bFlipY; /*!< Flag for flipping the Y axis. */ // output std::ofstream outputFile; /*!< The output stream. */ // data storage std::vector m_dataSets; /*!< Storage for the datasets, implemented as a vector. 
*/ }; inline void CPlot2D::SetTitle(std::string title) { m_strPlotTitle=title; } inline void CPlot2D::SetXTotalSize(double value) { m_dXTotalSize=value; } inline void CPlot2D::SetYTotalSize(double value) { m_dYTotalSize=value; } inline void CPlot2D::SetXAxisSize(double value) { m_dXAxisSize=value; } inline void CPlot2D::SetYAxisSize(double value) { m_dYAxisSize=value; } inline void CPlot2D::SetBottomFrameSize(double value) { m_dBottomFrameSize=value; } inline void CPlot2D::SetRightFrameSize(double value) { m_dRightFrameSize=value; } inline void CPlot2D::SetTopFrameSize(double value) { m_dTopFrameSize=value; } inline void CPlot2D::SetLeftFrameSize(double value) { m_dLeftFrameSize=value; } inline double CPlot2D::GetXTotalSize() { return (m_dXTotalSize); } inline double CPlot2D::GetYTotalSize() { return (m_dYTotalSize); } inline double CPlot2D::GetXAxisSize() { return (m_dXAxisSize); } inline double CPlot2D::GetYAxisSize() { return (m_dYAxisSize); } inline double CPlot2D::GetBottomFrameSize() { return (m_dBottomFrameSize); } inline double CPlot2D::GetRightFrameSize() { return (m_dRightFrameSize); } inline double CPlot2D::GetTopFrameSize() { return (m_dTopFrameSize); } inline double CPlot2D::GetLeftFrameSize() { return (m_dLeftFrameSize); } inline void CPlot2D::SetFrameLineWidth(double value) { m_dFrameLineWidth=value; } inline double CPlot2D::GetFrameLineWidth() { return (m_dFrameLineWidth); } inline void CPlot2D::SetGridLineWidth(double value) { m_dGridLineWidth=value; } inline double CPlot2D::GetGridLineWidth() { return (m_dGridLineWidth); } inline void CPlot2D::SetFrameColor(double r, double g, double b) { m_dFrameColor[0]=r; m_dFrameColor[1]=g; m_dFrameColor[2]=b; } inline void CPlot2D::GetFrameColor(double *r, double *g, double *b) { *r=m_dFrameColor[0]; *g=m_dFrameColor[1]; *b=m_dFrameColor[2]; } inline void CPlot2D::SetGridColor(double r, double g, double b) { m_dGridColor[0]=r; m_dGridColor[1]=g; m_dGridColor[2]=b; } inline void CPlot2D::GetGridColor(double *r, double *g, double *b) { *r=m_dGridColor[0]; *g=m_dGridColor[1]; *b=m_dGridColor[2]; } inline void CPlot2D::AddDataSet(CDataSet dataSet) { m_dataSets.push_back(dataSet); } inline void CPlot2D::SetXAxisNumbersSpacing(double spacing) { m_dXAxisNumbersSpacing=spacing; } inline void CPlot2D::SetYAxisNumbersSpacing(double spacing) { m_dYAxisNumbersSpacing=spacing; } inline void CPlot2D::SetDrawXAxisTickMarks(bool flag) { m_bDrawXAxisTickMarks=flag; } inline void CPlot2D::SetDrawYAxisTickMarks(bool flag) { m_bDrawYAxisTickMarks=flag; } inline void CPlot2D::SetXAxisNumberOfTicks(int number) { m_iXAxisNumberOfTicks=number; } inline void CPlot2D::SetYAxisNumberOfTicks(int number) { m_iYAxisNumberOfTicks=number; } inline void CPlot2D::SetDrawXAxisGridLines(bool flag) { m_bDrawXAxisGridLines=flag; } inline void CPlot2D::SetDrawYAxisGridLines(bool flag) { m_bDrawYAxisGridLines=flag; } inline void CPlot2D::SetDrawGridLinesDashed(bool flag) { m_bDrawGridLinesDashed=flag; } inline double CPlot2D::GetXAxisNumbersSpacing() { return (m_dXAxisNumbersSpacing); } inline double CPlot2D::GetYAxisNumbersSpacing() { return (m_dYAxisNumbersSpacing); } inline bool CPlot2D::GetDrawXAxisTickMarks() { return (m_bDrawXAxisTickMarks); } inline bool CPlot2D::GetDrawYAxisTickMarks() { return (m_bDrawYAxisTickMarks); } inline int CPlot2D::GetXAxisNumberOfTicks() { return (m_iXAxisNumberOfTicks); } inline int CPlot2D::GetYAxisNumberOfTicks() { return (m_iYAxisNumberOfTicks); } inline bool CPlot2D::GetDrawXAxisGridLines() { return (m_bDrawXAxisGridLines); } inline 
bool CPlot2D::GetDrawYAxisGridLines() { return (m_bDrawYAxisGridLines); } inline bool CPlot2D::GetDrawGridLinesDashed() { return (m_bDrawGridLinesDashed); } inline void CPlot2D::SetXAxisLabelFont(std::string font) { m_strXAxisLabelFont=font; } inline std::string CPlot2D::GetXAxisLabelFont() { return (m_strXAxisLabelFont); } inline void CPlot2D::SetXAxisLabelFontSize(double value) { m_dXAxisLabelFontSize=value; } inline double CPlot2D::GetXAxisLabelFontSize() { return (m_dXAxisLabelFontSize); } inline void CPlot2D::SetYAxisLabelFont(std::string font) { m_strYAxisLabelFont=font; } inline std::string CPlot2D::GetYAxisLabelFont() { return (m_strYAxisLabelFont); } inline void CPlot2D::SetYAxisLabelFontSize(double value) { m_dYAxisLabelFontSize=value; } inline double CPlot2D::GetYAxisLabelFontSize() { return (m_dYAxisLabelFontSize); } inline void CPlot2D::SetXAxisTitleFont(std::string font) { m_strXAxisTitleFont=font; } inline std::string CPlot2D::GetXAxisTitleFont() { return (m_strXAxisTitleFont); } inline void CPlot2D::SetXAxisTitleFontSize(double value) { m_dXAxisTitleFontSize=value; } inline double CPlot2D::GetXAxisTitleFontSize() { return (m_dXAxisTitleFontSize); } inline void CPlot2D::SetYAxisTitleFont(std::string font) { m_strYAxisTitleFont=font; } inline std::string CPlot2D::GetYAxisTitleFont() { return (m_strYAxisTitleFont); } inline void CPlot2D::SetYAxisTitleFontSize(double value) { m_dYAxisTitleFontSize=value; } inline double CPlot2D::GetYAxisTitleFontSize() { return (m_dYAxisTitleFontSize); } inline void CPlot2D::SetXAxisTitle(std::string title) { m_strXAxisTitle=title; } inline std::string CPlot2D::GetXAxisTitle() { return (m_strXAxisTitle); } inline void CPlot2D::SetYAxisTitle(std::string title) { m_strYAxisTitle=title; } inline std::string CPlot2D::GetYAxisTitle() { return (m_strYAxisTitle); } inline void CPlot2D::SetXAxisTitleColor(double r, double g, double b) { m_dXAxisTitleColor[0]=r; m_dXAxisTitleColor[1]=g; m_dXAxisTitleColor[2]=b; } inline void CPlot2D::GetXAxisTitleColor(double *r, double *g, double *b) { *r=m_dXAxisTitleColor[0]; *g=m_dXAxisTitleColor[1]; *b=m_dXAxisTitleColor[2]; } inline void CPlot2D::SetYAxisTitleColor(double r, double g, double b) { m_dYAxisTitleColor[0]=r; m_dYAxisTitleColor[1]=g; m_dYAxisTitleColor[2]=b; } inline void CPlot2D::GetYAxisTitleColor(double *r, double *g, double *b) { *r=m_dYAxisTitleColor[0]; *g=m_dYAxisTitleColor[1]; *b=m_dYAxisTitleColor[2]; } inline void CPlot2D::SetXAxisLabelColor(double r, double g, double b) { m_dXAxisLabelColor[0]=r; m_dXAxisLabelColor[1]=g; m_dXAxisLabelColor[2]=b; } inline void CPlot2D::GetXAxisLabelColor(double *r, double *g, double *b) { *r=m_dXAxisLabelColor[0]; *g=m_dXAxisLabelColor[1]; *b=m_dXAxisLabelColor[2]; } inline void CPlot2D::SetYAxisLabelColor(double r, double g, double b) { m_dYAxisLabelColor[0]=r; m_dYAxisLabelColor[1]=g; m_dYAxisLabelColor[2]=b; } inline void CPlot2D::GetYAxisLabelColor(double *r, double *g, double *b) { *r=m_dYAxisLabelColor[0]; *g=m_dYAxisLabelColor[1]; *b=m_dYAxisLabelColor[2]; } inline void CPlot2D::SetDrawLegend(bool flag) { m_bDrawLegend=flag; } inline bool CPlot2D::GetDrawLegend() { return (m_bDrawLegend); } inline void CPlot2D::SetFlipY(bool flag) { m_bFlipY=flag; } inline bool CPlot2D::GetFlipY() { return (m_bFlipY); } #endif /* defined(__CPlot2D__) */ 
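/* Illustrative usage sketch (added for documentation only; the function name
   plot_example() and the output file name below are invented for this example).
   It relies only on the CPlot2D interface declared above:

       #include "src/CPlot2D.h"
       #include <vector>

       void plot_example()
       {
           std::vector<double> yValues;
           for (int i = 0; i < 50; i++)
               yValues.push_back(1.0 / (1.0 + 0.1 * i));   // dummy data

           CPlot2D plot("Example plot");           // plot title drawn above the frame
           plot.SetXAxisTitle("index");
           plot.SetYAxisTitle("value");
           plot.AddDataSet(yValues);               // x values default to 1, 2, ..., N
           plot.OutputPostScriptPlot("example_plot.eps");
       }
*/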
relion-3.1.3/src/Healpix_2.15a/000077500000000000000000000000001411340063500157725ustar00rootroot00000000000000relion-3.1.3/src/Healpix_2.15a/NOTES000066400000000000000000000024661411340063500166150ustar00rootroot00000000000000#Downloaded Healpix_2.15a from http://sourceforge.net/projects/healpix/?showfeed=everything #Extracted tarball in: ~/app/Healpix_2.15a #Then copied relevant parts of it: cd $ELMOHOME/external/Healpix_2.15a/ cp -r ~/app/Healpix_2.15a/src/cxx/cxxsupport . cp -r ~/app/Healpix_2.15a/src/cxx/Healpix_cxx . cp -r ~/app/Healpix_2.15a/src/cxx/libfftpack . # Then changed all the header to simplify my Makefile.am: ./change_includes.csh WHICH IS: #!/bin/csh -f foreach pat (*/*.h) set head=`echo $pat | awk -F"/" '{print $NF}'` echo $pat foreach cfile (*/*.h */*.cc) sed "s|${head}|external/Healpix_2.15a/${pat}|" <${cfile} > t; mv -f t ${cfile} end end # Then move xyf2nest, nest2xyf, xyf2ring and ring2xyf in # Healpix_cxx/healpix_base.h from protected to public # Then added header and sources to the Makefile.am: # remove stuff to do with fits cd $ELMOHOME ls external/Healpix_2.15a/*/*.h |awk '{print $NF, "\\"}' |grep -v "fits" |grep -v "simparam" ls external/Healpix_2.15a/*/*.cc |awk '{print $NF, "\\"}' |grep -v "fits" |grep -v "simparam" #not fitshandle # In the end only keep minimum of the library and store all in this directory: arr.h cxxutils.cc cxxutils.h datatypes.h geom_utils.h healpix_base.cc healpix_base.h lsconstants.h message_error.h openmp_support.h pointing.h vec3.h relion-3.1.3/src/Healpix_2.15a/arr.h000066400000000000000000000350221411340063500167310ustar00rootroot00000000000000/* * This file is part of Healpix_cxx. * * Healpix_cxx is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * Healpix_cxx is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Healpix_cxx; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * * For more information about HEALPix, see http://healpix.jpl.nasa.gov */ /* * Healpix_cxx is being developed at the Max-Planck-Institut fuer Astrophysik * and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt * (DLR). */ /*! \file src/Healpix_2.15a/cxxsupport/arr.h * Various high-performance array classes used by the Planck LevelS package. * * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 Max-Planck-Society * \author Martin Reinecke */ #ifndef PLANCK_ARR_H #define PLANCK_ARR_H #include "src/Healpix_2.15a/cxxutils.h" #include /*! \defgroup arraygroup Array classes */ /*! \{ */ /*! An array whose size is known at compile time. Very useful for storing small arrays on the stack, without need for \a new and \a delete(). */ template class fix_arr { private: T d[sz]; public: /*! Returns the size of the array. */ long size() const { return sz; } /*! Returns a reference to element \a #n */ template T &operator[] (T2 n) {return d[n];} /*! Returns a constant reference to element \a #n */ template const T &operator[] (T2 n) const {return d[n];} }; /*! One-dimensional array type. 
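    As an illustrative sketch (this example is not part of the original Healpix
    documentation), typical usage is:
    \code
    arr<double> a(5, 0.0);   // five entries, all initialised to 0.0
    a[2] = 3.14;
    a.sort();                // ascending order
    \endcode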
*/ template class arr { private: long s; T *d; bool own; #if defined(PLANCK_CHECKS) void check_range(long n) const { if ((n<0) || (n>=s)) throw Message_error ("arr: index "+dataToString(n)+" is out of range. Max index is " +dataToString(s-1)); } #endif void reset() { s=0; d=0; own=true; } public: /*! Creates a zero-sized array. */ arr() : s(0), d(0), own(true) {} /*! Creates an array with \a sz entries. */ arr(long sz) : s(sz), d (s>0 ? new T[s] : 0), own(true) {} /*! Creates an array with \a sz entries, and initializes them with \a inival. */ arr(long sz, const T &inival) : s(sz), d (s>0 ? new T[s] : 0), own(true) { fill(inival); } /*! Creates an array with \a sz entries, which uses the memory pointed to by \a ptr. \note \a ptr will not be deallocated by the destructor. \warning Only use this if you REALLY know what you are doing. In particular, this is only safely usable if

  - \a T is a POD type
  - \a ptr survives during the lifetime of the array object
  - \a ptr is not subject to garbage collection
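    For illustration only (this example is not part of the original Healpix
    documentation), a typical use under these assumptions is wrapping an
    existing C buffer without copying:
    \code
    double buf[16];
    arr<double> view(buf, 16);   // 'view' reads/writes buf; buf is never delete[]'d by arr
    \endcode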
Other restrictions may apply. You have been warned. */ arr (T *ptr, long sz): s(sz), d(ptr), own(false) {} /*! Creates an array which is a copy of \a orig. The data in \a orig is duplicated. */ arr (const arr &orig): s(orig.s), d (s>0 ? new T[s] : 0), own(true) { for (long m=0; m0 ? new T[sz] : 0; own = true; } /*! Deallocates the memory held by the array, and sets the array size to 0. */ void dealloc () {if (own) delete[] d; reset();} /*! Writes \a val into every element of the array. */ void fill (const T &val) { for (long m=0; m T &operator[] (T2 n) {check_range(n); return d[n];} template const T &operator[] (T2 n) const {check_range(n); return d[n];} #else /*! Returns a reference to element \a #n */ template T &operator[] (T2 n) {return d[n];} /*! Returns a constant reference to element \a #n */ template const T &operator[] (T2 n) const {return d[n];} #endif T *begin() { return d; } T *end() { return d+s; } /*! Sorts the elements in the array, in ascending order. */ void sort() { std::sort (d,d+s); } /*! Returns the minimum and maximum entry in \a minv and \a maxv, respectively. Throws an exception if the array is zero-sized. */ void minmax (T &minv, T &maxv) const { planck_assert(s>0,"trying to find min and max of a zero-sized array"); minv=maxv=d[0]; for (int m=1; mmaxv) maxv=d[m]; } } /*! Assigns the contents and size of \a other to the array. On exit, \a other is yero-sized. */ void transfer (arr &other) { if (own) delete[] d; d=other.d; s=other.s; own=other.own; other.reset(); } /*! Swaps contents and size with \a other. */ void swap (arr &other) { std::swap(d,other.d); std::swap(s,other.s); std::swap(own,other.own);} }; /*! Two-dimensional array type. The storage ordering is the same as in C. An entry is located by address arithmetic, not by double dereferencing. The indices start at zero. */ template class arr2 { private: long s1, s2; arr d; #if defined (PLANCK_CHECKS) void check_range(long n) const { if ((n<0) || (n>=s1)) throw Message_error ("arr2: index "+dataToString(n)+" is out of range. Max index is " +dataToString(s1-1)); } #endif public: /*! Creates a zero-sized array. */ arr2() : s1(0), s2(0) {} /*! Creates an array with the dimensions \a sz1 and \a sz2. */ arr2(long sz1, long sz2) : s1(sz1), s2(sz2), d(s1*s2) {} /*! Creates the array as a copy of \a orig. */ arr2(const arr2 &orig) : s1(orig.s1), s2(orig.s2), d(orig.d) {} /*! Frees the memory associated with the array. */ ~arr2() {} /*! Returns the first array dimension. */ long size1() const { return s1; } /*! Returns the second array dimension. */ long size2() const { return s2; } /*! Returns the total array size, i.e. the product of both dimensions. */ long size () const { return s1*s2; } /*! Allocates space for an array with \a sz1*sz2 elements. The content of the array is undefined on exit. \a sz1 or \a sz2 can be 0. If \a sz1*sz2 is the same as the currently allocated space, no reallocation is performed. */ void alloc (long sz1, long sz2) { if (sz1*sz2 != d.size()) d.alloc(sz1*sz2); s1=sz1; s2=sz2; } /*! Allocates space for an array with \a sz1*sz2 elements. The content of the array is undefined on exit. \a sz1 or \a sz2 can be 0. If \a sz1*sz2 is smaller than the currently allocated space, no reallocation is performed. */ void fast_alloc (long sz1, long sz2) { if (sz1*sz2<=d.size()) { s1=sz1; s2=sz2; } else alloc(sz1,sz2); } /*! Deallocates the space and makes the array zero-sized. */ void dealloc () {d.dealloc(); s1=0; s2=0;} /*! Sets all array elements to \a val. */ void fill (const T &val) { d.fill(val); } /*! 
Changes the array to be a copy of \a orig. */ arr2 &operator= (const arr2 &orig) { if (this==&orig) return *this; alloc (orig.s1, orig.s2); d = orig.d; return *this; } #if defined (PLANCK_CHECKS) template T *operator[] (T2 n) {check_range(n);return &d[n*s2];} template const T *operator[] (T2 n) const {check_range(n);return &d[n*s2];} #else /*! Returns a pointer to the beginning of slice \a #n. */ template T *operator[] (T2 n) {return &d[n*s2];} /*! Returns a constant pointer to the beginning of slice \a #n. */ template const T *operator[] (T2 n) const {return &d[n*s2];} #endif /*! Returns the minimum and maximum entry in \a minv and \a maxv, respectively. Throws an exception if the array is zero-sized. */ void minmax (T &minv, T &maxv) const { planck_assert(s1*s2>0, "trying to find min and max of a zero-sized array"); minv=maxv=d[0]; for (int m=1; mmaxv) maxv=d[m]; } } /*! Swaps contents and sizes with \a other. */ void swap (arr2 &other) { d.swap(other.d); std::swap(s1,other.s1); std::swap(s2,other.s2); } }; /*! Two-dimensional array type. An entry is located by double dereferencing, i.e. via an array of pointers. The indices start at zero. */ template class arr2b { private: long s1, s2; arr d; arr d1; #if defined (PLANCK_CHECKS) void check_range(long n) const { if ((n<0) || (n>=s1)) throw Message_error ("arr: index "+dataToString(n)+" is out of range. Max index is " +dataToString(s1-1)); } #endif void fill_d1() { for (long m=0; m T *operator[] (T2 n) {check_range(n); return d1[n];} template const T *operator[] (T2 n) const {check_range(n); return d1[n];} #else /*! Returns a pointer to the beginning of slice \a #n. */ template T *operator[] (T2 n) {return d1[n];} /*! Returns a constant pointer to the beginning of slice \a #n. */ template const T *operator[] (T2 n) const {return d1[n];} #endif /*! Returns a pointer to the beginning of the pointer array. */ T **p0() {return &d1[0];} }; /*! Three-dimensional array type. The storage ordering is the same as in C. An entry is located by address arithmetic, not by multiple dereferencing. The indices start at zero. */ template class arr3 { private: long s1, s2, s3, s2s3; arr d; public: /*! Creates a zero-sized array. */ arr3() : s1(0), s2(0), s3(0), s2s3(0), d(0) {} /*! Creates an array with the dimensions \a sz1, \a sz2 and \a sz3. */ arr3(long sz1, long sz2, long sz3) : s1(sz1), s2(sz2), s3(sz3), s2s3(s2*s3), d(s1*s2*s3) {} /*! Creates the array as a copy of \a orig. */ arr3(const arr3 &orig) : s1(orig.s1), s2(orig.s2), s3(orig.s3), s2s3(orig.s2s3), d(orig.d) {} /*! Frees the memory associated with the array. */ ~arr3() {} /*! Returns the first array dimension. */ long size1() const { return s1; } /*! Returns the second array dimension. */ long size2() const { return s2; } /*! Returns the third array dimension. */ long size3() const { return s3; } /*! Returns the total array size, i.e. the product of all dimensions. */ long size () const { return s1*s2*s3; } /*! Allocates space for an array with \a sz1*sz2*sz3 elements. The content of the array is undefined on exit. */ void alloc (long sz1, long sz2, long sz3) { d.alloc(sz1*sz2*sz3); s1=sz1; s2=sz2; s3=sz3; s2s3=s2*s3; } /*! Deallocates the space and makes the array zero-sized. */ void dealloc () {d.dealloc(); s1=0; s2=0; s3=0; s2s3=0;} /*! Sets all array elements to \a val. */ void fill (const T &val) { d.fill(val); } /*! Changes the array to be a copy of \a orig. 
*/ arr3 &operator= (const arr3 &orig) { if (this==&orig) return *this; alloc (orig.s1, orig.s2, orig.s3); d = orig.d; return *this; } /*! Returns a reference to the element with the indices \a n1, \a n2 and \a n3. */ template T &operator() (T2 n1, T2 n2, T2 n3) {return d[n1*s2s3 + n2*s3 + n3];} /*! Returns a constant reference to the element with the indices \a n1, \a n2 and \a n3. */ template const T &operator() (T2 n1, T2 n2, T2 n3) const {return d[n1*s2s3 + n2*s3 + n3];} /*! Swaps contents and sizes with \a other. */ void swap (arr3 &other) { d.swap(other.d); std::swap(s1,other.s1); std::swap(s2,other.s2); std::swap(s3,other.s3); std::swap(s2s3,other.s2s3); } }; /*! \} */ #endif relion-3.1.3/src/Healpix_2.15a/cxxutils.cc000066400000000000000000000211771411340063500201740ustar00rootroot00000000000000/* * This file is part of Healpix_cxx. * * Healpix_cxx is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * Healpix_cxx is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Healpix_cxx; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * * For more information about HEALPix, see http://healpix.jpl.nasa.gov */ /* * Healpix_cxx is being developed at the Max-Planck-Institut fuer Astrophysik * and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt * (DLR). */ /* * This file contains the implementation of various convenience functions * used by the Planck LevelS package. * * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 Max-Planck-Society * Authors: Martin Reinecke, Reinhard Hell */ // if we are using g++, check for version 3.0 or higher #ifdef __GNUC__ #if (__GNUC__<3) #error your C++ compiler is too old. g++ version 3.0 or higher is required. #endif #endif #include #include #include #include #include #include #include #include "src/Healpix_2.15a/cxxutils.h" #include "src/Healpix_2.15a/datatypes.h" #include "src/Healpix_2.15a/openmp_support.h" using namespace std; bool file_present (const string &filename) { ifstream dummy(filename.c_str()); //JV return (bool) dummy; } void assert_present (const string &filename) { if (file_present(filename)) return; throw Message_error ("Error: file " + filename + " does not exist!"); } void assert_not_present (const string &filename) { if (!file_present(filename)) return; throw Message_error ("Error: file " + filename + " already exists!"); } void remove_file (const string &filename) { remove (filename.c_str()); } string trim (const string &orig) { string::size_type p1=orig.find_first_not_of(" \t"); if (p1==string::npos) return ""; string::size_type p2=orig.find_last_not_of(" \t"); return orig.substr(p1,p2-p1+1); } template string dataToString (const T &x) { ostringstream strstrm; strstrm << x; return trim(strstrm.str()); } template<> string dataToString (const bool &x) { return x ? 
"T" : "F"; } template<> string dataToString (const string &x) { return trim(x); } template<> string dataToString (const float &x) { ostringstream strstrm; strstrm << setprecision(8) << x; return trim(strstrm.str()); } template<> string dataToString (const double &x) { ostringstream strstrm; strstrm << setprecision(16) << x; return trim(strstrm.str()); } template string dataToString (const signed char &x); template string dataToString (const unsigned char &x); template string dataToString (const short &x); template string dataToString (const unsigned short &x); template string dataToString (const int &x); template string dataToString (const unsigned int &x); template string dataToString (const long &x); template string dataToString (const unsigned long &x); template string dataToString (const long long &x); template string dataToString (const unsigned long long &x); string intToString(int x, int width) { ostringstream strstrm; strstrm << setw(width) << setfill('0') << x; return trim(strstrm.str()); } template void stringToData (const string &x, T &value) { string error = string("conversion error in stringToData<") + type2typename() +">(\""+x+"\")"; istringstream strstrm(x); strstrm >> value; if (!strstrm) throw Message_error(error); string rest; strstrm >> rest; // rest=trim(rest); if (rest.length()>0) throw Message_error(error); } template<> void stringToData (const string &x, string &value) { value = trim(x); } template<> void stringToData (const string &x, bool &value) { if ( x=="F" || x=="f" || x=="n" || x=="N" || x=="false" || x==".false." || x=="FALSE" || x==".FALSE.") value=false; else if (x=="T" || x=="t" || x=="y" || x=="Y" || x=="true" || x==".true." || x=="TRUE" || x==".TRUE.") value=true; else { string error = string("conversion error in stringToData(\"")+x+"\")"; throw Message_error (error); } } template void stringToData (const string &x, signed char &value); template void stringToData (const string &x, unsigned char &value); template void stringToData (const string &x, short &value); template void stringToData (const string &x, unsigned short &value); template void stringToData (const string &x, int &value); template void stringToData (const string &x, unsigned int &value); template void stringToData (const string &x, long &value); template void stringToData (const string &x, unsigned long &value); template void stringToData (const string &x, long long &value); template void stringToData (const string &x, unsigned long long &value); template void stringToData (const string &x, float &value); template void stringToData (const string &x, double &value); bool equal_nocase (const string &a, const string &b) { if (a.size()!=b.size()) return false; for (unsigned int m=0; mlastpercent) cout << "\r " << setw(3) << nowpercent << "% done\r" << flush; } void end_announce_progress () { cout << endl; } #endif static void openmp_status() { if (openmp_enabled()) { cout << "Application was compiled with OpenMP support," << endl; if (openmp_max_threads() == 1) cout << "but running with one process only." << endl; else cout << "running with up to " << openmp_max_threads() << " processes." << endl; } else cout << "Application was compiled without OpenMP support;" << endl << "running in scalar mode." 
<< endl; } void announce (const string &name) { cout << endl << "+-"; for (unsigned int m=0; m &dict) { int lineno=0; dict.clear(); ifstream inp(filename.c_str()); //JV planck_assert ((bool)inp,"Could not open parameter file "+filename); while (inp) { string line; getline(inp, line); ++lineno; // remove potential carriage returns at the end of the line line=line.substr(0,line.find_first_of("\r")); line=line.substr(0,line.find_first_of("#")); line=trim(line); if (line.size()>0) { string::size_type eqpos=line.find("="); if (eqpos!=string::npos) { string key=trim(line.substr(0,eqpos)), value=trim(line.substr(eqpos+1,string::npos)); if (key=="") cerr << "Warning: empty key in " << filename << ", line " << lineno << endl; else { if (dict.find(key)!=dict.end()) cerr << "Warning: key " << key << " multiply defined in " << filename << ", line " << lineno << endl; dict[key]=value; } } else cerr << "Warning: unrecognized format in " << filename << ", line " << lineno << ":\n" << line << endl; } } } relion-3.1.3/src/Healpix_2.15a/cxxutils.h000066400000000000000000000226341411340063500200350ustar00rootroot00000000000000/* * This file is part of Healpix_cxx. * * Healpix_cxx is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * Healpix_cxx is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Healpix_cxx; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * * For more information about HEALPix, see http://healpix.jpl.nasa.gov */ /* * Healpix_cxx is being developed at the Max-Planck-Institut fuer Astrophysik * and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt * (DLR). */ /*! \file src/Healpix_2.15a/cxxsupport/cxxutils.h * Various convenience functions used by the Planck LevelS package. * * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 Max-Planck-Society * \author Martin Reinecke \author Reinhard Hell */ #ifndef PLANCK_CXXUTILS_H #define PLANCK_CXXUTILS_H #include #include #include #include #include "src/Healpix_2.15a/message_error.h" #include "src/Healpix_2.15a/lsconstants.h" /*! \defgroup mathutilsgroup Mathematical helper functions */ /*! \{ */ //! Returns \e true if | \a a-b | < \a epsilon * | \a b |, else \e false. template inline bool approx (F a, F b, F epsilon=1e-5) { using namespace std; return abs(a-b) < (epsilon*abs(b)); } //! Returns \e true if | \a a-b | < \a epsilon, else \e false. template inline bool abs_approx (F a, F b, F epsilon=1e-5) { using namespace std; return abs(a-b) < epsilon; } //! Returns the largest integer which is smaller than (or equal to) \a arg. template inline I ifloor (F arg) { return (arg>=0) ? I(arg) : I(arg)-1; } //! Returns the integer which is nearest to \a arg. template inline I nearest (F arg) { arg += 0.5; return (arg>=0) ? I(arg) : I(arg)-1; } //! Returns \a v1+v2 if \a v1<0, \a v1-v2 if \a v1>=v2, else \a v1. /*! \a v1 can be positive or negative; \a v2 must be positive. */ template inline T weak_modulo (T v1, T v2) { return (v1>=0) ? ((v1=0) ? ((v1 inline I imodulo (I v1, I v2) { return (v1>=0) ? 
((v1 inline T sign (const T& signvalue) { return (signvalue>=0) ? 1 : -1; } //! Returns the integer \a n, which fulfills \a n*n<=arg<(n+1)*(n+1). template inline unsigned int isqrt (I arg) { using namespace std; if (sizeof(I)<=4) return unsigned (sqrt(arg+0.5)); else { long double arg2 = arg; return unsigned (sqrt(arg2+0.5)); } } //! Returns the largest integer \a n that fulfills \a 2^n<=arg. template inline unsigned int ilog2 (I arg) { unsigned int res=0; while (arg > 0x0000FFFF) { res+=16; arg>>=16; } if (arg > 0x000000FF) { res|=8; arg>>=8; } if (arg > 0x0000000F) { res|=4; arg>>=4; } if (arg > 0x00000003) { res|=2; arg>>=2; } if (arg > 0x00000001) { res|=1; } return res; } //! Returns \a atan2(y,x) if \a x!=0 or \a y!=0; else returns 0. inline double safe_atan2 (double y, double x) { using namespace std; return ((x==0.) && (y==0.)) ? 0.0 : atan2(y,x); } //! Returns an index to the left of two interpolation values. /*! \a begin points to an array containing a sequence of values sorted in ascending order. The length of the array is \a len. If \a val is lower than the first element, 0 is returned. If \a val is higher than the last element, \a len-2 is returned. Else, the index of the largest element smaller than \a val is returned. */ template inline int interpol_left (const T *begin, int len, const T &val) { const T *end = begin+len; const T *iter = std::lower_bound (begin, end, val); if (iter==begin) return 0; if (iter==end) return len-2; return (iter-begin)-1; } //! Returns an index to the nearest interpolation value. /*! \a begin points to an array containing a sequence of values sorted in ascending order. The length of the array is \a len. If \a val is lower than the first element, 0 is returned. If \a val is higher than the last element, \a len-1 is returned. Else, the index of the nearest element within the sequence of values is returned. */ template inline int interpol_nearest (const T *begin, int len, const T &val) { int left = interpol_left(begin, len, val); T delleft = val-(*(begin+left)); T delright = (*(begin+left+1))-val; if (delright<0) return left+1; return (delright std::string dataToString(const T &x); template<> std::string dataToString (const bool &x); template<> std::string dataToString (const std::string &x); template<> std::string dataToString (const float &x); template<> std::string dataToString (const double &x); /*! Returns a string containing the text representation of \a x, padded with leading zeroes to \a width characters. */ std::string intToString(int x, int width); //! Reads a value of a given datatype from a string template void stringToData (const std::string &x, T &value); template<> void stringToData (const std::string &x, std::string &value); template<> void stringToData (const std::string &x, bool &value); //! Reads a value of a given datatype from a string template inline T stringToData (const std::string &x) { T result; stringToData(x,result); return result; } //! Parses the file \a filename and returns the key/value pairs in \a dict. void parse_file (const std::string &filename, std::map &dict); //! Case-insensitive string comparison /*! Returns \a true, if \a a and \a b differ only in capitalisation, else \a false. */ bool equal_nocase (const std::string &a, const std::string &b); //! Returns lowercase version of \a input. std::string tolower(const std::string &input); /*! \} */ //! Indicates progress by printing the percentage of \a now/total. /*! A message is only printed if it has changed since \a now-1/total. 
The output is followed by a carriage return, not a newline. */ void announce_progress (int now, int total); //! Indicates progress by printing the percentage of \a now/total. /*! A message is only printed if it has changed since \a last/total. The output is followed by a carriage return, not a newline. */ void announce_progress (double now, double last, double total); /*! This function should be called after a sequence of announce_progress() calls has finished. */ void end_announce_progress (); //! Prints a banner containing \a name. Useful for displaying program names. void announce (const std::string &name); /*! Prints a banner containing \a name and checks if \a argc==argc_expected. If not, a usage description is given and the program is terminated. */ void module_startup (const std::string &name, int argc, const char **argv, int argc_expected, const std::string &argv_expected); //! Returns an appropriate FITS repetition count for a map with \a npix pixels. inline unsigned int healpix_repcount (int npix) { if (npix<1024) return 1; else if ((npix%1024)==0) return 1024; else return isqrt (npix/12); } #endif relion-3.1.3/src/Healpix_2.15a/datatypes.h000066400000000000000000000162151411340063500201460ustar00rootroot00000000000000/* * This file is part of Healpix_cxx. * * Healpix_cxx is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * Healpix_cxx is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Healpix_cxx; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * * For more information about HEALPix, see http://healpix.jpl.nasa.gov */ /* * Healpix_cxx is being developed at the Max-Planck-Institut fuer Astrophysik * and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt * (DLR). */ /* * This file defines various platform-independent data types. * If any of the requested types is not available, compilation aborts * with an error (unfortunately a rather obscure one). * * Copyright (C) 2004 Max-Planck-Society * Author: Martin Reinecke */ #ifndef PLANCK_DATATYPES_H #define PLANCK_DATATYPES_H #include #include "src/Healpix_2.15a/message_error.h" // Template magic to select the proper data types. These templates // should not be used outside this file. 
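/* Illustrative sketch (not part of the original header): the sizeChooser
   machinery below resolves to the first candidate type whose sizeof() matches
   the requested byte count, so the typedefs further down can be verified with
   a small stand-alone program, e.g.

     #include <cstdio>
     #include "src/Healpix_2.15a/datatypes.h"

     int main()
     {
       // expected on common 64-bit platforms: "4 8 8"
       std::printf("%zu %zu %zu\n", sizeof(int32), sizeof(int64), sizeof(float64));
       return 0;
     }

   If no candidate has the requested size, sizeChooserHelper2 resolves to an
   empty specialisation and compilation aborts -- the "rather obscure" error
   mentioned in the file header above. */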
template struct sizeChooserHelper { typedef void TYPE; }; template struct sizeChooserHelper { typedef T TYPE; }; template struct sizeChooserHelper2 { typedef T1 TYPE; }; template struct sizeChooserHelper2 { typedef T2 TYPE; }; template struct sizeChooserHelper2 { typedef T3 TYPE; }; template <> struct sizeChooserHelper2 { }; template struct sizeChooser { typedef typename sizeChooserHelper2 ::TYPE, typename sizeChooserHelper::TYPE, typename sizeChooserHelper::TYPE >::TYPE TYPE; }; typedef signed char int8; typedef unsigned char uint8; typedef sizeChooser<2, short, int>::TYPE int16; typedef sizeChooser<2, unsigned short, unsigned int>::TYPE uint16; typedef sizeChooser<4, int, long, short>::TYPE int32; typedef sizeChooser<4, unsigned int, unsigned long, unsigned short>::TYPE uint32; typedef sizeChooser<8, long, long long>::TYPE int64; typedef sizeChooser<8, unsigned long, unsigned long long>::TYPE uint64; typedef sizeChooser<4, float, double>::TYPE float32; typedef sizeChooser<8, double, long double>::TYPE float64; // mapping of types to integer constants enum { PLANCK_INT8 = 0, PLANCK_UINT8 = 1, PLANCK_INT16 = 2, PLANCK_UINT16 = 3, PLANCK_INT32 = 4, PLANCK_UINT32 = 5, PLANCK_INT64 = 6, PLANCK_UINT64 = 7, PLANCK_FLOAT32 = 8, PLANCK_FLOAT64 = 9, PLANCK_BOOL = 10, PLANCK_STRING = 11 }; template struct typehelper {}; template<> struct typehelper { enum { id=PLANCK_INT8 }; }; template<> struct typehelper { enum { id=PLANCK_UINT8 }; }; template<> struct typehelper { enum { id=PLANCK_INT16 }; }; template<> struct typehelper { enum { id=PLANCK_UINT16 }; }; template<> struct typehelper { enum { id=PLANCK_INT32 }; }; template<> struct typehelper { enum { id=PLANCK_UINT32 }; }; template<> struct typehelper { enum { id=PLANCK_INT64 }; }; template<> struct typehelper { enum { id=PLANCK_UINT64 }; }; template<> struct typehelper { enum { id=PLANCK_FLOAT32 }; }; template<> struct typehelper { enum { id=PLANCK_FLOAT64 }; }; template<> struct typehelper { enum { id=PLANCK_BOOL }; }; template<> struct typehelper { enum { id=PLANCK_STRING }; }; inline int type2size (int type) { switch (type) { case PLANCK_INT8 : return 1; case PLANCK_UINT8 : return 1; case PLANCK_INT16 : return 2; case PLANCK_UINT16 : return 2; case PLANCK_INT32 : return 4; case PLANCK_UINT32 : return 4; case PLANCK_INT64 : return 8; case PLANCK_UINT64 : return 8; case PLANCK_FLOAT32: return 4; case PLANCK_FLOAT64: return 8; case PLANCK_BOOL : return 1; case PLANCK_STRING : return 1; default: throw Message_error ("unsupported data type"); } } inline int string2type(const std::string &type) { if (type=="FLOAT64") return PLANCK_FLOAT64; if (type=="FLOAT32") return PLANCK_FLOAT32; if (type=="INT8") return PLANCK_INT8; if (type=="UINT8") return PLANCK_UINT8; if (type=="INT16") return PLANCK_INT16; if (type=="UINT16") return PLANCK_UINT16; if (type=="INT32") return PLANCK_INT32; if (type=="UINT32") return PLANCK_UINT32; if (type=="INT64") return PLANCK_INT64; if (type=="UINT64") return PLANCK_UINT64; if (type=="BOOL") return PLANCK_BOOL; if (type=="STRING") return PLANCK_STRING; throw Message_error ("unknown data type "+type); } inline const char *type2string (int type) { switch (type) { case PLANCK_INT8 : return "INT8"; case PLANCK_UINT8 : return "UINT8"; case PLANCK_INT16 : return "INT16"; case PLANCK_UINT16 : return "UINT16"; case PLANCK_INT32 : return "INT32"; case PLANCK_UINT32 : return "UINT32"; case PLANCK_INT64 : return "INT64"; case PLANCK_UINT64 : return "UINT64"; case PLANCK_FLOAT32: return "FLOAT32"; case PLANCK_FLOAT64: return 
"FLOAT64"; case PLANCK_BOOL : return "BOOL"; case PLANCK_STRING : return "STRING"; default: throw Message_error ("unsupported data type"); } } template inline const char *type2typename () { return "unknown type"; } template<> inline const char *type2typename () { return "signed char"; } template<> inline const char *type2typename () { return "unsigned char"; } template<> inline const char *type2typename () { return "short"; } template<> inline const char *type2typename () { return "unsigned short"; } template<> inline const char *type2typename () { return "int"; } template<> inline const char *type2typename () { return "unsigned int"; } template<> inline const char *type2typename () { return "long"; } template<> inline const char *type2typename () { return "unsigned long"; } template<> inline const char *type2typename () { return "long long"; } template<> inline const char *type2typename () { return "unsigned long long"; } template<> inline const char *type2typename () { return "float"; } template<> inline const char *type2typename () { return "double"; } template<> inline const char *type2typename () { return "bool"; } template<> inline const char *type2typename () { return "std::string"; } #endif relion-3.1.3/src/Healpix_2.15a/geom_utils.h000066400000000000000000000043131411340063500203130ustar00rootroot00000000000000/* * This file is part of Healpix_cxx. * * Healpix_cxx is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * Healpix_cxx is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Healpix_cxx; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * * For more information about HEALPix, see http://healpix.jpl.nasa.gov */ /* * Healpix_cxx is being developed at the Max-Planck-Institut fuer Astrophysik * and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt * (DLR). */ /*! \file src/Healpix_2.15a/cxxsupport/geom_utils.h * Geometric utility functions. * * Copyright (C) 2003, 2006 Max-Planck-Society * \author Martin Reinecke * \author Reinhard Hell */ #include "src/Healpix_2.15a/cxxutils.h" #include "src/Healpix_2.15a/vec3.h" /*! Returns the orientation when looking from point \a loc on the unit sphere in the direction \a dir. \a loc must be normalized. The result ranges from -pi to pi, is 0 for North and pi/2 for West, i.e. the angle is given in mathematically positive sense. If \a loc is the North or South pole, the returned angle is \a atan2(dir.y,dir.x). */ inline double orientation (const vec3 &loc, const vec3 &dir) { // FIXME: here is still optimization potential if (loc.x==0 && loc.y==0) { if (loc.z>0) return safe_atan2(dir.y,-dir.x); else return safe_atan2(dir.y,dir.x); } vec3 east (-loc.y, loc.x, 0); vec3 north = crossprod(loc,east); double y = dotprod(dir,east); double x = dotprod(dir,north); return safe_atan2(-y,x); } /*! Returns the angle between \a v1 and \a v2 in radians. 
*/ inline double v_angle (const vec3 &v1, const vec3 &v2) { return atan2 (crossprod(v1,v2).Length(), dotprod(v1,v2)); } relion-3.1.3/src/Healpix_2.15a/healpix_base.cc000066400000000000000000000526061411340063500207360ustar00rootroot00000000000000/* * This file is part of Healpix_cxx. * * Healpix_cxx is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * Healpix_cxx is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Healpix_cxx; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * * For more information about HEALPix, see http://healpix.jpl.nasa.gov */ /* * Healpix_cxx is being developed at the Max-Planck-Institut fuer Astrophysik * and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt * (DLR). */ /* * Copyright (C) 2003, 2004, 2005, 2006 Max-Planck-Society * Author: Martin Reinecke */ #include "src/Healpix_2.15a/healpix_base.h" #include "src/Healpix_2.15a/cxxutils.h" #include "src/Healpix_2.15a/pointing.h" #include "src/Healpix_2.15a/arr.h" #include "src/Healpix_2.15a/geom_utils.h" using namespace std; short Healpix_Base::ctab[]; short Healpix_Base::utab[]; const nside_dummy SET_NSIDE=nside_dummy(); Healpix_Base::Tablefiller::Tablefiller() { for (int m=0; m<0x100; ++m) { ctab[m] = (m&0x1 ) | ((m&0x2 ) << 7) | ((m&0x4 ) >> 1) | ((m&0x8 ) << 6) | ((m&0x10) >> 2) | ((m&0x20) << 5) | ((m&0x40) >> 3) | ((m&0x80) << 4); utab[m] = (m&0x1 ) | ((m&0x2 ) << 1) | ((m&0x4 ) << 2) | ((m&0x8 ) << 3) | ((m&0x10) << 4) | ((m&0x20) << 5) | ((m&0x40) << 6) | ((m&0x80) << 7); } } Healpix_Base::Tablefiller Healpix_Base::Filler; const int Healpix_Base::jrll[] = { 2,2,2,2,3,3,3,3,4,4,4,4 }; const int Healpix_Base::jpll[] = { 1,3,5,7,0,2,4,6,1,3,5,7 }; int Healpix_Base::npix2nside (int npix) { int res=isqrt(npix/12); planck_assert (npix==res*res*12, "npix2nside: invalid argument"); return res; } int Healpix_Base::ring_above (double z) const { double az=abs(z); if (az>twothird) // polar caps { int iring = int(nside_*sqrt(3*(1-az))); return (z>0) ? 
iring : 4*nside_-iring-1; } else // ----- equatorial region --------- return int(nside_*(2-1.5*z)); } void Healpix_Base::in_ring(int iz, double phi0, double dphi, vector &listir) const { int nr, ir, ipix1; double shift=0.5; if (iz(3*nside_)) // south pole { ir = 4*nside_ - iz; nr = ir*4; ipix1 = npix_ - 2*ir*(ir+1); // lowest pixel number in the ring } else // equatorial region { ir = iz - nside_ + 1; // within {1, 2*nside + 1} nr = nside_*4; if ((ir&1)==0) shift = 0; ipix1 = ncap_ + (ir-1)*nr; // lowest pixel number in the ring } int ipix2 = ipix1 + nr - 1; // highest pixel number in the ring // ----------- constructs the pixel list -------------- if (dphi > (pi-1e-7)) for (int i=ipix1; i<=ipix2; ++i) listir.push_back(i); else { int ip_lo = ifloor(nr*inv_twopi*(phi0-dphi) - shift)+1; int ip_hi = ifloor(nr*inv_twopi*(phi0+dphi) - shift); int pixnum = ip_lo+ipix1; if (pixnumipix2) pixnum -= nr; listir.push_back(pixnum); } } } void Healpix_Base::nest2xyf (int pix, int &ix, int &iy, int &face_num) const { face_num = pix>>(2*order_); pix &= (npface_-1); int raw = (pix&0x5555) | ((pix&0x55550000)>>15); ix = ctab[raw&0xff] | (ctab[raw>>8]<<4); pix >>= 1; raw = (pix&0x5555) | ((pix&0x55550000)>>15); iy = ctab[raw&0xff] | (ctab[raw>>8]<<4); } int Healpix_Base::xyf2nest (int ix, int iy, int face_num) const { return (face_num<<(2*order_)) + (utab[ix&0xff] | (utab[ix>>8]<<16) | (utab[iy&0xff]<<1) | (utab[iy>>8]<<17)); } void Healpix_Base::ring2xyf (int pix, int &ix, int &iy, int &face_num) const { int iring, iphi, kshift, nr; int nl2 = 2*nside_; if (pix=(2*iring)) { face_num=2; tmp-=2*iring; } if (tmp>=iring) ++face_num; } else if (pix<(npix_-ncap_)) // Equatorial region { int ip = pix - ncap_; if (order_>=0) { iring = (ip>>(order_+2)) + nside_; // counted from North pole iphi = (ip&(4*nside_-1)) + 1; } else { iring = (ip/(4*nside_)) + nside_; // counted from North pole iphi = (ip%(4*nside_)) + 1; } kshift = (iring+nside_)&1; nr = nside_; unsigned int ire = iring-nside_+1; unsigned int irm = nl2+2-ire; int ifm, ifp; if (order_>=0) { ifm = (iphi - ire/2 + nside_ -1) >> order_; ifp = (iphi - irm/2 + nside_ -1) >> order_; } else { ifm = (iphi - ire/2 + nside_ -1) / nside_; ifp = (iphi - irm/2 + nside_ -1) / nside_; } if (ifp == ifm) // faces 4 to 7 face_num = (ifp==4) ? 
4 : ifp+4; else if (ifp=(2*nr)) { face_num=10; tmp-=2*nr; } if (tmp>=nr) ++face_num; } int irt = iring - (jrll[face_num]*nside_) + 1; int ipt = 2*iphi- jpll[face_num]*nr - kshift -1; if (ipt>=nl2) ipt-=8*nside_; ix = (ipt-irt) >>1; iy =(-(ipt+irt))>>1; } int Healpix_Base::xyf2ring (int ix, int iy, int face_num) const { int nl4 = 4*nside_; int jr = (jrll[face_num]*nside_) - ix - iy - 1; int nr, kshift, n_before; if (jr 3*nside_) { nr = nl4-jr; n_before = npix_ - 2*(nr+1)*nr; kshift = 0; } else { nr = nside_; n_before = ncap_ + (jr-nside_)*nl4; kshift = (jr-nside_)&1; } int jp = (jpll[face_num]*nr + ix - iy + 1 + kshift) / 2; if (jp>nl4) jp-=nl4; else if (jp<1) jp+=nl4; return n_before + jp - 1; } double Healpix_Base::ring2z (int ring) const { if (ring=0, "nest2ring: need hierarchical map"); int ix, iy, face_num; nest2xyf (pix, ix, iy, face_num); return xyf2ring (ix, iy, face_num); } int Healpix_Base::ring2nest (int pix) const { planck_assert(order_>=0, "ring2nest: need hierarchical map"); int ix, iy, face_num; ring2xyf (pix, ix, iy, face_num); return xyf2nest (ix, iy, face_num); } int Healpix_Base::nest2peano (int pix) const { static const unsigned char subpix[8][4] = { { 0, 1, 3, 2 }, { 3, 0, 2, 1 }, { 2, 3, 1, 0 }, { 1, 2, 0, 3 }, { 0, 3, 1, 2 }, { 1, 0, 2, 3 }, { 2, 1, 3, 0 }, { 3, 2, 0, 1 } }; const unsigned char subpath[8][4] = { { 4, 0, 6, 0 }, { 7, 5, 1, 1 }, { 2, 4, 2, 6 }, { 3, 3, 7, 5 }, { 0, 2, 4, 4 }, { 5, 1, 5, 3 }, { 6, 6, 0, 2 }, { 1, 7, 3, 7 } }; static const unsigned char face2path[12] = { 2, 5, 2, 5, 3, 6, 3, 6, 2, 3, 2, 3 }; static const unsigned char face2peanoface[12] = { 0, 5, 6, 11, 10, 1, 4, 7, 2, 3, 8, 9 }; int face = pix>>(2*order_); unsigned char path = face2path[face]; int result = 0; for (int shift=2*order_-2; shift>=0; shift-=2) { unsigned char spix = (pix>>shift) & 0x3; result <<= 2; result |= subpix[path][spix]; path=subpath[path][spix]; } return result + (int(face2peanoface[face])<<(2*order_)); } int Healpix_Base::peano2nest (int pix) const { static const unsigned char subpix[8][4] = { { 0, 1, 3, 2 }, { 1, 3, 2, 0 }, { 3, 2, 0, 1 }, { 2, 0, 1, 3 }, { 0, 2, 3, 1 }, { 1, 0, 2, 3 }, { 3, 1, 0, 2 }, { 2, 3, 1, 0 } }; static const unsigned char subpath[8][4] = { { 4, 0, 0, 6 }, { 5, 1, 1, 7 }, { 6, 2, 2, 4 }, { 7, 3, 3, 5 }, { 0, 4, 4, 2 }, { 1, 5, 5, 3 }, { 2, 6, 6, 0 }, { 3, 7, 7, 1 } }; static const unsigned char face2path[12] = { 2, 6, 2, 3, 3, 5, 2, 6, 2, 3, 3, 5 }; static const unsigned char peanoface2face[12] = { 0, 5, 8, 9, 6, 1, 2, 7, 10, 11, 4, 3 }; int face = pix>>(2*order_); unsigned char path = face2path[face]; int result = 0; for (int shift=2*order_-2; shift>=0; shift-=2) { unsigned char spix = (pix>>shift) & 0x3; result <<= 2; result |= subpix[path][spix]; path=subpath[path][spix]; } return result + (int(peanoface2face[face])<<(2*order_)); } int Healpix_Base::ang2pix_z_phi (double z, double phi) const { double za = abs(z); double tt = fmodulo(phi,twopi) * inv_halfpi; // in [0,4) if (scheme_==RING) { if (za<=twothird) // Equatorial region { double temp1 = nside_*(0.5+tt); double temp2 = nside_*z*0.75; int jp = int(temp1-temp2); // index of ascending edge line int jm = int(temp1+temp2); // index of descending edge line // ring number counted from z=2/3 int ir = nside_ + 1 + jp - jm; // in {1,2n+1} int kshift = 1-(ir&1); // kshift=1 if ir even, 0 otherwise int ip = (jp+jm-nside_+kshift+1)/2; // in {0,4n-1} ip = imodulo(ip,4*nside_); return ncap_ + (ir-1)*4*nside_ + ip; } else // North & South polar caps { double tp = tt-int(tt); double tmp = 
nside_*sqrt(3*(1-za)); int jp = int(tp*tmp); // increasing edge line index int jm = int((1.0-tp)*tmp); // decreasing edge line index int ir = jp+jm+1; // ring number counted from the closest pole int ip = int(tt*ir); // in {0,4*ir-1} ip = imodulo(ip,4*ir); if (z>0) return 2*ir*(ir-1) + ip; else return npix_ - 2*ir*(ir+1) + ip; } } else // scheme_ == NEST { int face_num, ix, iy; if (za<=twothird) // Equatorial region { double temp1 = nside_*(0.5+tt); double temp2 = nside_*(z*0.75); int jp = int(temp1-temp2); // index of ascending edge line int jm = int(temp1+temp2); // index of descending edge line int ifp = jp >> order_; // in {0,4} int ifm = jm >> order_; if (ifp == ifm) // faces 4 to 7 face_num = (ifp==4) ? 4: ifp+4; else if (ifp < ifm) // (half-)faces 0 to 3 face_num = ifp; else // (half-)faces 8 to 11 face_num = ifm + 8; ix = jm & (nside_-1); iy = nside_ - (jp & (nside_-1)) - 1; } else // polar region, za > 2/3 { int ntt = int(tt); if (ntt>=4) ntt=3; double tp = tt-ntt; double tmp = nside_*sqrt(3*(1-za)); int jp = int(tp*tmp); // increasing edge line index int jm = int((1.0-tp)*tmp); // decreasing edge line index if (jp>=nside_) jp = nside_-1; // for points too close to the boundary if (jm>=nside_) jm = nside_-1; if (z >= 0) { face_num = ntt; // in {0,3} ix = nside_ - jm - 1; iy = nside_ - jp - 1; } else { face_num = ntt + 8; // in {8,11} ix = jp; iy = jm; } } return xyf2nest(ix,iy,face_num); } } void Healpix_Base::pix2ang_z_phi (int pix, double &z, double &phi) const { if (scheme_==RING) { if (pix 3*nside_) { nr = nl4-jr; z = nr*nr*fact2_ - 1; kshift = 0; } else { nr = nside_; z = (2*nside_-jr)*fact1_; kshift = (jr-nside_)&1; } int jp = (jpll[face_num]*nr + ix -iy + 1 + kshift) / 2; if (jp>nl4) jp-=nl4; if (jp<1) jp+=nl4; phi = (jp-(kshift+1)*0.5)*(halfpi/nr); } } void Healpix_Base::query_disc (const pointing &ptg, double radius, vector& listpix) const { listpix.clear(); double dth1 = fact2_; double dth2 = fact1_; double cosang = cos(radius); double z0 = cos(ptg.theta); double xa = 1./sqrt((1-z0)*(1+z0)); double rlat1 = ptg.theta - radius; double zmax = cos(rlat1); int irmin = ring_above (zmax)+1; if (rlat1<=0) // north pole in the disc for (int m=1; m=0, "error in query_disc()"); double dphi=atan2(sqrt(ysq),x); in_ring (iz, ptg.phi, dphi, listpix); } if (rlat2>=pi) // south pole in the disc for (int m=irmax+1; m<(4*nside_); ++m) // rings completely in the disc in_ring (m, 0, pi, listpix); if (scheme_==NEST) for (unsigned int m=0; m2*nside_) ? 
4*nside_-ring : ring; if (northring < nside_) { double tmp = northring*northring*fact2_; costheta = 1 - tmp; sintheta = sqrt(tmp*(2-tmp)); ringpix = 4*northring; shifted = true; startpix = 2*northring*(northring-1); } else { costheta = (2*nside_-northring)*fact1_; sintheta = sqrt((1+costheta)*(1-costheta)); ringpix = 4*nside_; shifted = ((northring-nside_) & 1) == 0; startpix = ncap_ + (northring-nside_)*ringpix; } if (northring != ring) // southern hemisphere { costheta = -costheta; startpix = npix_ - startpix - ringpix; } } void Healpix_Base::neighbors (int pix, fix_arr &result) const { static const int xoffset[] = { -1,-1, 0, 1, 1, 1, 0,-1 }; static const int yoffset[] = { 0, 1, 1, 1, 0,-1,-1,-1 }; static const int facearray[][12] = { { 8, 9,10,11,-1,-1,-1,-1,10,11, 8, 9 }, // S { 5, 6, 7, 4, 8, 9,10,11, 9,10,11, 8 }, // SE { -1,-1,-1,-1, 5, 6, 7, 4,-1,-1,-1,-1 }, // E { 4, 5, 6, 7,11, 8, 9,10,11, 8, 9,10 }, // SW { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11 }, // center { 1, 2, 3, 0, 0, 1, 2, 3, 5, 6, 7, 4 }, // NE { -1,-1,-1,-1, 7, 4, 5, 6,-1,-1,-1,-1 }, // W { 3, 0, 1, 2, 3, 0, 1, 2, 4, 5, 6, 7 }, // NW { 2, 3, 0, 1,-1,-1,-1,-1, 0, 1, 2, 3 } }; // N static const int swaparray[][12] = { { 0,0,0,0,0,0,0,0,3,3,3,3 }, // S { 0,0,0,0,0,0,0,0,6,6,6,6 }, // SE { 0,0,0,0,0,0,0,0,0,0,0,0 }, // E { 0,0,0,0,0,0,0,0,5,5,5,5 }, // SW { 0,0,0,0,0,0,0,0,0,0,0,0 }, // center { 5,5,5,5,0,0,0,0,0,0,0,0 }, // NE { 0,0,0,0,0,0,0,0,0,0,0,0 }, // W { 6,6,6,6,0,0,0,0,0,0,0,0 }, // NW { 3,3,3,3,0,0,0,0,0,0,0,0 } }; // N int ix, iy, face_num; (scheme_==RING) ? ring2xyf(pix,ix,iy,face_num) : nest2xyf(pix,ix,iy,face_num); const int nsm1 = nside_-1; if ((ix>0)&&(ix0)&&(iy=nside_) { x-=nside_; nbnum+=1; } if (y<0) { y+=nside_; nbnum-=3; } else if (y>=nside_) { y-=nside_; nbnum+=3; } int f = facearray[nbnum][face_num]; if (f>=0) { if (swaparray[nbnum][face_num]&1) x=nside_-x-1; if (swaparray[nbnum][face_num]&2) y=nside_-y-1; if (swaparray[nbnum][face_num]&4) std::swap(x,y); result[i] = (scheme_==RING) ? xyf2ring(x,y,f) : xyf2nest(x,y,f); } else result[i] = -1; } } } void Healpix_Base::get_ring_info2 (int ring, int &startpix, int &ringpix, double &theta, bool &shifted) const { int northring = (ring>2*nside_) ? 4*nside_-ring : ring; if (northring < nside_) { double tmp = northring*northring*fact2_; double costheta = 1 - tmp; double sintheta = sqrt(tmp*(2-tmp)); theta = atan2(sintheta,costheta); ringpix = 4*northring; shifted = true; startpix = 2*northring*(northring-1); } else { theta = acos((2*nside_-northring)*fact1_); ringpix = 4*nside_; shifted = ((northring-nside_) & 1) == 0; startpix = ncap_ + (northring-nside_)*ringpix; } if (northring != ring) // southern hemisphere { theta = pi-theta; startpix = npix_ - startpix - ringpix; } } void Healpix_Base::get_interpol (const pointing &ptg, fix_arr &pix, fix_arr &wgt) const { double z = cos (ptg.theta); int ir1 = ring_above(z); int ir2 = ir1+1; double theta1, theta2, w1, tmp, dphi; int sp,nr; bool shift; int i1,i2; if (ir1>0) { get_ring_info2 (ir1, sp, nr, theta1, shift); dphi = twopi/nr; tmp = (ptg.phi/dphi - .5*shift); i1 = (tmp<0) ? int(tmp)-1 : int(tmp); w1 = (ptg.phi-(i1+.5*shift)*dphi)/dphi; i2 = i1+1; if (i1<0) i1 +=nr; if (i2>=nr) i2 -=nr; pix[0] = sp+i1; pix[1] = sp+i2; wgt[0] = 1-w1; wgt[1] = w1; } if (ir2<(4*nside_)) { get_ring_info2 (ir2, sp, nr, theta2, shift); dphi = twopi/nr; tmp = (ptg.phi/dphi - .5*shift); i1 = (tmp<0) ? 
int(tmp)-1 : int(tmp); w1 = (ptg.phi-(i1+.5*shift)*dphi)/dphi; i2 = i1+1; if (i1<0) i1 +=nr; if (i2>=nr) i2 -=nr; pix[2] = sp+i1; pix[3] = sp+i2; wgt[2] = 1-w1; wgt[3] = w1; } if (ir1==0) { double wtheta = ptg.theta/theta2; wgt[2] *= wtheta; wgt[3] *= wtheta; double fac = (1-wtheta)*0.25; wgt[0] = fac; wgt[1] = fac; wgt[2] += fac; wgt[3] +=fac; pix[0] = (pix[2]+2)%4; pix[1] = (pix[3]+2)%4; } else if (ir2==4*nside_) { double wtheta = (ptg.theta-theta1)/(pi-theta1); wgt[0] *= (1-wtheta); wgt[1] *= (1-wtheta); double fac = wtheta*0.25; wgt[0] += fac; wgt[1] += fac; wgt[2] = fac; wgt[3] =fac; pix[2] = ((pix[0]+2)&3)+npix_-4; pix[3] = ((pix[1]+2)&3)+npix_-4; } else { double wtheta = (ptg.theta-theta1)/(theta2-theta1); wgt[0] *= (1-wtheta); wgt[1] *= (1-wtheta); wgt[2] *= wtheta; wgt[3] *= wtheta; } if (scheme_==NEST) for (int m=0; m #include "src/Healpix_2.15a/cxxutils.h" #include "src/Healpix_2.15a/lsconstants.h" #include "src/Healpix_2.15a/pointing.h" template class fix_arr; /*! The two possible ordering schemes of a HEALPix map. */ typedef enum { RING, /*!< RING scheme */ NEST /*!< NESTED scheme */ } Healpix_Ordering_Scheme; class nside_dummy {}; extern const nside_dummy SET_NSIDE; /*! Functionality related to the HEALPix pixelisation. */ class Healpix_Base { protected: enum { order_max=13 }; class Tablefiller { public: Tablefiller(); }; static Tablefiller Filler; friend class Tablefiller; static short ctab[0x100], utab[0x100]; static const int jrll[]; static const int jpll[]; /*! The order of the map; -1 for nonhierarchical map. */ int order_; /*! The N_side parameter of the map; 0 if not allocated. */ int nside_; int npface_, ncap_, npix_; double fact1_, fact2_; /*! The map's ordering scheme. */ Healpix_Ordering_Scheme scheme_; inline int ring_above (double z) const; void in_ring (int iz, double phi0, double dphi, std::vector &listir) const; typedef int (Healpix_Base::*swapfunc)(int pix) const; typedef void (Healpix_Base::*pix2xyf) (int pix, int &x, int &y, int &f) const; typedef int (Healpix_Base::*xyf2pix) (int x, int y, int f) const; public: // Sjors 18nov2010: moved these from protected to public int xyf2nest(int ix, int iy, int face_num) const; void nest2xyf(int pix, int &ix, int &iy, int &face_num) const; int xyf2ring(int ix, int iy, int face_num) const; void ring2xyf(int pix, int &ix, int &iy, int &face_num) const; /*! Calculates the map order from its \a N_side parameter. Returns -1 if \a nside is not a power of 2. \param nside the \a N_side parameter */ static int nside2order (int nside) { planck_assert (nside>0, "invalid value for Nside"); if ((nside)&(nside-1)) return -1; return ilog2(nside); } /*! Calculates the \a N_side parameter from the number of pixels. \param npix the number of pixels */ static int npix2nside (int npix); /*! Constructs an unallocated object. */ Healpix_Base () : order_(-1), nside_(0), npface_(0), ncap_(0), npix_(0), fact1_(0), fact2_(0), scheme_(RING) {} /*! Constructs an object with a given \a order and the ordering scheme \a scheme. */ Healpix_Base (int order, Healpix_Ordering_Scheme scheme) { Set (order, scheme); } /*! Constructs an object with a given \a nside and the ordering scheme \a scheme. The \a nside_dummy parameter must be set to SET_NSIDE. */ Healpix_Base (int nside, Healpix_Ordering_Scheme scheme, const nside_dummy) { SetNside (nside, scheme); } /* Adjusts the object to \a order and \a scheme. 
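     On exit, nside_ equals 2^order and npix_ equals 12*nside_*nside_.
     Hypothetical usage sketch (not from the original source):

       Healpix_Base base;
       base.Set(3, RING);            // order 3  =>  nside = 8
       int npix = base.Npix();       // 12 * 8 * 8 = 768 pixels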
*/ void Set (int order, Healpix_Ordering_Scheme scheme) { planck_assert ((order>=0)&&(order<=order_max), "bad order"); order_ = order; nside_ = 1<=0), "SetNside: nside must be power of 2 for nested maps"); nside_ = nside; npface_ = nside_*nside_; ncap_ = (npface_-nside_)<<1; npix_ = 12*npface_; fact2_ = 4./npix_; fact1_ = (nside_<<1)*fact2_; scheme_ = scheme; } /*! Returns the z-coordinate of the ring \a ring. This also works for the (not really existing) rings 0 and 4*nside. */ double ring2z (int ring) const; /*! Returns the number of the ring in which \a pix lies. */ int pix2ring (int pix) const; /*! Translates a pixel number from NEST to RING. */ int nest2ring (int pix) const; /*! Translates a pixel number from RING to NEST. */ int ring2nest (int pix) const; /*! Translates a pixel number from NEST to its Peano index. */ int nest2peano (int pix) const; /*! Translates a pixel number from its Peano index to NEST. */ int peano2nest (int pix) const; int ang2pix_z_phi (double z, double phi) const; /*! Returns the number of the pixel which contains the angular coordinates \a ang. */ int ang2pix (const pointing &ang) const { return ang2pix_z_phi (cos(ang.theta), ang.phi); } /*! Returns the number of the pixel which contains the vector \a vec (\a vec is normalized if necessary). */ int vec2pix (const vec3 &vec) const { return ang2pix_z_phi (vec.z/vec.Length(), safe_atan2(vec.y,vec.x)); } void pix2ang_z_phi (int pix, double &z, double &phi) const; /*! Returns the angular coordinates of the center of the pixel with number \a pix. */ pointing pix2ang (int pix) const { double z, phi; pix2ang_z_phi (pix,z,phi); return pointing(acos(z),phi); } /*! Returns the vector to the center of the pixel with number \a pix. */ vec3 pix2vec (int pix) const { double z, phi; pix2ang_z_phi (pix,z,phi); vec3 res; res.set_z_phi (z, phi); return res; } /*! Returns the numbers of all pixels whose centers lie within \a radius of \a dir in \a listpix. \param dir the angular coordinates of the disc center \param radius the radius (in radians) of the disc \param listpix a vector containing the numbers of all pixels within the disc \note This method is more efficient in the RING scheme. */ void query_disc (const pointing &dir, double radius, std::vector &listpix) const; /*! Returns the numbers of all pixels that lie at least partially within \a radius of \a dir in \a listpix. It may also return a few pixels which do not lie in the disk at all. \param dir the angular coordinates of the disc center \param radius the radius (in radians) of the disc \param listpix a vector containing the numbers of all pixels within the disc \note This method works in both RING and NEST schemes, but is considerably faster in the RING scheme. */ void query_disc_inclusive (const pointing &dir, double radius, std::vector &listpix) const { query_disc (dir,radius+1.362*pi/(4*nside_),listpix); } /*! Returns useful information about a given ring of the map. \param ring the ring number (the number of the first ring is 1) \param startpix the number of the first pixel in the ring \param ringpix the number of pixels in the ring \param costheta the cosine of the colatitude (in radians) of the ring \param sintheta the sine of the colatitude (in radians) of the ring \param shifted if \a true, the center of the first pixel is not at \a phi=0 */ void get_ring_info (int ring, int &startpix, int &ringpix, double &costheta, double &sintheta, bool &shifted) const; /*! Returns useful information about a given ring of the map. 
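     A map with parameter N_side has 4*N_side-1 iso-latitude rings; polar-cap
     ring i holds 4*i pixels, while every equatorial ring holds 4*N_side
     pixels. Worked example (illustrative, not from the original
     documentation): for N_side=2, ring 1 starts at pixel 0 and holds 4
     pixels, and ring 2 starts at pixel 4 and holds 8 pixels.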
\param ring the ring number (the number of the first ring is 1) \param startpix the number of the first pixel in the ring \param ringpix the number of pixels in the ring \param theta the colatitude (in radians) of the ring \param shifted if \a true, the center of the first pixel is not at \a phi=0 */ void get_ring_info2 (int ring, int &startpix, int &ringpix, double &theta, bool &shifted) const; /*! Returns the neighboring pixels of \a pix in \a result. On exit, \a result contains (in this order) the pixel numbers of the SW, W, NW, N, NE, E, SE and S neighbor of \a pix. If a neighbor does not exist (this can only be the case for the W, N, E and S neighbors), its entry is set to -1. \note This method works in both RING and NEST schemes, but is considerably faster in the NEST scheme. */ void neighbors (int pix, fix_arr &result) const; /*! Returns interpolation information for the direction \a ptg. The surrounding pixels are returned in \a pix, their corresponding weights in \a wgt. \note This method works in both RING and NEST schemes, but is considerably faster in the RING scheme. */ void get_interpol (const pointing &ptg, fix_arr &pix, fix_arr &wgt) const; /*! return npix_ private member. */ int get_npix() const; /*! Returns the order parameter of the object. */ int Order() const { return order_; } /*! Returns the \a N_side parameter of the object. */ int Nside() const { return nside_; } /*! Returns the number of pixels of the object. */ int Npix() const { return npix_; } /*! Returns the ordering scheme of the object. */ Healpix_Ordering_Scheme Scheme() const { return scheme_; } /*! Returns \a true, if both objects have the same nside and scheme, else \a false. */ bool conformable (const Healpix_Base &other) const { return ((nside_==other.nside_) && (scheme_==other.scheme_)); } /*! Swaps the contents of two Healpix_Base objects. */ void swap (Healpix_Base &other) { std::swap(order_,other.order_); std::swap(nside_,other.nside_); std::swap(npface_,other.npface_); std::swap(ncap_,other.ncap_); std::swap(npix_,other.npix_); std::swap(fact1_,other.fact1_); std::swap(fact2_,other.fact2_); std::swap(scheme_,other.scheme_); } /*! Returns the maximum angular distance (in radian) between any pixel center and its corners. */ double max_pixrad() const; }; #endif relion-3.1.3/src/Healpix_2.15a/lsconstants.h000066400000000000000000000065261411340063500205270ustar00rootroot00000000000000/* * This file is part of Healpix_cxx. * * Healpix_cxx is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * Healpix_cxx is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Healpix_cxx; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * * For more information about HEALPix, see http://healpix.jpl.nasa.gov */ /* * Healpix_cxx is being developed at the Max-Planck-Institut fuer Astrophysik * and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt * (DLR). */ /*! \file src/Healpix_2.15a/cxxsupport/lsconstants.h * Mathematical, physical and technical constants for LevelS. 
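 *  Worked relation (added for illustration, not present in the original
 *  file): sigma2fwhm = sqrt(8*ln 2) = 2.3548200..., so a Gaussian with
 *  sigma = 1 has a full width at half maximum of about 2.35482, and
 *  fwhm2sigma is simply its inverse.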
*/ #ifndef PLANCK_CONSTANTS_H #define PLANCK_CONSTANTS_H #include /*! \defgroup mathconstgroup Mathematical constants */ /*! \{ */ const double pi=3.141592653589793238462643383279502884197; const double twopi=6.283185307179586476925286766559005768394; const double inv_twopi=1.0/twopi; const double fourpi=12.56637061435917295385057353311801153679; const double halfpi=1.570796326794896619231321691639751442099; const double inv_halfpi=0.6366197723675813430755350534900574; const double inv_sqrt4pi = 0.2820947917738781434740397257803862929220; const double ln2 = 0.6931471805599453094172321214581766; const double inv_ln2 = 1.4426950408889634073599246810018921; const double ln10 = 2.3025850929940456840179914546843642; const double onethird=1.0/3.0; const double twothird=2.0/3.0; const double fourthird=4.0/3.0; const double degr2rad=pi/180.0; const double rad2degr=180.0/pi; //! Ratio between FWHM and sigma of a Gauss curve (\f$\sqrt{8\ln2}\f$). const double sigma2fwhm=2.3548200450309493; // sqrt(8*log(2.)) const double fwhm2sigma=1/sigma2fwhm; /*! \} */ /*! \defgroup physconstgroup Physical constants */ /*! \{ */ const double Jansky2SI=1.0e-26; const double SI2Jansky=1.0e+26; //! Light speed in m/s. const double speedOfLight=2.99792458e8; //! Boltzmann's constant in J/K const double kBoltzmann=1.380658e-23; //! Stefan-Boltzmann constant in W/m^2/K^4 const double sigmaStefanBoltzmann=5.67051e-8; //! Planck's constant in J s const double hPlanck=6.6260755e-34; //! Astronomical unit in m const double astronomicalUnit=1.49597893e11; //! Solar constant in W/m^2 const double solarConstant=1368.0; //! Tropical year in s const double tropicalYear=3.15569259747e7; //! Average CMB temperature in K const double tcmb = 2.726; //! Colatitude of the solar system motion relative to CMB //! (ecliptical coordinates). const double solsysdir_ecl_theta = 1.7678013480275747; //! Longitude of the solar system motion relative to CMB //! (ecliptical coordinates). const double solsysdir_ecl_phi = 3.0039153062803194; //! Speed of the solar system motion relative to CMB in m/s. const double solsysspeed = 371000.0; /*! \} */ // technical constants //! Healpix value representing "undefined" const double Healpix_undef=-1.6375e30; #endif relion-3.1.3/src/Healpix_2.15a/message_error.h000066400000000000000000000050071411340063500210020ustar00rootroot00000000000000/* * This file is part of Healpix_cxx. * * Healpix_cxx is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * Healpix_cxx is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Healpix_cxx; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * * For more information about HEALPix, see http://healpix.jpl.nasa.gov */ /* * Healpix_cxx is being developed at the Max-Planck-Institut fuer Astrophysik * and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt * (DLR). 
*/ /* * Class for error reporting * * Copyright (C) 2003, 2004 Max-Planck-Society * Authors: Reinhard Hell, Martin Reinecke */ #ifndef PLANCK_MESSAGE_ERROR_H #define PLANCK_MESSAGE_ERROR_H #include #include #include #if defined (PLANCK_STACKTRACE) #include #endif inline void show_stackframe() { #if defined (PLANCK_STACKTRACE) void *trace[16]; int trace_size = backtrace(trace, 16); char **messages = backtrace_symbols(trace, trace_size); std::cerr << "[bt] Execution path:" << std::endl; for (int i=0; i #endif inline bool openmp_enabled() { #ifdef _OPENMP return true; #else return false; #endif } inline int openmp_max_threads () { #ifdef _OPENMP return omp_get_max_threads(); #else return 1; #endif } inline int openmp_thread_num () { #ifdef _OPENMP return omp_get_thread_num(); #else return 0; #endif } /*! Calculates the range of indices between \a glo and \a ghi which must be processed by this thread and returns it in \a lo and \a hi. The indices \a ghi and \a hi are "one past the last real index", in analogy to the STL iterators. */ inline void openmp_calc_share (int glo, int ghi, int &lo, int &hi) { #ifdef _OPENMP int nwork = ghi-glo; int nproc = omp_get_num_threads(); int me = omp_get_thread_num(); int nbase = nwork/nproc; int additional = nwork%nproc; lo = glo+me*nbase + ((me #include "src/Healpix_2.15a/vec3.h" #include "src/Healpix_2.15a/cxxutils.h" /*! \defgroup pointinggroup Pointings */ /*! \{ */ /*! Class representing a direction in 3D space or a location on the unit sphere. */ class pointing { public: /*! Colatitude of the pointing (i.e. the North pole is at \a theta=0). */ double theta; /*! Longitude of the pointing. */ double phi; /*! Default constructor. \a theta and \a phi are not initialized. */ pointing() {} /*! Creates a pointing with \a Theta and \a Phi. */ pointing (double Theta, double Phi) : theta(Theta), phi(Phi) {} // FIXME: should become "explicit" some time /*! Creates a pointing from the vector \a inp. \a inp need not be normalized. */ pointing (const vec3 &inp) { using namespace std; theta = atan2(sqrt(inp.x*inp.x+inp.y*inp.y),inp.z); phi = safe_atan2 (inp.y,inp.x); if (phi<0) phi += twopi; } // FIXME: should be removed some time /*! Returns a normalized vector pointing in the same direction. */ operator vec3() const { double st=sin(theta); return vec3 (st*cos(phi), st*sin(phi), cos(theta)); } /*! Returns a normalized vector pointing in the same direction. */ vec3 to_vec3() const { double st=sin(theta); return vec3 (st*cos(phi), st*sin(phi), cos(theta)); } /*! Changes the angles so that \a 0<=theta<=pi and \a 0<=phi<2*pi. */ void normalize() { theta=fmodulo(theta,twopi); if (theta>pi) { phi+=pi; theta=twopi-theta; } phi=fmodulo(phi,twopi); } }; /*! Converts \a vec to \a ptg. \a vec need not be normalized. \relates pointing */ inline void vec2pnt(const vec3 &vec, pointing &ptg) { using namespace std; ptg.theta = atan2(sqrt(vec.x*vec.x+vec.y*vec.y),vec.z); ptg.phi = safe_atan2 (vec.y,vec.x); if (ptg.phi<0) ptg.phi += twopi; } /*! Writes \a p to \a os. \relates pointing */ inline std::ostream &operator<< (std::ostream &os, const pointing &p) { os << p.theta << ", " << p.phi << std::endl; return os; } /*! \} */ #endif relion-3.1.3/src/Healpix_2.15a/vec3.h000066400000000000000000000076311411340063500170120ustar00rootroot00000000000000/* * This file is part of Healpix_cxx. 
* * Healpix_cxx is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * Healpix_cxx is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Healpix_cxx; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * * For more information about HEALPix, see http://healpix.jpl.nasa.gov */ /* * Healpix_cxx is being developed at the Max-Planck-Institut fuer Astrophysik * and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt * (DLR). */ /*! \file src/Healpix_2.15a/cxxsupport/vec3.h * Class representing 3D cartesian vectors * * Copyright (C) 2003, 2006 Max-Planck-Society * \author Martin Reinecke */ #ifndef PLANCK_VEC3_H #define PLANCK_VEC3_H #include #include /*! \defgroup vec3group 3D vectors */ /*! \{ */ /*! Class representing a 3D cartesian vector. */ class vec3 { public: double x, /*!< x-coordinate */ y, /*!< y-coordinate */ z; /*!< z-coordinate */ /*! Default constructor. Does not initialize \a x, \a y, and \a z. */ vec3 () {} /*! Creates a vector with the coordinates \a xc, \a yc, and \a zc. */ vec3 (double xc, double yc, double zc) : x(xc), y(yc), z(zc) {} /*! Creates a unit vector from a z coordinate and an azimuthal angle. */ void set_z_phi (double z_, double phi) { using namespace std; double sintheta = sqrt((1.-z_)*(1.+z_)); x = sintheta*cos(phi); y = sintheta*sin(phi); z = z_; } /*! Normalizes the vector to length 1. */ void Normalize () { using namespace std; double l = 1.0/sqrt (x*x + y*y + z*z); x*=l; y*=l; z*=l; } /*! Returns the length of the vector. */ double Length () const { return sqrt (x*x + y*y + z*z); } /*! Returns the squared length of the vector. */ double SquaredLength () const { return (x*x + y*y + z*z); } /*! Returns the vector with the signs of all coordinates flipped. */ const vec3 operator- () const { return vec3 (-x, -y, -z); } /*! Flips the signs of all coordinates. */ void Flip () { x=-x; y=-y; z=-z; } /*! Subtracts \a vec from the vector. */ const vec3 operator- (const vec3 &vec) const { return vec3 (x-vec.x, y-vec.y, z-vec.z); } /*! Adds \a vec to the vector. */ const vec3 operator+ (const vec3 &vec) const { return vec3 (x+vec.x, y+vec.y, z+vec.z); } /*! Returns the vector scaled by \a fact. */ const vec3 operator* (double fact) const { return vec3 (x*fact, y*fact, z*fact); } /*! Returns the vector scaled by \a 1/fact. */ const vec3 operator/ (double fact) const { double xfact = 1./fact; return vec3 (x*xfact, y*xfact, z*xfact); } /*! Scales the vector by \a fact. */ vec3 &operator*= (double fact) { x*=fact; y*=fact; z*=fact; return *this; } }; /*! Returns the dot product of \a v1 and \a v2. \relates vec3 */ inline double dotprod(const vec3 &v1, const vec3 &v2) { return v1.x*v2.x + v1.y*v2.y + v1.z*v2.z; } /*! Returns the cross product of \a a and \a b. \relates vec3 */ inline vec3 crossprod(const vec3 &a, const vec3 &b) { return vec3 (a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x); } /*! Writes \a v to \a os. 
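    The coordinates are written comma-separated and terminated by a newline;
    e.g. (illustrative) streaming vec3(1,0,0) produces "1, 0, 0".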
\relates vec3 */ inline std::ostream &operator<< (std::ostream &os, const vec3 &v) { os << v.x << ", " << v.y << ", " << v.z << std::endl; return os; } /*! \} */ #endif relion-3.1.3/src/acc/000077500000000000000000000000001411340063500143205ustar00rootroot00000000000000relion-3.1.3/src/acc/acc_backprojector.h000066400000000000000000000032341411340063500201310ustar00rootroot00000000000000#ifndef ACC_BACKPROJECTOR_H_ #define ACC_BACKPROJECTOR_H_ #ifdef CUDA # include #endif #include "src/complex.h" #include "src/acc/settings.h" #include "src/acc/acc_ptr.h" #ifndef CUDA # include #endif class AccBackprojector { public: int mdlX, mdlY, mdlZ, mdlInitY, mdlInitZ, maxR, maxR2; XFLOAT padding_factor; size_t mdlXYZ; #ifndef CUDA tbb::spin_mutex *mutexes; #endif size_t allocaton_size; size_t voxelCount; XFLOAT *d_mdlReal, *d_mdlImag, *d_mdlWeight; cudaStream_t stream; public: AccBackprojector(): mdlX(0), mdlY(0), mdlZ(0), mdlXYZ(0), mdlInitY(0), mdlInitZ(0), maxR(0), maxR2(0), padding_factor(0), allocaton_size(0), voxelCount(0), d_mdlReal(NULL), d_mdlImag(NULL), d_mdlWeight(NULL), stream(0) #ifndef CUDA , mutexes(0) #endif {} size_t setMdlDim( int xdim, int ydim, int zdim, int inity, int initz, int max_r, XFLOAT paddingFactor); void initMdl(); void backproject( XFLOAT *d_imgs_nomask_real, XFLOAT *d_imgs_nomask_imag, XFLOAT *trans_x, XFLOAT *trans_y, XFLOAT *trans_z, XFLOAT* d_weights, XFLOAT* d_Minvsigma2s, XFLOAT* d_ctfs, unsigned long translation_num, XFLOAT significant_weight, XFLOAT weight_norm, XFLOAT *d_eulers, int imgX, int imgY, int imgZ, unsigned long imageCount, bool data_is_3D, cudaStream_t optStream); void getMdlData(XFLOAT *real, XFLOAT *imag, XFLOAT * weights); void getMdlDataPtrs(XFLOAT *& real, XFLOAT *& imag, XFLOAT *& weights); void setStream(cudaStream_t s) { stream = s; } cudaStream_t getStream() { return stream; } void clear(); ~AccBackprojector(); }; #endif relion-3.1.3/src/acc/acc_backprojector_impl.h000066400000000000000000000071771411340063500211640ustar00rootroot00000000000000//#include //#include //#include "src/acc/settings.h" //#include "src/acc/acc_backprojector.h" //#include "src/acc/cuda/cuda_kernels/cuda_device_utils.cuh" //#include "src/acc/acc_projector.h" size_t AccBackprojector::setMdlDim( int xdim, int ydim, int zdim, int inity, int initz, int max_r, XFLOAT paddingFactor) { if (xdim != mdlX || ydim != mdlY || zdim != mdlZ || inity != mdlInitY || initz != mdlInitZ || max_r != maxR || paddingFactor != padding_factor) { clear(); mdlX = xdim; mdlY = ydim; mdlZ = zdim; if (mdlZ < 1) mdlZ = 1; mdlXYZ = (size_t)xdim*(size_t)ydim*(size_t)zdim; mdlInitY = inity; mdlInitZ = initz; maxR = max_r; maxR2 = max_r*max_r; padding_factor = paddingFactor; //Allocate space for model #ifdef CUDA HANDLE_ERROR(cudaMalloc( (void**) &d_mdlReal, mdlXYZ * sizeof(XFLOAT))); HANDLE_ERROR(cudaMalloc( (void**) &d_mdlImag, mdlXYZ * sizeof(XFLOAT))); HANDLE_ERROR(cudaMalloc( (void**) &d_mdlWeight, mdlXYZ * sizeof(XFLOAT))); #else if (posix_memalign((void **)&d_mdlReal, MEM_ALIGN, mdlXYZ * sizeof(XFLOAT))) CRITICAL(RAMERR); if (posix_memalign((void **)&d_mdlImag, MEM_ALIGN, mdlXYZ * sizeof(XFLOAT))) CRITICAL(RAMERR); if (posix_memalign((void **)&d_mdlWeight, MEM_ALIGN, mdlXYZ * sizeof(XFLOAT))) CRITICAL(RAMERR); mutexes = new tbb::spin_mutex[mdlZ*mdlY]; #endif allocaton_size = mdlXYZ * sizeof(XFLOAT) * 3; } return allocaton_size; } void AccBackprojector::initMdl() { #ifdef DEBUG_CUDA if (mdlXYZ == 0) { printf("Model dimensions must be set with setMdlDim before call to initMdl."); 
CRITICAL(ERR_MDLDIM); } if (voxelCount != 0) { printf("DEBUG_ERROR: Duplicated call to model setup"); CRITICAL(ERR_MDLSET); } #endif //Initiate model with zeros #ifdef CUDA DEBUG_HANDLE_ERROR(cudaMemset( d_mdlReal, 0, mdlXYZ * sizeof(XFLOAT))); DEBUG_HANDLE_ERROR(cudaMemset( d_mdlImag, 0, mdlXYZ * sizeof(XFLOAT))); DEBUG_HANDLE_ERROR(cudaMemset( d_mdlWeight, 0, mdlXYZ * sizeof(XFLOAT))); #else memset(d_mdlReal, 0, mdlXYZ * sizeof(XFLOAT)); memset(d_mdlImag, 0, mdlXYZ * sizeof(XFLOAT)); memset(d_mdlWeight, 0, mdlXYZ * sizeof(XFLOAT)); #endif voxelCount = mdlXYZ; } void AccBackprojector::getMdlData(XFLOAT *r, XFLOAT *i, XFLOAT * w) { #ifdef CUDA DEBUG_HANDLE_ERROR(cudaStreamSynchronize(stream)); //Make sure to wait for remaining kernel executions DEBUG_HANDLE_ERROR(cudaMemcpyAsync( r, d_mdlReal, mdlXYZ * sizeof(XFLOAT), cudaMemcpyDeviceToHost, stream)); DEBUG_HANDLE_ERROR(cudaMemcpyAsync( i, d_mdlImag, mdlXYZ * sizeof(XFLOAT), cudaMemcpyDeviceToHost, stream)); DEBUG_HANDLE_ERROR(cudaMemcpyAsync( w, d_mdlWeight, mdlXYZ * sizeof(XFLOAT), cudaMemcpyDeviceToHost, stream)); DEBUG_HANDLE_ERROR(cudaStreamSynchronize(stream)); //Wait for copy #else memcpy(r, d_mdlReal, mdlXYZ * sizeof(XFLOAT)); memcpy(i, d_mdlImag, mdlXYZ * sizeof(XFLOAT)); memcpy(w, d_mdlWeight, mdlXYZ * sizeof(XFLOAT)); #endif } void AccBackprojector::getMdlDataPtrs(XFLOAT *& r, XFLOAT *& i, XFLOAT *& w) { #ifndef CUDA r = d_mdlReal; i = d_mdlImag; w = d_mdlWeight; #endif } void AccBackprojector::clear() { mdlX = 0; mdlY = 0; mdlZ = 0; mdlXYZ = 0; mdlInitY = 0; mdlInitZ = 0; maxR = 0; maxR2 = 0; padding_factor = 0; allocaton_size = 0; if (d_mdlReal != NULL) { #ifdef CUDA DEBUG_HANDLE_ERROR(cudaFree(d_mdlReal)); DEBUG_HANDLE_ERROR(cudaFree(d_mdlImag)); DEBUG_HANDLE_ERROR(cudaFree(d_mdlWeight)); #else free(d_mdlReal); free(d_mdlImag); free(d_mdlWeight); delete [] mutexes; #endif d_mdlReal = d_mdlImag = d_mdlWeight = NULL; } } AccBackprojector::~AccBackprojector() { clear(); } relion-3.1.3/src/acc/acc_helper_functions.h000066400000000000000000000360671411340063500206620ustar00rootroot00000000000000#ifndef ACC_HELPER_FUNCTIONS_H_ #define ACC_HELPER_FUNCTIONS_H_ #include "src/acc/acc_ml_optimiser.h" /* * This assisting function goes over the orientations determined as significant for this image, and checks * which translations should be included in the list of those which differences will be calculated for. * * Any contiguous translations with a shared orientation are grouped together into a "job" which is supplied * to the difference kernel. If there are more contiguous translations than the specified "chunk" number, * these are split into separate jobs, to increase parallelism at the cost of redundant memory reads. */ long int makeJobsForDiff2Fine( OptimisationParamters &op, SamplingParameters &sp, long int orientation_num, long int translation_num, ProjectionParams &FineProjectionData, std::vector< long unsigned > &iover_transes, std::vector< long unsigned > &ihiddens, long int nr_over_orient, long int nr_over_trans, int img_id, IndexedDataArray &FPW, // FPW=FinePassWeights IndexedDataArrayMask &dataMask, int chunk); /* * This assisting function goes over the weight-array and groups all weights with shared * orientations into 'jobs' which are fed into the collect-kenrel, which reduces all translations * with computed differences into a reduced object to be back-projected. 
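 *
 * A concrete (hypothetical) illustration of the grouping: if, after the fine pass,
 * the weights are ordered by orientation so that FPW.rot_idx reads {3, 3, 3, 7, 7, 9},
 * the loop below produces three jobs,
 *
 *     jobOrigin = {0, 3, 5}    jobExtent = {3, 2, 1}
 *
 * i.e. one job per contiguous run of weights sharing a rotation index, which is the
 * granularity the collect kernel later reduces over.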
*/ long int makeJobsForCollect(IndexedDataArray &FPW, IndexedDataArrayMask &dataMask, unsigned long NewJobNum); // FPW=FinePassWeights /* * Maps weights to a decoupled indexing of translations and orientations */ void mapWeights( unsigned long orientation_start, XFLOAT *mapped_weights, unsigned long orientation_num, unsigned long idxArr_start, unsigned long idxArr_end, unsigned long translation_num, XFLOAT *weights, long unsigned *rot_idx, long unsigned *trans_idx, unsigned long current_oversampling); void buildCorrImage(MlOptimiser *baseMLO, OptimisationParamters &op, AccPtr &corr_img, int img_id, long int group_id, bool ctf_premultiplied); void generateEulerMatrices( ProjectionParams &ProjectionData, XFLOAT *eulers, bool inverse, Matrix2D &L, Matrix2D &R); long unsigned generateProjectionSetupFine( OptimisationParamters &op, SamplingParameters &sp, MlOptimiser *baseMLO, unsigned iclass, ProjectionParams &ProjectionData); void runWavgKernel( AccProjectorKernel &projector, XFLOAT *eulers, XFLOAT *Fimgs_real, XFLOAT *Fimgs_imag, XFLOAT *trans_x, XFLOAT *trans_y, XFLOAT *trans_z, XFLOAT *sorted_weights, XFLOAT *ctfs, XFLOAT *wdiff2s_parts, XFLOAT *wdiff2s_AA, XFLOAT *wdiff2s_XA, OptimisationParamters &op, long unsigned orientation_num, long unsigned translation_num, unsigned long image_size, int img_id, int group_id, int exp_iclass, XFLOAT part_scale, bool refs_are_ctf_corrected, bool ctf_premultiplied, bool data_is_3D, cudaStream_t stream); void runBackProjectKernel( AccBackprojector &BP, AccProjectorKernel &projector, XFLOAT *d_img_real, XFLOAT *d_img_imag, XFLOAT *trans_x, XFLOAT *trans_y, XFLOAT *trans_z, XFLOAT* d_weights, XFLOAT* d_Minvsigma2s, XFLOAT* d_ctfs, unsigned long translation_num, XFLOAT significant_weight, XFLOAT weight_norm, XFLOAT *d_eulers, int imgX, int imgY, int imgZ, unsigned long imageCount, bool data_is_3D, bool do_sgd, bool ctf_premultiplied, cudaStream_t optStream); template< typename T> void deviceInitComplexValue(AccPtr &data, XFLOAT value) { AccUtilities::InitComplexValue(data, value); } template< typename T> void deviceInitValue(AccPtr &data, T value) { AccUtilities::InitValue(data, value); } template< typename T> void deviceInitValue(AccPtr &data, T value, size_t Size) { AccUtilities::InitValue(data, value, Size); } void mapAllWeightsToMweights( unsigned long * d_iorient, //projectorPlan.iorientclasses XFLOAT * d_allweights, //allWeights XFLOAT * d_mweights, //Mweight unsigned long orientation_num, //projectorPlan.orientation_num unsigned long translation_num, //translation_num cudaStream_t stream ); #define OVER_THRESHOLD_BLOCK_SIZE 512 template< typename T> void arrayOverThreshold(AccPtr &data, AccPtr &passed, T threshold) { #ifdef CUDA int grid_size = ceil((float)data.getSize()/(float)OVER_THRESHOLD_BLOCK_SIZE); cuda_kernel_array_over_threshold<<< grid_size, OVER_THRESHOLD_BLOCK_SIZE, 0, data.getStream() >>>( ~data, ~passed, threshold, data.getSize(), OVER_THRESHOLD_BLOCK_SIZE); LAUNCH_HANDLE_ERROR(cudaGetLastError()); #else int Size = data.getSize(); for(size_t i=0; i= threshold) passed[i] = true; else passed[i] = false; } #endif } #define FIND_IN_CUMULATIVE_BLOCK_SIZE 512 template< typename T> size_t findThresholdIdxInCumulativeSum(AccPtr &data, T threshold) { int grid_size = ceil((float)(data.getSize()-1)/(float)FIND_IN_CUMULATIVE_BLOCK_SIZE); if(grid_size==0) { return(0); } else { #ifdef CUDA AccPtr idx(1, data.getStream(), data.getAllocator()); idx[0] = 0; idx.putOnDevice(); cuda_kernel_find_threshold_idx_in_cumulative<<< grid_size, 
FIND_IN_CUMULATIVE_BLOCK_SIZE, 0, data.getStream() >>>( ~data, threshold, data.getSize()-1, ~idx, FIND_IN_CUMULATIVE_BLOCK_SIZE); idx.cpToHost(); DEBUG_HANDLE_ERROR(cudaStreamSynchronize(data.getStream())); return idx[0]; #else size_t idx = 0; size_t size_m1 = data.getSize()-1; for (size_t i = 0; i < size_m1; i++) { if (data[i] <= threshold && threshold < data[i+1]) idx = i+1; } return idx; #endif } } void runDiff2KernelCoarse( AccProjectorKernel &projector, XFLOAT *trans_x, XFLOAT *trans_y, XFLOAT *trans_z, XFLOAT *corr_img, XFLOAT *Fimg_real, XFLOAT *Fimg_imag, XFLOAT *d_eulers, XFLOAT *diff2s, XFLOAT local_sqrtXi2, long unsigned orientation_num, unsigned long translation_num, unsigned long image_size, cudaStream_t stream, bool do_CC, bool data_is_3D); void runDiff2KernelFine( AccProjectorKernel &projector, XFLOAT *corr_img, XFLOAT *Fimgs_real, XFLOAT *Fimgs_imag, XFLOAT *trans_x, XFLOAT *trans_y, XFLOAT *trans_z, XFLOAT *eulers, long unsigned *rot_id, long unsigned *rot_idx, long unsigned *trans_idx, long unsigned *job_idx, long unsigned *job_num, XFLOAT *diff2s, OptimisationParamters &op, MlOptimiser *baseMLO, long unsigned orientation_num, long unsigned translation_num, long unsigned significant_num, unsigned long image_size, int img_id, int exp_iclass, cudaStream_t stream, long unsigned job_num_count, bool do_CC, bool data_is_3D); void runCollect2jobs( int grid_dim, XFLOAT * oo_otrans_x, // otrans-size -> make const XFLOAT * oo_otrans_y, // otrans-size -> make const XFLOAT * oo_otrans_z, // otrans-size -> make const XFLOAT * myp_oo_otrans_x2y2z2, // otrans-size -> make const XFLOAT * weights, XFLOAT significant_weight, // TODO Put in const XFLOAT sum_weight, // TODO Put in const unsigned long nr_trans, unsigned long oversampled_trans, unsigned long oversampled_rot, unsigned long oversamples, bool skip_rots, XFLOAT * p_weights, XFLOAT * p_thr_wsum_prior_offsetx_class, XFLOAT * p_thr_wsum_prior_offsety_class, XFLOAT * p_thr_wsum_prior_offsetz_class, XFLOAT * p_thr_wsum_sigma2_offset, size_t * rot_idx, size_t * trans_idx, size_t * jobOrigin, size_t * jobExtent, bool data_is_3D ); void windowFourierTransform2( AccPtr &d_in, AccPtr &d_out, size_t iX, size_t iY, size_t iZ, //Input dimensions size_t oX, size_t oY, size_t oZ, //Output dimensions size_t Npsi = 1, size_t pos = 0, cudaStream_t stream = 0); void selfApplyBeamTilt2(MultidimArray &Fimg, RFLOAT beamtilt_x, RFLOAT beamtilt_y, RFLOAT wavelength, RFLOAT Cs, RFLOAT angpix, int ori_size); template void runCenterFFT(MultidimArray< T >& v, bool forward, CudaCustomAllocator *allocator) { AccPtr img_in (v.nzyxdim, allocator); // with original data pointer // AccPtr img_aux(v.nzyxdim, allocator); // temporary holder for (unsigned long i = 0; i < v.nzyxdim; i ++) img_in[i] = (XFLOAT) v.data[i]; img_in.putOnDevice(); // img_aux.deviceAlloc(); if ( v.getDim() == 1 ) { std::cerr << "CenterFFT on gpu reverts to cpu for dim!=2 (now dim=1)" < aux; int l, shift; l = XSIZE(v); aux.resize(l); shift = (int)(l / 2); if (!forward) shift = -shift; // Shift the input in an auxiliar vector for (int i = 0; i < l; i++) { int ip = i + shift; if (ip < 0) ip += l; else if (ip >= l) ip -= l; aux(ip) = DIRECT_A1D_ELEM(v, i); } // Copy the vector for (int i = 0; i < l; i++) DIRECT_A1D_ELEM(v, i) = DIRECT_A1D_ELEM(aux, i); } else if ( v.getDim() == 2 ) { // 2D //std::cerr << "CenterFFT on gpu with dim=2!" 
< aux; int l, shift; // Shift in the X direction l = XSIZE(v); aux.resize(l); shift = (int)(l / 2); if (!forward) shift = -shift; for (int k = 0; k < ZSIZE(v); k++) for (int i = 0; i < YSIZE(v); i++) { // Shift the input in an auxiliary vector for (int j = 0; j < l; j++) { int jp = j + shift; if (jp < 0) jp += l; else if (jp >= l) jp -= l; aux(jp) = DIRECT_A3D_ELEM(v, k, i, j); } // Copy the vector for (int j = 0; j < l; j++) DIRECT_A3D_ELEM(v, k, i, j) = DIRECT_A1D_ELEM(aux, j); } // Shift in the Y direction l = YSIZE(v); aux.resize(l); shift = (int)(l / 2); if (!forward) shift = -shift; for (int k = 0; k < ZSIZE(v); k++) for (int j = 0; j < XSIZE(v); j++) { // Shift the input in an auxiliary vector for (int i = 0; i < l; i++) { int ip = i + shift; if (ip < 0) ip += l; else if (ip >= l) ip -= l; aux(ip) = DIRECT_A3D_ELEM(v, k, i, j); } // Copy the vector for (int i = 0; i < l; i++) DIRECT_A3D_ELEM(v, k, i, j) = DIRECT_A1D_ELEM(aux, i); } // Shift in the Z direction l = ZSIZE(v); aux.resize(l); shift = (int)(l / 2); if (!forward) shift = -shift; for (int i = 0; i < YSIZE(v); i++) for (int j = 0; j < XSIZE(v); j++) { // Shift the input in an auxiliary vector for (int k = 0; k < l; k++) { int kp = k + shift; if (kp < 0) kp += l; else if (kp >= l) kp -= l; aux(kp) = DIRECT_A3D_ELEM(v, k, i, j); } // Copy the vector for (int k = 0; k < l; k++) DIRECT_A3D_ELEM(v, k, i, j) = DIRECT_A1D_ELEM(aux, k); } } else { v.printShape(); REPORT_ERROR("CenterFFT ERROR: Dimension should be 1, 2 or 3"); } } template void runCenterFFT( AccPtr< T > &img_in, int xSize, int ySize, bool forward, int batchSize = 1) { // AccPtr img_aux(img_in.h_ptr, img_in.getSize(), allocator); // temporary holder // img_aux.deviceAlloc(); int xshift = (xSize / 2); int yshift = (ySize / 2); if (!forward) { xshift = -xshift; yshift = -yshift; } int blocks = ceilf((float)((xSize*ySize)/(float)(2*CFTT_BLOCK_SIZE))); AccUtilities::centerFFT_2D(blocks, batchSize, CFTT_BLOCK_SIZE, img_in.getStream(), ~img_in, xSize*ySize, xSize, ySize, xshift, yshift); LAUNCH_HANDLE_ERROR(cudaGetLastError()); // HANDLE_ERROR(cudaStreamSynchronize(0)); // img_aux.cpOnDevice(img_in.d_ptr); //update input image with centered kernel-output. } template void runCenterFFT( AccPtr< T > &img_in, int xSize, int ySize, int zSize, bool forward, int batchSize = 1) { // AccPtr img_aux(img_in.h_ptr, img_in.getSize(), allocator); // temporary holder // img_aux.deviceAlloc(); if(zSize>1) { int xshift = (xSize / 2); int yshift = (ySize / 2); int zshift = (ySize / 2); if (!forward) { xshift = -xshift; yshift = -yshift; zshift = -zshift; } int grid_size = ceilf((float)(((size_t)xSize*(size_t)ySize*(size_t)zSize)/ (float)(2*CFTT_BLOCK_SIZE))); AccUtilities::centerFFT_3D(grid_size, batchSize, CFTT_BLOCK_SIZE, img_in.getStream(), ~img_in, (size_t)xSize*(size_t)ySize*(size_t)zSize, xSize, ySize, zSize, xshift, yshift, zshift); LAUNCH_HANDLE_ERROR(cudaGetLastError()); // HANDLE_ERROR(cudaStreamSynchronize(0)); // img_aux.cpOnDevice(img_in.d_ptr); //update input image with centered kernel-output. 
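	// Usage sketch (hypothetical, for illustration only -- not executed here): centre a
	// batch of 2D images that already live on the device, then undo the shift again.
	//
	//     AccPtr<XFLOAT> d_img(xSize * ySize * batchSize, allocator);
	//     // ... fill on the host, then d_img.putOnDevice() ...
	//     runCenterFFT(d_img, xSize, ySize, true,  batchSize);   // forward shift
	//     runCenterFFT(d_img, xSize, ySize, false, batchSize);   // shift back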
} else { int xshift = (xSize / 2); int yshift = (ySize / 2); if (!forward) { xshift = -xshift; yshift = -yshift; } int blocks = ceilf((float)((xSize*ySize)/(float)(2*CFTT_BLOCK_SIZE))); AccUtilities::centerFFT_2D(blocks, batchSize, CFTT_BLOCK_SIZE, img_in.getStream(), ~img_in, xSize*ySize, xSize, ySize, xshift, yshift); LAUNCH_HANDLE_ERROR(cudaGetLastError()); } } template void lowPassFilterMapGPU( AccPtr< T > &img_in, size_t Zdim, size_t Ydim, size_t Xdim, long int ori_size, RFLOAT lowpass, RFLOAT highpass, RFLOAT angpix, int filter_edge_width, bool do_highpass) { // High or low? RFLOAT passLimit = (do_highpass ? highpass : lowpass); // Which resolution shell is the filter? int ires_filter = ROUND((ori_size * angpix)/passLimit); int filter_edge_halfwidth = filter_edge_width / 2; // Soft-edge: from 1 shell less to one shell more: XFLOAT edge_low = XMIPP_MAX(0., (ires_filter - filter_edge_halfwidth) / (RFLOAT)ori_size); // in 1/pix XFLOAT edge_high = XMIPP_MIN(Xdim, (ires_filter + filter_edge_halfwidth) / (RFLOAT)ori_size); // in 1/pix XFLOAT edge_width = edge_high - edge_low; int blocks = ceilf( (float)((size_t)Xdim*(size_t)Ydim*(size_t)Zdim) / (float)(CFTT_BLOCK_SIZE) ); if (do_highpass) { AccUtilities::frequencyPass(blocks,CFTT_BLOCK_SIZE, img_in.getStream(), ~img_in, ori_size, Xdim, Ydim, Zdim, edge_low, edge_width, edge_high, (XFLOAT)angpix, (size_t)Xdim*(size_t)Ydim*(size_t)Zdim); } else { AccUtilities::frequencyPass(blocks,CFTT_BLOCK_SIZE, img_in.getStream(), ~img_in, ori_size, Xdim, Ydim, Zdim, edge_low, edge_width, edge_high, (XFLOAT)angpix, (size_t)Xdim*(size_t)Ydim*(size_t)Zdim); } LAUNCH_HANDLE_ERROR(cudaGetLastError()); } #endif //ACC_HELPER_FUNCTIONS_H_ relion-3.1.3/src/acc/acc_helper_functions_impl.h000066400000000000000000001534241411340063500217000ustar00rootroot00000000000000/* #undef ALTCPU #include #include "src/acc/cuda/cuda_settings.h" #include "src/acc/cuda/cuda_kernels/BP.cuh" #include "src/macros.h" #include "src/error.h" */ long int makeJobsForDiff2Fine( OptimisationParamters &op, SamplingParameters &sp, long int orientation_num, long int translation_num, ProjectionParams &FineProjectionData, std::vector< long unsigned > &iover_transes, std::vector< long unsigned > &ihiddens, long int nr_over_orient, long int nr_over_trans, int img_id, IndexedDataArray &FPW, // FPW=FinePassWeights IndexedDataArrayMask &dataMask, int chunk) { long unsigned w_base = dataMask.firstPos, w(0), k(0); // be on the safe side with the jobArrays: make them as large as they could possibly be // (this will be reduced at exit of this function) dataMask.setNumberOfJobs(orientation_num*translation_num); dataMask.setNumberOfWeights(orientation_num*translation_num); dataMask.jobOrigin.hostAlloc(); dataMask.jobExtent.hostAlloc(); dataMask.jobOrigin[k]=0; for (long unsigned i = 0; i < orientation_num; i++) { dataMask.jobExtent[k]=0; long int tk=0; long int iover_rot = FineProjectionData.iover_rots[i]; for (long unsigned j = 0; j < translation_num; j++) { long int iover_trans = iover_transes[j]; long int ihidden = FineProjectionData.iorientclasses[i] * sp.nr_trans + ihiddens[j]; if(DIRECT_A2D_ELEM(op.Mcoarse_significant, img_id, ihidden)==1) { FPW.rot_id[w_base+w] = FineProjectionData.iorientclasses[i] % (sp.nr_dir*sp.nr_psi); // where to look for priors etc FPW.rot_idx[w_base+w] = i; // which rot for this significant task FPW.trans_idx[w_base+w] = j; // which trans - || - FPW.ihidden_overs[w_base+w]= (ihidden * nr_over_orient + iover_rot) * nr_over_trans + iover_trans; if(tk>=chunk) { tk=0; // reset 
counter k++; // use new element dataMask.jobOrigin[k]=w; dataMask.jobExtent[k]=0; // prepare next element for ++ incrementing } tk++; // increment limit-checker dataMask.jobExtent[k]++; // increment number of transes this job w++; } else if(tk!=0) // start a new one with the same rotidx - we expect transes to be sequential. { tk=0; // reset counter k++; // use new element dataMask.jobOrigin[k]=w; dataMask.jobExtent[k]=0; // prepare next element for ++ incrementing } } if(tk>0) // use new element (if tk==0) then we are currently on an element with no signif, so we should continue using this element { k++; dataMask.jobOrigin[k]=w; dataMask.jobExtent[k]=0; } } if(dataMask.jobExtent[k]!=0) // if we started putting somehting in last element, then the count is one higher than the index k+=1; dataMask.setNumberOfJobs(k); dataMask.setNumberOfWeights(w); // if(dataMask.weightNum>0) // { // dataMask.jobOrigin.device_alloc(); // dataMask.jobExtent.device_alloc(); // } return(w); } long int makeJobsForCollect(IndexedDataArray &FPW, IndexedDataArrayMask &dataMask, unsigned long NewJobNum) // FPW=FinePassWeights { // reset the old (diff2Fine) job-definitions // dataMask.jobOrigin.free_host(); // dataMask.jobOrigin.free_device(); // dataMask.jobExtent.free_host(); // dataMask.jobExtent.free_device(); dataMask.setNumberOfJobs(NewJobNum); // dataMask.jobOrigin.hostAlloc(); // dataMask.jobExtent.hostAlloc(); long int jobid=0; dataMask.jobOrigin[jobid]=0; dataMask.jobExtent[jobid]=1; long int crot =FPW.rot_idx[jobid]; // set current rot for(long int n=1; n::max(); for (long unsigned i = idxArr_start; i < idxArr_end; i++) mapped_weights[ (rot_idx[i]-orientation_start) * translation_num + trans_idx[i] ]= weights[i]; } void buildCorrImage(MlOptimiser *baseMLO, OptimisationParamters &op, AccPtr &corr_img, int img_id, long int group_id, bool ctf_premultiplied) { // CC or not if((baseMLO->iter == 1 && baseMLO->do_firstiter_cc) || baseMLO->do_always_cc) for(size_t i = 0; i < corr_img.getSize(); i++) corr_img[i] = 1. 
/ (op.local_sqrtXi2[img_id]*op.local_sqrtXi2[img_id]); else for(size_t i = 0; i < corr_img.getSize(); i++) corr_img[i] = *(op.local_Minvsigma2[img_id].data + i ); // ctf-correction or not ( NOTE this is not were the difference metric is ctf-corrected, but // rather where we apply the additional correction to make the GPU-specific arithmetic equal // to the CPU method) if (baseMLO->do_ctf_correction) { if (baseMLO->refs_are_ctf_corrected) for(size_t i = 0; i < corr_img.getSize(); i++) corr_img[i] *= DIRECT_MULTIDIM_ELEM(op.local_Fctf[img_id], i)*DIRECT_MULTIDIM_ELEM(op.local_Fctf[img_id], i); if (ctf_premultiplied) for(size_t i = 0; i < corr_img.getSize(); i++) corr_img[i] *= DIRECT_MULTIDIM_ELEM(op.local_Fctf[img_id], i)*DIRECT_MULTIDIM_ELEM(op.local_Fctf[img_id], i); } // scale-correction or not ( NOTE this is not were the difference metric is scale-corrected, but // rather where we apply the additional correction to make the GPU-specific arithmetic equal // to the CPU method) XFLOAT myscale = baseMLO->mymodel.scale_correction[group_id]; if (baseMLO->do_scale_correction) for(size_t i = 0; i < corr_img.getSize(); i++) corr_img[i] *= myscale * myscale; } void generateEulerMatrices( ProjectionParams &ProjectionData, XFLOAT *eulers, bool inverse, Matrix2D &L, Matrix2D &R) { RFLOAT alpha, beta, gamma; RFLOAT ca, sa, cb, sb, cg, sg; RFLOAT cc, cs, sc, ss; Matrix2D A(3,3); bool doL = (L.mdimx == 3 && L.mdimy == 3); bool doR = (R.mdimx == 3 && R.mdimy == 3); for (long int i = 0; i < ProjectionData.rots.size(); i++) { //TODO In a sense we're doing RAD2DEG just to do DEG2RAD here. //The only place the degree value is actually used is in the metadata assignment. alpha = DEG2RAD(ProjectionData.rots[i]); beta = DEG2RAD(ProjectionData.tilts[i]); gamma = DEG2RAD(ProjectionData.psis[i]); #ifdef RELION_SINGLE_PRECISION sincosf(alpha, &sa, &ca); sincosf(beta, &sb, &cb); sincosf(gamma, &sg, &cg); #else sincos(alpha, &sa, &ca); sincos(beta, &sb, &cb); sincos(gamma, &sg, &cg); #endif cc = cb * ca; cs = cb * sa; sc = sb * ca; ss = sb * sa; A(0, 0) = cg * cc - sg * sa; A(0, 1) = cg * cs + sg * ca; A(0, 2) = -cg * sb; A(1, 0) = -sg * cc - cg * sa; A(1, 1) = -sg * cs + cg * ca; A(1, 2) = sg * sb; A(2, 0) = sc; A(2, 1) = ss; A(2, 2) = cb; if (doL) A = L * A; if (doR) A = A * R; if(inverse) A = A.inv(); for (int m = 0; m < 3; m ++) for (int n = 0; n < 3; n ++) eulers[9 * i + (m*3 + n)] = A(m, n); } } long unsigned generateProjectionSetupFine( OptimisationParamters &op, SamplingParameters &sp, MlOptimiser *baseMLO, unsigned iclass, ProjectionParams &ProjectionData) // FIXME : For coarse iteration this is **SLOW** HERE ARE SOME NOTES FOR PARALLELIZING IT (GPU OFFLOAD): /* * Since it is based on push_back, parallelizing sould be fine given som atomic opreation appends, * what takes time is looping through all this. The job-splitting in collect2jobs-preproccesing and * divideOrientationsIntoBlockjobs() relies on chunks of shared orientations being adjacent in * ProjectionData.rot_id (and thus also .rot_idx), but does not care which order those chunks appear * in. So as long as a parallelilsm and "atomic push_back" is organised to use an orientation as a * minimum unit, the job-splitting should be fine with the output. 
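 *
 * A minimal sketch of that idea (an assumption, not an implemented code path): give
 * each thread its own ProjectionParams covering a contiguous block of idir values,
 * fill the per-thread containers independently, and concatenate them in idir order
 * afterwards. Since every orientation's oversampled entries are then pushed back by a
 * single thread, the "chunks of shared orientations are adjacent" requirement above
 * is preserved without any atomics on the shared container.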
*/ { //Local variables std::vector< RFLOAT > oversampled_rot, oversampled_tilt, oversampled_psi; long int orientation_num = 0; for (long int idir = sp.idir_min, iorient = 0; idir <= sp.idir_max; idir++) { for (long int ipsi = sp.ipsi_min; ipsi <= sp.ipsi_max; ipsi++, iorient++) { long int iorientclass = iclass * sp.nr_dir * sp.nr_psi + iorient; if (baseMLO->isSignificantAnyImageAnyTranslation(iorientclass, sp.itrans_min, sp.itrans_max, op.Mcoarse_significant)) { // Now get the oversampled (rot, tilt, psi) triplets // This will be only the original (rot,tilt,psi) triplet in the first pass (sp.current_oversampling==0) baseMLO->sampling.getOrientations(idir, ipsi, sp.current_oversampling, oversampled_rot, oversampled_tilt, oversampled_psi, op.pointer_dir_nonzeroprior, op.directions_prior, op.pointer_psi_nonzeroprior, op.psi_prior); // Loop over all oversampled orientations (only a single one in the first pass) for (long int iover_rot = 0; iover_rot < sp.nr_oversampled_rot; iover_rot++) { ProjectionData.pushBackAll( (long unsigned)iclass, oversampled_rot[iover_rot], oversampled_tilt[iover_rot], oversampled_psi[iover_rot], iorientclass, iover_rot ); orientation_num ++; } } } } ProjectionData.orientation_num[iclass]=orientation_num; return orientation_num; } void runWavgKernel( AccProjectorKernel &projector, XFLOAT *eulers, XFLOAT *Fimg_real, XFLOAT *Fimg_imag, XFLOAT *trans_x, XFLOAT *trans_y, XFLOAT *trans_z, XFLOAT *sorted_weights, XFLOAT *ctfs, XFLOAT *wdiff2s_parts, XFLOAT *wdiff2s_AA, XFLOAT *wdiff2s_XA, OptimisationParamters &op, long unsigned orientation_num, long unsigned translation_num, unsigned long image_size, int img_id, int group_id, int exp_iclass, XFLOAT part_scale, bool refs_are_ctf_corrected, bool ctf_premultiplied, bool data_is_3D, cudaStream_t stream) { //cudaFuncSetCacheConfig(cuda_kernel_wavg_fast, cudaFuncCachePreferShared); if (ctf_premultiplied) { if (refs_are_ctf_corrected) { if(data_is_3D) AccUtilities::kernel_wavg( eulers, projector, image_size, orientation_num, Fimg_real, Fimg_imag, trans_x, trans_y, trans_z, sorted_weights, ctfs, wdiff2s_parts, wdiff2s_AA, wdiff2s_XA, translation_num, (XFLOAT) op.sum_weight[img_id], (XFLOAT) op.significant_weight[img_id], part_scale, stream ); else if (projector.mdlZ!=0) AccUtilities::kernel_wavg( eulers, projector, image_size, orientation_num, Fimg_real, Fimg_imag, trans_x, trans_y, trans_z, sorted_weights, ctfs, wdiff2s_parts, wdiff2s_AA, wdiff2s_XA, translation_num, (XFLOAT) op.sum_weight[img_id], (XFLOAT) op.significant_weight[img_id], part_scale, stream ); else AccUtilities::kernel_wavg( eulers, projector, image_size, orientation_num, Fimg_real, Fimg_imag, trans_x, trans_y, trans_z, sorted_weights, ctfs, wdiff2s_parts, wdiff2s_AA, wdiff2s_XA, translation_num, (XFLOAT) op.sum_weight[img_id], (XFLOAT) op.significant_weight[img_id], part_scale, stream ); } else { if(data_is_3D) AccUtilities::kernel_wavg( eulers, projector, image_size, orientation_num, Fimg_real, Fimg_imag, trans_x, trans_y, trans_z, sorted_weights, ctfs, wdiff2s_parts, wdiff2s_AA, wdiff2s_XA, translation_num, (XFLOAT) op.sum_weight[img_id], (XFLOAT) op.significant_weight[img_id], part_scale, stream ); else if (projector.mdlZ!=0) AccUtilities::kernel_wavg( eulers, projector, image_size, orientation_num, Fimg_real, Fimg_imag, trans_x, trans_y, trans_z, sorted_weights, ctfs, wdiff2s_parts, wdiff2s_AA, wdiff2s_XA, translation_num, (XFLOAT) op.sum_weight[img_id], (XFLOAT) op.significant_weight[img_id], part_scale, stream ); else AccUtilities::kernel_wavg( eulers, 
projector, image_size, orientation_num, Fimg_real, Fimg_imag, trans_x, trans_y, trans_z, sorted_weights, ctfs, wdiff2s_parts, wdiff2s_AA, wdiff2s_XA, translation_num, (XFLOAT) op.sum_weight[img_id], (XFLOAT) op.significant_weight[img_id], part_scale, stream ); } } else // not ctf_premultiplied { if (refs_are_ctf_corrected) { if(data_is_3D) AccUtilities::kernel_wavg( eulers, projector, image_size, orientation_num, Fimg_real, Fimg_imag, trans_x, trans_y, trans_z, sorted_weights, ctfs, wdiff2s_parts, wdiff2s_AA, wdiff2s_XA, translation_num, (XFLOAT) op.sum_weight[img_id], (XFLOAT) op.significant_weight[img_id], part_scale, stream ); else if (projector.mdlZ!=0) AccUtilities::kernel_wavg( eulers, projector, image_size, orientation_num, Fimg_real, Fimg_imag, trans_x, trans_y, trans_z, sorted_weights, ctfs, wdiff2s_parts, wdiff2s_AA, wdiff2s_XA, translation_num, (XFLOAT) op.sum_weight[img_id], (XFLOAT) op.significant_weight[img_id], part_scale, stream ); else AccUtilities::kernel_wavg( eulers, projector, image_size, orientation_num, Fimg_real, Fimg_imag, trans_x, trans_y, trans_z, sorted_weights, ctfs, wdiff2s_parts, wdiff2s_AA, wdiff2s_XA, translation_num, (XFLOAT) op.sum_weight[img_id], (XFLOAT) op.significant_weight[img_id], part_scale, stream ); } else { if(data_is_3D) AccUtilities::kernel_wavg( eulers, projector, image_size, orientation_num, Fimg_real, Fimg_imag, trans_x, trans_y, trans_z, sorted_weights, ctfs, wdiff2s_parts, wdiff2s_AA, wdiff2s_XA, translation_num, (XFLOAT) op.sum_weight[img_id], (XFLOAT) op.significant_weight[img_id], part_scale, stream ); else if (projector.mdlZ!=0) AccUtilities::kernel_wavg( eulers, projector, image_size, orientation_num, Fimg_real, Fimg_imag, trans_x, trans_y, trans_z, sorted_weights, ctfs, wdiff2s_parts, wdiff2s_AA, wdiff2s_XA, translation_num, (XFLOAT) op.sum_weight[img_id], (XFLOAT) op.significant_weight[img_id], part_scale, stream ); else AccUtilities::kernel_wavg( eulers, projector, image_size, orientation_num, Fimg_real, Fimg_imag, trans_x, trans_y, trans_z, sorted_weights, ctfs, wdiff2s_parts, wdiff2s_AA, wdiff2s_XA, translation_num, (XFLOAT) op.sum_weight[img_id], (XFLOAT) op.significant_weight[img_id], part_scale, stream ); } } LAUNCH_HANDLE_ERROR(cudaGetLastError()); } void runBackProjectKernel( AccBackprojector &BP, AccProjectorKernel &projector, XFLOAT *d_img_real, XFLOAT *d_img_imag, XFLOAT *trans_x, XFLOAT *trans_y, XFLOAT *trans_z, XFLOAT* d_weights, XFLOAT* d_Minvsigma2s, XFLOAT* d_ctfs, unsigned long translation_num, XFLOAT significant_weight, XFLOAT weight_norm, XFLOAT *d_eulers, int imgX, int imgY, int imgZ, unsigned long imageCount, bool data_is_3D, bool do_sgd, bool ctf_premultiplied, cudaStream_t optStream) { if(BP.mdlZ==1) { #ifdef CUDA if(ctf_premultiplied) cuda_kernel_backproject2D<<>>( d_img_real, d_img_imag, trans_x, trans_y, d_weights, d_Minvsigma2s, d_ctfs, translation_num, significant_weight, weight_norm, d_eulers, BP.d_mdlReal, BP.d_mdlImag, BP.d_mdlWeight, BP.maxR, BP.maxR2, BP.padding_factor, imgX, imgY, imgX*imgY, BP.mdlX, BP.mdlInitY); else cuda_kernel_backproject2D<<>>( d_img_real, d_img_imag, trans_x, trans_y, d_weights, d_Minvsigma2s, d_ctfs, translation_num, significant_weight, weight_norm, d_eulers, BP.d_mdlReal, BP.d_mdlImag, BP.d_mdlWeight, BP.maxR, BP.maxR2, BP.padding_factor, imgX, imgY, imgX*imgY, BP.mdlX, BP.mdlInitY); LAUNCH_HANDLE_ERROR(cudaGetLastError()); #else if(ctf_premultiplied) CpuKernels::backproject2D(imageCount, BP_2D_BLOCK_SIZE, d_img_real, d_img_imag, trans_x, trans_y, d_weights, d_Minvsigma2s, 
d_ctfs, translation_num, significant_weight, weight_norm, d_eulers, BP.d_mdlReal, BP.d_mdlImag, BP.d_mdlWeight, BP.maxR, BP.maxR2, (XFLOAT)BP.padding_factor, (unsigned)imgX, (unsigned)imgY, (unsigned)imgX*imgY, (unsigned)BP.mdlX, BP.mdlInitY, BP.mutexes); else CpuKernels::backproject2D(imageCount, BP_2D_BLOCK_SIZE, d_img_real, d_img_imag, trans_x, trans_y, d_weights, d_Minvsigma2s, d_ctfs, translation_num, significant_weight, weight_norm, d_eulers, BP.d_mdlReal, BP.d_mdlImag, BP.d_mdlWeight, BP.maxR, BP.maxR2, (XFLOAT)BP.padding_factor, (unsigned)imgX, (unsigned)imgY, (unsigned)imgX*imgY, (unsigned)BP.mdlX, BP.mdlInitY, BP.mutexes); #endif } else { if(do_sgd) { if(data_is_3D) #ifdef CUDA if(ctf_premultiplied) cuda_kernel_backprojectSGD<<>>( projector, d_img_real, d_img_imag, trans_x, trans_y, trans_z, d_weights, d_Minvsigma2s, d_ctfs, translation_num, significant_weight, weight_norm, d_eulers, BP.d_mdlReal, BP.d_mdlImag, BP.d_mdlWeight, BP.maxR, BP.maxR2, BP.padding_factor, imgX, imgY, imgZ, imgX*imgY*imgZ, BP.mdlX, BP.mdlY, BP.mdlInitY, BP.mdlInitZ); else cuda_kernel_backprojectSGD<<>>( projector, d_img_real, d_img_imag, trans_x, trans_y, trans_z, d_weights, d_Minvsigma2s, d_ctfs, translation_num, significant_weight, weight_norm, d_eulers, BP.d_mdlReal, BP.d_mdlImag, BP.d_mdlWeight, BP.maxR, BP.maxR2, BP.padding_factor, imgX, imgY, imgZ, imgX*imgY*imgZ, BP.mdlX, BP.mdlY, BP.mdlInitY, BP.mdlInitZ); #else if(ctf_premultiplied) CpuKernels::backprojectSGD(imageCount, BP_DATA3D_BLOCK_SIZE, projector, d_img_real, d_img_imag, trans_x, trans_y, trans_z, d_weights, d_Minvsigma2s, d_ctfs, translation_num, significant_weight, weight_norm, d_eulers, BP.d_mdlReal, BP.d_mdlImag, BP.d_mdlWeight, BP.maxR, BP.maxR2, BP.padding_factor, imgX, imgY, imgZ, (size_t)imgX*(size_t)imgY*(size_t)imgZ, BP.mdlX, BP.mdlY, BP.mdlInitY, BP.mdlInitZ, BP.mutexes); else CpuKernels::backprojectSGD(imageCount, BP_DATA3D_BLOCK_SIZE, projector, d_img_real, d_img_imag, trans_x, trans_y, trans_z, d_weights, d_Minvsigma2s, d_ctfs, translation_num, significant_weight, weight_norm, d_eulers, BP.d_mdlReal, BP.d_mdlImag, BP.d_mdlWeight, BP.maxR, BP.maxR2, BP.padding_factor, imgX, imgY, imgZ, (size_t)imgX*(size_t)imgY*(size_t)imgZ, BP.mdlX, BP.mdlY, BP.mdlInitY, BP.mdlInitZ, BP.mutexes); #endif else #ifdef CUDA if(ctf_premultiplied) cuda_kernel_backprojectSGD<<>>( projector, d_img_real, d_img_imag, trans_x, trans_y, trans_z, d_weights, d_Minvsigma2s, d_ctfs, translation_num, significant_weight, weight_norm, d_eulers, BP.d_mdlReal, BP.d_mdlImag, BP.d_mdlWeight, BP.maxR, BP.maxR2, BP.padding_factor, imgX, imgY, imgZ, imgX*imgY*imgZ, BP.mdlX, BP.mdlY, BP.mdlInitY, BP.mdlInitZ); else cuda_kernel_backprojectSGD<<>>( projector, d_img_real, d_img_imag, trans_x, trans_y, trans_z, d_weights, d_Minvsigma2s, d_ctfs, translation_num, significant_weight, weight_norm, d_eulers, BP.d_mdlReal, BP.d_mdlImag, BP.d_mdlWeight, BP.maxR, BP.maxR2, BP.padding_factor, imgX, imgY, imgZ, imgX*imgY*imgZ, BP.mdlX, BP.mdlY, BP.mdlInitY, BP.mdlInitZ); #else if(ctf_premultiplied) CpuKernels::backprojectSGD(imageCount, BP_REF3D_BLOCK_SIZE, projector, d_img_real, d_img_imag, trans_x, trans_y, trans_z, d_weights, d_Minvsigma2s, d_ctfs, translation_num, significant_weight, weight_norm, d_eulers, BP.d_mdlReal, BP.d_mdlImag, BP.d_mdlWeight, BP.maxR, BP.maxR2, (XFLOAT)BP.padding_factor, (unsigned)imgX, (unsigned)imgY, (unsigned)imgZ, (size_t)imgX*(size_t)imgY*(size_t)imgZ, (unsigned)BP.mdlX, (unsigned)BP.mdlY, BP.mdlInitY, BP.mdlInitZ, BP.mutexes); else 
CpuKernels::backprojectSGD(imageCount, BP_REF3D_BLOCK_SIZE, projector, d_img_real, d_img_imag, trans_x, trans_y, trans_z, d_weights, d_Minvsigma2s, d_ctfs, translation_num, significant_weight, weight_norm, d_eulers, BP.d_mdlReal, BP.d_mdlImag, BP.d_mdlWeight, BP.maxR, BP.maxR2, (XFLOAT)BP.padding_factor, (unsigned)imgX, (unsigned)imgY, (unsigned)imgZ, (size_t)imgX*(size_t)imgY*(size_t)imgZ, (unsigned)BP.mdlX, (unsigned)BP.mdlY, BP.mdlInitY, BP.mdlInitZ, BP.mutexes); #endif } else { if(data_is_3D) #ifdef CUDA if(ctf_premultiplied) cuda_kernel_backproject3D<<>>( d_img_real, d_img_imag, trans_x, trans_y, trans_z, d_weights, d_Minvsigma2s, d_ctfs, translation_num, significant_weight, weight_norm, d_eulers, BP.d_mdlReal, BP.d_mdlImag, BP.d_mdlWeight, BP.maxR, BP.maxR2, BP.padding_factor, imgX, imgY, imgZ, imgX*imgY*imgZ, BP.mdlX, BP.mdlY, BP.mdlInitY, BP.mdlInitZ); else cuda_kernel_backproject3D<<>>( d_img_real, d_img_imag, trans_x, trans_y, trans_z, d_weights, d_Minvsigma2s, d_ctfs, translation_num, significant_weight, weight_norm, d_eulers, BP.d_mdlReal, BP.d_mdlImag, BP.d_mdlWeight, BP.maxR, BP.maxR2, BP.padding_factor, imgX, imgY, imgZ, imgX*imgY*imgZ, BP.mdlX, BP.mdlY, BP.mdlInitY, BP.mdlInitZ); #else if(ctf_premultiplied) CpuKernels::backproject3D(imageCount,BP_DATA3D_BLOCK_SIZE, d_img_real, d_img_imag, trans_x, trans_y, trans_z, d_weights, d_Minvsigma2s, d_ctfs, translation_num, significant_weight, weight_norm, d_eulers, BP.d_mdlReal, BP.d_mdlImag, BP.d_mdlWeight, BP.maxR, BP.maxR2, (XFLOAT)BP.padding_factor, (unsigned)imgX, (unsigned)imgY, (unsigned)imgZ, (size_t)imgX*(size_t)imgY*(size_t)imgZ, (unsigned)BP.mdlX, (unsigned)BP.mdlY, BP.mdlInitY, BP.mdlInitZ, BP.mutexes); else CpuKernels::backproject3D(imageCount,BP_DATA3D_BLOCK_SIZE, d_img_real, d_img_imag, trans_x, trans_y, trans_z, d_weights, d_Minvsigma2s, d_ctfs, translation_num, significant_weight, weight_norm, d_eulers, BP.d_mdlReal, BP.d_mdlImag, BP.d_mdlWeight, BP.maxR, BP.maxR2, (XFLOAT)BP.padding_factor, (unsigned)imgX, (unsigned)imgY, (unsigned)imgZ, (size_t)imgX*(size_t)imgY*(size_t)imgZ, (unsigned)BP.mdlX, (unsigned)BP.mdlY, BP.mdlInitY, BP.mdlInitZ, BP.mutexes); #endif else #ifdef CUDA if(ctf_premultiplied) cuda_kernel_backproject3D<<>>( d_img_real, d_img_imag, trans_x, trans_y, trans_z, d_weights, d_Minvsigma2s, d_ctfs, translation_num, significant_weight, weight_norm, d_eulers, BP.d_mdlReal, BP.d_mdlImag, BP.d_mdlWeight, BP.maxR, BP.maxR2, BP.padding_factor, imgX, imgY, imgZ, imgX*imgY*imgZ, BP.mdlX, BP.mdlY, BP.mdlInitY, BP.mdlInitZ); else cuda_kernel_backproject3D<<>>( d_img_real, d_img_imag, trans_x, trans_y, trans_z, d_weights, d_Minvsigma2s, d_ctfs, translation_num, significant_weight, weight_norm, d_eulers, BP.d_mdlReal, BP.d_mdlImag, BP.d_mdlWeight, BP.maxR, BP.maxR2, BP.padding_factor, imgX, imgY, imgZ, imgX*imgY*imgZ, BP.mdlX, BP.mdlY, BP.mdlInitY, BP.mdlInitZ); #else #if 1 //TODO Clean this up if(ctf_premultiplied) CpuKernels::backprojectRef3D(imageCount, d_img_real, d_img_imag, trans_x, trans_y, d_weights, d_Minvsigma2s, d_ctfs, translation_num, significant_weight, weight_norm, d_eulers, BP.d_mdlReal, BP.d_mdlImag, BP.d_mdlWeight, BP.maxR, BP.maxR2, (XFLOAT)BP.padding_factor, (unsigned)imgX, (unsigned)imgY, (unsigned)imgZ, (size_t)imgX*(size_t)imgY*(size_t)imgZ, (unsigned)BP.mdlX, (unsigned)BP.mdlY, BP.mdlInitY, BP.mdlInitZ, BP.mutexes); else CpuKernels::backprojectRef3D(imageCount, d_img_real, d_img_imag, trans_x, trans_y, d_weights, d_Minvsigma2s, d_ctfs, translation_num, significant_weight, weight_norm, 
d_eulers, BP.d_mdlReal, BP.d_mdlImag, BP.d_mdlWeight, BP.maxR, BP.maxR2, (XFLOAT)BP.padding_factor, (unsigned)imgX, (unsigned)imgY, (unsigned)imgZ, (size_t)imgX*(size_t)imgY*(size_t)imgZ, (unsigned)BP.mdlX, (unsigned)BP.mdlY, BP.mdlInitY, BP.mdlInitZ, BP.mutexes); #else if(ctf_premultiplied) CpuKernels::backproject3D(imageCount,BP_REF3D_BLOCK_SIZE, d_img_real, d_img_imag, trans_x, trans_y, trans_z, d_weights, d_Minvsigma2s, d_ctfs, translation_num, significant_weight, weight_norm, d_eulers, BP.d_mdlReal, BP.d_mdlImag, BP.d_mdlWeight, BP.maxR, BP.maxR2, (XFLOAT)BP.padding_factor, (unsigned)imgX, (unsigned)imgY, (unsigned)imgZ, (size_t)imgX*(size_t)imgY*(size_t)imgZ, (unsigned)BP.mdlX, (unsigned)BP.mdlY, BP.mdlInitY, BP.mdlInitZ, BP.mutexes); else CpuKernels::backproject3D(imageCount,BP_REF3D_BLOCK_SIZE, d_img_real, d_img_imag, trans_x, trans_y, trans_z, d_weights, d_Minvsigma2s, d_ctfs, translation_num, significant_weight, weight_norm, d_eulers, BP.d_mdlReal, BP.d_mdlImag, BP.d_mdlWeight, BP.maxR, BP.maxR2, (XFLOAT)BP.padding_factor, (unsigned)imgX, (unsigned)imgY, (unsigned)imgZ, (size_t)imgX*(size_t)imgY*(size_t)imgZ, (unsigned)BP.mdlX, (unsigned)BP.mdlY, BP.mdlInitY, BP.mdlInitZ, BP.mutexes); #endif #endif } // do_sgd is false LAUNCH_HANDLE_ERROR(cudaGetLastError()); } } #define WEIGHT_MAP_BLOCK_SIZE 512 void mapAllWeightsToMweights( unsigned long * d_iorient, //projectorPlan.iorientclasses XFLOAT * d_allweights, //allWeights XFLOAT * d_mweights, //Mweight unsigned long orientation_num, //projectorPlan.orientation_num unsigned long translation_num, //translation_num cudaStream_t stream ) { size_t combinations = orientation_num*translation_num; int grid_size = ceil((float)(combinations)/(float)WEIGHT_MAP_BLOCK_SIZE); #ifdef CUDA cuda_kernel_allweights_to_mweights<<< grid_size, WEIGHT_MAP_BLOCK_SIZE, 0, stream >>>( d_iorient, d_allweights, d_mweights, orientation_num, translation_num, WEIGHT_MAP_BLOCK_SIZE); LAUNCH_HANDLE_ERROR(cudaGetLastError()); #else for (size_t i=0; i < combinations; i++) d_mweights[d_iorient[i/translation_num] * translation_num + i%translation_num] = d_allweights[i/translation_num * translation_num + i%translation_num]; // TODO - isn't this just d_allweights[idx + idx%translation_num]? Really? #endif } void runDiff2KernelCoarse( AccProjectorKernel &projector, XFLOAT *trans_x, XFLOAT *trans_y, XFLOAT *trans_z, XFLOAT *corr_img, XFLOAT *Fimg_real, XFLOAT *Fimg_imag, XFLOAT *d_eulers, XFLOAT *diff2s, XFLOAT local_sqrtXi2, long unsigned orientation_num, long unsigned translation_num, long unsigned image_size, cudaStream_t stream, bool do_CC, bool data_is_3D) { const long unsigned blocks3D = (data_is_3D? D2C_BLOCK_SIZE_DATA3D : D2C_BLOCK_SIZE_REF3D); if(!do_CC) { if(projector.mdlZ!=0) { #ifdef ACC_DOUBLE_PRECISION if (translation_num > blocks3D*4) CRITICAL(ERR_TRANSLIM); #else if (translation_num > blocks3D*8) CRITICAL(ERR_TRANSLIM); #endif long unsigned rest = orientation_num % blocks3D; long unsigned even_orientation_num = orientation_num - rest; // TODO - find a more compact way to represent these combinations resulting in // a single call to diff2_course? 
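		// The branches below all launch the same kernel; they differ only in the block
		// size handed to diff2_coarse. The launch picks the smallest multiple of the base
		// block size (blocks3D x1, x2, x4, and x8 in single precision) that still covers
		// translation_num -- e.g. if blocks3D were 128 (illustrative value) and
		// translation_num were 300, the x4 branch (512 threads per block) would run.
		// Larger requests were already rejected with ERR_TRANSLIM above; in double
		// precision the x8 variant is unavailable, hence the stricter limit.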
if (translation_num <= blocks3D) { if (even_orientation_num != 0) { if(data_is_3D) AccUtilities::diff2_coarse( even_orientation_num/(unsigned long)D2C_EULERS_PER_BLOCK_DATA3D, D2C_BLOCK_SIZE_DATA3D, d_eulers, trans_x, trans_y, trans_z, Fimg_real, Fimg_imag, projector, corr_img, diff2s, translation_num, image_size, stream); else AccUtilities::diff2_coarse( even_orientation_num/(unsigned long)D2C_EULERS_PER_BLOCK_REF3D, D2C_BLOCK_SIZE_REF3D, d_eulers, trans_x, trans_y, trans_z, Fimg_real, Fimg_imag, projector, corr_img, diff2s, translation_num, image_size, stream); } if (rest != 0) { if(data_is_3D) AccUtilities::diff2_coarse( rest, D2C_BLOCK_SIZE_DATA3D, &d_eulers[9*even_orientation_num], trans_x, trans_y, trans_z, Fimg_real, Fimg_imag, projector, corr_img, &diff2s[translation_num*even_orientation_num], translation_num, image_size, stream); else AccUtilities::diff2_coarse( rest, D2C_BLOCK_SIZE_REF3D, &d_eulers[9*even_orientation_num], trans_x, trans_y, trans_z, Fimg_real, Fimg_imag, projector, corr_img, &diff2s[translation_num*even_orientation_num], translation_num, image_size, stream); } } else if (translation_num <= blocks3D*2) { if (even_orientation_num != 0) { if(data_is_3D) AccUtilities::diff2_coarse( even_orientation_num/(unsigned long)D2C_EULERS_PER_BLOCK_DATA3D, D2C_BLOCK_SIZE_DATA3D*2, d_eulers, trans_x, trans_y, trans_z, Fimg_real, Fimg_imag, projector, corr_img, diff2s, translation_num, image_size, stream); else AccUtilities::diff2_coarse( even_orientation_num/(unsigned long)D2C_EULERS_PER_BLOCK_REF3D, D2C_BLOCK_SIZE_REF3D*2, d_eulers, trans_x, trans_y, trans_z, Fimg_real, Fimg_imag, projector, corr_img, diff2s, translation_num, image_size, stream); } if (rest != 0) { if(data_is_3D) AccUtilities::diff2_coarse( rest, D2C_BLOCK_SIZE_DATA3D*2, &d_eulers[9*even_orientation_num], trans_x, trans_y, trans_z, Fimg_real, Fimg_imag, projector, corr_img, &diff2s[translation_num*even_orientation_num], translation_num, image_size, stream); else AccUtilities::diff2_coarse( rest, D2C_BLOCK_SIZE_REF3D*2, &d_eulers[9*even_orientation_num], trans_x, trans_y, trans_z, Fimg_real, Fimg_imag, projector, corr_img, &diff2s[translation_num*even_orientation_num], translation_num, image_size, stream); } } else if (translation_num <= blocks3D*4) { if (even_orientation_num != 0) { if(data_is_3D) AccUtilities::diff2_coarse( even_orientation_num/(unsigned long)D2C_EULERS_PER_BLOCK_DATA3D, D2C_BLOCK_SIZE_DATA3D*4, d_eulers, trans_x, trans_y, trans_z, Fimg_real, Fimg_imag, projector, corr_img, diff2s, translation_num, image_size, stream); else AccUtilities::diff2_coarse( even_orientation_num/(unsigned long)D2C_EULERS_PER_BLOCK_REF3D, D2C_BLOCK_SIZE_REF3D*4, d_eulers, trans_x, trans_y, trans_z, Fimg_real, Fimg_imag, projector, corr_img, diff2s, translation_num, image_size, stream); } if (rest != 0) { if(data_is_3D) AccUtilities::diff2_coarse( rest, D2C_BLOCK_SIZE_DATA3D*4, &d_eulers[9*even_orientation_num], trans_x, trans_y, trans_z, Fimg_real, Fimg_imag, projector, corr_img, &diff2s[translation_num*even_orientation_num], translation_num, image_size, stream); else AccUtilities::diff2_coarse( rest, D2C_BLOCK_SIZE_REF3D*4, &d_eulers[9*even_orientation_num], trans_x, trans_y, trans_z, Fimg_real, Fimg_imag, projector, corr_img, &diff2s[translation_num*even_orientation_num], translation_num, image_size, stream); } } #ifndef ACC_DOUBLE_PRECISION else { if (even_orientation_num != 0) { if(data_is_3D) AccUtilities::diff2_coarse( even_orientation_num/(unsigned long)D2C_EULERS_PER_BLOCK_DATA3D, D2C_BLOCK_SIZE_DATA3D*8, 
d_eulers, trans_x, trans_y, trans_z, Fimg_real, Fimg_imag, projector, corr_img, diff2s, translation_num, image_size, stream); else AccUtilities::diff2_coarse( even_orientation_num/(unsigned long)D2C_EULERS_PER_BLOCK_REF3D, D2C_BLOCK_SIZE_REF3D*8, d_eulers, trans_x, trans_y, trans_z, Fimg_real, Fimg_imag, projector, corr_img, diff2s, translation_num, image_size, stream); } if (rest != 0) { if(data_is_3D) AccUtilities::diff2_coarse( rest, D2C_BLOCK_SIZE_DATA3D*8, &d_eulers[9*even_orientation_num], trans_x, trans_y, trans_z, Fimg_real, Fimg_imag, projector, corr_img, &diff2s[translation_num*even_orientation_num], translation_num, image_size, stream); else AccUtilities::diff2_coarse( rest, D2C_BLOCK_SIZE_REF3D*8, &d_eulers[9*even_orientation_num], trans_x, trans_y, trans_z, Fimg_real, Fimg_imag, projector, corr_img, &diff2s[translation_num*even_orientation_num], translation_num, image_size, stream); } } #endif } // projector.mdlZ!=0 else { if (translation_num > D2C_BLOCK_SIZE_2D) { printf("Number of coarse translations larger than %d on the GPU not supported.\n", D2C_BLOCK_SIZE_2D); fflush(stdout); exit(1); } long unsigned rest = orientation_num % (unsigned long)D2C_EULERS_PER_BLOCK_2D; long unsigned even_orientation_num = orientation_num - rest; if (even_orientation_num != 0) { if(data_is_3D) AccUtilities::diff2_coarse( even_orientation_num/(unsigned long)D2C_EULERS_PER_BLOCK_2D, D2C_BLOCK_SIZE_2D, d_eulers, trans_x, trans_y, trans_z, Fimg_real, Fimg_imag, projector, corr_img, diff2s, translation_num, image_size, stream); else AccUtilities::diff2_coarse( even_orientation_num/(unsigned long)D2C_EULERS_PER_BLOCK_2D, D2C_BLOCK_SIZE_2D, d_eulers, trans_x, trans_y, trans_z, Fimg_real, Fimg_imag, projector, corr_img, diff2s, translation_num, image_size, stream); LAUNCH_HANDLE_ERROR(cudaGetLastError()); } if (rest != 0) { if(data_is_3D) AccUtilities::diff2_coarse( rest, D2C_BLOCK_SIZE_2D, &d_eulers[9*even_orientation_num], trans_x, trans_y, trans_z, Fimg_real, Fimg_imag, projector, corr_img, &diff2s[translation_num*even_orientation_num], translation_num, image_size, stream); else AccUtilities::diff2_coarse( rest, D2C_BLOCK_SIZE_2D, &d_eulers[9*even_orientation_num], trans_x, trans_y, trans_z, Fimg_real, Fimg_imag, projector, corr_img, &diff2s[translation_num*even_orientation_num], translation_num, image_size, stream); LAUNCH_HANDLE_ERROR(cudaGetLastError()); } } // projector.mdlZ==0 } // !do_CC else { // do_CC // TODO - find a more compact way to represent these combinations resulting in // a single call to diff2_CC_course? 
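		// Note: the cross-correlation variant below does not need the translation-count
		// based block-size selection used in the squared-difference path; the whole
		// orientation set goes into a single diff2_CC_coarse call per dimensionality
		// case, with local_sqrtXi2 passed along for the cross-correlation normalisation.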
// dim3 CCblocks(orientation_num,translation_num); if(data_is_3D) AccUtilities::diff2_CC_coarse( orientation_num, D2C_BLOCK_SIZE_DATA3D, d_eulers, Fimg_real, Fimg_imag, trans_x, trans_y, trans_z, projector, corr_img, diff2s, translation_num, image_size, local_sqrtXi2, stream); else if(projector.mdlZ!=0) AccUtilities::diff2_CC_coarse( orientation_num, D2C_BLOCK_SIZE_REF3D, d_eulers, Fimg_real, Fimg_imag, trans_x, trans_y, trans_z, projector, corr_img, diff2s, translation_num, image_size, local_sqrtXi2, stream); else AccUtilities::diff2_CC_coarse( orientation_num, D2C_BLOCK_SIZE_2D, d_eulers, Fimg_real, Fimg_imag, trans_x, trans_y, trans_z, projector, corr_img, diff2s, translation_num, image_size, local_sqrtXi2, stream); LAUNCH_HANDLE_ERROR(cudaGetLastError()); } // do_CC } void runDiff2KernelFine( AccProjectorKernel &projector, XFLOAT *corr_img, XFLOAT *Fimgs_real, XFLOAT *Fimgs_imag, XFLOAT *trans_x, XFLOAT *trans_y, XFLOAT *trans_z, XFLOAT *eulers, long unsigned *rot_id, long unsigned *rot_idx, long unsigned *trans_idx, long unsigned *job_idx, long unsigned *job_num, XFLOAT *diff2s, OptimisationParamters &op, MlOptimiser *baseMLO, long unsigned orientation_num, long unsigned translation_num, long unsigned significant_num, unsigned long image_size, int img_id, int exp_iclass, cudaStream_t stream, long unsigned job_num_count, bool do_CC, bool data_is_3D) { long unsigned block_dim = job_num_count; if(!do_CC) { // TODO - find a more compact way to represent these combinations resulting in // a single call to diff2_fine? if(data_is_3D) AccUtilities::diff2_fine( block_dim, D2F_BLOCK_SIZE_DATA3D, eulers, Fimgs_real, Fimgs_imag, trans_x, trans_y, trans_z, projector, corr_img, // in these non-CC kernels this is effectively an adjusted MinvSigma2 diff2s, image_size, op.highres_Xi2_img[img_id] / 2., orientation_num, translation_num, job_num_count, //significant_num, rot_idx, trans_idx, job_idx, job_num, stream); else if(projector.mdlZ!=0) AccUtilities::diff2_fine( block_dim, D2F_BLOCK_SIZE_REF3D, eulers, Fimgs_real, Fimgs_imag, trans_x, trans_y, trans_z, projector, corr_img, // in these non-CC kernels this is effectively an adjusted MinvSigma2 diff2s, image_size, op.highres_Xi2_img[img_id] / 2., orientation_num, translation_num, job_num_count, //significant_num, rot_idx, trans_idx, job_idx, job_num, stream); else AccUtilities::diff2_fine( block_dim, D2F_BLOCK_SIZE_2D, eulers, Fimgs_real, Fimgs_imag, trans_x, trans_y, trans_z, projector, corr_img, // in these non-CC kernels this is effectively an adjusted MinvSigma2 diff2s, image_size, op.highres_Xi2_img[img_id] / 2., orientation_num, translation_num, job_num_count, //significant_num, rot_idx, trans_idx, job_idx, job_num, stream); LAUNCH_HANDLE_ERROR(cudaGetLastError()); } else { // TODO - find a more compact way to represent these combinations resulting in // a single call to diff2_CC_fine? 
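		// As in the non-CC branch above, the fine pass launches one block per "job"
		// (block_dim = job_num_count). Each job, built by makeJobsForDiff2Fine, is a run
		// of contiguous translations sharing one orientation, addressed through the
		// rot_idx / trans_idx / job_idx / job_num index arrays.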
if(data_is_3D) AccUtilities::diff2_CC_fine( block_dim, D2F_BLOCK_SIZE_DATA3D, eulers, Fimgs_real, Fimgs_imag, trans_x, trans_y, trans_z, projector, corr_img, diff2s, image_size, op.highres_Xi2_img[img_id] / 2., (XFLOAT) op.local_sqrtXi2[img_id], orientation_num, translation_num, job_num_count, //significant_num, rot_idx, trans_idx, job_idx, job_num, stream); else if(projector.mdlZ!=0) AccUtilities::diff2_CC_fine( block_dim, D2F_BLOCK_SIZE_REF3D, eulers, Fimgs_real, Fimgs_imag, trans_x, trans_y, trans_z, projector, corr_img, diff2s, image_size, op.highres_Xi2_img[img_id] / 2., (XFLOAT) op.local_sqrtXi2[img_id], orientation_num, translation_num, job_num_count, //significant_num, rot_idx, trans_idx, job_idx, job_num, stream); else AccUtilities::diff2_CC_fine( block_dim, D2F_BLOCK_SIZE_2D, eulers, Fimgs_real, Fimgs_imag, trans_x, trans_y, trans_z, projector, corr_img, diff2s, image_size, op.highres_Xi2_img[img_id] / 2., (XFLOAT) op.local_sqrtXi2[img_id], orientation_num, translation_num, job_num_count, //significant_num, rot_idx, trans_idx, job_idx, job_num, stream); LAUNCH_HANDLE_ERROR(cudaGetLastError()); } } void runCollect2jobs( int grid_dim, XFLOAT * oo_otrans_x, // otrans-size -> make const XFLOAT * oo_otrans_y, // otrans-size -> make const XFLOAT * oo_otrans_z, // otrans-size -> make const XFLOAT * myp_oo_otrans_x2y2z2, // otrans-size -> make const XFLOAT * weights, XFLOAT significant_weight, XFLOAT sum_weight, unsigned long nr_trans, unsigned long nr_oversampled_trans, unsigned long nr_oversampled_rot, unsigned long oversamples, bool skip_rots, XFLOAT * p_weights, XFLOAT * p_thr_wsum_prior_offsetx_class, XFLOAT * p_thr_wsum_prior_offsety_class, XFLOAT * p_thr_wsum_prior_offsetz_class, XFLOAT * p_thr_wsum_sigma2_offset, size_t * rot_idx, size_t * trans_idx, size_t * jobOrigin, size_t * jobExtent, bool data_is_3D ) { if (data_is_3D) { #ifdef CUDA dim3 numblocks(grid_dim); size_t shared_buffer = sizeof(XFLOAT)*SUMW_BLOCK_SIZE*5; // x+y+z+myp+weights cuda_kernel_collect2jobs<<>>( oo_otrans_x, // otrans-size -> make const oo_otrans_y, // otrans-size -> make const oo_otrans_z, // otrans-size -> make const myp_oo_otrans_x2y2z2, // otrans-size -> make const weights, significant_weight, sum_weight, nr_trans, nr_oversampled_trans, nr_oversampled_rot, oversamples, skip_rots, p_weights, p_thr_wsum_prior_offsetx_class, p_thr_wsum_prior_offsety_class, p_thr_wsum_prior_offsetz_class, p_thr_wsum_sigma2_offset, rot_idx, trans_idx, jobOrigin, jobExtent); #else CpuKernels::collect2jobs(grid_dim, SUMW_BLOCK_SIZE, oo_otrans_x, // otrans-size -> make const oo_otrans_y, // otrans-size -> make const oo_otrans_z, // otrans-size -> make const myp_oo_otrans_x2y2z2, // otrans-size -> make const weights, significant_weight, sum_weight, nr_trans, nr_oversampled_trans, nr_oversampled_rot, oversamples, skip_rots, p_weights, p_thr_wsum_prior_offsetx_class, p_thr_wsum_prior_offsety_class, p_thr_wsum_prior_offsetz_class, p_thr_wsum_sigma2_offset, rot_idx, trans_idx, jobOrigin, jobExtent); #endif } else { #ifdef CUDA dim3 numblocks(grid_dim); size_t shared_buffer = sizeof(XFLOAT)*SUMW_BLOCK_SIZE*4; // x+y+myp+weights cuda_kernel_collect2jobs<<>>( oo_otrans_x, // otrans-size -> make const oo_otrans_y, // otrans-size -> make const oo_otrans_z, // otrans-size -> make const myp_oo_otrans_x2y2z2, // otrans-size -> make const weights, significant_weight, sum_weight, nr_trans, nr_oversampled_trans, nr_oversampled_rot, oversamples, skip_rots, p_weights, p_thr_wsum_prior_offsetx_class, p_thr_wsum_prior_offsety_class, 
p_thr_wsum_prior_offsetz_class, p_thr_wsum_sigma2_offset, rot_idx, trans_idx, jobOrigin, jobExtent); #else CpuKernels::collect2jobs(grid_dim, SUMW_BLOCK_SIZE, oo_otrans_x, // otrans-size -> make const oo_otrans_y, // otrans-size -> make const oo_otrans_z, // otrans-size -> make const myp_oo_otrans_x2y2z2, // otrans-size -> make const weights, significant_weight, sum_weight, nr_trans, nr_oversampled_trans, nr_oversampled_rot, oversamples, skip_rots, p_weights, p_thr_wsum_prior_offsetx_class, p_thr_wsum_prior_offsety_class, p_thr_wsum_prior_offsetz_class, p_thr_wsum_sigma2_offset, rot_idx, trans_idx, jobOrigin, jobExtent); #endif } } //void windowFourierTransform2( // XFLOAT *d_in_real, // XFLOAT *d_in_imag, // XFLOAT *d_out_real, // XFLOAT *d_out_imag, // unsigned iX, unsigned iY, unsigned iZ, //Input dimensions // unsigned oX, unsigned oY, unsigned oZ, //Output dimensions // cudaStream_t stream // ) //{ // if (iX > 1 && iY/2 + 1 != iX) // REPORT_ERROR("windowFourierTransform ERROR: the Fourier transform should be of an image with equal sizes in all dimensions!"); // // if (oY == iX) // REPORT_ERROR("windowFourierTransform ERROR: there is a one-to-one map between input and output!"); // // cudaMemInit( d_out_real, 0, (size_t) oX*oY*oZ, stream ); // cudaMemInit( d_out_imag, 0, (size_t) oX*oY*oZ, stream ); // // if (oY > iX) // { // long int max_r2 = (iX - 1) * (iX - 1); // // unsigned grid_dim = ceil((float)(iX*iY*iZ) / (float) WINDOW_FT_BLOCK_SIZE); // cuda_kernel_window_fourier_transform<<< grid_dim, WINDOW_FT_BLOCK_SIZE, 0, stream >>>( // d_in_real, // d_in_imag, // d_out_real, // d_out_imag, // iX, iY, iZ, iX * iY, //Input dimensions // oX, oY, oZ, oX * oY, //Output dimensions // iX*iY*iZ, // max_r2 ); // } // else // { // unsigned grid_dim = ceil((float)(oX*oY*oZ) / (float) WINDOW_FT_BLOCK_SIZE); // cuda_kernel_window_fourier_transform<<< grid_dim, WINDOW_FT_BLOCK_SIZE, 0, stream >>>( // d_in_real, // d_in_imag, // d_out_real, // d_out_imag, // iX, iY, iZ, iX * iY, //Input dimensions // oX, oY, oZ, oX * oY, //Output dimensions // oX*oY*oZ); // } //} #define WINDOW_FT_BLOCK_SIZE 128 void windowFourierTransform2( AccPtr &d_in, AccPtr &d_out, size_t iX, size_t iY, size_t iZ, //Input dimensions size_t oX, size_t oY, size_t oZ, //Output dimensions size_t Npsi, size_t pos, cudaStream_t stream) { if (iX > 1 && iY/2 + 1 != iX) REPORT_ERROR("windowFourierTransform ERROR: the Fourier transform should be of an image with equal sizes in all dimensions!"); // if (oX == iX) // REPORT_ERROR("windowFourierTransform ERROR: there is a one-to-one map between input and output!"); deviceInitComplexValue(d_out, (XFLOAT)0.); HANDLE_ERROR(cudaStreamSynchronize(d_out.getStream())); if(oX==iX) { HANDLE_ERROR(cudaStreamSynchronize(d_in.getStream())); #ifdef CUDA cudaCpyDeviceToDevice(&d_in(pos), ~d_out, oX*oY*oZ*Npsi, d_out.getStream() ); #else memcpy(&d_out[0], &d_in[0], oX*oY*oZ*Npsi*sizeof(ACCCOMPLEX)); #endif return; } if (oX > iX) { long int max_r2 = (iX - 1) * (iX - 1); #ifdef CUDA dim3 grid_dim(ceil((float)(iX*iY*iZ) / (float) WINDOW_FT_BLOCK_SIZE),Npsi); cuda_kernel_window_fourier_transform<<< grid_dim, WINDOW_FT_BLOCK_SIZE, 0, d_out.getStream() >>>( &d_in(pos), ~d_out, iX, iY, iZ, iX * iY, //Input dimensions oX, oY, oZ, oX * oY, //Output dimensions iX*iY*iZ, WINDOW_FT_BLOCK_SIZE, max_r2); LAUNCH_HANDLE_ERROR(cudaGetLastError()); #else size_t grid_dim = (size_t)( ceil((float)(iX*iY*iZ) / (float) WINDOW_FT_BLOCK_SIZE)); CpuKernels::window_fourier_transform( grid_dim, Npsi, WINDOW_FT_BLOCK_SIZE, &d_in[pos], 
&d_out[0], iX, iY, iZ, iX * iY, //Input dimensions oX, oY, oZ, oX * oY, //Output dimensions iX*iY*iZ, max_r2); #endif } else { #ifdef CUDA dim3 grid_dim(ceil((float)(oX*oY*oZ) / (float) WINDOW_FT_BLOCK_SIZE),Npsi); cuda_kernel_window_fourier_transform<<< grid_dim, WINDOW_FT_BLOCK_SIZE, 0, d_out.getStream() >>>( &d_in(pos), ~d_out, iX, iY, iZ, iX * iY, //Input dimensions oX, oY, oZ, oX * oY, //Output dimensions oX*oY*oZ, WINDOW_FT_BLOCK_SIZE); LAUNCH_HANDLE_ERROR(cudaGetLastError()); #else int grid_dim = (int)( ceil((float)(oX*oY*oZ) / (float) WINDOW_FT_BLOCK_SIZE)); CpuKernels::window_fourier_transform( grid_dim, Npsi, WINDOW_FT_BLOCK_SIZE, &d_in[pos], &d_out[0], iX, iY, iZ, iX * iY, //Input dimensions oX, oY, oZ, oX * oY, //Output dimensions oX*oY*oZ ); #endif } } void run_calcPowerSpectrum(Complex *dFaux, int padoridim, Complex *ddata, int data_sz, RFLOAT *dpower_spectrum, RFLOAT *dcounter, int max_r2, int min_r2, RFLOAT normfft, RFLOAT padding_factor, RFLOAT weight, RFLOAT *dfourier_mask, int fx, int fy, int fz, bool do_fourier_mask, bool if3D) { #ifdef CUDA dim3 bs(32,4); dim3 gs(ceil((padoridim/2+1)/(float)bs.x), ceil(padoridim/(float)bs.y)); if(if3D) { bs.z = 2; gs.z = ceil(padoridim/(float)bs.z); } if(sizeof(RFLOAT) == sizeof(double)) cuda_kernel_calcPowerSpectrum<<>>((double2*)dFaux,padoridim,(double2*)ddata,data_sz,dpower_spectrum,dcounter, max_r2,min_r2,normfft,padding_factor,weight,dfourier_mask,fx,fy,fz,do_fourier_mask); else cuda_kernel_calcPowerSpectrum<<>>((float2*)dFaux,padoridim,(float2*)ddata,data_sz,dpower_spectrum,dcounter, max_r2,min_r2,normfft,padding_factor,weight,dfourier_mask,fx,fy,fz,do_fourier_mask); LAUNCH_HANDLE_ERROR(cudaGetLastError()); #endif } void run_updatePowerSpectrum(RFLOAT *dcounter, int sz, RFLOAT *dpower_spectrum) { #ifdef CUDA cuda_kernel_updatePowerSpectrum<<>>(dcounter, dpower_spectrum, sz); LAUNCH_HANDLE_ERROR(cudaGetLastError()); #endif } void scale(RFLOAT *img, size_t sz, RFLOAT val, cudaStream_t stream) { int block_size = 256; int MultiBsize = ceil(sz/(float)block_size); #ifdef CUDA AccUtilities::multiply(MultiBsize,block_size, stream, img, val, (size_t)sz); #endif } void selfApplyBeamTilt2(MultidimArray &Fimg, RFLOAT beamtilt_x, RFLOAT beamtilt_y, RFLOAT wavelength, RFLOAT Cs, RFLOAT angpix, int ori_size) { if (Fimg.getDim() != 2) REPORT_ERROR("applyBeamTilt can only be done on 2D Fourier Transforms!"); RFLOAT boxsize = angpix * ori_size; RFLOAT factor = 0.360 * Cs * 10000000 * wavelength * wavelength / (boxsize * boxsize * boxsize); for (unsigned n = 0 ; n < Fimg.yxdim; n ++) { unsigned i = n / Fimg.xdim; unsigned j = n % Fimg.xdim; unsigned jp = j; int ip = i < Fimg.xdim ? i : i - Fimg.ydim; RFLOAT delta_phase = factor * (ip * ip + jp * jp) * (ip * beamtilt_y + jp * beamtilt_x); RFLOAT realval = Fimg.data[i*Fimg.xdim+j].real; RFLOAT imagval = Fimg.data[i*Fimg.xdim+j].imag; RFLOAT mag = sqrt(realval * realval + imagval * imagval); RFLOAT phas = atan2(imagval, realval) + DEG2RAD(delta_phase); // apply phase shift! 
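// Aside for clarity: the polar round-trip below (magnitude/phase -> cos/sin) is
// mathematically equivalent to multiplying the Fourier coefficient by
// (cos d + i sin d) with d = DEG2RAD(delta_phase), i.e. rotating its phase by the
// beam-tilt term while leaving the magnitude unchanged; this is only an
// illustrative restatement of what the two lines below compute.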
realval = mag * cos(phas); imagval = mag * sin(phas); Fimg.data[i*Fimg.xdim+j] = Complex(realval, imagval); } } relion-3.1.3/src/acc/acc_ml_optimiser.h000066400000000000000000000247211411340063500200100ustar00rootroot00000000000000#ifndef ACC_ML_OPTIMISER_H_ #define ACC_ML_OPTIMISER_H_ #include "src/acc/acc_ptr.h" #ifdef ALTCPU #include #endif /* #ifdef ACC_DOUBLE_PRECISION #define XFLOAT double #else #define XFLOAT float #endif */ class SamplingParameters { public: unsigned long nr_dir, nr_psi, nr_trans, nr_oversampled_rot, nr_oversampled_trans, nr_images, current_oversampling, current_image_size, iclass_min, iclass_max, idir_min, idir_max, ipsi_min, ipsi_max, itrans_min, itrans_max; std::string current_img; SamplingParameters(): nr_dir(0), nr_psi(0), nr_trans(0), nr_oversampled_rot(0), nr_oversampled_trans(0), nr_images(0), current_oversampling(0), current_image_size(0), iclass_min(0), iclass_max(0), idir_min(0), idir_max(0), ipsi_min(0), ipsi_max(0), itrans_min(0), itrans_max(0), current_img() {}; }; class Indices { public: size_t fineIdx, coarseIdx, iclass, idir, ipsi, itrans, ioverrot, iovertrans; Indices(): fineIdx(0), coarseIdx(0), iclass(0), idir(0), ipsi(0), itrans(0), ioverrot(0), iovertrans(0) {}; void fineIndexToFineIndices(SamplingParameters sp) // converts an "ihidden_over" (finely sampled) index to partial indices (and coarse index) { int oversamples = sp.nr_oversampled_rot*sp.nr_oversampled_trans; int t_idx = fineIdx; iclass = floor( t_idx / ( sp.nr_dir * sp.nr_psi * sp.nr_trans * oversamples )); t_idx -= iclass * ( sp.nr_dir * sp.nr_psi * sp.nr_trans * oversamples ); idir = floor( t_idx / ( sp.nr_psi * sp.nr_trans * oversamples )); t_idx -= idir * ( sp.nr_psi * sp.nr_trans * oversamples ); ipsi = floor( t_idx / ( sp.nr_trans * oversamples )); t_idx -= ipsi * ( sp.nr_trans * oversamples ); itrans = floor( t_idx / oversamples ); t_idx -= itrans * oversamples ; ioverrot = floor( t_idx / sp.nr_oversampled_trans ); t_idx -= ioverrot * sp.nr_oversampled_trans ; iovertrans = t_idx ; coarseIdx = sp.nr_trans * sp.nr_psi * idir + sp.nr_trans * ipsi + itrans; } void fineIndicesToFineIndex(SamplingParameters sp) // converts partial indices to an "ihidden_over" (finely sampled) index // FIXME Untested { int oversamples = sp.nr_oversampled_rot*sp.nr_oversampled_trans; size_t idx = 0; idx += iclass * sp.nr_dir * sp.nr_psi * sp.nr_trans * oversamples; idx += idir * sp.nr_psi * sp.nr_trans * oversamples; idx += ipsi * sp.nr_trans * oversamples; idx += itrans * oversamples; idx += ioverrot * sp.nr_oversampled_trans; idx += iovertrans; fineIdx = idx; } void coarseIndexToCoarseIndices(SamplingParameters sp) // converts an "ihidden" (coarsely sampled) index to coarse partial indices // FIXME Untested { size_t t_idx = coarseIdx; iclass = floor( t_idx / ( sp.nr_dir * sp.nr_psi * sp.nr_trans)); t_idx -= iclass * ( sp.nr_dir * sp.nr_psi * sp.nr_trans); idir = floor( t_idx / ( sp.nr_psi * sp.nr_trans )); t_idx -= idir * ( sp.nr_psi * sp.nr_trans ); ipsi = floor( t_idx / ( sp.nr_trans )); t_idx -= ipsi * ( sp.nr_trans ); itrans = t_idx ; ioverrot = 0; iovertrans = 0; } void coarseIndicesToCoarseIndex(SamplingParameters sp) // converts coarse partial indices to an "ihidden" (coarsely sampled) index // FIXME Untested { size_t idx = 0; idx += idir * sp.nr_psi * sp.nr_trans; idx += ipsi * sp.nr_trans; idx += itrans; coarseIdx = idx; } }; class OptimisationParamters { public: unsigned metadata_offset; unsigned long part_id; std::vector > Fimg, Fimg_nomask, local_Fimgs_shifted, local_Fimgs_shifted_nomask; 
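// Fimg holds the masked Fourier transform used for alignment/classification and
// Fimg_nomask the unmasked transform used for reconstruction (both filled in
// getFourierTransformsAndCtfs). The local_Fimgs_shifted* members mirror the CPU
// code path; in this accelerated path the translated copies appear to be generated
// on the device, and separate dummy vectors are passed to
// precalculateShiftedImagesCtfsAndInvSigma2s instead.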
std::vector > Fctf, local_Fctf, local_Minvsigma2; std::vector pointer_dir_nonzeroprior, pointer_psi_nonzeroprior; std::vector directions_prior, psi_prior, local_sqrtXi2; std::vector highres_Xi2_img, min_diff2; MultidimArray Mcoarse_significant; // And from storeWeightedSums std::vector sum_weight, significant_weight, max_weight; std::vector > old_offset, prior; std::vector > power_img; MultidimArray Mweight; std::vector max_index; OptimisationParamters (unsigned nr_images, unsigned long part_id): metadata_offset(0), part_id(part_id) { power_img.resize(nr_images); highres_Xi2_img.resize(nr_images); Fimg.resize(nr_images); Fimg_nomask.resize(nr_images); Fctf.resize(nr_images); old_offset.resize(nr_images); prior.resize(nr_images); max_index.resize(nr_images); }; }; class ProjectionParams { public: std::vector< size_t > orientation_num; // the number of significant orientation for each class size_t orientationNumAllClasses; // sum of the above std::vector< RFLOAT > rots, tilts, psis; std::vector< size_t > iorientclasses, iover_rots; // These are arrays which detial the number of entries in each class, and where each class starts. // NOTE: There is no information about which class each class_idx refers to, there is only // a distinction between different classes. std::vector< size_t > class_entries, class_idx; inline ProjectionParams(): rots(), tilts(), psis(), iorientclasses(), iover_rots(), class_entries(), class_idx(), orientation_num(), orientationNumAllClasses(0) {}; inline ProjectionParams(size_t classes): rots(), tilts(), psis(), iorientclasses(), iover_rots(), class_entries(classes), class_idx(classes), orientation_num(classes), orientationNumAllClasses(0) { class_idx[0]=0; class_entries[0]=0; }; // constructor that slices out a part of a parent ProjectionParams, assumed to contain a single (partial or entire) class inline ProjectionParams(ProjectionParams &parent, size_t start, size_t end): rots( parent.rots.begin() +start, parent.rots.begin() +end), tilts( parent.tilts.begin() +start, parent.tilts.begin() +end), psis( parent.psis.begin() +start, parent.psis.begin() +end), iorientclasses( parent.iorientclasses.begin() +start, parent.iorientclasses.begin() +end), iover_rots( parent.iover_rots.begin() +start, parent.iover_rots.begin() +end), orientation_num(1), orientationNumAllClasses(0), class_entries(1,end-start), class_idx(1,0) // NOTE: this is NOT the class, but rather where in these partial PrjParams to start, which is @ 0. {}; public: // Appends new values into the projection parameters for later use. 
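// (Typical use, for illustration only: for every significant orientation of class
// ic the caller invokes
//   prjParams.pushBackAll(ic, rot, tilt, psi, iorientclass, iover_rot);
// where prjParams and the argument names are placeholders matching the signature
// declared below.)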
// class_idx is used as such: // the n:th class (beginning with 0:th) // begins @ element class_idx[n] // ends @ element class_idx[n]+class_entries[n] void pushBackAll(size_t iclass, RFLOAT NEWrot,RFLOAT NEWtilt ,RFLOAT NEWpsi, size_t NEWiorientclasses,size_t NEWiover_rots) { // incremement the counter for this class class_entries[iclass]++; // and push a new entry rots.push_back(NEWrot); tilts.push_back(NEWtilt); psis.push_back(NEWpsi); iorientclasses.push_back(NEWiorientclasses); iover_rots.push_back(NEWiover_rots); } }; class IndexedDataArrayMask { public: // indexes of job partition // every element in jobOrigin is a reference to point to a position in a IndexedDataArray.weights array where that job starts RELATIVE to firstPos // every element in jobExtent specifies the number of weights for that job AccPtr jobOrigin, jobExtent; size_t firstPos, lastPos; // positions in indexedDataArray data and index arrays to slice out size_t weightNum, jobNum; // number of weights and jobs this class public: IndexedDataArrayMask(AccPtrFactory ptrFactory) : firstPos(), lastPos(), weightNum(), jobNum() { jobOrigin = ptrFactory.make(); jobExtent = ptrFactory.make(); } void setNumberOfJobs(size_t newSize) { jobNum=newSize; jobOrigin.setSize(newSize); jobExtent.setSize(newSize); } void setNumberOfWeights(size_t newSize) { weightNum=newSize; } inline ~IndexedDataArrayMask() { // jobOrigin.free_host(); // jobExtent.free_host(); }; }; class IndexedDataArray { public: //actual data AccPtr weights; // indexes with same length as data // -- basic indices --------------------------------- // rot_id = id of rot = which of all POSSIBLE orientations this weight signifies // rot_idx = index of rot = which in the sequence of the determined significant orientations this weight signifies // trans_id = id of trans = which of all POSSIBLE translations this weight signifies // -- special indices --------------------------------- // ihidden_overs = mapping to MWeight-based indexing for compatibility AccPtr rot_id, rot_idx, trans_idx, ihidden_overs; public: inline IndexedDataArray(AccPtrFactory ptrFactory): weights(ptrFactory.make()), rot_id(ptrFactory.make()), rot_idx(ptrFactory.make()), trans_idx(ptrFactory.make()), ihidden_overs(ptrFactory.make()) {}; inline IndexedDataArray(IndexedDataArray &parent, IndexedDataArrayMask &mask): weights( parent.weights, mask.firstPos, mask.weightNum), rot_id( parent.rot_id, mask.firstPos, mask.weightNum), rot_idx( parent.rot_idx, mask.firstPos, mask.weightNum), trans_idx( parent.trans_idx, mask.firstPos, mask.weightNum), ihidden_overs(parent.ihidden_overs, mask.firstPos, mask.weightNum) {}; public: void setDataSize(size_t newSize) { weights.setSize(newSize); rot_id.setSize(newSize); rot_idx.setSize(newSize); trans_idx.setSize(newSize); ihidden_overs.setSize(newSize); } void host_alloc_all() { weights.freeHostIfSet(); weights.hostAlloc(); rot_id.freeHostIfSet(); rot_id.hostAlloc(); rot_idx.freeHostIfSet(); rot_idx.hostAlloc(); trans_idx.freeHostIfSet(); trans_idx.hostAlloc(); ihidden_overs.freeHostIfSet(); ihidden_overs.hostAlloc(); } void device_alloc_all() { weights.freeDeviceIfSet(); weights.deviceAlloc(); rot_id.freeDeviceIfSet(); rot_id.deviceAlloc(); rot_idx.freeDeviceIfSet(); rot_idx.deviceAlloc(); trans_idx.freeDeviceIfSet(); trans_idx.deviceAlloc(); ihidden_overs.freeDeviceIfSet(); ihidden_overs.deviceAlloc(); } void dual_alloc_all() { host_alloc_all(); device_alloc_all(); } void dual_free_all() { weights.freeDeviceIfSet(); rot_id.freeDeviceIfSet(); rot_idx.freeDeviceIfSet(); 
trans_idx.freeDeviceIfSet(); ihidden_overs.freeDeviceIfSet(); weights.freeHostIfSet(); rot_id.freeHostIfSet(); rot_idx.freeHostIfSet(); trans_idx.freeHostIfSet(); ihidden_overs.freeHostIfSet(); } ~IndexedDataArray() { dual_free_all(); } }; #endif relion-3.1.3/src/acc/acc_ml_optimiser_impl.h000066400000000000000000004271401411340063500210330ustar00rootroot00000000000000static pthread_mutex_t global_mutex = PTHREAD_MUTEX_INITIALIZER; #include "src/ml_optimiser_mpi.h" // ---------------------------------------------------------------------------- // -------------------- getFourierTransformsAndCtfs --------------------------- // ---------------------------------------------------------------------------- template void getFourierTransformsAndCtfs(long int part_id, OptimisationParamters &op, SamplingParameters &sp, MlOptimiser *baseMLO, MlClass *accMLO, AccPtrFactory ptrFactory, int ibody = 0 ) { GTIC(accMLO->timer,"getFourierTransformsAndCtfs"); #ifdef TIMING if (part_id == baseMLO->exp_my_first_part_id) baseMLO->timer.tic(baseMLO->TIMING_ESP_FT); #endif CUSTOM_ALLOCATOR_REGION_NAME("GFTCTF"); for (int img_id = 0; img_id < sp.nr_images; img_id++) { CTIC(accMLO->timer,"init"); FileName fn_img; Image img, rec_img; MultidimArray Fimg; MultidimArray Faux; MultidimArray Fctf; Matrix2D Aori; Matrix1D my_projected_com(baseMLO->mymodel.data_dim), my_refined_ibody_offset(baseMLO->mymodel.data_dim); // Which group do I belong? int group_id =baseMLO->mydata.getGroupId(part_id, img_id); // What is my optics group? int optics_group = baseMLO->mydata.getOpticsGroup(part_id, img_id); RFLOAT my_pixel_size = baseMLO->mydata.getImagePixelSize(part_id, img_id); // metadata offset for this image in the particle int my_metadata_offset = op.metadata_offset + img_id; // Get the right line in the exp_fn_img strings (also exp_fn_recimg and exp_fn_ctfs) int istop = 0; for (long int ii = baseMLO->exp_my_first_part_id; ii < part_id; ii++) istop += baseMLO->mydata.numberOfImagesInParticle(part_id); istop += img_id; if (!baseMLO->mydata.getImageNameOnScratch(part_id, img_id, fn_img)) { std::istringstream split(baseMLO->exp_fn_img); for (int i = 0; i <= my_metadata_offset; i++) getline(split, fn_img); } sp.current_img = fn_img; // Get the norm_correction RFLOAT normcorr = DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_NORM); // Safeguard against gold-standard separation if (baseMLO->do_split_random_halves) { int halfset = DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_NR_SIGN); if (halfset != baseMLO->my_halfset) { std::cerr << "BUG!!! halfset= " << halfset << " my_halfset= " << baseMLO->my_halfset << " part_id= " << part_id << std::endl; REPORT_ERROR("BUG! Mixing gold-standard separation!!!!"); } } // Get the optimal origin offsets from the previous iteration // Sjors 5mar18: it is very important that my_old_offset has baseMLO->mymodel.data_dim and not just (3), as transformCartesianAndHelicalCoords will give different results!!! 
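// The offsets and angular/translational priors read below come from exp_metadata;
// a value of 999. is the sentinel for "no prior set", which is why the code uses
// the tolerant window (> 998.99 && < 999.01) before resetting such priors to zero
// (or, for the psi flip ratio, to its 0.5 default).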
Matrix1D my_old_offset(baseMLO->mymodel.data_dim), my_prior(baseMLO->mymodel.data_dim), my_old_offset_ori; int icol_rot, icol_tilt, icol_psi, icol_xoff, icol_yoff, icol_zoff; XX(my_old_offset) = DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_XOFF); YY(my_old_offset) = DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_YOFF); XX(my_prior) = DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_XOFF_PRIOR); YY(my_prior) = DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_YOFF_PRIOR); // Uninitialised priors were set to 999. if (XX(my_prior) > 998.99 && XX(my_prior) < 999.01) XX(my_prior) = 0.; if (YY(my_prior) > 998.99 && YY(my_prior) < 999.01) YY(my_prior) = 0.; if (accMLO->dataIs3D) { ZZ(my_old_offset) = DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_ZOFF); ZZ(my_prior) = DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_ZOFF_PRIOR); // Unitialised priors were set to 999. if (ZZ(my_prior) > 998.99 && ZZ(my_prior) < 999.01) ZZ(my_prior) = 0.; } if (baseMLO->mymodel.nr_bodies > 1) { // 17May2017: Shift image to the projected COM for this body! // Aori is the original transformation matrix of the consensus refinement Euler_angles2matrix(DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_ROT), DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_TILT), DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_PSI), Aori, false); my_projected_com = Aori * baseMLO->mymodel.com_bodies[ibody]; // This will have made my_projected_com of size 3 again! resize to mymodel.data_dim my_projected_com.resize(baseMLO->mymodel.data_dim); // Subtract the projected COM offset, to position this body in the center // Also keep the my_old_offset in my_old_offset_ori my_old_offset_ori = my_old_offset; my_old_offset -= my_projected_com; // Also get refined offset for this body icol_xoff = 3 + METADATA_LINE_LENGTH_BEFORE_BODIES + (ibody) * METADATA_NR_BODY_PARAMS; icol_yoff = 4 + METADATA_LINE_LENGTH_BEFORE_BODIES + (ibody) * METADATA_NR_BODY_PARAMS; icol_zoff = 5 + METADATA_LINE_LENGTH_BEFORE_BODIES + (ibody) * METADATA_NR_BODY_PARAMS; XX(my_refined_ibody_offset) = DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, icol_xoff); YY(my_refined_ibody_offset) = DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, icol_yoff); if (baseMLO->mymodel.data_dim == 3) ZZ(my_refined_ibody_offset) = DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, icol_zoff); // For multi-body refinement: set the priors of the translations to zero (i.e. 
everything centred around consensus offset) my_prior.initZeros(); } CTOC(accMLO->timer,"init"); CTIC(accMLO->timer,"nonZeroProb"); // Orientational priors if (baseMLO->mymodel.nr_bodies > 1 ) { // Centre local searches around the orientation from the previous iteration, this one goes with overall sigma2_ang // On top of that, apply prior on the deviation from (0,0,0) with mymodel.sigma_tilt_bodies[ibody] and mymodel.sigma_psi_bodies[ibody] icol_rot = 0 + METADATA_LINE_LENGTH_BEFORE_BODIES + (ibody) * METADATA_NR_BODY_PARAMS; icol_tilt = 1 + METADATA_LINE_LENGTH_BEFORE_BODIES + (ibody) * METADATA_NR_BODY_PARAMS; icol_psi = 2 + METADATA_LINE_LENGTH_BEFORE_BODIES + (ibody) * METADATA_NR_BODY_PARAMS; RFLOAT prior_rot = DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, icol_rot); RFLOAT prior_tilt = DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, icol_tilt); RFLOAT prior_psi = DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, icol_psi); baseMLO->sampling.selectOrientationsWithNonZeroPriorProbability( prior_rot, prior_tilt, prior_psi, sqrt(baseMLO->mymodel.sigma2_rot), sqrt(baseMLO->mymodel.sigma2_tilt), sqrt(baseMLO->mymodel.sigma2_psi), op.pointer_dir_nonzeroprior, op.directions_prior, op.pointer_psi_nonzeroprior, op.psi_prior, false, 3., baseMLO->mymodel.sigma_tilt_bodies[ibody], baseMLO->mymodel.sigma_psi_bodies[ibody]); } else if (baseMLO->mymodel.orientational_prior_mode != NOPRIOR && !(baseMLO->do_skip_align ||baseMLO-> do_skip_rotate)) { // First try if there are some fixed prior angles // For multi-body refinements, ignore the original priors and get the refined residual angles from the previous iteration RFLOAT prior_rot = DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_ROT_PRIOR); RFLOAT prior_tilt = DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_TILT_PRIOR); RFLOAT prior_psi = DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_PSI_PRIOR); RFLOAT prior_psi_flip_ratio = DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_PSI_PRIOR_FLIP_RATIO); bool do_auto_refine_local_searches = (baseMLO->do_auto_refine) && (baseMLO->sampling.healpix_order >= baseMLO->autosampling_hporder_local_searches); bool do_classification_local_searches = (! baseMLO->do_auto_refine) && (baseMLO->mymodel.orientational_prior_mode == PRIOR_ROTTILT_PSI) && (baseMLO->mymodel.sigma2_rot > 0.) && (baseMLO->mymodel.sigma2_tilt > 0.) && (baseMLO->mymodel.sigma2_psi > 0.); bool do_local_angular_searches = (do_auto_refine_local_searches) || (do_classification_local_searches); // If there were no defined priors (i.e. their values were 999.), then use the "normal" angles if (prior_rot > 998.99 && prior_rot < 999.01) prior_rot = DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_ROT); if (prior_tilt > 998.99 && prior_tilt < 999.01) prior_tilt = DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_TILT); if (prior_psi > 998.99 && prior_psi < 999.01) prior_psi = DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_PSI); if (prior_psi_flip_ratio > 998.99 && prior_psi_flip_ratio < 999.01) prior_psi_flip_ratio = 0.5; ////////// How does this work now: each particle has a different sampling object?!!! 
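// The calls below ask the sampling object which directions and psi angles have
// non-zero prior probability, given this particle's prior angles and the sigma
// values; the surviving indices are returned in op.pointer_dir_nonzeroprior /
// op.pointer_psi_nonzeroprior with their weights in op.directions_prior /
// op.psi_prior, and an error is raised if no orientation survives the local search.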
// Select only those orientations that have non-zero prior probability if (baseMLO->do_helical_refine && baseMLO->mymodel.ref_dim == 3) { baseMLO->sampling.selectOrientationsWithNonZeroPriorProbabilityFor3DHelicalReconstruction(prior_rot, prior_tilt, prior_psi, sqrt(baseMLO->mymodel.sigma2_rot), sqrt(baseMLO->mymodel.sigma2_tilt), sqrt(baseMLO->mymodel.sigma2_psi), op.pointer_dir_nonzeroprior, op.directions_prior, op.pointer_psi_nonzeroprior, op.psi_prior, do_local_angular_searches, prior_psi_flip_ratio); } else { baseMLO->sampling.selectOrientationsWithNonZeroPriorProbability(prior_rot, prior_tilt, prior_psi, sqrt(baseMLO->mymodel.sigma2_rot), sqrt(baseMLO->mymodel.sigma2_tilt), sqrt(baseMLO->mymodel.sigma2_psi), op.pointer_dir_nonzeroprior, op.directions_prior, op.pointer_psi_nonzeroprior, op.psi_prior); } long int nr_orients = baseMLO->sampling.NrDirections(0, &op.pointer_dir_nonzeroprior) * baseMLO->sampling.NrPsiSamplings(0, &op.pointer_psi_nonzeroprior); if (nr_orients == 0) { std::cerr << " sampling.NrDirections()= " << baseMLO->sampling.NrDirections(0, &op.pointer_dir_nonzeroprior) << " sampling.NrPsiSamplings()= " << baseMLO->sampling.NrPsiSamplings(0, &op.pointer_psi_nonzeroprior) << std::endl; REPORT_ERROR("Zero orientations fall within the local angular search. Increase the sigma-value(s) on the orientations!"); } } CTOC(accMLO->timer,"nonZeroProb"); // ------------------------------------------------------------------------------------------ CTIC(accMLO->timer,"readData"); // Get the image and recimg data if (baseMLO->do_parallel_disc_io) { // If all followers had preread images into RAM: get those now if (baseMLO->do_preread_images) { img().reshape(baseMLO->mydata.particles[part_id].images[img_id].img); CTIC(accMLO->timer,"ParaReadPrereadImages"); FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(baseMLO->mydata.particles[part_id].images[img_id].img) { DIRECT_MULTIDIM_ELEM(img(), n) = (RFLOAT)DIRECT_MULTIDIM_ELEM(baseMLO->mydata.particles[part_id].images[img_id].img, n); } CTOC(accMLO->timer,"ParaReadPrereadImages"); } else { if (accMLO->dataIs3D) { CTIC(accMLO->timer,"ParaRead3DImages"); img.read(fn_img); img().setXmippOrigin(); CTOC(accMLO->timer,"ParaRead3DImages"); } else { CTIC(accMLO->timer,"ParaRead2DImages"); img() = baseMLO->exp_imgs[my_metadata_offset]; CTOC(accMLO->timer,"ParaRead2DImages"); } } if (baseMLO->has_converged && baseMLO->do_use_reconstruct_images) { FileName fn_recimg; std::istringstream split2(baseMLO->exp_fn_recimg); // Get the right line in the exp_fn_img string for (int i = 0; i <= my_metadata_offset; i++) getline(split2, fn_recimg); rec_img.read(fn_recimg); rec_img().setXmippOrigin(); } } else { // Unpack the image from the imagedata if (accMLO->dataIs3D) { CTIC(accMLO->timer,"Read3DImages"); CTIC(accMLO->timer,"resize"); img().resize(baseMLO->image_full_size[optics_group], baseMLO->image_full_size[optics_group], baseMLO->image_full_size[optics_group]); CTOC(accMLO->timer,"resize"); // Only allow a single image per call of this function!!! nr_pool needs to be set to 1!!!! // This will save memory, as we'll need to store all translated images in memory.... FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY3D(img()) { DIRECT_A3D_ELEM(img(), k, i, j) = DIRECT_A3D_ELEM(baseMLO->exp_imagedata, k, i, j); } img().setXmippOrigin(); if (baseMLO->has_converged && baseMLO->do_use_reconstruct_images) { rec_img().resize(baseMLO->image_full_size[optics_group], baseMLO->image_full_size[optics_group], baseMLO->image_full_size[optics_group]); int offset = (baseMLO->do_ctf_correction) ? 
2 * baseMLO->image_full_size[optics_group] : baseMLO->image_full_size[optics_group]; FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY3D(rec_img()) { DIRECT_A3D_ELEM(rec_img(), k, i, j) = DIRECT_A3D_ELEM(baseMLO->exp_imagedata, offset + k, i, j); } rec_img().setXmippOrigin(); } CTOC(accMLO->timer,"Read3DImages"); } else { CTIC(accMLO->timer,"Read2DImages"); img().resize(baseMLO->image_full_size[optics_group], baseMLO->image_full_size[optics_group]); FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY2D(img()) { DIRECT_A2D_ELEM(img(), i, j) = DIRECT_A3D_ELEM(baseMLO->exp_imagedata, my_metadata_offset, i, j); } img().setXmippOrigin(); if (baseMLO->has_converged && baseMLO->do_use_reconstruct_images) { /// TODO: this will be WRONG for multi-image particles, but I guess that's not going to happen anyway... int my_nr_particles = baseMLO->exp_my_last_part_id - baseMLO->exp_my_first_part_id + 1; rec_img().resize(baseMLO->image_full_size[optics_group], baseMLO->image_full_size[optics_group]); FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY2D(rec_img()) { DIRECT_A2D_ELEM(rec_img(), i, j) = DIRECT_A3D_ELEM(baseMLO->exp_imagedata, my_nr_particles + my_metadata_offset, i, j); } rec_img().setXmippOrigin(); } CTOC(accMLO->timer,"Read2DImages"); } } CTOC(accMLO->timer,"readData"); // ------------------------------------------------------------------------------------------ size_t current_size_x = baseMLO->image_current_size[optics_group] / 2 + 1; size_t current_size_y = baseMLO->image_current_size[optics_group]; size_t current_size_z = (accMLO->dataIs3D) ? baseMLO->image_current_size[optics_group] : 1; accMLO->transformer1.setSize(img().xdim,img().ydim,img().zdim); Fimg.initZeros(current_size_z, current_size_y, current_size_x); // ------------------------------------------------------------------------------------------ CTIC(cudaMLO->timer,"makeNoiseMask"); // Either mask with zeros or noise. Here, make a noise-image that will be optional in the softMask-kernel. 
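// When noise masking is requested (do_zero_mask == false), the group's sigma2_noise
// spectrum is remapped to this image's box and pixel size, scaled by sigma2_fudge,
// and used to synthesise a noise image on the device with a per-particle seed
// (random_seed + part_id); the cosine soft-mask kernel further down then blends
// this noise in outside the particle radius instead of a constant background value.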
AccDataTypes::Image RandomImage(img(),ptrFactory); if (!baseMLO->do_zero_mask) // prepare a acc-side Random image { if(RandomImage.is3D()) CRITICAL("Noise-masking not supported with acceleration and 3D input: Noise-kernel(s) is hard-coded 2D"); // Make a F-space image to hold generate and modulate noise RandomImage.accAlloc(); // Set up scalar adjustment factor and random seed XFLOAT temp_sigmaFudgeFactor = baseMLO->sigma2_fudge; int seed(baseMLO->random_seed + part_id); // Remap mymodel.sigma2_noise[group_id] onto remapped_sigma2_noise for this images's size and angpix MultidimArray remapped_sigma2_noise; remapped_sigma2_noise.initZeros(XSIZE(img())/2+1); RFLOAT remap_image_sizes = (baseMLO->image_full_size[optics_group] * my_pixel_size) / (baseMLO->mymodel.ori_size * baseMLO->mymodel.pixel_size); FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY1D(baseMLO->mymodel.sigma2_noise[group_id]) { int i_remap = ROUND(remap_image_sizes * i); if (i_remap < XSIZE(remapped_sigma2_noise)) DIRECT_A1D_ELEM(remapped_sigma2_noise, i_remap) = DIRECT_A1D_ELEM(baseMLO->mymodel.sigma2_noise[group_id], i); } LAUNCH_PRIVATE_ERROR(cudaGetLastError(),accMLO->errorStatus); // construct the noise-image AccUtilities::makeNoiseImage( temp_sigmaFudgeFactor, remapped_sigma2_noise, seed, accMLO, RandomImage, RandomImage.is3D()); LAUNCH_PRIVATE_ERROR(cudaGetLastError(),accMLO->errorStatus); } CTOC(cudaMLO->timer,"makeNoiseMask"); // ------------------------------------------------------------------------------------------ CTIC(accMLO->timer,"HelicalPrep"); /* FIXME : For some reason the device-allocation inside "selfTranslate" takes a much longer time than expected. * I tried moving it up and placing the size under a bunch of if()-cases, but this simply transferred the * allocation-cost to that region. /BjoernF,160129 */ // Apply (rounded) old offsets first my_old_offset.selfROUND(); // Helical reconstruction: calculate old_offset in the system of coordinates of the helix, i.e. parallel & perpendicular, depending on psi-angle! // For helices do NOT apply old_offset along the direction of the helix!! Matrix1D my_old_offset_helix_coords; RFLOAT rot_deg = DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_ROT); RFLOAT tilt_deg = DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_TILT); RFLOAT psi_deg = DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_PSI); if ( (baseMLO->do_helical_refine) && (! baseMLO->ignore_helical_symmetry) ) { // Calculate my_old_offset_helix_coords from my_old_offset and psi angle transformCartesianAndHelicalCoords(my_old_offset, my_old_offset_helix_coords, rot_deg, tilt_deg, psi_deg, CART_TO_HELICAL_COORDS); // We do NOT want to accumulate the offsets in the direction along the helix (which is X in the helical coordinate system!) // However, when doing helical local searches, we accumulate offsets // Do NOT accumulate offsets in 3D classification of helices if ( (! baseMLO->do_skip_align) && (! baseMLO->do_skip_rotate) ) { // TODO: check whether the following lines make sense bool do_auto_refine_local_searches = (baseMLO->do_auto_refine) && (baseMLO->sampling.healpix_order >= baseMLO->autosampling_hporder_local_searches); bool do_classification_local_searches = (! baseMLO->do_auto_refine) && (baseMLO->mymodel.orientational_prior_mode == PRIOR_ROTTILT_PSI) && (baseMLO->mymodel.sigma2_rot > 0.) && (baseMLO->mymodel.sigma2_tilt > 0.) 
&& (baseMLO->mymodel.sigma2_psi > 0.); bool do_local_angular_searches = (do_auto_refine_local_searches) || (do_classification_local_searches); if (!do_local_angular_searches) { if (! accMLO->dataIs3D) XX(my_old_offset_helix_coords) = 0.; else ZZ(my_old_offset_helix_coords) = 0.; } } // TODO: Now re-calculate the my_old_offset in the real (or image) system of coordinate (rotate -psi angle) transformCartesianAndHelicalCoords(my_old_offset_helix_coords, my_old_offset, rot_deg, tilt_deg, psi_deg, HELICAL_TO_CART_COORDS); } CTOC(accMLO->timer,"HelicalPrep"); // ------------------------------------------------------------------------------------------ my_old_offset.selfROUND(); // ------------------------------------------------------------------------------------------ CTIC(accMLO->timer,"TranslateAndNormCorrect"); AccDataTypes::Image d_img(img.data, ptrFactory); AccDataTypes::Image d_rec_img(img.data, ptrFactory); d_img.allAlloc(); d_img.allInit(0); XFLOAT normcorr_val = baseMLO->do_norm_correction ? (XFLOAT)(baseMLO->mymodel.avg_norm_correction / normcorr) : 1; AccUtilities::TranslateAndNormCorrect( img.data, // input host-side MultidimArray d_img, // output acc-side Array normcorr_val, XX(my_old_offset), YY(my_old_offset), (accMLO->dataIs3D) ? ZZ(my_old_offset) : 0., accMLO->dataIs3D); LAUNCH_PRIVATE_ERROR(cudaGetLastError(),accMLO->errorStatus); CTOC(accMLO->timer,"TranslateAndNormCorrect"); // Set up the UNMASKED image to use for reconstruction, which may be a separate image altogether (rec_img) // // d_img has the image information which will be masked // if(baseMLO->has_converged && baseMLO->do_use_reconstruct_images) { CTIC(accMLO->timer,"TranslateAndNormCorrect_recImg"); d_rec_img.allAlloc(); d_rec_img.allInit(0); AccUtilities::TranslateAndNormCorrect( rec_img.data, // input host-side MultidimArray d_rec_img, // output acc-side Array normcorr_val, XX(my_old_offset), YY(my_old_offset), (accMLO->dataIs3D) ? ZZ(my_old_offset) : 0., accMLO->dataIs3D); LAUNCH_PRIVATE_ERROR(cudaGetLastError(),accMLO->errorStatus); CTOC(accMLO->timer,"TranslateAndNormCorrect_recImg"); CTIC(cudaMLO->timer,"normalizeAndTransform_recImg"); // The image used to reconstruct is not masked, so we transform and beam-tilt it AccUtilities::normalizeAndTransformImage(d_rec_img, // input acc-side Array Fimg, // output host-side MultidimArray accMLO, current_size_x, current_size_y, current_size_z); LAUNCH_PRIVATE_ERROR(cudaGetLastError(),accMLO->errorStatus); CTOC(cudaMLO->timer,"normalizeAndTransform_recImg"); } else // if we don't have special images, just use the same as for alignment. But do it here, *before masking* { CTIC(cudaMLO->timer,"normalizeAndTransform_recImg"); // The image used to reconstruct is not masked, so we transform and beam-tilt it AccUtilities::normalizeAndTransformImage( d_img, // input acc-side Array Fimg, // output host-side MultidimArray accMLO, current_size_x, current_size_y, current_size_z); LAUNCH_PRIVATE_ERROR(cudaGetLastError(),accMLO->errorStatus); CTOC(cudaMLO->timer,"normalizeAndTransform_recImg"); } // ------------------------------------------------------------------------------------------ if ( (baseMLO->do_helical_refine) && (! 
baseMLO->ignore_helical_symmetry) ) { // Transform rounded Cartesian offsets to corresponding helical ones transformCartesianAndHelicalCoords(my_old_offset, my_old_offset_helix_coords, rot_deg, tilt_deg, psi_deg, CART_TO_HELICAL_COORDS); op.old_offset[img_id] = my_old_offset_helix_coords; } else { // For multi-bodies: store only the old refined offset, not the constant consensus offset or the projected COM of this body if (baseMLO->mymodel.nr_bodies > 1) op.old_offset[img_id] = my_refined_ibody_offset; else op.old_offset[img_id] = my_old_offset; // Not doing helical refinement. Rounded Cartesian offsets are stored. } // Also store priors on translations op.prior[img_id] = my_prior; // ------------------------------------------------------------------------------------------ CTIC(accMLO->timer,"selfApplyBeamTilt"); baseMLO->mydata.obsModel.demodulatePhase(optics_group, Fimg); baseMLO->mydata.obsModel.divideByMtf(optics_group, Fimg); CTOC(accMLO->timer,"selfApplyBeamTilt"); op.Fimg_nomask.at(img_id) = Fimg; // ------------------------------------------------------------------------------------------ MultidimArray Mnoise; bool is_helical_segment = (baseMLO->do_helical_refine) || ((baseMLO->mymodel.ref_dim == 2) && (baseMLO->helical_tube_outer_diameter > 0.)); // For multibodies: have the mask radius equal to maximum radius within body mask plus the translational offset search range RFLOAT my_mask_radius = (baseMLO->mymodel.nr_bodies > 1 ) ? (baseMLO->mymodel.max_radius_mask_bodies[ibody] + baseMLO->sampling.offset_range) / my_pixel_size : baseMLO->particle_diameter / (2. * my_pixel_size); // ------------------------------------------------------------------------------------------ // We are now done with the unmasked image used for reconstruction. // Now make the masked image used for alignment and classification. if (is_helical_segment) { CTIC(accMLO->timer,"applyHelicalMask"); // download img... d_img.cpToHost(); d_img.streamSync(); d_img.getHost(img()); // ...modify img... if(baseMLO->do_zero_mask) { softMaskOutsideMapForHelix(img(), psi_deg, tilt_deg, my_mask_radius, (baseMLO->helical_tube_outer_diameter / (2. * my_pixel_size)), baseMLO->width_mask_edge); } else { MultidimArray Mnoise; RandomImage.hostAlloc(); RandomImage.cpToHost(); Mnoise.resize(img()); RandomImage.getHost(Mnoise); softMaskOutsideMapForHelix(img(), psi_deg, tilt_deg, my_mask_radius, (baseMLO->helical_tube_outer_diameter / (2. * my_pixel_size)), baseMLO->width_mask_edge, &Mnoise); } // ... 
and re-upload img d_img.setHost(img()); d_img.cpToDevice(); CTOC(accMLO->timer,"applyHelicalMask"); } else // this is not a helical segment { CTIC(accMLO->timer,"applyMask"); // Shared parameters for noise/zero masking XFLOAT cosine_width = baseMLO->width_mask_edge; XFLOAT radius = (XFLOAT) my_mask_radius; if (radius < 0) radius = ((RFLOAT)img.data.xdim)/2.; XFLOAT radius_p = radius + cosine_width; // For zero-masking, we need the background-value XFLOAT bg_val(0.); if(baseMLO->do_zero_mask) { AccPtr softMaskSum = ptrFactory.make((size_t)SOFTMASK_BLOCK_SIZE, 0); AccPtr softMaskSum_bg = ptrFactory.make((size_t)SOFTMASK_BLOCK_SIZE, 0); softMaskSum.accAlloc(); softMaskSum_bg.accAlloc(); softMaskSum.accInit(0); softMaskSum_bg.accInit(0); // Calculate the background value AccUtilities::softMaskBackgroundValue( d_img, radius, radius_p, cosine_width, softMaskSum, softMaskSum_bg); LAUNCH_PRIVATE_ERROR(cudaGetLastError(),accMLO->errorStatus); softMaskSum.streamSync(); // Finalize the background value bg_val = (RFLOAT) AccUtilities::getSumOnDevice(softMaskSum_bg) / (RFLOAT) AccUtilities::getSumOnDevice(softMaskSum); softMaskSum.streamSync(); } //avoid kernel-calls warning about null-pointer for RandomImage if (baseMLO->do_zero_mask) RandomImage.setAccPtr(d_img); // Apply a cosine-softened mask, using either the background value or the noise-image outside of the radius AccUtilities::cosineFilter( d_img, baseMLO->do_zero_mask, RandomImage, radius, radius_p, cosine_width, bg_val); LAUNCH_PRIVATE_ERROR(cudaGetLastError(),accMLO->errorStatus); DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaStreamPerThread)); CTOC(accMLO->timer,"applyMask"); } // ------------------------------------------------------------------------------------------ CTIC(cudaMLO->timer,"normalizeAndTransform"); AccUtilities::normalizeAndTransformImage( d_img, // input Fimg, // output accMLO, current_size_x, current_size_y, current_size_z); LAUNCH_PRIVATE_ERROR(cudaGetLastError(),accMLO->errorStatus); CTOC(cudaMLO->timer,"normalizeAndTransform"); // ------------------------------------------------------------------------------------------ CTIC(accMLO->timer,"powerClass"); // Store the power_class spectrum of the whole image (to fill sigma2_noise between current_size and full_size if (baseMLO->image_current_size[optics_group] < baseMLO->image_full_size[optics_group]) { AccPtr spectrumAndXi2 = ptrFactory.make((size_t)((baseMLO->image_full_size[optics_group]/2+1)+1), 0); // last +1 is the Xi2, to remove an expensive memcpy spectrumAndXi2.allAlloc(); spectrumAndXi2.accInit(0); spectrumAndXi2.streamSync(); int gridSize = CEIL((float)(accMLO->transformer1.fouriers.getSize()) / (float)POWERCLASS_BLOCK_SIZE); if(accMLO->dataIs3D) AccUtilities::powerClass(gridSize,POWERCLASS_BLOCK_SIZE, ~accMLO->transformer1.fouriers, ~spectrumAndXi2, accMLO->transformer1.fouriers.getSize(), spectrumAndXi2.getSize()-1, accMLO->transformer1.xFSize, accMLO->transformer1.yFSize, accMLO->transformer1.zFSize, (baseMLO->image_current_size[optics_group]/2)+1, // note: NOT baseMLO->image_full_size[optics_group]/2+1 &(~spectrumAndXi2)[spectrumAndXi2.getSize()-1]); // last element is the hihgres_Xi2 else AccUtilities::powerClass(gridSize,POWERCLASS_BLOCK_SIZE, ~accMLO->transformer1.fouriers, ~spectrumAndXi2, accMLO->transformer1.fouriers.getSize(), spectrumAndXi2.getSize()-1, accMLO->transformer1.xFSize, accMLO->transformer1.yFSize, accMLO->transformer1.zFSize, (baseMLO->image_current_size[optics_group]/2)+1, // note: NOT baseMLO->image_full_size[optics_group]/2+1 
&(~spectrumAndXi2)[spectrumAndXi2.getSize()-1]); // last element is the hihgres_Xi2 LAUNCH_PRIVATE_ERROR(cudaGetLastError(),accMLO->errorStatus); spectrumAndXi2.streamSync(); spectrumAndXi2.cpToHost(); spectrumAndXi2.streamSync(); op.power_img.at(img_id).resize(baseMLO->image_full_size[optics_group]/2 + 1); for (int i = 0; i<(spectrumAndXi2.getSize()-1); i ++) op.power_img.at(img_id).data[i] = spectrumAndXi2[i]; op.highres_Xi2_img.at(img_id) = spectrumAndXi2[spectrumAndXi2.getSize()-1]; } else { op.highres_Xi2_img.at(img_id) = 0.; } CTOC(accMLO->timer,"powerClass"); Fctf.resize(Fimg); // Now calculate the actual CTF if (baseMLO->do_ctf_correction) { if (accMLO->dataIs3D) { Image Ictf; if (baseMLO->do_parallel_disc_io) { CTIC(accMLO->timer,"CTFRead3D_disk"); // Read CTF-image from disc FileName fn_ctf; if (!baseMLO->mydata.getImageNameOnScratch(part_id, img_id, fn_ctf, true)) { std::istringstream split(baseMLO->exp_fn_ctf); // Get the right line in the exp_fn_img string for (int i = 0; i <= my_metadata_offset; i++) getline(split, fn_ctf); } Ictf.read(fn_ctf); CTOC(accMLO->timer,"CTFRead3D_disk"); } else { CTIC(accMLO->timer,"CTFRead3D_array"); // Unpack the CTF-image from the exp_imagedata array Ictf().resize(baseMLO->image_full_size[optics_group], baseMLO->image_full_size[optics_group], baseMLO->image_full_size[optics_group]); FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY3D(Ictf()) { DIRECT_A3D_ELEM(Ictf(), k, i, j) = DIRECT_A3D_ELEM(baseMLO->exp_imagedata, baseMLO->image_full_size[optics_group] + k, i, j); } CTOC(accMLO->timer,"CTFRead3D_array"); } // Set the CTF-image in Fctf CTIC(accMLO->timer,"CTFSet3D_array"); // If there is a redundant half, get rid of it if (XSIZE(Ictf()) == YSIZE(Ictf())) { Ictf().setXmippOrigin(); FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM(Fctf) { // Use negative kp,ip and jp indices, because the origin in the ctf_img lies half a pixel to the right of the actual center.... 
DIRECT_A3D_ELEM(Fctf, k, i, j) = A3D_ELEM(Ictf(), -kp, -ip, -jp); } } // otherwise, just window the CTF to the current resolution else if (XSIZE(Ictf()) == YSIZE(Ictf()) / 2 + 1) { windowFourierTransform(Ictf(), Fctf, YSIZE(Fctf)); } // if dimensions are neither cubical nor FFTW, stop else { REPORT_ERROR("3D CTF volume must be either cubical or adhere to FFTW format!"); } CTOC(accMLO->timer,"CTFSet3D_array"); } else { CTIC(accMLO->timer,"CTFRead2D"); CTF ctf; ctf.setValuesByGroup( &(baseMLO->mydata).obsModel, optics_group, DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_CTF_DEFOCUS_U), DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_CTF_DEFOCUS_V), DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_CTF_DEFOCUS_ANGLE), DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_CTF_BFACTOR), DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_CTF_KFACTOR), DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_CTF_PHASE_SHIFT)); ctf.getFftwImage(Fctf, baseMLO->image_full_size[optics_group], baseMLO->image_full_size[optics_group], my_pixel_size, baseMLO->ctf_phase_flipped, baseMLO->only_flip_phases, baseMLO->intact_ctf_first_peak, true, baseMLO->do_ctf_padding); CTOC(accMLO->timer,"CTFRead2D"); } } else { Fctf.initConstant(1.); } CTOC(accMLO->timer,"ctfCorr"); CTIC(accMLO->timer,"selfApplyBeamTilt"); baseMLO->mydata.obsModel.demodulatePhase(optics_group, Fimg); baseMLO->mydata.obsModel.divideByMtf(optics_group, Fimg); CTOC(accMLO->timer,"selfApplyBeamTilt"); // Store Fimg and Fctf op.Fimg.at(img_id) = Fimg; op.Fctf.at(img_id) = Fctf; // If we're doing multibody refinement, now subtract projections of the other bodies from both the masked and the unmasked particle if (baseMLO->mymodel.nr_bodies > 1) { MultidimArray Fsum_obody; Fsum_obody.initZeros(Fimg); for (int obody = 0; obody < baseMLO->mymodel.nr_bodies; obody++) { if (obody != ibody) // Only subtract if other body is not this body.... 
{ // Get the right metadata int ocol_rot = 0 + METADATA_LINE_LENGTH_BEFORE_BODIES + (obody) * METADATA_NR_BODY_PARAMS; int ocol_tilt = 1 + METADATA_LINE_LENGTH_BEFORE_BODIES + (obody) * METADATA_NR_BODY_PARAMS; int ocol_psi = 2 + METADATA_LINE_LENGTH_BEFORE_BODIES + (obody) * METADATA_NR_BODY_PARAMS; int ocol_xoff = 3 + METADATA_LINE_LENGTH_BEFORE_BODIES + (obody) * METADATA_NR_BODY_PARAMS; int ocol_yoff = 4 + METADATA_LINE_LENGTH_BEFORE_BODIES + (obody) * METADATA_NR_BODY_PARAMS; int ocol_zoff = 5 + METADATA_LINE_LENGTH_BEFORE_BODIES + (obody) * METADATA_NR_BODY_PARAMS; //int ocol_norm = 6 + METADATA_LINE_LENGTH_BEFORE_BODIES + (obody) * METADATA_NR_BODY_PARAMS; Matrix2D Aresi, Abody; // Aresi is the residual orientation for this obody Euler_angles2matrix(DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, ocol_rot), DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, ocol_tilt), DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, ocol_psi), Aresi, false); // The real orientation to be applied is the obody transformation applied and the original one Abody = Aori * (baseMLO->mymodel.orient_bodies[obody]).transpose() * baseMLO->A_rot90 * Aresi * baseMLO->mymodel.orient_bodies[obody]; // Apply anisotropic mag and scaling Abody = baseMLO->mydata.obsModel.applyAnisoMag(Abody, optics_group); Abody = baseMLO->mydata.obsModel.applyScaleDifference(Abody, optics_group, baseMLO->mymodel.ori_size, baseMLO->mymodel.pixel_size); // Get the FT of the projection in the right direction MultidimArray FTo; FTo.initZeros(Fimg); // The following line gets the correct pointer to account for overlap in the bodies int oobody = DIRECT_A2D_ELEM(baseMLO->mymodel.pointer_body_overlap, ibody, obody); baseMLO->mymodel.PPref[oobody].get2DFourierTransform(FTo, Abody); /******************************************************************************** * Currently CPU-memory for projectors is not deallocated when doing multibody * due to the previous line. See cpu_ml_optimiser.cpp and cuda_ml_optimiser.cu ********************************************************************************/ // 17May2017: Body is centered at its own COM // move it back to its place in the original particle image Matrix1D other_projected_com(baseMLO->mymodel.data_dim); // Projected COM for this body (using Aori, just like above for ibody and my_projected_com!!!) other_projected_com = Aori * (baseMLO->mymodel.com_bodies[obody]); // This will have made other_projected_com of size 3 again! resize to mymodel.data_dim other_projected_com.resize(baseMLO->mymodel.data_dim); // Do the exact same as was done for the ibody, but DONT selfROUND here, as later phaseShift applied to ibody below!!! other_projected_com -= my_old_offset_ori; // Subtract refined obody-displacement XX(other_projected_com) -= DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, ocol_xoff); YY(other_projected_com) -= DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, ocol_yoff); if (baseMLO->mymodel.data_dim == 3) ZZ(other_projected_com) -= DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, ocol_zoff); // Add the my_old_offset=selfRound(my_old_offset_ori - my_projected_com) already applied to this image for ibody other_projected_com += my_old_offset; shiftImageInFourierTransform(FTo, Faux, (RFLOAT)baseMLO->image_full_size[optics_group], XX(other_projected_com), YY(other_projected_com), (accMLO->dataIs3D) ? 
ZZ(other_projected_com) : 0.); // Sum the Fourier transforms of all the obodies Fsum_obody += Faux; } // end if obody != ibody } // end for obody // Now that we have all the summed projections of the obodies, apply CTF, masks etc // Apply the CTF to this reference projection if (baseMLO->do_ctf_correction) { if (baseMLO->mydata.obsModel.getCtfPremultiplied(optics_group)) FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Fsum_obody) DIRECT_MULTIDIM_ELEM(Fsum_obody, n) *= (DIRECT_MULTIDIM_ELEM(Fctf, n) * DIRECT_MULTIDIM_ELEM(Fctf, n)); else FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Fsum_obody) DIRECT_MULTIDIM_ELEM(Fsum_obody, n) *= DIRECT_MULTIDIM_ELEM(Fctf, n); // Also do phase modulation, for beam tilt correction and other asymmetric aberrations baseMLO->mydata.obsModel.demodulatePhase(optics_group, Fsum_obody, true); // true means do_modulate_instead baseMLO->mydata.obsModel.divideByMtf(optics_group, Fsum_obody, true); // true means do_multiply_instead } // Subtract the other-body FT from the current image FT // First the unmasked one, which will be used for reconstruction // Only do this if the flag below is true. Otherwise, use the original particles for reconstruction if (baseMLO->do_reconstruct_subtracted_bodies) op.Fimg_nomask.at(img_id) -= Fsum_obody; // For the masked one, have to mask outside the circular mask to prevent negative values outside the mask in the subtracted image! CenterFFTbySign(Fsum_obody); windowFourierTransform(Fsum_obody, Faux, baseMLO->image_full_size[optics_group]); accMLO->transformer.inverseFourierTransform(Faux, img()); softMaskOutsideMap(img(), my_mask_radius, (RFLOAT)baseMLO->width_mask_edge); // And back to Fourier space now accMLO->transformer.FourierTransform(img(), Faux); windowFourierTransform(Faux, Fsum_obody, baseMLO->image_current_size[optics_group]); CenterFFTbySign(Fsum_obody); // Subtract the other-body FT from the masked exp_Fimgs op.Fimg.at(img_id) -= Fsum_obody; // 23jul17: NEW: as we haven't applied the (nonROUNDED!!) my_refined_ibody_offset yet, do this now in the FourierTransform Faux = op.Fimg.at(img_id); shiftImageInFourierTransform(Faux, op.Fimg.at(img_id), (RFLOAT)baseMLO->image_full_size[optics_group], XX(my_refined_ibody_offset), YY(my_refined_ibody_offset), (accMLO->dataIs3D) ? ZZ(my_refined_ibody_offset) : 0); Faux = op.Fimg_nomask.at(img_id); shiftImageInFourierTransform(Faux, op.Fimg_nomask.at(img_id), (RFLOAT)baseMLO->image_full_size[optics_group], XX(my_refined_ibody_offset), YY(my_refined_ibody_offset), (accMLO->dataIs3D) ? 
ZZ(my_refined_ibody_offset) : 0); } // end if mymodel.nr_bodies > 1 } // end loop img_id //accMLO->transformer.clear(); #ifdef TIMING if (part_id == baseMLO->exp_my_first_part_id) baseMLO->timer.toc(baseMLO->TIMING_ESP_FT); #endif GTOC(accMLO->timer,"getFourierTransformsAndCtfs"); GATHERGPUTIMINGS(accMLO->timer); } // ---------------------------------------------------------------------------- // ------------------ getAllSquaredDifferencesCoarse -------------------------- // ---------------------------------------------------------------------------- template void getAllSquaredDifferencesCoarse( unsigned exp_ipass, OptimisationParamters &op, SamplingParameters &sp, MlOptimiser *baseMLO, MlClass *accMLO, AccPtr &Mweight, AccPtrFactory ptrFactory, int ibody = 0) { #ifdef TIMING if (op.part_id == baseMLO->exp_my_first_part_id) baseMLO->timer.tic(baseMLO->TIMING_ESP_DIFF1); #endif CUSTOM_ALLOCATOR_REGION_NAME("DIFF_COARSE"); CTIC(accMLO->timer,"diff_pre_gpu"); unsigned long weightsPerPart(baseMLO->mymodel.nr_classes * sp.nr_dir * sp.nr_psi * sp.nr_trans * sp.nr_oversampled_rot * sp.nr_oversampled_trans); std::vector > dummy; std::vector > > dummy2; baseMLO->precalculateShiftedImagesCtfsAndInvSigma2s(false, false, op.part_id, sp.current_oversampling, op.metadata_offset, // inserted SHWS 12112015 sp.itrans_min, sp.itrans_max, op.Fimg, dummy, op.Fctf, dummy2, dummy2, op.local_Fctf, op.local_sqrtXi2, op.local_Minvsigma2); CTOC(accMLO->timer,"diff_pre_gpu"); std::vector< AccProjectorPlan > projectorPlans(0, (CudaCustomAllocator *)accMLO->getAllocator()); //If particle specific sampling plan required if (accMLO->generateProjectionPlanOnTheFly) { CTIC(accMLO->timer,"generateProjectionSetupCoarse"); projectorPlans.resize(baseMLO->mymodel.nr_classes, (CudaCustomAllocator *)accMLO->getAllocator()); for (unsigned long iclass = sp.iclass_min; iclass <= sp.iclass_max; iclass++) { if (baseMLO->mymodel.pdf_class[iclass] > 0.) { Matrix2D MBL, MBR, Aori; if (baseMLO->mymodel.nr_bodies > 1) { // img_id=0 because in multi-body refinement we do not do movie frames! RFLOAT rot_ori = DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset, METADATA_ROT); RFLOAT tilt_ori = DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset, METADATA_TILT); RFLOAT psi_ori = DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset, METADATA_PSI); Euler_angles2matrix(rot_ori, tilt_ori, psi_ori, Aori, false); MBL = Aori * (baseMLO->mymodel.orient_bodies[ibody]).transpose() * baseMLO->A_rot90; MBR = baseMLO->mymodel.orient_bodies[ibody]; } int optics_group = baseMLO->mydata.getOpticsGroup(op.part_id, 0); // get optics group of first image for this particle... 
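// The anisotropic magnification and any pixel-size scale difference for this optics
// group are composed into a 3x3 matrix below and pre-multiplied onto MBL, so the
// coarse projection plan is built with corrected orientations (nothing changes when
// the matrix is the identity). The guard tests MBL.mdimx twice, presumably intending
// mdimx/mdimy; in effect it just checks whether MBL was initialised, i.e. the
// multi-body case.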
Matrix2D mag; mag.initIdentity(3); mag = baseMLO->mydata.obsModel.applyAnisoMag(mag, optics_group); mag = baseMLO->mydata.obsModel.applyScaleDifference(mag, optics_group, baseMLO->mymodel.ori_size, baseMLO->mymodel.pixel_size); if (!mag.isIdentity()) { if (MBL.mdimx == 3 && MBL.mdimx ==3) MBL = mag * MBL; else MBL = mag; } projectorPlans[iclass].setup( baseMLO->sampling, op.directions_prior, op.psi_prior, op.pointer_dir_nonzeroprior, op.pointer_psi_nonzeroprior, NULL, //Mcoarse_significant baseMLO->mymodel.pdf_class, baseMLO->mymodel.pdf_direction, sp.nr_dir, sp.nr_psi, sp.idir_min, sp.idir_max, sp.ipsi_min, sp.ipsi_max, sp.itrans_min, sp.itrans_max, 0, //current_oversampling 1, //nr_oversampled_rot iclass, true, //coarse !IS_NOT_INV, baseMLO->do_skip_align, baseMLO->do_skip_rotate, baseMLO->mymodel.orientational_prior_mode, MBL, MBR ); } } CTOC(accMLO->timer,"generateProjectionSetupCoarse"); } else projectorPlans = accMLO->bundle->coarseProjectionPlans; // Loop only from sp.iclass_min to sp.iclass_max to deal with seed generation in first iteration size_t allWeights_size(0); for (int exp_iclass = sp.iclass_min; exp_iclass <= sp.iclass_max; exp_iclass++) allWeights_size += projectorPlans[exp_iclass].orientation_num * sp.nr_trans*sp.nr_oversampled_trans; AccPtr allWeights = ptrFactory.make(allWeights_size); allWeights.accAlloc(); deviceInitValue(allWeights, 0); // Make sure entire array initialized long int allWeights_pos=0; bool do_CC = (baseMLO->iter == 1 && baseMLO->do_firstiter_cc) || baseMLO->do_always_cc; for (int img_id = 0; img_id < sp.nr_images; img_id++) { int my_metadata_offset = op.metadata_offset + img_id; long int group_id = baseMLO->mydata.getGroupId(op.part_id, img_id); RFLOAT my_pixel_size = baseMLO->mydata.getImagePixelSize(op.part_id, img_id); int optics_group = baseMLO->mydata.getOpticsGroup(op.part_id, img_id); unsigned long image_size = op.local_Minvsigma2[img_id].nzyxdim; bool ctf_premultiplied = baseMLO->mydata.obsModel.getCtfPremultiplied(optics_group); /*==================================== Generate Translations ======================================*/ CTIC(accMLO->timer,"translation_1"); long unsigned translation_num((sp.itrans_max - sp.itrans_min + 1) * sp.nr_oversampled_trans); // here we introduce offsets for the trans_ and img_ in an array as it is more efficient to // copy one big array to/from GPU rather than four small arrays size_t trans_x_offset = 0*(size_t)translation_num; size_t trans_y_offset = 1*(size_t)translation_num; size_t trans_z_offset = 2*(size_t)translation_num; size_t img_re_offset = 0*(size_t)image_size; size_t img_im_offset = 1*(size_t)image_size; AccPtr Fimg_ = ptrFactory.make((size_t)image_size*2); AccPtr trans_xyz = ptrFactory.make((size_t)translation_num*3); Fimg_.allAlloc(); trans_xyz.allAlloc(); std::vector oversampled_translations_x, oversampled_translations_y, oversampled_translations_z; for (long int itrans = 0; itrans < translation_num; itrans++) { baseMLO->sampling.getTranslationsInPixel(itrans, 0, my_pixel_size, oversampled_translations_x, oversampled_translations_y, oversampled_translations_z, (baseMLO->do_helical_refine) && (! baseMLO->ignore_helical_symmetry)); RFLOAT xshift = 0., yshift = 0., zshift = 0.; xshift = oversampled_translations_x[0]; yshift = oversampled_translations_y[0]; if (accMLO->dataIs3D) zshift = oversampled_translations_z[0]; if ( (baseMLO->do_helical_refine) && (! 
baseMLO->ignore_helical_symmetry) ) { RFLOAT rot_deg = DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_ROT); RFLOAT tilt_deg = DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_TILT); RFLOAT psi_deg = DIRECT_A2D_ELEM(baseMLO->exp_metadata,my_metadata_offset, METADATA_PSI); transformCartesianAndHelicalCoords(xshift, yshift, zshift, xshift, yshift, zshift, rot_deg, tilt_deg, psi_deg, (accMLO->dataIs3D) ? (3) : (2), HELICAL_TO_CART_COORDS); } trans_xyz[trans_x_offset+itrans] = -2 * PI * xshift / (double)baseMLO->image_full_size[optics_group]; trans_xyz[trans_y_offset+itrans] = -2 * PI * yshift / (double)baseMLO->image_full_size[optics_group]; trans_xyz[trans_z_offset+itrans] = -2 * PI * zshift / (double)baseMLO->image_full_size[optics_group]; } XFLOAT scale_correction = baseMLO->do_scale_correction ? baseMLO->mymodel.scale_correction[group_id] : 1; int exp_current_image_size = (baseMLO->strict_highres_exp > 0.|| baseMLO->adaptive_oversampling > 0) ? baseMLO->image_coarse_size[optics_group] : baseMLO->image_current_size[optics_group]; MultidimArray Fimg; windowFourierTransform(op.Fimg[img_id], Fimg, exp_current_image_size); for (unsigned long i = 0; i < image_size; i ++) { XFLOAT pixel_correction = 1.0/scale_correction; if (baseMLO->do_ctf_correction && fabs(op.local_Fctf[img_id].data[i]) > 1e-8) { // if ctf[i]==0, pix_corr[i] becomes NaN. // However, corr_img[i]==0, so pix-diff in kernel==0. // This is ok since originally, pix-diff==Img.real^2 + Img.imag^2, // which is ori-indep, and we subtract min_diff form ALL orients. if (baseMLO->refs_are_ctf_corrected) { pixel_correction /= op.local_Fctf[img_id].data[i]; } if (ctf_premultiplied) { pixel_correction /= op.local_Fctf[img_id].data[i]; } } Fimg_[img_re_offset+i] = Fimg.data[i].real * pixel_correction; Fimg_[img_im_offset+i] = Fimg.data[i].imag * pixel_correction; } trans_xyz.cpToDevice(); Fimg_.cpToDevice(); CTOC(accMLO->timer,"translation_1"); // To speed up calculation, several image-corrections are grouped into a single pixel-wise "filter", or image-correciton AccPtr corr_img = ptrFactory.make((size_t)image_size); corr_img.allAlloc(); buildCorrImage(baseMLO,op,corr_img,img_id,group_id, ctf_premultiplied); corr_img.cpToDevice(); deviceInitValue(allWeights, (XFLOAT) (op.highres_Xi2_img[img_id] / 2.)); allWeights_pos = 0; for (int exp_iclass = sp.iclass_min; exp_iclass <= sp.iclass_max; exp_iclass++) DEBUG_HANDLE_ERROR(cudaStreamSynchronize(accMLO->classStreams[exp_iclass])); DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaStreamPerThread)); for (unsigned long iclass = sp.iclass_min; iclass <= sp.iclass_max; iclass++) { int iproj; if (baseMLO->mymodel.nr_bodies > 1) iproj = ibody; else iproj = iclass; if ( projectorPlans[iclass].orientation_num > 0 ) { AccProjectorKernel projKernel = AccProjectorKernel::makeKernel( accMLO->bundle->projectors[iproj], op.local_Minvsigma2[img_id].xdim, op.local_Minvsigma2[img_id].ydim, op.local_Minvsigma2[img_id].zdim, op.local_Minvsigma2[img_id].xdim-1); runDiff2KernelCoarse( projKernel, &(~trans_xyz)[trans_x_offset], //~trans_x, &(~trans_xyz)[trans_y_offset], //~trans_y, &(~trans_xyz)[trans_z_offset], //~trans_z, ~corr_img, &(~Fimg_)[img_re_offset], //~Fimg_real, &(~Fimg_)[img_im_offset], //~Fimg_imag, ~projectorPlans[iclass].eulers, &(~allWeights)[allWeights_pos], (XFLOAT) op.local_sqrtXi2[img_id], projectorPlans[iclass].orientation_num, translation_num, image_size, accMLO->classStreams[iclass], do_CC, accMLO->dataIs3D); mapAllWeightsToMweights( 
~projectorPlans[iclass].iorientclasses, &(~allWeights)[allWeights_pos], &(~Mweight)[img_id*weightsPerPart], projectorPlans[iclass].orientation_num, translation_num, accMLO->classStreams[iclass] ); /*==================================== Retrieve Results ======================================*/ allWeights_pos += projectorPlans[iclass].orientation_num*translation_num; } } for (unsigned long exp_iclass = sp.iclass_min; exp_iclass <= sp.iclass_max; exp_iclass++) DEBUG_HANDLE_ERROR(cudaStreamSynchronize(accMLO->classStreams[exp_iclass])); DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaStreamPerThread)); // does not appear to be NEEDED FOR NON-BLOCKING CLASS STREAMS in tests, but should be to sync against classStreams op.min_diff2[img_id] = AccUtilities::getMinOnDevice(allWeights); } // end loop img_id #ifdef TIMING if (op.part_id == baseMLO->exp_my_first_part_id) baseMLO->timer.toc(baseMLO->TIMING_ESP_DIFF1); #endif } // ---------------------------------------------------------------------------- // -------------------- getAllSquaredDifferencesFine -------------------------- // ---------------------------------------------------------------------------- template void getAllSquaredDifferencesFine( unsigned exp_ipass, OptimisationParamters &op, SamplingParameters &sp, MlOptimiser *baseMLO, MlClass *accMLO, std::vector &FinePassWeights, std::vector > &FPCMasks, std::vector &FineProjectionData, AccPtrFactory ptrFactory, int ibody, std::vector &bundleD2) { #ifdef TIMING if (op.part_id == baseMLO->exp_my_first_part_id) baseMLO->timer.tic(baseMLO->TIMING_ESP_DIFF2); #endif CUSTOM_ALLOCATOR_REGION_NAME("DIFF_FINE"); CTIC(accMLO->timer,"diff_pre_gpu"); CTIC(accMLO->timer,"precalculateShiftedImagesCtfsAndInvSigma2s"); std::vector > dummy; std::vector > > dummy2; baseMLO->precalculateShiftedImagesCtfsAndInvSigma2s(false, false, op.part_id, sp.current_oversampling, op.metadata_offset, // inserted SHWS 12112015 sp.itrans_min, sp.itrans_max, op.Fimg, dummy, op.Fctf, dummy2, dummy2, op.local_Fctf, op.local_sqrtXi2, op.local_Minvsigma2); CTOC(accMLO->timer,"precalculateShiftedImagesCtfsAndInvSigma2s"); CTOC(accMLO->timer,"diff_pre_gpu"); /*======================================================================================= Particle Iteration =========================================================================================*/ for (int img_id = 0; img_id < sp.nr_images; img_id++) { // Reset size without de-allocating: we will append everything significant within // the current allocation and then re-allocate the then determined (smaller) volume int my_metadata_offset = op.metadata_offset + img_id; long int group_id = baseMLO->mydata.getGroupId(op.part_id, img_id); RFLOAT my_pixel_size = baseMLO->mydata.getImagePixelSize(op.part_id, img_id); int optics_group = baseMLO->mydata.getOpticsGroup(op.part_id, img_id); unsigned long image_size = op.local_Minvsigma2[img_id].nzyxdim; bool ctf_premultiplied = baseMLO->mydata.obsModel.getCtfPremultiplied(optics_group); MultidimArray Fref; Fref.resize(op.local_Minvsigma2[img_id]); /*==================================== Generate Translations ======================================*/ CTIC(accMLO->timer,"translation_2"); long unsigned translation_num((sp.itrans_max - sp.itrans_min + 1) * sp.nr_oversampled_trans); // here we introduce offsets for the trans_ and img_ in an array as it is more efficient to // copy one big array to/from GPU rather than four small arrays size_t trans_x_offset = 0*(size_t)translation_num; size_t trans_y_offset = 1*(size_t)translation_num; size_t 
trans_z_offset = 2*(size_t)translation_num; size_t img_re_offset = 0*(size_t)image_size; size_t img_im_offset = 1*(size_t)image_size; AccPtr Fimg_ = ptrFactory.make((size_t)image_size*2); AccPtr trans_xyz = ptrFactory.make((size_t)translation_num*3); Fimg_.allAlloc(); trans_xyz.allAlloc(); std::vector oversampled_translations_x, oversampled_translations_y, oversampled_translations_z; int j = 0; for (long int itrans = 0; itrans < (sp.itrans_max - sp.itrans_min + 1); itrans++) { baseMLO->sampling.getTranslationsInPixel(itrans, baseMLO->adaptive_oversampling, my_pixel_size, oversampled_translations_x, oversampled_translations_y, oversampled_translations_z, (baseMLO->do_helical_refine) && (! baseMLO->ignore_helical_symmetry)); for (long int iover_trans = 0; iover_trans < oversampled_translations_x.size(); iover_trans++) { RFLOAT xshift = 0., yshift = 0., zshift = 0.; xshift = oversampled_translations_x[iover_trans]; yshift = oversampled_translations_y[iover_trans]; if (accMLO->dataIs3D) zshift = oversampled_translations_z[iover_trans]; if ( (baseMLO->do_helical_refine) && (! baseMLO->ignore_helical_symmetry) ) { RFLOAT rot_deg = DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_ROT); RFLOAT tilt_deg = DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_TILT); RFLOAT psi_deg = DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_PSI); transformCartesianAndHelicalCoords(xshift, yshift, zshift, xshift, yshift, zshift, rot_deg, tilt_deg, psi_deg, (accMLO->dataIs3D) ? (3) : (2), HELICAL_TO_CART_COORDS); } trans_xyz[trans_x_offset+j] = -2 * PI * xshift / (double)baseMLO->image_full_size[optics_group]; trans_xyz[trans_y_offset+j] = -2 * PI * yshift / (double)baseMLO->image_full_size[optics_group]; trans_xyz[trans_z_offset+j] = -2 * PI * zshift / (double)baseMLO->image_full_size[optics_group]; j ++; } } XFLOAT scale_correction = baseMLO->do_scale_correction ? baseMLO->mymodel.scale_correction[group_id] : 1; int exp_current_image_size = (baseMLO->strict_highres_exp > 0.) ? baseMLO->image_coarse_size[optics_group] : baseMLO->image_current_size[optics_group]; MultidimArray Fimg, Fimg_nomask; windowFourierTransform(op.Fimg[img_id], Fimg, exp_current_image_size); for (unsigned long i = 0; i < image_size; i ++) { XFLOAT pixel_correction = 1.0/scale_correction; if (baseMLO->do_ctf_correction && fabs(op.local_Fctf[img_id].data[i]) > 1e-8) { // if ctf[i]==0, pix_corr[i] becomes NaN. // However, corr_img[i]==0, so pix-diff in kernel==0. // This is ok since originally, pix-diff==Img.real^2 + Img.imag^2, // which is ori-indep, and we subtract min_diff form ALL orients. 
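// Note that the two corrections below are cumulative: when the references are CTF-corrected
// AND the data are CTF-premultiplied, pixel_correction is divided by the CTF twice.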
if (baseMLO->refs_are_ctf_corrected) { pixel_correction /= op.local_Fctf[img_id].data[i]; } if (ctf_premultiplied) { pixel_correction /= op.local_Fctf[img_id].data[i]; } } Fimg_[img_re_offset+i] = Fimg.data[i].real * pixel_correction; Fimg_[img_im_offset+i] = Fimg.data[i].imag * pixel_correction; } CTOC(accMLO->timer,"translation_2"); CTIC(accMLO->timer,"kernel_init_1"); AccPtr corr_img = ptrFactory.make((size_t)image_size); corr_img.allAlloc(); buildCorrImage(baseMLO,op,corr_img,img_id,group_id, ctf_premultiplied); trans_xyz.cpToDevice(); Fimg_.cpToDevice(); corr_img.cpToDevice(); CTOC(accMLO->timer,"kernel_init_1"); std::vector< AccPtr > eulers((size_t)(sp.iclass_max-sp.iclass_min+1), ptrFactory.make()); AccPtrBundle AllEulers = ptrFactory.makeBundle(); AllEulers.setSize(9*FineProjectionData[img_id].orientationNumAllClasses*sizeof(XFLOAT)); AllEulers.allAlloc(); unsigned long newDataSize(0); for (unsigned long exp_iclass = sp.iclass_min; exp_iclass <= sp.iclass_max; exp_iclass++) { FPCMasks[img_id][exp_iclass].weightNum=0; if ((baseMLO->mymodel.pdf_class[exp_iclass] > 0.) && (FineProjectionData[img_id].class_entries[exp_iclass] > 0) ) { // use "slice" constructor with class-specific parameters to retrieve a temporary ProjectionParams with data for this class ProjectionParams thisClassProjectionData( FineProjectionData[img_id], FineProjectionData[img_id].class_idx[exp_iclass], FineProjectionData[img_id].class_idx[exp_iclass]+FineProjectionData[img_id].class_entries[exp_iclass]); // since we retrieved the ProjectionParams for *the whole* class the orientation_num is also equal. thisClassProjectionData.orientation_num[0] = FineProjectionData[img_id].class_entries[exp_iclass]; long unsigned orientation_num = thisClassProjectionData.orientation_num[0]; if(orientation_num==0) continue; CTIC(accMLO->timer,"pair_list_1"); long unsigned significant_num(0); long int nr_over_orient = baseMLO->sampling.oversamplingFactorOrientations(sp.current_oversampling); long int nr_over_trans = baseMLO->sampling.oversamplingFactorTranslations(sp.current_oversampling); // Prepare the mask of the weight-array for this class if (FPCMasks[img_id][exp_iclass].weightNum==0) FPCMasks[img_id][exp_iclass].firstPos = newDataSize; long unsigned ihidden(0); std::vector< long unsigned > iover_transes, ihiddens; for (long int itrans = sp.itrans_min; itrans <= sp.itrans_max; itrans++, ihidden++) { for (long int iover_trans = 0; iover_trans < sp.nr_oversampled_trans; iover_trans++) { ihiddens.push_back(ihidden); iover_transes.push_back(iover_trans); } } int chunkSize(0); if(accMLO->dataIs3D) chunkSize = D2F_CHUNK_DATA3D; else if(accMLO->refIs3D) chunkSize = D2F_CHUNK_DATA3D; else chunkSize = D2F_CHUNK_2D; // Do more significance checks on translations and create jobDivision significant_num = makeJobsForDiff2Fine( op, sp, // alot of different type inputs... orientation_num, translation_num, thisClassProjectionData, iover_transes, ihiddens, nr_over_orient, nr_over_trans, img_id, FinePassWeights[img_id], FPCMasks[img_id][exp_iclass], // ..and output into index-arrays mask... 
chunkSize);     // ..based on a given maximum chunk-size

// extend size by number of significants found this class
newDataSize += significant_num;
FPCMasks[img_id][exp_iclass].weightNum = significant_num;
FPCMasks[img_id][exp_iclass].lastPos = FPCMasks[img_id][exp_iclass].firstPos + significant_num;
CTOC(accMLO->timer,"pair_list_1");

CTIC(accMLO->timer,"IndexedArrayMemCp2");
bundleD2[img_id].pack(FPCMasks[img_id][exp_iclass].jobOrigin);
bundleD2[img_id].pack(FPCMasks[img_id][exp_iclass].jobExtent);
CTOC(accMLO->timer,"IndexedArrayMemCp2");

Matrix2D<RFLOAT> MBL, MBR;
if (baseMLO->mymodel.nr_bodies > 1)
{
	Matrix2D<RFLOAT> Aori;
	RFLOAT rot_ori = DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset, METADATA_ROT);
	RFLOAT tilt_ori = DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset, METADATA_TILT);
	RFLOAT psi_ori = DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset, METADATA_PSI);
	Euler_angles2matrix(rot_ori, tilt_ori, psi_ori, Aori, false);

	MBL = Aori * (baseMLO->mymodel.orient_bodies[ibody]).transpose() * baseMLO->A_rot90;
	MBR = baseMLO->mymodel.orient_bodies[ibody];
}

CTIC(accMLO->timer,"generateEulerMatrices");
eulers[exp_iclass-sp.iclass_min].setSize(9*FineProjectionData[img_id].class_entries[exp_iclass]);
eulers[exp_iclass-sp.iclass_min].hostAlloc();

Matrix2D<RFLOAT> mag;
mag.initIdentity(3);
mag = baseMLO->mydata.obsModel.applyAnisoMag(mag, optics_group);
mag = baseMLO->mydata.obsModel.applyScaleDifference(mag, optics_group, baseMLO->mymodel.ori_size, baseMLO->mymodel.pixel_size);
if (!mag.isIdentity())
{
	if (MBL.mdimx == 3 && MBL.mdimy == 3) MBL = mag * MBL;
	else MBL = mag;
}

generateEulerMatrices(
		thisClassProjectionData,
		&(eulers[exp_iclass-sp.iclass_min])[0],
		true,
		MBL,
		MBR);

AllEulers.pack(eulers[exp_iclass-sp.iclass_min]);

CTOC(accMLO->timer,"generateEulerMatrices");
	}
}

bundleD2[img_id].cpToDevice();
AllEulers.cpToDevice();

FinePassWeights[img_id].rot_id.cpToDevice(); //FIXME this is not used
FinePassWeights[img_id].rot_idx.cpToDevice();
FinePassWeights[img_id].trans_idx.cpToDevice();

for (unsigned long exp_iclass = sp.iclass_min; exp_iclass <= sp.iclass_max; exp_iclass++)
	DEBUG_HANDLE_ERROR(cudaStreamSynchronize(accMLO->classStreams[exp_iclass]));
DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaStreamPerThread));

for (unsigned long iclass = sp.iclass_min; iclass <= sp.iclass_max; iclass++)
{
	int iproj;
	if (baseMLO->mymodel.nr_bodies > 1) iproj = ibody;
	else                                iproj = iclass;

	if ((baseMLO->mymodel.pdf_class[iclass] > 0.) 
&& (FineProjectionData[img_id].class_entries[iclass] > 0) ) { long unsigned orientation_num = FineProjectionData[img_id].class_entries[iclass]; if(orientation_num==0) continue; long unsigned significant_num(FPCMasks[img_id][iclass].weightNum); if(significant_num==0) continue; CTIC(accMLO->timer,"Diff2MakeKernel"); AccProjectorKernel projKernel = AccProjectorKernel::makeKernel( accMLO->bundle->projectors[iproj], op.local_Minvsigma2[img_id].xdim, op.local_Minvsigma2[img_id].ydim, op.local_Minvsigma2[img_id].zdim, op.local_Minvsigma2[img_id].xdim-1); CTOC(accMLO->timer,"Diff2MakeKernel"); // Use the constructed mask to construct a partial class-specific input IndexedDataArray thisClassFinePassWeights(FinePassWeights[img_id],FPCMasks[img_id][iclass]); CTIC(accMLO->timer,"Diff2CALL"); runDiff2KernelFine( projKernel, ~corr_img, &(~Fimg_)[img_re_offset], //~Fimg_real, &(~Fimg_)[img_im_offset], //~Fimg_imag, &(~trans_xyz)[trans_x_offset], //~trans_x, &(~trans_xyz)[trans_y_offset], //~trans_y, &(~trans_xyz)[trans_z_offset], //~trans_z, ~eulers[iclass-sp.iclass_min], ~thisClassFinePassWeights.rot_id, ~thisClassFinePassWeights.rot_idx, ~thisClassFinePassWeights.trans_idx, ~FPCMasks[img_id][iclass].jobOrigin, ~FPCMasks[img_id][iclass].jobExtent, ~thisClassFinePassWeights.weights, op, baseMLO, orientation_num, translation_num, significant_num, image_size, img_id, iclass, accMLO->classStreams[iclass], FPCMasks[img_id][iclass].jobOrigin.getSize(), ((baseMLO->iter == 1 && baseMLO->do_firstiter_cc) || baseMLO->do_always_cc), accMLO->dataIs3D ); // DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaStreamPerThread)); CTOC(accMLO->timer,"Diff2CALL"); } // end if class significant } // end loop iclass for (unsigned long exp_iclass = sp.iclass_min; exp_iclass <= sp.iclass_max; exp_iclass++) DEBUG_HANDLE_ERROR(cudaStreamSynchronize(accMLO->classStreams[exp_iclass])); DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaStreamPerThread)); FinePassWeights[img_id].setDataSize( newDataSize ); CTIC(accMLO->timer,"collect_data_1"); if(baseMLO->adaptive_oversampling!=0) { op.min_diff2[img_id] = (RFLOAT) AccUtilities::getMinOnDevice(FinePassWeights[img_id].weights); } CTOC(accMLO->timer,"collect_data_1"); // std::cerr << " fine pass minweight = " << op.min_diff2[img_id] << std::endl; }// end loop img_id #ifdef TIMING if (op.part_id == baseMLO->exp_my_first_part_id) baseMLO->timer.toc(baseMLO->TIMING_ESP_DIFF2); #endif } // ---------------------------------------------------------------------------- // -------------- convertAllSquaredDifferencesToWeights ----------------------- // ---------------------------------------------------------------------------- template void convertAllSquaredDifferencesToWeights(unsigned exp_ipass, OptimisationParamters &op, SamplingParameters &sp, MlOptimiser *baseMLO, MlClass *accMLO, std::vector< IndexedDataArray > &PassWeights, std::vector< std::vector< IndexedDataArrayMask > > &FPCMasks, AccPtr &Mweight, // FPCMasks = Fine-Pass Class-Masks AccPtrFactory ptrFactory, int ibody) { #ifdef TIMING if (op.part_id == baseMLO->exp_my_first_part_id) { if (exp_ipass == 0) baseMLO->timer.tic(baseMLO->TIMING_ESP_WEIGHT1); else baseMLO->timer.tic(baseMLO->TIMING_ESP_WEIGHT2); } #endif RFLOAT my_sigma2_offset = (baseMLO->mymodel.nr_bodies > 1) ? 
baseMLO->mymodel.sigma_offset_bodies[ibody]*baseMLO->mymodel.sigma_offset_bodies[ibody] : baseMLO->mymodel.sigma2_offset; // Ready the "prior-containers" for all classes (remake every img_id) AccPtr pdf_orientation = ptrFactory.make((size_t)((sp.iclass_max-sp.iclass_min+1) * sp.nr_dir * sp.nr_psi)); AccPtr pdf_orientation_zeros = ptrFactory.make(pdf_orientation.getSize()); AccPtr pdf_offset = ptrFactory.make((size_t)((sp.iclass_max-sp.iclass_min+1)*sp.nr_trans)); AccPtr pdf_offset_zeros = ptrFactory.make(pdf_offset.getSize()); pdf_orientation.accAlloc(); pdf_orientation_zeros.accAlloc(); pdf_offset.allAlloc(); pdf_offset_zeros.allAlloc(); CUSTOM_ALLOCATOR_REGION_NAME("CASDTW_PDF"); // pdf_orientation is img_id-independent, so we keep it above img_id scope CTIC(accMLO->timer,"get_orient_priors"); AccPtr pdfs = ptrFactory.make((size_t)((sp.iclass_max-sp.iclass_min+1) * sp.nr_dir * sp.nr_psi)); pdfs.allAlloc(); for (unsigned long exp_iclass = sp.iclass_min; exp_iclass <= sp.iclass_max; exp_iclass++) for (unsigned long idir = sp.idir_min, iorientclass = (exp_iclass-sp.iclass_min) * sp.nr_dir * sp.nr_psi; idir <=sp.idir_max; idir++) for (unsigned long ipsi = sp.ipsi_min; ipsi <= sp.ipsi_max; ipsi++, iorientclass++) { RFLOAT pdf(0); if (baseMLO->do_skip_align || baseMLO->do_skip_rotate) pdf = baseMLO->mymodel.pdf_class[exp_iclass]; else if (baseMLO->mymodel.orientational_prior_mode == NOPRIOR) pdf = DIRECT_MULTIDIM_ELEM(baseMLO->mymodel.pdf_direction[exp_iclass], idir); else pdf = op.directions_prior[idir] * op.psi_prior[ipsi]; pdfs[iorientclass] = pdf; } pdfs.cpToDevice(); AccUtilities::initOrientations(pdfs, pdf_orientation, pdf_orientation_zeros); CTOC(accMLO->timer,"get_orient_priors"); if(exp_ipass==0 || baseMLO->adaptive_oversampling!=0) { op.sum_weight.clear(); op.sum_weight.resize(sp.nr_images, (RFLOAT)(sp.nr_images)); op.max_weight.clear(); op.max_weight.resize(sp.nr_images, (RFLOAT)-1); } if (exp_ipass==0) op.Mcoarse_significant.resizeNoCp(1,1,sp.nr_images, XSIZE(op.Mweight)); XFLOAT my_significant_weight; op.significant_weight.clear(); op.significant_weight.resize(sp.nr_images, 0.); // loop over all images inside this particle for (int img_id = 0; img_id < sp.nr_images; img_id++) { int my_metadata_offset = op.metadata_offset + img_id; RFLOAT my_pixel_size = baseMLO->mydata.getImagePixelSize(op.part_id, img_id); RFLOAT old_offset_x, old_offset_y, old_offset_z; if (baseMLO->mymodel.nr_bodies > 1) { old_offset_x = old_offset_y = old_offset_z = 0.; } else { old_offset_x = XX(op.old_offset[img_id]); old_offset_y = YY(op.old_offset[img_id]); if (accMLO->dataIs3D) old_offset_z = ZZ(op.old_offset[img_id]); } if ((baseMLO->iter == 1 && baseMLO->do_firstiter_cc) || baseMLO->do_always_cc) { if(exp_ipass==0) { int nr_coarse_weights = (sp.iclass_max-sp.iclass_min+1)*sp.nr_images * sp.nr_dir * sp.nr_psi * sp.nr_trans; PassWeights[img_id].weights.setAccPtr(&(~Mweight)[img_id*nr_coarse_weights]); PassWeights[img_id].weights.setHostPtr(&Mweight[img_id*nr_coarse_weights]); PassWeights[img_id].weights.setSize(nr_coarse_weights); } PassWeights[img_id].weights.doFreeHost=false; std::pair min_pair=AccUtilities::getArgMinOnDevice(PassWeights[img_id].weights); PassWeights[img_id].weights.cpToHost(); DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaStreamPerThread)); //Set all device-located weights to zero, and only the smallest one to 1. 
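// In the CUDA build this is done with an asynchronous memset plus a single host-to-device
// copy of the value 1 at the arg-min position; the non-CUDA branch below writes the array directly.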
#ifdef CUDA DEBUG_HANDLE_ERROR(cudaMemsetAsync(~(PassWeights[img_id].weights), 0.f, PassWeights[img_id].weights.getSize()*sizeof(XFLOAT),0)); XFLOAT unity=1; DEBUG_HANDLE_ERROR(cudaMemcpyAsync( &(PassWeights[img_id].weights(min_pair.first) ), &unity, sizeof(XFLOAT), cudaMemcpyHostToDevice, 0)); PassWeights[img_id].weights.cpToHost(); DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaStreamPerThread)); #else deviceInitValue(PassWeights[img_id].weights, (XFLOAT)0.0); PassWeights[img_id].weights[min_pair.first] = (XFLOAT)1.0; #endif my_significant_weight = 0.999; DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_NR_SIGN) = (RFLOAT) 1.; if (exp_ipass==0) // TODO better memset, 0 => false , 1 => true for (int ihidden = 0; ihidden < XSIZE(op.Mcoarse_significant); ihidden++) if (DIRECT_A2D_ELEM(op.Mweight, img_id, ihidden) >= my_significant_weight) DIRECT_A2D_ELEM(op.Mcoarse_significant, img_id, ihidden) = true; else DIRECT_A2D_ELEM(op.Mcoarse_significant, img_id, ihidden) = false; else { std::pair max_pair = AccUtilities::getArgMaxOnDevice(PassWeights[img_id].weights); op.max_index[img_id].fineIdx = PassWeights[img_id].ihidden_overs[max_pair.first]; op.max_weight[img_id] = max_pair.second; } } else { long int sumRedSize=0; for (unsigned long exp_iclass = sp.iclass_min; exp_iclass <= sp.iclass_max; exp_iclass++) sumRedSize+= (exp_ipass==0) ? ceilf((float)(sp.nr_dir*sp.nr_psi)/(float)SUMW_BLOCK_SIZE) : ceil((float)FPCMasks[img_id][exp_iclass].jobNum / (float)SUMW_BLOCK_SIZE); // loop through making translational priors for all classes this img_id - then copy all at once - then loop through kernel calls ( TODO: group kernel calls into one big kernel) CTIC(accMLO->timer,"get_offset_priors"); for (unsigned long exp_iclass = sp.iclass_min; exp_iclass <= sp.iclass_max; exp_iclass++) { RFLOAT myprior_x, myprior_y, myprior_z; if (baseMLO->mymodel.nr_bodies > 1) { myprior_x = myprior_y = myprior_z = 0.; } else if (baseMLO->mymodel.ref_dim == 2 && !baseMLO->do_helical_refine) { myprior_x = XX(baseMLO->mymodel.prior_offset_class[exp_iclass]); myprior_y = YY(baseMLO->mymodel.prior_offset_class[exp_iclass]); } else { myprior_x = XX(op.prior[img_id]); myprior_y = YY(op.prior[img_id]); if (accMLO->dataIs3D) myprior_z = ZZ(op.prior[img_id]); } for (unsigned long itrans = sp.itrans_min; itrans <= sp.itrans_max; itrans++) { // If it is doing helical refinement AND Cartesian vector myprior has a length > 0, transform the vector to its helical coordinates if ( (baseMLO->do_helical_refine) && (! baseMLO->ignore_helical_symmetry)) { RFLOAT mypriors_len2 = myprior_x * myprior_x + myprior_y * myprior_y; if (accMLO->dataIs3D) mypriors_len2 += myprior_z * myprior_z; if (mypriors_len2 > 0.00001) { RFLOAT rot_deg = DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_ROT); RFLOAT tilt_deg = DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_TILT); RFLOAT psi_deg = DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_PSI); transformCartesianAndHelicalCoords(myprior_x, myprior_y, myprior_z, myprior_x, myprior_y, myprior_z, rot_deg, tilt_deg, psi_deg, (accMLO->dataIs3D) ? (3) : (2), CART_TO_HELICAL_COORDS); } } // (For helical refinement) Now offset, old_offset, sampling.translations and myprior are all in helical coordinates // To speed things up, only calculate pdf_offset at the coarse sampling. 
// That should not matter much, and that way one does not need to calculate all the OversampledTranslations double pdf(0), pdf_zeros(0); RFLOAT offset_x = old_offset_x + baseMLO->sampling.translations_x[itrans]; RFLOAT offset_y = old_offset_y + baseMLO->sampling.translations_y[itrans]; double tdiff2 = 0.; if ( (! baseMLO->do_helical_refine) || (baseMLO->ignore_helical_symmetry) || (accMLO->dataIs3D) ) tdiff2 += (offset_x - myprior_x) * (offset_x - myprior_x); tdiff2 += (offset_y - myprior_y) * (offset_y - myprior_y); if (accMLO->dataIs3D) { RFLOAT offset_z = old_offset_z + baseMLO->sampling.translations_z[itrans]; if ( (! baseMLO->do_helical_refine) || (baseMLO->ignore_helical_symmetry) ) tdiff2 += (offset_z - myprior_z) * (offset_z - myprior_z); } // As of version 3.1, sigma_offsets are in Angstroms! tdiff2 *= my_pixel_size * my_pixel_size; // P(offset|sigma2_offset) // This is the probability of the offset, given the model offset and variance. if (my_sigma2_offset < 0.0001) { pdf_zeros = tdiff2 > 0.; pdf = pdf_zeros ? 0. : 1.; } else { pdf_zeros = false; pdf = tdiff2 / (-2. * my_sigma2_offset); } pdf_offset_zeros[(exp_iclass-sp.iclass_min)*sp.nr_trans + itrans] = pdf_zeros; pdf_offset [(exp_iclass-sp.iclass_min)*sp.nr_trans + itrans] = pdf; } } pdf_offset_zeros.cpToDevice(); pdf_offset.cpToDevice(); CTOC(accMLO->timer,"get_offset_priors"); CTIC(accMLO->timer,"sumweight1"); if(exp_ipass==0) { AccPtr ipartMweight( Mweight, img_id * op.Mweight.xdim + sp.nr_dir * sp.nr_psi * sp.nr_trans * sp.iclass_min, (sp.iclass_max-sp.iclass_min+1) * sp.nr_dir * sp.nr_psi * sp.nr_trans); pdf_offset.streamSync(); AccUtilities::kernel_weights_exponent_coarse( sp.iclass_max-sp.iclass_min+1, pdf_orientation, pdf_orientation_zeros, pdf_offset, pdf_offset_zeros, ipartMweight, (XFLOAT)op.min_diff2[img_id], sp.nr_dir*sp.nr_psi, sp.nr_trans); XFLOAT weights_max = AccUtilities::getMaxOnDevice(ipartMweight); /* * Add 50 since we want to stay away from e^88, which approaches the single precision limit. * We still want as high numbers as possible to utilize most of the single precision span. * Dari - 201710 */ AccUtilities::kernel_exponentiate( ipartMweight, 50 - weights_max); CTIC(accMLO->timer,"sort"); DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaStreamPerThread)); unsigned long ipart_length = (sp.iclass_max-sp.iclass_min+1) * sp.nr_dir * sp.nr_psi * sp.nr_trans; size_t offset = img_id * op.Mweight.xdim + sp.nr_dir * sp.nr_psi * sp.nr_trans * sp.iclass_min; if (ipart_length > 1) { //Wrap the current ipart data in a new pointer AccPtr unsorted_ipart( Mweight, offset, ipart_length); AccPtr filtered = ptrFactory.make((size_t)unsorted_ipart.getSize()); CUSTOM_ALLOCATOR_REGION_NAME("CASDTW_SORTSUM"); filtered.deviceAlloc(); #ifdef DEBUG_CUDA if (unsorted_ipart.getSize()==0) ACC_PTR_DEBUG_FATAL("Unsorted array size zero.\n"); // Hopefully Impossible #endif size_t filteredSize = AccUtilities::filterGreaterZeroOnDevice(unsorted_ipart, filtered); if (filteredSize == 0) { std::cerr << std::endl; std::cerr << " fn_img= " << sp.current_img << std::endl; std::cerr << " img_id= " << img_id << " adaptive_fraction= " << baseMLO->adaptive_fraction << std::endl; std::cerr << " min_diff2= " << op.min_diff2[img_id] << std::endl; pdf_orientation.dumpAccToFile("error_dump_pdf_orientation"); pdf_offset.dumpAccToFile("error_dump_pdf_offset"); unsorted_ipart.dumpAccToFile("error_dump_filtered"); std::cerr << "Dumped data: error_dump_pdf_orientation, error_dump_pdf_orientation and error_dump_unsorted." 
<< std::endl; CRITICAL(ERRFILTEREDZERO); // "filteredSize == 0" } filtered.setSize(filteredSize); AccPtr sorted = ptrFactory.make((size_t)filteredSize); AccPtr cumulative_sum = ptrFactory.make((size_t)filteredSize); sorted.accAlloc(); cumulative_sum.accAlloc(); AccUtilities::sortOnDevice(filtered, sorted); AccUtilities::scanOnDevice(sorted, cumulative_sum); CTOC(accMLO->timer,"sort"); op.sum_weight[img_id] = cumulative_sum.getAccValueAt(cumulative_sum.getSize() - 1); long int my_nr_significant_coarse_samples; size_t thresholdIdx = findThresholdIdxInCumulativeSum(cumulative_sum, (1 - baseMLO->adaptive_fraction) * op.sum_weight[img_id]); my_nr_significant_coarse_samples = filteredSize - thresholdIdx; if (my_nr_significant_coarse_samples == 0) { std::cerr << std::endl; std::cerr << " fn_img= " << sp.current_img << std::endl; std::cerr << " img_id= " << img_id << " adaptive_fraction= " << baseMLO->adaptive_fraction << std::endl; std::cerr << " threshold= " << (1 - baseMLO->adaptive_fraction) * op.sum_weight[img_id] << " thresholdIdx= " << thresholdIdx << std::endl; std::cerr << " op.sum_weight[img_id]= " << op.sum_weight[img_id] << std::endl; std::cerr << " min_diff2= " << op.min_diff2[img_id] << std::endl; unsorted_ipart.dumpAccToFile("error_dump_unsorted"); filtered.dumpAccToFile("error_dump_filtered"); sorted.dumpAccToFile("error_dump_sorted"); cumulative_sum.dumpAccToFile("error_dump_cumulative_sum"); std::cerr << "Written error_dump_unsorted, error_dump_filtered, error_dump_sorted, and error_dump_cumulative_sum." << std::endl; CRITICAL(ERRNOSIGNIFS); // "my_nr_significant_coarse_samples == 0" } if (baseMLO->maximum_significants > 0 && my_nr_significant_coarse_samples > baseMLO->maximum_significants) { my_nr_significant_coarse_samples = baseMLO->maximum_significants; thresholdIdx = filteredSize - my_nr_significant_coarse_samples; } XFLOAT significant_weight = sorted.getAccValueAt(thresholdIdx); CTIC(accMLO->timer,"getArgMaxOnDevice"); std::pair max_pair = AccUtilities::getArgMaxOnDevice(unsorted_ipart); CTOC(accMLO->timer,"getArgMaxOnDevice"); op.max_index[img_id].coarseIdx = max_pair.first; op.max_weight[img_id] = max_pair.second; // Store nr_significant_coarse_samples for this particle // Don't do this for multibody, as it would be overwritten for each body, // and we also use METADATA_NR_SIGN in the new safeguard for the gold-standard separation if (baseMLO->mymodel.nr_bodies == 1) DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_NR_SIGN) = (RFLOAT) my_nr_significant_coarse_samples; AccPtr Mcoarse_significant = ptrFactory.make(ipart_length); Mcoarse_significant.setHostPtr(&op.Mcoarse_significant.data[offset]); CUSTOM_ALLOCATOR_REGION_NAME("CASDTW_SIG"); Mcoarse_significant.deviceAlloc(); DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaStreamPerThread)); arrayOverThreshold(unsorted_ipart, Mcoarse_significant, significant_weight); Mcoarse_significant.cpToHost(); DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaStreamPerThread)); } else if (ipart_length == 1) { op.Mcoarse_significant.data[img_id * op.Mweight.xdim + sp.nr_dir * sp.nr_psi * sp.nr_trans * sp.iclass_min] = 1; } else CRITICAL(ERRNEGLENGTH); } else { for (int exp_iclass = sp.iclass_min; exp_iclass <= sp.iclass_max; exp_iclass++) DEBUG_HANDLE_ERROR(cudaStreamSynchronize(accMLO->classStreams[exp_iclass])); DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaStreamPerThread)); XFLOAT weights_max = -std::numeric_limits::max(); pdf_offset.streamSync(); for (unsigned long exp_iclass = sp.iclass_min; exp_iclass <= sp.iclass_max; 
exp_iclass++) // TODO could use classStreams { if ((baseMLO->mymodel.pdf_class[exp_iclass] > 0.) && (FPCMasks[img_id][exp_iclass].weightNum > 0) ) { // Use the constructed mask to build a partial (class-specific) input // (until now, PassWeights has been an empty placeholder. We now create class-partials pointing at it, and start to fill it with stuff) IndexedDataArray thisClassPassWeights(PassWeights[img_id],FPCMasks[img_id][exp_iclass]); AccPtr pdf_orientation_class = ptrFactory.make(sp.nr_dir*sp.nr_psi), pdf_offset_class = ptrFactory.make(sp.nr_trans); AccPtr pdf_orientation_zeros_class = ptrFactory.make(sp.nr_dir*sp.nr_psi), pdf_offset_zeros_class = ptrFactory.make(sp.nr_trans); pdf_orientation_class .setAccPtr(&((~pdf_orientation) [(exp_iclass-sp.iclass_min)*sp.nr_dir*sp.nr_psi])); pdf_orientation_zeros_class.setAccPtr(&((~pdf_orientation_zeros)[(exp_iclass-sp.iclass_min)*sp.nr_dir*sp.nr_psi])); pdf_offset_class .setAccPtr(&((~pdf_offset) [(exp_iclass-sp.iclass_min)*sp.nr_trans])); pdf_offset_zeros_class .setAccPtr(&((~pdf_offset_zeros) [(exp_iclass-sp.iclass_min)*sp.nr_trans])); thisClassPassWeights.weights.setStream(accMLO->classStreams[exp_iclass]); AccUtilities::kernel_exponentiate_weights_fine( ~pdf_orientation_class, ~pdf_orientation_zeros_class, ~pdf_offset_class, ~pdf_offset_zeros_class, ~thisClassPassWeights.weights, (XFLOAT)op.min_diff2[img_id], sp.nr_oversampled_rot, sp.nr_oversampled_trans, ~thisClassPassWeights.rot_id, ~thisClassPassWeights.trans_idx, ~FPCMasks[img_id][exp_iclass].jobOrigin, ~FPCMasks[img_id][exp_iclass].jobExtent, FPCMasks[img_id][exp_iclass].jobNum, accMLO->classStreams[exp_iclass]); XFLOAT m = AccUtilities::getMaxOnDevice(thisClassPassWeights.weights); if (m > weights_max) weights_max = m; } } for (unsigned long exp_iclass = sp.iclass_min; exp_iclass <= sp.iclass_max; exp_iclass++) // TODO could use classStreams { if ((baseMLO->mymodel.pdf_class[exp_iclass] > 0.) && (FPCMasks[img_id][exp_iclass].weightNum > 0) ) { IndexedDataArray thisClassPassWeights(PassWeights[img_id],FPCMasks[img_id][exp_iclass]); thisClassPassWeights.weights.setStream(accMLO->classStreams[exp_iclass]); /* * Add 50 since we want to stay away from e^88, which approaches the single precision limit. * We still want as high numbers as possible to utilize most of the single precision span. * Dari - 201710 */ AccUtilities::kernel_exponentiate( thisClassPassWeights.weights, 50 - weights_max ); } } op.min_diff2[img_id] += 50 - weights_max; for (unsigned long exp_iclass = sp.iclass_min; exp_iclass <= sp.iclass_max; exp_iclass++) DEBUG_HANDLE_ERROR(cudaStreamSynchronize(accMLO->classStreams[exp_iclass])); DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaStreamPerThread)); PassWeights[img_id].weights.cpToHost(); // note that the host-pointer is shared: we're copying to Mweight. 
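// The exponentiated weights are next sorted on the device and prefix-summed: the last element
// of the cumulative sum is the per-image sum of weights, and the position of
// (1 - adaptive_fraction) * sum_weight in that cumulative sum defines the significance threshold used below.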
CTIC(accMLO->timer,"sort"); DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaStreamPerThread)); size_t weightSize = PassWeights[img_id].weights.getSize(); AccPtr sorted = ptrFactory.make((size_t)weightSize); AccPtr cumulative_sum = ptrFactory.make((size_t)weightSize); CUSTOM_ALLOCATOR_REGION_NAME("CASDTW_FINE"); sorted.accAlloc(); cumulative_sum.accAlloc(); AccUtilities::sortOnDevice(PassWeights[img_id].weights, sorted); AccUtilities::scanOnDevice(sorted, cumulative_sum); CTOC(accMLO->timer,"sort"); if(baseMLO->adaptive_oversampling!=0) { op.sum_weight[img_id] = cumulative_sum.getAccValueAt(cumulative_sum.getSize() - 1); if (op.sum_weight[img_id]==0) { std::cerr << std::endl; std::cerr << " fn_img= " << sp.current_img << std::endl; std::cerr << " op.part_id= " << op.part_id << std::endl; std::cerr << " img_id= " << img_id << std::endl; std::cerr << " op.min_diff2[img_id]= " << op.min_diff2[img_id] << std::endl; int group_id = baseMLO->mydata.getGroupId(op.part_id, img_id); std::cerr << " group_id= " << group_id << std::endl; std::cerr << " ml_model.scale_correction[group_id]= " << baseMLO->mymodel.scale_correction[group_id] << std::endl; std::cerr << " exp_significant_weight[img_id]= " << op.significant_weight[img_id] << std::endl; std::cerr << " exp_max_weight[img_id]= " << op.max_weight[img_id] << std::endl; std::cerr << " ml_model.sigma2_noise[group_id]= " << baseMLO->mymodel.sigma2_noise[group_id] << std::endl; CRITICAL(ERRSUMWEIGHTZERO); //"op.sum_weight[img_id]==0" } size_t thresholdIdx = findThresholdIdxInCumulativeSum(cumulative_sum, (1 - baseMLO->adaptive_fraction) * op.sum_weight[img_id]); my_significant_weight = sorted.getAccValueAt(thresholdIdx); CTIC(accMLO->timer,"getArgMaxOnDevice"); std::pair max_pair = AccUtilities::getArgMaxOnDevice(PassWeights[img_id].weights); CTOC(accMLO->timer,"getArgMaxOnDevice"); op.max_index[img_id].fineIdx = PassWeights[img_id].ihidden_overs[max_pair.first]; op.max_weight[img_id] = max_pair.second; } else { my_significant_weight = sorted.getAccValueAt(0); } } CTOC(accMLO->timer,"sumweight1"); } op.significant_weight[img_id] = (RFLOAT) my_significant_weight; } // end loop img_id #ifdef TIMING if (op.part_id == baseMLO->exp_my_first_part_id) { if (exp_ipass == 0) baseMLO->timer.toc(baseMLO->TIMING_ESP_WEIGHT1); else baseMLO->timer.toc(baseMLO->TIMING_ESP_WEIGHT2); } #endif } // ---------------------------------------------------------------------------- // -------------------------- storeWeightedSums ------------------------------- // ---------------------------------------------------------------------------- template void storeWeightedSums(OptimisationParamters &op, SamplingParameters &sp, MlOptimiser *baseMLO, MlClass *accMLO, std::vector &FinePassWeights, std::vector &ProjectionData, std::vector > &FPCMasks, AccPtrFactory ptrFactory, int ibody, std::vector< AccPtrBundle > &bundleSWS) { #ifdef TIMING if (op.part_id == baseMLO->exp_my_first_part_id) baseMLO->timer.tic(baseMLO->TIMING_ESP_WSUM); #endif CTIC(accMLO->timer,"store_init"); // Re-do below because now also want unmasked images AND if (stricht_highres_exp >0.) 
then may need to resize std::vector > dummy; std::vector > > dummy2; baseMLO->precalculateShiftedImagesCtfsAndInvSigma2s(false, true, op.part_id, sp.current_oversampling, op.metadata_offset, // inserted SHWS 12112015 sp.itrans_min, sp.itrans_max, op.Fimg, op.Fimg_nomask, op.Fctf, dummy2, dummy2, op.local_Fctf, op.local_sqrtXi2, op.local_Minvsigma2); // In doThreadPrecalculateShiftedImagesCtfsAndInvSigma2s() the origin of the op.local_Minvsigma2s was omitted. // Set those back here for (int img_id = 0; img_id < sp.nr_images; img_id++) { int group_id = baseMLO->mydata.getGroupId(op.part_id, img_id); DIRECT_MULTIDIM_ELEM(op.local_Minvsigma2[img_id], 0) = 1. / (baseMLO->sigma2_fudge * DIRECT_A1D_ELEM(baseMLO->mymodel.sigma2_noise[group_id], 0)); } // For norm_correction and scale_correction of all images of this particle std::vector exp_wsum_norm_correction; std::vector exp_wsum_scale_correction_XA, exp_wsum_scale_correction_AA; std::vector thr_wsum_signal_product_spectra, thr_wsum_reference_power_spectra; exp_wsum_norm_correction.resize(sp.nr_images, 0.); std::vector > thr_wsum_sigma2_noise; // for noise estimation (per image) thr_wsum_sigma2_noise.resize(sp.nr_images); // For scale_correction if (baseMLO->do_scale_correction) { exp_wsum_scale_correction_XA.resize(sp.nr_images); exp_wsum_scale_correction_AA.resize(sp.nr_images); thr_wsum_signal_product_spectra.resize(sp.nr_images); thr_wsum_reference_power_spectra.resize(sp.nr_images); } // Possibly different array sizes in different optics groups! for (int img_id = 0; img_id < sp.nr_images; img_id++) { int optics_group = baseMLO->mydata.getOpticsGroup(op.part_id, img_id); thr_wsum_sigma2_noise[img_id].initZeros(baseMLO->image_full_size[optics_group]/2 + 1); if (baseMLO->do_scale_correction) { exp_wsum_scale_correction_AA[img_id] = 0.; exp_wsum_scale_correction_XA[img_id] = 0.; thr_wsum_signal_product_spectra[img_id] = 0.; thr_wsum_reference_power_spectra[img_id] = 0.; } } std::vector oversampled_translations_x, oversampled_translations_y, oversampled_translations_z; bool have_warned_small_scale = false; // Make local copies of weighted sums (except BPrefs, which are too big) // so that there are not too many mutex locks below std::vector > thr_wsum_pdf_direction; std::vector thr_wsum_norm_correction, thr_sumw_group, thr_wsum_pdf_class, thr_wsum_prior_offsetx_class, thr_wsum_prior_offsety_class; RFLOAT thr_wsum_sigma2_offset; MultidimArray thr_metadata, zeroArray; // wsum_pdf_direction is a 1D-array (of length sampling.NrDirections()) for each class zeroArray.initZeros(baseMLO->sampling.NrDirections()); thr_wsum_pdf_direction.resize(baseMLO->mymodel.nr_classes * baseMLO->mymodel.nr_bodies, zeroArray); // sumw_group is a RFLOAT for each group thr_sumw_group.resize(sp.nr_images, 0.); // wsum_pdf_class is a RFLOAT for each class thr_wsum_pdf_class.resize(baseMLO->mymodel.nr_classes, 0.); if (baseMLO->mymodel.ref_dim == 2) { thr_wsum_prior_offsetx_class.resize(baseMLO->mymodel.nr_classes, 0.); thr_wsum_prior_offsety_class.resize(baseMLO->mymodel.nr_classes, 0.); } // wsum_sigma2_offset is just a RFLOAT thr_wsum_sigma2_offset = 0.; CTOC(accMLO->timer,"store_init"); /*======================================================================================= COLLECT 2 AND SET METADATA =======================================================================================*/ CTIC(accMLO->timer,"collect_data_2"); unsigned long nr_transes = sp.nr_trans*sp.nr_oversampled_trans; unsigned long nr_fake_classes = (sp.iclass_max-sp.iclass_min+1); unsigned long 
oversamples = sp.nr_oversampled_trans * sp.nr_oversampled_rot; std::vector block_nums(sp.nr_images*nr_fake_classes); for (int img_id = 0; img_id < sp.nr_images; img_id++) { // here we introduce offsets for the oo_transes in an array as it is more efficient to // copy one big array to/from GPU rather than four small arrays size_t otrans_x = 0*(size_t)nr_fake_classes*nr_transes; size_t otrans_y = 1*(size_t)nr_fake_classes*nr_transes; size_t otrans_z = 2*(size_t)nr_fake_classes*nr_transes; size_t otrans_x2y2z2 = 3*(size_t)nr_fake_classes*nr_transes; // Allocate space for all classes, so that we can pre-calculate data for all classes, copy in one operation, call kenrels on all classes, and copy back in one operation AccPtr oo_otrans = ptrFactory.make((size_t)nr_fake_classes*nr_transes*4); oo_otrans.allAlloc(); int sumBlockNum =0; int my_metadata_offset = op.metadata_offset + img_id; int group_id = baseMLO->mydata.getGroupId(op.part_id, img_id); const int optics_group = baseMLO->mydata.getOpticsGroup(op.part_id, img_id); RFLOAT my_pixel_size = baseMLO->mydata.getImagePixelSize(op.part_id, img_id); CTIC(accMLO->timer,"collect_data_2_pre_kernel"); for (unsigned long exp_iclass = sp.iclass_min; exp_iclass <= sp.iclass_max; exp_iclass++) { unsigned long fake_class = exp_iclass-sp.iclass_min; // if we only have the third class to do, the third class will be the "first" we do, i.e. the "fake" first. if ((baseMLO->mymodel.pdf_class[exp_iclass] == 0.) || (ProjectionData[img_id].class_entries[exp_iclass] == 0) ) continue; // Use the constructed mask to construct a partial class-specific input IndexedDataArray thisClassFinePassWeights(FinePassWeights[img_id],FPCMasks[img_id][exp_iclass]); // Re-define the job-partition of the indexedArray of weights so that the collect-kernel can work with it. block_nums[nr_fake_classes*img_id + fake_class] = makeJobsForCollect(thisClassFinePassWeights, FPCMasks[img_id][exp_iclass], ProjectionData[img_id].orientation_num[exp_iclass]); bundleSWS[img_id].pack(FPCMasks[img_id][exp_iclass].jobOrigin); bundleSWS[img_id].pack(FPCMasks[img_id][exp_iclass].jobExtent); sumBlockNum+=block_nums[nr_fake_classes*img_id + fake_class]; RFLOAT myprior_x, myprior_y, myprior_z, old_offset_z; RFLOAT old_offset_x = XX(op.old_offset[img_id]); RFLOAT old_offset_y = YY(op.old_offset[img_id]); if (baseMLO->mymodel.ref_dim == 2 && baseMLO->mymodel.nr_bodies == 1) { myprior_x = XX(baseMLO->mymodel.prior_offset_class[exp_iclass]); myprior_y = YY(baseMLO->mymodel.prior_offset_class[exp_iclass]); } else { myprior_x = XX(op.prior[img_id]); myprior_y = YY(op.prior[img_id]); if (baseMLO->mymodel.data_dim == 3) { myprior_z = ZZ(op.prior[img_id]); old_offset_z = ZZ(op.old_offset[img_id]); } } /*====================================================== COLLECT 2 ======================================================*/ //Pregenerate oversampled translation objects for kernel-call for (long int itrans = 0, iitrans = 0; itrans < sp.nr_trans; itrans++) { baseMLO->sampling.getTranslationsInPixel(itrans, baseMLO->adaptive_oversampling, my_pixel_size, oversampled_translations_x, oversampled_translations_y, oversampled_translations_z, (baseMLO->do_helical_refine) && (! 
baseMLO->ignore_helical_symmetry)); for (long int iover_trans = 0; iover_trans < sp.nr_oversampled_trans; iover_trans++, iitrans++) { oo_otrans[otrans_x+fake_class*nr_transes+iitrans] = old_offset_x + oversampled_translations_x[iover_trans]; oo_otrans[otrans_y+fake_class*nr_transes+iitrans] = old_offset_y + oversampled_translations_y[iover_trans]; if (accMLO->dataIs3D) oo_otrans[otrans_z+fake_class*nr_transes+iitrans] = old_offset_z + oversampled_translations_z[iover_trans]; // Calculate the vector length of myprior RFLOAT mypriors_len2 = myprior_x * myprior_x + myprior_y * myprior_y; if (accMLO->dataIs3D) mypriors_len2 += myprior_z * myprior_z; // If it is doing helical refinement AND Cartesian vector myprior has a length > 0, transform the vector to its helical coordinates if ( (baseMLO->do_helical_refine) && (! baseMLO->ignore_helical_symmetry) && (mypriors_len2 > 0.00001) ) { RFLOAT rot_deg = DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_ROT); RFLOAT tilt_deg = DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_TILT); RFLOAT psi_deg = DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_PSI); transformCartesianAndHelicalCoords(myprior_x, myprior_y, myprior_z, myprior_x, myprior_y, myprior_z, rot_deg, tilt_deg, psi_deg, (accMLO->dataIs3D) ? (3) : (2), CART_TO_HELICAL_COORDS); } if ( (! baseMLO->do_helical_refine) || (baseMLO->ignore_helical_symmetry) ) RFLOAT diffx = myprior_x - oo_otrans[otrans_x+fake_class*nr_transes+iitrans]; RFLOAT diffx = myprior_x - oo_otrans[otrans_x+fake_class*nr_transes+iitrans]; RFLOAT diffy = myprior_y - oo_otrans[otrans_y+fake_class*nr_transes+iitrans]; RFLOAT diffz = 0; if (accMLO->dataIs3D) diffz = myprior_z - (old_offset_z + oversampled_translations_z[iover_trans]); oo_otrans[otrans_x2y2z2+fake_class*nr_transes+iitrans] = diffx*diffx + diffy*diffy + diffz*diffz; } } } bundleSWS[img_id].cpToDevice(); oo_otrans.cpToDevice(); DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaStreamPerThread)); // here we introduce offsets for the clases in an array as it is more efficient to // copy one big array to/from GPU rather than four small arrays size_t offsetx_class = 0*(size_t)sumBlockNum; size_t offsety_class = 1*(size_t)sumBlockNum; size_t offsetz_class = 2*(size_t)sumBlockNum; size_t sigma2_offset = 3*(size_t)sumBlockNum; AccPtr p_weights = ptrFactory.make((size_t)sumBlockNum); AccPtr p_thr_wsum_prior_offsetxyz_class = ptrFactory.make((size_t)sumBlockNum*4); p_weights.allAlloc(); p_thr_wsum_prior_offsetxyz_class.allAlloc(); CTOC(accMLO->timer,"collect_data_2_pre_kernel"); int partial_pos=0; for (long int exp_iclass = sp.iclass_min; exp_iclass <= sp.iclass_max; exp_iclass++) { long int fake_class = exp_iclass-sp.iclass_min; // if we only have the third class to do, the third class will be the "first" we do, i.e. the "fake" first. if ((baseMLO->mymodel.pdf_class[exp_iclass] == 0.) 
|| (ProjectionData[img_id].class_entries[exp_iclass] == 0) ) continue; // Use the constructed mask to construct a partial class-specific input IndexedDataArray thisClassFinePassWeights(FinePassWeights[img_id],FPCMasks[img_id][exp_iclass]); long int cpos=fake_class*nr_transes; int block_num = block_nums[nr_fake_classes*img_id + fake_class]; runCollect2jobs(block_num, &(~oo_otrans)[otrans_x+cpos], // otrans-size -> make const &(~oo_otrans)[otrans_y+cpos], // otrans-size -> make const &(~oo_otrans)[otrans_z+cpos], // otrans-size -> make const &(~oo_otrans)[otrans_x2y2z2+cpos], // otrans-size -> make const ~thisClassFinePassWeights.weights, (XFLOAT)op.significant_weight[img_id], (XFLOAT)op.sum_weight[img_id], sp.nr_trans, sp.nr_oversampled_trans, sp.nr_oversampled_rot, oversamples, (baseMLO->do_skip_align || baseMLO->do_skip_rotate ), &(~p_weights)[partial_pos], &(~p_thr_wsum_prior_offsetxyz_class)[offsetx_class+partial_pos], &(~p_thr_wsum_prior_offsetxyz_class)[offsety_class+partial_pos], &(~p_thr_wsum_prior_offsetxyz_class)[offsetz_class+partial_pos], &(~p_thr_wsum_prior_offsetxyz_class)[sigma2_offset+partial_pos], ~thisClassFinePassWeights.rot_idx, ~thisClassFinePassWeights.trans_idx, ~FPCMasks[img_id][exp_iclass].jobOrigin, ~FPCMasks[img_id][exp_iclass].jobExtent, accMLO->dataIs3D); LAUNCH_PRIVATE_ERROR(cudaGetLastError(),accMLO->errorStatus); partial_pos+=block_num; } CTIC(accMLO->timer,"collect_data_2_post_kernel"); p_weights.cpToHost(); p_thr_wsum_prior_offsetxyz_class.cpToHost(); DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaStreamPerThread)); int iorient = 0; partial_pos=0; for (long int iclass = sp.iclass_min; iclass <= sp.iclass_max; iclass++) { long int fake_class = iclass-sp.iclass_min; // if we only have the third class to do, the third class will be the "first" we do, i.e. the "fake" first. if ((baseMLO->mymodel.pdf_class[iclass] == 0.) || (ProjectionData[img_id].class_entries[iclass] == 0) ) continue; int block_num = block_nums[nr_fake_classes*img_id + fake_class]; for (long int n = partial_pos; n < partial_pos+block_num; n++) { iorient= FinePassWeights[img_id].rot_id[FPCMasks[img_id][iclass].jobOrigin[n-partial_pos]+FPCMasks[img_id][iclass].firstPos]; long int mydir, idir=floor(iorient/sp.nr_psi); if (baseMLO->mymodel.orientational_prior_mode == NOPRIOR) mydir = idir; else mydir = op.pointer_dir_nonzeroprior[idir]; // store partials according to indices of the relevant dimension unsigned ithr_wsum_pdf_direction = baseMLO->mymodel.nr_bodies > 1 ? 
ibody : iclass; DIRECT_MULTIDIM_ELEM(thr_wsum_pdf_direction[ithr_wsum_pdf_direction], mydir) += p_weights[n]; thr_sumw_group[img_id] += p_weights[n]; thr_wsum_pdf_class[iclass] += p_weights[n]; thr_wsum_sigma2_offset += my_pixel_size * my_pixel_size * p_thr_wsum_prior_offsetxyz_class[sigma2_offset+n]; if (baseMLO->mymodel.ref_dim == 2) { thr_wsum_prior_offsetx_class[iclass] += my_pixel_size * p_thr_wsum_prior_offsetxyz_class[offsetx_class+n]; thr_wsum_prior_offsety_class[iclass] += my_pixel_size * p_thr_wsum_prior_offsetxyz_class[offsety_class+n]; } } partial_pos+=block_num; } // end loop iclass CTOC(accMLO->timer,"collect_data_2_post_kernel"); } // end loop img_id /*====================================================== SET METADATA ======================================================*/ std::vector< RFLOAT> oversampled_rot, oversampled_tilt, oversampled_psi; for (long int img_id = 0; img_id < sp.nr_images; img_id++) { int my_metadata_offset = op.metadata_offset + img_id; RFLOAT my_pixel_size = baseMLO->mydata.getImagePixelSize(op.part_id, img_id); CTIC(accMLO->timer,"setMetadata"); if(baseMLO->adaptive_oversampling!=0) op.max_index[img_id].fineIndexToFineIndices(sp); // set partial indices corresponding to the found max_index, to be used below else op.max_index[img_id].coarseIndexToCoarseIndices(sp); baseMLO->sampling.getTranslationsInPixel(op.max_index[img_id].itrans, baseMLO->adaptive_oversampling, my_pixel_size, oversampled_translations_x, oversampled_translations_y, oversampled_translations_z, (baseMLO->do_helical_refine) && (! baseMLO->ignore_helical_symmetry)); //TODO We already have rot, tilt and psi don't calculated them again if(baseMLO->do_skip_align || baseMLO->do_skip_rotate) baseMLO->sampling.getOrientations(sp.idir_min, sp.ipsi_min, baseMLO->adaptive_oversampling, oversampled_rot, oversampled_tilt, oversampled_psi, op.pointer_dir_nonzeroprior, op.directions_prior, op.pointer_psi_nonzeroprior, op.psi_prior); else baseMLO->sampling.getOrientations(op.max_index[img_id].idir, op.max_index[img_id].ipsi, baseMLO->adaptive_oversampling, oversampled_rot, oversampled_tilt, oversampled_psi, op.pointer_dir_nonzeroprior, op.directions_prior, op.pointer_psi_nonzeroprior, op.psi_prior); baseMLO->sampling.getOrientations(op.max_index[img_id].idir, op.max_index[img_id].ipsi, baseMLO->adaptive_oversampling, oversampled_rot, oversampled_tilt, oversampled_psi, op.pointer_dir_nonzeroprior, op.directions_prior, op.pointer_psi_nonzeroprior, op.psi_prior); RFLOAT rot = oversampled_rot[op.max_index[img_id].ioverrot]; RFLOAT tilt = oversampled_tilt[op.max_index[img_id].ioverrot]; RFLOAT psi = oversampled_psi[op.max_index[img_id].ioverrot]; int icol_rot = (baseMLO->mymodel.nr_bodies == 1) ? METADATA_ROT : 0 + METADATA_LINE_LENGTH_BEFORE_BODIES + (ibody) * METADATA_NR_BODY_PARAMS; int icol_tilt = (baseMLO->mymodel.nr_bodies == 1) ? METADATA_TILT : 1 + METADATA_LINE_LENGTH_BEFORE_BODIES + (ibody) * METADATA_NR_BODY_PARAMS; int icol_psi = (baseMLO->mymodel.nr_bodies == 1) ? METADATA_PSI : 2 + METADATA_LINE_LENGTH_BEFORE_BODIES + (ibody) * METADATA_NR_BODY_PARAMS; int icol_xoff = (baseMLO->mymodel.nr_bodies == 1) ? METADATA_XOFF : 3 + METADATA_LINE_LENGTH_BEFORE_BODIES + (ibody) * METADATA_NR_BODY_PARAMS; int icol_yoff = (baseMLO->mymodel.nr_bodies == 1) ? METADATA_YOFF : 4 + METADATA_LINE_LENGTH_BEFORE_BODIES + (ibody) * METADATA_NR_BODY_PARAMS; int icol_zoff = (baseMLO->mymodel.nr_bodies == 1) ? 
METADATA_ZOFF : 5 + METADATA_LINE_LENGTH_BEFORE_BODIES + (ibody) * METADATA_NR_BODY_PARAMS; RFLOAT old_rot = DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, icol_rot); DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, icol_rot) = rot; RFLOAT old_tilt = DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, icol_tilt); DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, icol_tilt) = tilt; RFLOAT old_psi = DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, icol_psi); DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, icol_psi) = psi; Matrix1D shifts(baseMLO->mymodel.data_dim); XX(shifts) = XX(op.old_offset[img_id]) + oversampled_translations_x[op.max_index[img_id].iovertrans]; YY(shifts) = YY(op.old_offset[img_id]) + oversampled_translations_y[op.max_index[img_id].iovertrans]; if (accMLO->dataIs3D) { ZZ(shifts) = oversampled_translations_z[op.max_index[img_id].iovertrans]; } // Use oldpsi-angle to rotate back the XX(exp_old_offset[img_id]) + oversampled_translations_x[iover_trans] and if ( (baseMLO->do_helical_refine) && (! baseMLO->ignore_helical_symmetry) ) transformCartesianAndHelicalCoords(shifts, shifts, old_rot, old_tilt, old_psi, HELICAL_TO_CART_COORDS); DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, icol_xoff) = XX(shifts); DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, icol_yoff) = YY(shifts); if (accMLO->dataIs3D) DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, icol_zoff) = ZZ(shifts); if (ibody == 0) { DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_CLASS) = (RFLOAT)op.max_index[img_id].iclass + 1; RFLOAT pmax = op.max_weight[img_id]/op.sum_weight[img_id]; if(pmax>1) //maximum normalised probability weight is (unreasonably) larger than unity CRITICAL("Relion is finding a normalised probability greater than 1"); DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_PMAX) = pmax; } CTOC(accMLO->timer,"setMetadata"); } CTOC(accMLO->timer,"collect_data_2"); /*======================================================================================= MAXIMIZATION =======================================================================================*/ CTIC(accMLO->timer,"maximization"); for (int img_id = 0; img_id < sp.nr_images; img_id++) { int my_metadata_offset = op.metadata_offset + img_id; int group_id = baseMLO->mydata.getGroupId(op.part_id, img_id); const int optics_group = baseMLO->mydata.getOpticsGroup(op.part_id, img_id); RFLOAT my_pixel_size = baseMLO->mydata.getImagePixelSize(op.part_id, img_id); bool ctf_premultiplied = baseMLO->mydata.obsModel.getCtfPremultiplied(optics_group); /*====================================================== TRANSLATIONS ======================================================*/ long unsigned translation_num((sp.itrans_max - sp.itrans_min + 1) * sp.nr_oversampled_trans); size_t trans_x_offset = 0*(size_t)translation_num; size_t trans_y_offset = 1*(size_t)translation_num; size_t trans_z_offset = 2*(size_t)translation_num; AccPtr trans_xyz = ptrFactory.make((size_t)translation_num*3); trans_xyz.allAlloc(); int j = 0; for (long int itrans = 0; itrans < (sp.itrans_max - sp.itrans_min + 1); itrans++) { //TODO Called multiple time to generate same list, reuse the same list baseMLO->sampling.getTranslationsInPixel(itrans, baseMLO->adaptive_oversampling, my_pixel_size, oversampled_translations_x, oversampled_translations_y, oversampled_translations_z, (baseMLO->do_helical_refine) && (! 
baseMLO->ignore_helical_symmetry)); for (long int iover_trans = 0; iover_trans < oversampled_translations_x.size(); iover_trans++) { RFLOAT xshift = 0., yshift = 0., zshift = 0.; xshift = oversampled_translations_x[iover_trans]; yshift = oversampled_translations_y[iover_trans]; if (accMLO->dataIs3D) zshift = oversampled_translations_z[iover_trans]; if ( (baseMLO->do_helical_refine) && (! baseMLO->ignore_helical_symmetry) ) { RFLOAT rot_deg = DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_ROT); RFLOAT tilt_deg = DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_TILT); RFLOAT psi_deg = DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_PSI); transformCartesianAndHelicalCoords(xshift, yshift, zshift, xshift, yshift, zshift, rot_deg, tilt_deg, psi_deg, (accMLO->dataIs3D) ? (3) : (2), HELICAL_TO_CART_COORDS); } trans_xyz[trans_x_offset+j] = -2 * PI * xshift / (double)baseMLO->image_full_size[optics_group]; trans_xyz[trans_y_offset+j] = -2 * PI * yshift / (double)baseMLO->image_full_size[optics_group]; trans_xyz[trans_z_offset+j] = -2 * PI * zshift / (double)baseMLO->image_full_size[optics_group]; j ++; } } trans_xyz.cpToDevice(); /*====================================================== IMAGES ======================================================*/ CUSTOM_ALLOCATOR_REGION_NAME("TRANS_3"); CTIC(accMLO->timer,"translation_3"); MultidimArray Fimg, Fimg_nonmask; windowFourierTransform(op.Fimg[img_id], Fimg, baseMLO->image_current_size[optics_group]); //TODO PO isen't this already done in getFourierTransformsAndCtfs? windowFourierTransform(op.Fimg_nomask[img_id], Fimg_nonmask, baseMLO->image_current_size[optics_group]); unsigned long image_size = Fimg.nzyxdim; size_t re_offset = 0*(size_t)image_size; size_t im_offset = 1*(size_t)image_size; size_t re_nomask_offset = 2*(size_t)image_size; size_t im_nomask_offset = 3*(size_t)image_size; AccPtr Fimgs = ptrFactory.make(4*(size_t)image_size); Fimgs.allAlloc(); for (unsigned long i = 0; i < image_size; i ++) { Fimgs[re_offset+i] = Fimg.data[i].real; Fimgs[im_offset+i] = Fimg.data[i].imag; Fimgs[re_nomask_offset+i] = Fimg_nonmask.data[i].real; Fimgs[im_nomask_offset+i] = Fimg_nonmask.data[i].imag; } Fimgs.cpToDevice(); CTOC(accMLO->timer,"translation_3"); /*====================================================== SCALE ======================================================*/ XFLOAT part_scale(1.); if (baseMLO->do_scale_correction) { part_scale = baseMLO->mymodel.scale_correction[group_id]; if (part_scale > 10000.) { std::cerr << " rlnMicrographScaleCorrection= " << part_scale << " group= " << group_id + 1 << std::endl; CRITICAL(ERRHIGHSCALE); } else if (part_scale < 0.001) { if (!have_warned_small_scale) { std::cout << " WARNING: ignoring group " << group_id + 1 << " with very small or negative scale (" << part_scale << "); Use larger groups for more stable scale estimates." 
<< std::endl; have_warned_small_scale = true; } part_scale = 0.001; } } AccPtr ctfs = ptrFactory.make((size_t)image_size); ctfs.allAlloc(); if (baseMLO->do_ctf_correction) { for (unsigned long i = 0; i < image_size; i++) ctfs[i] = (XFLOAT) op.local_Fctf[img_id].data[i] * part_scale; } else //TODO should be handled by memset for (unsigned long i = 0; i < image_size; i++) ctfs[i] = part_scale; ctfs.cpToDevice(); /*====================================================== MINVSIGMA ======================================================*/ AccPtr Minvsigma2s = ptrFactory.make((size_t)image_size); Minvsigma2s.allAlloc(); if (baseMLO->do_map) for (unsigned long i = 0; i < image_size; i++) Minvsigma2s[i] = op.local_Minvsigma2[img_id].data[i]; else for (unsigned long i = 0; i < image_size; i++) Minvsigma2s[i] = 1; Minvsigma2s.cpToDevice(); /*====================================================== CLASS LOOP ======================================================*/ CUSTOM_ALLOCATOR_REGION_NAME("wdiff2s"); size_t wdiff2s_buf = (size_t)(baseMLO->mymodel.nr_classes*image_size)*2+(size_t)image_size; size_t AA_offset = 0*(size_t)(baseMLO->mymodel.nr_classes*image_size); size_t XA_offset = 1*(size_t)(baseMLO->mymodel.nr_classes*image_size); size_t sum_offset = 2*(size_t)(baseMLO->mymodel.nr_classes*image_size); AccPtr wdiff2s = ptrFactory.make(wdiff2s_buf); wdiff2s.allAlloc(); wdiff2s.accInit(0); unsigned long AAXA_pos=0; CUSTOM_ALLOCATOR_REGION_NAME("BP_data"); // Loop from iclass_min to iclass_max to deal with seed generation in first iteration AccPtr sorted_weights = ptrFactory.make((size_t)(ProjectionData[img_id].orientationNumAllClasses * translation_num)); sorted_weights.allAlloc(); std::vector > eulers(baseMLO->mymodel.nr_classes, ptrFactory.make()); unsigned long classPos = 0; for (unsigned long exp_iclass = sp.iclass_min; exp_iclass <= sp.iclass_max; exp_iclass++) DEBUG_HANDLE_ERROR(cudaStreamSynchronize(accMLO->classStreams[exp_iclass])); DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaStreamPerThread)); for (unsigned long iclass = sp.iclass_min; iclass <= sp.iclass_max; iclass++) { if((baseMLO->mymodel.pdf_class[iclass] == 0.) 
|| (ProjectionData[img_id].class_entries[iclass] == 0)) continue; // Use the constructed mask to construct a partial class-specific input IndexedDataArray thisClassFinePassWeights(FinePassWeights[img_id],FPCMasks[img_id][iclass]); CTIC(accMLO->timer,"thisClassProjectionSetupCoarse"); // use "slice" constructor with class-specific parameters to retrieve a temporary ProjectionParams with data for this class ProjectionParams thisClassProjectionData( ProjectionData[img_id], ProjectionData[img_id].class_idx[iclass], ProjectionData[img_id].class_idx[iclass]+ProjectionData[img_id].class_entries[iclass]); thisClassProjectionData.orientation_num[0] = ProjectionData[img_id].orientation_num[iclass]; CTOC(accMLO->timer,"thisClassProjectionSetupCoarse"); long unsigned orientation_num(thisClassProjectionData.orientation_num[0]); /*====================================================== PROJECTIONS ======================================================*/ Matrix2D MBL, MBR; if (baseMLO->mymodel.nr_bodies > 1) { Matrix2D Aori; RFLOAT rot_ori = DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset, METADATA_ROT); RFLOAT tilt_ori = DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset, METADATA_TILT); RFLOAT psi_ori = DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset, METADATA_PSI); Euler_angles2matrix(rot_ori, tilt_ori, psi_ori, Aori, false); MBL = Aori * (baseMLO->mymodel.orient_bodies[ibody]).transpose() * baseMLO->A_rot90; MBR = baseMLO->mymodel.orient_bodies[ibody]; } eulers[iclass].setSize(orientation_num * 9); eulers[iclass].setStream(accMLO->classStreams[iclass]); eulers[iclass].hostAlloc(); CTIC(accMLO->timer,"generateEulerMatricesProjector"); Matrix2D mag; mag.initIdentity(3); mag = baseMLO->mydata.obsModel.applyAnisoMag(mag, optics_group); mag = baseMLO->mydata.obsModel.applyScaleDifference(mag, optics_group, baseMLO->mymodel.ori_size, baseMLO->mymodel.pixel_size); if (!mag.isIdentity()) { if (MBL.mdimx == 3 && MBL.mdimx ==3) MBL = mag * MBL; else MBL = mag; } generateEulerMatrices( thisClassProjectionData, &eulers[iclass][0], true, MBL, MBR); eulers[iclass].deviceAlloc(); eulers[iclass].cpToDevice(); CTOC(accMLO->timer,"generateEulerMatricesProjector"); /*====================================================== MAP WEIGHTS ======================================================*/ CTIC(accMLO->timer,"pre_wavg_map"); for (long unsigned i = 0; i < orientation_num*translation_num; i++) sorted_weights[classPos+i] = -std::numeric_limits::max(); for (long unsigned i = 0; i < thisClassFinePassWeights.weights.getSize(); i++) sorted_weights[classPos+(thisClassFinePassWeights.rot_idx[i]) * translation_num + thisClassFinePassWeights.trans_idx[i] ] = thisClassFinePassWeights.weights[i]; classPos+=orientation_num*translation_num; CTOC(accMLO->timer,"pre_wavg_map"); } sorted_weights.cpToDevice(); // These syncs are necessary (for multiple ranks on the same GPU), and (assumed) low-cost. for (unsigned long iclass = sp.iclass_min; iclass <= sp.iclass_max; iclass++) DEBUG_HANDLE_ERROR(cudaStreamSynchronize(accMLO->classStreams[iclass])); DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaStreamPerThread)); classPos = 0; for (unsigned long iclass = sp.iclass_min; iclass <= sp.iclass_max; iclass++) { int iproj; if (baseMLO->mymodel.nr_bodies > 1) iproj = ibody; else iproj = iclass; if((baseMLO->mymodel.pdf_class[iclass] == 0.) 
|| (ProjectionData[img_id].class_entries[iclass] == 0)) continue; /*====================================================== KERNEL CALL ======================================================*/ long unsigned orientation_num(ProjectionData[img_id].orientation_num[iclass]); AccProjectorKernel projKernel = AccProjectorKernel::makeKernel( accMLO->bundle->projectors[iproj], op.local_Minvsigma2[img_id].xdim, op.local_Minvsigma2[img_id].ydim, op.local_Minvsigma2[img_id].zdim, op.local_Minvsigma2[img_id].xdim-1); runWavgKernel( projKernel, ~eulers[iclass], &(~Fimgs)[re_offset], //~Fimgs_real, &(~Fimgs)[im_offset], //~Fimgs_imag, &(~trans_xyz)[trans_x_offset], //~trans_x, &(~trans_xyz)[trans_y_offset], //~trans_y, &(~trans_xyz)[trans_z_offset], //~trans_z, &(~sorted_weights)[classPos], ~ctfs, &(~wdiff2s)[sum_offset], &(~wdiff2s)[AA_offset+AAXA_pos], &(~wdiff2s)[XA_offset+AAXA_pos], op, orientation_num, translation_num, image_size, img_id, group_id, iclass, part_scale, baseMLO->refs_are_ctf_corrected, ctf_premultiplied, accMLO->dataIs3D, accMLO->classStreams[iclass]); /*====================================================== BACKPROJECTION ======================================================*/ #ifdef TIMING if (op.part_id == baseMLO->exp_my_first_part_id) baseMLO->timer.tic(baseMLO->TIMING_WSUM_BACKPROJ); #endif CTIC(accMLO->timer,"backproject"); runBackProjectKernel( accMLO->bundle->backprojectors[iproj], projKernel, &(~Fimgs)[re_nomask_offset], //~Fimgs_nomask_real, &(~Fimgs)[im_nomask_offset], //~Fimgs_nomask_imag, &(~trans_xyz)[trans_x_offset], //~trans_x, &(~trans_xyz)[trans_y_offset], //~trans_y, &(~trans_xyz)[trans_z_offset], //~trans_z, &(~sorted_weights)[classPos], ~Minvsigma2s, ~ctfs, translation_num, (XFLOAT) op.significant_weight[img_id], (XFLOAT) op.sum_weight[img_id], ~eulers[iclass], op.local_Minvsigma2[img_id].xdim, op.local_Minvsigma2[img_id].ydim, op.local_Minvsigma2[img_id].zdim, orientation_num, accMLO->dataIs3D, (baseMLO->do_sgd && !baseMLO->do_avoid_sgd), ctf_premultiplied, accMLO->classStreams[iclass]); CTOC(accMLO->timer,"backproject"); #ifdef TIMING if (op.part_id == baseMLO->exp_my_first_part_id) baseMLO->timer.toc(baseMLO->TIMING_WSUM_BACKPROJ); #endif //Update indices AAXA_pos += image_size; classPos += orientation_num*translation_num; } // end loop iclass CUSTOM_ALLOCATOR_REGION_NAME("UNSET"); // NOTE: We've never seen that this sync is necessary, but it is needed in principle, and // its absence in other parts of the code has caused issues. It is also very low-cost. for (unsigned long exp_iclass = sp.iclass_min; exp_iclass <= sp.iclass_max; exp_iclass++) DEBUG_HANDLE_ERROR(cudaStreamSynchronize(accMLO->classStreams[exp_iclass])); DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaStreamPerThread)); wdiff2s.cpToHost(); DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaStreamPerThread)); AAXA_pos=0; for (unsigned long exp_iclass = sp.iclass_min; exp_iclass <= sp.iclass_max; exp_iclass++) { if((baseMLO->mymodel.pdf_class[exp_iclass] == 0.) || (ProjectionData[img_id].class_entries[exp_iclass] == 0)) continue; for (long int j = 0; j < image_size; j++) { int ires = DIRECT_MULTIDIM_ELEM(baseMLO->Mresol_fine[optics_group], j); if (ires > -1 && baseMLO->do_scale_correction && DIRECT_A1D_ELEM(baseMLO->mymodel.data_vs_prior_class[exp_iclass], ires) > 3.) 
{ exp_wsum_scale_correction_AA[img_id] += wdiff2s[AA_offset+AAXA_pos+j]; exp_wsum_scale_correction_XA[img_id] += wdiff2s[XA_offset+AAXA_pos+j]; } } AAXA_pos += image_size; } // end loop iclass for (unsigned long j = 0; j < image_size; j++) { int ires = DIRECT_MULTIDIM_ELEM(baseMLO->Mresol_fine[optics_group], j); if (ires > -1) { thr_wsum_sigma2_noise[img_id].data[ires] += (RFLOAT) wdiff2s[sum_offset+j]; exp_wsum_norm_correction[img_id] += (RFLOAT) wdiff2s[sum_offset+j]; //TODO could be gpu-reduced } } } // end loop img_id CTOC(accMLO->timer,"maximization"); CTIC(accMLO->timer,"store_post_gpu"); // Extend norm_correction and sigma2_noise estimation to higher resolutions for all particles // Also calculate dLL for each particle and store in metadata // loop over all images inside this particle RFLOAT thr_avg_norm_correction = 0.; RFLOAT thr_sum_dLL = 0., thr_sum_Pmax = 0.; for (int img_id = 0; img_id < sp.nr_images; img_id++) { int my_metadata_offset = op.metadata_offset + img_id; int group_id = baseMLO->mydata.getGroupId(op.part_id, img_id); const int optics_group = baseMLO->mydata.getOpticsGroup(op.part_id, img_id); RFLOAT my_pixel_size = baseMLO->mydata.getOpticsPixelSize(optics_group); int my_image_size = baseMLO->mydata.getOpticsImageSize(optics_group); // If the current images were smaller than the original size, fill the rest of wsum_model.sigma2_noise with the power_class spectrum of the images for (unsigned long ires = baseMLO->image_current_size[optics_group]/2 + 1; ires < baseMLO->image_full_size[optics_group]/2 + 1; ires++) { DIRECT_A1D_ELEM(thr_wsum_sigma2_noise[img_id], ires) += DIRECT_A1D_ELEM(op.power_img[img_id], ires); // Also extend the weighted sum of the norm_correction exp_wsum_norm_correction[img_id] += DIRECT_A1D_ELEM(op.power_img[img_id], ires); } // Store norm_correction // Multiply by old value because the old norm_correction term was already applied to the image if (baseMLO->do_norm_correction && baseMLO->mymodel.nr_bodies == 1) { RFLOAT old_norm_correction = DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_NORM); old_norm_correction /= baseMLO->mymodel.avg_norm_correction; // The factor two below is because exp_wsum_norm_correctiom is similar to sigma2_noise, which is the variance for the real/imag components // The variance of the total image (on which one normalizes) is twice this value! RFLOAT normcorr = old_norm_correction * sqrt(exp_wsum_norm_correction[img_id] * 2.); thr_avg_norm_correction += normcorr; // Now set the new norm_correction in the relevant position of exp_metadata DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_NORM) = normcorr; // Print warning for strange norm-correction values if (!((baseMLO->iter == 1 && baseMLO->do_firstiter_cc) || baseMLO->do_always_cc) && DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_NORM) > 10.) { std::cout << " WARNING: norm_correction= "<< DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_NORM) << " for particle " << op.part_id << " in group " << group_id + 1 << "; Are your groups large enough? Or is the reference on the correct greyscale?" 
<< std::endl; } } // Store weighted sums for scale_correction if (baseMLO->do_scale_correction) { // Divide XA by the old scale_correction and AA by the square of that, because was incorporated into Fctf exp_wsum_scale_correction_XA[img_id] /= baseMLO->mymodel.scale_correction[group_id]; exp_wsum_scale_correction_AA[img_id] /= baseMLO->mymodel.scale_correction[group_id] * baseMLO->mymodel.scale_correction[group_id]; thr_wsum_signal_product_spectra[img_id] += exp_wsum_scale_correction_XA[img_id]; thr_wsum_reference_power_spectra[img_id] += exp_wsum_scale_correction_AA[img_id]; } // Calculate DLL for each particle RFLOAT logsigma2 = 0.; RFLOAT remap_image_sizes = (baseMLO->mymodel.ori_size * baseMLO->mymodel.pixel_size) / (my_image_size * my_pixel_size); FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(baseMLO->Mresol_fine[optics_group]) { int ires = DIRECT_MULTIDIM_ELEM(baseMLO->Mresol_fine[optics_group], n); int ires_remapped = ROUND(remap_image_sizes * ires); // Note there is no sqrt in the normalisation term because of the 2-dimensionality of the complex-plane // Also exclude origin from logsigma2, as this will not be considered in the P-calculations if (ires > 0 && ires_remapped < XSIZE(baseMLO->mymodel.sigma2_noise[group_id])) logsigma2 += log( 2. * PI * DIRECT_A1D_ELEM(baseMLO->mymodel.sigma2_noise[group_id], ires_remapped)); } RFLOAT dLL; if ((baseMLO->iter==1 && baseMLO->do_firstiter_cc) || baseMLO->do_always_cc) dLL = -op.min_diff2[img_id]; else dLL = log(op.sum_weight[img_id]) - op.min_diff2[img_id] - logsigma2; // Store dLL of each image in the output array, and keep track of total sum DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_DLL) = dLL; thr_sum_dLL += dLL; // Also store sum of Pmax thr_sum_Pmax += DIRECT_A2D_ELEM(baseMLO->exp_metadata, my_metadata_offset, METADATA_PMAX); } // Now, inside a global_mutex, update the other weighted sums among all threads if (!baseMLO->do_skip_maximization) { pthread_mutex_lock(&global_mutex); for (int img_id = 0; img_id < sp.nr_images; img_id++) { long int igroup = baseMLO->mydata.getGroupId(op.part_id, img_id); int optics_group = baseMLO->mydata.getOpticsGroup(op.part_id, img_id); int my_image_size = baseMLO->mydata.getOpticsImageSize(optics_group); RFLOAT my_pixel_size = baseMLO->mydata.getOpticsPixelSize(optics_group); RFLOAT remap_image_sizes = (baseMLO->mymodel.ori_size * baseMLO->mymodel.pixel_size) / (my_image_size * my_pixel_size); FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY1D(thr_wsum_sigma2_noise[img_id]) { int i_resam = ROUND(i * remap_image_sizes); if (i_resam < XSIZE(baseMLO->wsum_model.sigma2_noise[igroup])) { DIRECT_A1D_ELEM(baseMLO->wsum_model.sigma2_noise[igroup], i_resam) += DIRECT_A1D_ELEM(thr_wsum_sigma2_noise[img_id], i); } } baseMLO->wsum_model.sumw_group[igroup] += thr_sumw_group[img_id]; if (baseMLO->do_scale_correction) { baseMLO->wsum_model.wsum_signal_product[igroup] += thr_wsum_signal_product_spectra[img_id]; baseMLO->wsum_model.wsum_reference_power[igroup] += thr_wsum_reference_power_spectra[img_id]; } } for (int n = 0; n < baseMLO->mymodel.nr_classes; n++) { baseMLO->wsum_model.pdf_class[n] += thr_wsum_pdf_class[n]; if (baseMLO->mymodel.ref_dim == 2) { XX(baseMLO->wsum_model.prior_offset_class[n]) += thr_wsum_prior_offsetx_class[n]; YY(baseMLO->wsum_model.prior_offset_class[n]) += thr_wsum_prior_offsety_class[n]; } } for (int n = 0; n < baseMLO->mymodel.nr_classes * baseMLO->mymodel.nr_bodies; n++) { if (!(baseMLO->do_skip_align || baseMLO->do_skip_rotate) ) baseMLO->wsum_model.pdf_direction[n] += 
thr_wsum_pdf_direction[n]; } baseMLO->wsum_model.sigma2_offset += thr_wsum_sigma2_offset; if (baseMLO->do_norm_correction && baseMLO->mymodel.nr_bodies == 1) baseMLO->wsum_model.avg_norm_correction += thr_avg_norm_correction; baseMLO->wsum_model.LL += thr_sum_dLL; baseMLO->wsum_model.ave_Pmax += thr_sum_Pmax; pthread_mutex_unlock(&global_mutex); } // end if !do_skip_maximization CTOC(accMLO->timer,"store_post_gpu"); #ifdef TIMING if (op.part_id == baseMLO->exp_my_first_part_id) baseMLO->timer.toc(baseMLO->TIMING_ESP_WSUM); #endif } // ---------------------------------------------------------------------------- // -------------------- accDoExpectationOneParticle --------------------------- // ---------------------------------------------------------------------------- template void accDoExpectationOneParticle(MlClass *myInstance, unsigned long part_id_sorted, int thread_id, AccPtrFactory ptrFactory) { SamplingParameters sp; MlOptimiser *baseMLO = myInstance->baseMLO; CTIC(timer,"oneParticle"); #ifdef TIMING // Only time one thread if (thread_id == 0) baseMLO->timer.tic(baseMLO->TIMING_ESP_DIFF2_A); #endif long int part_id = baseMLO->mydata.sorted_idx[part_id_sorted]; sp.nr_images = baseMLO->mydata.numberOfImagesInParticle(part_id); OptimisationParamters op(sp.nr_images, part_id); // In the first iteration, multiple seeds will be generated // A single random class is selected for each pool of images, and one does not marginalise over the orientations // The optimal orientation is based on signal-product (rather than the signal-intensity sensitive Gaussian) // If do_firstiter_cc, then first perform a single iteration with K=1 and cross-correlation criteria, afterwards // Decide which classes to integrate over (for random class assignment in 1st iteration) sp.iclass_min = 0; sp.iclass_max = baseMLO->mymodel.nr_classes - 1; // low-pass filter again and generate the seeds if (baseMLO->do_generate_seeds) { if (baseMLO->do_firstiter_cc && baseMLO->iter == 1) { // In first (CC) iter, use a single reference (and CC) sp.iclass_min = sp.iclass_max = 0; } else if ( (baseMLO->do_firstiter_cc && baseMLO->iter == 2) || (!baseMLO->do_firstiter_cc && baseMLO->iter == 1)) { // In second CC iter, or first iter without CC: generate the seeds // Now select a single random class // exp_part_id is already in randomized order (controlled by -seed) // WARNING: USING SAME iclass_min AND iclass_max FOR SomeParticles!! // Make sure random division is always the same with the same seed long int idx = part_id_sorted - baseMLO->exp_my_first_part_id; if (idx >= baseMLO->exp_random_class_some_particles.size()) REPORT_ERROR("BUG: expectationOneParticle idx>random_class_some_particles.size()"); sp.iclass_min = sp.iclass_max = baseMLO->exp_random_class_some_particles[idx]; } } // Loop over all bodies of the multi-body refinement // Basically, subsequently align and store weighted sums for each body for (int ibody = 0; ibody < baseMLO->mymodel.nr_bodies; ibody++) { OptimisationParamters op(sp.nr_images, part_id); // Skip this body if keep_fixed_bodies[ibody] or if it's angular accuracy is worse than 1.5x the sampling rate if ( baseMLO->mymodel.nr_bodies > 1 && baseMLO->mymodel.keep_fixed_bodies[ibody] > 0) continue; // Global exp_metadata array has metadata of all particles. Where does part_id start? 
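// Background on the loop below: the global exp_metadata table is laid out
// particle-by-particle, so this particle's starting row is the sum of the image
// counts of all particles that precede it in the sorted order. Roughly
// (illustrative pseudo-form only; the real loop below uses the RELION calls):
//   offset = 0;
//   for (p = first_particle; p != this_particle; ++p)
//       offset += number_of_images_in_particle(p);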
for (long int iori = baseMLO->exp_my_first_part_id; iori <= baseMLO->exp_my_last_part_id; iori++) { if (iori == part_id_sorted) break; op.metadata_offset += baseMLO->mydata.numberOfImagesInParticle(iori); } #ifdef TIMING // Only time one thread if (thread_id == 0) baseMLO->timer.toc(baseMLO->TIMING_ESP_DIFF2_A); #endif CTIC(timer,"getFourierTransformsAndCtfs"); getFourierTransformsAndCtfs(part_id, op, sp, baseMLO, myInstance, ptrFactory, ibody); CTOC(timer,"getFourierTransformsAndCtfs"); // To deal with skipped alignments/rotations if (baseMLO->do_skip_align) { sp.itrans_min = sp.itrans_max = sp.idir_min = sp.idir_max = sp.ipsi_min = sp.ipsi_max = part_id_sorted - baseMLO->exp_my_first_part_id; } else { sp.itrans_min = 0; sp.itrans_max = baseMLO->sampling.NrTranslationalSamplings() - 1; } if (baseMLO->do_skip_align || baseMLO->do_skip_rotate) { sp.idir_min = sp.idir_max = sp.ipsi_min = sp.ipsi_max = part_id_sorted - baseMLO->exp_my_first_part_id; } else if (baseMLO->do_only_sample_tilt) { sp.idir_min = 0; sp.idir_max = baseMLO->sampling.NrDirections(0, &op.pointer_dir_nonzeroprior) - 1; sp.ipsi_min = sp.ipsi_max = part_id_sorted - baseMLO->exp_my_first_part_id; } else { sp.idir_min = sp.ipsi_min = 0; sp.idir_max = baseMLO->sampling.NrDirections(0, &op.pointer_dir_nonzeroprior) - 1; sp.ipsi_max = baseMLO->sampling.NrPsiSamplings(0, &op.pointer_psi_nonzeroprior ) - 1; } // Initialise significant weight to minus one, so that all coarse sampling points will be handled in the first pass op.significant_weight.resize(sp.nr_images, -1.); // Only perform a second pass when using adaptive oversampling //int nr_sampling_passes = (baseMLO->adaptive_oversampling > 0) ? 2 : 1; // But on the gpu the data-structures are different between passes, so we need to make a symbolic pass to set the weights up for storeWS int nr_sampling_passes = 2; /// -- This is a iframe-indexed vector, each entry of which is a dense data-array. These are replacements to using // Mweight in the sparse (Fine-sampled) pass, coarse is unused but created empty input for convert ( FIXME ) std::vector CoarsePassWeights(1, ptrFactory); std::vector FinePassWeights(sp.nr_images, ptrFactory); // -- This is a iframe-indexed vector, each entry of which is a class-indexed vector of masks, one for each // class in FinePassWeights std::vector < std::vector > FinePassClassMasks(sp.nr_images, std::vector (baseMLO->mymodel.nr_classes, ptrFactory)); // -- This is a iframe-indexed vector, each entry of which is parameters used in the projection-operations *after* the // coarse pass, declared here to keep scope to storeWS std::vector < ProjectionParams > FineProjectionData(sp.nr_images, baseMLO->mymodel.nr_classes); std::vector < AccPtrBundle > bundleD2(sp.nr_images, ptrFactory.makeBundle()); std::vector < AccPtrBundle > bundleSWS(sp.nr_images, ptrFactory.makeBundle()); for (int ipass = 0; ipass < nr_sampling_passes; ipass++) { CTIC(timer,"weightPass"); #ifdef TIMING // Only time one thread if (thread_id == 0) baseMLO->timer.tic(baseMLO->TIMING_ESP_DIFF2_B); #endif // Use coarse sampling in the first pass, oversampled one the second pass sp.current_oversampling = (ipass == 0) ? 0 : baseMLO->adaptive_oversampling; sp.nr_dir = (baseMLO->do_skip_align || baseMLO->do_skip_rotate) ? 1 : baseMLO->sampling.NrDirections(0, &op.pointer_dir_nonzeroprior); sp.nr_psi = (baseMLO->do_skip_align || baseMLO->do_skip_rotate) ? 1 : baseMLO->sampling.NrPsiSamplings(0, &op.pointer_psi_nonzeroprior); sp.nr_trans = (baseMLO->do_skip_align) ? 
1 : baseMLO->sampling.NrTranslationalSamplings(); sp.nr_oversampled_rot = baseMLO->sampling.oversamplingFactorOrientations(sp.current_oversampling); sp.nr_oversampled_trans = baseMLO->sampling.oversamplingFactorTranslations(sp.current_oversampling); #ifdef TIMING // Only time one thread if (thread_id == 0) baseMLO->timer.toc(baseMLO->TIMING_ESP_DIFF2_B); #endif op.min_diff2.resize(sp.nr_images, 0); if (ipass == 0) { unsigned long weightsPerPart(baseMLO->mymodel.nr_classes * sp.nr_dir * sp.nr_psi * sp.nr_trans * sp.nr_oversampled_rot * sp.nr_oversampled_trans); op.Mweight.resizeNoCp(1,1,sp.nr_images, weightsPerPart); AccPtr Mweight = ptrFactory.make(); Mweight.setSize(sp.nr_images * weightsPerPart); Mweight.setHostPtr(op.Mweight.data); Mweight.deviceAlloc(); deviceInitValue(Mweight, -std::numeric_limits::max()); Mweight.streamSync(); CTIC(timer,"getAllSquaredDifferencesCoarse"); getAllSquaredDifferencesCoarse(ipass, op, sp, baseMLO, myInstance, Mweight, ptrFactory, ibody); CTOC(timer,"getAllSquaredDifferencesCoarse"); CTIC(timer,"convertAllSquaredDifferencesToWeightsCoarse"); convertAllSquaredDifferencesToWeights(ipass, op, sp, baseMLO, myInstance, CoarsePassWeights, FinePassClassMasks, Mweight, ptrFactory, ibody); CTOC(timer,"convertAllSquaredDifferencesToWeightsCoarse"); } else { #ifdef TIMING // Only time one thread if (thread_id == 0) baseMLO->timer.tic(baseMLO->TIMING_ESP_DIFF2_D); #endif // // -- go through all classes and generate projectionsetups for all classes - to be used in getASDF and storeWS below -- // // the reason to do this globally is subtle - we want the orientation_num of all classes to estimate a largest possible // // weight-array, which would be insanely much larger than necessary if we had to assume the worst. for (int img_id = 0; img_id < sp.nr_images; img_id++) { FineProjectionData[img_id].orientationNumAllClasses = 0; for (int exp_iclass = sp.iclass_min; exp_iclass <= sp.iclass_max; exp_iclass++) { if(exp_iclass>0) FineProjectionData[img_id].class_idx[exp_iclass] = FineProjectionData[img_id].rots.size(); FineProjectionData[img_id].class_entries[exp_iclass] = 0; CTIC(timer,"generateProjectionSetup"); FineProjectionData[img_id].orientationNumAllClasses += generateProjectionSetupFine( op, sp, baseMLO, exp_iclass, FineProjectionData[img_id]); CTOC(timer,"generateProjectionSetup"); } //set a maximum possible size for all weights (to be reduced by significance-checks) size_t dataSize = FineProjectionData[img_id].orientationNumAllClasses*sp.nr_trans*sp.nr_oversampled_trans; FinePassWeights[img_id].setDataSize(dataSize); FinePassWeights[img_id].dual_alloc_all(); bundleD2[img_id].setSize(2*(FineProjectionData[img_id].orientationNumAllClasses*sp.nr_trans*sp.nr_oversampled_trans)*sizeof(unsigned long)); bundleD2[img_id].allAlloc(); } #ifdef TIMING // Only time one thread if (thread_id == 0) baseMLO->timer.toc(baseMLO->TIMING_ESP_DIFF2_D); #endif CTIC(timer,"getAllSquaredDifferencesFine"); getAllSquaredDifferencesFine(ipass, op, sp, baseMLO, myInstance, FinePassWeights, FinePassClassMasks, FineProjectionData, ptrFactory, ibody, bundleD2); CTOC(timer,"getAllSquaredDifferencesFine"); FinePassWeights[0].weights.cpToHost(); AccPtr Mweight = ptrFactory.make(); //DUMMY CTIC(timer,"convertAllSquaredDifferencesToWeightsFine"); convertAllSquaredDifferencesToWeights(ipass, op, sp, baseMLO, myInstance, FinePassWeights, FinePassClassMasks, Mweight, ptrFactory, ibody); CTOC(timer,"convertAllSquaredDifferencesToWeightsFine"); } CTOC(timer,"weightPass"); } #ifdef TIMING // Only time one 
thread if (thread_id == 0) baseMLO->timer.tic(baseMLO->TIMING_ESP_DIFF2_E); #endif // For the reconstruction step use mymodel.current_size! // as of 3.1, no longer necessary? sp.current_image_size = baseMLO->mymodel.current_size; for (unsigned long img_id = 0; img_id < sp.nr_images; img_id++) { bundleSWS[img_id].setSize(2*(FineProjectionData[img_id].orientationNumAllClasses)*sizeof(unsigned long)); bundleSWS[img_id].allAlloc(); } #ifdef TIMING // Only time one thread if (thread_id == 0) baseMLO->timer.toc(baseMLO->TIMING_ESP_DIFF2_E); #endif CTIC(timer,"storeWeightedSums"); storeWeightedSums(op, sp, baseMLO, myInstance, FinePassWeights, FineProjectionData, FinePassClassMasks, ptrFactory, ibody, bundleSWS); CTOC(timer,"storeWeightedSums"); for (long int img_id = 0; img_id < sp.nr_images; img_id++) { FinePassWeights[img_id].dual_free_all(); } } CTOC(timer,"oneParticle"); } relion-3.1.3/src/acc/acc_projector.h000066400000000000000000000030471411340063500173120ustar00rootroot00000000000000#ifndef ACC_PROJECTOR_H_ #define ACC_PROJECTOR_H_ #include "src/complex.h" //#include "src/acc/cuda/cuda_settings.h" //#include "src/acc/cuda/cuda_mem_utils.h" #include "src/acc/acc_ptr.h" //#include //#include "src/acc/cuda/cuda_kernels/cuda_device_utils.cuh" #ifndef CUDA #include #endif class AccProjector { friend class AccProjectorKernel; int mdlX, mdlY, mdlZ, mdlMaxR, mdlInitY, mdlInitZ; XFLOAT padding_factor; size_t mdlXYZ; size_t allocaton_size; #ifndef PROJECTOR_NO_TEXTURES XFLOAT *texArrayReal2D, *texArrayImag2D; cudaArray_t *texArrayReal, *texArrayImag; cudaTextureObject_t *mdlReal, *mdlImag; size_t pitch2D; #else #ifdef CUDA XFLOAT *mdlReal, *mdlImag; #else std::complex *mdlComplex; int externalFree; #endif #endif // PROJECTOR_NO_TEXTURES public: AccProjector(): mdlX(0), mdlY(0), mdlZ(0), mdlXYZ(0), mdlMaxR(0), mdlInitY(0), mdlInitZ(0), padding_factor(0), allocaton_size(0) { #ifndef PROJECTOR_NO_TEXTURES texArrayReal2D = 0; texArrayImag2D = 0; texArrayReal = 0; texArrayImag = 0; mdlReal = 0; mdlImag = 0; pitch2D = 0; #else #ifdef CUDA mdlReal = 0; mdlImag = 0; #else mdlComplex = 0; externalFree = 0; #endif #endif } bool setMdlDim( int xdim, int ydim, int zdim, int inity, int initz, int maxr, XFLOAT paddingFactor); void initMdl(XFLOAT *real, XFLOAT *imag); void initMdl(Complex *data); #ifndef CUDA void initMdl(std::complex *data); #endif void clear(); ~AccProjector() { clear(); }; }; // AccProjector #endif relion-3.1.3/src/acc/acc_projector_impl.h000066400000000000000000000153751411340063500203420ustar00rootroot00000000000000#include "src/acc/acc_projector.h" #include bool AccProjector::setMdlDim( int xdim, int ydim, int zdim, int inity, int initz, int maxr, XFLOAT paddingFactor) { if(zdim == 1) zdim = 0; if (xdim == mdlX && ydim == mdlY && zdim == mdlZ && inity == mdlInitY && initz == mdlInitZ && maxr == mdlMaxR && paddingFactor == padding_factor) return false; clear(); mdlX = xdim; mdlY = ydim; mdlZ = zdim; if(zdim == 0) mdlXYZ = (size_t)xdim*(size_t)ydim; else mdlXYZ = (size_t)xdim*(size_t)ydim*(size_t)zdim; mdlInitY = inity; mdlInitZ = initz; mdlMaxR = maxr; padding_factor = paddingFactor; #ifndef PROJECTOR_NO_TEXTURES mdlReal = new cudaTextureObject_t(); mdlImag = new cudaTextureObject_t(); // create channel to describe data type (bits,bits,bits,bits,type) cudaChannelFormatDesc desc; desc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat); struct cudaResourceDesc resDesc_real, resDesc_imag; struct cudaTextureDesc texDesc; // -- Zero all data in objects handlers 
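// The block below follows the standard CUDA texture-object recipe: describe the
// backing storage in a cudaResourceDesc (a cudaArray for the 3D model, pitched
// linear memory for the 2D model), describe how it is sampled in a cudaTextureDesc
// (linear filtering, element-type reads, clamped addressing), then bind the two
// with cudaCreateTextureObject(). Condensed sketch of the same calls used below
// (3D case, error handling omitted):
//   cudaMalloc3DArray(&arr, &desc, make_cudaExtent(mdlX, mdlY, mdlZ));
//   resDesc.resType = cudaResourceTypeArray;   resDesc.res.array.array = arr;
//   texDesc.filterMode = cudaFilterModeLinear; texDesc.readMode = cudaReadModeElementType;
//   cudaCreateTextureObject(&texObj, &resDesc, &texDesc, NULL);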
memset(&resDesc_real, 0, sizeof(cudaResourceDesc)); memset(&resDesc_imag, 0, sizeof(cudaResourceDesc)); memset(&texDesc, 0, sizeof(cudaTextureDesc)); if(mdlZ!=0) // 3D model { texArrayReal = new cudaArray_t(); texArrayImag = new cudaArray_t(); // -- make extents for automatic pitch:ing (aligment) of allocated 3D arrays cudaExtent volumeSize = make_cudaExtent(mdlX, mdlY, mdlZ); // -- Allocate and copy data using very clever CUDA memcpy-functions HANDLE_ERROR(cudaMalloc3DArray(texArrayReal, &desc, volumeSize)); HANDLE_ERROR(cudaMalloc3DArray(texArrayImag, &desc, volumeSize)); // -- Descriptors of the channel(s) in the texture(s) resDesc_real.res.array.array = *texArrayReal; resDesc_imag.res.array.array = *texArrayImag; resDesc_real.resType = cudaResourceTypeArray; resDesc_imag.resType = cudaResourceTypeArray; } else // 2D model { HANDLE_ERROR(cudaMallocPitch(&texArrayReal2D, &pitch2D, sizeof(XFLOAT)*mdlX,mdlY)); HANDLE_ERROR(cudaMallocPitch(&texArrayImag2D, &pitch2D, sizeof(XFLOAT)*mdlX,mdlY)); // -- Descriptors of the channel(s) in the texture(s) resDesc_real.resType = cudaResourceTypePitch2D; resDesc_real.res.pitch2D.devPtr = texArrayReal2D; resDesc_real.res.pitch2D.pitchInBytes = pitch2D; resDesc_real.res.pitch2D.width = mdlX; resDesc_real.res.pitch2D.height = mdlY; resDesc_real.res.pitch2D.desc = desc; // ------------------------------------------------- resDesc_imag.resType = cudaResourceTypePitch2D; resDesc_imag.res.pitch2D.devPtr = texArrayImag2D; resDesc_imag.res.pitch2D.pitchInBytes = pitch2D; resDesc_imag.res.pitch2D.width = mdlX; resDesc_imag.res.pitch2D.height = mdlY; resDesc_imag.res.pitch2D.desc = desc; } // -- Decriptors of the texture(s) and methods used for reading it(them) -- texDesc.filterMode = cudaFilterModeLinear; texDesc.readMode = cudaReadModeElementType; texDesc.normalizedCoords = false; for(int n=0; n<3; n++) texDesc.addressMode[n]=cudaAddressModeClamp; // -- Create texture object(s) HANDLE_ERROR(cudaCreateTextureObject(mdlReal, &resDesc_real, &texDesc, NULL)); HANDLE_ERROR(cudaCreateTextureObject(mdlImag, &resDesc_imag, &texDesc, NULL)); #else #ifdef CUDA DEBUG_HANDLE_ERROR(cudaMalloc( (void**) &mdlReal, mdlXYZ * sizeof(XFLOAT))); DEBUG_HANDLE_ERROR(cudaMalloc( (void**) &mdlImag, mdlXYZ * sizeof(XFLOAT))); #else mdlComplex = NULL; #endif #endif return true; } void AccProjector::initMdl(XFLOAT *real, XFLOAT *imag) { #ifdef DEBUG_CUDA if (mdlXYZ == 0) { printf("DEBUG_ERROR: Model dimensions must be set with setMdlDim before call to setMdlData."); CRITICAL(ERR_MDLDIM); } #ifdef CUDA if (mdlReal == NULL) { printf("DEBUG_ERROR: initMdl called before call to setMdlData."); CRITICAL(ERR_MDLSET); } #else if (mdlComplex == NULL) { printf("DEBUG_ERROR: initMdl called before call to setMdlData."); CRITICAL(ERR_MDLSET); } #endif #endif #ifndef PROJECTOR_NO_TEXTURES if(mdlZ!=0) // 3D model { // -- make extents for automatic pitching (aligment) of allocated 3D arrays cudaMemcpy3DParms copyParams = {0}; copyParams.extent = make_cudaExtent(mdlX, mdlY, mdlZ); copyParams.kind = cudaMemcpyHostToDevice; // -- Copy data copyParams.dstArray = *texArrayReal; copyParams.srcPtr = make_cudaPitchedPtr(real, mdlX * sizeof(XFLOAT), mdlY, mdlZ); DEBUG_HANDLE_ERROR(cudaMemcpy3D(©Params)); copyParams.dstArray = *texArrayImag; copyParams.srcPtr = make_cudaPitchedPtr(imag, mdlX * sizeof(XFLOAT), mdlY, mdlZ); DEBUG_HANDLE_ERROR(cudaMemcpy3D(©Params)); } else // 2D model { DEBUG_HANDLE_ERROR(cudaMemcpy2D(texArrayReal2D, pitch2D, real, sizeof(XFLOAT) * mdlX, sizeof(XFLOAT) * mdlX, mdlY, 
cudaMemcpyHostToDevice)); DEBUG_HANDLE_ERROR(cudaMemcpy2D(texArrayImag2D, pitch2D, imag, sizeof(XFLOAT) * mdlX, sizeof(XFLOAT) * mdlX, mdlY, cudaMemcpyHostToDevice)); } #else #ifdef CUDA DEBUG_HANDLE_ERROR(cudaMemcpy( mdlReal, real, mdlXYZ * sizeof(XFLOAT), cudaMemcpyHostToDevice)); DEBUG_HANDLE_ERROR(cudaMemcpy( mdlImag, imag, mdlXYZ * sizeof(XFLOAT), cudaMemcpyHostToDevice)); #else std::complex *pData = mdlComplex; for(size_t i=0; i arrayval(*real ++, *imag ++); pData[i] = arrayval; } #endif #endif } #ifndef CUDA void AccProjector::initMdl(std::complex *data) { mdlComplex = data; // No copy needed - everyone shares the complex reference arrays externalFree = 1; // This is shared memory freed outside the projector } #endif void AccProjector::initMdl(Complex *data) { XFLOAT *tmpReal; XFLOAT *tmpImag; if (posix_memalign((void **)&tmpReal, MEM_ALIGN, mdlXYZ * sizeof(XFLOAT))) CRITICAL(RAMERR); if (posix_memalign((void **)&tmpImag, MEM_ALIGN, mdlXYZ * sizeof(XFLOAT))) CRITICAL(RAMERR); for (size_t i = 0; i < mdlXYZ; i ++) { tmpReal[i] = (XFLOAT) data[i].real; tmpImag[i] = (XFLOAT) data[i].imag; } initMdl(tmpReal, tmpImag); free(tmpReal); free(tmpImag); } void AccProjector::clear() { mdlX = 0; mdlY = 0; mdlZ = 0; mdlXYZ = 0; mdlInitY = 0; mdlInitZ = 0; mdlMaxR = 0; padding_factor = 0; allocaton_size = 0; #ifdef CUDA if (mdlReal != 0) { #ifndef PROJECTOR_NO_TEXTURES cudaDestroyTextureObject(*mdlReal); cudaDestroyTextureObject(*mdlImag); delete mdlReal; delete mdlImag; if(mdlZ!=0) //3D case { cudaFreeArray(*texArrayReal); cudaFreeArray(*texArrayImag); delete texArrayReal; delete texArrayImag; } else //2D case { HANDLE_ERROR(cudaFree(texArrayReal2D)); HANDLE_ERROR(cudaFree(texArrayImag2D)); } texArrayReal = 0; texArrayImag = 0; #else cudaFree(mdlReal); cudaFree(mdlImag); #endif mdlReal = 0; mdlImag = 0; } #else // ifdef CUDA if ((mdlComplex != NULL) && (externalFree == 0)) { delete [] mdlComplex; mdlComplex = NULL; } #endif // ifdef CUDA } relion-3.1.3/src/acc/acc_projector_plan.h000066400000000000000000000053471411340063500203310ustar00rootroot00000000000000#ifndef ACC_PROJECTOR_PLAN_H_ #define ACC_PROJECTOR_PLAN_H_ #include #include "src/acc/acc_ptr.h" #include "src/healpix_sampling.h" #include #include class AccProjectorPlan { public: AccPtr< long unsigned> iorientclasses; AccPtr eulers; long unsigned orientation_num; AccProjectorPlan(): orientation_num(0) {}; AccProjectorPlan(CudaCustomAllocator *allocator): iorientclasses(allocator), eulers(allocator), orientation_num(0) {}; //Copy constructor AccProjectorPlan( const AccProjectorPlan& other ): iorientclasses(other.iorientclasses), eulers(other.eulers), orientation_num(other.orientation_num) {}; void setup( HealpixSampling &sampling, std::vector &directions_prior, std::vector &psi_prior, std::vector &pointer_dir_nonzeroprior, std::vector &pointer_psi_nonzeroprior, MultidimArray *Mcoarse_significant, std::vector &pdf_class, std::vector > &pdf_direction, unsigned long nr_dir, unsigned long nr_psi, unsigned long nr_oversampled_rot, unsigned long idir_min, unsigned long idir_max, unsigned long ipsi_min, unsigned long ipsi_max, unsigned long itrans_min, unsigned long itrans_max, unsigned long current_oversampling, unsigned iclass, bool coarse, bool inverseMatrix, bool do_skip_align, bool do_skip_rotate, int orientational_prior_mode, Matrix2D &L_, Matrix2D &R_); void setup( HealpixSampling &sampling, std::vector &directions_prior, std::vector &psi_prior, std::vector &pointer_dir_nonzeroprior, std::vector &pointer_psi_nonzeroprior, MultidimArray 
*Mcoarse_significant, std::vector &pdf_class, std::vector > &pdf_direction, unsigned long nr_dir, unsigned long nr_psi, unsigned long nr_oversampled_rot, unsigned long idir_min, unsigned long idir_max, unsigned long ipsi_min, unsigned long ipsi_max, unsigned long itrans_min, unsigned long itrans_max, unsigned long current_oversampling, unsigned iclass, bool coarse, bool inverseMatrix, bool do_skip_align, bool do_skip_rotate, int orientational_prior_mode) { Matrix2D dummyRL; setup( sampling, directions_prior, psi_prior, pointer_dir_nonzeroprior, pointer_psi_nonzeroprior, Mcoarse_significant, pdf_class, pdf_direction, nr_dir, nr_psi, nr_oversampled_rot, idir_min, idir_max, ipsi_min, ipsi_max, itrans_min, itrans_max, current_oversampling, iclass, coarse, inverseMatrix, do_skip_align, do_skip_rotate, orientational_prior_mode, dummyRL, dummyRL); } void printTo(std::ostream &os); // print void clear(); }; #endif relion-3.1.3/src/acc/acc_projector_plan_impl.h000066400000000000000000000263231411340063500213470ustar00rootroot00000000000000#include "src/acc/acc_projector_plan.h" #include "src/acc/utilities.h" #include "src/time.h" //#define PP_TIMING #ifdef PP_TIMING Timer timer; int TIMING_TOP = timer.setNew("setup"); int TIMING_SAMPLING = timer.setNew(" sampling"); int TIMING_PRIOR = timer.setNew(" prior"); int TIMING_PROC_CALC = timer.setNew(" procCalc"); int TIMING_PROC = timer.setNew(" proc"); int TIMING_GEN = timer.setNew(" genOri"); int TIMING_PERTURB = timer.setNew(" perturb"); int TIMING_EULERS = timer.setNew(" eulers"); #define TIMING_TIC(id) timer.tic(id) #define TIMING_TOC(id) timer.toc(id) #else #define TIMING_TIC(id) #define TIMING_TOC(id) #endif void getOrientations(HealpixSampling &sampling, long int idir, long int ipsi, int oversampling_order, std::vector &my_rot, std::vector &my_tilt, std::vector &my_psi, std::vector &pointer_dir_nonzeroprior, std::vector &directions_prior, std::vector &pointer_psi_nonzeroprior, std::vector &psi_prior) { my_rot.clear(); my_tilt.clear(); my_psi.clear(); long int my_idir, my_ipsi; if (pointer_dir_nonzeroprior.size() > idir && pointer_psi_nonzeroprior.size() > ipsi) { // nonzeroprior vectors have been initialised, so use priors! 
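// When orientational priors are in use, idir and ipsi do not index the full
// sampling grid directly: they index the compacted lists of directions/psi angles
// that received a non-zero prior, and the pointer_*_nonzeroprior vectors map them
// back to indices on the full grid. Illustrative example (values made up): if only
// directions {3, 17, 42} carry a non-zero prior, then idir = 1 resolves to my_idir = 17.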
my_idir = pointer_dir_nonzeroprior[idir]; my_ipsi = pointer_psi_nonzeroprior[ipsi]; } else { // no priors my_idir = idir; my_ipsi = ipsi; } if (oversampling_order == 0) { my_rot.push_back(sampling.rot_angles[my_idir]); my_tilt.push_back(sampling.tilt_angles[my_idir]); my_psi.push_back(sampling.psi_angles[my_ipsi]); } else if (!sampling.is_3D) { // for 2D sampling, only push back oversampled psi rotations sampling.pushbackOversampledPsiAngles(my_ipsi, oversampling_order, 0., 0., my_rot, my_tilt, my_psi); } else { // Set up oversampled grid for 3D sampling Healpix_Base HealPixOver(oversampling_order + sampling.healpix_order, NEST); int fact = HealPixOver.Nside()/sampling.healpix_base.Nside(); int x, y, face; RFLOAT rot, tilt; // Get x, y and face for the original, coarse grid long int ipix = sampling.directions_ipix[my_idir]; sampling.healpix_base.nest2xyf(ipix, x, y, face); // Loop over the oversampled Healpix pixels on the fine grid for (int j = fact * y; j < fact * (y+1); ++j) { for (int i = fact * x; i < fact * (x+1); ++i) { long int overpix = HealPixOver.xyf2nest(i, j, face); // this one always has to be double (also for SINGLE_PRECISION CALCULATIONS) for call to external library double zz, phi; HealPixOver.pix2ang_z_phi(overpix, zz, phi); rot = RAD2DEG(phi); tilt = ACOSD(zz); // The geometrical considerations about the symmetry below require that rot = [-180,180] and tilt [0,180] sampling.checkDirection(rot, tilt); sampling.pushbackOversampledPsiAngles(my_ipsi, oversampling_order, rot, tilt, my_rot, my_tilt, my_psi); } } } } void AccProjectorPlan::setup( HealpixSampling &sampling, std::vector &directions_prior, std::vector &psi_prior, std::vector &pointer_dir_nonzeroprior, std::vector &pointer_psi_nonzeroprior, MultidimArray *Mcoarse_significant, std::vector &pdf_class, std::vector > &pdf_direction, unsigned long nr_dir, unsigned long nr_psi, unsigned long idir_min, unsigned long idir_max, unsigned long ipsi_min, unsigned long ipsi_max, unsigned long itrans_min, unsigned long itrans_max, unsigned long current_oversampling, unsigned long nr_oversampled_rot, unsigned iclass, bool coarse, bool inverseMatrix, bool do_skip_align, bool do_skip_rotate, int orientational_prior_mode, Matrix2D &L_, Matrix2D &R_) { TIMING_TIC(TIMING_TOP); std::vector< RFLOAT > oversampled_rot, oversampled_tilt, oversampled_psi; AccPtr alphas = eulers.make(nr_dir * nr_psi * nr_oversampled_rot * 9); AccPtr betas = eulers.make(nr_dir * nr_psi * nr_oversampled_rot * 9); AccPtr gammas = eulers.make(nr_dir * nr_psi * nr_oversampled_rot * 9); AccPtr perturb = eulers.make((size_t)9); AccPtr adjustL = eulers.make((size_t)9); AccPtr adjustR = eulers.make((size_t)9); alphas.hostAlloc(); betas.hostAlloc(); gammas.hostAlloc(); eulers.freeIfSet(); eulers.setSize(nr_dir * nr_psi * nr_oversampled_rot * 9); eulers.hostAlloc(); iorientclasses.freeIfSet(); iorientclasses.setSize(nr_dir * nr_psi * nr_oversampled_rot); iorientclasses.hostAlloc(); orientation_num = 0; Matrix2D L(3,3); Matrix2D R(3,3); L.initIdentity(); R.initIdentity(); bool doL(false), doR(false); RFLOAT myperturb(0.); if (L_.mdimx == L.mdimx && L_.mdimy == L.mdimy) { doL = true; L = L_ * L; } if (ABS(sampling.random_perturbation) > 0.) 
{ myperturb = sampling.random_perturbation * sampling.getAngularSampling(); if (sampling.is_3D) { Euler_angles2matrix(myperturb, myperturb, myperturb, R); } doR = true; } if (R_.mdimx == R.mdimx && R_.mdimy == R.mdimy) { doR = true; R = R * R_; } TIMING_TIC(TIMING_SAMPLING); for (long int idir = idir_min, iorient = 0; idir <= idir_max; idir++) { for (long int ipsi = ipsi_min, ipart = 0; ipsi <= ipsi_max; ipsi++, iorient++) { long int iorientclass = iclass * nr_dir * nr_psi + iorient; TIMING_TIC(TIMING_PRIOR); // Get prior for this direction and skip calculation if prior==0 RFLOAT pdf_orientation; if (do_skip_align || do_skip_rotate) { pdf_orientation = pdf_class[iclass]; } else if (orientational_prior_mode == NOPRIOR) { pdf_orientation = DIRECT_MULTIDIM_ELEM(pdf_direction[iclass], idir); } else { pdf_orientation = directions_prior[idir] * psi_prior[ipsi]; } TIMING_TOC(TIMING_PRIOR); // In the first pass, always proceed // In the second pass, check whether one of the translations for this orientation of any of the particles had a significant weight in the first pass // if so, proceed with projecting the reference in that direction bool do_proceed(false); TIMING_TIC(TIMING_PROC_CALC); if (coarse && pdf_orientation > 0.) do_proceed = true; else if (pdf_orientation > 0.) { long int nr_trans = itrans_max - itrans_min + 1; for (long int ipart = 0; ipart < YSIZE(*Mcoarse_significant); ipart++) { long int ihidden = iorient * nr_trans; for (long int itrans = itrans_min; itrans <= itrans_max; itrans++, ihidden++) { if (DIRECT_A2D_ELEM(*Mcoarse_significant, ipart, ihidden)) { do_proceed = true; break; } } } } TIMING_TOC(TIMING_PROC_CALC); TIMING_TIC(TIMING_PROC); if (do_proceed) { // Now get the oversampled (rot, tilt, psi) triplets // This will be only the original (rot,tilt,psi) triplet in the first pass (sp.current_oversampling==0) TIMING_TIC(TIMING_GEN); getOrientations(sampling, idir, ipsi, current_oversampling, oversampled_rot, oversampled_tilt, oversampled_psi, pointer_dir_nonzeroprior, directions_prior, pointer_psi_nonzeroprior, psi_prior); TIMING_TOC(TIMING_GEN); // Loop over all oversampled orientations (only a single one in the first pass) for (long int iover_rot = 0; iover_rot < nr_oversampled_rot; iover_rot++, ipart++) { if (sampling.is_3D) { alphas[orientation_num] = oversampled_rot[iover_rot]; betas[orientation_num] = oversampled_tilt[iover_rot]; gammas[orientation_num] = oversampled_psi[iover_rot]; } else { alphas[orientation_num] = oversampled_psi[iover_rot] + myperturb; } iorientclasses[orientation_num] = iorientclass; orientation_num ++; } } TIMING_TOC(TIMING_PROC); } } TIMING_TOC(TIMING_SAMPLING); iorientclasses.resizeHostCopy(orientation_num); iorientclasses.putOnDevice(); eulers.resizeHostCopy(orientation_num * 9); eulers.deviceAlloc(); alphas.resizeHostCopy(orientation_num); alphas.putOnDevice(); if(sampling.is_3D) { betas.resizeHostCopy(orientation_num); betas.putOnDevice(); gammas.resizeHostCopy(orientation_num); gammas.putOnDevice(); } if (doL) { adjustL.hostAlloc(); for (int i = 0; i < 9; i ++) adjustL[i] = (XFLOAT) L.mdata[i]; adjustL.putOnDevice(); } if (doR) { adjustR.hostAlloc(); for (int i = 0; i < 9; i ++) adjustR[i] = (XFLOAT) R.mdata[i]; adjustR.putOnDevice(); } int grid_size = ceil((float)orientation_num/(float)BLOCK_SIZE); if(inverseMatrix) { if(sampling.is_3D) { if (doL && doR) AccUtilities::acc_make_eulers_3D( grid_size,BLOCK_SIZE,eulers.getStream(), ~alphas, ~betas, ~gammas, ~eulers, orientation_num, ~adjustL, ~adjustR); else if (doL) 
AccUtilities::acc_make_eulers_3D( grid_size,BLOCK_SIZE,eulers.getStream(), ~alphas, ~betas, ~gammas, ~eulers, orientation_num, ~adjustL, NULL); else if (doR) AccUtilities::acc_make_eulers_3D( grid_size,BLOCK_SIZE,eulers.getStream(), ~alphas, ~betas, ~gammas, ~eulers, orientation_num, NULL, ~adjustR); else AccUtilities::acc_make_eulers_3D( grid_size,BLOCK_SIZE,eulers.getStream(), ~alphas, ~betas, ~gammas, ~eulers, orientation_num, NULL, NULL); } else AccUtilities::acc_make_eulers_2D( grid_size,BLOCK_SIZE,eulers.getStream(), ~alphas, ~eulers, orientation_num); } else { if(sampling.is_3D) { if (doL && doR) AccUtilities::acc_make_eulers_3D( grid_size,BLOCK_SIZE,eulers.getStream(), ~alphas, ~betas, ~gammas, ~eulers, orientation_num, ~adjustL, ~adjustR); else if (doL) AccUtilities::acc_make_eulers_3D( grid_size,BLOCK_SIZE,eulers.getStream(), ~alphas, ~betas, ~gammas, ~eulers, orientation_num, ~adjustL, NULL); else if (doR) AccUtilities::acc_make_eulers_3D( grid_size,BLOCK_SIZE,eulers.getStream(), ~alphas, ~betas, ~gammas, ~eulers, orientation_num, NULL, ~adjustR); else AccUtilities::acc_make_eulers_3D( grid_size,BLOCK_SIZE,eulers.getStream(), ~alphas, ~betas, ~gammas, ~eulers, orientation_num, NULL, NULL); } else AccUtilities::acc_make_eulers_2D(grid_size,BLOCK_SIZE,eulers.getStream(), ~alphas, ~eulers, orientation_num); } TIMING_TOC(TIMING_TOP); } void AccProjectorPlan::printTo(std::ostream &os) // print { os << "orientation_num = " << orientation_num << std::endl; os << "iorientclasses.getSize() = " << iorientclasses.getSize() << std::endl; os << std::endl << "iorientclasses\tiover_rots\teulers" << std::endl; for (int i = 0; i < iorientclasses.getSize(); i ++) { os << iorientclasses[i] << "\t\t" << "\t"; for (int j = 0; j < 9; j++) os << eulers[i * 9 + j] << "\t"; os << std::endl; } } void AccProjectorPlan::clear() { orientation_num = 0; iorientclasses.freeIfSet(); iorientclasses.setSize(0); eulers.freeIfSet(); eulers.setSize(0); #ifdef PP_TIMING timer.printTimes(false); #endif } relion-3.1.3/src/acc/acc_projectorkernel_impl.h000066400000000000000000000154071411340063500215370ustar00rootroot00000000000000#ifndef ACC_PROJECTORKERNELIMPL_H_ #define ACC_PROJECTORKERNELIMPL_H_ #ifndef PROJECTOR_NO_TEXTURES #define PROJECTOR_PTR_TYPE cudaTextureObject_t #else #define PROJECTOR_PTR_TYPE XFLOAT * #endif class AccProjectorKernel { public: int mdlX, mdlXY, mdlZ, imgX, imgY, imgZ, mdlInitY, mdlInitZ, maxR, maxR2, maxR2_padded; XFLOAT padding_factor; PROJECTOR_PTR_TYPE mdlReal; PROJECTOR_PTR_TYPE mdlImag; #ifdef CUDA PROJECTOR_PTR_TYPE mdlComplex; #else std::complex *mdlComplex; #endif AccProjectorKernel( int mdlX, int mdlY, int mdlZ, int imgX, int imgY, int imgZ, int mdlInitY, int mdlInitZ, XFLOAT padding_factor, int maxR, #ifdef CUDA PROJECTOR_PTR_TYPE mdlComplex #else std::complex *mdlComplex #endif ): mdlX(mdlX), mdlXY(mdlX*mdlY), mdlZ(mdlZ), imgX(imgX), imgY(imgY), imgZ(imgZ), mdlInitY(mdlInitY), mdlInitZ(mdlInitZ), padding_factor(padding_factor), maxR(maxR), maxR2(maxR*maxR), maxR2_padded(maxR*maxR*padding_factor*padding_factor), mdlComplex(mdlComplex) {}; AccProjectorKernel( int mdlX, int mdlY, int mdlZ, int imgX, int imgY, int imgZ, int mdlInitY, int mdlInitZ, XFLOAT padding_factor, int maxR, PROJECTOR_PTR_TYPE mdlReal, PROJECTOR_PTR_TYPE mdlImag ): mdlX(mdlX), mdlXY(mdlX*mdlY), mdlZ(mdlZ), imgX(imgX), imgY(imgY), imgZ(imgZ), mdlInitY(mdlInitY), mdlInitZ(mdlInitZ), padding_factor(padding_factor), maxR(maxR), maxR2(maxR*maxR), maxR2_padded(maxR*maxR*padding_factor*padding_factor), 
mdlReal(mdlReal), mdlImag(mdlImag) { #ifndef CUDA std::complex *pData = mdlComplex; for(size_t i=0; i<(size_t)mdlX * (size_t)mdlY * (size_t)mdlZ; i++) { std::complex arrayval(*mdlReal ++, *mdlImag ++); pData[i] = arrayval; } #endif }; #ifdef CUDA __device__ __forceinline__ #else #ifndef __INTEL_COMPILER __attribute__((always_inline)) #endif inline #endif void project3Dmodel( int x, int y, int z, XFLOAT e0, XFLOAT e1, XFLOAT e2, XFLOAT e3, XFLOAT e4, XFLOAT e5, XFLOAT e6, XFLOAT e7, XFLOAT e8, XFLOAT &real, XFLOAT &imag) { XFLOAT xp = (e0 * x + e1 * y + e2 * z) * padding_factor; XFLOAT yp = (e3 * x + e4 * y + e5 * z) * padding_factor; XFLOAT zp = (e6 * x + e7 * y + e8 * z) * padding_factor; int r2 = xp*xp + yp*yp + zp*zp; if (r2 <= maxR2_padded) { #ifdef PROJECTOR_NO_TEXTURES bool invers(xp < 0); if (invers) { xp = -xp; yp = -yp; zp = -zp; } #ifdef CUDA real = no_tex3D(mdlReal, xp, yp, zp, mdlX, mdlXY, mdlInitY, mdlInitZ); imag = - no_tex3D(mdlImag, xp, yp, zp, mdlX, mdlXY, mdlInitY, mdlInitZ); #else CpuKernels::complex3D(mdlComplex, real, imag, xp, yp, zp, mdlX, mdlXY, mdlInitY, mdlInitZ); #endif if(invers) imag = -imag; #else if (xp < 0) { // Get complex conjugated hermitian symmetry pair xp = -xp; yp = -yp; zp = -zp; yp -= mdlInitY; zp -= mdlInitZ; real = tex3D(mdlReal, xp + (XFLOAT)0.5, yp + (XFLOAT)0.5, zp + (XFLOAT)0.5); imag = - tex3D(mdlImag, xp + (XFLOAT)0.5, yp + (XFLOAT)0.5, zp + (XFLOAT)0.5); } else { yp -= mdlInitY; zp -= mdlInitZ; real = tex3D(mdlReal, xp + (XFLOAT)0.5, yp + (XFLOAT)0.5, zp + (XFLOAT)0.5); imag = tex3D(mdlImag, xp + (XFLOAT)0.5, yp + (XFLOAT)0.5, zp + (XFLOAT)0.5); } #endif } else { real = (XFLOAT)0; imag = (XFLOAT)0; } } #ifdef CUDA __device__ __forceinline__ #else #ifndef __INTEL_COMPILER __attribute__((always_inline)) #endif inline #endif void project3Dmodel( int x, int y, XFLOAT e0, XFLOAT e1, XFLOAT e3, XFLOAT e4, XFLOAT e6, XFLOAT e7, XFLOAT &real, XFLOAT &imag) { XFLOAT xp = (e0 * x + e1 * y ) * padding_factor; XFLOAT yp = (e3 * x + e4 * y ) * padding_factor; XFLOAT zp = (e6 * x + e7 * y ) * padding_factor; int r2 = xp*xp + yp*yp + zp*zp; if (r2 <= maxR2_padded) { #ifdef PROJECTOR_NO_TEXTURES bool invers(xp < 0); if (invers) { xp = -xp; yp = -yp; zp = -zp; } #ifdef CUDA real = no_tex3D(mdlReal, xp, yp, zp, mdlX, mdlXY, mdlInitY, mdlInitZ); imag = no_tex3D(mdlImag, xp, yp, zp, mdlX, mdlXY, mdlInitY, mdlInitZ); #else CpuKernels::complex3D(mdlComplex, real, imag, xp, yp, zp, mdlX, mdlXY, mdlInitY, mdlInitZ); #endif if(invers) imag = -imag; #else if (xp < 0) { // Get complex conjugated hermitian symmetry pair xp = -xp; yp = -yp; zp = -zp; yp -= mdlInitY; zp -= mdlInitZ; real = tex3D(mdlReal, xp + (XFLOAT)0.5, yp + (XFLOAT)0.5, zp + (XFLOAT)0.5); imag = - tex3D(mdlImag, xp + (XFLOAT)0.5, yp + (XFLOAT)0.5, zp + (XFLOAT)0.5); } else { yp -= mdlInitY; zp -= mdlInitZ; real = tex3D(mdlReal, xp + (XFLOAT)0.5, yp + (XFLOAT)0.5, zp + (XFLOAT)0.5); imag = tex3D(mdlImag, xp + (XFLOAT)0.5, yp + (XFLOAT)0.5, zp + (XFLOAT)0.5); } #endif } else { real = (XFLOAT)0; imag = (XFLOAT)0; } } #ifdef CUDA __device__ __forceinline__ #else #ifndef __INTEL_COMPILER __attribute__((always_inline)) #endif inline #endif void project2Dmodel( int x, int y, XFLOAT e0, XFLOAT e1, XFLOAT e3, XFLOAT e4, XFLOAT &real, XFLOAT &imag) { XFLOAT xp = (e0 * x + e1 * y ) * padding_factor; XFLOAT yp = (e3 * x + e4 * y ) * padding_factor; int r2 = xp*xp + yp*yp; if (r2 <= maxR2_padded) { #ifdef PROJECTOR_NO_TEXTURES bool invers(xp < 0); if (invers) { xp = -xp; yp = -yp; } #ifdef CUDA real = 
no_tex2D(mdlReal, xp, yp, mdlX, mdlInitY); imag = no_tex2D(mdlImag, xp, yp, mdlX, mdlInitY); #else CpuKernels::complex2D(mdlComplex, real, imag, xp, yp, mdlX, mdlInitY); #endif if(invers) imag = -imag; #else if (xp < 0) { // Get complex conjugated hermitian symmetry pair xp = -xp; yp = -yp; yp -= mdlInitY; real = tex2D(mdlReal, xp + (XFLOAT)0.5, yp + (XFLOAT)0.5); imag = - tex2D(mdlImag, xp + (XFLOAT)0.5, yp + (XFLOAT)0.5); } else { yp -= mdlInitY; real = tex2D(mdlReal, xp + (XFLOAT)0.5, yp + (XFLOAT)0.5); imag = tex2D(mdlImag, xp + (XFLOAT)0.5, yp + (XFLOAT)0.5); } #endif } else { real=(XFLOAT)0; imag=(XFLOAT)0; } } static AccProjectorKernel makeKernel(AccProjector &p, int imgX, int imgY, int imgZ, int imgMaxR) { int maxR = p.mdlMaxR >= imgMaxR ? imgMaxR : p.mdlMaxR; AccProjectorKernel k( p.mdlX, p.mdlY, p.mdlZ, imgX, imgY, imgZ, p.mdlInitY, p.mdlInitZ, p.padding_factor, maxR, #ifndef PROJECTOR_NO_TEXTURES *p.mdlReal, *p.mdlImag #else #ifdef CUDA p.mdlReal, p.mdlImag #else p.mdlComplex #endif #endif ); return k; } }; // class AccProjectorKernel #endif relion-3.1.3/src/acc/acc_ptr.h000066400000000000000000000604531411340063500161140ustar00rootroot00000000000000#ifndef ACC_PTR_H_ #define ACC_PTR_H_ #include "src/acc/settings.h" #ifdef CUDA #include "src/acc/cuda/cuda_settings.h" #include #include "src/acc/cuda/custom_allocator.cuh" #include "src/acc/cuda/cuda_mem_utils.h" #include "src/acc/cuda/shortcuts.cuh" #else #include "src/acc/cpu/cpu_settings.h" #endif #include #include #include #include #include #include #include #include #include #include "src/macros.h" #include "src/error.h" #include "src/parallel.h" #ifndef MEM_ALIGN #define MEM_ALIGN 64 #endif #define ACC_PTR_DEBUG_FATAL( err ) (HandleAccPtrDebugFatal( err, __FILE__, __LINE__ )) static void HandleAccPtrDebugFatal( const char *err, const char *file, int line ) { fprintf(stderr, "DEBUG ERROR: %s in %s:%d\n", err, file, line ); fflush(stdout); #ifdef DEBUG_CUDA raise(SIGSEGV); #else CRITICAL(ERRGPUKERN); #endif } #define ACC_PTR_DEBUG_INFO( err ) (HandleAccPtrDebugInformational( err, __FILE__, __LINE__ )) static void HandleAccPtrDebugInformational( const char *err, const char *file, int line ) { fprintf(stderr, "POSSIBLE ISSUE: %s in %s:%d\n", err, file, line ); fflush(stdout); } enum AccType {accUNSET, accCUDA, accCPU}; #ifdef CUDA typedef cudaStream_t StreamType; typedef CudaCustomAllocator AllocatorType; typedef CudaCustomAllocator::Alloc AllocationType; #else typedef float StreamType; //Dummy type typedef double AllocatorType; //Dummy type typedef double AllocationType; //Dummy type #endif template class AccPtr { protected: AllocatorType *allocator; AllocationType *alloc; StreamType stream; AccType accType; size_t size; //Size used when copying data from and to device T *hPtr, *dPtr; //Host and device pointers bool doFreeDevice; //True if host or device needs to be freed public: bool doFreeHost; //TODO make this private /*====================================================== CONSTRUCTORS WITH ALLOCATORS ======================================================*/ AccPtr(AllocatorType *allocator): size(0), hPtr(NULL), dPtr(NULL), doFreeHost(false), doFreeDevice(false), allocator(allocator), alloc(NULL), stream(cudaStreamPerThread), #ifdef CUDA accType(accCUDA) #else accType(accCPU) #endif {} AccPtr(StreamType stream, AllocatorType *allocator): size(0), hPtr(NULL), dPtr(NULL), doFreeHost(false), doFreeDevice(false), allocator(allocator), alloc(NULL), stream(stream), #ifdef CUDA accType(accCUDA) #else accType(accCPU) #endif {} 
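// Typical life cycle of this wrapper in the accelerated optimiser code (a rough,
// illustrative sketch based on its use elsewhere; not a prescriptive API contract,
// and the exact factory/template syntax may differ):
//   AccPtr<XFLOAT> buf = ptrFactory.make<XFLOAT>(n); // sized handle, no storage yet
//   buf.allAlloc();                                  // allocate host + device memory
//   for (size_t i = 0; i < n; i++) buf[i] = ...;     // fill the host side
//   buf.cpToDevice();                                // async copy on the wrapper's stream
//   // kernels receive the underlying device pointer, e.g. via the ~ operator
//   buf.cpToHost();                                  // copy results back when needed
// In CPU-only builds (accType == accCPU) the device-side operations reduce to
// no-ops and the host pointer is used throughout.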
AccPtr(size_t size, AllocatorType *allocator): size(size), dPtr(NULL), doFreeHost(true), doFreeDevice(false), allocator(allocator), alloc(NULL), stream(cudaStreamPerThread), #ifdef CUDA accType(accCUDA) #else accType(accCPU) #endif { if(posix_memalign((void **)&hPtr, MEM_ALIGN, sizeof(T) * size)) CRITICAL(RAMERR); } AccPtr(size_t size, StreamType stream, AllocatorType *allocator): size(size), dPtr(NULL), doFreeHost(true), doFreeDevice(false), allocator(allocator), alloc(NULL), stream(stream), #ifdef CUDA accType(accCUDA) #else accType(accCPU) #endif { if(posix_memalign((void **)&hPtr, MEM_ALIGN, sizeof(T) * size)) CRITICAL(RAMERR); } AccPtr(T * h_start, size_t size, AllocatorType *allocator): size(size), hPtr(h_start), dPtr(NULL), doFreeHost(false), doFreeDevice(false), allocator(allocator), alloc(NULL), stream(cudaStreamPerThread), #ifdef CUDA accType(accCUDA) #else accType(accCPU) #endif {} AccPtr(T * h_start, size_t size, StreamType stream, AllocatorType *allocator): size(size), hPtr(h_start), dPtr(NULL), doFreeHost(false), doFreeDevice(false), allocator(allocator), alloc(NULL), stream(cudaStreamPerThread), #ifdef CUDA accType(accCUDA) #else accType(accCPU) #endif {} AccPtr(T * h_start, T * d_start, size_t size, AllocatorType *allocator): size(size), hPtr(h_start), dPtr(d_start), doFreeHost(false), doFreeDevice(false), allocator(allocator), alloc(NULL), stream(cudaStreamPerThread), #ifdef CUDA accType(accCUDA) #else accType(accCPU) #endif {} AccPtr(T * h_start, T * d_start, size_t size, StreamType stream, AllocatorType *allocator): size(size), hPtr(h_start), dPtr(d_start), doFreeHost(false), doFreeDevice(false), allocator(allocator), alloc(NULL), stream(stream), #ifdef CUDA accType(accCUDA) #else accType(accCPU) #endif {} /*====================================================== CONSTRUCTORS ======================================================*/ AccPtr(): size(0), hPtr(NULL), dPtr(NULL), doFreeHost(false), doFreeDevice(false), allocator(NULL), alloc(NULL), stream(cudaStreamPerThread), #ifdef CUDA accType(accCUDA) #else accType(accCPU) #endif {} AccPtr(StreamType stream): size(0), hPtr(NULL), dPtr(NULL), doFreeHost(false), doFreeDevice(false), allocator(NULL), alloc(NULL), stream(stream), #ifdef CUDA accType(accCUDA) #else accType(accCPU) #endif {} AccPtr(size_t size): size(size), dPtr(NULL), doFreeHost(true), doFreeDevice(false), allocator(NULL), alloc(NULL), stream(cudaStreamPerThread), #ifdef CUDA accType(accCUDA) #else accType(accCPU) #endif { if(posix_memalign((void **)&hPtr, MEM_ALIGN, sizeof(T) * size)) CRITICAL(RAMERR); } AccPtr(size_t size, StreamType stream): size(size), dPtr(NULL), doFreeHost(true), doFreeDevice(false), allocator(NULL), alloc(NULL), stream(stream), #ifdef CUDA accType(accCUDA) #else accType(accCPU) #endif { if(posix_memalign((void **)&hPtr, MEM_ALIGN, sizeof(T) * size)) CRITICAL(RAMERR); } AccPtr(T * h_start, size_t size): size(size), hPtr(h_start), dPtr(NULL), doFreeHost(false), doFreeDevice(false), allocator(NULL), alloc(NULL), stream(cudaStreamPerThread), #ifdef CUDA accType(accCUDA) #else accType(accCPU) #endif {} AccPtr(T * h_start, size_t size, StreamType stream): size(size), hPtr(h_start), dPtr(NULL), doFreeHost(false), doFreeDevice(false), allocator(NULL), alloc(NULL), stream(cudaStreamPerThread), #ifdef CUDA accType(accCUDA) #else accType(accCPU) #endif {} AccPtr(T * h_start, T * d_start, size_t size): size(size), hPtr(h_start), dPtr(d_start), doFreeHost(false), doFreeDevice(false), allocator(NULL), alloc(NULL), stream(cudaStreamPerThread), #ifdef 
CUDA accType(accCUDA) #else accType(accCPU) #endif {} AccPtr(T * h_start, T * d_start, size_t size, StreamType stream): size(size), hPtr(h_start), dPtr(d_start), doFreeHost(false), doFreeDevice(false), allocator(NULL), alloc(NULL), stream(stream), #ifdef CUDA accType(accCUDA) #else accType(accCPU) #endif {} /*====================================================== CONSTRUCTORS WITH OTHER POINTERS ======================================================*/ AccPtr(const AccPtr &ptr): size(ptr.size), hPtr(ptr.hPtr), dPtr(ptr.dPtr), doFreeHost(false), doFreeDevice(false), allocator(ptr.allocator), alloc(NULL), stream(ptr.stream), accType(ptr.accType) {} AccPtr(const AccPtr &ptr, size_t start_idx, size_t size): size(size), hPtr(&ptr.hPtr[start_idx]), dPtr(&ptr.dPtr[start_idx]), doFreeHost(false), doFreeDevice(false), allocator(ptr.allocator), alloc(NULL), stream(ptr.stream), accType(ptr.accType) {} /*====================================================== METHOD BODY ======================================================*/ void setAccType(AccType accT) { accType = accT; } void markReadyEvent() { #ifdef CUDA if (accType == accCUDA) { #ifdef DEBUG_CUDA if (alloc == NULL) ACC_PTR_DEBUG_FATAL("markReadyEvent called on null allocation.\n"); #endif alloc->markReadyEvent(stream); } #endif } /** * Allocate memory on device */ void deviceAlloc() { #ifdef CUDA if (accType == accCUDA) { #ifdef DEBUG_CUDA if(size==0) ACC_PTR_DEBUG_FATAL("deviceAlloc called with size == 0"); if (doFreeDevice) ACC_PTR_DEBUG_FATAL("Device double allocation.\n"); #endif doFreeDevice = true; alloc = allocator->alloc(size * sizeof(T)); dPtr = (T*) alloc->getPtr(); } #endif } /** * Allocate memory on device with given size */ void deviceAlloc(size_t newSize) { size = newSize; deviceAlloc(); } /** * Allocate memory on host */ void hostAlloc() { #ifdef DEBUG_CUDA if(size==0) ACC_PTR_DEBUG_FATAL("deviceAlloc called with size == 0"); if (doFreeHost) ACC_PTR_DEBUG_FATAL("Host double allocation.\n"); #endif doFreeHost = true; // TODO - alternatively, this could be aligned std::vector if(posix_memalign((void **)&hPtr, MEM_ALIGN, sizeof(T) * size)) CRITICAL(RAMERR); } /** * Allocate memory on host with given size */ void hostAlloc(size_t newSize) { size = newSize; hostAlloc(); } void allAlloc() { deviceAlloc(); hostAlloc(); } void allAlloc(size_t newSize) { size = newSize; deviceAlloc(); hostAlloc(); } void accAlloc() { if (accType == accCUDA) deviceAlloc(); else hostAlloc(); } void accAlloc(size_t newSize) { if (accType == accCUDA) deviceAlloc(newSize); else hostAlloc(newSize); } // Allocate storage of a new size for the array void resizeHost(size_t newSize) { #ifdef DEBUG_CUDA if (size==0) ACC_PTR_DEBUG_INFO("Resizing from size zero (permitted).\n"); #endif // TODO - alternatively, this could be aligned std::vector T* newArr; if(posix_memalign((void **)&newArr, MEM_ALIGN, sizeof(T) * newSize)) CRITICAL(RAMERR); memset( newArr, 0x0, sizeof(T) * newSize); #ifdef DEBUG_CUDA if (dPtr!=NULL) ACC_PTR_DEBUG_FATAL("resizeHost: Resizing host with present device allocation.\n"); if (newSize==0) ACC_PTR_DEBUG_INFO("resizeHost: Array resized to size zero (permitted with fear). 
Something may break downstream\n"); #endif freeHostIfSet(); setSize(newSize); setHostPtr(newArr); doFreeHost=true; } // Resize retaining as much of the original contents as possible void resizeHostCopy(size_t newSize) { #ifdef DEBUG_CUDA // if (size==0) // ACC_PTR_DEBUG_INFO("Resizing from size zero (permitted).\n"); #endif // TODO - alternatively, this could be aligned std::vector T* newArr; if(posix_memalign((void **)&newArr, MEM_ALIGN, sizeof(T) * newSize)) CRITICAL(RAMERR); // Copy in what we can from the original matrix if ((size > 0) && (hPtr != NULL)) { if (newSize < size) memcpy( newArr, hPtr, newSize * sizeof(T) ); else memcpy( newArr, hPtr, size * sizeof(T) ); // Initialize remaining memory if any if (newSize > size) { size_t theRest = sizeof(T) * (newSize - size); memset( newArr, 0x0, theRest); } } // There was nothing from before to copy - clear new memory if (hPtr == NULL) { memset( newArr, 0x0, sizeof(T) * newSize); } #ifdef DEBUG_CUDA if (dPtr!=NULL) ACC_PTR_DEBUG_FATAL("resizeHostCopy: Resizing host with present device allocation.\n"); if (newSize==0) ACC_PTR_DEBUG_INFO("resizeHostCopy: Array resized to size zero (permitted with fear). Something may break downstream\n"); #endif freeHostIfSet(); setSize(newSize); setHostPtr(newArr); doFreeHost=true; } /** * Initiate device memory with provided value */ void deviceInit(int value) { #ifdef CUDA if (accType == accCUDA) { #ifdef DEBUG_CUDA if (dPtr == NULL) ACC_PTR_DEBUG_FATAL("Memset requested before allocation in deviceInit().\n"); #endif cudaMemInit( dPtr, value, size, stream); } #endif } /** * Initiate host memory with provided value */ void hostInit(int value) { #ifdef DEBUG_CUDA if (hPtr == NULL) ACC_PTR_DEBUG_FATAL("Memset requested before allocation in hostInit().\n"); #endif memset(hPtr, value, size * sizeof(T)); } /** * Initiate memory with provided value */ void accInit(int value) { if (accType == accCUDA) deviceInit(value); else hostInit(value); } /** * Initiate all used memory with provided value */ void allInit(int value) { hostInit(value); if (accType == accCUDA) deviceInit(value); } /** * Copy a number (size) of bytes to device stored in the host pointer */ void cpToDevice() { #ifdef CUDA if (accType == accCUDA) { #ifdef DEBUG_CUDA if (dPtr == NULL) ACC_PTR_DEBUG_FATAL("cpToDevice() called before allocation.\n"); if (hPtr == NULL) ACC_PTR_DEBUG_FATAL("NULL host pointer in cpToDevice().\n"); #endif CudaShortcuts::cpyHostToDevice(hPtr, dPtr, size, stream); } #endif } /** * Copy a number (size) of bytes to device stored in the provided host pointer */ void cpToDevice(T * hostPtr) { if (accType == accCUDA) { #ifdef DEBUG_CUDA if (hostPtr == NULL) ACC_PTR_DEBUG_FATAL("Null-pointer given in cpToDevice(hostPtr).\n"); #endif hPtr = hostPtr; cpToDevice(); } } /** * alloc and copy */ void putOnDevice() { deviceAlloc(); cpToDevice(); } /** * alloc size and copy */ void putOnDevice(size_t newSize) { size=newSize; deviceAlloc(); cpToDevice(); } /** * Copy a number (size) of bytes from device to the host pointer */ void cpToHost() { #ifdef CUDA if (accType == accCUDA) { #ifdef DEBUG_CUDA if (dPtr == NULL) ACC_PTR_DEBUG_FATAL("cp_to_host() called before device allocation.\n"); if (hPtr == NULL) ACC_PTR_DEBUG_FATAL("NULL host pointer in cp_to_host().\n"); #endif cudaCpyDeviceToHost(dPtr, hPtr, size, stream); } #endif } /** * Copy a number (thisSize) of bytes from device to the host pointer */ void cpToHost(size_t thisSize) { #ifdef CUDA if (accType == accCUDA) { #ifdef DEBUG_CUDA if (dPtr == NULL) 
ACC_PTR_DEBUG_FATAL("cp_to_host(thisSize) called before device allocation.\n"); if (hPtr == NULL) ACC_PTR_DEBUG_FATAL("NULL host pointer in cp_to_host(thisSize).\n"); #endif cudaCpyDeviceToHost(dPtr, hPtr, thisSize, stream); } #endif } /** * Copy a number (thisSize) of bytes from device to a specific host pointer */ void cpToHost(T* hstPtr, size_t thisSize) { #ifdef CUDA if (accType == accCUDA) { #ifdef DEBUG_CUDA if (dPtr == NULL) ACC_PTR_DEBUG_FATAL("cp_to_host(hstPtr, thisSize) called before device allocation.\n"); if (hstPtr == NULL) ACC_PTR_DEBUG_FATAL("NULL host pointer in cp_to_host(hstPtr, thisSize).\n"); #endif cudaCpyDeviceToHost(dPtr, hstPtr, thisSize, stream); } #endif } /** * Copy a number (size) of bytes from device to the host pointer */ void cpToHostOnStream(StreamType s) { #ifdef CUDA if (accType == accCUDA) { #ifdef DEBUG_CUDA if (dPtr == NULL) ACC_PTR_DEBUG_FATAL("cp_to_host_on_stream(s) called before device allocation.\n"); if (hPtr == NULL) ACC_PTR_DEBUG_FATAL("NULL host pointer in cp_to_host_on_stream(s).\n"); #endif cudaCpyDeviceToHost(dPtr, hPtr, size, s); } #endif } /** * Copy a number (size) of bytes from device pointer to the provided new device pointer */ void cpOnDevice(T * dstDevPtr) { #ifdef CUDA if (accType == accCUDA) { #ifdef DEBUG_CUDA if (dstDevPtr == NULL) ACC_PTR_DEBUG_FATAL("NULL-pointer given in cpOnDevice(dstDevPtr).\n"); #endif CudaShortcuts::cpyDeviceToDevice(dPtr, dstDevPtr, size, stream); } #endif } /** * Copy a number (size) of bytes from host pointer to the provided new host pointer */ void cpOnHost(T * dstDevPtr) { #ifdef DEBUG_CUDA if (dstDevPtr == NULL) ACC_PTR_DEBUG_FATAL("NULL-pointer given in cp_on_host(dstDevPtr).\n"); if (hPtr == NULL) ACC_PTR_DEBUG_FATAL("NULL input pointer given in cp_on_host(hPtr).\n"); #endif memcpy ( dstDevPtr, hPtr, size * sizeof(T)); } void cpOnAcc(T * dstDevPtr) { if (accType == accCUDA) cpOnDevice(dstDevPtr); else cpOnHost(dstDevPtr); } void cpOnAcc(AccPtr &devPtr) { if (accType == accCUDA) cpOnDevice(devPtr.dPtr); else cpOnHost(devPtr.hPtr); } /** * Host data quick access */ const T& operator[](size_t idx) const { #ifdef DEBUG_CUDA if (hPtr == NULL) ACC_PTR_DEBUG_FATAL("const operator[] called with NULL host pointer.\n"); #endif return hPtr[idx]; }; /** * Host data quick access */ T& operator[](size_t idx) { #ifdef DEBUG_CUDA if (hPtr == NULL) ACC_PTR_DEBUG_FATAL("operator[] called with NULL host pointer.\n"); #endif return hPtr[idx]; }; /** * Device data quick access */ T& operator()(size_t idx) { #ifdef DEBUG_CUDA if (dPtr == NULL) ACC_PTR_DEBUG_FATAL("operator(idx) called with NULL acc pointer.\n"); #endif return dPtr[idx]; }; /** * Device data quick access */ const T& operator()(size_t idx) const { #ifdef DEBUG_CUDA if (dPtr == NULL) ACC_PTR_DEBUG_FATAL("operator(idx) called with NULL acc pointer.\n"); #endif return dPtr[idx]; }; /** * Raw data pointer quick access */ T* operator()() { // TODO - this could cause considerable confusion given the above operators. But it // also simplifies code that uses it. What to do... if (accType == accCUDA) { #ifdef DEBUG_CUDA if (dPtr == NULL) ACC_PTR_DEBUG_FATAL("operator() called with NULL device pointer.\n"); #endif return dPtr; } else { #ifdef DEBUG_CUDA if (hPtr == NULL) ACC_PTR_DEBUG_FATAL("operator() called with NULL host pointer.\n"); #endif return hPtr; } }; T* operator~() { // TODO - this could cause considerable confusion given the above operators. But it // also simplifies code that uses it. What to do... 
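// Like operator()(), this returns the device pointer when running the CUDA
// path and the host pointer otherwise; the null-pointer checks below are
// only compiled in when DEBUG_CUDA is defined.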
if (accType == accCUDA) { #ifdef DEBUG_CUDA if ( dPtr == 0) ACC_PTR_DEBUG_FATAL("DEBUG_WARNING: \"kernel cast\" on null device pointer.\n"); #endif return dPtr; } else { #ifdef DEBUG_CUDA if ( hPtr == 0) ACC_PTR_DEBUG_FATAL("DEBUG_WARNING: \"kernel cast\" on null host pointer.\n"); #endif return hPtr; } } void streamSync() { #ifdef CUDA if (accType == accCUDA) DEBUG_HANDLE_ERROR(cudaStreamSynchronize(stream)); #endif } T getAccValueAt(size_t idx) { #ifdef CUDA if (accType == accCUDA) { T value; cudaCpyDeviceToHost(&dPtr[idx], &value, 1, stream); streamSync(); return value; } else #endif return hPtr[idx]; } T getDeviceAt(size_t idx) { #ifdef CUDA if (accType == accCUDA) { T value; cudaCpyDeviceToHost(&dPtr[idx], &value, 1, stream); streamSync(); return value; } #else return NULL; #endif } void dumpDeviceToFile(std::string fileName) { #ifdef CUDA if (accType == accCUDA) { T *tmp = new T[size]; cudaCpyDeviceToHost(dPtr, tmp, size, stream); std::ofstream f; f.open(fileName.c_str()); streamSync(); for (unsigned i = 0; i < size; i ++) f << tmp[i] << std::endl; f.close(); delete [] tmp; } else #endif { std::ofstream f; f.open(fileName.c_str()); f << "Pointer has no device support." << std::endl; f.close(); } } void dumpHostToFile(std::string fileName) { std::ofstream f; f.open(fileName.c_str()); for (unsigned i = 0; i < size; i ++) f << hPtr[i] << std::endl; f.close(); } void dumpAccToFile(std::string fileName) { if (accType == accCUDA) dumpDeviceToFile(fileName); else dumpHostToFile(fileName); } /** * Delete device data */ void freeDevice() { #ifdef CUDA if (accType == accCUDA) { #ifdef DEBUG_CUDA if (dPtr == NULL) ACC_PTR_DEBUG_FATAL("Free device memory was called on NULL pointer in free_device().\n"); #endif doFreeDevice = false; if (alloc->getReadyEvent() == 0) alloc->markReadyEvent(stream); alloc->doFreeWhenReady(); alloc = NULL; // DEBUG_HANDLE_ERROR(cudaFree(dPtr)); dPtr = NULL; } #endif } /** * Delete host data */ void freeHost() { #ifdef DEBUG_CUDA if (hPtr == NULL) ACC_PTR_DEBUG_FATAL("free_host() called on NULL pointer.\n"); #endif doFreeHost = false; if (NULL != hPtr) free(hPtr); hPtr = NULL; } void freeHostIfSet() { if (doFreeHost) freeHost(); } void freeDeviceIfSet() { if (doFreeDevice) freeDevice(); } /** * Delete both device and host data */ void freeBoth() { freeDevice(); freeHost(); } void freeIfSet() { freeHostIfSet(); freeDeviceIfSet(); } ~AccPtr() { freeIfSet(); } /*====================================================== GETTERS AND SETTERS ======================================================*/ bool willFreeHost() { return doFreeHost; } bool willFreeDevice() { return doFreeDevice; } void setStream(StreamType s) { stream = s; } StreamType getStream() { return stream; } void setSize(size_t s) { size = s; } size_t getSize() { return size; } T *getDevicePtr() { return dPtr; } T *getHostPtr() { return hPtr; } T *getAccPtr() { if (accType == accCUDA) return dPtr; else return hPtr; } void setAllocator(AllocatorType *a) { freeDeviceIfSet(); allocator = a; }; AllocatorType *getAllocator() { return allocator; } void setDevicePtr(T *ptr) { #ifdef DEBUG_CUDA if (doFreeDevice) ACC_PTR_DEBUG_FATAL("Device pointer set without freeing the old one.\n"); #endif dPtr = ptr; } void setDevicePtr(const AccPtr &ptr) { #ifdef DEBUG_CUDA if (ptr.dPtr == NULL) ACC_PTR_DEBUG_FATAL("Device pointer is not set.\n"); #endif setDevicePtr(ptr.dPtr); } void setHostPtr(T *ptr) { #ifdef DEBUG_CUDA if (doFreeHost) ACC_PTR_DEBUG_FATAL("Host pointer set without freeing the old one.\n"); #endif hPtr = ptr; } void 
setHostPtr(const AccPtr &ptr) { #ifdef DEBUG_CUDA if (ptr.hPtr == NULL) ACC_PTR_DEBUG_FATAL("Host pointer is not set.\n"); #endif setHostPtr(ptr.hPtr); } void setAccPtr(const AccPtr &ptr) { if (accType == accCUDA) setDevicePtr(ptr.hPtr); else setHostPtr(ptr.hPtr); } void setAccPtr(T *ptr) { if (accType == accCUDA) setDevicePtr(ptr); else setHostPtr(ptr); } AccType getAccType() { return accType; } template AccPtr make() { AccPtr ptr(stream, allocator); ptr.setAccType(accType); return ptr; } template AccPtr make(size_t s) { AccPtr ptr(stream, allocator); ptr.setAccType(accType); ptr.setSize(s); return ptr; } }; typedef unsigned char AccPtrBundleByte; class AccPtrBundle: public AccPtr { private: size_t current_packed_pos; public: AccPtrBundle(StreamType stream, AllocatorType *allocator): AccPtr(stream, allocator), current_packed_pos(0) {} AccPtrBundle(size_t size, StreamType stream, AllocatorType *allocator): AccPtr(stream, allocator), current_packed_pos(0) { setSize(size); } template void pack(AccPtr &ptr) { #ifdef CUDA #ifdef DEBUG_CUDA if (current_packed_pos + ptr.getSize() > size) ACC_PTR_DEBUG_FATAL("Packing exceeds bundle total size.\n"); if (hPtr == NULL) ACC_PTR_DEBUG_FATAL("Pack called on null host pointer.\n"); #endif if (ptr.getHostPtr() != NULL) memcpy ( &hPtr[current_packed_pos], ptr.getHostPtr(), ptr.getSize() * sizeof(T)); ptr.freeHostIfSet(); ptr.setHostPtr((T*) &hPtr[current_packed_pos]); ptr.setDevicePtr((T*) &dPtr[current_packed_pos]); current_packed_pos += ptr.getSize() * sizeof(T); #else if (ptr.getHostPtr() == NULL) ptr.hostAlloc(); #endif } //Overwrite allocation methods and block for no device void allAlloc() { #ifdef CUDA AccPtr::allAlloc(); #endif } void allAlloc(size_t size) { #ifdef CUDA AccPtr::allAlloc(size); #endif } void hostAlloc() { #ifdef CUDA AccPtr::hostAlloc(); #endif } void hostAlloc(size_t size) { #ifdef CUDA AccPtr::hostAlloc(size); #endif } }; class AccPtrFactory { private: AllocatorType *allocator; StreamType stream; AccType accType; public: AccPtrFactory(): allocator(NULL), stream(0), accType(accUNSET) {} AccPtrFactory(AccType accT): allocator(NULL), stream(0), accType(accT) {} AccPtrFactory(AllocatorType *alloc): allocator(alloc), stream(0), accType(accCUDA) {} AccPtrFactory(AllocatorType *alloc, StreamType s): allocator(alloc), stream(s), accType(accCUDA) {} template AccPtr make() { AccPtr ptr(stream, allocator); ptr.setAccType(accType); return ptr; } template AccPtr make(size_t size) { AccPtr ptr(stream, allocator); ptr.setAccType(accType); ptr.setSize(size); return ptr; } template AccPtr make(size_t size, StreamType s) { AccPtr ptr(s, allocator); ptr.setAccType(accType); ptr.setSize(size); return ptr; } AccPtrBundle makeBundle() { AccPtrBundle bundle(stream, allocator); bundle.setAccType(accType); return bundle; } AccPtrBundle makeBundle(size_t size) { AccPtrBundle bundle(size, stream, allocator); bundle.setAccType(accType); return bundle; } }; #endif relion-3.1.3/src/acc/cpu/000077500000000000000000000000001411340063500151075ustar00rootroot00000000000000relion-3.1.3/src/acc/cpu/cpu_backprojector.cpp000066400000000000000000000013331411340063500213120ustar00rootroot00000000000000#include #include #include #include #include "src/acc/cpu/cuda_stubs.h" #include "src/acc/acc_ptr.h" #include "src/acc/acc_projector.h" #include "src/acc/acc_backprojector.h" #include "src/acc/acc_projector_plan.h" #include "src/acc/cpu/cpu_benchmark_utils.h" #include "src/acc/cpu/cpu_helper_functions.h" #include "src/acc/cpu/cpu_kernels/helper.h" #include 
"src/acc/cpu/cpu_kernels/diff2.h" #include "src/acc/cpu/cpu_kernels/wavg.h" #include "src/acc/cpu/cpu_kernels/BP.h" #include "src/acc/utilities.h" #include "src/acc/data_types.h" #include "src/acc/acc_helper_functions.h" #include "src/acc/cpu/cpu_settings.h" #include "src/acc/acc_backprojector_impl.h" relion-3.1.3/src/acc/cpu/cpu_benchmark_utils.h000066400000000000000000000040501411340063500213000ustar00rootroot00000000000000 #ifndef CPU_BENCHMARK_UTILS_H_ #define CPU_BENCHMARK_UTILS_H_ #define CTIC(timer,timing) #define CTOC(timer,timing) #define GTIC(timer,timing) #define GTOC(timer,timing) #define GATHERGPUTIMINGS(timer) namespace CpuKernels { //Non-concurrent benchmarking tools (only for Linux) /* //#include #include #include #include #include #include #include #ifdef TIMING_FILES #define CTIC(timer,timing) (timer.cuda_cpu_tic(timing)) #define CTOC(timer,timing) (timer.cuda_cpu_toc(timing)) #define GTIC(timer,timing) (timer.cuda_gpu_tic(timing)) #define GTOC(timer,timing) (timer.cuda_gpu_toc(timing)) #define GATHERGPUTIMINGS(timer) (timer.cuda_gpu_printtictoc()) #elif defined CUDA_PROFILING #include #define CTIC(timer,timing) (nvtxRangePush(timing)) #define CTOC(timer,timing) (nvtxRangePop()) #define GTIC(timer,timing) #define GTOC(timer,timing) #define GATHERGPUTIMINGS(timer) #else #define CTIC(timer,timing) #define CTOC(timer,timing) #define GTIC(timer,timing) #define GTOC(timer,timing) #define GATHERGPUTIMINGS(timer) #endif class relion_timer { public: std::vector cuda_cpu_benchmark_identifiers; std::vector cuda_cpu_benchmark_start_times; FILE *cuda_cpu_benchmark_fPtr; std::vector cuda_gpu_benchmark_identifiers; std::vector cuda_gpu_benchmark_start_times; std::vector cuda_gpu_benchmark_stop_times; FILE *cuda_gpu_benchmark_fPtr; relion_timer(std::string fnm) { std::stringstream fnm_cpu, fnm_gpu; fnm_cpu << "output/" << fnm << "_cpu.dat"; cuda_cpu_benchmark_fPtr = fopen(fnm_cpu.str().c_str(),"a"); fnm_gpu << "output/" << fnm << "_gpu.dat"; cuda_gpu_benchmark_fPtr = fopen(fnm_gpu.str().c_str(),"a"); } int cuda_benchmark_find_id(std::string id, std::vector v); void cuda_cpu_tic(std::string id); void cuda_cpu_toc(std::string id); void cuda_gpu_tic(std::string id); void cuda_gpu_toc(std::string id); void cuda_gpu_printtictoc(); }; */ } #endif /* CUDA_BENCHMARK_UTILS_H_ */ relion-3.1.3/src/acc/cpu/cpu_helper_functions.cpp000066400000000000000000000012671411340063500220370ustar00rootroot00000000000000#ifdef ALTCPU // Make sure we build for CPU #include "src/acc/cpu/cuda_stubs.h" #include "src/acc/acc_ptr.h" #include "src/acc/acc_projector.h" #include "src/acc/acc_backprojector.h" #include "src/acc/acc_projector_plan.h" #include "src/acc/cpu/cpu_benchmark_utils.h" #include "src/acc/cpu/cpu_helper_functions.h" #include "src/acc/cpu/cpu_kernels/helper.h" #include "src/acc/cpu/cpu_kernels/diff2.h" #include "src/acc/cpu/cpu_kernels/wavg.h" #include "src/acc/cpu/cpu_kernels/BP.h" #include "src/acc/utilities.h" #include "src/acc/data_types.h" #include "src/acc/acc_helper_functions.h" #include "src/acc/cpu/cpu_settings.h" #include "src/acc/acc_helper_functions_impl.h" #endif // ALTCPU relion-3.1.3/src/acc/cpu/cpu_helper_functions.h000066400000000000000000000061541411340063500215040ustar00rootroot00000000000000#ifndef CPU_HELPER_FUNCTIONS_H_ #define CPU_HELPER_FUNCTIONS_H_ #include "src/acc/cpu/cpu_ml_optimiser.h" #include "src/acc/acc_projector.h" #include "src/acc/cpu/cpu_benchmark_utils.h" #include "src/acc/cpu/cpu_kernels/helper.h" #include "src/acc/cpu/cpu_kernels/diff2.h" #include 
"src/acc/cpu/cpu_kernels/wavg.h" #include #include #include #include #include #include #include #include #include "src/complex.h" #include "src/parallel.h" #include #include namespace CpuKernels { #define WINDOW_FT_BLOCK_SIZE 128 template void window_fourier_transform( size_t grid_dim, size_t Npsi, size_t block_size, ACCCOMPLEX *g_in, ACCCOMPLEX *g_out, size_t iX, size_t iY, size_t iZ, size_t iYX, //Input dimensions size_t oX, size_t oY, size_t oZ, size_t oYX, //Output dimensions size_t max_idx, size_t max_r2 = 0 ) { for(size_t blk=0; blk= max_idx) return; long int k, i, kp, ip, jp; if (check_max_r2) { k = n / (iX * iY); i = (n % (iX * iY)) / iX; kp = k < iX ? k : k - iZ; ip = i < iX ? i : i - iY; jp = n % iX; if (kp*kp + ip*ip + jp*jp > max_r2) return; } else { k = n / (oX * oY); i = (n % (oX * oY)) / oX; kp = k < oX ? k : k - oZ; ip = i < oX ? i : i - oY; jp = n % oX; } long int in_idx = (kp < 0 ? kp + iZ : kp) * iYX + (ip < 0 ? ip + iY : ip)*iX + jp; long int out_idx = (kp < 0 ? kp + oZ : kp) * oYX + (ip < 0 ? ip + oY : ip)*oX + jp; g_out[out_idx + oOFF] = g_in[in_idx + iOFF]; } // for tid } // for psi } // for blk } // may need to change to parallel reduce if it becomes the bottle neck. template static T getMin(T *data, size_t size) { T min = data[0]; for(size_t i=1; i static T getMax(T *data, size_t size) { T max = data[0]; for(size_t i=1; i max ? data[i] : max; return max; } template static T getSum(T *data, size_t size) { T sum = data[0]; for(size_t i=1; i static std::pair getArgMin(T *data, size_t size) { std::pair pair; pair.first = 0; pair.second = data[0]; for(size_t i=1; i static std::pair getArgMax(T *data, size_t size) { std::pair pair; pair.first = 0; pair.second = data[0]; for(size_t i=1; i pair.second) { pair.first = i; pair.second = data[i]; } return pair; } } // Namespace CpuKernels #endif //CPU_HELPER_FUNCTIONS_H_ relion-3.1.3/src/acc/cpu/cpu_kernels/000077500000000000000000000000001411340063500174215ustar00rootroot00000000000000relion-3.1.3/src/acc/cpu/cpu_kernels/BP.h000066400000000000000000000774031411340063500201060ustar00rootroot00000000000000#include #include #include #include #include "src/acc/acc_backprojector.h" #include "src/acc/cpu/cpu_kernels/helper.h" namespace CpuKernels { template < bool CTF_PREMULTIPLIED > #ifndef __INTEL_COMPILER __attribute__((always_inline)) #endif inline void backproject2D( unsigned long imageCount, int block_size, XFLOAT *g_img_real, XFLOAT *g_img_imag, XFLOAT *g_trans_x, XFLOAT *g_trans_y, XFLOAT* g_weights, XFLOAT* g_Minvsigma2s, XFLOAT* g_ctfs, unsigned long translation_num, XFLOAT significant_weight, XFLOAT weight_norm, XFLOAT *g_eulers, XFLOAT *g_model_real, XFLOAT *g_model_imag, XFLOAT *g_model_weight, int max_r, int max_r2, XFLOAT padding_factor, unsigned img_x, unsigned img_y, unsigned img_xy, unsigned mdl_x, int mdl_inity, tbb::spin_mutex *mutexes) { int img_y_half = img_y / 2; int max_r2_out = max_r2 * padding_factor * padding_factor; // pre-compute sin and cos for x and y direction XFLOAT sin_x[translation_num][img_x], cos_x[translation_num][img_x]; XFLOAT sin_y[translation_num][img_y], cos_y[translation_num][img_y]; computeSincosLookupTable2D(translation_num, g_trans_x, g_trans_y, img_x, img_y, &sin_x[0][0], &cos_x[0][0], &sin_y[0][0], &cos_y[0][0]); // Set up some other variables XFLOAT s_eulers[4]; XFLOAT weight_norm_inverse = (XFLOAT) 1.0 / weight_norm; XFLOAT xp[img_x], yp[img_x]; XFLOAT real[img_x], imag[img_x], Fweight[img_x]; for (unsigned long img=0; img img_y_half) { y = iy - img_y; } int xmax = img_x; 
memset(Fweight,0,sizeof(XFLOAT)*img_x); memset(real, 0,sizeof(XFLOAT)*img_x); memset(imag, 0,sizeof(XFLOAT)*img_x); for (unsigned long itrans = 0; itrans < translation_num; itrans++) { XFLOAT weight = g_weights[img * translation_num + itrans]; if (weight < significant_weight) continue; XFLOAT trans_cos_y, trans_sin_y; if ( y < 0) { trans_cos_y = cos_y[itrans][-y]; trans_sin_y = -sin_y[itrans][-y]; } else { trans_cos_y = cos_y[itrans][y]; trans_sin_y = sin_y[itrans][y]; } XFLOAT *trans_cos_x = &cos_x[itrans][0]; XFLOAT *trans_sin_x = &sin_x[itrans][0]; for(int x=0; x max_r2_out) { Fweight[x]= (XFLOAT) 0.0; continue; } // Only asymmetric half is stored if (xp[x] < (XFLOAT) 0.0) { // Get complex conjugated hermitian symmetry pair xp[x] = -xp[x]; yp[x] = -yp[x]; imag[x] = -imag[x]; } } // for x for(int x=0; x #ifndef __INTEL_COMPILER __attribute__((always_inline)) #endif inline void backproject3D( unsigned long imageCount, int block_size, XFLOAT *g_img_real, XFLOAT *g_img_imag, XFLOAT *g_trans_x, XFLOAT *g_trans_y, XFLOAT *g_trans_z, XFLOAT* g_weights, XFLOAT* g_Minvsigma2s, XFLOAT* g_ctfs, unsigned long translation_num, XFLOAT significant_weight, XFLOAT weight_norm, XFLOAT *g_eulers, XFLOAT *g_model_real, XFLOAT *g_model_imag, XFLOAT *g_model_weight, int max_r, int max_r2, XFLOAT padding_factor, unsigned img_x, unsigned img_y, unsigned img_z, size_t img_xyz, unsigned mdl_x, unsigned mdl_y, int mdl_inity, int mdl_initz, tbb::spin_mutex *mutexes) { int img_y_half = img_y / 2; int img_z_half = img_z / 2; int max_r2_vol = max_r2 * padding_factor * padding_factor; // Set up some variables XFLOAT s_eulers[9]; // We collect block_size number of values before storing the results to // help vectorization and control memory accesses XFLOAT real[block_size], imag[block_size], Fweight[block_size]; XFLOAT xp[block_size], yp[block_size], zp[block_size]; for (unsigned long img=0; img= img_xyz) continue; // just doesn't make sense to proceed in this case int x,y,z,xy; XFLOAT minvsigma2, ctf, img_real, img_imag, weight; if(DATA3D) { z = CpuKernels::floorfracf(pixel, (size_t)img_x*(size_t)img_y); xy = pixel % (img_x*img_y); x = xy % img_x; y = CpuKernels::floorfracf( xy, (size_t)img_x); if (z > img_z_half) { z = z - img_z; if(x==0) ok_for_next=0; } } else { x = pixel % img_x; y = CpuKernels::floorfracf( pixel , (size_t)img_x); } if (y > img_y_half) { y = y - img_y; } // Get logical coordinates in the 3D map if(DATA3D) { xp[tid] = (s_eulers[0] * x + s_eulers[1] * y + s_eulers[2] * z) * padding_factor; yp[tid] = (s_eulers[3] * x + s_eulers[4] * y + s_eulers[5] * z) * padding_factor; zp[tid] = (s_eulers[6] * x + s_eulers[7] * y + s_eulers[8] * z) * padding_factor; } else { xp[tid] = (s_eulers[0] * x + s_eulers[1] * y ) * padding_factor; yp[tid] = (s_eulers[3] * x + s_eulers[4] * y ) * padding_factor; zp[tid] = (s_eulers[6] * x + s_eulers[7] * y ) * padding_factor; } // Only consider pixels that are projected inside the sphere in output coordinates. // --JZ, Nov. 
26th 2018 if ( ( xp[tid] * xp[tid] + yp[tid] * yp[tid] + zp[tid] * zp[tid] ) > max_r2_vol) { ok_for_next = 0; } if(ok_for_next) { //WAVG minvsigma2 = g_Minvsigma2s[pixel]; ctf = g_ctfs[pixel]; img_real = g_img_real[pixel]; img_imag = g_img_imag[pixel]; Fweight[tid] = (XFLOAT) 0.0; real[tid] = (XFLOAT) 0.0; imag[tid] = (XFLOAT) 0.0; XFLOAT inv_minsigma_ctf; if(CTF_PREMULTIPLIED) inv_minsigma_ctf = weight_norm_inverse * minvsigma2; else inv_minsigma_ctf = weight_norm_inverse * ctf * minvsigma2; XFLOAT temp_real, temp_imag; for (unsigned long itrans = 0; itrans < translation_num; itrans++) { weight = g_weights[img * translation_num + itrans]; if (weight >= significant_weight) { weight = weight * inv_minsigma_ctf; if(CTF_PREMULTIPLIED) Fweight[tid] += weight * ctf * ctf; else Fweight[tid] += weight * ctf; if(DATA3D) CpuKernels::translatePixel(x, y, z, g_trans_x[itrans], g_trans_y[itrans], g_trans_z[itrans], img_real, img_imag, temp_real, temp_imag); else CpuKernels::translatePixel(x, y, g_trans_x[itrans], g_trans_y[itrans], img_real, img_imag, temp_real, temp_imag); real[tid] += temp_real * weight; imag[tid] += temp_imag * weight; } } //BP if (Fweight[tid] > (XFLOAT) 0.0) { // Only asymmetric half is stored if (xp[tid] < (XFLOAT) 0.0) { // Get complex conjugated hermitian symmetry pair xp[tid] = -xp[tid]; yp[tid] = -yp[tid]; zp[tid] = -zp[tid]; imag[tid] = -imag[tid]; } } // Fweight[tid] > (RFLOAT) 0.0 }// end for if_ok_for_next } // for tid for(int tid=0; tid (XFLOAT) 0.0) { int x0 = floorf(xp[tid]); XFLOAT fx = xp[tid] - x0; int x1 = x0 + 1; int y0 = floorf(yp[tid]); XFLOAT fy = yp[tid] - y0; y0 -= mdl_inity; int y1 = y0 + 1; int z0 = floorf(zp[tid]); XFLOAT fz = zp[tid] - z0; z0 -= mdl_initz; int z1 = z0 + 1; XFLOAT mfx = (XFLOAT)1.0 - fx; XFLOAT mfy = (XFLOAT)1.0 - fy; XFLOAT mfz = (XFLOAT)1.0 - fz; // Locking necessary since all threads share the same back projector XFLOAT dd000 = mfz * mfy * mfx; XFLOAT dd001 = mfz * mfy * fx; size_t z0MdlxMdly = (size_t)z0 * (size_t)mdl_x * (size_t)mdl_y; { tbb::spin_mutex::scoped_lock lock(mutexes[z0 * mdl_y + y0]); g_model_real [z0MdlxMdly + y0 * mdl_x + x0]+=dd000 * real[tid]; g_model_imag [z0MdlxMdly + y0 * mdl_x + x0]+=dd000 * imag[tid]; g_model_weight[z0MdlxMdly + y0 * mdl_x + x0]+=dd000 * Fweight[tid]; g_model_real [z0MdlxMdly + y0 * mdl_x + x1]+=dd001 * real[tid]; g_model_imag [z0MdlxMdly + y0 * mdl_x + x1]+=dd001 * imag[tid]; g_model_weight[z0MdlxMdly + y0 * mdl_x + x1]+=dd001 * Fweight[tid]; } XFLOAT dd010 = mfz * fy * mfx; XFLOAT dd011 = mfz * fy * fx; { tbb::spin_mutex::scoped_lock lock(mutexes[z0 * mdl_y + y1]); g_model_real [z0MdlxMdly + y1 * mdl_x + x0]+=dd010 * real[tid]; g_model_imag [z0MdlxMdly + y1 * mdl_x + x0]+=dd010 * imag[tid]; g_model_weight[z0MdlxMdly + y1 * mdl_x + x0]+=dd010 * Fweight[tid]; g_model_real [z0MdlxMdly + y1 * mdl_x + x1]+=dd011 * real[tid]; g_model_imag [z0MdlxMdly + y1 * mdl_x + x1]+=dd011 * imag[tid]; g_model_weight[z0MdlxMdly + y1 * mdl_x + x1]+=dd011 * Fweight[tid]; } XFLOAT dd100 = fz * mfy * mfx; XFLOAT dd101 = fz * mfy * fx; size_t z1MdlxMdly = (size_t)z1 * (size_t)mdl_x * (size_t)mdl_y; { tbb::spin_mutex::scoped_lock lock(mutexes[z1 * mdl_y + y0]); g_model_real [z1MdlxMdly + y0 * mdl_x + x0]+=dd100 * real[tid]; g_model_imag [z1MdlxMdly + y0 * mdl_x + x0]+=dd100 * imag[tid]; g_model_weight[z1MdlxMdly + y0 * mdl_x + x0]+=dd100 * Fweight[tid]; g_model_real [z1MdlxMdly + y0 * mdl_x + x1]+=dd101 * real[tid]; g_model_imag [z1MdlxMdly + y0 * mdl_x + x1]+=dd101 * imag[tid]; g_model_weight[z1MdlxMdly + y0 * mdl_x + 
x1]+=dd101 * Fweight[tid]; } XFLOAT dd110 = fz * fy * mfx; XFLOAT dd111 = fz * fy * fx; { tbb::spin_mutex::scoped_lock lock(mutexes[z1 * mdl_y + y1]); g_model_real [z1MdlxMdly + y1 * mdl_x + x0]+=dd110 * real[tid]; g_model_imag [z1MdlxMdly + y1 * mdl_x + x0]+=dd110 * imag[tid]; g_model_weight[z1MdlxMdly + y1 * mdl_x + x0]+=dd110 * Fweight[tid]; g_model_real [z1MdlxMdly + y1 * mdl_x + x1]+=dd111 * real[tid]; g_model_imag [z1MdlxMdly + y1 * mdl_x + x1]+=dd111 * imag[tid]; g_model_weight[z1MdlxMdly + y1 * mdl_x + x1]+=dd111 * Fweight[tid]; } } // Fweight[tid] > (RFLOAT) 0.0 } // for tid } // for pass } // for img } // sincos lookup table optimization. Function translatePixel calls // sincos(x*tx + y*ty). We precompute 2D lookup tables for x and y directions. // The first dimension is x or y pixel index, and the second dimension is x or y // translation index. Since sin(a+B) = sin(A) * cos(B) + cos(A) * sin(B), and // cos(A+B) = cos(A) * cos(B) - sin(A) * sin(B), we can use lookup table to // compute sin(x*tx + y*ty) and cos(x*tx + y*ty). template < bool CTF_PREMULTIPLIED > #ifndef __INTEL_COMPILER __attribute__((always_inline)) #endif inline void backprojectRef3D( unsigned long imageCount, XFLOAT *g_img_real, XFLOAT *g_img_imag, XFLOAT *g_trans_x, XFLOAT *g_trans_y, XFLOAT* g_weights, XFLOAT* g_Minvsigma2s, XFLOAT* g_ctfs, unsigned long trans_num, XFLOAT significant_weight, XFLOAT weight_norm, XFLOAT *g_eulers, XFLOAT *g_model_real, XFLOAT *g_model_imag, XFLOAT *g_model_weight, int max_r, int max_r2, XFLOAT padding_factor, unsigned img_x, unsigned img_y, unsigned img_z, size_t img_xyz, unsigned mdl_x, unsigned mdl_y, int mdl_inity, int mdl_initz, tbb::spin_mutex *mutexes) { int img_y_half = img_y / 2; int img_y_half_2 = img_y_half * img_y_half; int img_z_half = img_z / 2; int max_r2_vol = max_r2 * padding_factor * padding_factor; // Set up the sin and cos lookup tables XFLOAT sin_x[trans_num][img_x], cos_x[trans_num][img_x]; XFLOAT sin_y[trans_num][img_y], cos_y[trans_num][img_y]; CpuKernels::computeSincosLookupTable2D(trans_num, g_trans_x, g_trans_y, img_x, img_y, &sin_x[0][0], &cos_x[0][0], &sin_y[0][0], &cos_y[0][0]); // Set up some other variables XFLOAT s_eulers[9]; XFLOAT weight_norm_inverse = (XFLOAT) 1.0 / weight_norm; XFLOAT xp[img_x], yp[img_x], zp[img_x]; XFLOAT real[img_x], imag[img_x], Fweight[img_x]; for (unsigned long img=0; img img_y_half) { y = iy - img_y; } int y2 = y * y; int xmax = sqrt((XFLOAT)(img_y_half_2 - y2)); // minimize locking if possible memset(Fweight,0,sizeof(XFLOAT)*img_x); memset(real, 0,sizeof(XFLOAT)*img_x); memset(imag, 0,sizeof(XFLOAT)*img_x); for (unsigned long itrans = 0; itrans < trans_num; itrans++) { XFLOAT weight = g_weights[img * trans_num + itrans]; if (weight < significant_weight) continue; XFLOAT trans_cos_y, trans_sin_y; if ( y < 0) { trans_cos_y = cos_y[itrans][-y]; trans_sin_y = -sin_y[itrans][-y]; } else { trans_cos_y = cos_y[itrans][y]; trans_sin_y = sin_y[itrans][y]; } XFLOAT *trans_cos_x = &cos_x[itrans][0]; XFLOAT *trans_sin_x = &sin_x[itrans][0]; #pragma omp simd for(int x=0; x max_r2_vol) { Fweight[x] = (XFLOAT) 0.0; } // Only asymmetric half is stored if (xp[x] < (XFLOAT) 0.0) { // Get complex conjugated hermitian symmetry pair xp[x] = -xp[x]; yp[x] = -yp[x]; zp[x] = -zp[x]; imag[x] = -imag[x]; } } // for x direction for(int x=0; x #ifndef __INTEL_COMPILER __attribute__((always_inline)) #endif inline void backprojectSGD( unsigned long imageCount, int block_size, AccProjectorKernel projector, XFLOAT *g_img_real, XFLOAT *g_img_imag, 
XFLOAT *g_trans_x, XFLOAT *g_trans_y, XFLOAT *g_trans_z, XFLOAT* g_weights, XFLOAT* g_Minvsigma2s, XFLOAT* g_ctfs, unsigned long translation_num, XFLOAT significant_weight, XFLOAT weight_norm, XFLOAT *g_eulers, XFLOAT *g_model_real, XFLOAT *g_model_imag, XFLOAT *g_model_weight, int max_r, int max_r2, XFLOAT padding_factor, unsigned img_x, unsigned img_y, unsigned img_z, size_t img_xyz, unsigned mdl_x, unsigned mdl_y, int mdl_inity, int mdl_initz, tbb::spin_mutex *mutexes) { int img_y_half = img_y / 2; int img_z_half = img_z / 2; int max_r2_vol = max_r2 * padding_factor * padding_factor; // Set up some variables XFLOAT s_eulers[9]; XFLOAT weight_norm_inverse = (XFLOAT) 1.0 / weight_norm; // TODO - does this really help with the call to the projector in here? // // We collect block_size number of values before storing the results to // help vectorization and control memory accesses XFLOAT real[block_size], imag[block_size], Fweight[block_size]; XFLOAT ref_real[block_size], ref_imag[block_size]; XFLOAT xp[block_size], yp[block_size], zp[block_size]; for (unsigned long img=0; img= img_xyz) continue; // just doesn't make sense to proceed in this case int x,y,z,xy; XFLOAT minvsigma2, ctf, img_real, img_imag, weight; if(DATA3D) { z = CpuKernels::floorfracf(pixel, (size_t)((size_t)img_x*(size_t)img_y)); xy = pixel % (img_x*img_y); x = xy % img_x; y = CpuKernels::floorfracf( xy, (size_t)img_x); if (z > img_z_half) { z = z - img_z; if(x==0) ok_for_next=0; } } else { x = pixel % img_x; y = CpuKernels::floorfracf( pixel , (size_t)img_x); } if (y > img_y_half) { y = y - img_y; } // Get logical coordinates in the 3D map if(DATA3D) { xp[tid] = (s_eulers[0] * x + s_eulers[1] * y + s_eulers[2] * z) * padding_factor; yp[tid] = (s_eulers[3] * x + s_eulers[4] * y + s_eulers[5] * z) * padding_factor; zp[tid] = (s_eulers[6] * x + s_eulers[7] * y + s_eulers[8] * z) * padding_factor; } else { xp[tid] = (s_eulers[0] * x + s_eulers[1] * y ) * padding_factor; yp[tid] = (s_eulers[3] * x + s_eulers[4] * y ) * padding_factor; zp[tid] = (s_eulers[6] * x + s_eulers[7] * y ) * padding_factor; } if (xp[tid]*xp[tid] + yp[tid]*yp[tid] + zp[tid]*zp[tid] > max_r2_vol) { ok_for_next = 0; } if(ok_for_next) { ref_real[tid] = (XFLOAT) 0.0; ref_imag[tid] = (XFLOAT) 0.0; if(DATA3D) projector.project3Dmodel( x,y,z, s_eulers[0], s_eulers[1], s_eulers[2], s_eulers[3], s_eulers[4], s_eulers[5], s_eulers[6], s_eulers[7], s_eulers[8], ref_real[tid], ref_imag[tid]); else projector.project3Dmodel( x,y, s_eulers[0], s_eulers[1], s_eulers[3], s_eulers[4], s_eulers[6], s_eulers[7], ref_real[tid], ref_imag[tid]); //WAVG minvsigma2 = g_Minvsigma2s[pixel]; ctf = g_ctfs[pixel]; img_real = g_img_real[pixel]; img_imag = g_img_imag[pixel]; Fweight[tid] = (XFLOAT) 0.0; real[tid] = (XFLOAT) 0.0; imag[tid] = (XFLOAT) 0.0; ref_real[tid] *= ctf; ref_imag[tid] *= ctf; XFLOAT inv_minsigma_ctf; if(CTF_PREMULTIPLIED) inv_minsigma_ctf = weight_norm_inverse * minvsigma2; else inv_minsigma_ctf = weight_norm_inverse * ctf * minvsigma2; XFLOAT temp_real, temp_imag; for (unsigned long itrans = 0; itrans < translation_num; itrans++) { weight = g_weights[img * translation_num + itrans]; if (weight >= significant_weight) { weight = weight * inv_minsigma_ctf; if(CTF_PREMULTIPLIED) Fweight[tid] += weight * ctf * ctf; else Fweight[tid] += weight * ctf; if(DATA3D) CpuKernels::translatePixel(x, y, z, g_trans_x[itrans], g_trans_y[itrans], g_trans_z[itrans], img_real, img_imag, temp_real, temp_imag); else CpuKernels::translatePixel(x, y, g_trans_x[itrans], g_trans_y[itrans], 
img_real, img_imag, temp_real, temp_imag); real[tid] += (temp_real-ref_real[tid]) * weight; imag[tid] += (temp_imag-ref_imag[tid]) * weight; } } //BP if (Fweight[tid] > (XFLOAT) 0.0) { // Only asymmetric half is stored if (xp[tid] < (XFLOAT) 0.0) { // Get complex conjugated hermitian symmetry pair xp[tid] = -xp[tid]; yp[tid] = -yp[tid]; zp[tid] = -zp[tid]; imag[tid] = -imag[tid]; } } // if (Fweight[tid] > (XFLOAT) 0.0) } //if(ok_for_next) } // for tid for(int tid=0; tid (XFLOAT) 0.0) { int x0 = floorf(xp[tid]); XFLOAT fx = xp[tid] - x0; int x1 = x0 + 1; int y0 = floorf(yp[tid]); XFLOAT fy = yp[tid] - y0; y0 -= mdl_inity; int y1 = y0 + 1; int z0 = floorf(zp[tid]); XFLOAT fz = zp[tid] - z0; z0 -= mdl_initz; int z1 = z0 + 1; XFLOAT mfx = (XFLOAT)1.0 - fx; XFLOAT mfy = (XFLOAT)1.0 - fy; XFLOAT mfz = (XFLOAT)1.0 - fz; XFLOAT dd000 = mfz * mfy * mfx; XFLOAT dd001 = mfz * mfy * fx; size_t z0MdlxMdly = (size_t)z0 * (size_t)mdl_x * (size_t)mdl_y; { tbb::spin_mutex::scoped_lock lock(mutexes[z0 * mdl_y + y0]); g_model_real [z0MdlxMdly + y0 * mdl_x + x0] += dd000 * real[tid]; g_model_imag [z0MdlxMdly + y0 * mdl_x + x0] += dd000 * imag[tid]; g_model_weight[z0MdlxMdly + y0 * mdl_x + x0] += dd000 * Fweight[tid]; g_model_real [z0MdlxMdly + y0 * mdl_x + x1] += dd001 * real[tid]; g_model_imag [z0MdlxMdly + y0 * mdl_x + x1] += dd001 * imag[tid]; g_model_weight[z0MdlxMdly + y0 * mdl_x + x1] += dd001 * Fweight[tid]; } XFLOAT dd010 = mfz * fy * mfx; XFLOAT dd011 = mfz * fy * fx; { tbb::spin_mutex::scoped_lock lock(mutexes[z0 * mdl_y + y1]); g_model_real [z0MdlxMdly + y1 * mdl_x + x0] += dd010 * real[tid]; g_model_imag [z0MdlxMdly + y1 * mdl_x + x0] += dd010 * imag[tid]; g_model_weight[z0MdlxMdly + y1 * mdl_x + x0] += dd010 * Fweight[tid]; g_model_real [z0MdlxMdly + y1 * mdl_x + x1] += dd011 * real[tid]; g_model_imag [z0MdlxMdly + y1 * mdl_x + x1] += dd011 * imag[tid]; g_model_weight[z0MdlxMdly + y1 * mdl_x + x1] += dd011 * Fweight[tid]; } XFLOAT dd100 = fz * mfy * mfx; XFLOAT dd101 = fz * mfy * fx; size_t z1MdlxMdly = (size_t)z1 * (size_t)mdl_x * (size_t)mdl_y; { tbb::spin_mutex::scoped_lock lock(mutexes[z1 * mdl_y + y0]); g_model_real [z1MdlxMdly + y0 * mdl_x + x0] += dd100 * real[tid]; g_model_imag [z1MdlxMdly + y0 * mdl_x + x0] += dd100 * imag[tid]; g_model_weight[z1MdlxMdly + y0 * mdl_x + x0] += dd100 * Fweight[tid]; g_model_real [z1MdlxMdly + y0 * mdl_x + x1] += dd101 * real[tid]; g_model_imag [z1MdlxMdly + y0 * mdl_x + x1] += dd101 * imag[tid]; g_model_weight[z1MdlxMdly + y0 * mdl_x + x1] += dd101 * Fweight[tid]; } XFLOAT dd110 = fz * fy * mfx; XFLOAT dd111 = fz * fy * fx; { tbb::spin_mutex::scoped_lock lock(mutexes[z1 * mdl_y + y1]); g_model_real [z1MdlxMdly + y1 * mdl_x + x0] += dd110 * real[tid]; g_model_imag [z1MdlxMdly + y1 * mdl_x + x0] += dd110 * imag[tid]; g_model_weight[z1MdlxMdly + y1 * mdl_x + x0] += dd110 * Fweight[tid]; g_model_real [z1MdlxMdly + y1 * mdl_x + x1] += dd111 * real[tid]; g_model_imag [z1MdlxMdly + y1 * mdl_x + x1] += dd111 * imag[tid]; g_model_weight[z1MdlxMdly + y1 * mdl_x + x1] += dd111 * Fweight[tid]; } } // Fweight[tid] > (RFLOAT) 0.0 } // for tid } // for pass } // for img } } // namespace relion-3.1.3/src/acc/cpu/cpu_kernels/cpu_utils.h000066400000000000000000000204551411340063500216070ustar00rootroot00000000000000#ifndef CPU_UTILITIES_H #define CPU_UTILITIES_H #include #include #include "src/acc/cpu/cpu_settings.h" #include namespace CpuKernels { #define CHECK_INDEX_DEBUG_FATAL( err ) (HandleCheckIndexPtrDebugFatal( err, __FILE__, __LINE__ )) static void 
HandleCheckIndexPtrDebugFatal( const char *err, const char *file, int line ) { fprintf(stderr, "DEBUG ERROR: %s in %s:%d\n", err, file, line ); fflush(stdout); raise(SIGSEGV); } template class checkedArray { private: T *underlyingData; public: void initCheckedArray(T *dataToCheck) { underlyingData = dataToCheck; } T& operator[](size_t idx) { if (idx > std::numeric_limits::max()) CHECK_INDEX_DEBUG_FATAL("array index > std::numeric_limits::max()"); return underlyingData[idx]; } const T& operator[](size_t idx) const { if (idx > std::numeric_limits::max()) CHECK_INDEX_DEBUG_FATAL("const: array index > std::numeric_limits::max()"); return underlyingData[idx]; } }; /* * For the following functions always use fast, low-precision intrinsics template< typename T1, typename T2 > static inline int floorfracf(T1 a, T2 b) { // return __float2int_rd(__fdividef( (float)a, (float)b ) ); return (int)(a/b); } template< typename T1, typename T2 > static inline int ceilfracf(T1 a, T2 b) { // return __float2int_ru(__fdividef( (float)a, (float)b ) ); return (int)(a/b + 1); } */ static inline int floorfracf(int a, int b) { return (int)(a/b); } static inline size_t floorfracf(size_t a, int b) { return (size_t)(a/b); } static inline int floorfracf(int a, size_t b) { return (int)(a/b); } static inline size_t floorfracf(size_t a, size_t b) { return (size_t)(a/b); } static inline int ceilfracf(int a, int b) { return (int)(a/b + 1); } static inline size_t ceilfracf(size_t a, int b) { return (size_t)(a/b + (size_t)1); } static inline int ceilfracf(int a, size_t b) { return (int)(a/b + 1); } static inline size_t ceilfracf(size_t a, size_t b) { return (size_t)(a/b + (size_t)1); } #ifndef __INTEL_COMPILER __attribute__((always_inline)) #endif static inline XFLOAT no_tex2D(XFLOAT* mdl, XFLOAT xp, XFLOAT yp, int mdlX, int mdlInitY) { int x0 = floorf(xp); XFLOAT fx = xp - x0; int x1 = x0 + 1; int y0 = floorf(yp); XFLOAT fy = yp - y0; y0 -= mdlInitY; int y1 = y0 + 1; //----------------------------- XFLOAT d00 = mdl[(size_t)y0*(size_t)mdlX+x0]; XFLOAT d01 = mdl[(size_t)y0*(size_t)mdlX+x1]; XFLOAT d10 = mdl[(size_t)y1*(size_t)mdlX+x0]; XFLOAT d11 = mdl[(size_t)y1*(size_t)mdlX+x1]; //----------------------------- XFLOAT dx0 = d00 + (d01 - d00)*fx; XFLOAT dx1 = d10 + (d11 - d10)*fx; //----------------------------- return dx0 + (dx1 - dx0)*fy; } // 2D linear interpolation for complex data that interleaves real and // imaginary data, rather than storing them in a separate array #ifdef __INTEL_COMPILER #pragma omp declare simd uniform(mdlX,mdlInitY) #else __attribute__((always_inline)) inline #endif static void complex2D(std::complex *mdlComplex, XFLOAT &real, XFLOAT &imag, XFLOAT xp, XFLOAT yp, int mdlX, int mdlInitY) { int x0 = floorf(xp); XFLOAT fx = xp - x0; int y0 = floorf(yp); XFLOAT fy = yp - y0; y0 -= mdlInitY; size_t offset1 = ((size_t)y0 * (size_t)mdlX + (size_t)x0); size_t offset2 = offset1 + (size_t)1; size_t offset3 = offset1 + (size_t)mdlX; size_t offset4 = offset3 + (size_t)1; //----------------------------- XFLOAT d00[2], d01[2], d10[2], d11[2]; d00[0] = mdlComplex[offset1].real(); d00[1] = mdlComplex[offset1].imag(); d01[0] = mdlComplex[offset2].real(); d01[1] = mdlComplex[offset2].imag(); d10[0] = mdlComplex[offset3].real(); d10[1] = mdlComplex[offset3].imag(); d11[0] = mdlComplex[offset4].real(); d11[1] = mdlComplex[offset4].imag(); //----------------------------- XFLOAT dx0[2], dx1[2]; dx0[0] = d00[0] + (d01[0] - d00[0]) * fx; dx1[0] = d10[0] + (d11[0] - d10[0]) * fx; dx0[1] = d00[1] + (d01[1] - d00[1]) * fx; 
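// Remaining x-direction interpolation (imaginary part, second row), then the
// two rows are blended along y to give the final real/imag values.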
dx1[1] = d10[1] + (d11[1] - d10[1]) * fx; //----------------------------- real = dx0[0] + (dx1[0] - dx0[0])*fy; imag = dx0[1] + (dx1[1] - dx0[1])*fy; } #ifndef __INTEL_COMPILER __attribute__((always_inline)) #endif static inline XFLOAT no_tex3D( #ifdef DEBUG_CUDA XFLOAT* _mdl, #else XFLOAT* mdl, #endif XFLOAT xp, XFLOAT yp, XFLOAT zp, int mdlX, int mdlXY, int mdlInitY, int mdlInitZ) { #ifdef DEBUG_CUDA checkedArray mdl; mdl.initCheckedArray(_mdl); #endif int x0 = floorf(xp); XFLOAT fx = xp - x0; int x1 = x0 + 1; int y0 = floorf(yp); XFLOAT fy = yp - y0; y0 -= mdlInitY; int y1 = y0 + 1; int z0 = floorf(zp); XFLOAT fz = zp - z0; z0 -= mdlInitZ; int z1 = z0 + 1; XFLOAT d000 = mdl[(size_t)z0*(size_t)mdlXY+(size_t)y0*(size_t)mdlX+x0]; XFLOAT d001 = mdl[(size_t)z0*(size_t)mdlXY+(size_t)y0*(size_t)mdlX+x1]; XFLOAT d010 = mdl[(size_t)z0*(size_t)mdlXY+(size_t)y1*(size_t)mdlX+x0]; XFLOAT d011 = mdl[(size_t)z0*(size_t)mdlXY+(size_t)y1*(size_t)mdlX+x1]; XFLOAT d100 = mdl[(size_t)z1*(size_t)mdlXY+(size_t)y0*(size_t)mdlX+x0]; XFLOAT d101 = mdl[(size_t)z1*(size_t)mdlXY+(size_t)y0*(size_t)mdlX+x1]; XFLOAT d110 = mdl[(size_t)z1*(size_t)mdlXY+(size_t)y1*(size_t)mdlX+x0]; XFLOAT d111 = mdl[(size_t)z1*(size_t)mdlXY+(size_t)y1*(size_t)mdlX+x1]; //----------------------------- XFLOAT dx00 = d000 + (d001 - d000)*fx; XFLOAT dx01 = d100 + (d101 - d100)*fx; XFLOAT dx10 = d010 + (d011 - d010)*fx; XFLOAT dx11 = d110 + (d111 - d110)*fx; //----------------------------- XFLOAT dxy0 = dx00 + (dx10 - dx00)*fy; XFLOAT dxy1 = dx01 + (dx11 - dx01)*fy; //----------------------------- return dxy0 + (dxy1 - dxy0)*fz; } // 3D linear interpolation for complex data that interleaves real and // imaginary data, rather than storing them in a separate array #ifdef __INTEL_COMPILER #pragma omp declare simd uniform(mdlX,mdlXY,mdlInitY,mdlInitZ) #else __attribute__((always_inline)) inline #endif static void complex3D( std::complex * mdlComplex, XFLOAT &real, XFLOAT &imag, XFLOAT xp, XFLOAT yp, XFLOAT zp, int mdlX, int mdlXY, int mdlInitY, int mdlInitZ ) { int x0 = floorf(xp); XFLOAT fx = xp - x0; int y0 = floorf(yp); XFLOAT fy = yp - y0; y0 -= mdlInitY; int z0 = floorf(zp); XFLOAT fz = zp - z0; z0 -= mdlInitZ; size_t offset1 = (size_t)((size_t)z0*(size_t)mdlXY+(size_t)y0*(size_t)mdlX+(size_t)x0); size_t offset2 = offset1 + (size_t)1; size_t offset3 = offset1 + (size_t)mdlX; size_t offset4 = offset3 + (size_t)1; size_t offset5 = offset1 + (size_t)mdlXY; size_t offset6 = offset2 + (size_t)mdlXY; size_t offset7 = offset3 + (size_t)mdlXY; size_t offset8 = offset4 + (size_t)mdlXY; XFLOAT d000[2], d001[2], d010[2], d011[2]; XFLOAT d100[2], d101[2], d110[2], d111[2]; d000[0] = mdlComplex[offset1].real(); d000[1] = mdlComplex[offset1].imag(); d001[0] = mdlComplex[offset2].real(); d001[1] = mdlComplex[offset2].imag(); d010[0] = mdlComplex[offset3].real(); d010[1] = mdlComplex[offset3].imag(); d011[0] = mdlComplex[offset4].real(); d011[1] = mdlComplex[offset4].imag(); d100[0] = mdlComplex[offset5].real(); d100[1] = mdlComplex[offset5].imag(); d101[0] = mdlComplex[offset6].real(); d101[1] = mdlComplex[offset6].imag(); d110[0] = mdlComplex[offset7].real(); d110[1] = mdlComplex[offset7].imag(); d111[0] = mdlComplex[offset8].real(); d111[1] = mdlComplex[offset8].imag(); //----------------------------- XFLOAT dx00[2], dx01[2], dx10[2], dx11[2]; dx00[0] = d000[0] + (d001[0] - d000[0])*fx; dx01[0] = d100[0] + (d101[0] - d100[0])*fx; dx10[0] = d010[0] + (d011[0] - d010[0])*fx; dx11[0] = d110[0] + (d111[0] - d110[0])*fx; dx00[1] = d000[1] + (d001[1] - 
d000[1])*fx; dx01[1] = d100[1] + (d101[1] - d100[1])*fx; dx10[1] = d010[1] + (d011[1] - d010[1])*fx; dx11[1] = d110[1] + (d111[1] - d110[1])*fx; //----------------------------- XFLOAT dxy0[2], dxy1[2]; dxy0[0] = dx00[0] + (dx10[0] - dx00[0])*fy; dxy1[0] = dx01[0] + (dx11[0] - dx01[0])*fy; dxy0[1] = dx00[1] + (dx10[1] - dx00[1])*fy; dxy1[1] = dx01[1] + (dx11[1] - dx01[1])*fy; //----------------------------- real = dxy0[0] + (dxy1[0] - dxy0[0])*fz; imag = dxy0[1] + (dxy1[1] - dxy0[1])*fz; } } // end of namespace CpuKernels #endif //CPU_UTILITIES_H relion-3.1.3/src/acc/cpu/cpu_kernels/diff2.h000066400000000000000000001322401411340063500205660ustar00rootroot00000000000000#ifndef DIFF2_KERNELS_H_ #define DIFF2_KERNELS_H_ #include #include #include #include #include "src/acc/cpu/cpu_settings.h" #include "src/acc/acc_projector.h" #include "src/acc/cpu/cpu_kernels/cpu_utils.h" #include "src/acc/cpu/cpu_kernels/helper.h" namespace CpuKernels { /* * DIFFERENCE-BASED KERNELS */ // We are specializing 2D and 3D cases, since they benefit from different // optimizations. // Among the optimizations: // sincos lookup table optimization. Function translatePixel calls // sincos(x*tx + y*ty). We precompute 2D lookup tables for x and y directions. // The first dimension is x or y pixel index, and the second dimension is x or y // translation index. Since sin(a+B) = sin(A) * cos(B) + cos(A) * sin(B), and // cos(A+B) = cos(A) * cos(B) - sin(A) * sin(B), we can use lookup table to // compute sin(x*tx + y*ty) and cos(x*tx + y*ty). /* template void diff2_coarse_2D( unsigned long grid_size, XFLOAT *g_eulers, XFLOAT *g_trans_x, XFLOAT *g_trans_y, XFLOAT *g_trans_z, XFLOAT *g_real, XFLOAT *g_imag, AccProjectorKernel &projector, XFLOAT *g_corr, XFLOAT *g_diff2s, unsigned long trans_num, unsigned long image_size ) { //Prefetch euler matrices XFLOAT s_eulers[eulers_per_block * 9]; int xSize = projector.imgX; int ySize = projector.imgY; XFLOAT sin_x[trans_num][xSize], cos_x[trans_num][xSize]; XFLOAT sin_y[trans_num][ySize], cos_y[trans_num][ySize]; // pre-compute sin and cos for x and y component computeSincosLookupTable2D(trans_num, g_trans_x, g_trans_y, xSize, ySize, &sin_x[0][0], &cos_x[0][0], &sin_y[0][0], &cos_y[0][0]); for (unsigned long block = 0; block < grid_size; block++) { for (int i = 0; i < eulers_per_block * 9; i++) s_eulers[i] = g_eulers[(size_t)block * (size_t)eulers_per_block * (size_t)9 + i]; //Setup variables XFLOAT s_ref_real[eulers_per_block][xSize]; XFLOAT s_ref_imag[eulers_per_block][xSize]; XFLOAT s_real[xSize], s_imag[xSize], s_corr[xSize]; XFLOAT diff2s[trans_num][eulers_per_block]; memset(&diff2s[0][0], 0, sizeof(XFLOAT) * trans_num * eulers_per_block); unsigned long pixel = 0; for(int iy = 0; iy < ySize; iy++) { int xstart = 0, xend = xSize; int y = iy; if (iy > projector.maxR) { if (iy >= ySize - projector.maxR) y = iy - ySize; else { // handle special case for one pixel xstart = projector.maxR; xend = xstart + 1; } } for (int i = 0; i < eulers_per_block; i ++) { #pragma ivdep for(int x = xstart; x < xend; x++) { if(REF3D) { projector.project3Dmodel( x, y, s_eulers[i*9 ], s_eulers[i*9+1], s_eulers[i*9+3], s_eulers[i*9+4], s_eulers[i*9+6], s_eulers[i*9+7], s_ref_real[i][x], s_ref_imag[i][x]); } else { projector.project2Dmodel( x, y, s_eulers[i*9 ], s_eulers[i*9+1], s_eulers[i*9+3], s_eulers[i*9+4], s_ref_real[i][x], s_ref_imag[i][x]); } } } for(int x = xstart; x < xend; x++) { s_real[x] = g_real[pixel + x]; s_imag[x] = g_imag[pixel + x]; s_corr[x] = g_corr[pixel + x] * (XFLOAT)0.5; } for (int 
itrans=0; itrans void diff2_coarse_3D( unsigned long grid_size, XFLOAT *g_eulers, XFLOAT *g_trans_x, XFLOAT *g_trans_y, XFLOAT *g_trans_z, XFLOAT *g_real, XFLOAT *g_imag, AccProjectorKernel &projector, XFLOAT *g_corr, XFLOAT *g_diff2s, unsigned long trans_num, unsigned long image_size ) { //Prefetch euler matrices XFLOAT s_eulers[eulers_per_block * 9]; // pre-compute sin and cos for x and y component int xSize = projector.imgX; int ySize = projector.imgY; int zSize = projector.imgZ; XFLOAT sin_x[trans_num][xSize], cos_x[trans_num][xSize]; XFLOAT sin_y[trans_num][ySize], cos_y[trans_num][ySize]; XFLOAT sin_z[trans_num][zSize], cos_z[trans_num][zSize]; computeSincosLookupTable3D(trans_num, g_trans_x, g_trans_y, g_trans_z, xSize, ySize, zSize, &sin_x[0][0], &cos_x[0][0], &sin_y[0][0], &cos_y[0][0], &sin_z[0][0], &cos_z[0][0]); for (unsigned long block = 0; block < grid_size; block++) { for (int i = 0; i < eulers_per_block * 9; i++) s_eulers[i] = g_eulers[(size_t)block * (size_t)eulers_per_block * (size_t)9 + i]; //Setup variables XFLOAT s_ref_real[eulers_per_block][xSize]; XFLOAT s_ref_imag[eulers_per_block][xSize]; XFLOAT s_real[xSize]; XFLOAT s_imag[xSize]; XFLOAT s_corr[xSize]; XFLOAT diff2s[trans_num][eulers_per_block]; memset(&diff2s[0][0], 0, sizeof(XFLOAT) * trans_num * eulers_per_block); unsigned long pixel = 0; for(int iz = 0; iz < zSize; iz ++) { int xstart_z = 0, xend_z = xSize; int z = iz; if (z > projector.maxR) { if (z >= zSize - projector.maxR) z = z - projector.imgZ; else { xstart_z = projector.maxR; xend_z = xstart_z + 1; } } for(int iy = 0; iy < ySize; iy++) { int xstart_y = xstart_z, xend_y = xend_z; int y = iy; if (iy > projector.maxR) { if (iy >= ySize - projector.maxR) y = iy - ySize; else { xstart_y = projector.maxR; xend_y = xstart_y + 1; } } XFLOAT ref_real[xSize], ref_imag[xSize]; XFLOAT imgs_real[xSize], imgs_imag[xSize]; for (int i = 0; i < eulers_per_block; i ++) { #pragma ivdep for(int x = xstart_y; x < xend_y; x++) { projector.project3Dmodel( x, y, z, s_eulers[i*9 ], s_eulers[i*9+1], s_eulers[i*9+2], s_eulers[i*9+3], s_eulers[i*9+4], s_eulers[i*9+5], s_eulers[i*9+6], s_eulers[i*9+7], s_eulers[i*9+8], s_ref_real[i][x], s_ref_imag[i][x]); } } for(int x = xstart_y; x < xend_y; x++) { s_real[x] = g_real[pixel + x]; s_imag[x] = g_imag[pixel + x]; s_corr[x] = g_corr[pixel + x] * (XFLOAT)0.5; } for (int itrans=0; itrans #ifndef __INTEL_COMPILER __attribute__((always_inline)) #endif inline void diff2_coarse( unsigned long grid_size, XFLOAT *g_eulers, XFLOAT *trans_x, XFLOAT *trans_y, XFLOAT *trans_z, #ifdef DEBUG_CUDA XFLOAT *_g_real, #else XFLOAT *g_real, #endif XFLOAT *g_imag, AccProjectorKernel &projector, XFLOAT *g_corr, XFLOAT *g_diff2s, unsigned long translation_num, unsigned long image_size ) { #ifdef DEBUG_CUDA checkedArray g_real; g_real.initCheckedArray(_g_real); #endif const int xSize = projector.imgX; const int ySize = projector.imgY; const int zSize = projector.imgZ; const int maxR = projector.maxR; const unsigned pass_num(ceilfracf(image_size,block_sz)); #ifndef __INTEL_COMPILER // pre-compute sin and cos for x and y component XFLOAT sin_x[translation_num][xSize], cos_x[translation_num][xSize]; XFLOAT sin_y[translation_num][ySize], cos_y[translation_num][ySize]; XFLOAT sin_z[translation_num][zSize], cos_z[translation_num][zSize]; if (DATA3D) { computeSincosLookupTable3D(translation_num, trans_x, trans_y, trans_z, xSize, ySize, zSize, &sin_x[0][0], &cos_x[0][0], &sin_y[0][0], &cos_y[0][0], &sin_z[0][0], &cos_z[0][0]); } else { 
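// 2D case: only the x/y translation lookup tables are needed.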
computeSincosLookupTable2D(translation_num, trans_x, trans_y, xSize, ySize, &sin_x[0][0], &cos_x[0][0], &sin_y[0][0], &cos_y[0][0]); } XFLOAT trans_cos_x[block_sz], trans_sin_x[block_sz]; XFLOAT trans_cos_y[block_sz], trans_sin_y[block_sz]; XFLOAT trans_cos_z[block_sz], trans_sin_z[block_sz]; #endif // not Intel Compiler int x[pass_num][block_sz], y[pass_num][block_sz], z[pass_num][block_sz]; XFLOAT s_real[pass_num][block_sz]; XFLOAT s_imag[pass_num][block_sz]; XFLOAT s_corr[pass_num][block_sz]; // Pre-calculate x/y/z for (unsigned pass = 0; pass < pass_num; pass++) { // finish an entire ref image each block unsigned long start = pass * block_sz; unsigned long elements = block_sz; if (start + block_sz >= image_size) elements = image_size - start; // Rotate the reference image per block_sz, saved in cache #pragma omp simd for (int tid=0; tid maxR) z[pass][tid] -= zSize; } else { x[pass][tid] = pixel % xSize; y[pass][tid] = floorfracf(pixel, xSize); z[pass][tid] = (XFLOAT)0.0; } if (y[pass][tid] > maxR) y[pass][tid] -= ySize; s_real[pass][tid] = g_real[pixel]; s_imag[pass][tid] = g_imag[pixel]; s_corr[pass][tid] = g_corr[pixel] * (XFLOAT)0.5; } // Make sure un-used elements are zeroed - just in case if (elements != block_sz) for (int tid=elements; tid < block_sz; tid++) { x[pass][tid] = (XFLOAT)0.0; y[pass][tid] = (XFLOAT)0.0; } } XFLOAT diff2s[translation_num][eulers_per_block]; for (unsigned long block = 0; block < grid_size; block++) { //Prefetch euler matrices with cacheline friendly index XFLOAT s_eulers[eulers_per_block * 16]; for (int e = 0; e < eulers_per_block; e++) for (int i = 0; i < 9; i++) s_eulers[e*16+i] = g_eulers[(size_t)block * (size_t)eulers_per_block * (size_t)9 + e*9+i]; //Setup variables XFLOAT s_ref_real[eulers_per_block][block_sz]; XFLOAT s_ref_imag[eulers_per_block][block_sz]; memset(&diff2s[0][0], 0, sizeof(XFLOAT) * translation_num * eulers_per_block); //Step through data for (unsigned pass = 0; pass < pass_num; pass++) { // finish an entire ref image each block unsigned long start = pass * block_sz; unsigned long elements = block_sz; if (start + block_sz >= image_size) elements = image_size - start; for (int i = 0; i < eulers_per_block; i ++) { #pragma omp simd for (int tid=0; tid= image_size) continue; XFLOAT real, imag; #ifndef __INTEL_COMPILER if(DATA3D) { // translatePixel(x[tid], y[tid], z[tid], tx, ty, tz, s_real[tid], s_imag[tid], real, imag); XFLOAT s = trans_sin_x[tid] * trans_cos_y[tid] + trans_cos_x[tid] * trans_sin_y[tid]; XFLOAT c = trans_cos_x[tid] * trans_cos_y[tid] - trans_sin_x[tid] * trans_sin_y[tid]; XFLOAT ss = s * trans_cos_z[tid] + c * trans_sin_z[tid]; XFLOAT cc = c * trans_cos_z[tid] - s * trans_sin_z[tid]; real = cc * s_real[pass][tid] - ss * s_imag[pass][tid]; imag = cc * s_imag[pass][tid] + ss * s_real[pass][tid]; } else { // 2D data // translatePixel(x[tid], y[tid], tx, ty, s_real[tid], s_imag[tid], real, imag); XFLOAT ss = trans_sin_x[tid] * trans_cos_y[tid] + trans_cos_x[tid] * trans_sin_y[tid]; XFLOAT cc = trans_cos_x[tid] * trans_cos_y[tid] - trans_sin_x[tid] * trans_sin_y[tid]; real = cc * s_real[pass][tid] - ss * s_imag[pass][tid]; imag = cc * s_imag[pass][tid] + ss * s_real[pass][tid]; } #else // Intel Compiler - accept the (hopefully vectorized) sincos call every iteration rather than caching if(DATA3D) translatePixel(x[pass][tid], y[pass][tid], z[pass][tid], tx, ty, tz, s_real[pass][tid], s_imag[pass][tid], real, imag); else translatePixel(x[pass][tid], y[pass][tid], tx, ty, s_real[pass][tid], s_imag[pass][tid], real, imag); 
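// In either branch the effect is the same: the translation (tx,ty[,tz]) is
// applied to the stored Fourier component as a phase shift, and the shifted
// value is compared against each reference in the loop below.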
#endif // not Intel Compiler #ifdef __INTEL_COMPILER #pragma unroll(eulers_per_block) #endif for (int j = 0; j < eulers_per_block; j ++) { XFLOAT diff_real = s_ref_real[j][tid] - real; XFLOAT diff_imag = s_ref_imag[j][tid] - imag; diff2s[i][j] += (diff_real * diff_real + diff_imag * diff_imag) * s_corr[pass][tid]; } } // for tid } // for each translation } // for each pass XFLOAT *pData = g_diff2s + (size_t)block * (size_t)eulers_per_block * (size_t)translation_num; for(int i=0; i #ifndef __INTEL_COMPILER __attribute__((always_inline)) #endif inline void diff2_fine_2D( unsigned long grid_size, XFLOAT *g_eulers, #ifdef DEBUG_CUDA XFLOAT *_g_imgs_real, #else XFLOAT *g_imgs_real, #endif XFLOAT *g_imgs_imag, XFLOAT *g_trans_x, XFLOAT *g_trans_y, XFLOAT *g_trans_z, AccProjectorKernel &projector, XFLOAT *g_corr_img, XFLOAT *g_diff2s, unsigned long image_size, XFLOAT sum_init, unsigned long orientation_num, unsigned long translation_num, unsigned long num_jobs, unsigned long *d_rot_idx, unsigned long *d_trans_idx, unsigned long *d_job_idx, unsigned long *d_job_num ) { #ifdef DEBUG_CUDA checkedArray g_imgs_real; g_imgs_real.initCheckedArray(_g_imgs_real); #endif // Set up arrays to hold largest possible values int xSize = projector.imgX; int ySize = projector.imgY; XFLOAT sin_x[translation_num][xSize], cos_x[translation_num][xSize]; XFLOAT sin_y[translation_num][ySize], cos_y[translation_num][ySize]; XFLOAT trans_x[translation_num], trans_y[translation_num]; XFLOAT ref_real[xSize], ref_imag[xSize]; XFLOAT imgs_real[xSize], imgs_imag[xSize]; XFLOAT s[translation_num]; // Now do calculations for (unsigned long bid = 0; bid < grid_size; bid++) { unsigned long trans_num = (unsigned long)d_job_num[bid]; unsigned long int iy_part = d_trans_idx[d_job_idx[bid]]; size_t offset = d_rot_idx[d_job_idx[bid]] * 9; XFLOAT e1 = g_eulers[offset ], e2 = g_eulers[offset+1]; XFLOAT e3 = g_eulers[offset+3], e4 = g_eulers[offset+4]; XFLOAT e5 = g_eulers[offset+6], e6 = g_eulers[offset+7]; // build lookup table for sin and cos for(unsigned long i=0; i projector.maxR) { if (iy >= ySize - projector.maxR) y = iy - ySize; else { // handle special case for one pixel xstart = projector.maxR; xend = xstart + 1; } } #pragma omp simd for(int x = xstart; x < xend; x++) { if(REF3D) projector.project3Dmodel(x, y, e1, e2, e3, e4, e5, e6, ref_real[x], ref_imag[x]); else projector.project2Dmodel(x, y, e1, e2, e3, e4, ref_real[x], ref_imag[x]); } #pragma omp simd for(int x = xstart; x < xend; x++) { #ifdef ACC_DOUBLE_PRECISION XFLOAT half_corr = sqrt (g_corr_img[pixel + x] * (XFLOAT)0.5); #else XFLOAT half_corr = sqrtf(g_corr_img[pixel + x] * (XFLOAT)0.5); #endif ref_real[x] *= half_corr; ref_imag[x] *= half_corr; imgs_real[x] = g_imgs_real[pixel + x] * half_corr; imgs_imag[x] = g_imgs_imag[pixel + x] * half_corr; } for (unsigned long itrans=0; itrans g_imgs_real; g_imgs_real.initCheckedArray(_g_imgs_real); #endif // Set up arrays to hold largest possible values int xSize = projector.imgX; int ySize = projector.imgY; int zSize = projector.imgZ; XFLOAT sin_x[translation_num][xSize], cos_x[translation_num][xSize]; XFLOAT sin_y[translation_num][ySize], cos_y[translation_num][ySize]; XFLOAT sin_z[translation_num][zSize], cos_z[translation_num][zSize]; XFLOAT trans_x[translation_num], trans_y[translation_num], trans_z[translation_num]; XFLOAT ref_real[xSize], ref_imag[xSize]; XFLOAT imgs_real[xSize], imgs_imag[xSize]; XFLOAT s[translation_num]; // Now do calculations for (unsigned long bid = 0; bid < grid_size; bid++) { unsigned long 
trans_num = (unsigned long)d_job_num[bid]; unsigned long int iy_part = d_trans_idx[d_job_idx[bid]]; size_t offset = d_rot_idx[d_job_idx[bid]] * 9; XFLOAT e1 = g_eulers[offset ], e2 = g_eulers[offset+1]; XFLOAT e3 = g_eulers[offset+2], e4 = g_eulers[offset+3]; XFLOAT e5 = g_eulers[offset+4], e6 = g_eulers[offset+5]; XFLOAT e7 = g_eulers[offset+6], e8 = g_eulers[offset+7]; XFLOAT e9 = g_eulers[offset+8]; // pre-compute sin and cos for x and y component for(unsigned long i=0; i projector.maxR) { if (z >= zSize - projector.maxR) z = z - projector.imgZ; else { xstart_z = projector.maxR; xend_z = xstart_z + 1; } } for(int iy = 0; iy < ySize; iy++) { int xstart_y = xstart_z, xend_y = xend_z; int y = iy; if (iy > projector.maxR) { if (iy >= ySize - projector.maxR) y = iy - ySize; else { xstart_y = projector.maxR; xend_y = xstart_y + 1; } } #pragma omp simd for(int x = xstart_y; x < xend_y; x++) { projector.project3Dmodel(x, y, z, e1, e2, e3, e4, e5, e6, e7, e8, e9, ref_real[x], ref_imag[x]); } #pragma omp simd for(int x = xstart_y; x < xend_y; x++) { #ifdef ACC_DOUBLE_PRECISION XFLOAT half_corr = sqrt (g_corr_img[pixel + x] * (XFLOAT)0.5); #else XFLOAT half_corr = sqrtf(g_corr_img[pixel + x] * (XFLOAT)0.5); #endif ref_real[x] *= half_corr; ref_imag[x] *= half_corr; imgs_real[x] = g_imgs_real[pixel + x] * half_corr; imgs_imag[x] = g_imgs_imag[pixel + x] * half_corr; } for (unsigned long itrans=0; itrans #ifndef __INTEL_COMPILER __attribute__((always_inline)) #endif inline void diff2_CC_coarse_2D( unsigned long grid_size, XFLOAT *g_eulers, #ifdef DEBUG_CUDA XFLOAT *_g_imgs_real, #else XFLOAT *g_imgs_real, #endif XFLOAT *g_imgs_imag, XFLOAT *g_trans_x, XFLOAT *g_trans_y, AccProjectorKernel &projector, XFLOAT *g_corr_img, XFLOAT *g_diff2, unsigned long trans_num, unsigned long image_size, XFLOAT exp_local_sqrtXi2 ) { #ifdef DEBUG_CUDA checkedArray g_imgs_real; g_imgs_real.initCheckedArray(_g_imgs_real); #endif // pre-compute sin and cos for x and y direction int xSize = projector.imgX; int ySize = projector.imgY; XFLOAT sin_x[trans_num][xSize], cos_x[trans_num][xSize]; XFLOAT sin_y[trans_num][ySize], cos_y[trans_num][ySize]; computeSincosLookupTable2D(trans_num, g_trans_x, g_trans_y, xSize, ySize, &sin_x[0][0], &cos_x[0][0], &sin_y[0][0], &cos_y[0][0]); // Set up other arrays XFLOAT s_weight[trans_num][xSize]; XFLOAT s_norm[trans_num][xSize]; XFLOAT ref_real[xSize], ref_imag[xSize]; XFLOAT img_real[xSize], img_imag[xSize], corr_imag[xSize]; for (unsigned long iorient = 0; iorient < grid_size; iorient++) { XFLOAT e0,e1,e3,e4,e6,e7; e0 = g_eulers[iorient*9 ]; e1 = g_eulers[iorient*9+1]; e3 = g_eulers[iorient*9+3]; e4 = g_eulers[iorient*9+4]; e6 = g_eulers[iorient*9+6]; e7 = g_eulers[iorient*9+7]; memset(s_weight, 0, sizeof(XFLOAT) * xSize * trans_num); memset(s_norm, 0, sizeof(XFLOAT) * xSize * trans_num); unsigned long pixel = 0; for(int iy = 0; iy < ySize; iy++) { int xstart = 0, xend = xSize; int y = iy; if (iy > projector.maxR) { if (iy >= ySize - projector.maxR) y = iy - ySize; else { // handle special case for one pixel xstart = projector.maxR; xend = xstart + 1; } } #pragma omp simd for(int x = xstart; x < xend; x++) { if(REF3D) projector.project3Dmodel( x, y, e0, e1, e3, e4, e6, e7, ref_real[x], ref_imag[x]); else projector.project2Dmodel( x, y, e0, e1, e3, e4, ref_real[x], ref_imag[x]); img_real[x] = g_imgs_real[pixel + x]; img_imag[x] = g_imgs_imag[pixel + x]; corr_imag[x] = g_corr_img[pixel + x]; } for(unsigned long itrans=0; itrans g_imgs_real; g_imgs_real.initCheckedArray(_g_imgs_real); 
#endif // pre-compute sin and cos for x, y, and z direction int xSize = projector.imgX; int ySize = projector.imgY; int zSize = projector.imgZ; XFLOAT sin_x[trans_num][xSize], cos_x[trans_num][xSize]; XFLOAT sin_y[trans_num][ySize], cos_y[trans_num][ySize]; XFLOAT sin_z[trans_num][zSize], cos_z[trans_num][zSize]; computeSincosLookupTable3D(trans_num, g_trans_x, g_trans_y, g_trans_z, xSize, ySize, zSize, &sin_x[0][0], &cos_x[0][0], &sin_y[0][0], &cos_y[0][0], &sin_z[0][0], &cos_z[0][0]); // Set up some arrays XFLOAT s_weight[trans_num][xSize]; XFLOAT s_norm[trans_num][xSize]; XFLOAT ref_real[xSize], ref_imag[xSize]; XFLOAT img_real[xSize], img_imag[xSize], corr_imag[xSize]; for (unsigned long iorient = 0; iorient < grid_size; iorient++) { XFLOAT e0, e1, e2, e3, e4, e5, e6, e7, e8; e0 = g_eulers[iorient*9 ]; e1 = g_eulers[iorient*9+1]; e2 = g_eulers[iorient*9+2]; e3 = g_eulers[iorient*9+3]; e4 = g_eulers[iorient*9+4]; e5 = g_eulers[iorient*9+5]; e6 = g_eulers[iorient*9+6]; e7 = g_eulers[iorient*9+7]; e8 = g_eulers[iorient*9+8]; memset(s_weight, 0, sizeof(XFLOAT) * xSize * trans_num); memset(s_norm, 0, sizeof(XFLOAT) * xSize * trans_num); unsigned long pixel = 0; for(int iz = 0; iz < zSize; iz ++) { int xstart_z = 0, xend_z = xSize; int z = iz; if (z > projector.maxR) { if (z >= zSize - projector.maxR) z = z - projector.imgZ; else { xstart_z = projector.maxR; xend_z = xstart_z + 1; } } for(int iy = 0; iy < ySize; iy++) { int xstart_y = xstart_z, xend_y = xend_z; int y = iy; if (iy > projector.maxR) { if (iy >= ySize - projector.maxR) y = iy - ySize; else { xstart_y = projector.maxR; xend_y = xstart_y + 1; } } #pragma omp simd for(int x = xstart_y; x < xend_y; x++) { projector.project3Dmodel( x, y, z, e0, e1, e2, e3, e4, e5, e6, e7, e8, ref_real[x], ref_imag[x]); img_real[x] = g_imgs_real[pixel + x]; img_imag[x] = g_imgs_imag[pixel + x]; corr_imag[x] = g_corr_img[pixel + x]; } for(int itrans=0; itrans #ifndef __INTEL_COMPILER __attribute__((always_inline)) #endif inline void diff2_CC_fine_2D( unsigned long grid_size, XFLOAT *g_eulers, #ifdef DEBUG_CUDA XFLOAT *_g_imgs_real, #else XFLOAT *g_imgs_real, #endif XFLOAT *g_imgs_imag, XFLOAT *g_trans_x, XFLOAT *g_trans_y, AccProjectorKernel &projector, XFLOAT *g_corr_img, XFLOAT *g_diff2s, unsigned long image_size, XFLOAT sum_init, XFLOAT exp_local_sqrtXi2, unsigned long orientation_num, unsigned long translation_num, unsigned long num_jobs, unsigned long *d_rot_idx, unsigned long *d_trans_idx, unsigned long *d_job_idx, unsigned long *d_job_num ) { #ifdef DEBUG_CUDA checkedArray g_imgs_real; g_imgs_real.initCheckedArray(_g_imgs_real); #endif // Set up arrays to hold largest possible values int xSize = projector.imgX; int ySize = projector.imgY; XFLOAT sin_x[translation_num][xSize], cos_x[translation_num][xSize]; XFLOAT sin_y[translation_num][ySize], cos_y[translation_num][ySize]; XFLOAT trans_x[translation_num], trans_y[translation_num]; XFLOAT s [translation_num][xSize]; XFLOAT s_cc[translation_num][xSize]; XFLOAT ref_real[xSize], ref_imag[xSize]; XFLOAT img_real[xSize], img_imag[xSize], corr_imag[xSize]; // Now do calculations for (unsigned long bid = 0; bid < grid_size; bid++) { unsigned long trans_num = d_job_num[bid]; //how many transes we have for this rot // pre-compute sin and cos for x and y direction for(unsigned long i=0; i projector.maxR) { if (iy >= ySize - projector.maxR) y = iy - ySize; else { // handle special case for one pixel xstart = projector.maxR; xend = xstart + 1; } } #pragma omp simd for(int x = xstart; x < xend; x++) { 
if(REF3D) projector.project3Dmodel( x, y, e0, e1, e3, e4, e6, e7, ref_real[x], ref_imag[x]); else projector.project2Dmodel( x, y, e0, e1, e3, e4, ref_real[x], ref_imag[x]); img_real[x] = g_imgs_real[pixel + x]; img_imag[x] = g_imgs_imag[pixel + x]; corr_imag[x] = g_corr_img [pixel + x]; } for (unsigned long itrans=0; itrans g_imgs_real; g_imgs_real.initCheckedArray(_g_imgs_real); #endif // Set up arrays to hold largest possible values int xSize = projector.imgX; int ySize = projector.imgY; int zSize = projector.imgZ; XFLOAT sin_x[translation_num][xSize], cos_x[translation_num][xSize]; XFLOAT sin_y[translation_num][ySize], cos_y[translation_num][ySize]; XFLOAT sin_z[translation_num][zSize], cos_z[translation_num][zSize]; XFLOAT trans_x[translation_num], trans_y[translation_num], trans_z[translation_num]; XFLOAT s [translation_num][xSize]; XFLOAT s_cc[translation_num][xSize]; XFLOAT ref_real[xSize], ref_imag[xSize]; XFLOAT img_real[xSize], img_imag[xSize], corr_imag[xSize]; // Now do calculations for (unsigned long bid = 0; bid < grid_size; bid++) { unsigned long trans_num = d_job_num[bid]; //how many transes we have for this rot // pre-compute sin and cos for x and y direction for(unsigned long i=0; i projector.maxR) { if (z >= zSize - projector.maxR) z = z - projector.imgZ; else { xstart_z = projector.maxR; xend_z = xstart_z + 1; } } for(int iy = 0; iy < ySize; iy++) { int xstart_y = xstart_z, xend_y = xend_z; int y = iy; if (iy > projector.maxR) { if (iy >= ySize - projector.maxR) y = iy - ySize; else { xstart_y = projector.maxR; xend_y = xstart_y + 1; } } #pragma omp simd for(int x = xstart_y; x < xend_y; x++) { projector.project3Dmodel( x, y, z, e0, e1, e2, e3, e4, e5, e6, e7, e8, ref_real[x], ref_imag[x]); img_real[x] = g_imgs_real[pixel + x]; img_imag[x] = g_imgs_imag[pixel + x]; corr_imag[x] = g_corr_img [pixel + x]; } for (unsigned long itrans=0; itrans::max(); //large negative number else g_weights[pos+itrans] = g_pdf_orientation[ix] + g_pdf_offset[c_itrans] + min_diff2 - g_weights[pos+itrans]; } } } void RNDnormalDitributionComplexWithPowerModulation2D(ACCCOMPLEX* Image, size_t xdim, XFLOAT *spectra) { size_t x,y,size; size = xdim*((xdim-1)*2); for(size_t i=0; i=xdim) y -= (xdim-1)*2; x = i % xdim; int ires = (int)(sqrtf(x*x + y*y)); if(ires=xdim) z -= (xdim-1)*2; //assuming square input images (particles) if(y>=xdim) y -= (xdim-1)*2; //assuming square input images (particles) int ires = (int)(sqrtf(x*x + y*y + z*z)); if(ires radius_p) { g_sum[tid] += (XFLOAT)1.0; g_sum_bg[tid] += img_pixels; } else { #if defined(ACC_DOUBLE_PRECISION) raisedcos = 0.5 + 0.5 * cos ( (radius_p - r) / cosine_width * M_PI); #else raisedcos = 0.5 + 0.5 * cosf( (radius_p - r) / cosine_width * M_PI); #endif g_sum[tid] += raisedcos; g_sum_bg[tid] += raisedcos * img_pixels; } } } } // tid } // bid } void cosineFilter( int block_dim, int block_size, XFLOAT *vol, long int vol_size, long int xdim, long int ydim, long int zdim, long int xinit, long int yinit, long int zinit, bool do_noise, XFLOAT *noise, XFLOAT radius, XFLOAT radius_p, XFLOAT cosine_width, XFLOAT bg_value) { for(int bid=0; bid radius_p) img_pixels=defVal; else { #if defined(ACC_DOUBLE_PRECISION) raisedcos = 0.5 + 0.5 * cos ( (radius_p - r) / cosine_width * M_PI); #else raisedcos = 0.5 + 0.5 * cosf( (radius_p - r) / cosine_width * M_PI); #endif img_pixels= img_pixels*(1-raisedcos) + defVal*raisedcos; } vol[texel]=img_pixels; } } } // tid } // bid } template #ifndef __INTEL_COMPILER __attribute__((always_inline)) inline #endif void 
cpu_translate2D(T * g_image_in, T * g_image_out, size_t image_size, int xdim, int ydim, int dx, int dy) { int x,y,xp,yp; size_t new_pixel; #ifdef DEBUG_CUDA if (image_size > (size_t)std::numeric_limits::max()) ACC_PTR_DEBUG_INFO("cpu_translate2D: image_size > std::numeric_limits::max()"); #endif for(size_t pixel=0; pixel=0 && xp>=0 && yp=0 && new_pixel #ifndef __INTEL_COMPILER __attribute__((always_inline)) inline #endif void cpu_translate3D(T * g_image_in, T* g_image_out, size_t image_size, int xdim, int ydim, int zdim, int dx, int dy, int dz) { int x,y,z,xp,yp,zp,xy; size_t new_voxel; #ifdef DEBUG_CUDA if (image_size > (size_t)std::numeric_limits::max()) ACC_PTR_DEBUG_INFO("cpu_translate3D: image_size > std::numeric_limits::max()"); #endif for(size_t voxel=0; voxel=0 && yp>=0 && xp>=0 && zp=0 && new_voxel #ifndef __INTEL_COMPILER __attribute__((always_inline)) inline #endif void centerFFT_2D( int batch_size, size_t pixel_start, size_t pixel_end, T *img_in, size_t image_size, int xdim, int ydim, int xshift, int yshift) { #ifdef DEBUG_CUDA if (image_size > (size_t)std::numeric_limits::max()) ACC_PTR_DEBUG_INFO("centerFFT_2D: image_size > std::numeric_limits::max()"); if (image_size*(size_t)batch_size > (size_t)std::numeric_limits::max()) ACC_PTR_DEBUG_INFO("centerFFT_2D: image_size*batch_size > std::numeric_limits::max()"); if (pixel_end > image_size) ACC_PTR_DEBUG_INFO("centerFFT_2D: pixel_end > image_size"); #endif size_t pix_start = pixel_start; size_t pix_end = pixel_end; for(int batch=0; batch( int batch_size, size_t pixel_start, size_t pixel_end, float *img_in, size_t image_size, int xdim, int ydim, int xshift, int yshift); template void centerFFT_2D( int batch_size, size_t pixel_start, size_t pixel_end, double *img_in, size_t image_size, int xdim, int ydim, int xshift, int yshift); template #ifndef __INTEL_COMPILER __attribute__((always_inline)) inline #endif void centerFFT_3D( int batch_size, size_t pixel_start, size_t pixel_end, T *img_in, size_t image_size, int xdim, int ydim, int zdim, int xshift, int yshift, int zshift) { #ifdef DEBUG_CUDA if (image_size > (size_t)std::numeric_limits::max()) ACC_PTR_DEBUG_INFO("centerFFT_3D: image_size > std::numeric_limits::max()"); if (image_size*(size_t)batch_size > (size_t)std::numeric_limits::max()) ACC_PTR_DEBUG_INFO("centerFFT_3D: image_size*batch_size > std::numeric_limits::max()"); if (pixel_end > image_size) ACC_PTR_DEBUG_INFO("centerFFT_3D: pixel_end > image_size"); #endif size_t pix_start = pixel_start; size_t pix_end = pixel_end; int xydim = xdim*ydim; for(int batch=0; batch( int batch_size, size_t pixel_start, size_t pixel_end, float *img_in, size_t image_size, int xdim, int ydim, int zdim, int xshift, int yshift, int zshift); template void centerFFT_3D( int batch_size, size_t pixel_start, size_t pixel_end, double *img_in, size_t image_size, int xdim, int ydim, int zdim, int xshift, int yshift, int zshift); /* TODO - if create optimized CPU version of autopicker * All these functions need to be converted to use internal loops rather than * block and thread indices to operate like other active functions seen in this file void probRatio( int blockIdx_x, int threadIdx_x, XFLOAT *d_Mccf, XFLOAT *d_Mpsi, XFLOAT *d_Maux, XFLOAT *d_Mmean, XFLOAT *d_Mstddev, size_t image_size, XFLOAT normfft, XFLOAT sum_ref_under_circ_mask, XFLOAT sum_ref2_under_circ_mask, XFLOAT expected_Pratio, int NpsiThisBatch, int startPsi, int totalPsis) { |* PLAN TO: * * 1) Pre-filter * d_Mstddev[i] = 1 / (2*d_Mstddev[i]) ( if d_Mstddev[pixel] > 1E-10 ) * 
d_Mstddev[i] = 1 ( else ) * * 2) Set * sum_ref2_under_circ_mask /= 2. * * 3) Total expression becomes * diff2 = ( exp(k) - 1.f ) / (expected_Pratio - 1.f) * where * k = (normfft * d_Maux[pixel] + d_Mmean[pixel] * sum_ref_under_circ_mask)*d_Mstddev[i] + sum_ref2_under_circ_mask * *| size_t pixel = (size_t)threadIdx_x + (size_t)blockIdx_x*(size_t)PROBRATIO_BLOCK_SIZE; if(pixel (XFLOAT)1E-10) diff2 *= d_Mstddev[pixel]; diff2 += sum_ref2_under_circ_mask; #if defined(ACC_DOUBLE_PRECISION) diff2 = exp(-diff2 / 2.); // exponentiate to reflect the Gaussian error model. sigma=1 after normalization, 0.4=1/sqrt(2pi) #else diff2 = expf(-diff2 / 2.f); #endif // Store fraction of (1 - probability-ratio) wrt (1 - expected Pratio) diff2 = (diff2 - (XFLOAT)1.0) / (expected_Pratio - (XFLOAT)1.0); if (diff2 > Kccf) { Kccf = diff2; Kpsi = (startPsi + psi)*(360/totalPsis); } } d_Mccf[pixel] = Kccf; if (Kpsi >= 0.) d_Mpsi[pixel] = Kpsi; } } void rotateOnly(int blockIdx_x, int blockIdx_y, int threadIdx_x, ACCCOMPLEX *d_Faux, XFLOAT psi, AccProjectorKernel &projector, int startPsi ) { int proj = blockIdx_y; size_t image_size=(size_t)projector.imgX*(size_t)projector.imgY; size_t pixel = (size_t)threadIdx_x + (size_t)blockIdx_x*(size_t)BLOCK_SIZE; if(pixel projector.maxR) { if (y >= projector.imgY - projector.maxR) y = y - projector.imgY; else x = projector.maxR; } XFLOAT sa, ca; #if defined(ACC_DOUBLE_PRECISION) sincos((proj+startPsi)*psi, &sa, &ca); #else sincosf((proj+startPsi)*psi, &sa, &ca); #endif ACCCOMPLEX val; projector.project2Dmodel( x,y, ca, -sa, sa, ca, val.x,val.y); long int out_pixel = proj*image_size + pixel; d_Faux[out_pixel].x =val.x; d_Faux[out_pixel].y =val.y; } } void rotateAndCtf( int blockIdx_x, int blockIdx_y, int threadIdx_x, ACCCOMPLEX *d_Faux, XFLOAT *d_ctf, XFLOAT psi, AccProjectorKernel &projector, int startPsi ) { int proj = blockIdx_y; size_t image_size=(size_t)projector.imgX*(size_t)projector.imgY; size_t pixel = (size_t)threadIdx_x + (size_t)blockIdx_x*(size_t)BLOCK_SIZE; if(pixel projector.maxR) { if (y >= projector.imgY - projector.maxR) y = y - projector.imgY; else x = projector.maxR; } XFLOAT sa, ca; #if defined(ACC_DOUBLE_PRECISION) sincos((proj+startPsi)*psi, &sa, &ca); #else sincosf((proj+startPsi)*psi, &sa, &ca); #endif ACCCOMPLEX val; projector.project2Dmodel( x,y, ca, -sa, sa, ca, val.x,val.y); long int out_pixel = proj*image_size + pixel; d_Faux[out_pixel].x =val.x*d_ctf[pixel]; d_Faux[out_pixel].y =val.y*d_ctf[pixel]; } } void convol_A( int blockIdx_x, int threadIdx_x, ACCCOMPLEX *d_A, ACCCOMPLEX *d_B, size_t image_size) { size_t pixel = (size_t)threadIdx_x + (size_t)blockIdx_x*(size_t)BLOCK_SIZE; if(pixel void cpu_kernel_multi( T *A, T *OUT, T S, size_t image_size) { #ifdef DEBUG_CUDA if (image_size < 0) ACC_PTR_DEBUG_INFO("cpu_kernel_multi: image_size < 0"); #endif for (size_t i = 0; i < image_size; i ++) OUT[i] = A[i]*S; } template void cpu_kernel_multi( T *A, T S, size_t image_size) { #ifdef DEBUG_CUDA if (image_size < 0) ACC_PTR_DEBUG_INFO("cpu_kernel_multi2: image_size < 0"); #endif for (size_t i = 0; i < image_size; i ++) A[i] *= S; } template void cpu_kernel_multi( T *A, T *B, T *OUT, T S, size_t image_size) { #ifdef DEBUG_CUDA if (image_size < 0) ACC_PTR_DEBUG_INFO("cpu_kernel_multi3: image_size < 0"); #endif for (size_t i = 0; i < image_size; i ++) OUT[i] = A[i]*B[i]*S; } /* void batch_multi( int blockIdx_x, int blockIdx_y, int threadIdx_x, XFLOAT *A, XFLOAT *B, XFLOAT *OUT, XFLOAT S, size_t image_size) { sise_t pixel = (size_t)threadIdx_x + 
(size_t)blockIdx_x*(size_t)BLOCK_SIZE; if(pixel 0) Mstddev[pixel] = sqrt(temp); else Mstddev[pixel] = 0; } } void square(int blockIdx_x, int threadIdx_x, XFLOAT *A, size_t image_size) { size_t pixel = (size_t)threadIdx_x + (size_t)blockIdx_x*(size_t)BLOCK_SIZE; if(pixel #ifndef __INTEL_COMPILER __attribute__((always_inline)) inline #endif void cpu_kernel_make_eulers_2D(int grid_size, int block_size, XFLOAT *alphas, XFLOAT *eulers, unsigned long orientation_num) { #ifdef DEBUG_CUDA if ((size_t)grid_size*(size_t)block_size > (size_t)std::numeric_limits::max()) ACC_PTR_DEBUG_INFO("cpu_kernel_make_eulers_2D: grid_size*block_size > std::numeric_limits::max()"); #endif for(int blockIdx_x=0; blockIdx_x<(int)(grid_size); blockIdx_x++) { for(int threadIdx_x=0; threadIdx_x= orientation_num) return; XFLOAT ca, sa; XFLOAT a = alphas[oid] * (XFLOAT)PI / (XFLOAT)180.0; #ifdef ACC_DOUBLE_PRECISION sincos(a, &sa, &ca); #else sincosf(a, &sa, &ca); #endif if(!invert) { eulers[9 * oid + 0] = ca;//00 eulers[9 * oid + 1] = sa;//01 eulers[9 * oid + 2] = 0 ;//02 eulers[9 * oid + 3] =-sa;//10 eulers[9 * oid + 4] = ca;//11 eulers[9 * oid + 5] = 0 ;//12 eulers[9 * oid + 6] = 0 ;//20 eulers[9 * oid + 7] = 0 ;//21 eulers[9 * oid + 8] = 1 ;//22 } else { eulers[9 * oid + 0] = ca;//00 eulers[9 * oid + 1] =-sa;//10 eulers[9 * oid + 2] = 0 ;//20 eulers[9 * oid + 3] = sa;//01 eulers[9 * oid + 4] = ca;//11 eulers[9 * oid + 5] = 0 ;//21 eulers[9 * oid + 6] = 0 ;//02 eulers[9 * oid + 7] = 0 ;//12 eulers[9 * oid + 8] = 1 ;//22 } } // threadIdx_x } // blockIdx_x } template #ifndef __INTEL_COMPILER __attribute__((always_inline)) inline #endif void cpu_kernel_make_eulers_3D(int grid_size, int block_size, XFLOAT *alphas, XFLOAT *betas, XFLOAT *gammas, XFLOAT *eulers, unsigned long orientation_num, XFLOAT *L, XFLOAT *R) { #ifdef DEBUG_CUDA if ((size_t)grid_size*(size_t)block_size > (size_t)std::numeric_limits::max()) ACC_PTR_DEBUG_INFO("cpu_kernel_make_eulers_3D: grid_size*block_size > std::numeric_limits::max()"); #endif for(int blockIdx_x=0; blockIdx_x<(int)(grid_size); blockIdx_x++) { for(int threadIdx_x=0; threadIdx_x= orientation_num) return; for (int i = 0; i < 9; i ++) B[i] = (XFLOAT) 0.f; a = alphas[oid] * (XFLOAT)PI / (XFLOAT)180.0; b = betas[oid] * (XFLOAT)PI / (XFLOAT)180.0; g = gammas[oid] * (XFLOAT)PI / (XFLOAT)180.0; #ifdef ACC_DOUBLE_PRECISION sincos(a, &sa, &ca); sincos(b, &sb, &cb); sincos(g, &sg, &cg); #else sincosf(a, &sa, &ca); sincosf(b, &sb, &cb); sincosf(g, &sg, &cg); #endif cc = cb * ca; cs = cb * sa; sc = sb * ca; ss = sb * sa; A[0] = ( cg * cc - sg * sa);//00 A[1] = ( cg * cs + sg * ca);//01 A[2] = (-cg * sb ) ;//02 A[3] = (-sg * cc - cg * sa);//10 A[4] = (-sg * cs + cg * ca);//11 A[5] = ( sg * sb ) ;//12 A[6] = ( sc ) ;//20 A[7] = ( ss ) ;//21 A[8] = ( cb ) ;//22 if (doR) { for (int i = 0; i < 9; i++) B[i] = 0.f; for (int i = 0; i < 3; i++) for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) B[i * 3 + j] += A[i * 3 + k] * R[k * 3 + j]; } else for (int i = 0; i < 9; i++) B[i] = A[i]; if (doL) { if (doR) for (int i = 0; i < 9; i++) A[i] = B[i]; for (int i = 0; i < 9; i++) B[i] = 0.f; for (int i = 0; i < 3; i++) for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) B[i * 3 + j] += L[i * 3 + k] * A[k * 3 + j]; } if(invert) { if (doL) // this could have anisotropy, so inverse neq transpose!!! 
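// (Illustrative note, an assumption added for clarity:) when an L matrix is
// applied it may be non-orthonormal (anisotropic), so the inverse below is
// computed explicitly as the adjugate divided by the determinant,
// inverse(B) = adj(B) / det(B), rather than as a transpose; the transpose
// branch further down covers the pure-rotation case.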
{ XFLOAT det; det = B[0] * (B[4] * B[8] - B[7] * B[5]) - B[1] * (B[3] * B[8] - B[6] * B[5]) + B[2] * (B[3] * B[7] - B[6] * B[4]); eulers[9 * oid + 0] = (B[4] * B[8] - B[7] * B[5]) / det; eulers[9 * oid + 1] = (B[7] * B[2] - B[1] * B[8]) / det; eulers[9 * oid + 2] = (B[1] * B[5] - B[4] * B[2]) / det; eulers[9 * oid + 3] = (B[5] * B[6] - B[8] * B[3]) / det; eulers[9 * oid + 4] = (B[8] * B[0] - B[2] * B[6]) / det; eulers[9 * oid + 5] = (B[2] * B[3] - B[5] * B[0]) / det; eulers[9 * oid + 6] = (B[3] * B[7] - B[6] * B[4]) / det; eulers[9 * oid + 7] = (B[6] * B[1] - B[0] * B[7]) / det; eulers[9 * oid + 8] = (B[0] * B[4] - B[3] * B[1]) / det; } else { eulers[9 * oid + 0] = B[0];//00 eulers[9 * oid + 1] = B[3];//01 eulers[9 * oid + 2] = B[6];//02 eulers[9 * oid + 3] = B[1];//10 eulers[9 * oid + 4] = B[4];//11 eulers[9 * oid + 5] = B[7];//12 eulers[9 * oid + 6] = B[2];//20 eulers[9 * oid + 7] = B[5];//21 eulers[9 * oid + 8] = B[8];//22 } } else { eulers[9 * oid + 0] = B[0];//00 eulers[9 * oid + 1] = B[1];//10 eulers[9 * oid + 2] = B[2];//20 eulers[9 * oid + 3] = B[3];//01 eulers[9 * oid + 4] = B[4];//11 eulers[9 * oid + 5] = B[5];//21 eulers[9 * oid + 6] = B[6];//02 eulers[9 * oid + 7] = B[7];//12 eulers[9 * oid + 8] = B[8];//22 } } // threadIdx_x } // blockIdx_x } } // end of namespace CpuKernels // ------------------------------- Some explicit template instantiations template void CpuKernels::cpu_translate2D(XFLOAT *, XFLOAT*, size_t, int, int, int, int); template void CpuKernels::cpu_translate3D(XFLOAT *, XFLOAT *, size_t, int, int, int, int, int, int); template void CpuKernels::cpu_kernel_multi( XFLOAT *, XFLOAT, size_t); template void CpuKernels::cpu_kernel_make_eulers_3D(int, int, XFLOAT *, XFLOAT *, XFLOAT *, XFLOAT *, unsigned long, XFLOAT *, XFLOAT *); template void CpuKernels::cpu_kernel_make_eulers_3D(int, int, XFLOAT *, XFLOAT *, XFLOAT *, XFLOAT *, unsigned long, XFLOAT *, XFLOAT *); template void CpuKernels::cpu_kernel_make_eulers_3D(int, int, XFLOAT *, XFLOAT *, XFLOAT *, XFLOAT *, unsigned long, XFLOAT *, XFLOAT *); template void CpuKernels::cpu_kernel_make_eulers_3D(int, int, XFLOAT *, XFLOAT *, XFLOAT *, XFLOAT *, unsigned long, XFLOAT *, XFLOAT *); template void CpuKernels::cpu_kernel_make_eulers_3D(int, int, XFLOAT *, XFLOAT *, XFLOAT *, XFLOAT *, unsigned long, XFLOAT *, XFLOAT *); template void CpuKernels::cpu_kernel_make_eulers_3D(int, int, XFLOAT *, XFLOAT *, XFLOAT *, XFLOAT *, unsigned long, XFLOAT *, XFLOAT *); template void CpuKernels::cpu_kernel_make_eulers_3D(int, int, XFLOAT *, XFLOAT *, XFLOAT *, XFLOAT *, unsigned long, XFLOAT *, XFLOAT *); template void CpuKernels::cpu_kernel_make_eulers_3D(int, int, XFLOAT *, XFLOAT *, XFLOAT *, XFLOAT *, unsigned long, XFLOAT *, XFLOAT *); template void CpuKernels::cpu_kernel_make_eulers_2D(int, int, XFLOAT *, XFLOAT *, unsigned long); template void CpuKernels::cpu_kernel_make_eulers_2D(int, int, XFLOAT *, XFLOAT *, unsigned long); // ---------------------------------------------------------------------- relion-3.1.3/src/acc/cpu/cpu_kernels/helper.h000066400000000000000000000472271411340063500210650ustar00rootroot00000000000000#ifndef HELPER_KERNELS_H_ #define HELPER_KERNELS_H_ #include #include #include #include "src/macros.h" #include "src/acc/cpu/cpu_settings.h" #include "src/acc/cpu/cpu_kernels/cpu_utils.h" #include "src/acc/acc_projector.h" #include "src/acc/acc_projectorkernel_impl.h" namespace CpuKernels { template #ifndef __INTEL_COMPILER __attribute__((always_inline)) inline #endif void weights_exponent_coarse( T 
*g_pdf_orientation, bool *g_pdf_orientation_zeros, T *g_pdf_offset, bool *g_pdf_offset_zeros, T *g_weights, T g_min_diff2, unsigned long nr_coarse_orient, unsigned long nr_coarse_trans, size_t max_idx) { for (size_t idx = 0; idx < max_idx; idx++) { unsigned long itrans = idx % nr_coarse_trans; unsigned long iorient = (idx - itrans) / nr_coarse_trans; T diff2 = g_weights[idx]; if( diff2 < g_min_diff2 || g_pdf_orientation_zeros[iorient] || g_pdf_offset_zeros[itrans]) // TODO - replace with lowest() when C++11 is supported g_weights[idx] = -std::numeric_limits::max(); //large negative number else g_weights[idx] = g_pdf_orientation[iorient] + g_pdf_offset[itrans] + g_min_diff2 - diff2; } } template #ifndef __INTEL_COMPILER __attribute__((always_inline)) inline #endif void exponentiate( T *g_array, T add, size_t size) { for (size_t idx = 0; idx < size; idx++) { T a = g_array[idx] + add; #ifdef ACC_DOUBLE_PRECISION if (a < -700.) g_array[idx] = 0.; else g_array[idx] = exp(a); #else if (a < -88.f) g_array[idx] = 0.f; else g_array[idx] = expf(a); #endif } } template #ifndef __INTEL_COMPILER __attribute__((always_inline)) inline #endif void collect2jobs( int grid_size, int block_size, XFLOAT *g_oo_otrans_x, // otrans-size -> make const XFLOAT *g_oo_otrans_y, // otrans-size -> make const XFLOAT *g_oo_otrans_z, // otrans-size -> make const XFLOAT *g_myp_oo_otrans_x2y2z2, // otrans-size -> make const XFLOAT *g_i_weights, XFLOAT op_significant_weight, // TODO Put in const XFLOAT op_sum_weight, // TODO Put in const unsigned long coarse_trans, unsigned long oversamples_trans, unsigned long oversamples_orient, unsigned long oversamples, bool do_ignore_pdf_direction, XFLOAT *g_o_weights, XFLOAT *g_thr_wsum_prior_offsetx_class, XFLOAT *g_thr_wsum_prior_offsety_class, XFLOAT *g_thr_wsum_prior_offsetz_class, XFLOAT *g_thr_wsum_sigma2_offset, unsigned long *d_rot_idx, unsigned long *d_trans_idx, unsigned long *d_job_idx, unsigned long *d_job_num ) { // block id for (int bid=0; bid < grid_size; bid++) { XFLOAT s_o_weights[block_size]; XFLOAT s_thr_wsum_sigma2_offset[block_size];; XFLOAT s_thr_wsum_prior_offsetx_class[block_size]; XFLOAT s_thr_wsum_prior_offsety_class[block_size]; XFLOAT s_thr_wsum_prior_offsetz_class[block_size]; unsigned long pos = d_job_idx[bid]; unsigned long job_size = d_job_num[bid]; int pass_num = ceilfracf(job_size,block_size); for(int tid=0; tid= op_significant_weight ) //TODO Might be slow (divergent threads) weight /= op_sum_weight; else weight = (XFLOAT)0.0; s_o_weights[tid] += weight; s_thr_wsum_prior_offsetx_class[tid] += weight * g_oo_otrans_x[iy]; s_thr_wsum_prior_offsety_class[tid] += weight * g_oo_otrans_y[iy]; s_thr_wsum_sigma2_offset[tid] += weight * g_myp_oo_otrans_x2y2z2[iy]; } } } for(int tid=1; tid void cpu_translate2D(T * g_image_in, T * g_image_out, size_t image_size, int xdim, int ydim, //not used int dx, int dy); template void cpu_translate3D(T * g_image_in, T * g_image_out, size_t image_size, int xdim, int ydim, int zdim, //not used int dx, int dy, int dz); //---------------------------------------------------------------------------- template void centerFFT_2D( int batch_size, size_t pixel_start, size_t pixel_end, T *img_in, size_t image_size, int xdim, int ydim, int xshift, int yshift); template void centerFFT_3D( int batch_size, size_t pixel_start, size_t pixel_end, T *img_in, size_t image_size, int xdim, int ydim, int zdim, int xshift, int yshift, int zshift); //---------------------------------------------------------------------------- /*void probRatio( int 
blockIdx_x, int threadIdx_x, XFLOAT *d_Mccf, XFLOAT *d_Mpsi, XFLOAT *d_Maux, XFLOAT *d_Mmean, XFLOAT *d_Mstddev, size_t image_size, XFLOAT normfft, XFLOAT sum_ref_under_circ_mask, XFLOAT sum_ref2_under_circ_mask, XFLOAT expected_Pratio, int NpsiThisBatch, int startPsi, int totalPsis); void rotateOnly(int blockIdx_x, int blockIdx_y, int threadIdx_x, ACCCOMPLEX *d_Faux, XFLOAT psi, AccProjectorKernel &projector, void rotateAndCtf( int blockIdx_x, int blockIdx_y, int threadIdx_x, ACCCOMPLEX *d_Faux, XFLOAT *d_ctf, XFLOAT psi, AccProjectorKernel &projector, int startPsi = 0); |* * Multiplies complex array A (in-place) by B, pixel-by-pixel, after conjugating A *| void convol_A( int blockIdx_x, int threadIdx_x, ACCCOMPLEX *d_A, ACCCOMPLEX *d_B, size_t image_size); |* * Multiplies complex array A (in-place) by B, pixel-by-pixel, after conjugating A, writes to C *| void convol_A( int blockIdx_x, int threadIdx_x, ACCCOMPLEX *d_A, ACCCOMPLEX *d_B, ACCCOMPLEX *d_C, size_t image_size); |* * Multiplies many complex arrays A (in-place) by a single B, pixel-by-pixel, after conjugating A *| void batch_convol_A(int blockIdx_x, int threadIdx_x, ACCCOMPLEX *d_A, ACCCOMPLEX *d_B, size_t image_size); |* * Multiplies many complex arrays A (not in-place) by a single B, pixel-by-pixel, after conjugating A *| void batch_convol_A(int blockIdx_x, int threadIdx_x, ACCCOMPLEX *d_A, ACCCOMPLEX *d_B, ACCCOMPLEX *d_C, size_t image_size); |* * Multiplies complex array A (in-place) by B, pixel-by-pixel, after conjugating B *| void convol_B( int blockIdx_x, int threadIdx_x, ACCCOMPLEX *d_A, ACCCOMPLEX *d_B, size_t image_size); |* * Multiplies complex array A (in-place) by B, pixel-by-pixel, after conjugating B, writes to C *| void convol_B( int blockIdx_x, int threadIdx_x, ACCCOMPLEX *d_A, ACCCOMPLEX *d_B, ACCCOMPLEX *d_C, size_t image_size); |* * Multiplies many complex arrays A (in-place) by a single one B, pixel-by-pixel, after conjugating B *| void batch_convol_B(int blockIdx_x, int threadIdx_x, ACCCOMPLEX *d_A, ACCCOMPLEX *d_B, size_t image_size); |* * Multiplies scalar array A by a scalar S * * OUT[i] = A[i]*S *| template void cpu_kernel_multi( T *A, T *OUT, T S, size_t image_size); */ /* * In place multiplies scalar array A by a scalar S * * A[i] = A[i]*S */ template void cpu_kernel_multi( T *A, T S, size_t image_size); /* * Multiplies scalar array A by scalar array B and a scalar S, pixel-by-pixel * * OUT[i] = A[i]*B[i]*S */ template void cpu_kernel_multi( T *A, T *B, T *OUT, T S, size_t image_size); /* void finalizeMstddev( int blockIdx_x, int threadIdx_x, XFLOAT *Mstddev, XFLOAT *aux, XFLOAT S, size_t image_size); |* * In place squares array in place * * A[i] = A[i]*A[i] *| void square(int blockIdx_x, int threadIdx_x, XFLOAT *A, size_t image_size); */ /* * Casts on device so we can copy_to_host directly into a multidimarray. 
* template void cast( int blockIdx_x, int threadIdx_x, T1 *IN, T2 *OUT, size_t size) { size_t pixel = (size_t)threadIdx_x + (size_t)blockIdx_x*(size_t)BLOCK_SIZE; if(pixel #ifndef __INTEL_COMPILER __attribute__((always_inline)) inline #endif void kernel_frequencyPass( int grid_size, int block_size, ACCCOMPLEX *A, long int ori_size, size_t Xdim, size_t Ydim, size_t Zdim, XFLOAT edge_low, XFLOAT edge_width, XFLOAT edge_high, XFLOAT angpix, size_t image_size) { #ifdef DEBUG_CUDA if((size_t)grid_size*(size_t)block_size > (size_t)std::numeric_limits::max()) CHECK_INDEX_DEBUG_FATAL("kernel_frequencyPass: grid_size*(size_t)block_size > (size_t)std::numeric_limits::max()"); if (image_size < 0) CHECK_INDEX_DEBUG_FATAL("kernel_frequencyPass: image_size < 0"); #endif // TODO - why not a single loop over image_size pixels? for(int blk=0; blk lows are dead { A[texel].x = 0.; A[texel].y = 0.; } else if (res < edge_high) //highpass => medium lows are almost dead { XFLOAT mul = 0.5 - 0.5 * cos( PI * (res-edge_low)/edge_width); A[texel].x *= mul; A[texel].y *= mul; } } else //lowpass { if (res > edge_high) //lowpass => highs are dead { A[texel].x = 0.; A[texel].y = 0.; } else if (res > edge_low) //lowpass => medium highs are almost dead { XFLOAT mul = 0.5 + 0.5 * cos( PI * (res-edge_low)/edge_width); A[texel].x *= mul; A[texel].y *= mul; } } } } // tid } // blk } template #ifndef __INTEL_COMPILER __attribute__((always_inline)) inline #endif void powerClass(int gridSize, ACCCOMPLEX *g_image, XFLOAT *g_spectrum, size_t image_size, size_t spectrum_size, int xdim, int ydim, int zdim, int res_limit, XFLOAT *g_highres_Xi2) { #ifdef DEBUG_CUDA if((size_t)gridSize*(size_t)POWERCLASS_BLOCK_SIZE > (size_t)std::numeric_limits::max()) CHECK_INDEX_DEBUG_FATAL("kernel_frequencyPass: gridSize*(size_t)POWERCLASS_BLOCK_SIZE > (size_t)std::numeric_limits::max()"); if (image_size < 0) CHECK_INDEX_DEBUG_FATAL("kernel_frequencyPass: image_size < 0"); #endif for(int bid=0; bid=res_limit) s_highres_Xi2[tid] += normFaux; } } } for(int tid=1; tid void cpu_kernel_make_eulers_2D(int grid_size, int block_size, XFLOAT *alphas, XFLOAT *eulers, unsigned long orientation_num); template void cpu_kernel_make_eulers_3D(int grid_size, int block_size, XFLOAT *alphas, XFLOAT *betas, XFLOAT *gammas, XFLOAT *eulers, unsigned long orientation_num, XFLOAT *L, XFLOAT *R); } // end of namespace CpuKernels #endif /* HELPER_KERNELS_H_ */ relion-3.1.3/src/acc/cpu/cpu_kernels/wavg.h000066400000000000000000000265701411340063500205500ustar00rootroot00000000000000#ifndef WAVG_KERNEL_H_ #define WAVG_KERNEL_H_ #include #include #include #include "src/acc/cpu/cpu_settings.h" #include "src/acc/acc_projector.h" #include "src/acc/cpu/cpu_kernels/cpu_utils.h" #include "src/acc/cpu/cpu_kernels/helper.h" namespace CpuKernels { // sincos lookup table optimization. Function translatePixel calls // sincos(x*tx + y*ty). We precompute 2D lookup tables for x and y directions. // The first dimension is x or y pixel index, and the second dimension is x or y // translation index. Since sin(a+B) = sin(A) * cos(B) + cos(A) * sin(B), and // cos(A+B) = cos(A) * cos(B) - sin(A) * sin(B), we can use lookup table to // compute sin(x*tx + y*ty) and cos(x*tx + y*ty). 
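// Minimal sketch of the recombination described above (an illustration added
// for clarity; the table layout is an assumption based on the kernels below,
// which obtain their tables from computeSincosLookupTable2D/3D):
//
//   // build: sin_x[t][x] = sin(x * tx[t]), cos_x[t][x] = cos(x * tx[t]); same for y
//   for (unsigned long t = 0; t < trans_num; t++)
//       for (int x = 0; x < xSize; x++)
//       {
//           sin_x[t][x] = sin(x * g_trans_x[t]);
//           cos_x[t][x] = cos(x * g_trans_x[t]);
//       }
//
//   // use: two multiply-add pairs per pixel replace a sincos() call
//   XFLOAT s = sin_x[t][x] * cos_y[t][y] + cos_x[t][x] * sin_y[t][y];  // sin(x*tx + y*ty)
//   XFLOAT c = cos_x[t][x] * cos_y[t][y] - sin_x[t][x] * sin_y[t][y];  // cos(x*tx + y*ty)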
template #ifndef __INTEL_COMPILER __attribute__((always_inline)) inline #endif void wavg_ref3D( XFLOAT * RESTRICT g_eulers, AccProjectorKernel &projector, unsigned long image_size, unsigned long orientation_num, #ifdef DEBUG_CUDA XFLOAT * RESTRICT _g_img_real, #else XFLOAT * RESTRICT g_img_real, #endif XFLOAT * RESTRICT g_img_imag, XFLOAT * RESTRICT g_trans_x, XFLOAT * RESTRICT g_trans_y, XFLOAT * RESTRICT g_trans_z, XFLOAT * RESTRICT g_weights, XFLOAT * RESTRICT g_ctfs, XFLOAT * RESTRICT g_wdiff2s_parts, XFLOAT * RESTRICT g_wdiff2s_AA, XFLOAT * RESTRICT g_wdiff2s_XA, unsigned long trans_num, XFLOAT weight_norm, XFLOAT significant_weight, XFLOAT part_scale) { #ifdef DEBUG_CUDA checkedArray g_img_real; g_img_real.initCheckedArray(_g_img_real); #endif // pre-compute sin and cos for x and y direction int xSize = projector.imgX; int ySize = projector.imgY; XFLOAT sin_x[trans_num][xSize], cos_x[trans_num][xSize]; XFLOAT sin_y[trans_num][ySize], cos_y[trans_num][ySize]; computeSincosLookupTable2D(trans_num, g_trans_x, g_trans_y, xSize, ySize, &sin_x[0][0], &cos_x[0][0], &sin_y[0][0], &cos_y[0][0]); // Set up other arrays XFLOAT ref_real[xSize], ref_imag[xSize]; XFLOAT img_real[xSize], img_imag[xSize]; XFLOAT ctfs[xSize]; XFLOAT wdiff2s_parts[xSize]; XFLOAT wdiff2s_XA [xSize]; XFLOAT wdiff2s_AA [xSize]; for(unsigned long bid=0; bid projector.maxR) { if (iy >= ySize - projector.maxR) y = iy - ySize; else { // handle special case for one pixel xstart = projector.maxR; xend = xstart + 1; } } for(int x = xstart; x < xend; x++) { img_real[x] = g_img_real[pixel + x]; } for(int x = xstart; x < xend; x++) { img_imag[x] = g_img_imag[pixel + x]; } if (REFCTF) { for(int x = xstart; x < xend; x++) ctfs[x] = g_ctfs[pixel + x]; } for(int x = xstart; x < xend; x++) { wdiff2s_parts[x] = g_wdiff2s_parts[pixel + x]; } for(int x = xstart; x < xend; x++) { wdiff2s_XA[x] = g_wdiff2s_XA[pixel + x]; } for(int x = xstart; x < xend; x++) { wdiff2s_AA[x] = g_wdiff2s_AA[pixel + x]; } #pragma omp simd for(int x = xstart; x < xend; x++) { if(REF3D) projector.project3Dmodel(x, y, e0, e1, e3, e4, e6, e7, ref_real[x], ref_imag[x]); else projector.project2Dmodel(x, y, e0, e1, e3, e4, ref_real[x], ref_imag[x]); if (REFCTF) { if(CTFPREMULTIPLIED) { ref_real[x] *= ctfs[x] * ctfs[x]; ref_imag[x] *= ctfs[x] * ctfs[x]; } else { ref_real[x] *= ctfs[x]; ref_imag[x] *= ctfs[x]; } } else { ref_real[x] *= part_scale; ref_imag[x] *= part_scale; } } for (unsigned long itrans = 0; itrans < trans_num; itrans++) { XFLOAT weight = g_weights[bid * trans_num + itrans]; if (weight < significant_weight) continue; weight *= weight_norm_inverse; XFLOAT trans_cos_y, trans_sin_y; if ( y < 0) { trans_cos_y = cos_y[itrans][-y]; trans_sin_y = -sin_y[itrans][-y]; } else { trans_cos_y = cos_y[itrans][y]; trans_sin_y = sin_y[itrans][y]; } XFLOAT *trans_cos_x = &cos_x[itrans][0]; XFLOAT *trans_sin_x = &sin_x[itrans][0]; #pragma omp simd for(int x = xstart; x < xend; x++) { XFLOAT ss = trans_sin_x[x] * trans_cos_y + trans_cos_x[x] * trans_sin_y; XFLOAT cc = trans_cos_x[x] * trans_cos_y - trans_sin_x[x] * trans_sin_y; XFLOAT trans_real = cc * img_real[x] - ss * img_imag[x]; XFLOAT trans_imag = cc * img_imag[x] + ss * img_real[x]; /* XFLOAT trans_real, trans_imag; translatePixel(x, y, g_trans_x[itrans], g_trans_y[itrans], img_real[x], img_imag[x], trans_real, trans_imag); where translatePixel is: int x, int y, XFLOAT tx, XFLOAT ty, XFLOAT &real, XFLOAT &imag, XFLOAT &tReal, XFLOAT &tImag sincosf( x * tx + y * ty , &s, &c ); tReal = c * real - s * imag; tImag = c 
* imag + s * real; */ XFLOAT diff_real = ref_real[x] - trans_real; XFLOAT diff_imag = ref_imag[x] - trans_imag; wdiff2s_parts[x] += weight * (diff_real * diff_real + diff_imag * diff_imag); wdiff2s_XA [x] += weight * (ref_real[x] * trans_real + ref_imag[x] * trans_imag); wdiff2s_AA [x] += weight * (ref_real[x] * ref_real[x] + ref_imag[x] * ref_imag[x] ); } } // for itrans // Update the globals once for(int x = xstart; x < xend; x++) { g_wdiff2s_parts[pixel + x] = wdiff2s_parts[x]; } for(int x = xstart; x < xend; x++) { g_wdiff2s_XA [pixel + x] = wdiff2s_XA[x]; } for(int x = xstart; x < xend; x++) { g_wdiff2s_AA [pixel + x] = wdiff2s_AA[x]; } pixel += (unsigned long)xSize; } // y direction } // bid } template #ifndef __INTEL_COMPILER __attribute__((always_inline)) inline #endif void wavg_3D( XFLOAT * RESTRICT g_eulers, AccProjectorKernel &projector, unsigned long image_size, unsigned long orientation_num, #ifdef DEBUG_CUDA XFLOAT * RESTRICT _g_img_real, #else XFLOAT * RESTRICT g_img_real, #endif XFLOAT * RESTRICT g_img_imag, XFLOAT * RESTRICT g_trans_x, XFLOAT * RESTRICT g_trans_y, XFLOAT * RESTRICT g_trans_z, XFLOAT * RESTRICT g_weights, XFLOAT * RESTRICT g_ctfs, XFLOAT * RESTRICT g_wdiff2s_parts, XFLOAT * RESTRICT g_wdiff2s_AA, XFLOAT * RESTRICT g_wdiff2s_XA, unsigned long trans_num, XFLOAT weight_norm, XFLOAT significant_weight, XFLOAT part_scale) { #ifdef DEBUG_CUDA checkedArray g_img_real; g_img_real.initCheckedArray(_g_img_real); #endif // pre-compute sin and cos for x and y direction int xSize = projector.imgX; int ySize = projector.imgY; int zSize = projector.imgZ; XFLOAT sin_x[trans_num][xSize], cos_x[trans_num][xSize]; XFLOAT sin_y[trans_num][ySize], cos_y[trans_num][ySize]; XFLOAT sin_z[trans_num][zSize], cos_z[trans_num][zSize]; computeSincosLookupTable3D(trans_num, g_trans_x, g_trans_y, g_trans_z, xSize, ySize, zSize, &sin_x[0][0], &cos_x[0][0], &sin_y[0][0], &cos_y[0][0], &sin_z[0][0], &cos_z[0][0]); // Set up other arrays XFLOAT ref_real[xSize], ref_imag[xSize]; XFLOAT img_real[xSize], img_imag[xSize]; for(unsigned long bid=0; bid projector.maxR) { if (z >= zSize - projector.maxR) z = z - zSize; else { xstart_z = projector.maxR; xend_z = xstart_z + 1; } } for(int iy = 0; iy < ySize; iy++) { int xstart_y = xstart_z, xend_y = xend_z; int y = iy; if (iy > projector.maxR) { if (iy >= ySize - projector.maxR) y = iy - ySize; else { xstart_y = projector.maxR; xend_y = xstart_y + 1; } } #pragma omp simd for(int x = xstart_y; x < xend_y; x++) { projector.project3Dmodel(x, y, z, e0, e1, e2, e3, e4, e5, e6, e7, e8, ref_real[x], ref_imag[x]); if (REFCTF) { if(CTFPREMULTIPLIED) { ref_real[x] *= g_ctfs[pixel + x] * g_ctfs[pixel + x]; ref_imag[x] *= g_ctfs[pixel + x] * g_ctfs[pixel + x]; } else { ref_real[x] *= g_ctfs[pixel + x]; ref_imag[x] *= g_ctfs[pixel + x]; } } else { ref_real[x] *= part_scale; ref_imag[x] *= part_scale; } img_real[x] = g_img_real[pixel + x]; img_imag[x] = g_img_imag[pixel + x]; } for (unsigned long itrans = 0; itrans < trans_num; itrans++) { XFLOAT weight = g_weights[bid * trans_num + itrans]; if (weight < significant_weight) continue; weight *= weight_norm_inverse; XFLOAT trans_cos_z, trans_sin_z; if ( z < 0) { trans_cos_z = cos_z[itrans][-z]; trans_sin_z = -sin_z[itrans][-z]; } else { trans_cos_z = cos_z[itrans][z]; trans_sin_z = sin_z[itrans][z]; } XFLOAT trans_cos_y, trans_sin_y; if ( y < 0) { trans_cos_y = cos_y[itrans][-y]; trans_sin_y = -sin_y[itrans][-y]; } else { trans_cos_y = cos_y[itrans][y]; trans_sin_y = sin_y[itrans][y]; } XFLOAT *trans_cos_x = 
&cos_x[itrans][0]; XFLOAT *trans_sin_x = &sin_x[itrans][0]; for(int x = xstart_y; x < xend_y; x++) { // TODO check the math XFLOAT s = trans_sin_x[x] * trans_cos_y + trans_cos_x[x] * trans_sin_y; XFLOAT c = trans_cos_x[x] * trans_cos_y - trans_sin_x[x] * trans_sin_y; XFLOAT ss = s * trans_cos_z + c * trans_sin_z; XFLOAT cc = c * trans_cos_z - s * trans_sin_z; XFLOAT trans_real = cc * img_real[x] - ss * img_imag[x]; XFLOAT trans_imag = cc * img_imag[x] + ss * img_real[x]; /* XFLOAT trans_real, trans_imag; translatePixel(x, y, g_trans_x[itrans], g_trans_y[itrans], img_real[x], img_imag[x], trans_real, trans_imag); where translatePixel is: sincosf( x * tx + y * ty , &s, &c ); tReal = c * real - s * imag; tImag = c * imag + s * real; */ XFLOAT diff_real = ref_real[x] - trans_real; XFLOAT diff_imag = ref_imag[x] - trans_imag; g_wdiff2s_parts[pixel + x] += weight * (diff_real * diff_real + diff_imag * diff_imag); g_wdiff2s_XA [pixel + x] += weight * (ref_real[x] * trans_real + ref_imag[x] * trans_imag); g_wdiff2s_AA [pixel + x] += weight * (ref_real[x] * ref_real[x] + ref_imag[x] * ref_imag[x] ); } } // for itrans pixel += (unsigned long)xSize; } // y direction } // z direction } // bid } } // end of namespace CpuKernels #endif /* WAVG_KERNEL_H_ */ relion-3.1.3/src/acc/cpu/cpu_ml_optimiser.cpp000066400000000000000000000135421411340063500211720ustar00rootroot00000000000000// A large amount of this code is direct from cuda_ml_optimizer and so could // be shared (but possibly with difficulty since it is enough different that // we either need a lot of #ifdefs, or a lot of macros/some other mechanism to // abstract the differences). The biggest differences are the type of memory // objects used (std::vector vs. CudaGlobalPtr and CudaCustomAllocator), the // lack of transfers to/from the device, and on-device operations (which are // replaced by loops/function calls). // // CudaFFT has been replaced with lib FFTW, if RELION is configured with mix // precision, both single and double precision FFTW are linked into RELION. // Install fftw-static.x86_64 and fftw-static.i686 to get the libraries without // having to pull them at build time. Over time we hope to replace FFTW with // MKL. // // All Cuda kernels in gpu_utils and gpu_utils/cuda_kernels have been converted // to C functions // // Hot spot loops in the converted C functions have been vectorized with ICC // auto-vectorization with or without #pragma. Loop layout has been modified // to get the best performance on CPU. // // NOTE: Since the GPU code was ported back to CPU there may be additional // changes made in the CUDA code which may not have made it here. 
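// Minimal sketch of the kernel-conversion pattern described above (an
// illustration added for clarity; square_kernel and g_data are hypothetical
// names, not RELION API): CUDA block/thread indices become ordinary loops
// that the compiler can auto-vectorize.
//
//   // CUDA form: one thread per element
//   //   size_t i = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
//   //   if (i < n) g_data[i] *= g_data[i];
//
//   // CPU port: the grid/block indexing collapses into a plain loop
//   inline void square_kernel(XFLOAT *g_data, size_t n)
//   {
//       #pragma omp simd
//       for (size_t i = 0; i < n; i++)
//           g_data[i] *= g_data[i];
//   }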
#ifdef ALTCPU // Make sure we build for CPU #include "src/acc/cpu/cuda_stubs.h" #include "src/ml_optimiser.h" #include #include #include #include #include #include #include #include "src/acc/acc_ptr.h" #include "src/acc/acc_projector.h" #include "src/acc/acc_projector_plan.h" #include "src/acc/cpu/cpu_benchmark_utils.h" #include "src/acc/cpu/cpu_helper_functions.h" #include "src/acc/cpu/cpu_kernels/helper.h" #include "src/acc/cpu/cpu_kernels/diff2.h" #include "src/acc/cpu/cpu_kernels/wavg.h" #include "src/acc/cpu/cpu_kernels/BP.h" #include "src/acc/cpu/mkl_fft.h" #include "src/acc/data_types.h" #include "src/complex.h" #include "src/helix.h" #include #include "src/parallel.h" #include #include #include #include #include "src/acc/utilities.h" #include "src/acc/utilities_impl.h" #include "src/acc/acc_ml_optimiser.h" #include "src/acc/cpu/cpu_ml_optimiser.h" #include "src/acc/acc_helper_functions.h" #include "src/acc/acc_ml_optimiser_impl.h" #include tbb::spin_mutex mkl_mutex; void MlDataBundle::setup(MlOptimiser *baseMLO) { /*====================================================== PROJECTOR AND BACKPROJECTOR ======================================================*/ unsigned nr_proj = baseMLO->mymodel.PPref.size(); unsigned nr_bproj = baseMLO->wsum_model.BPref.size(); projectors.resize(nr_proj); backprojectors.resize(nr_bproj); //Loop over classes for (int imodel = 0; imodel < nr_proj; imodel++) { projectors[imodel].setMdlDim( baseMLO->mymodel.PPref[imodel].data.xdim, baseMLO->mymodel.PPref[imodel].data.ydim, baseMLO->mymodel.PPref[imodel].data.zdim, baseMLO->mymodel.PPref[imodel].data.yinit, baseMLO->mymodel.PPref[imodel].data.zinit, baseMLO->mymodel.PPref[imodel].r_max, baseMLO->mymodel.PPref[imodel].padding_factor); projectors[imodel].initMdl(baseMLO->mdlClassComplex[imodel]); } for (int imodel = 0; imodel < nr_bproj; imodel++) { backprojectors[imodel].setMdlDim( baseMLO->wsum_model.BPref[imodel].data.xdim, baseMLO->wsum_model.BPref[imodel].data.ydim, baseMLO->wsum_model.BPref[imodel].data.zdim, baseMLO->wsum_model.BPref[imodel].data.yinit, baseMLO->wsum_model.BPref[imodel].data.zinit, baseMLO->wsum_model.BPref[imodel].r_max, baseMLO->wsum_model.BPref[imodel].padding_factor); backprojectors[imodel].initMdl(); } /*====================================================== PROJECTION PLAN ======================================================*/ unsigned nr_classes = baseMLO->mymodel.nr_classes; coarseProjectionPlans.resize(nr_classes); //Can we pre-generate projector plan and corresponding euler matrices for all particles if (!baseMLO->do_skip_align && !baseMLO->do_skip_rotate && !baseMLO->do_auto_refine && baseMLO->mymodel.orientational_prior_mode == NOPRIOR) for (int iclass = 0; iclass < nr_classes; iclass++) { //If doing predefined projector plan at all and is this class significant if (baseMLO->mymodel.pdf_class[iclass] > 0.) 
{ std::vector exp_pointer_dir_nonzeroprior; std::vector exp_pointer_psi_nonzeroprior; std::vector exp_directions_prior; std::vector exp_psi_prior; long unsigned itrans_max = baseMLO->sampling.NrTranslationalSamplings() - 1; long unsigned nr_idir = baseMLO->sampling.NrDirections(0, &exp_pointer_dir_nonzeroprior); long unsigned nr_ipsi = baseMLO->sampling.NrPsiSamplings(0, &exp_pointer_psi_nonzeroprior ); coarseProjectionPlans[iclass].setup( baseMLO->sampling, exp_directions_prior, exp_psi_prior, exp_pointer_dir_nonzeroprior, exp_pointer_psi_nonzeroprior, NULL, //Mcoarse_significant baseMLO->mymodel.pdf_class, baseMLO->mymodel.pdf_direction, nr_idir, nr_ipsi, 0, //idir_min nr_idir - 1, //idir_max 0, //ipsi_min nr_ipsi - 1, //ipsi_max 0, //itrans_min itrans_max, 0, //current_oversampling 1, //nr_oversampled_rot iclass, true, //coarse !IS_NOT_INV, baseMLO->do_skip_align, baseMLO->do_skip_rotate, baseMLO->mymodel.orientational_prior_mode ); } } }; void MlOptimiserCpu::resetData() { transformer1.clear(); transformer2.clear(); classStreams.resize(baseMLO->mymodel.nr_classes, 0); }; void MlOptimiserCpu::expectationOneParticle(unsigned long my_part_id, int thread_id) { AccPtrFactory ptrFactory(AccType::accCPU); accDoExpectationOneParticle(this, my_part_id, thread_id, ptrFactory); }; #endif // ALTCPU relion-3.1.3/src/acc/cpu/cpu_ml_optimiser.h000066400000000000000000000074571411340063500206470ustar00rootroot00000000000000// For the Alternate CPU version, this is essentially a copy of // cuda_ml_optimiser.h. What is different is that device bundles are not // needed, both as a separate class and referenced in MlOptimiserCpu, // which has a few different data members and methods from MlOptimiserCuda to // support the different implementation // Note the the CPU implementation defines the floating point precision used // for XFLOAT using ACC_DOUBLE_PRECISION (ACC_DOUBLE_PRECISION is also used // for the equivalent purpose throughout the code) #ifndef CPU_ML_OPTIMISER_H_ #define CPU_ML_OPTIMISER_H_ #include "src/mpi.h" #include "src/ml_optimiser.h" #include "src/acc/acc_projector_plan.h" #include "src/acc/acc_projector.h" #include "src/acc/acc_backprojector.h" #include "src/acc/cpu/mkl_fft.h" #include "src/acc/cpu/cpu_benchmark_utils.h" #include #include "src/acc/acc_ml_optimiser.h" #include "src/acc/acc_ptr.h" class MlDataBundle { public: std::vector< AccProjector > projectors; std::vector< AccBackprojector > backprojectors; std::vector< AccProjectorPlan > coarseProjectionPlans; void setup(MlOptimiser *baseMLO); ~MlDataBundle() { projectors.clear(); backprojectors.clear(); } }; class MlOptimiserCpu { public: // transformer as holder for reuse of fftw_plans FourierTransformer transformer; MklFFT transformer1; MklFFT transformer2; MlOptimiser *baseMLO; bool refIs3D; bool dataIs3D; int thread_id; MlDataBundle *bundle; std::vector< int > classStreams; #ifdef TIMING_FILES relion_timer timer; #endif //Used for precalculations of projection setup bool generateProjectionPlanOnTheFly; MlOptimiserCpu(MlOptimiser *baseMLOptimiser, MlDataBundle *b, const char * timing_fnm) : baseMLO(baseMLOptimiser), transformer1(baseMLOptimiser->mymodel.data_dim), transformer2(baseMLOptimiser->mymodel.data_dim), refIs3D(baseMLO->mymodel.ref_dim == 3), dataIs3D(baseMLO->mymodel.data_dim == 3), #ifdef TIMING_FILES timer(timing_fnm), #endif generateProjectionPlanOnTheFly(false), thread_id(-1), bundle(b), classStreams(0) { //Can we pre-generate projector plan and corresponding euler matrices for all particles if 
(baseMLO->do_skip_align || baseMLO->do_skip_rotate || baseMLO->do_auto_refine || baseMLO->mymodel.orientational_prior_mode != NOPRIOR) generateProjectionPlanOnTheFly = true; else generateProjectionPlanOnTheFly = false; }; void resetData(); void expectationOneParticle(unsigned long my_ori_particle, int thread_id); CudaCustomAllocator *getAllocator() { return ((CudaCustomAllocator *)0); }; ~MlOptimiserCpu() {} }; /* class ApplyFoo { float *const my_a; public: void operator()( const blocked_range& r ) const { float *a = my_a; for( size_t i=r.begin(); i!=r.end(); ++i ) Foo(a[i]); } ApplyFoo( float a[] ) : my_a(a) {} }; // Called as follows: // tbb::parallel_for(tbb::blocked_range(my_first_ori_particle, my_last_ori_particle+1), // cpuThreadExpectationSomeParticles(this)); class cpuThreadExpectationSomeParticles { MlOptimiser *const my_optimiser; public: void operator()( const tbb::blocked_range& r ) const { MlOptimiser *mloptimiser = my_optimiser; MlOptimiser::CpuOptimiserType::reference ref = mloptimiser->tbbCpuOptimiser.local(); MlOptimiserCpu *cpuOptimiser = (MlOptimiserCpu *)ref; if(cpuOptimiser == NULL) { cpuOptimiser = new MlOptimiserCpu(mloptimiser, "cpu_optimiser"); cpuOptimiser->resetData(); cpuOptimiser->setupFixedSizedObjects(); cpuOptimiser->setupTunableSizedObjects(); ref = cpuOptimiser; } for( size_t i=r.begin(); i!=r.end(); ++i ) { cpuOptimiser->expectationOneParticle(i); } } cpuThreadExpectationSomeParticles( MlOptimiser *optimiser ) : my_optimiser(optimiser) {} }; */ #endif relion-3.1.3/src/acc/cpu/cpu_projector.cpp000066400000000000000000000012561411340063500204750ustar00rootroot00000000000000#include #include #include "src/acc/cpu/cuda_stubs.h" #include "src/acc/acc_ptr.h" #include "src/acc/acc_projector.h" #include "src/acc/acc_backprojector.h" #include "src/acc/acc_projector_plan.h" #include "src/acc/cpu/cpu_benchmark_utils.h" #include "src/acc/cpu/cpu_helper_functions.h" #include "src/acc/cpu/cpu_kernels/helper.h" #include "src/acc/cpu/cpu_kernels/diff2.h" #include "src/acc/cpu/cpu_kernels/wavg.h" #include "src/acc/cpu/cpu_kernels/BP.h" #include "src/acc/utilities.h" #include "src/acc/data_types.h" #include "src/acc/acc_helper_functions.h" #include "src/acc/cpu/cpu_settings.h" #include #include "src/acc/acc_projector_impl.h" relion-3.1.3/src/acc/cpu/cpu_projector_plan.cpp000066400000000000000000000011271411340063500215040ustar00rootroot00000000000000#ifdef ALTCPU // Make sure we build for CPU #include "src/acc/cpu/cuda_stubs.h" #include "src/acc/settings.h" #include "src/time.h" #include "src/ml_optimiser.h" #include "src/acc/acc_ptr.h" #include "src/acc/acc_projector.h" #include "src/acc/acc_backprojector.h" #include "src/acc/cpu/cpu_helper_functions.h" #include "src/acc/cpu/cpu_kernels/helper.h" #include "src/acc/cpu/cpu_kernels/diff2.h" #include "src/acc/cpu/cpu_kernels/wavg.h" #include "src/acc/data_types.h" #include "src/acc/utilities.h" #include "src/acc/acc_projector_plan.h" #include "src/acc/acc_projector_plan_impl.h" #endifrelion-3.1.3/src/acc/cpu/cpu_settings.h000066400000000000000000000040301411340063500177640ustar00rootroot00000000000000#ifndef CPU_SETTINGS_H_ #define CPU_SETTINGS_H_ #include "src/acc/settings.h" // GENERAL ----------------------------- #define MAX_RESOL_SHARED_MEM 32 #define BLOCK_SIZE 128 // ------------------------------------- // COARSE DIFF ------------------------- #define D2C_BLOCK_SIZE_2D 256 #define D2C_EULERS_PER_BLOCK_2D 16 #define D2C_BLOCK_SIZE_REF3D 256 #define D2C_EULERS_PER_BLOCK_REF3D 16 #define D2C_BLOCK_SIZE_DATA3D 64 #define 
D2C_EULERS_PER_BLOCK_DATA3D 32 // ------------------------------------- // FINE DIFF --------------------------- #define D2F_BLOCK_SIZE_2D 8 #define D2F_CHUNK_2D 7 #define D2F_BLOCK_SIZE_REF3D 8 #define D2F_CHUNK_REF3D 7 #define D2F_BLOCK_SIZE_DATA3D 8 #define D2F_CHUNK_DATA3D 4 // ------------------------------------- // WAVG -------------------------------- #define WAVG_BLOCK_SIZE_DATA3D 512 #define WAVG_BLOCK_SIZE 256 // ------------------------------------- // MISC -------------------------------- #define SUMW_BLOCK_SIZE 32 #define SOFTMASK_BLOCK_SIZE 128 #define CFTT_BLOCK_SIZE 128 #define PROBRATIO_BLOCK_SIZE 128 #define POWERCLASS_BLOCK_SIZE 128 #define PROJDIFF_CHUNK_SIZE 14 // ------------------------------------- // RANDOMIZATION ----------------------- #define RND_BLOCK_NUM 64 #define RND_BLOCK_SIZE 32 // ------------------------------------- #define BACKPROJECTION4_BLOCK_SIZE 64 #define BACKPROJECTION4_GROUP_SIZE 16 #define BACKPROJECTION4_PREFETCH_COUNT 3 #define BP_2D_BLOCK_SIZE 128 #define BP_REF3D_BLOCK_SIZE 128 #define BP_DATA3D_BLOCK_SIZE 640 #define REF_GROUP_SIZE 3 // -- Number of references to be treated per block -- // This applies to wavg and reduces global memory // accesses roughly proportionally, but scales shared // memory usage by allocating // ( 6*REF_GROUP_SIZE + 4 ) * BLOCK_SIZE XFLOATS. // DEPRECATED #ifdef __INTEL_COMPILER # include # define RESTRICT __restrict__ #else # define RESTRICT #endif #endif /* CPU_SETTINGS_H_ */ relion-3.1.3/src/acc/cpu/cuda_stubs.h000066400000000000000000000005641411340063500174210ustar00rootroot00000000000000#ifndef CUDA_STUBS_H #define CUDA_STUBS_H #undef CUDA typedef float cudaStream_t; typedef double CudaCustomAllocator; typedef int dim3; #define cudaStreamPerThread 0 #define CUSTOM_ALLOCATOR_REGION_NAME( name ) //Do nothing #define LAUNCH_PRIVATE_ERROR(func, status) #define LAUNCH_HANDLE_ERROR( err ) #define DEBUG_HANDLE_ERROR( err ) #define HANDLE_ERROR( err ) #endifrelion-3.1.3/src/acc/cpu/mkl_fft.h000066400000000000000000000220101411340063500166750ustar00rootroot00000000000000#ifndef MKL_FFT_H_ #define MKL_FFT_H_ #include #include /* #include #ifdef DEBUG_MKL #define HANDLE_MKL_ERROR( err ) (MKLHandleError( err, __FILE__, __LINE__ )) #else #define HANDLE_MKL_ERROR( err ) (err) //Do nothing #endif static void MKLHandleError(MKL_LONG err, const char *file, int line ) { if (err != 0) { fprintf(stderr, "MKL error in file '%s' in line %i : %s.\n", __FILE__, __LINE__, "error" ); } } class MklFFT { public: std::vector reals; std::vector fouriers; int direction; int dimension; size_t xSize,ySize,zSize,xFSize,yFSize,zFSize; DFTI_DESCRIPTOR_HANDLE handle; MklFFT(int transformDimension = 2): direction(0), dimension((int)transformDimension), xSize(0), ySize(0), zSize(0), xFSize(0), yFSize(0), zFSize(0), handle(0) {}; void setSize(size_t x, size_t y, size_t z, int setDirection = 0) { int checkDim; if(z>1) checkDim=3; else if(y>1) checkDim=2; else checkDim=1; if(checkDim != dimension) REPORT_ERROR("You are trying to change the dimesion of a MklFFT transformer, which is not allowed"); if( !( (setDirection==-1)||(setDirection==0)||(setDirection==1) ) ) { std::cerr << "*ERROR : Setting a cuda transformer direction to non-defined value" << std::endl; return; } direction = setDirection; clear(); xSize = x; ySize = y; zSize = z; xFSize = x/2 + 1; yFSize = y; zFSize = z; reals.resize(xSize * ySize * zSize); fouriers.resize(xFSize * yFSize * zFSize); MKL_LONG N[3]; if(dimension == 1) N[0] = xSize; else if(dimension == 2){ N[0] = ySize; N[1] = 
xSize; } else { N[0] = zSize; N[1] = ySize; N[2] = xSize; } #ifdef RELION_SINGLE_PRECISION HANDLE_MKL_ERROR(DftiCreateDescriptor(&handle, DFTI_SINGLE, DFTI_REAL, dimension, N)); #else HANDLE_MKL_ERROR(DftiCreateDescriptor(&handle, DFTI_SINGLE, DFTI_DOUBLE, dimension, N)); #endif HANDLE_MKL_ERROR(DftiSetValue(handle, DFTI_PLACEMENT, DFTI_NOT_INPLACE)); HANDLE_MKL_ERROR(DftiSetValue(handle, DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX)); } void forward() { if(direction==1) { std::cout << "trying to execute a forward plan for a MKL FFT transformer which is backwards-only" << std::endl; return; } if(dimension == 2) { MKL_LONG rs[3]; rs[0] = 0; rs[1] = xSize; rs[2] = 1; HANDLE_MKL_ERROR(DftiSetValue(handle, DFTI_INPUT_STRIDES, rs)); MKL_LONG cs[3]; cs[0] = 0; cs[1] = xFSize; cs[2] = 1; HANDLE_MKL_ERROR(DftiSetValue(handle, DFTI_OUTPUT_STRIDES, cs)); } else if(dimension == 3) { MKL_LONG rs[4]; rs[0] = 0; rs[1] = xSize * ySize; rs[2] = xSize; rs[3] = 1; HANDLE_MKL_ERROR(DftiSetValue(handle, DFTI_INPUT_STRIDES, rs)); MKL_LONG cs[4]; cs[0] = 0; cs[1] = xFSize * ySize; cs[2] = xFSize; cs[3] = 1; HANDLE_MKL_ERROR(DftiSetValue(handle, DFTI_OUTPUT_STRIDES, cs)); } HANDLE_MKL_ERROR(DftiCommitDescriptor(handle)); HANDLE_MKL_ERROR(DftiComputeForward(handle, &reals[0], &fouriers[0])); } void backward() { if(direction==-1) { std::cout << "trying to execute a backwards plan for a MKL FFT transformer which is forwards-only" << std::endl; return; } if(dimension == 2) { MKL_LONG rs[3]; rs[0] = 0; rs[1] = xSize; rs[2] = 1; HANDLE_MKL_ERROR(DftiSetValue(handle, DFTI_OUTPUT_STRIDES, rs)); MKL_LONG cs[3]; cs[0] = 0; cs[1] = xFSize; cs[2] = 1; HANDLE_MKL_ERROR(DftiSetValue(handle, DFTI_INPUT_STRIDES, cs)); } else if(dimension == 3) { MKL_LONG rs[4]; rs[0] = 0; rs[1] = xSize * ySize; rs[2] = xSize; rs[3] = 1; HANDLE_MKL_ERROR(DftiSetValue(handle, DFTI_OUTPUT_STRIDES, rs)); MKL_LONG cs[4]; cs[0] = 0; cs[1] = xFSize * ySize; cs[2] = xFSize; cs[3] = 1; HANDLE_MKL_ERROR(DftiSetValue(handle, DFTI_INPUT_STRIDES, cs)); } HANDLE_MKL_ERROR(DftiCommitDescriptor(handle)); HANDLE_MKL_ERROR(DftiComputeBackward(handle, &fouriers[0], &reals[0])); } void clear() { HANDLE_MKL_ERROR(DftiFreeDescriptor(&handle)); reals.clear(); fouriers.clear(); } ~MklFFT() { clear(); } }; */ #include #include extern tbb::spin_mutex mkl_mutex; class MklFFT { bool planSet; public: AccPtr reals; AccPtr fouriers; int direction; int dimension; size_t xSize,ySize,zSize,xFSize,yFSize,zFSize; #ifdef ACC_DOUBLE_PRECISION /* fftw Forward plan */ fftw_plan fPlanForward; /* fftw Backward plan */ fftw_plan fPlanBackward; #else /* fftw Forward plan */ fftwf_plan fPlanForward; /* fftw Backward plan */ fftwf_plan fPlanBackward; #endif MklFFT(int transformDimension = 2): direction(0), dimension((int)transformDimension), planSet(false), xSize(0), ySize(0), zSize(0) { fPlanForward = fPlanBackward = NULL; }; void setSize(size_t x, size_t y, size_t z, int setDirection = 0) { /* Optional direction input restricts transformer to * forwards or backwards tranformation only, * which reduces memory requirements, especially * for large batches of simulatanous transforms. 
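		 * (Note: this FFTW-backed implementation creates both the r2c and the
		 * c2r plan in setSize() regardless of the direction chosen; the flag
		 * only guards the forward() and backward() entry points.)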
* * FFTW_FORWARDS === -1 * FFTW_BACKWARDS === +1 * * The default direction is 0 === forwards AND backwards */ int checkDim; if(z>1) checkDim=3; else if(y>1) checkDim=2; else checkDim=1; if(checkDim != dimension) REPORT_ERROR("You are trying to change the dimesion of a MklFFT transformer, which is not allowed"); if( !( (setDirection==-1)||(setDirection==0)||(setDirection==1) ) ) { std::cerr << "*ERROR : Setting a MklFFT transformer direction to non-defined value" << std::endl; return; } direction = setDirection; if( x == xSize && y == ySize && z == zSize && planSet) return; clear(); xSize = x; ySize = y; zSize = z; xFSize = x/2 + 1; yFSize = y; zFSize = z; if ((xSize * ySize * zSize)==0) ACC_PTR_DEBUG_FATAL("Reals array resized to size zero.\n"); // reals.resizeHostCopy(xSize * ySize * zSize); reals.freeHostIfSet(); reals.setSize(xSize * ySize * zSize); reals.hostAlloc(); if ((xFSize * yFSize * zFSize)==0) ACC_PTR_DEBUG_FATAL("Fouriers array resized to size zero.\n"); // fouriers.resizeHostCopy(xFSize * yFSize * zFSize); fouriers.freeHostIfSet(); fouriers.setSize(xFSize * yFSize * zFSize); fouriers.hostAlloc(); int N[3]; if(dimension == 1) N[0] = xSize; else if(dimension == 2){ N[0] = ySize; N[1] = xSize; } else { N[0] = zSize; N[1] = ySize; N[2] = xSize; } { tbb::spin_mutex::scoped_lock lock(mkl_mutex); #ifdef ACC_DOUBLE_PRECISION fPlanForward = fftw_plan_dft_r2c(dimension, N, reals(), (fftw_complex*) fouriers(), FFTW_ESTIMATE); fPlanBackward = fftw_plan_dft_c2r(dimension, N, (fftw_complex*) fouriers(), reals(), FFTW_ESTIMATE); #else fPlanForward = fftwf_plan_dft_r2c(dimension, N, reals(), (fftwf_complex*) fouriers(), FFTW_ESTIMATE); fPlanBackward = fftwf_plan_dft_c2r(dimension, N, (fftwf_complex*) fouriers(), reals(), FFTW_ESTIMATE); #endif planSet = true; } } void forward() { if(direction==1) { std::cout << "trying to execute a forward plan for a MKL FFT transformer which is backwards-only" << std::endl; return; } #ifdef ACC_DOUBLE_PRECISION fftw_execute_dft_r2c(fPlanForward, reals(), (fftw_complex*) fouriers()); #else fftwf_execute_dft_r2c(fPlanForward, reals(), (fftwf_complex*) fouriers()); #endif } void backward() { if(direction==-1) { std::cout << "trying to execute a backwards plan for a MKL FFT transformer which is forwards-only" << std::endl; return; } #ifdef ACC_DOUBLE_PRECISION fftw_execute_dft_c2r(fPlanBackward, (fftw_complex*) fouriers(), reals()); #else fftwf_execute_dft_c2r(fPlanBackward, (fftwf_complex*) fouriers(), reals()); #endif } void clear() { reals.freeIfSet(); fouriers.freeIfSet(); if (planSet) { tbb::spin_mutex::scoped_lock lock(mkl_mutex); #ifdef ACC_DOUBLE_PRECISION fftw_destroy_plan(fPlanForward); fftw_destroy_plan(fPlanBackward); #else fftwf_destroy_plan(fPlanForward); fftwf_destroy_plan(fPlanBackward); #endif fPlanForward = fPlanBackward = NULL; planSet = false; } } ~MklFFT() { clear(); } }; #endif relion-3.1.3/src/acc/cuda/000077500000000000000000000000001411340063500152345ustar00rootroot00000000000000relion-3.1.3/src/acc/cuda/cub/000077500000000000000000000000001411340063500160055ustar00rootroot00000000000000relion-3.1.3/src/acc/cuda/cub/agent/000077500000000000000000000000001411340063500171035ustar00rootroot00000000000000relion-3.1.3/src/acc/cuda/cub/agent/agent_histogram.cuh000066400000000000000000001010631411340063500227600ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::AgentHistogram implements a stateful abstraction of CUDA thread blocks for participating in device-wide histogram . */ #pragma once #include #include "../util_type.cuh" #include "../block/block_load.cuh" #include "../grid/grid_queue.cuh" #include "../iterator/cache_modified_input_iterator.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * Tuning policy ******************************************************************************/ /** * */ enum BlockHistogramMemoryPreference { GMEM, SMEM, BLEND }; /** * Parameterizable tuning policy type for AgentHistogram */ template < int _BLOCK_THREADS, ///< Threads per thread block int _PIXELS_PER_THREAD, ///< Pixels per thread (per tile of input) BlockLoadAlgorithm _LOAD_ALGORITHM, ///< The BlockLoad algorithm to use CacheLoadModifier _LOAD_MODIFIER, ///< Cache load modifier for reading input elements bool _RLE_COMPRESS, ///< Whether to perform localized RLE to compress samples before histogramming BlockHistogramMemoryPreference _MEM_PREFERENCE, ///< Whether to prefer privatized shared-memory bins (versus privatized global-memory bins) bool _WORK_STEALING> ///< Whether to dequeue tiles from a global work queue struct AgentHistogramPolicy { enum { BLOCK_THREADS = _BLOCK_THREADS, ///< Threads per thread block PIXELS_PER_THREAD = _PIXELS_PER_THREAD, ///< Pixels per thread (per tile of input) IS_RLE_COMPRESS = _RLE_COMPRESS, ///< Whether to perform localized RLE to compress samples before histogramming MEM_PREFERENCE = _MEM_PREFERENCE, ///< Whether to prefer privatized shared-memory bins (versus privatized global-memory bins) IS_WORK_STEALING = _WORK_STEALING, ///< Whether to dequeue tiles from a global work queue }; static const BlockLoadAlgorithm LOAD_ALGORITHM = _LOAD_ALGORITHM; ///< The BlockLoad algorithm to use static const CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER; ///< Cache 
load modifier for reading input elements }; /****************************************************************************** * Thread block abstractions ******************************************************************************/ /** * \brief AgentHistogram implements a stateful abstraction of CUDA thread blocks for participating in device-wide histogram . */ template < typename AgentHistogramPolicyT, ///< Parameterized AgentHistogramPolicy tuning policy type int PRIVATIZED_SMEM_BINS, ///< Number of privatized shared-memory histogram bins of any channel. Zero indicates privatized counters to be maintained in device-accessible memory. int NUM_CHANNELS, ///< Number of channels interleaved in the input data. Supports up to four channels. int NUM_ACTIVE_CHANNELS, ///< Number of channels actively being histogrammed typename SampleIteratorT, ///< Random-access input iterator type for reading samples typename CounterT, ///< Integer type for counting sample occurrences per histogram bin typename PrivatizedDecodeOpT, ///< The transform operator type for determining privatized counter indices from samples, one for each channel typename OutputDecodeOpT, ///< The transform operator type for determining output bin-ids from privatized counter indices, one for each channel typename OffsetT, ///< Signed integer type for global offsets int PTX_ARCH = CUB_PTX_ARCH> ///< PTX compute capability struct AgentHistogram { //--------------------------------------------------------------------- // Types and constants //--------------------------------------------------------------------- /// The sample type of the input iterator typedef typename std::iterator_traits::value_type SampleT; /// The pixel type of SampleT typedef typename CubVector::Type PixelT; /// The quad type of SampleT typedef typename CubVector::Type QuadT; /// Constants enum { BLOCK_THREADS = AgentHistogramPolicyT::BLOCK_THREADS, PIXELS_PER_THREAD = AgentHistogramPolicyT::PIXELS_PER_THREAD, SAMPLES_PER_THREAD = PIXELS_PER_THREAD * NUM_CHANNELS, QUADS_PER_THREAD = SAMPLES_PER_THREAD / 4, TILE_PIXELS = PIXELS_PER_THREAD * BLOCK_THREADS, TILE_SAMPLES = SAMPLES_PER_THREAD * BLOCK_THREADS, IS_RLE_COMPRESS = AgentHistogramPolicyT::IS_RLE_COMPRESS, MEM_PREFERENCE = (PRIVATIZED_SMEM_BINS > 0) ? 
AgentHistogramPolicyT::MEM_PREFERENCE : GMEM, IS_WORK_STEALING = AgentHistogramPolicyT::IS_WORK_STEALING, }; /// Cache load modifier for reading input elements static const CacheLoadModifier LOAD_MODIFIER = AgentHistogramPolicyT::LOAD_MODIFIER; /// Input iterator wrapper type (for applying cache modifier) typedef typename If::VALUE, CacheModifiedInputIterator, // Wrap the native input pointer with CacheModifiedInputIterator SampleIteratorT>::Type // Directly use the supplied input iterator type WrappedSampleIteratorT; /// Pixel input iterator type (for applying cache modifier) typedef CacheModifiedInputIterator WrappedPixelIteratorT; /// Qaud input iterator type (for applying cache modifier) typedef CacheModifiedInputIterator WrappedQuadIteratorT; /// Parameterized BlockLoad type for samples typedef BlockLoad< SampleT, BLOCK_THREADS, SAMPLES_PER_THREAD, AgentHistogramPolicyT::LOAD_ALGORITHM> BlockLoadSampleT; /// Parameterized BlockLoad type for pixels typedef BlockLoad< PixelT, BLOCK_THREADS, PIXELS_PER_THREAD, AgentHistogramPolicyT::LOAD_ALGORITHM> BlockLoadPixelT; /// Parameterized BlockLoad type for quads typedef BlockLoad< QuadT, BLOCK_THREADS, QUADS_PER_THREAD, AgentHistogramPolicyT::LOAD_ALGORITHM> BlockLoadQuadT; /// Shared memory type required by this thread block struct _TempStorage { CounterT histograms[NUM_ACTIVE_CHANNELS][PRIVATIZED_SMEM_BINS + 1]; // Smem needed for block-privatized smem histogram (with 1 word of padding) int tile_idx; // Aliasable storage layout union Aliasable { typename BlockLoadSampleT::TempStorage sample_load; // Smem needed for loading a tile of samples typename BlockLoadPixelT::TempStorage pixel_load; // Smem needed for loading a tile of pixels typename BlockLoadQuadT::TempStorage quad_load; // Smem needed for loading a tile of quads } aliasable; }; /// Temporary storage type (unionable) struct TempStorage : Uninitialized<_TempStorage> {}; //--------------------------------------------------------------------- // Per-thread fields //--------------------------------------------------------------------- /// Reference to temp_storage _TempStorage &temp_storage; /// Sample input iterator (with cache modifier applied, if possible) WrappedSampleIteratorT d_wrapped_samples; /// Native pointer for input samples (possibly NULL if unavailable) SampleT* d_native_samples; /// The number of output bins for each channel int (&num_output_bins)[NUM_ACTIVE_CHANNELS]; /// The number of privatized bins for each channel int (&num_privatized_bins)[NUM_ACTIVE_CHANNELS]; /// Reference to gmem privatized histograms for each channel CounterT* d_privatized_histograms[NUM_ACTIVE_CHANNELS]; /// Reference to final output histograms (gmem) CounterT* (&d_output_histograms)[NUM_ACTIVE_CHANNELS]; /// The transform operator for determining output bin-ids from privatized counter indices, one for each channel OutputDecodeOpT (&output_decode_op)[NUM_ACTIVE_CHANNELS]; /// The transform operator for determining privatized counter indices from samples, one for each channel PrivatizedDecodeOpT (&privatized_decode_op)[NUM_ACTIVE_CHANNELS]; /// Whether to prefer privatized smem counters vs privatized global counters bool prefer_smem; //--------------------------------------------------------------------- // Initialize privatized bin counters //--------------------------------------------------------------------- // Initialize privatized bin counters __device__ __forceinline__ void InitBinCounters(CounterT* privatized_histograms[NUM_ACTIVE_CHANNELS]) { // Initialize histogram bin counts to zeros 
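        // Each thread strides through the privatized counters of every active
        // channel (thread t clears bins t, t + BLOCK_THREADS, t + 2*BLOCK_THREADS, ...),
        // so the block cooperatively zeroes the whole histogram before the
        // CTA_SYNC() barrier below lets any thread start accumulating samples.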
#pragma unroll for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) { for (int privatized_bin = threadIdx.x; privatized_bin < num_privatized_bins[CHANNEL]; privatized_bin += BLOCK_THREADS) { privatized_histograms[CHANNEL][privatized_bin] = 0; } } // Barrier to make sure all threads are done updating counters CTA_SYNC(); } // Initialize privatized bin counters. Specialized for privatized shared-memory counters __device__ __forceinline__ void InitSmemBinCounters() { CounterT* privatized_histograms[NUM_ACTIVE_CHANNELS]; for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) privatized_histograms[CHANNEL] = temp_storage.histograms[CHANNEL]; InitBinCounters(privatized_histograms); } // Initialize privatized bin counters. Specialized for privatized global-memory counters __device__ __forceinline__ void InitGmemBinCounters() { InitBinCounters(d_privatized_histograms); } //--------------------------------------------------------------------- // Update final output histograms //--------------------------------------------------------------------- // Update final output histograms from privatized histograms __device__ __forceinline__ void StoreOutput(CounterT* privatized_histograms[NUM_ACTIVE_CHANNELS]) { // Barrier to make sure all threads are done updating counters CTA_SYNC(); // Apply privatized bin counts to output bin counts #pragma unroll for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) { int channel_bins = num_privatized_bins[CHANNEL]; for (int privatized_bin = threadIdx.x; privatized_bin < channel_bins; privatized_bin += BLOCK_THREADS) { int output_bin = -1; CounterT count = privatized_histograms[CHANNEL][privatized_bin]; bool is_valid = count > 0; output_decode_op[CHANNEL].template BinSelect((SampleT) privatized_bin, output_bin, is_valid); if (output_bin >= 0) { atomicAdd(&d_output_histograms[CHANNEL][output_bin], count); } } } } // Update final output histograms from privatized histograms. Specialized for privatized shared-memory counters __device__ __forceinline__ void StoreSmemOutput() { CounterT* privatized_histograms[NUM_ACTIVE_CHANNELS]; for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) privatized_histograms[CHANNEL] = temp_storage.histograms[CHANNEL]; StoreOutput(privatized_histograms); } // Update final output histograms from privatized histograms. Specialized for privatized global-memory counters __device__ __forceinline__ void StoreGmemOutput() { StoreOutput(d_privatized_histograms); } //--------------------------------------------------------------------- // Tile accumulation //--------------------------------------------------------------------- // Accumulate pixels. Specialized for RLE compression. 
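    // Runs of consecutive pixels that map to the same bin are compressed into a
    // single update: the running 'accumulator' count is only flushed with an
    // atomicAdd when the bin id changes (or at the last pixel), which reduces
    // atomic contention on the privatized histogram for locally uniform images.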
__device__ __forceinline__ void AccumulatePixels( SampleT samples[PIXELS_PER_THREAD][NUM_CHANNELS], bool is_valid[PIXELS_PER_THREAD], CounterT* privatized_histograms[NUM_ACTIVE_CHANNELS], Int2Type is_rle_compress) { #pragma unroll for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) { // Bin pixels int bins[PIXELS_PER_THREAD]; #pragma unroll for (int PIXEL = 0; PIXEL < PIXELS_PER_THREAD; ++PIXEL) { bins[PIXEL] = -1; privatized_decode_op[CHANNEL].template BinSelect(samples[PIXEL][CHANNEL], bins[PIXEL], is_valid[PIXEL]); } CounterT accumulator = 1; #pragma unroll for (int PIXEL = 0; PIXEL < PIXELS_PER_THREAD - 1; ++PIXEL) { if (bins[PIXEL] != bins[PIXEL + 1]) { if (bins[PIXEL] >= 0) atomicAdd(privatized_histograms[CHANNEL] + bins[PIXEL], accumulator); accumulator = 0; } accumulator++; } // Last pixel if (bins[PIXELS_PER_THREAD - 1] >= 0) atomicAdd(privatized_histograms[CHANNEL] + bins[PIXELS_PER_THREAD - 1], accumulator); } } // Accumulate pixels. Specialized for individual accumulation of each pixel. __device__ __forceinline__ void AccumulatePixels( SampleT samples[PIXELS_PER_THREAD][NUM_CHANNELS], bool is_valid[PIXELS_PER_THREAD], CounterT* privatized_histograms[NUM_ACTIVE_CHANNELS], Int2Type is_rle_compress) { #pragma unroll for (int PIXEL = 0; PIXEL < PIXELS_PER_THREAD; ++PIXEL) { #pragma unroll for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) { int bin = -1; privatized_decode_op[CHANNEL].template BinSelect(samples[PIXEL][CHANNEL], bin, is_valid[PIXEL]); if (bin >= 0) atomicAdd(privatized_histograms[CHANNEL] + bin, 1); } } } /** * Accumulate pixel, specialized for smem privatized histogram */ __device__ __forceinline__ void AccumulateSmemPixels( SampleT samples[PIXELS_PER_THREAD][NUM_CHANNELS], bool is_valid[PIXELS_PER_THREAD]) { CounterT* privatized_histograms[NUM_ACTIVE_CHANNELS]; for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) privatized_histograms[CHANNEL] = temp_storage.histograms[CHANNEL]; AccumulatePixels(samples, is_valid, privatized_histograms, Int2Type()); } /** * Accumulate pixel, specialized for gmem privatized histogram */ __device__ __forceinline__ void AccumulateGmemPixels( SampleT samples[PIXELS_PER_THREAD][NUM_CHANNELS], bool is_valid[PIXELS_PER_THREAD]) { AccumulatePixels(samples, is_valid, d_privatized_histograms, Int2Type()); } //--------------------------------------------------------------------- // Tile loading //--------------------------------------------------------------------- // Load full, aligned tile using pixel iterator (multi-channel) template __device__ __forceinline__ void LoadFullAlignedTile( OffsetT block_offset, int valid_samples, SampleT (&samples)[PIXELS_PER_THREAD][NUM_CHANNELS], Int2Type<_NUM_ACTIVE_CHANNELS> num_active_channels) { typedef PixelT AliasedPixels[PIXELS_PER_THREAD]; WrappedPixelIteratorT d_wrapped_pixels((PixelT*) (d_native_samples + block_offset)); // Load using a wrapped pixel iterator BlockLoadPixelT(temp_storage.aliasable.pixel_load).Load( d_wrapped_pixels, reinterpret_cast(samples)); } // Load full, aligned tile using quad iterator (single-channel) __device__ __forceinline__ void LoadFullAlignedTile( OffsetT block_offset, int valid_samples, SampleT (&samples)[PIXELS_PER_THREAD][NUM_CHANNELS], Int2Type<1> num_active_channels) { typedef QuadT AliasedQuads[QUADS_PER_THREAD]; WrappedQuadIteratorT d_wrapped_quads((QuadT*) (d_native_samples + block_offset)); // Load using a wrapped quad iterator BlockLoadQuadT(temp_storage.aliasable.quad_load).Load( d_wrapped_quads, 
reinterpret_cast(samples)); } // Load full, aligned tile __device__ __forceinline__ void LoadTile( OffsetT block_offset, int valid_samples, SampleT (&samples)[PIXELS_PER_THREAD][NUM_CHANNELS], Int2Type is_full_tile, Int2Type is_aligned) { LoadFullAlignedTile(block_offset, valid_samples, samples, Int2Type()); } // Load full, mis-aligned tile using sample iterator __device__ __forceinline__ void LoadTile( OffsetT block_offset, int valid_samples, SampleT (&samples)[PIXELS_PER_THREAD][NUM_CHANNELS], Int2Type is_full_tile, Int2Type is_aligned) { typedef SampleT AliasedSamples[SAMPLES_PER_THREAD]; // Load using sample iterator BlockLoadSampleT(temp_storage.aliasable.sample_load).Load( d_wrapped_samples + block_offset, reinterpret_cast(samples)); } // Load partially-full, aligned tile using the pixel iterator __device__ __forceinline__ void LoadTile( OffsetT block_offset, int valid_samples, SampleT (&samples)[PIXELS_PER_THREAD][NUM_CHANNELS], Int2Type is_full_tile, Int2Type is_aligned) { typedef PixelT AliasedPixels[PIXELS_PER_THREAD]; WrappedPixelIteratorT d_wrapped_pixels((PixelT*) (d_native_samples + block_offset)); int valid_pixels = valid_samples / NUM_CHANNELS; // Load using a wrapped pixel iterator BlockLoadPixelT(temp_storage.aliasable.pixel_load).Load( d_wrapped_pixels, reinterpret_cast(samples), valid_pixels); } // Load partially-full, mis-aligned tile using sample iterator __device__ __forceinline__ void LoadTile( OffsetT block_offset, int valid_samples, SampleT (&samples)[PIXELS_PER_THREAD][NUM_CHANNELS], Int2Type is_full_tile, Int2Type is_aligned) { typedef SampleT AliasedSamples[SAMPLES_PER_THREAD]; BlockLoadSampleT(temp_storage.aliasable.sample_load).Load( d_wrapped_samples + block_offset, reinterpret_cast(samples), valid_samples); } //--------------------------------------------------------------------- // Tile processing //--------------------------------------------------------------------- // Consume a tile of data samples template < bool IS_ALIGNED, // Whether the tile offset is aligned (quad-aligned for single-channel, pixel-aligned for multi-channel) bool IS_FULL_TILE> // Whether the tile is full __device__ __forceinline__ void ConsumeTile(OffsetT block_offset, int valid_samples) { SampleT samples[PIXELS_PER_THREAD][NUM_CHANNELS]; bool is_valid[PIXELS_PER_THREAD]; // Load tile LoadTile( block_offset, valid_samples, samples, Int2Type(), Int2Type()); // Set valid flags #pragma unroll for (int PIXEL = 0; PIXEL < PIXELS_PER_THREAD; ++PIXEL) is_valid[PIXEL] = IS_FULL_TILE || (((threadIdx.x * PIXELS_PER_THREAD + PIXEL) * NUM_CHANNELS) < valid_samples); // Accumulate samples #if CUB_PTX_ARCH >= 120 if (prefer_smem) AccumulateSmemPixels(samples, is_valid); else AccumulateGmemPixels(samples, is_valid); #else AccumulateGmemPixels(samples, is_valid); #endif } // Consume row tiles. 
Specialized for work-stealing from queue template __device__ __forceinline__ void ConsumeTiles( OffsetT num_row_pixels, ///< The number of multi-channel pixels per row in the region of interest OffsetT num_rows, ///< The number of rows in the region of interest OffsetT row_stride_samples, ///< The number of samples between starts of consecutive rows in the region of interest int tiles_per_row, ///< Number of image tiles per row GridQueue tile_queue, Int2Type is_work_stealing) { int num_tiles = num_rows * tiles_per_row; int tile_idx = (blockIdx.y * gridDim.x) + blockIdx.x; OffsetT num_even_share_tiles = gridDim.x * gridDim.y; while (tile_idx < num_tiles) { int row = tile_idx / tiles_per_row; int col = tile_idx - (row * tiles_per_row); OffsetT row_offset = row * row_stride_samples; OffsetT col_offset = (col * TILE_SAMPLES); OffsetT tile_offset = row_offset + col_offset; if (col == tiles_per_row - 1) { // Consume a partially-full tile at the end of the row OffsetT num_remaining = (num_row_pixels * NUM_CHANNELS) - col_offset; ConsumeTile(tile_offset, num_remaining); } else { // Consume full tile ConsumeTile(tile_offset, TILE_SAMPLES); } CTA_SYNC(); // Get next tile if (threadIdx.x == 0) temp_storage.tile_idx = tile_queue.Drain(1) + num_even_share_tiles; CTA_SYNC(); tile_idx = temp_storage.tile_idx; } } // Consume row tiles. Specialized for even-share (striped across thread blocks) template __device__ __forceinline__ void ConsumeTiles( OffsetT num_row_pixels, ///< The number of multi-channel pixels per row in the region of interest OffsetT num_rows, ///< The number of rows in the region of interest OffsetT row_stride_samples, ///< The number of samples between starts of consecutive rows in the region of interest int tiles_per_row, ///< Number of image tiles per row GridQueue tile_queue, Int2Type is_work_stealing) { for (int row = blockIdx.y; row < num_rows; row += gridDim.y) { OffsetT row_begin = row * row_stride_samples; OffsetT row_end = row_begin + (num_row_pixels * NUM_CHANNELS); OffsetT tile_offset = row_begin + (blockIdx.x * TILE_SAMPLES); while (tile_offset < row_end) { OffsetT num_remaining = row_end - tile_offset; if (num_remaining < TILE_SAMPLES) { // Consume partial tile ConsumeTile(tile_offset, num_remaining); break; } // Consume full tile ConsumeTile(tile_offset, TILE_SAMPLES); tile_offset += gridDim.x * TILE_SAMPLES; } } } //--------------------------------------------------------------------- // Parameter extraction //--------------------------------------------------------------------- // Return a native pixel pointer (specialized for CacheModifiedInputIterator types) template < CacheLoadModifier _MODIFIER, typename _ValueT, typename _OffsetT> __device__ __forceinline__ SampleT* NativePointer(CacheModifiedInputIterator<_MODIFIER, _ValueT, _OffsetT> itr) { return itr.ptr; } // Return a native pixel pointer (specialized for other types) template __device__ __forceinline__ SampleT* NativePointer(IteratorT itr) { return NULL; } //--------------------------------------------------------------------- // Interface //--------------------------------------------------------------------- /** * Constructor */ __device__ __forceinline__ AgentHistogram( TempStorage &temp_storage, ///< Reference to temp_storage SampleIteratorT d_samples, ///< Input data to reduce int (&num_output_bins)[NUM_ACTIVE_CHANNELS], ///< The number bins per final output histogram int (&num_privatized_bins)[NUM_ACTIVE_CHANNELS], ///< The number bins per privatized histogram CounterT* 
(&d_output_histograms)[NUM_ACTIVE_CHANNELS], ///< Reference to final output histograms CounterT* (&d_privatized_histograms)[NUM_ACTIVE_CHANNELS], ///< Reference to privatized histograms OutputDecodeOpT (&output_decode_op)[NUM_ACTIVE_CHANNELS], ///< The transform operator for determining output bin-ids from privatized counter indices, one for each channel PrivatizedDecodeOpT (&privatized_decode_op)[NUM_ACTIVE_CHANNELS]) ///< The transform operator for determining privatized counter indices from samples, one for each channel : temp_storage(temp_storage.Alias()), d_wrapped_samples(d_samples), num_output_bins(num_output_bins), num_privatized_bins(num_privatized_bins), d_output_histograms(d_output_histograms), privatized_decode_op(privatized_decode_op), output_decode_op(output_decode_op), d_native_samples(NativePointer(d_wrapped_samples)), prefer_smem((MEM_PREFERENCE == SMEM) ? true : // prefer smem privatized histograms (MEM_PREFERENCE == GMEM) ? false : // prefer gmem privatized histograms blockIdx.x & 1) // prefer blended privatized histograms { int blockId = (blockIdx.y * gridDim.x) + blockIdx.x; // Initialize the locations of this block's privatized histograms for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) this->d_privatized_histograms[CHANNEL] = d_privatized_histograms[CHANNEL] + (blockId * num_privatized_bins[CHANNEL]); } /** * Consume image */ __device__ __forceinline__ void ConsumeTiles( OffsetT num_row_pixels, ///< The number of multi-channel pixels per row in the region of interest OffsetT num_rows, ///< The number of rows in the region of interest OffsetT row_stride_samples, ///< The number of samples between starts of consecutive rows in the region of interest int tiles_per_row, ///< Number of image tiles per row GridQueue tile_queue) ///< Queue descriptor for assigning tiles of work to thread blocks { // Check whether all row starting offsets are quad-aligned (in single-channel) or pixel-aligned (in multi-channel) int quad_mask = AlignBytes::ALIGN_BYTES - 1; int pixel_mask = AlignBytes::ALIGN_BYTES - 1; size_t row_bytes = sizeof(SampleT) * row_stride_samples; bool quad_aligned_rows = (NUM_CHANNELS == 1) && (SAMPLES_PER_THREAD % 4 == 0) && // Single channel ((size_t(d_native_samples) & quad_mask) == 0) && // ptr is quad-aligned ((num_rows == 1) || ((row_bytes & quad_mask) == 0)); // number of row-samples is a multiple of the alignment of the quad bool pixel_aligned_rows = (NUM_CHANNELS > 1) && // Multi channel ((size_t(d_native_samples) & pixel_mask) == 0) && // ptr is pixel-aligned ((row_bytes & pixel_mask) == 0); // number of row-samples is a multiple of the alignment of the pixel // Whether rows are aligned and can be vectorized if ((d_native_samples != NULL) && (quad_aligned_rows || pixel_aligned_rows)) ConsumeTiles(num_row_pixels, num_rows, row_stride_samples, tiles_per_row, tile_queue, Int2Type()); else ConsumeTiles(num_row_pixels, num_rows, row_stride_samples, tiles_per_row, tile_queue, Int2Type()); } /** * Initialize privatized bin counters. Specialized for privatized shared-memory counters */ __device__ __forceinline__ void InitBinCounters() { if (prefer_smem) InitSmemBinCounters(); else InitGmemBinCounters(); } /** * Store privatized histogram to device-accessible memory. 
Specialized for privatized shared-memory counters */ __device__ __forceinline__ void StoreOutput() { if (prefer_smem) StoreSmemOutput(); else StoreGmemOutput(); } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/agent/agent_radix_sort_downsweep.cuh000066400000000000000000000654201411340063500252420ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * AgentRadixSortDownsweep implements a stateful abstraction of CUDA thread blocks for participating in device-wide radix sort downsweep . 
*/ #pragma once #include #include "../thread/thread_load.cuh" #include "../block/block_load.cuh" #include "../block/block_store.cuh" #include "../block/block_radix_rank.cuh" #include "../block/block_exchange.cuh" #include "../util_type.cuh" #include "../iterator/cache_modified_input_iterator.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * Tuning policy types ******************************************************************************/ /** * Radix ranking algorithm */ enum RadixRankAlgorithm { RADIX_RANK_BASIC, RADIX_RANK_MEMOIZE, RADIX_RANK_MATCH }; /** * Parameterizable tuning policy type for AgentRadixSortDownsweep */ template < int _BLOCK_THREADS, ///< Threads per thread block int _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) BlockLoadAlgorithm _LOAD_ALGORITHM, ///< The BlockLoad algorithm to use CacheLoadModifier _LOAD_MODIFIER, ///< Cache load modifier for reading keys (and values) RadixRankAlgorithm _RANK_ALGORITHM, ///< The radix ranking algorithm to use BlockScanAlgorithm _SCAN_ALGORITHM, ///< The block scan algorithm to use int _RADIX_BITS> ///< The number of radix bits, i.e., log2(bins) struct AgentRadixSortDownsweepPolicy { enum { BLOCK_THREADS = _BLOCK_THREADS, ///< Threads per thread block ITEMS_PER_THREAD = _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) RADIX_BITS = _RADIX_BITS, ///< The number of radix bits, i.e., log2(bins) }; static const BlockLoadAlgorithm LOAD_ALGORITHM = _LOAD_ALGORITHM; ///< The BlockLoad algorithm to use static const CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER; ///< Cache load modifier for reading keys (and values) static const RadixRankAlgorithm RANK_ALGORITHM = _RANK_ALGORITHM; ///< The radix ranking algorithm to use static const BlockScanAlgorithm SCAN_ALGORITHM = _SCAN_ALGORITHM; ///< The BlockScan algorithm to use }; /****************************************************************************** * Thread block abstractions ******************************************************************************/ /** * \brief AgentRadixSortDownsweep implements a stateful abstraction of CUDA thread blocks for participating in device-wide radix sort downsweep . 
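 * Each thread block ranks a tile of keys by the current digit and scatters the keys
 * (and, for key-value sorts, the associated values) to output positions derived from
 * the globally scanned per-digit bin offsets.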
*/ template < typename AgentRadixSortDownsweepPolicy, ///< Parameterized AgentRadixSortDownsweepPolicy tuning policy type bool IS_DESCENDING, ///< Whether or not the sorted-order is high-to-low typename KeyT, ///< KeyT type typename ValueT, ///< ValueT type typename OffsetT> ///< Signed integer type for global offsets struct AgentRadixSortDownsweep { //--------------------------------------------------------------------- // Type definitions and constants //--------------------------------------------------------------------- // Appropriate unsigned-bits representation of KeyT typedef typename Traits::UnsignedBits UnsignedBits; static const UnsignedBits LOWEST_KEY = Traits::LOWEST_KEY; static const UnsignedBits MAX_KEY = Traits::MAX_KEY; static const BlockLoadAlgorithm LOAD_ALGORITHM = AgentRadixSortDownsweepPolicy::LOAD_ALGORITHM; static const CacheLoadModifier LOAD_MODIFIER = AgentRadixSortDownsweepPolicy::LOAD_MODIFIER; static const RadixRankAlgorithm RANK_ALGORITHM = AgentRadixSortDownsweepPolicy::RANK_ALGORITHM; static const BlockScanAlgorithm SCAN_ALGORITHM = AgentRadixSortDownsweepPolicy::SCAN_ALGORITHM; enum { BLOCK_THREADS = AgentRadixSortDownsweepPolicy::BLOCK_THREADS, ITEMS_PER_THREAD = AgentRadixSortDownsweepPolicy::ITEMS_PER_THREAD, RADIX_BITS = AgentRadixSortDownsweepPolicy::RADIX_BITS, TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, RADIX_DIGITS = 1 << RADIX_BITS, KEYS_ONLY = Equals::VALUE, }; // Input iterator wrapper type (for applying cache modifier)s typedef CacheModifiedInputIterator KeysItr; typedef CacheModifiedInputIterator ValuesItr; // Radix ranking type to use typedef typename If<(RANK_ALGORITHM == RADIX_RANK_BASIC), BlockRadixRank, typename If<(RANK_ALGORITHM == RADIX_RANK_MEMOIZE), BlockRadixRank, BlockRadixRankMatch >::Type >::Type BlockRadixRankT; enum { /// Number of bin-starting offsets tracked per thread BINS_TRACKED_PER_THREAD = BlockRadixRankT::BINS_TRACKED_PER_THREAD }; // BlockLoad type (keys) typedef BlockLoad< UnsignedBits, BLOCK_THREADS, ITEMS_PER_THREAD, LOAD_ALGORITHM> BlockLoadKeysT; // BlockLoad type (values) typedef BlockLoad< ValueT, BLOCK_THREADS, ITEMS_PER_THREAD, LOAD_ALGORITHM> BlockLoadValuesT; /** * Shared memory storage layout */ union __align__(16) _TempStorage { typename BlockLoadKeysT::TempStorage load_keys; typename BlockLoadValuesT::TempStorage load_values; typename BlockRadixRankT::TempStorage radix_rank; struct { UnsignedBits exchange_keys[TILE_ITEMS]; OffsetT relative_bin_offsets[RADIX_DIGITS]; }; ValueT exchange_values[TILE_ITEMS]; OffsetT exclusive_digit_prefix[RADIX_DIGITS]; }; /// Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; //--------------------------------------------------------------------- // Thread fields //--------------------------------------------------------------------- // Shared storage for this CTA _TempStorage &temp_storage; // Input and output device pointers KeysItr d_keys_in; ValuesItr d_values_in; UnsignedBits *d_keys_out; ValueT *d_values_out; // The global scatter base offset for each digit (valid in the first RADIX_DIGITS threads) OffsetT bin_offset[BINS_TRACKED_PER_THREAD]; // The least-significant bit position of the current digit to extract int current_bit; // Number of bits in current digit int num_bits; // Whether to short-cirucit int short_circuit; //--------------------------------------------------------------------- // Utility methods //--------------------------------------------------------------------- /** * Scatter ranked keys through 
shared memory, then to device-accessible memory */ template __device__ __forceinline__ void ScatterKeys( UnsignedBits (&twiddled_keys)[ITEMS_PER_THREAD], OffsetT (&relative_bin_offsets)[ITEMS_PER_THREAD], int (&ranks)[ITEMS_PER_THREAD], OffsetT valid_items) { #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { temp_storage.exchange_keys[ranks[ITEM]] = twiddled_keys[ITEM]; } CTA_SYNC(); #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { UnsignedBits key = temp_storage.exchange_keys[threadIdx.x + (ITEM * BLOCK_THREADS)]; UnsignedBits digit = BFE(key, current_bit, num_bits); relative_bin_offsets[ITEM] = temp_storage.relative_bin_offsets[digit]; // Un-twiddle key = Traits::TwiddleOut(key); if (FULL_TILE || (static_cast(threadIdx.x + (ITEM * BLOCK_THREADS)) < valid_items)) { d_keys_out[relative_bin_offsets[ITEM] + threadIdx.x + (ITEM * BLOCK_THREADS)] = key; } } } /** * Scatter ranked values through shared memory, then to device-accessible memory */ template __device__ __forceinline__ void ScatterValues( ValueT (&values)[ITEMS_PER_THREAD], OffsetT (&relative_bin_offsets)[ITEMS_PER_THREAD], int (&ranks)[ITEMS_PER_THREAD], OffsetT valid_items) { CTA_SYNC(); #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { temp_storage.exchange_values[ranks[ITEM]] = values[ITEM]; } CTA_SYNC(); #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { ValueT value = temp_storage.exchange_values[threadIdx.x + (ITEM * BLOCK_THREADS)]; if (FULL_TILE || (static_cast(threadIdx.x + (ITEM * BLOCK_THREADS)) < valid_items)) { d_values_out[relative_bin_offsets[ITEM] + threadIdx.x + (ITEM * BLOCK_THREADS)] = value; } } } /** * Load a tile of keys (specialized for full tile, any ranking algorithm) */ template __device__ __forceinline__ void LoadKeys( UnsignedBits (&keys)[ITEMS_PER_THREAD], OffsetT block_offset, OffsetT valid_items, UnsignedBits oob_item, Int2Type is_full_tile, Int2Type<_RANK_ALGORITHM> rank_algorithm) { BlockLoadKeysT(temp_storage.load_keys).Load( d_keys_in + block_offset, keys); CTA_SYNC(); } /** * Load a tile of keys (specialized for partial tile, any ranking algorithm) */ template __device__ __forceinline__ void LoadKeys( UnsignedBits (&keys)[ITEMS_PER_THREAD], OffsetT block_offset, OffsetT valid_items, UnsignedBits oob_item, Int2Type is_full_tile, Int2Type<_RANK_ALGORITHM> rank_algorithm) { BlockLoadKeysT(temp_storage.load_keys).Load( d_keys_in + block_offset, keys, valid_items, oob_item); CTA_SYNC(); } /** * Load a tile of keys (specialized for full tile, match ranking algorithm) */ __device__ __forceinline__ void LoadKeys( UnsignedBits (&keys)[ITEMS_PER_THREAD], OffsetT block_offset, OffsetT valid_items, UnsignedBits oob_item, Int2Type is_full_tile, Int2Type rank_algorithm) { LoadDirectWarpStriped(threadIdx.x, d_keys_in + block_offset, keys); } /** * Load a tile of keys (specialized for partial tile, match ranking algorithm) */ __device__ __forceinline__ void LoadKeys( UnsignedBits (&keys)[ITEMS_PER_THREAD], OffsetT block_offset, OffsetT valid_items, UnsignedBits oob_item, Int2Type is_full_tile, Int2Type rank_algorithm) { LoadDirectWarpStriped(threadIdx.x, d_keys_in + block_offset, keys, valid_items, oob_item); } /** * Load a tile of values (specialized for full tile, any ranking algorithm) */ template __device__ __forceinline__ void LoadValues( ValueT (&values)[ITEMS_PER_THREAD], OffsetT block_offset, OffsetT valid_items, Int2Type is_full_tile, Int2Type<_RANK_ALGORITHM> rank_algorithm) { BlockLoadValuesT(temp_storage.load_values).Load( 
d_values_in + block_offset, values); CTA_SYNC(); } /** * Load a tile of values (specialized for partial tile, any ranking algorithm) */ template __device__ __forceinline__ void LoadValues( ValueT (&values)[ITEMS_PER_THREAD], OffsetT block_offset, OffsetT valid_items, Int2Type is_full_tile, Int2Type<_RANK_ALGORITHM> rank_algorithm) { BlockLoadValuesT(temp_storage.load_values).Load( d_values_in + block_offset, values, valid_items); CTA_SYNC(); } /** * Load a tile of items (specialized for full tile, match ranking algorithm) */ __device__ __forceinline__ void LoadValues( ValueT (&values)[ITEMS_PER_THREAD], OffsetT block_offset, volatile OffsetT valid_items, Int2Type is_full_tile, Int2Type rank_algorithm) { LoadDirectWarpStriped(threadIdx.x, d_values_in + block_offset, values); } /** * Load a tile of items (specialized for partial tile, match ranking algorithm) */ __device__ __forceinline__ void LoadValues( ValueT (&values)[ITEMS_PER_THREAD], OffsetT block_offset, volatile OffsetT valid_items, Int2Type is_full_tile, Int2Type rank_algorithm) { LoadDirectWarpStriped(threadIdx.x, d_values_in + block_offset, values, valid_items); } /** * Truck along associated values */ template __device__ __forceinline__ void GatherScatterValues( OffsetT (&relative_bin_offsets)[ITEMS_PER_THREAD], int (&ranks)[ITEMS_PER_THREAD], OffsetT block_offset, OffsetT valid_items, Int2Type /*is_keys_only*/) { CTA_SYNC(); ValueT values[ITEMS_PER_THREAD]; LoadValues( values, block_offset, valid_items, Int2Type(), Int2Type()); ScatterValues( values, relative_bin_offsets, ranks, valid_items); } /** * Truck along associated values (specialized for key-only sorting) */ template __device__ __forceinline__ void GatherScatterValues( OffsetT (&/*relative_bin_offsets*/)[ITEMS_PER_THREAD], int (&/*ranks*/)[ITEMS_PER_THREAD], OffsetT /*block_offset*/, OffsetT /*valid_items*/, Int2Type /*is_keys_only*/) {} /** * Process tile */ template __device__ __forceinline__ void ProcessTile( OffsetT block_offset, const OffsetT &valid_items = TILE_ITEMS) { UnsignedBits keys[ITEMS_PER_THREAD]; int ranks[ITEMS_PER_THREAD]; OffsetT relative_bin_offsets[ITEMS_PER_THREAD]; // Assign default (min/max) value to all keys UnsignedBits default_key = (IS_DESCENDING) ? LOWEST_KEY : MAX_KEY; // Load tile of keys LoadKeys( keys, block_offset, valid_items, default_key, Int2Type(), Int2Type()); // Twiddle key bits if necessary #pragma unroll for (int KEY = 0; KEY < ITEMS_PER_THREAD; KEY++) { keys[KEY] = Traits::TwiddleIn(keys[KEY]); } // Rank the twiddled keys int exclusive_digit_prefix[BINS_TRACKED_PER_THREAD]; BlockRadixRankT(temp_storage.radix_rank).RankKeys( keys, ranks, current_bit, num_bits, exclusive_digit_prefix); CTA_SYNC(); // Share exclusive digit prefix #pragma unroll for (int track = 0; track < BINS_TRACKED_PER_THREAD; ++track) { int bin_idx = (threadIdx.x * BINS_TRACKED_PER_THREAD) + track; if ((BLOCK_THREADS == RADIX_DIGITS) || (bin_idx < RADIX_DIGITS)) { // Store exclusive prefix temp_storage.exclusive_digit_prefix[bin_idx] = exclusive_digit_prefix[track]; } } CTA_SYNC(); // Get inclusive digit prefix int inclusive_digit_prefix[BINS_TRACKED_PER_THREAD]; #pragma unroll for (int track = 0; track < BINS_TRACKED_PER_THREAD; ++track) { int bin_idx = (threadIdx.x * BINS_TRACKED_PER_THREAD) + track; if ((BLOCK_THREADS == RADIX_DIGITS) || (bin_idx < RADIX_DIGITS)) { if (IS_DESCENDING) { // Get inclusive digit prefix from exclusive prefix (higher bins come first) inclusive_digit_prefix[track] = (bin_idx == 0) ? 
(BLOCK_THREADS * ITEMS_PER_THREAD) : temp_storage.exclusive_digit_prefix[bin_idx - 1]; } else { // Get inclusive digit prefix from exclusive prefix (lower bins come first) inclusive_digit_prefix[track] = (bin_idx == RADIX_DIGITS - 1) ? (BLOCK_THREADS * ITEMS_PER_THREAD) : temp_storage.exclusive_digit_prefix[bin_idx + 1]; } } } CTA_SYNC(); // Update global scatter base offsets for each digit #pragma unroll for (int track = 0; track < BINS_TRACKED_PER_THREAD; ++track) { int bin_idx = (threadIdx.x * BINS_TRACKED_PER_THREAD) + track; if ((BLOCK_THREADS == RADIX_DIGITS) || (bin_idx < RADIX_DIGITS)) { bin_offset[track] -= exclusive_digit_prefix[track]; temp_storage.relative_bin_offsets[bin_idx] = bin_offset[track]; bin_offset[track] += inclusive_digit_prefix[track]; } } CTA_SYNC(); // Scatter keys ScatterKeys(keys, relative_bin_offsets, ranks, valid_items); // Gather/scatter values GatherScatterValues(relative_bin_offsets , ranks, block_offset, valid_items, Int2Type()); } //--------------------------------------------------------------------- // Copy shortcut //--------------------------------------------------------------------- /** * Copy tiles within the range of input */ template < typename InputIteratorT, typename T> __device__ __forceinline__ void Copy( InputIteratorT d_in, T *d_out, OffsetT block_offset, OffsetT block_end) { // Simply copy the input while (block_offset + TILE_ITEMS <= block_end) { T items[ITEMS_PER_THREAD]; LoadDirectStriped(threadIdx.x, d_in + block_offset, items); CTA_SYNC(); StoreDirectStriped(threadIdx.x, d_out + block_offset, items); block_offset += TILE_ITEMS; } // Clean up last partial tile with guarded-I/O if (block_offset < block_end) { OffsetT valid_items = block_end - block_offset; T items[ITEMS_PER_THREAD]; LoadDirectStriped(threadIdx.x, d_in + block_offset, items, valid_items); CTA_SYNC(); StoreDirectStriped(threadIdx.x, d_out + block_offset, items, valid_items); } } /** * Copy tiles within the range of input (specialized for NullType) */ template __device__ __forceinline__ void Copy( InputIteratorT /*d_in*/, NullType * /*d_out*/, OffsetT /*block_offset*/, OffsetT /*block_end*/) {} //--------------------------------------------------------------------- // Interface //--------------------------------------------------------------------- /** * Constructor */ __device__ __forceinline__ AgentRadixSortDownsweep( TempStorage &temp_storage, OffsetT (&bin_offset)[BINS_TRACKED_PER_THREAD], OffsetT num_items, const KeyT *d_keys_in, KeyT *d_keys_out, const ValueT *d_values_in, ValueT *d_values_out, int current_bit, int num_bits) : temp_storage(temp_storage.Alias()), d_keys_in(reinterpret_cast(d_keys_in)), d_values_in(d_values_in), d_keys_out(reinterpret_cast(d_keys_out)), d_values_out(d_values_out), current_bit(current_bit), num_bits(num_bits), short_circuit(1) { #pragma unroll for (int track = 0; track < BINS_TRACKED_PER_THREAD; ++track) { this->bin_offset[track] = bin_offset[track]; int bin_idx = (threadIdx.x * BINS_TRACKED_PER_THREAD) + track; if ((BLOCK_THREADS == RADIX_DIGITS) || (bin_idx < RADIX_DIGITS)) { // Short circuit if the histogram has only bin counts of only zeros or problem-size short_circuit = short_circuit && ((bin_offset[track] == 0) || (bin_offset[track] == num_items)); } } short_circuit = CTA_SYNC_AND(short_circuit); } /** * Constructor */ __device__ __forceinline__ AgentRadixSortDownsweep( TempStorage &temp_storage, OffsetT num_items, OffsetT *d_spine, const KeyT *d_keys_in, KeyT *d_keys_out, const ValueT *d_values_in, ValueT *d_values_out, int 
current_bit, int num_bits) : temp_storage(temp_storage.Alias()), d_keys_in(reinterpret_cast(d_keys_in)), d_values_in(d_values_in), d_keys_out(reinterpret_cast(d_keys_out)), d_values_out(d_values_out), current_bit(current_bit), num_bits(num_bits), short_circuit(1) { #pragma unroll for (int track = 0; track < BINS_TRACKED_PER_THREAD; ++track) { int bin_idx = (threadIdx.x * BINS_TRACKED_PER_THREAD) + track; // Load digit bin offsets (each of the first RADIX_DIGITS threads will load an offset for that digit) if ((BLOCK_THREADS == RADIX_DIGITS) || (bin_idx < RADIX_DIGITS)) { if (IS_DESCENDING) bin_idx = RADIX_DIGITS - bin_idx - 1; // Short circuit if the first block's histogram has only bin counts of only zeros or problem-size OffsetT first_block_bin_offset = d_spine[gridDim.x * bin_idx]; short_circuit = short_circuit && ((first_block_bin_offset == 0) || (first_block_bin_offset == num_items)); // Load my block's bin offset for my bin bin_offset[track] = d_spine[(gridDim.x * bin_idx) + blockIdx.x]; } } short_circuit = CTA_SYNC_AND(short_circuit); } /** * Distribute keys from a segment of input tiles. */ __device__ __forceinline__ void ProcessRegion( OffsetT block_offset, OffsetT block_end) { if (short_circuit) { // Copy keys Copy(d_keys_in, d_keys_out, block_offset, block_end); // Copy values Copy(d_values_in, d_values_out, block_offset, block_end); } else { // Process full tiles of tile_items while (block_offset + TILE_ITEMS <= block_end) { ProcessTile(block_offset); block_offset += TILE_ITEMS; CTA_SYNC(); } // Clean up last partial tile with guarded-I/O if (block_offset < block_end) { ProcessTile(block_offset, block_end - block_offset); } } } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/agent/agent_radix_sort_upsweep.cuh000066400000000000000000000427751411340063500247270ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ******************************************************************************/ /** * \file * AgentRadixSortUpsweep implements a stateful abstraction of CUDA thread blocks for participating in device-wide radix sort upsweep . */ #pragma once #include "../thread/thread_reduce.cuh" #include "../thread/thread_load.cuh" #include "../warp/warp_reduce.cuh" #include "../block/block_load.cuh" #include "../util_type.cuh" #include "../iterator/cache_modified_input_iterator.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * Tuning policy types ******************************************************************************/ /** * Parameterizable tuning policy type for AgentRadixSortUpsweep */ template < int _BLOCK_THREADS, ///< Threads per thread block int _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) CacheLoadModifier _LOAD_MODIFIER, ///< Cache load modifier for reading keys int _RADIX_BITS> ///< The number of radix bits, i.e., log2(bins) struct AgentRadixSortUpsweepPolicy { enum { BLOCK_THREADS = _BLOCK_THREADS, ///< Threads per thread block ITEMS_PER_THREAD = _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) RADIX_BITS = _RADIX_BITS, ///< The number of radix bits, i.e., log2(bins) }; static const CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER; ///< Cache load modifier for reading keys }; /****************************************************************************** * Thread block abstractions ******************************************************************************/ /** * \brief AgentRadixSortUpsweep implements a stateful abstraction of CUDA thread blocks for participating in device-wide radix sort upsweep . */ template < typename AgentRadixSortUpsweepPolicy, ///< Parameterized AgentRadixSortUpsweepPolicy tuning policy type typename KeyT, ///< KeyT type typename OffsetT> ///< Signed integer type for global offsets struct AgentRadixSortUpsweep { //--------------------------------------------------------------------- // Type definitions and constants //--------------------------------------------------------------------- typedef typename Traits::UnsignedBits UnsignedBits; // Integer type for digit counters (to be packed into words of PackedCounters) typedef unsigned char DigitCounter; // Integer type for packing DigitCounters into columns of shared memory banks typedef unsigned int PackedCounter; static const CacheLoadModifier LOAD_MODIFIER = AgentRadixSortUpsweepPolicy::LOAD_MODIFIER; enum { RADIX_BITS = AgentRadixSortUpsweepPolicy::RADIX_BITS, BLOCK_THREADS = AgentRadixSortUpsweepPolicy::BLOCK_THREADS, KEYS_PER_THREAD = AgentRadixSortUpsweepPolicy::ITEMS_PER_THREAD, RADIX_DIGITS = 1 << RADIX_BITS, LOG_WARP_THREADS = CUB_PTX_LOG_WARP_THREADS, WARP_THREADS = 1 << LOG_WARP_THREADS, WARPS = (BLOCK_THREADS + WARP_THREADS - 1) / WARP_THREADS, TILE_ITEMS = BLOCK_THREADS * KEYS_PER_THREAD, BYTES_PER_COUNTER = sizeof(DigitCounter), LOG_BYTES_PER_COUNTER = Log2::VALUE, PACKING_RATIO = sizeof(PackedCounter) / sizeof(DigitCounter), LOG_PACKING_RATIO = Log2::VALUE, LOG_COUNTER_LANES = CUB_MAX(0, RADIX_BITS - LOG_PACKING_RATIO), COUNTER_LANES = 1 << LOG_COUNTER_LANES, // To prevent counter overflow, we must periodically unpack and aggregate the // digit counters back into registers. Each counter lane is assigned to a // warp for aggregation. 
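// (DigitCounter is an 8-bit unsigned char, so each per-thread shared-memory counter can absorb at most
// 255 increments before wrapping; the UNROLL_COUNT batching below keeps UNROLL_COUNT * KEYS_PER_THREAD
// at or below 255 between aggregations.)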
LANES_PER_WARP = CUB_MAX(1, (COUNTER_LANES + WARPS - 1) / WARPS), // Unroll tiles in batches without risk of counter overflow UNROLL_COUNT = CUB_MIN(64, 255 / KEYS_PER_THREAD), UNROLLED_ELEMENTS = UNROLL_COUNT * TILE_ITEMS, }; // Input iterator wrapper type (for applying cache modifier)s typedef CacheModifiedInputIterator KeysItr; /** * Shared memory storage layout */ union __align__(16) _TempStorage { DigitCounter thread_counters[COUNTER_LANES][BLOCK_THREADS][PACKING_RATIO]; PackedCounter packed_thread_counters[COUNTER_LANES][BLOCK_THREADS]; OffsetT block_counters[WARP_THREADS][RADIX_DIGITS]; }; /// Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; //--------------------------------------------------------------------- // Thread fields (aggregate state bundle) //--------------------------------------------------------------------- // Shared storage for this CTA _TempStorage &temp_storage; // Thread-local counters for periodically aggregating composite-counter lanes OffsetT local_counts[LANES_PER_WARP][PACKING_RATIO]; // Input and output device pointers KeysItr d_keys_in; // The least-significant bit position of the current digit to extract int current_bit; // Number of bits in current digit int num_bits; //--------------------------------------------------------------------- // Helper structure for templated iteration //--------------------------------------------------------------------- // Iterate template struct Iterate { // BucketKeys static __device__ __forceinline__ void BucketKeys( AgentRadixSortUpsweep &cta, UnsignedBits keys[KEYS_PER_THREAD]) { cta.Bucket(keys[COUNT]); // Next Iterate::BucketKeys(cta, keys); } }; // Terminate template struct Iterate { // BucketKeys static __device__ __forceinline__ void BucketKeys(AgentRadixSortUpsweep &/*cta*/, UnsignedBits /*keys*/[KEYS_PER_THREAD]) {} }; //--------------------------------------------------------------------- // Utility methods //--------------------------------------------------------------------- /** * Decode a key and increment corresponding smem digit counter */ __device__ __forceinline__ void Bucket(UnsignedBits key) { // Perform transform op UnsignedBits converted_key = Traits::TwiddleIn(key); // Extract current digit bits UnsignedBits digit = BFE(converted_key, current_bit, num_bits); // Get sub-counter offset UnsignedBits sub_counter = digit & (PACKING_RATIO - 1); // Get row offset UnsignedBits row_offset = digit >> LOG_PACKING_RATIO; // Increment counter temp_storage.thread_counters[row_offset][threadIdx.x][sub_counter]++; } /** * Reset composite counters */ __device__ __forceinline__ void ResetDigitCounters() { #pragma unroll for (int LANE = 0; LANE < COUNTER_LANES; LANE++) { temp_storage.packed_thread_counters[LANE][threadIdx.x] = 0; } } /** * Reset the unpacked counters in each thread */ __device__ __forceinline__ void ResetUnpackedCounters() { #pragma unroll for (int LANE = 0; LANE < LANES_PER_WARP; LANE++) { #pragma unroll for (int UNPACKED_COUNTER = 0; UNPACKED_COUNTER < PACKING_RATIO; UNPACKED_COUNTER++) { local_counts[LANE][UNPACKED_COUNTER] = 0; } } } /** * Extracts and aggregates the digit counters for each counter lane * owned by this warp */ __device__ __forceinline__ void UnpackDigitCounts() { unsigned int warp_id = threadIdx.x >> LOG_WARP_THREADS; unsigned int warp_tid = LaneId(); #pragma unroll for (int LANE = 0; LANE < LANES_PER_WARP; LANE++) { const int counter_lane = (LANE * WARPS) + warp_id; if (counter_lane < COUNTER_LANES) { #pragma unroll for (int 
PACKED_COUNTER = 0; PACKED_COUNTER < BLOCK_THREADS; PACKED_COUNTER += WARP_THREADS) { #pragma unroll for (int UNPACKED_COUNTER = 0; UNPACKED_COUNTER < PACKING_RATIO; UNPACKED_COUNTER++) { OffsetT counter = temp_storage.thread_counters[counter_lane][warp_tid + PACKED_COUNTER][UNPACKED_COUNTER]; local_counts[LANE][UNPACKED_COUNTER] += counter; } } } } } /** * Processes a single, full tile */ __device__ __forceinline__ void ProcessFullTile(OffsetT block_offset) { // Tile of keys UnsignedBits keys[KEYS_PER_THREAD]; LoadDirectStriped(threadIdx.x, d_keys_in + block_offset, keys); // Prevent hoisting CTA_SYNC(); // Bucket tile of keys Iterate<0, KEYS_PER_THREAD>::BucketKeys(*this, keys); } /** * Processes a single load (may have some threads masked off) */ __device__ __forceinline__ void ProcessPartialTile( OffsetT block_offset, const OffsetT &block_end) { // Process partial tile if necessary using single loads block_offset += threadIdx.x; while (block_offset < block_end) { // Load and bucket key UnsignedBits key = d_keys_in[block_offset]; Bucket(key); block_offset += BLOCK_THREADS; } } //--------------------------------------------------------------------- // Interface //--------------------------------------------------------------------- /** * Constructor */ __device__ __forceinline__ AgentRadixSortUpsweep( TempStorage &temp_storage, const KeyT *d_keys_in, int current_bit, int num_bits) : temp_storage(temp_storage.Alias()), d_keys_in(reinterpret_cast(d_keys_in)), current_bit(current_bit), num_bits(num_bits) {} /** * Compute radix digit histograms from a segment of input tiles. */ __device__ __forceinline__ void ProcessRegion( OffsetT block_offset, const OffsetT &block_end) { // Reset digit counters in smem and unpacked counters in registers ResetDigitCounters(); ResetUnpackedCounters(); // Unroll batches of full tiles while (block_offset + UNROLLED_ELEMENTS <= block_end) { for (int i = 0; i < UNROLL_COUNT; ++i) { ProcessFullTile(block_offset); block_offset += TILE_ITEMS; } CTA_SYNC(); // Aggregate back into local_count registers to prevent overflow UnpackDigitCounts(); CTA_SYNC(); // Reset composite counters in lanes ResetDigitCounters(); } // Unroll single full tiles while (block_offset + TILE_ITEMS <= block_end) { ProcessFullTile(block_offset); block_offset += TILE_ITEMS; } // Process partial tile if necessary ProcessPartialTile( block_offset, block_end); CTA_SYNC(); // Aggregate back into local_count registers UnpackDigitCounts(); } /** * Extract counts (saving them to the external array) */ template __device__ __forceinline__ void ExtractCounts( OffsetT *counters, int bin_stride = 1, int bin_offset = 0) { unsigned int warp_id = threadIdx.x >> LOG_WARP_THREADS; unsigned int warp_tid = LaneId(); // Place unpacked digit counters in shared memory #pragma unroll for (int LANE = 0; LANE < LANES_PER_WARP; LANE++) { int counter_lane = (LANE * WARPS) + warp_id; if (counter_lane < COUNTER_LANES) { int digit_row = counter_lane << LOG_PACKING_RATIO; #pragma unroll for (int UNPACKED_COUNTER = 0; UNPACKED_COUNTER < PACKING_RATIO; UNPACKED_COUNTER++) { int bin_idx = digit_row + UNPACKED_COUNTER; temp_storage.block_counters[warp_tid][bin_idx] = local_counts[LANE][UNPACKED_COUNTER]; } } } CTA_SYNC(); // Rake-reduce bin_count reductions // Whole blocks #pragma unroll for (int BIN_BASE = RADIX_DIGITS % BLOCK_THREADS; (BIN_BASE + BLOCK_THREADS) <= RADIX_DIGITS; BIN_BASE += BLOCK_THREADS) { int bin_idx = BIN_BASE + threadIdx.x; OffsetT bin_count = 0; #pragma unroll for (int i = 0; i < WARP_THREADS; ++i) 
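// Sum the partial counts that each warp lane deposited in shared memory for this digit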
bin_count += temp_storage.block_counters[i][bin_idx]; if (IS_DESCENDING) bin_idx = RADIX_DIGITS - bin_idx - 1; counters[(bin_stride * bin_idx) + bin_offset] = bin_count; } // Remainder if ((RADIX_DIGITS % BLOCK_THREADS != 0) && (threadIdx.x < RADIX_DIGITS)) { int bin_idx = threadIdx.x; OffsetT bin_count = 0; #pragma unroll for (int i = 0; i < WARP_THREADS; ++i) bin_count += temp_storage.block_counters[i][bin_idx]; if (IS_DESCENDING) bin_idx = RADIX_DIGITS - bin_idx - 1; counters[(bin_stride * bin_idx) + bin_offset] = bin_count; } } /** * Extract counts */ template __device__ __forceinline__ void ExtractCounts( OffsetT (&bin_count)[BINS_TRACKED_PER_THREAD]) ///< [out] The exclusive prefix sum for the digits [(threadIdx.x * BINS_TRACKED_PER_THREAD) ... (threadIdx.x * BINS_TRACKED_PER_THREAD) + BINS_TRACKED_PER_THREAD - 1] { unsigned int warp_id = threadIdx.x >> LOG_WARP_THREADS; unsigned int warp_tid = LaneId(); // Place unpacked digit counters in shared memory #pragma unroll for (int LANE = 0; LANE < LANES_PER_WARP; LANE++) { int counter_lane = (LANE * WARPS) + warp_id; if (counter_lane < COUNTER_LANES) { int digit_row = counter_lane << LOG_PACKING_RATIO; #pragma unroll for (int UNPACKED_COUNTER = 0; UNPACKED_COUNTER < PACKING_RATIO; UNPACKED_COUNTER++) { int bin_idx = digit_row + UNPACKED_COUNTER; temp_storage.block_counters[warp_tid][bin_idx] = local_counts[LANE][UNPACKED_COUNTER]; } } } CTA_SYNC(); // Rake-reduce bin_count reductions #pragma unroll for (int track = 0; track < BINS_TRACKED_PER_THREAD; ++track) { int bin_idx = (threadIdx.x * BINS_TRACKED_PER_THREAD) + track; if ((BLOCK_THREADS == RADIX_DIGITS) || (bin_idx < RADIX_DIGITS)) { bin_count[track] = 0; #pragma unroll for (int i = 0; i < WARP_THREADS; ++i) bin_count[track] += temp_storage.block_counters[i][bin_idx]; } } } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/agent/agent_reduce.cuh000066400000000000000000000410241411340063500222320ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::AgentReduce implements a stateful abstraction of CUDA thread blocks for participating in device-wide reduction . */ #pragma once #include #include "../block/block_load.cuh" #include "../block/block_reduce.cuh" #include "../grid/grid_mapping.cuh" #include "../grid/grid_even_share.cuh" #include "../util_type.cuh" #include "../iterator/cache_modified_input_iterator.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * Tuning policy types ******************************************************************************/ /** * Parameterizable tuning policy type for AgentReduce */ template < int _BLOCK_THREADS, ///< Threads per thread block int _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) int _VECTOR_LOAD_LENGTH, ///< Number of items per vectorized load BlockReduceAlgorithm _BLOCK_ALGORITHM, ///< Cooperative block-wide reduction algorithm to use CacheLoadModifier _LOAD_MODIFIER> ///< Cache load modifier for reading input elements struct AgentReducePolicy { enum { BLOCK_THREADS = _BLOCK_THREADS, ///< Threads per thread block ITEMS_PER_THREAD = _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) VECTOR_LOAD_LENGTH = _VECTOR_LOAD_LENGTH, ///< Number of items per vectorized load }; static const BlockReduceAlgorithm BLOCK_ALGORITHM = _BLOCK_ALGORITHM; ///< Cooperative block-wide reduction algorithm to use static const CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER; ///< Cache load modifier for reading input elements }; /****************************************************************************** * Thread block abstractions ******************************************************************************/ /** * \brief AgentReduce implements a stateful abstraction of CUDA thread blocks for participating in device-wide reduction . * * Each thread reduces only the values it loads. If \p FIRST_TILE, this * partial reduction is stored into \p thread_aggregate. Otherwise it is * accumulated into \p thread_aggregate. */ template < typename AgentReducePolicy, ///< Parameterized AgentReducePolicy tuning policy type typename InputIteratorT, ///< Random-access iterator type for input typename OutputIteratorT, ///< Random-access iterator type for output typename OffsetT, ///< Signed integer type for global offsets typename ReductionOp> ///< Binary reduction operator type having member T operator()(const T &a, const T &b) struct AgentReduce { //--------------------------------------------------------------------- // Types and constants //--------------------------------------------------------------------- /// The input value type typedef typename std::iterator_traits::value_type InputT; /// The output value type typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? 
typename std::iterator_traits::value_type, // ... then the input iterator's value type, typename std::iterator_traits::value_type>::Type OutputT; // ... else the output iterator's value type /// Vector type of InputT for data movement typedef typename CubVector::Type VectorT; /// Input iterator wrapper type (for applying cache modifier) typedef typename If::VALUE, CacheModifiedInputIterator, // Wrap the native input pointer with CacheModifiedInputIterator InputIteratorT>::Type // Directly use the supplied input iterator type WrappedInputIteratorT; /// Constants enum { BLOCK_THREADS = AgentReducePolicy::BLOCK_THREADS, ITEMS_PER_THREAD = AgentReducePolicy::ITEMS_PER_THREAD, VECTOR_LOAD_LENGTH = CUB_MIN(ITEMS_PER_THREAD, AgentReducePolicy::VECTOR_LOAD_LENGTH), TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, // Can vectorize according to the policy if the input iterator is a native pointer to a primitive type ATTEMPT_VECTORIZATION = (VECTOR_LOAD_LENGTH > 1) && (ITEMS_PER_THREAD % VECTOR_LOAD_LENGTH == 0) && (IsPointer::VALUE) && Traits::PRIMITIVE, }; static const CacheLoadModifier LOAD_MODIFIER = AgentReducePolicy::LOAD_MODIFIER; static const BlockReduceAlgorithm BLOCK_ALGORITHM = AgentReducePolicy::BLOCK_ALGORITHM; /// Parameterized BlockReduce primitive typedef BlockReduce BlockReduceT; /// Shared memory type required by this thread block struct _TempStorage { typename BlockReduceT::TempStorage reduce; }; /// Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; //--------------------------------------------------------------------- // Per-thread fields //--------------------------------------------------------------------- _TempStorage& temp_storage; ///< Reference to temp_storage InputIteratorT d_in; ///< Input data to reduce WrappedInputIteratorT d_wrapped_in; ///< Wrapped input data to reduce ReductionOp reduction_op; ///< Binary reduction operator //--------------------------------------------------------------------- // Utility //--------------------------------------------------------------------- // Whether or not the input is aligned with the vector type (specialized for types we can vectorize) template static __device__ __forceinline__ bool IsAligned( Iterator d_in, Int2Type /*can_vectorize*/) { return (size_t(d_in) & (sizeof(VectorT) - 1)) == 0; } // Whether or not the input is aligned with the vector type (specialized for types we cannot vectorize) template static __device__ __forceinline__ bool IsAligned( Iterator /*d_in*/, Int2Type /*can_vectorize*/) { return false; } //--------------------------------------------------------------------- // Constructor //--------------------------------------------------------------------- /** * Constructor */ __device__ __forceinline__ AgentReduce( TempStorage& temp_storage, ///< Reference to temp_storage InputIteratorT d_in, ///< Input data to reduce ReductionOp reduction_op) ///< Binary reduction operator : temp_storage(temp_storage.Alias()), d_in(d_in), d_wrapped_in(d_in), reduction_op(reduction_op) {} //--------------------------------------------------------------------- // Tile consumption //--------------------------------------------------------------------- /** * Consume a full tile of input (non-vectorized) */ template __device__ __forceinline__ void ConsumeTile( OutputT &thread_aggregate, OffsetT block_offset, ///< The offset the tile to consume int /*valid_items*/, ///< The number of valid items in the tile Int2Type /*is_full_tile*/, ///< Whether or not this is a full tile Int2Type 
/*can_vectorize*/) ///< Whether or not we can vectorize loads { OutputT items[ITEMS_PER_THREAD]; // Load items in striped fashion LoadDirectStriped(threadIdx.x, d_wrapped_in + block_offset, items); // Reduce items within each thread stripe thread_aggregate = (IS_FIRST_TILE) ? internal::ThreadReduce(items, reduction_op) : internal::ThreadReduce(items, reduction_op, thread_aggregate); } /** * Consume a full tile of input (vectorized) */ template __device__ __forceinline__ void ConsumeTile( OutputT &thread_aggregate, OffsetT block_offset, ///< The offset the tile to consume int /*valid_items*/, ///< The number of valid items in the tile Int2Type /*is_full_tile*/, ///< Whether or not this is a full tile Int2Type /*can_vectorize*/) ///< Whether or not we can vectorize loads { // Alias items as an array of VectorT and load it in striped fashion enum { WORDS = ITEMS_PER_THREAD / VECTOR_LOAD_LENGTH }; // Fabricate a vectorized input iterator InputT *d_in_unqualified = const_cast(d_in) + block_offset + (threadIdx.x * VECTOR_LOAD_LENGTH); CacheModifiedInputIterator d_vec_in( reinterpret_cast(d_in_unqualified)); // Load items as vector items InputT input_items[ITEMS_PER_THREAD]; VectorT *vec_items = reinterpret_cast(input_items); #pragma unroll for (int i = 0; i < WORDS; ++i) vec_items[i] = d_vec_in[BLOCK_THREADS * i]; // Convert from input type to output type OutputT items[ITEMS_PER_THREAD]; #pragma unroll for (int i = 0; i < ITEMS_PER_THREAD; ++i) items[i] = input_items[i]; // Reduce items within each thread stripe thread_aggregate = (IS_FIRST_TILE) ? internal::ThreadReduce(items, reduction_op) : internal::ThreadReduce(items, reduction_op, thread_aggregate); } /** * Consume a partial tile of input */ template __device__ __forceinline__ void ConsumeTile( OutputT &thread_aggregate, OffsetT block_offset, ///< The offset the tile to consume int valid_items, ///< The number of valid items in the tile Int2Type /*is_full_tile*/, ///< Whether or not this is a full tile Int2Type /*can_vectorize*/) ///< Whether or not we can vectorize loads { // Partial tile int thread_offset = threadIdx.x; // Read first item if ((IS_FIRST_TILE) && (thread_offset < valid_items)) { thread_aggregate = d_wrapped_in[block_offset + thread_offset]; thread_offset += BLOCK_THREADS; } // Continue reading items (block-striped) while (thread_offset < valid_items) { OutputT item = d_wrapped_in[block_offset + thread_offset]; thread_aggregate = reduction_op(thread_aggregate, item); thread_offset += BLOCK_THREADS; } } //--------------------------------------------------------------- // Consume a contiguous segment of tiles //--------------------------------------------------------------------- /** * \brief Reduce a contiguous segment of input tiles */ template __device__ __forceinline__ OutputT ConsumeRange( GridEvenShare &even_share, ///< GridEvenShare descriptor Int2Type can_vectorize) ///< Whether or not we can vectorize loads { OutputT thread_aggregate; if (even_share.block_offset + TILE_ITEMS > even_share.block_end) { // First tile isn't full (not all threads have valid items) int valid_items = even_share.block_end - even_share.block_offset; ConsumeTile(thread_aggregate, even_share.block_offset, valid_items, Int2Type(), can_vectorize); return BlockReduceT(temp_storage.reduce).Reduce(thread_aggregate, reduction_op, valid_items); } // At least one full block ConsumeTile(thread_aggregate, even_share.block_offset, TILE_ITEMS, Int2Type(), can_vectorize); even_share.block_offset += even_share.block_stride; // Consume subsequent full tiles 
of input while (even_share.block_offset + TILE_ITEMS <= even_share.block_end) { ConsumeTile(thread_aggregate, even_share.block_offset, TILE_ITEMS, Int2Type(), can_vectorize); even_share.block_offset += even_share.block_stride; } // Consume a partially-full tile if (even_share.block_offset < even_share.block_end) { int valid_items = even_share.block_end - even_share.block_offset; ConsumeTile(thread_aggregate, even_share.block_offset, valid_items, Int2Type(), can_vectorize); } // Compute block-wide reduction (all threads have valid items) return BlockReduceT(temp_storage.reduce).Reduce(thread_aggregate, reduction_op); } /** * \brief Reduce a contiguous segment of input tiles */ __device__ __forceinline__ OutputT ConsumeRange( OffsetT block_offset, ///< [in] Threadblock begin offset (inclusive) OffsetT block_end) ///< [in] Threadblock end offset (exclusive) { GridEvenShare even_share; even_share.template BlockInit(block_offset, block_end); return (IsAligned(d_in + block_offset, Int2Type())) ? ConsumeRange(even_share, Int2Type()) : ConsumeRange(even_share, Int2Type()); } /** * Reduce a contiguous segment of input tiles */ __device__ __forceinline__ OutputT ConsumeTiles( GridEvenShare &even_share) ///< [in] GridEvenShare descriptor { // Initialize GRID_MAPPING_STRIP_MINE even-share descriptor for this thread block even_share.template BlockInit(); return (IsAligned(d_in, Int2Type())) ? ConsumeRange(even_share, Int2Type()) : ConsumeRange(even_share, Int2Type()); } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/agent/agent_reduce_by_key.cuh000066400000000000000000000605231411340063500236010ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ******************************************************************************/ /** * \file * cub::AgentReduceByKey implements a stateful abstraction of CUDA thread blocks for participating in device-wide reduce-value-by-key. */ #pragma once #include #include "single_pass_scan_operators.cuh" #include "../block/block_load.cuh" #include "../block/block_store.cuh" #include "../block/block_scan.cuh" #include "../block/block_discontinuity.cuh" #include "../iterator/cache_modified_input_iterator.cuh" #include "../iterator/constant_input_iterator.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * Tuning policy types ******************************************************************************/ /** * Parameterizable tuning policy type for AgentReduceByKey */ template < int _BLOCK_THREADS, ///< Threads per thread block int _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) BlockLoadAlgorithm _LOAD_ALGORITHM, ///< The BlockLoad algorithm to use CacheLoadModifier _LOAD_MODIFIER, ///< Cache load modifier for reading input elements BlockScanAlgorithm _SCAN_ALGORITHM> ///< The BlockScan algorithm to use struct AgentReduceByKeyPolicy { enum { BLOCK_THREADS = _BLOCK_THREADS, ///< Threads per thread block ITEMS_PER_THREAD = _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) }; static const BlockLoadAlgorithm LOAD_ALGORITHM = _LOAD_ALGORITHM; ///< The BlockLoad algorithm to use static const CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER; ///< Cache load modifier for reading input elements static const BlockScanAlgorithm SCAN_ALGORITHM = _SCAN_ALGORITHM; ///< The BlockScan algorithm to use }; /****************************************************************************** * Thread block abstractions ******************************************************************************/ /** * \brief AgentReduceByKey implements a stateful abstraction of CUDA thread blocks for participating in device-wide reduce-value-by-key */ template < typename AgentReduceByKeyPolicyT, ///< Parameterized AgentReduceByKeyPolicy tuning policy type typename KeysInputIteratorT, ///< Random-access input iterator type for keys typename UniqueOutputIteratorT, ///< Random-access output iterator type for keys typename ValuesInputIteratorT, ///< Random-access input iterator type for values typename AggregatesOutputIteratorT, ///< Random-access output iterator type for values typename NumRunsOutputIteratorT, ///< Output iterator type for recording number of items selected typename EqualityOpT, ///< KeyT equality operator type typename ReductionOpT, ///< ValueT reduction operator type typename OffsetT> ///< Signed integer type for global offsets struct AgentReduceByKey { //--------------------------------------------------------------------- // Types and constants //--------------------------------------------------------------------- // The input keys type typedef typename std::iterator_traits::value_type KeyInputT; // The output keys type typedef typename If<(Equals::value_type, void>::VALUE), // KeyOutputT = (if output iterator's value type is void) ? typename std::iterator_traits::value_type, // ... then the input iterator's value type, typename std::iterator_traits::value_type>::Type KeyOutputT; // ... 
else the output iterator's value type // The input values type typedef typename std::iterator_traits::value_type ValueInputT; // The output values type typedef typename If<(Equals::value_type, void>::VALUE), // ValueOutputT = (if output iterator's value type is void) ? typename std::iterator_traits::value_type, // ... then the input iterator's value type, typename std::iterator_traits::value_type>::Type ValueOutputT; // ... else the output iterator's value type // Tuple type for scanning (pairs accumulated segment-value with segment-index) typedef KeyValuePair OffsetValuePairT; // Tuple type for pairing keys and values typedef KeyValuePair KeyValuePairT; // Tile status descriptor interface type typedef ReduceByKeyScanTileState ScanTileStateT; // Guarded inequality functor template struct GuardedInequalityWrapper { _EqualityOpT op; ///< Wrapped equality operator int num_remaining; ///< Items remaining /// Constructor __host__ __device__ __forceinline__ GuardedInequalityWrapper(_EqualityOpT op, int num_remaining) : op(op), num_remaining(num_remaining) {} /// Boolean inequality operator, returns (a != b) template __host__ __device__ __forceinline__ bool operator()(const T &a, const T &b, int idx) const { if (idx < num_remaining) return !op(a, b); // In bounds // Return true if first out-of-bounds item, false otherwise return (idx == num_remaining); } }; // Constants enum { BLOCK_THREADS = AgentReduceByKeyPolicyT::BLOCK_THREADS, ITEMS_PER_THREAD = AgentReduceByKeyPolicyT::ITEMS_PER_THREAD, TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, TWO_PHASE_SCATTER = (ITEMS_PER_THREAD > 1), // Whether or not the scan operation has a zero-valued identity value (true if we're performing addition on a primitive type) HAS_IDENTITY_ZERO = (Equals::VALUE) && (Traits::PRIMITIVE), }; // Cache-modified Input iterator wrapper type (for applying cache modifier) for keys typedef typename If::VALUE, CacheModifiedInputIterator, // Wrap the native input pointer with CacheModifiedValuesInputIterator KeysInputIteratorT>::Type // Directly use the supplied input iterator type WrappedKeysInputIteratorT; // Cache-modified Input iterator wrapper type (for applying cache modifier) for values typedef typename If::VALUE, CacheModifiedInputIterator, // Wrap the native input pointer with CacheModifiedValuesInputIterator ValuesInputIteratorT>::Type // Directly use the supplied input iterator type WrappedValuesInputIteratorT; // Cache-modified Input iterator wrapper type (for applying cache modifier) for fixup values typedef typename If::VALUE, CacheModifiedInputIterator, // Wrap the native input pointer with CacheModifiedValuesInputIterator AggregatesOutputIteratorT>::Type // Directly use the supplied input iterator type WrappedFixupInputIteratorT; // Reduce-value-by-segment scan operator typedef ReduceBySegmentOp ReduceBySegmentOpT; // Parameterized BlockLoad type for keys typedef BlockLoad< KeyOutputT, BLOCK_THREADS, ITEMS_PER_THREAD, AgentReduceByKeyPolicyT::LOAD_ALGORITHM> BlockLoadKeysT; // Parameterized BlockLoad type for values typedef BlockLoad< ValueOutputT, BLOCK_THREADS, ITEMS_PER_THREAD, AgentReduceByKeyPolicyT::LOAD_ALGORITHM> BlockLoadValuesT; // Parameterized BlockDiscontinuity type for keys typedef BlockDiscontinuity< KeyOutputT, BLOCK_THREADS> BlockDiscontinuityKeys; // Parameterized BlockScan type typedef BlockScan< OffsetValuePairT, BLOCK_THREADS, AgentReduceByKeyPolicyT::SCAN_ALGORITHM> BlockScanT; // Callback type for obtaining tile prefix during block scan typedef TilePrefixCallbackOp< OffsetValuePairT, 
ReduceBySegmentOpT, ScanTileStateT> TilePrefixCallbackOpT; // Key and value exchange types typedef KeyOutputT KeyExchangeT[TILE_ITEMS + 1]; typedef ValueOutputT ValueExchangeT[TILE_ITEMS + 1]; // Shared memory type for this thread block union _TempStorage { struct { typename BlockScanT::TempStorage scan; // Smem needed for tile scanning typename TilePrefixCallbackOpT::TempStorage prefix; // Smem needed for cooperative prefix callback typename BlockDiscontinuityKeys::TempStorage discontinuity; // Smem needed for discontinuity detection }; // Smem needed for loading keys typename BlockLoadKeysT::TempStorage load_keys; // Smem needed for loading values typename BlockLoadValuesT::TempStorage load_values; // Smem needed for compacting key value pairs(allows non POD items in this union) Uninitialized raw_exchange; }; // Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; //--------------------------------------------------------------------- // Per-thread fields //--------------------------------------------------------------------- _TempStorage& temp_storage; ///< Reference to temp_storage WrappedKeysInputIteratorT d_keys_in; ///< Input keys UniqueOutputIteratorT d_unique_out; ///< Unique output keys WrappedValuesInputIteratorT d_values_in; ///< Input values AggregatesOutputIteratorT d_aggregates_out; ///< Output value aggregates NumRunsOutputIteratorT d_num_runs_out; ///< Output pointer for total number of segments identified EqualityOpT equality_op; ///< KeyT equality operator ReductionOpT reduction_op; ///< Reduction operator ReduceBySegmentOpT scan_op; ///< Reduce-by-segment scan operator //--------------------------------------------------------------------- // Constructor //--------------------------------------------------------------------- // Constructor __device__ __forceinline__ AgentReduceByKey( TempStorage& temp_storage, ///< Reference to temp_storage KeysInputIteratorT d_keys_in, ///< Input keys UniqueOutputIteratorT d_unique_out, ///< Unique output keys ValuesInputIteratorT d_values_in, ///< Input values AggregatesOutputIteratorT d_aggregates_out, ///< Output value aggregates NumRunsOutputIteratorT d_num_runs_out, ///< Output pointer for total number of segments identified EqualityOpT equality_op, ///< KeyT equality operator ReductionOpT reduction_op) ///< ValueT reduction operator : temp_storage(temp_storage.Alias()), d_keys_in(d_keys_in), d_unique_out(d_unique_out), d_values_in(d_values_in), d_aggregates_out(d_aggregates_out), d_num_runs_out(d_num_runs_out), equality_op(equality_op), reduction_op(reduction_op), scan_op(reduction_op) {} //--------------------------------------------------------------------- // Scatter utility methods //--------------------------------------------------------------------- /** * Directly scatter flagged items to output offsets */ __device__ __forceinline__ void ScatterDirect( KeyValuePairT (&scatter_items)[ITEMS_PER_THREAD], OffsetT (&segment_flags)[ITEMS_PER_THREAD], OffsetT (&segment_indices)[ITEMS_PER_THREAD]) { // Scatter flagged keys and values #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { if (segment_flags[ITEM]) { d_unique_out[segment_indices[ITEM]] = scatter_items[ITEM].key; d_aggregates_out[segment_indices[ITEM]] = scatter_items[ITEM].value; } } } /** * 2-phase scatter flagged items to output offsets * * The exclusive scan causes each head flag to be paired with the previous * value aggregate: the scatter offsets must be decremented for value aggregates */ __device__ 
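// (Two-phase scatter first compacts the flagged key-value pairs into shared memory so that the subsequent
// global writes are contiguous; the Scatter() dispatcher below falls back to ScatterDirect when two-phase
// scatter is disabled or when a tile produces no more segments than there are threads.)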
__forceinline__ void ScatterTwoPhase( KeyValuePairT (&scatter_items)[ITEMS_PER_THREAD], OffsetT (&segment_flags)[ITEMS_PER_THREAD], OffsetT (&segment_indices)[ITEMS_PER_THREAD], OffsetT num_tile_segments, OffsetT num_tile_segments_prefix) { CTA_SYNC(); // Compact and scatter pairs #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { if (segment_flags[ITEM]) { temp_storage.raw_exchange.Alias()[segment_indices[ITEM] - num_tile_segments_prefix] = scatter_items[ITEM]; } } CTA_SYNC(); for (int item = threadIdx.x; item < num_tile_segments; item += BLOCK_THREADS) { KeyValuePairT pair = temp_storage.raw_exchange.Alias()[item]; d_unique_out[num_tile_segments_prefix + item] = pair.key; d_aggregates_out[num_tile_segments_prefix + item] = pair.value; } } /** * Scatter flagged items */ __device__ __forceinline__ void Scatter( KeyValuePairT (&scatter_items)[ITEMS_PER_THREAD], OffsetT (&segment_flags)[ITEMS_PER_THREAD], OffsetT (&segment_indices)[ITEMS_PER_THREAD], OffsetT num_tile_segments, OffsetT num_tile_segments_prefix) { // Do a one-phase scatter if (a) two-phase is disabled or (b) the average number of selected items per thread is less than one if (TWO_PHASE_SCATTER && (num_tile_segments > BLOCK_THREADS)) { ScatterTwoPhase( scatter_items, segment_flags, segment_indices, num_tile_segments, num_tile_segments_prefix); } else { ScatterDirect( scatter_items, segment_flags, segment_indices); } } //--------------------------------------------------------------------- // Cooperatively scan a device-wide sequence of tiles with other CTAs //--------------------------------------------------------------------- /** * Process a tile of input (dynamic chained scan) */ template ///< Whether the current tile is the last tile __device__ __forceinline__ void ConsumeTile( OffsetT num_remaining, ///< Number of global input items remaining (including this tile) int tile_idx, ///< Tile index OffsetT tile_offset, ///< Tile offset ScanTileStateT& tile_state) ///< Global tile state descriptor { KeyOutputT keys[ITEMS_PER_THREAD]; // Tile keys KeyOutputT prev_keys[ITEMS_PER_THREAD]; // Tile keys shuffled up ValueOutputT values[ITEMS_PER_THREAD]; // Tile values OffsetT head_flags[ITEMS_PER_THREAD]; // Segment head flags OffsetT segment_indices[ITEMS_PER_THREAD]; // Segment indices OffsetValuePairT scan_items[ITEMS_PER_THREAD]; // Zipped values and segment flags|indices KeyValuePairT scatter_items[ITEMS_PER_THREAD]; // Zipped key value pairs for scattering // Load keys if (IS_LAST_TILE) BlockLoadKeysT(temp_storage.load_keys).Load(d_keys_in + tile_offset, keys, num_remaining); else BlockLoadKeysT(temp_storage.load_keys).Load(d_keys_in + tile_offset, keys); // Load tile predecessor key in first thread KeyOutputT tile_predecessor; if (threadIdx.x == 0) { tile_predecessor = (tile_idx == 0) ? 
keys[0] : // First tile gets repeat of first item (thus first item will not be flagged as a head) d_keys_in[tile_offset - 1]; // Subsequent tiles get last key from previous tile } CTA_SYNC(); // Load values if (IS_LAST_TILE) BlockLoadValuesT(temp_storage.load_values).Load(d_values_in + tile_offset, values, num_remaining); else BlockLoadValuesT(temp_storage.load_values).Load(d_values_in + tile_offset, values); CTA_SYNC(); // Initialize head-flags and shuffle up the previous keys if (IS_LAST_TILE) { // Use custom flag operator to additionally flag the first out-of-bounds item GuardedInequalityWrapper flag_op(equality_op, num_remaining); BlockDiscontinuityKeys(temp_storage.discontinuity).FlagHeads( head_flags, keys, prev_keys, flag_op, tile_predecessor); } else { InequalityWrapper flag_op(equality_op); BlockDiscontinuityKeys(temp_storage.discontinuity).FlagHeads( head_flags, keys, prev_keys, flag_op, tile_predecessor); } // Zip values and head flags #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { scan_items[ITEM].value = values[ITEM]; scan_items[ITEM].key = head_flags[ITEM]; } // Perform exclusive tile scan OffsetValuePairT block_aggregate; // Inclusive block-wide scan aggregate OffsetT num_segments_prefix; // Number of segments prior to this tile ValueOutputT total_aggregate; // The tile prefix folded with block_aggregate if (tile_idx == 0) { // Scan first tile BlockScanT(temp_storage.scan).ExclusiveScan(scan_items, scan_items, scan_op, block_aggregate); num_segments_prefix = 0; total_aggregate = block_aggregate.value; // Update tile status if there are successor tiles if ((!IS_LAST_TILE) && (threadIdx.x == 0)) tile_state.SetInclusive(0, block_aggregate); } else { // Scan non-first tile TilePrefixCallbackOpT prefix_op(tile_state, temp_storage.prefix, scan_op, tile_idx); BlockScanT(temp_storage.scan).ExclusiveScan(scan_items, scan_items, scan_op, prefix_op); block_aggregate = prefix_op.GetBlockAggregate(); num_segments_prefix = prefix_op.GetExclusivePrefix().key; total_aggregate = reduction_op( prefix_op.GetExclusivePrefix().value, block_aggregate.value); } // Rezip scatter items and segment indices #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { scatter_items[ITEM].key = prev_keys[ITEM]; scatter_items[ITEM].value = scan_items[ITEM].value; segment_indices[ITEM] = scan_items[ITEM].key; } // At this point, each flagged segment head has: // - The key for the previous segment // - The reduced value from the previous segment // - The segment index for the reduced value // Scatter flagged keys and values OffsetT num_tile_segments = block_aggregate.key; Scatter(scatter_items, head_flags, segment_indices, num_tile_segments, num_segments_prefix); // Last thread in last tile will output final count (and last pair, if necessary) if ((IS_LAST_TILE) && (threadIdx.x == BLOCK_THREADS - 1)) { OffsetT num_segments = num_segments_prefix + num_tile_segments; // If the last tile is a whole tile, output the final_value if (num_remaining == TILE_ITEMS) { d_unique_out[num_segments] = keys[ITEMS_PER_THREAD - 1]; d_aggregates_out[num_segments] = total_aggregate; num_segments++; } // Output the total number of items selected *d_num_runs_out = num_segments; } } /** * Scan tiles of items as part of a dynamic chained scan */ __device__ __forceinline__ void ConsumeRange( int num_items, ///< Total number of input items ScanTileStateT& tile_state, ///< Global tile state descriptor int start_tile) ///< The starting tile for the current grid { // Blocks are launched in increasing 
order, so just assign one tile per block int tile_idx = start_tile + blockIdx.x; // Current tile index OffsetT tile_offset = OffsetT(TILE_ITEMS) * tile_idx; // Global offset for the current tile OffsetT num_remaining = num_items - tile_offset; // Remaining items (including this tile) if (num_remaining > TILE_ITEMS) { // Not last tile ConsumeTile(num_remaining, tile_idx, tile_offset, tile_state); } else if (num_remaining > 0) { // Last tile ConsumeTile(num_remaining, tile_idx, tile_offset, tile_state); } } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/agent/agent_rle.cuh000066400000000000000000001055171411340063500215550ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::AgentRle implements a stateful abstraction of CUDA thread blocks for participating in device-wide run-length-encode. 
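 *
 * Runs are reported as (offset, length) pairs; runs of length one are treated as trivial and are not emitted.
 * For example, for the input sequence (1, 1, 1, 5, 5, 2), the non-trivial runs are a run of 1s at offset 0
 * with length 3 and a run of 5s at offset 3 with length 2.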
*/ #pragma once #include #include "single_pass_scan_operators.cuh" #include "../block/block_load.cuh" #include "../block/block_store.cuh" #include "../block/block_scan.cuh" #include "../block/block_exchange.cuh" #include "../block/block_discontinuity.cuh" #include "../grid/grid_queue.cuh" #include "../iterator/cache_modified_input_iterator.cuh" #include "../iterator/constant_input_iterator.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * Tuning policy types ******************************************************************************/ /** * Parameterizable tuning policy type for AgentRle */ template < int _BLOCK_THREADS, ///< Threads per thread block int _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) BlockLoadAlgorithm _LOAD_ALGORITHM, ///< The BlockLoad algorithm to use CacheLoadModifier _LOAD_MODIFIER, ///< Cache load modifier for reading input elements bool _STORE_WARP_TIME_SLICING, ///< Whether or not only one warp's worth of shared memory should be allocated and time-sliced among block-warps during any store-related data transpositions (versus each warp having its own storage) BlockScanAlgorithm _SCAN_ALGORITHM> ///< The BlockScan algorithm to use struct AgentRlePolicy { enum { BLOCK_THREADS = _BLOCK_THREADS, ///< Threads per thread block ITEMS_PER_THREAD = _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) STORE_WARP_TIME_SLICING = _STORE_WARP_TIME_SLICING, ///< Whether or not only one warp's worth of shared memory should be allocated and time-sliced among block-warps during any store-related data transpositions (versus each warp having its own storage) }; static const BlockLoadAlgorithm LOAD_ALGORITHM = _LOAD_ALGORITHM; ///< The BlockLoad algorithm to use static const CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER; ///< Cache load modifier for reading input elements static const BlockScanAlgorithm SCAN_ALGORITHM = _SCAN_ALGORITHM; ///< The BlockScan algorithm to use }; /****************************************************************************** * Thread block abstractions ******************************************************************************/ /** * \brief AgentRle implements a stateful abstraction of CUDA thread blocks for participating in device-wide run-length-encode */ template < typename AgentRlePolicyT, ///< Parameterized AgentRlePolicyT tuning policy type typename InputIteratorT, ///< Random-access input iterator type for data typename OffsetsOutputIteratorT, ///< Random-access output iterator type for offset values typename LengthsOutputIteratorT, ///< Random-access output iterator type for length values typename EqualityOpT, ///< T equality operator type typename OffsetT> ///< Signed integer type for global offsets struct AgentRle { //--------------------------------------------------------------------- // Types and constants //--------------------------------------------------------------------- /// The input value type typedef typename std::iterator_traits::value_type T; /// The lengths output value type typedef typename If<(Equals::value_type, void>::VALUE), // LengthT = (if output iterator's value type is void) ? OffsetT, // ... then the OffsetT type, typename std::iterator_traits::value_type>::Type LengthT; // ... 
else the output iterator's value type /// Tuple type for scanning (pairs run-length and run-index) typedef KeyValuePair LengthOffsetPair; /// Tile status descriptor interface type typedef ReduceByKeyScanTileState ScanTileStateT; // Constants enum { WARP_THREADS = CUB_WARP_THREADS(PTX_ARCH), BLOCK_THREADS = AgentRlePolicyT::BLOCK_THREADS, ITEMS_PER_THREAD = AgentRlePolicyT::ITEMS_PER_THREAD, WARP_ITEMS = WARP_THREADS * ITEMS_PER_THREAD, TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, WARPS = (BLOCK_THREADS + WARP_THREADS - 1) / WARP_THREADS, /// Whether or not to sync after loading data SYNC_AFTER_LOAD = (AgentRlePolicyT::LOAD_ALGORITHM != BLOCK_LOAD_DIRECT), /// Whether or not only one warp's worth of shared memory should be allocated and time-sliced among block-warps during any store-related data transpositions (versus each warp having its own storage) STORE_WARP_TIME_SLICING = AgentRlePolicyT::STORE_WARP_TIME_SLICING, ACTIVE_EXCHANGE_WARPS = (STORE_WARP_TIME_SLICING) ? 1 : WARPS, }; /** * Special operator that signals all out-of-bounds items are not equal to everything else, * forcing both (1) the last item to be tail-flagged and (2) all oob items to be marked * trivial. */ template struct OobInequalityOp { OffsetT num_remaining; EqualityOpT equality_op; __device__ __forceinline__ OobInequalityOp( OffsetT num_remaining, EqualityOpT equality_op) : num_remaining(num_remaining), equality_op(equality_op) {} template __host__ __device__ __forceinline__ bool operator()(T first, T second, Index idx) { if (!LAST_TILE || (idx < num_remaining)) return !equality_op(first, second); else return true; } }; // Cache-modified Input iterator wrapper type (for applying cache modifier) for data typedef typename If::VALUE, CacheModifiedInputIterator, // Wrap the native input pointer with CacheModifiedVLengthnputIterator InputIteratorT>::Type // Directly use the supplied input iterator type WrappedInputIteratorT; // Parameterized BlockLoad type for data typedef BlockLoad< T, AgentRlePolicyT::BLOCK_THREADS, AgentRlePolicyT::ITEMS_PER_THREAD, AgentRlePolicyT::LOAD_ALGORITHM> BlockLoadT; // Parameterized BlockDiscontinuity type for data typedef BlockDiscontinuity BlockDiscontinuityT; // Parameterized WarpScan type typedef WarpScan WarpScanPairs; // Reduce-length-by-run scan operator typedef ReduceBySegmentOp ReduceBySegmentOpT; // Callback type for obtaining tile prefix during block scan typedef TilePrefixCallbackOp< LengthOffsetPair, ReduceBySegmentOpT, ScanTileStateT> TilePrefixCallbackOpT; // Warp exchange types typedef WarpExchange WarpExchangePairs; typedef typename If::Type WarpExchangePairsStorage; typedef WarpExchange WarpExchangeOffsets; typedef WarpExchange WarpExchangeLengths; typedef LengthOffsetPair WarpAggregates[WARPS]; // Shared memory type for this thread block struct _TempStorage { // Aliasable storage layout union Aliasable { struct { typename BlockDiscontinuityT::TempStorage discontinuity; // Smem needed for discontinuity detection typename WarpScanPairs::TempStorage warp_scan[WARPS]; // Smem needed for warp-synchronous scans Uninitialized warp_aggregates; // Smem needed for sharing warp-wide aggregates typename TilePrefixCallbackOpT::TempStorage prefix; // Smem needed for cooperative prefix callback }; // Smem needed for input loading typename BlockLoadT::TempStorage load; // Aliasable layout needed for two-phase scatter union ScatterAliasable { unsigned long long align; WarpExchangePairsStorage exchange_pairs[ACTIVE_EXCHANGE_WARPS]; typename WarpExchangeOffsets::TempStorage 
exchange_offsets[ACTIVE_EXCHANGE_WARPS]; typename WarpExchangeLengths::TempStorage exchange_lengths[ACTIVE_EXCHANGE_WARPS]; } scatter_aliasable; } aliasable; OffsetT tile_idx; // Shared tile index LengthOffsetPair tile_inclusive; // Inclusive tile prefix LengthOffsetPair tile_exclusive; // Exclusive tile prefix }; // Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; //--------------------------------------------------------------------- // Per-thread fields //--------------------------------------------------------------------- _TempStorage& temp_storage; ///< Reference to temp_storage WrappedInputIteratorT d_in; ///< Pointer to input sequence of data items OffsetsOutputIteratorT d_offsets_out; ///< Input run offsets LengthsOutputIteratorT d_lengths_out; ///< Output run lengths EqualityOpT equality_op; ///< T equality operator ReduceBySegmentOpT scan_op; ///< Reduce-length-by-flag scan operator OffsetT num_items; ///< Total number of input items //--------------------------------------------------------------------- // Constructor //--------------------------------------------------------------------- // Constructor __device__ __forceinline__ AgentRle( TempStorage &temp_storage, ///< [in] Reference to temp_storage InputIteratorT d_in, ///< [in] Pointer to input sequence of data items OffsetsOutputIteratorT d_offsets_out, ///< [out] Pointer to output sequence of run offsets LengthsOutputIteratorT d_lengths_out, ///< [out] Pointer to output sequence of run lengths EqualityOpT equality_op, ///< [in] T equality operator OffsetT num_items) ///< [in] Total number of input items : temp_storage(temp_storage.Alias()), d_in(d_in), d_offsets_out(d_offsets_out), d_lengths_out(d_lengths_out), equality_op(equality_op), scan_op(cub::Sum()), num_items(num_items) {} //--------------------------------------------------------------------- // Utility methods for initializing the selections //--------------------------------------------------------------------- template __device__ __forceinline__ void InitializeSelections( OffsetT tile_offset, OffsetT num_remaining, T (&items)[ITEMS_PER_THREAD], LengthOffsetPair (&lengths_and_num_runs)[ITEMS_PER_THREAD]) { bool head_flags[ITEMS_PER_THREAD]; bool tail_flags[ITEMS_PER_THREAD]; OobInequalityOp inequality_op(num_remaining, equality_op); if (FIRST_TILE && LAST_TILE) { // First-and-last-tile always head-flags the first item and tail-flags the last item BlockDiscontinuityT(temp_storage.aliasable.discontinuity).FlagHeadsAndTails( head_flags, tail_flags, items, inequality_op); } else if (FIRST_TILE) { // First-tile always head-flags the first item // Get the first item from the next tile T tile_successor_item; if (threadIdx.x == BLOCK_THREADS - 1) tile_successor_item = d_in[tile_offset + TILE_ITEMS]; BlockDiscontinuityT(temp_storage.aliasable.discontinuity).FlagHeadsAndTails( head_flags, tail_flags, tile_successor_item, items, inequality_op); } else if (LAST_TILE) { // Last-tile always flags the last item // Get the last item from the previous tile T tile_predecessor_item; if (threadIdx.x == 0) tile_predecessor_item = d_in[tile_offset - 1]; BlockDiscontinuityT(temp_storage.aliasable.discontinuity).FlagHeadsAndTails( head_flags, tile_predecessor_item, tail_flags, items, inequality_op); } else { // Get the first item from the next tile T tile_successor_item; if (threadIdx.x == BLOCK_THREADS - 1) tile_successor_item = d_in[tile_offset + TILE_ITEMS]; // Get the last item from the previous tile T tile_predecessor_item; if 
(threadIdx.x == 0) tile_predecessor_item = d_in[tile_offset - 1]; BlockDiscontinuityT(temp_storage.aliasable.discontinuity).FlagHeadsAndTails( head_flags, tile_predecessor_item, tail_flags, tile_successor_item, items, inequality_op); } // Zip counts and runs #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { lengths_and_num_runs[ITEM].key = head_flags[ITEM] && (!tail_flags[ITEM]); lengths_and_num_runs[ITEM].value = ((!head_flags[ITEM]) || (!tail_flags[ITEM])); } } //--------------------------------------------------------------------- // Scan utility methods //--------------------------------------------------------------------- /** * Scan of allocations */ __device__ __forceinline__ void WarpScanAllocations( LengthOffsetPair &tile_aggregate, LengthOffsetPair &warp_aggregate, LengthOffsetPair &warp_exclusive_in_tile, LengthOffsetPair &thread_exclusive_in_warp, LengthOffsetPair (&lengths_and_num_runs)[ITEMS_PER_THREAD]) { // Perform warpscans unsigned int warp_id = ((WARPS == 1) ? 0 : threadIdx.x / WARP_THREADS); int lane_id = LaneId(); LengthOffsetPair identity; identity.key = 0; identity.value = 0; LengthOffsetPair thread_inclusive; LengthOffsetPair thread_aggregate = internal::ThreadReduce(lengths_and_num_runs, scan_op); WarpScanPairs(temp_storage.aliasable.warp_scan[warp_id]).Scan( thread_aggregate, thread_inclusive, thread_exclusive_in_warp, identity, scan_op); // Last lane in each warp shares its warp-aggregate if (lane_id == WARP_THREADS - 1) temp_storage.aliasable.warp_aggregates.Alias()[warp_id] = thread_inclusive; CTA_SYNC(); // Accumulate total selected and the warp-wide prefix warp_exclusive_in_tile = identity; warp_aggregate = temp_storage.aliasable.warp_aggregates.Alias()[warp_id]; tile_aggregate = temp_storage.aliasable.warp_aggregates.Alias()[0]; #pragma unroll for (int WARP = 1; WARP < WARPS; ++WARP) { if (warp_id == WARP) warp_exclusive_in_tile = tile_aggregate; tile_aggregate = scan_op(tile_aggregate, temp_storage.aliasable.warp_aggregates.Alias()[WARP]); } } //--------------------------------------------------------------------- // Utility methods for scattering selections //--------------------------------------------------------------------- /** * Two-phase scatter, specialized for warp time-slicing */ template __device__ __forceinline__ void ScatterTwoPhase( OffsetT tile_num_runs_exclusive_in_global, OffsetT warp_num_runs_aggregate, OffsetT warp_num_runs_exclusive_in_tile, OffsetT (&thread_num_runs_exclusive_in_warp)[ITEMS_PER_THREAD], LengthOffsetPair (&lengths_and_offsets)[ITEMS_PER_THREAD], Int2Type is_warp_time_slice) { unsigned int warp_id = ((WARPS == 1) ? 
0 : threadIdx.x / WARP_THREADS); int lane_id = LaneId(); // Locally compact items within the warp (first warp) if (warp_id == 0) { WarpExchangePairs(temp_storage.aliasable.scatter_aliasable.exchange_pairs[0]).ScatterToStriped( lengths_and_offsets, thread_num_runs_exclusive_in_warp); } // Locally compact items within the warp (remaining warps) #pragma unroll for (int SLICE = 1; SLICE < WARPS; ++SLICE) { CTA_SYNC(); if (warp_id == SLICE) { WarpExchangePairs(temp_storage.aliasable.scatter_aliasable.exchange_pairs[0]).ScatterToStriped( lengths_and_offsets, thread_num_runs_exclusive_in_warp); } } // Global scatter #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { if ((ITEM * WARP_THREADS) < warp_num_runs_aggregate - lane_id) { OffsetT item_offset = tile_num_runs_exclusive_in_global + warp_num_runs_exclusive_in_tile + (ITEM * WARP_THREADS) + lane_id; // Scatter offset d_offsets_out[item_offset] = lengths_and_offsets[ITEM].key; // Scatter length if not the first (global) length if ((!FIRST_TILE) || (ITEM != 0) || (threadIdx.x > 0)) { d_lengths_out[item_offset - 1] = lengths_and_offsets[ITEM].value; } } } } /** * Two-phase scatter */ template __device__ __forceinline__ void ScatterTwoPhase( OffsetT tile_num_runs_exclusive_in_global, OffsetT warp_num_runs_aggregate, OffsetT warp_num_runs_exclusive_in_tile, OffsetT (&thread_num_runs_exclusive_in_warp)[ITEMS_PER_THREAD], LengthOffsetPair (&lengths_and_offsets)[ITEMS_PER_THREAD], Int2Type is_warp_time_slice) { unsigned int warp_id = ((WARPS == 1) ? 0 : threadIdx.x / WARP_THREADS); int lane_id = LaneId(); // Unzip OffsetT run_offsets[ITEMS_PER_THREAD]; LengthT run_lengths[ITEMS_PER_THREAD]; #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { run_offsets[ITEM] = lengths_and_offsets[ITEM].key; run_lengths[ITEM] = lengths_and_offsets[ITEM].value; } WarpExchangeOffsets(temp_storage.aliasable.scatter_aliasable.exchange_offsets[warp_id]).ScatterToStriped( run_offsets, thread_num_runs_exclusive_in_warp); WARP_SYNC(0xffffffff); WarpExchangeLengths(temp_storage.aliasable.scatter_aliasable.exchange_lengths[warp_id]).ScatterToStriped( run_lengths, thread_num_runs_exclusive_in_warp); // Global scatter #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { if ((ITEM * WARP_THREADS) + lane_id < warp_num_runs_aggregate) { OffsetT item_offset = tile_num_runs_exclusive_in_global + warp_num_runs_exclusive_in_tile + (ITEM * WARP_THREADS) + lane_id; // Scatter offset d_offsets_out[item_offset] = run_offsets[ITEM]; // Scatter length if not the first (global) length if ((!FIRST_TILE) || (ITEM != 0) || (threadIdx.x > 0)) { d_lengths_out[item_offset - 1] = run_lengths[ITEM]; } } } } /** * Direct scatter */ template __device__ __forceinline__ void ScatterDirect( OffsetT tile_num_runs_exclusive_in_global, OffsetT warp_num_runs_aggregate, OffsetT warp_num_runs_exclusive_in_tile, OffsetT (&thread_num_runs_exclusive_in_warp)[ITEMS_PER_THREAD], LengthOffsetPair (&lengths_and_offsets)[ITEMS_PER_THREAD]) { #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { if (thread_num_runs_exclusive_in_warp[ITEM] < warp_num_runs_aggregate) { OffsetT item_offset = tile_num_runs_exclusive_in_global + warp_num_runs_exclusive_in_tile + thread_num_runs_exclusive_in_warp[ITEM]; // Scatter offset d_offsets_out[item_offset] = lengths_and_offsets[ITEM].key; // Scatter length if not the first (global) length if (item_offset >= 1) { d_lengths_out[item_offset - 1] = lengths_and_offsets[ITEM].value; } } } } /** * Scatter */ template __device__ 
__forceinline__ void Scatter( OffsetT tile_num_runs_aggregate, OffsetT tile_num_runs_exclusive_in_global, OffsetT warp_num_runs_aggregate, OffsetT warp_num_runs_exclusive_in_tile, OffsetT (&thread_num_runs_exclusive_in_warp)[ITEMS_PER_THREAD], LengthOffsetPair (&lengths_and_offsets)[ITEMS_PER_THREAD]) { if ((ITEMS_PER_THREAD == 1) || (tile_num_runs_aggregate < BLOCK_THREADS)) { // Direct scatter if the warp has any items if (warp_num_runs_aggregate) { ScatterDirect( tile_num_runs_exclusive_in_global, warp_num_runs_aggregate, warp_num_runs_exclusive_in_tile, thread_num_runs_exclusive_in_warp, lengths_and_offsets); } } else { // Scatter two phase ScatterTwoPhase( tile_num_runs_exclusive_in_global, warp_num_runs_aggregate, warp_num_runs_exclusive_in_tile, thread_num_runs_exclusive_in_warp, lengths_and_offsets, Int2Type()); } } //--------------------------------------------------------------------- // Cooperatively scan a device-wide sequence of tiles with other CTAs //--------------------------------------------------------------------- /** * Process a tile of input (dynamic chained scan) */ template < bool LAST_TILE> __device__ __forceinline__ LengthOffsetPair ConsumeTile( OffsetT num_items, ///< Total number of global input items OffsetT num_remaining, ///< Number of global input items remaining (including this tile) int tile_idx, ///< Tile index OffsetT tile_offset, ///< Tile offset ScanTileStateT &tile_status) ///< Global list of tile status { if (tile_idx == 0) { // First tile // Load items T items[ITEMS_PER_THREAD]; if (LAST_TILE) BlockLoadT(temp_storage.aliasable.load).Load(d_in + tile_offset, items, num_remaining, T()); else BlockLoadT(temp_storage.aliasable.load).Load(d_in + tile_offset, items); if (SYNC_AFTER_LOAD) CTA_SYNC(); // Set flags LengthOffsetPair lengths_and_num_runs[ITEMS_PER_THREAD]; InitializeSelections( tile_offset, num_remaining, items, lengths_and_num_runs); // Exclusive scan of lengths and runs LengthOffsetPair tile_aggregate; LengthOffsetPair warp_aggregate; LengthOffsetPair warp_exclusive_in_tile; LengthOffsetPair thread_exclusive_in_warp; WarpScanAllocations( tile_aggregate, warp_aggregate, warp_exclusive_in_tile, thread_exclusive_in_warp, lengths_and_num_runs); // Update tile status if this is not the last tile if (!LAST_TILE && (threadIdx.x == 0)) tile_status.SetInclusive(0, tile_aggregate); // Update thread_exclusive_in_warp to fold in warp run-length if (thread_exclusive_in_warp.key == 0) thread_exclusive_in_warp.value += warp_exclusive_in_tile.value; LengthOffsetPair lengths_and_offsets[ITEMS_PER_THREAD]; OffsetT thread_num_runs_exclusive_in_warp[ITEMS_PER_THREAD]; LengthOffsetPair lengths_and_num_runs2[ITEMS_PER_THREAD]; // Downsweep scan through lengths_and_num_runs internal::ThreadScanExclusive(lengths_and_num_runs, lengths_and_num_runs2, scan_op, thread_exclusive_in_warp); // Zip #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { lengths_and_offsets[ITEM].value = lengths_and_num_runs2[ITEM].value; lengths_and_offsets[ITEM].key = tile_offset + (threadIdx.x * ITEMS_PER_THREAD) + ITEM; thread_num_runs_exclusive_in_warp[ITEM] = (lengths_and_num_runs[ITEM].key) ? 
lengths_and_num_runs2[ITEM].key : // keep WARP_THREADS * ITEMS_PER_THREAD; // discard } OffsetT tile_num_runs_aggregate = tile_aggregate.key; OffsetT tile_num_runs_exclusive_in_global = 0; OffsetT warp_num_runs_aggregate = warp_aggregate.key; OffsetT warp_num_runs_exclusive_in_tile = warp_exclusive_in_tile.key; // Scatter Scatter( tile_num_runs_aggregate, tile_num_runs_exclusive_in_global, warp_num_runs_aggregate, warp_num_runs_exclusive_in_tile, thread_num_runs_exclusive_in_warp, lengths_and_offsets); // Return running total (inclusive of this tile) return tile_aggregate; } else { // Not first tile // Load items T items[ITEMS_PER_THREAD]; if (LAST_TILE) BlockLoadT(temp_storage.aliasable.load).Load(d_in + tile_offset, items, num_remaining, T()); else BlockLoadT(temp_storage.aliasable.load).Load(d_in + tile_offset, items); if (SYNC_AFTER_LOAD) CTA_SYNC(); // Set flags LengthOffsetPair lengths_and_num_runs[ITEMS_PER_THREAD]; InitializeSelections( tile_offset, num_remaining, items, lengths_and_num_runs); // Exclusive scan of lengths and runs LengthOffsetPair tile_aggregate; LengthOffsetPair warp_aggregate; LengthOffsetPair warp_exclusive_in_tile; LengthOffsetPair thread_exclusive_in_warp; WarpScanAllocations( tile_aggregate, warp_aggregate, warp_exclusive_in_tile, thread_exclusive_in_warp, lengths_and_num_runs); // First warp computes tile prefix in lane 0 TilePrefixCallbackOpT prefix_op(tile_status, temp_storage.aliasable.prefix, Sum(), tile_idx); unsigned int warp_id = ((WARPS == 1) ? 0 : threadIdx.x / WARP_THREADS); if (warp_id == 0) { prefix_op(tile_aggregate); if (threadIdx.x == 0) temp_storage.tile_exclusive = prefix_op.exclusive_prefix; } CTA_SYNC(); LengthOffsetPair tile_exclusive_in_global = temp_storage.tile_exclusive; // Update thread_exclusive_in_warp to fold in warp and tile run-lengths LengthOffsetPair thread_exclusive = scan_op(tile_exclusive_in_global, warp_exclusive_in_tile); if (thread_exclusive_in_warp.key == 0) thread_exclusive_in_warp.value += thread_exclusive.value; // Downsweep scan through lengths_and_num_runs LengthOffsetPair lengths_and_num_runs2[ITEMS_PER_THREAD]; LengthOffsetPair lengths_and_offsets[ITEMS_PER_THREAD]; OffsetT thread_num_runs_exclusive_in_warp[ITEMS_PER_THREAD]; internal::ThreadScanExclusive(lengths_and_num_runs, lengths_and_num_runs2, scan_op, thread_exclusive_in_warp); // Zip #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { lengths_and_offsets[ITEM].value = lengths_and_num_runs2[ITEM].value; lengths_and_offsets[ITEM].key = tile_offset + (threadIdx.x * ITEMS_PER_THREAD) + ITEM; thread_num_runs_exclusive_in_warp[ITEM] = (lengths_and_num_runs[ITEM].key) ? 
lengths_and_num_runs2[ITEM].key : // keep WARP_THREADS * ITEMS_PER_THREAD; // discard } OffsetT tile_num_runs_aggregate = tile_aggregate.key; OffsetT tile_num_runs_exclusive_in_global = tile_exclusive_in_global.key; OffsetT warp_num_runs_aggregate = warp_aggregate.key; OffsetT warp_num_runs_exclusive_in_tile = warp_exclusive_in_tile.key; // Scatter Scatter( tile_num_runs_aggregate, tile_num_runs_exclusive_in_global, warp_num_runs_aggregate, warp_num_runs_exclusive_in_tile, thread_num_runs_exclusive_in_warp, lengths_and_offsets); // Return running total (inclusive of this tile) return prefix_op.inclusive_prefix; } } /** * Scan tiles of items as part of a dynamic chained scan */ template ///< Output iterator type for recording number of items selected __device__ __forceinline__ void ConsumeRange( int num_tiles, ///< Total number of input tiles ScanTileStateT& tile_status, ///< Global list of tile status NumRunsIteratorT d_num_runs_out) ///< Output pointer for total number of runs identified { // Blocks are launched in increasing order, so just assign one tile per block int tile_idx = (blockIdx.x * gridDim.y) + blockIdx.y; // Current tile index OffsetT tile_offset = tile_idx * TILE_ITEMS; // Global offset for the current tile OffsetT num_remaining = num_items - tile_offset; // Remaining items (including this tile) if (tile_idx < num_tiles - 1) { // Not the last tile (full) ConsumeTile(num_items, num_remaining, tile_idx, tile_offset, tile_status); } else if (num_remaining > 0) { // The last tile (possibly partially-full) LengthOffsetPair running_total = ConsumeTile(num_items, num_remaining, tile_idx, tile_offset, tile_status); if (threadIdx.x == 0) { // Output the total number of items selected *d_num_runs_out = running_total.key; // The inclusive prefix contains accumulated length reduction for the last run if (running_total.key > 0) d_lengths_out[running_total.key - 1] = running_total.value; } } } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/agent/agent_scan.cuh000066400000000000000000000444511411340063500217160ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::AgentScan implements a stateful abstraction of CUDA thread blocks for participating in device-wide prefix scan . */ #pragma once #include #include "single_pass_scan_operators.cuh" #include "../block/block_load.cuh" #include "../block/block_store.cuh" #include "../block/block_scan.cuh" #include "../grid/grid_queue.cuh" #include "../iterator/cache_modified_input_iterator.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * Tuning policy types ******************************************************************************/ /** * Parameterizable tuning policy type for AgentScan */ template < int _BLOCK_THREADS, ///< Threads per thread block int _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) BlockLoadAlgorithm _LOAD_ALGORITHM, ///< The BlockLoad algorithm to use CacheLoadModifier _LOAD_MODIFIER, ///< Cache load modifier for reading input elements BlockStoreAlgorithm _STORE_ALGORITHM, ///< The BlockStore algorithm to use BlockScanAlgorithm _SCAN_ALGORITHM> ///< The BlockScan algorithm to use struct AgentScanPolicy { enum { BLOCK_THREADS = _BLOCK_THREADS, ///< Threads per thread block ITEMS_PER_THREAD = _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) }; static const BlockLoadAlgorithm LOAD_ALGORITHM = _LOAD_ALGORITHM; ///< The BlockLoad algorithm to use static const CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER; ///< Cache load modifier for reading input elements static const BlockStoreAlgorithm STORE_ALGORITHM = _STORE_ALGORITHM; ///< The BlockStore algorithm to use static const BlockScanAlgorithm SCAN_ALGORITHM = _SCAN_ALGORITHM; ///< The BlockScan algorithm to use }; /****************************************************************************** * Thread block abstractions ******************************************************************************/ /** * \brief AgentScan implements a stateful abstraction of CUDA thread blocks for participating in device-wide prefix scan . */ template < typename AgentScanPolicyT, ///< Parameterized AgentScanPolicyT tuning policy type typename InputIteratorT, ///< Random-access input iterator type typename OutputIteratorT, ///< Random-access output iterator type typename ScanOpT, ///< Scan functor type typename InitValueT, ///< The init_value element for ScanOpT type (cub::NullType for inclusive scan) typename OffsetT> ///< Signed integer type for global offsets struct AgentScan { //--------------------------------------------------------------------- // Types and constants //--------------------------------------------------------------------- // The input value type typedef typename std::iterator_traits::value_type InputT; // The output value type typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? 
typename std::iterator_traits::value_type, // ... then the input iterator's value type, typename std::iterator_traits::value_type>::Type OutputT; // ... else the output iterator's value type // Tile status descriptor interface type typedef ScanTileState ScanTileStateT; // Input iterator wrapper type (for applying cache modifier) typedef typename If::VALUE, CacheModifiedInputIterator, // Wrap the native input pointer with CacheModifiedInputIterator InputIteratorT>::Type // Directly use the supplied input iterator type WrappedInputIteratorT; // Constants enum { IS_INCLUSIVE = Equals::VALUE, // Inclusive scan if no init_value type is provided BLOCK_THREADS = AgentScanPolicyT::BLOCK_THREADS, ITEMS_PER_THREAD = AgentScanPolicyT::ITEMS_PER_THREAD, TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, }; // Parameterized BlockLoad type typedef BlockLoad< OutputT, AgentScanPolicyT::BLOCK_THREADS, AgentScanPolicyT::ITEMS_PER_THREAD, AgentScanPolicyT::LOAD_ALGORITHM> BlockLoadT; // Parameterized BlockStore type typedef BlockStore< OutputT, AgentScanPolicyT::BLOCK_THREADS, AgentScanPolicyT::ITEMS_PER_THREAD, AgentScanPolicyT::STORE_ALGORITHM> BlockStoreT; // Parameterized BlockScan type typedef BlockScan< OutputT, AgentScanPolicyT::BLOCK_THREADS, AgentScanPolicyT::SCAN_ALGORITHM> BlockScanT; // Callback type for obtaining tile prefix during block scan typedef TilePrefixCallbackOp< OutputT, ScanOpT, ScanTileStateT> TilePrefixCallbackOpT; // Stateful BlockScan prefix callback type for managing a running total while scanning consecutive tiles typedef BlockScanRunningPrefixOp< OutputT, ScanOpT> RunningPrefixCallbackOp; // Shared memory type for this thread block union _TempStorage { typename BlockLoadT::TempStorage load; // Smem needed for tile loading typename BlockStoreT::TempStorage store; // Smem needed for tile storing struct { typename TilePrefixCallbackOpT::TempStorage prefix; // Smem needed for cooperative prefix callback typename BlockScanT::TempStorage scan; // Smem needed for tile scanning }; }; // Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; //--------------------------------------------------------------------- // Per-thread fields //--------------------------------------------------------------------- _TempStorage& temp_storage; ///< Reference to temp_storage WrappedInputIteratorT d_in; ///< Input data OutputIteratorT d_out; ///< Output data ScanOpT scan_op; ///< Binary scan operator InitValueT init_value; ///< The init_value element for ScanOpT //--------------------------------------------------------------------- // Block scan utility methods //--------------------------------------------------------------------- /** * Exclusive scan specialization (first tile) */ __device__ __forceinline__ void ScanTile( OutputT (&items)[ITEMS_PER_THREAD], OutputT init_value, ScanOpT scan_op, OutputT &block_aggregate, Int2Type /*is_inclusive*/) { BlockScanT(temp_storage.scan).ExclusiveScan(items, items, init_value, scan_op, block_aggregate); block_aggregate = scan_op(init_value, block_aggregate); } /** * Inclusive scan specialization (first tile) */ __device__ __forceinline__ void ScanTile( OutputT (&items)[ITEMS_PER_THREAD], InitValueT /*init_value*/, ScanOpT scan_op, OutputT &block_aggregate, Int2Type /*is_inclusive*/) { BlockScanT(temp_storage.scan).InclusiveScan(items, items, scan_op, block_aggregate); } /** * Exclusive scan specialization (subsequent tiles) */ template __device__ __forceinline__ void ScanTile( OutputT (&items)[ITEMS_PER_THREAD], ScanOpT 
scan_op, PrefixCallback &prefix_op, Int2Type /*is_inclusive*/) { BlockScanT(temp_storage.scan).ExclusiveScan(items, items, scan_op, prefix_op); } /** * Inclusive scan specialization (subsequent tiles) */ template __device__ __forceinline__ void ScanTile( OutputT (&items)[ITEMS_PER_THREAD], ScanOpT scan_op, PrefixCallback &prefix_op, Int2Type /*is_inclusive*/) { BlockScanT(temp_storage.scan).InclusiveScan(items, items, scan_op, prefix_op); } //--------------------------------------------------------------------- // Constructor //--------------------------------------------------------------------- // Constructor __device__ __forceinline__ AgentScan( TempStorage& temp_storage, ///< Reference to temp_storage InputIteratorT d_in, ///< Input data OutputIteratorT d_out, ///< Output data ScanOpT scan_op, ///< Binary scan operator InitValueT init_value) ///< Initial value to seed the exclusive scan : temp_storage(temp_storage.Alias()), d_in(d_in), d_out(d_out), scan_op(scan_op), init_value(init_value) {} //--------------------------------------------------------------------- // Cooperatively scan a device-wide sequence of tiles with other CTAs //--------------------------------------------------------------------- /** * Process a tile of input (dynamic chained scan) */ template ///< Whether the current tile is the last tile __device__ __forceinline__ void ConsumeTile( OffsetT num_remaining, ///< Number of global input items remaining (including this tile) int tile_idx, ///< Tile index OffsetT tile_offset, ///< Tile offset ScanTileStateT& tile_state) ///< Global tile state descriptor { // Load items OutputT items[ITEMS_PER_THREAD]; if (IS_LAST_TILE) BlockLoadT(temp_storage.load).Load(d_in + tile_offset, items, num_remaining); else BlockLoadT(temp_storage.load).Load(d_in + tile_offset, items); CTA_SYNC(); // Perform tile scan if (tile_idx == 0) { // Scan first tile OutputT block_aggregate; ScanTile(items, init_value, scan_op, block_aggregate, Int2Type()); if ((!IS_LAST_TILE) && (threadIdx.x == 0)) tile_state.SetInclusive(0, block_aggregate); } else { // Scan non-first tile TilePrefixCallbackOpT prefix_op(tile_state, temp_storage.prefix, scan_op, tile_idx); ScanTile(items, scan_op, prefix_op, Int2Type()); } CTA_SYNC(); // Store items if (IS_LAST_TILE) BlockStoreT(temp_storage.store).Store(d_out + tile_offset, items, num_remaining); else BlockStoreT(temp_storage.store).Store(d_out + tile_offset, items); } /** * Scan tiles of items as part of a dynamic chained scan */ __device__ __forceinline__ void ConsumeRange( int num_items, ///< Total number of input items ScanTileStateT& tile_state, ///< Global tile state descriptor int start_tile) ///< The starting tile for the current grid { // Blocks are launched in increasing order, so just assign one tile per block int tile_idx = start_tile + blockIdx.x; // Current tile index OffsetT tile_offset = OffsetT(TILE_ITEMS) * tile_idx; // Global offset for the current tile OffsetT num_remaining = num_items - tile_offset; // Remaining items (including this tile) if (num_remaining > TILE_ITEMS) { // Not last tile ConsumeTile(num_remaining, tile_idx, tile_offset, tile_state); } else if (num_remaining > 0) { // Last tile ConsumeTile(num_remaining, tile_idx, tile_offset, tile_state); } } //--------------------------------------------------------------------- // Scan an sequence of consecutive tiles (independent of other thread blocks) //--------------------------------------------------------------------- /** * Process a tile of input */ template < bool IS_FIRST_TILE, 
bool IS_LAST_TILE> __device__ __forceinline__ void ConsumeTile( OffsetT tile_offset, ///< Tile offset RunningPrefixCallbackOp& prefix_op, ///< Running prefix operator int valid_items = TILE_ITEMS) ///< Number of valid items in the tile { // Load items OutputT items[ITEMS_PER_THREAD]; if (IS_LAST_TILE) BlockLoadT(temp_storage.load).Load(d_in + tile_offset, items, valid_items); else BlockLoadT(temp_storage.load).Load(d_in + tile_offset, items); CTA_SYNC(); // Block scan if (IS_FIRST_TILE) { OutputT block_aggregate; ScanTile(items, init_value, scan_op, block_aggregate, Int2Type()); prefix_op.running_total = block_aggregate; } else { ScanTile(items, scan_op, prefix_op, Int2Type()); } CTA_SYNC(); // Store items if (IS_LAST_TILE) BlockStoreT(temp_storage.store).Store(d_out + tile_offset, items, valid_items); else BlockStoreT(temp_storage.store).Store(d_out + tile_offset, items); } /** * Scan a consecutive share of input tiles */ __device__ __forceinline__ void ConsumeRange( OffsetT range_offset, ///< [in] Threadblock begin offset (inclusive) OffsetT range_end) ///< [in] Threadblock end offset (exclusive) { BlockScanRunningPrefixOp prefix_op(scan_op); if (range_offset + TILE_ITEMS <= range_end) { // Consume first tile of input (full) ConsumeTile(range_offset, prefix_op); range_offset += TILE_ITEMS; // Consume subsequent full tiles of input while (range_offset + TILE_ITEMS <= range_end) { ConsumeTile(range_offset, prefix_op); range_offset += TILE_ITEMS; } // Consume a partially-full tile if (range_offset < range_end) { int valid_items = range_end - range_offset; ConsumeTile(range_offset, prefix_op, valid_items); } } else { // Consume the first tile of input (partially-full) int valid_items = range_end - range_offset; ConsumeTile(range_offset, prefix_op, valid_items); } } /** * Scan a consecutive share of input tiles, seeded with the specified prefix value */ __device__ __forceinline__ void ConsumeRange( OffsetT range_offset, ///< [in] Threadblock begin offset (inclusive) OffsetT range_end, ///< [in] Threadblock end offset (exclusive) OutputT prefix) ///< [in] The prefix to apply to the scan segment { BlockScanRunningPrefixOp prefix_op(prefix, scan_op); // Consume full tiles of input while (range_offset + TILE_ITEMS <= range_end) { ConsumeTile(range_offset, prefix_op); range_offset += TILE_ITEMS; } // Consume a partially-full tile if (range_offset < range_end) { int valid_items = range_end - range_offset; ConsumeTile(range_offset, prefix_op, valid_items); } } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/agent/agent_segment_fixup.cuh000066400000000000000000000404171411340063500236450ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::AgentSegmentFixup implements a stateful abstraction of CUDA thread blocks for participating in device-wide reduce-value-by-key. */ #pragma once #include #include "single_pass_scan_operators.cuh" #include "../block/block_load.cuh" #include "../block/block_store.cuh" #include "../block/block_scan.cuh" #include "../block/block_discontinuity.cuh" #include "../iterator/cache_modified_input_iterator.cuh" #include "../iterator/constant_input_iterator.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * Tuning policy types ******************************************************************************/ /** * Parameterizable tuning policy type for AgentSegmentFixup */ template < int _BLOCK_THREADS, ///< Threads per thread block int _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) BlockLoadAlgorithm _LOAD_ALGORITHM, ///< The BlockLoad algorithm to use CacheLoadModifier _LOAD_MODIFIER, ///< Cache load modifier for reading input elements BlockScanAlgorithm _SCAN_ALGORITHM> ///< The BlockScan algorithm to use struct AgentSegmentFixupPolicy { enum { BLOCK_THREADS = _BLOCK_THREADS, ///< Threads per thread block ITEMS_PER_THREAD = _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) }; static const BlockLoadAlgorithm LOAD_ALGORITHM = _LOAD_ALGORITHM; ///< The BlockLoad algorithm to use static const CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER; ///< Cache load modifier for reading input elements static const BlockScanAlgorithm SCAN_ALGORITHM = _SCAN_ALGORITHM; ///< The BlockScan algorithm to use }; /****************************************************************************** * Thread block abstractions ******************************************************************************/ /** * \brief AgentSegmentFixup implements a stateful abstraction of CUDA thread blocks for participating in device-wide reduce-value-by-key */ template < typename AgentSegmentFixupPolicyT, ///< Parameterized AgentSegmentFixupPolicy tuning policy type typename PairsInputIteratorT, ///< Random-access input iterator type for keys typename AggregatesOutputIteratorT, ///< Random-access output iterator type for values typename EqualityOpT, ///< KeyT equality operator type typename ReductionOpT, ///< ValueT reduction operator type typename OffsetT> ///< Signed integer type for global offsets struct AgentSegmentFixup { 
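    //---------------------------------------------------------------------
    // Reduce-value-by-key at a glance (a minimal illustrative sketch added
    // for orientation; the sample keys/values below are placeholders and
    // are not part of the upstream CUB sources):
    //
    //     keys:    0 0 1 1 1 2      values:  1 2 3 4 5 6
    //     result:  key 0 -> 3,  key 1 -> 12,  key 2 -> 6
    //
    // This agent consumes (key, partial-value) pairs and "fixes up" the
    // aggregates of segments that straddle tile boundaries, either with
    // global atomicAdd (the USE_ATOMIC_FIXUP path below) or with a
    // reduce-by-key prefix scan across tiles.
    //---------------------------------------------------------------------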
//--------------------------------------------------------------------- // Types and constants //--------------------------------------------------------------------- // Data type of key-value input iterator typedef typename std::iterator_traits::value_type KeyValuePairT; // Value type typedef typename KeyValuePairT::Value ValueT; // Tile status descriptor interface type typedef ReduceByKeyScanTileState ScanTileStateT; // Constants enum { BLOCK_THREADS = AgentSegmentFixupPolicyT::BLOCK_THREADS, ITEMS_PER_THREAD = AgentSegmentFixupPolicyT::ITEMS_PER_THREAD, TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, // Whether or not do fixup using RLE + global atomics USE_ATOMIC_FIXUP = (CUB_PTX_ARCH >= 350) && (Equals::VALUE || Equals::VALUE || Equals::VALUE || Equals::VALUE), // Whether or not the scan operation has a zero-valued identity value (true if we're performing addition on a primitive type) HAS_IDENTITY_ZERO = (Equals::VALUE) && (Traits::PRIMITIVE), }; // Cache-modified Input iterator wrapper type (for applying cache modifier) for keys typedef typename If::VALUE, CacheModifiedInputIterator, // Wrap the native input pointer with CacheModifiedValuesInputIterator PairsInputIteratorT>::Type // Directly use the supplied input iterator type WrappedPairsInputIteratorT; // Cache-modified Input iterator wrapper type (for applying cache modifier) for fixup values typedef typename If::VALUE, CacheModifiedInputIterator, // Wrap the native input pointer with CacheModifiedValuesInputIterator AggregatesOutputIteratorT>::Type // Directly use the supplied input iterator type WrappedFixupInputIteratorT; // Reduce-value-by-segment scan operator typedef ReduceByKeyOp ReduceBySegmentOpT; // Parameterized BlockLoad type for pairs typedef BlockLoad< KeyValuePairT, BLOCK_THREADS, ITEMS_PER_THREAD, AgentSegmentFixupPolicyT::LOAD_ALGORITHM> BlockLoadPairs; // Parameterized BlockScan type typedef BlockScan< KeyValuePairT, BLOCK_THREADS, AgentSegmentFixupPolicyT::SCAN_ALGORITHM> BlockScanT; // Callback type for obtaining tile prefix during block scan typedef TilePrefixCallbackOp< KeyValuePairT, ReduceBySegmentOpT, ScanTileStateT> TilePrefixCallbackOpT; // Shared memory type for this thread block union _TempStorage { struct { typename BlockScanT::TempStorage scan; // Smem needed for tile scanning typename TilePrefixCallbackOpT::TempStorage prefix; // Smem needed for cooperative prefix callback }; // Smem needed for loading keys typename BlockLoadPairs::TempStorage load_pairs; }; // Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; //--------------------------------------------------------------------- // Per-thread fields //--------------------------------------------------------------------- _TempStorage& temp_storage; ///< Reference to temp_storage WrappedPairsInputIteratorT d_pairs_in; ///< Input keys AggregatesOutputIteratorT d_aggregates_out; ///< Output value aggregates WrappedFixupInputIteratorT d_fixup_in; ///< Fixup input values InequalityWrapper inequality_op; ///< KeyT inequality operator ReductionOpT reduction_op; ///< Reduction operator ReduceBySegmentOpT scan_op; ///< Reduce-by-segment scan operator //--------------------------------------------------------------------- // Constructor //--------------------------------------------------------------------- // Constructor __device__ __forceinline__ AgentSegmentFixup( TempStorage& temp_storage, ///< Reference to temp_storage PairsInputIteratorT d_pairs_in, ///< Input keys AggregatesOutputIteratorT 
d_aggregates_out, ///< Output value aggregates EqualityOpT equality_op, ///< KeyT equality operator ReductionOpT reduction_op) ///< ValueT reduction operator : temp_storage(temp_storage.Alias()), d_pairs_in(d_pairs_in), d_aggregates_out(d_aggregates_out), d_fixup_in(d_aggregates_out), inequality_op(equality_op), reduction_op(reduction_op), scan_op(reduction_op) {} //--------------------------------------------------------------------- // Cooperatively scan a device-wide sequence of tiles with other CTAs //--------------------------------------------------------------------- /** * Process input tile. Specialized for atomic-fixup */ template __device__ __forceinline__ void ConsumeTile( OffsetT num_remaining, ///< Number of global input items remaining (including this tile) int tile_idx, ///< Tile index OffsetT tile_offset, ///< Tile offset ScanTileStateT& tile_state, ///< Global tile state descriptor Int2Type use_atomic_fixup) ///< Marker whether to use atomicAdd (instead of reduce-by-key) { KeyValuePairT pairs[ITEMS_PER_THREAD]; // Load pairs KeyValuePairT oob_pair; oob_pair.key = -1; if (IS_LAST_TILE) BlockLoadPairs(temp_storage.load_pairs).Load(d_pairs_in + tile_offset, pairs, num_remaining, oob_pair); else BlockLoadPairs(temp_storage.load_pairs).Load(d_pairs_in + tile_offset, pairs); // RLE #pragma unroll for (int ITEM = 1; ITEM < ITEMS_PER_THREAD; ++ITEM) { ValueT* d_scatter = d_aggregates_out + pairs[ITEM - 1].key; if (pairs[ITEM].key != pairs[ITEM - 1].key) atomicAdd(d_scatter, pairs[ITEM - 1].value); else pairs[ITEM].value = reduction_op(pairs[ITEM - 1].value, pairs[ITEM].value); } // Flush last item if valid ValueT* d_scatter = d_aggregates_out + pairs[ITEMS_PER_THREAD - 1].key; if ((!IS_LAST_TILE) || (pairs[ITEMS_PER_THREAD - 1].key >= 0)) atomicAdd(d_scatter, pairs[ITEMS_PER_THREAD - 1].value); } /** * Process input tile. 
Specialized for reduce-by-key fixup */ template __device__ __forceinline__ void ConsumeTile( OffsetT num_remaining, ///< Number of global input items remaining (including this tile) int tile_idx, ///< Tile index OffsetT tile_offset, ///< Tile offset ScanTileStateT& tile_state, ///< Global tile state descriptor Int2Type use_atomic_fixup) ///< Marker whether to use atomicAdd (instead of reduce-by-key) { KeyValuePairT pairs[ITEMS_PER_THREAD]; KeyValuePairT scatter_pairs[ITEMS_PER_THREAD]; // Load pairs KeyValuePairT oob_pair; oob_pair.key = -1; if (IS_LAST_TILE) BlockLoadPairs(temp_storage.load_pairs).Load(d_pairs_in + tile_offset, pairs, num_remaining, oob_pair); else BlockLoadPairs(temp_storage.load_pairs).Load(d_pairs_in + tile_offset, pairs); CTA_SYNC(); KeyValuePairT tile_aggregate; if (tile_idx == 0) { // Exclusive scan of values and segment_flags BlockScanT(temp_storage.scan).ExclusiveScan(pairs, scatter_pairs, scan_op, tile_aggregate); // Update tile status if this is not the last tile if (threadIdx.x == 0) { // Set first segment id to not trigger a flush (invalid from exclusive scan) scatter_pairs[0].key = pairs[0].key; if (!IS_LAST_TILE) tile_state.SetInclusive(0, tile_aggregate); } } else { // Exclusive scan of values and segment_flags TilePrefixCallbackOpT prefix_op(tile_state, temp_storage.prefix, scan_op, tile_idx); BlockScanT(temp_storage.scan).ExclusiveScan(pairs, scatter_pairs, scan_op, prefix_op); tile_aggregate = prefix_op.GetBlockAggregate(); } // Scatter updated values #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { if (scatter_pairs[ITEM].key != pairs[ITEM].key) { // Update the value at the key location ValueT value = d_fixup_in[scatter_pairs[ITEM].key]; value = reduction_op(value, scatter_pairs[ITEM].value); d_aggregates_out[scatter_pairs[ITEM].key] = value; } } // Finalize the last item if (IS_LAST_TILE) { // Last thread will output final count and last item, if necessary if (threadIdx.x == BLOCK_THREADS - 1) { // If the last tile is a whole tile, the inclusive prefix contains accumulated value reduction for the last segment if (num_remaining == TILE_ITEMS) { // Update the value at the key location OffsetT last_key = pairs[ITEMS_PER_THREAD - 1].key; d_aggregates_out[last_key] = reduction_op(tile_aggregate.value, d_fixup_in[last_key]); } } } } /** * Scan tiles of items as part of a dynamic chained scan */ __device__ __forceinline__ void ConsumeRange( int num_items, ///< Total number of input items int num_tiles, ///< Total number of input tiles ScanTileStateT& tile_state) ///< Global tile state descriptor { // Blocks are launched in increasing order, so just assign one tile per block int tile_idx = (blockIdx.x * gridDim.y) + blockIdx.y; // Current tile index OffsetT tile_offset = tile_idx * TILE_ITEMS; // Global offset for the current tile OffsetT num_remaining = num_items - tile_offset; // Remaining items (including this tile) if (num_remaining > TILE_ITEMS) { // Not the last tile (full) ConsumeTile(num_remaining, tile_idx, tile_offset, tile_state, Int2Type()); } else if (num_remaining > 0) { // The last tile (possibly partially-full) ConsumeTile(num_remaining, tile_idx, tile_offset, tile_state, Int2Type()); } } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/agent/agent_select_if.cuh000066400000000000000000000716321411340063500227300ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. 
* Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::AgentSelectIf implements a stateful abstraction of CUDA thread blocks for participating in device-wide select. */ #pragma once #include #include "single_pass_scan_operators.cuh" #include "../block/block_load.cuh" #include "../block/block_store.cuh" #include "../block/block_scan.cuh" #include "../block/block_exchange.cuh" #include "../block/block_discontinuity.cuh" #include "../grid/grid_queue.cuh" #include "../iterator/cache_modified_input_iterator.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * Tuning policy types ******************************************************************************/ /** * Parameterizable tuning policy type for AgentSelectIf */ template < int _BLOCK_THREADS, ///< Threads per thread block int _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) BlockLoadAlgorithm _LOAD_ALGORITHM, ///< The BlockLoad algorithm to use CacheLoadModifier _LOAD_MODIFIER, ///< Cache load modifier for reading input elements BlockScanAlgorithm _SCAN_ALGORITHM> ///< The BlockScan algorithm to use struct AgentSelectIfPolicy { enum { BLOCK_THREADS = _BLOCK_THREADS, ///< Threads per thread block ITEMS_PER_THREAD = _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) }; static const BlockLoadAlgorithm LOAD_ALGORITHM = _LOAD_ALGORITHM; ///< The BlockLoad algorithm to use static const CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER; ///< Cache load modifier for reading input elements static const BlockScanAlgorithm SCAN_ALGORITHM = _SCAN_ALGORITHM; ///< The BlockScan algorithm to use }; /****************************************************************************** * Thread block abstractions ******************************************************************************/ /** * \brief AgentSelectIf implements a stateful abstraction of CUDA 
thread blocks for participating in device-wide selection * * Performs functor-based selection if SelectOpT functor type != NullType * Otherwise performs flag-based selection if FlagsInputIterator's value type != NullType * Otherwise performs discontinuity selection (keep unique) */ template < typename AgentSelectIfPolicyT, ///< Parameterized AgentSelectIfPolicy tuning policy type typename InputIteratorT, ///< Random-access input iterator type for selection items typename FlagsInputIteratorT, ///< Random-access input iterator type for selections (NullType* if a selection functor or discontinuity flagging is to be used for selection) typename SelectedOutputIteratorT, ///< Random-access input iterator type for selection_flags items typename SelectOpT, ///< Selection operator type (NullType if selections or discontinuity flagging is to be used for selection) typename EqualityOpT, ///< Equality operator type (NullType if selection functor or selections is to be used for selection) typename OffsetT, ///< Signed integer type for global offsets bool KEEP_REJECTS> ///< Whether or not we push rejected items to the back of the output struct AgentSelectIf { //--------------------------------------------------------------------- // Types and constants //--------------------------------------------------------------------- // The input value type typedef typename std::iterator_traits::value_type InputT; // The output value type typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? typename std::iterator_traits::value_type, // ... then the input iterator's value type, typename std::iterator_traits::value_type>::Type OutputT; // ... else the output iterator's value type // The flag value type typedef typename std::iterator_traits::value_type FlagT; // Tile status descriptor interface type typedef ScanTileState ScanTileStateT; // Constants enum { USE_SELECT_OP, USE_SELECT_FLAGS, USE_DISCONTINUITY, BLOCK_THREADS = AgentSelectIfPolicyT::BLOCK_THREADS, ITEMS_PER_THREAD = AgentSelectIfPolicyT::ITEMS_PER_THREAD, TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, TWO_PHASE_SCATTER = (ITEMS_PER_THREAD > 1), SELECT_METHOD = (!Equals::VALUE) ? USE_SELECT_OP : (!Equals::VALUE) ? 
USE_SELECT_FLAGS : USE_DISCONTINUITY }; // Cache-modified Input iterator wrapper type (for applying cache modifier) for items typedef typename If::VALUE, CacheModifiedInputIterator, // Wrap the native input pointer with CacheModifiedValuesInputIterator InputIteratorT>::Type // Directly use the supplied input iterator type WrappedInputIteratorT; // Cache-modified Input iterator wrapper type (for applying cache modifier) for values typedef typename If::VALUE, CacheModifiedInputIterator, // Wrap the native input pointer with CacheModifiedValuesInputIterator FlagsInputIteratorT>::Type // Directly use the supplied input iterator type WrappedFlagsInputIteratorT; // Parameterized BlockLoad type for input data typedef BlockLoad< OutputT, BLOCK_THREADS, ITEMS_PER_THREAD, AgentSelectIfPolicyT::LOAD_ALGORITHM> BlockLoadT; // Parameterized BlockLoad type for flags typedef BlockLoad< FlagT, BLOCK_THREADS, ITEMS_PER_THREAD, AgentSelectIfPolicyT::LOAD_ALGORITHM> BlockLoadFlags; // Parameterized BlockDiscontinuity type for items typedef BlockDiscontinuity< OutputT, BLOCK_THREADS> BlockDiscontinuityT; // Parameterized BlockScan type typedef BlockScan< OffsetT, BLOCK_THREADS, AgentSelectIfPolicyT::SCAN_ALGORITHM> BlockScanT; // Callback type for obtaining tile prefix during block scan typedef TilePrefixCallbackOp< OffsetT, cub::Sum, ScanTileStateT> TilePrefixCallbackOpT; // Item exchange type typedef OutputT ItemExchangeT[TILE_ITEMS]; // Shared memory type for this thread block union _TempStorage { struct { typename BlockScanT::TempStorage scan; // Smem needed for tile scanning typename TilePrefixCallbackOpT::TempStorage prefix; // Smem needed for cooperative prefix callback typename BlockDiscontinuityT::TempStorage discontinuity; // Smem needed for discontinuity detection }; // Smem needed for loading items typename BlockLoadT::TempStorage load_items; // Smem needed for loading values typename BlockLoadFlags::TempStorage load_flags; // Smem needed for compacting items (allows non POD items in this union) Uninitialized raw_exchange; }; // Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; //--------------------------------------------------------------------- // Per-thread fields //--------------------------------------------------------------------- _TempStorage& temp_storage; ///< Reference to temp_storage WrappedInputIteratorT d_in; ///< Input items SelectedOutputIteratorT d_selected_out; ///< Unique output items WrappedFlagsInputIteratorT d_flags_in; ///< Input selection flags (if applicable) InequalityWrapper inequality_op; ///< T inequality operator SelectOpT select_op; ///< Selection operator OffsetT num_items; ///< Total number of input items //--------------------------------------------------------------------- // Constructor //--------------------------------------------------------------------- // Constructor __device__ __forceinline__ AgentSelectIf( TempStorage &temp_storage, ///< Reference to temp_storage InputIteratorT d_in, ///< Input data FlagsInputIteratorT d_flags_in, ///< Input selection flags (if applicable) SelectedOutputIteratorT d_selected_out, ///< Output data SelectOpT select_op, ///< Selection operator EqualityOpT equality_op, ///< Equality operator OffsetT num_items) ///< Total number of input items : temp_storage(temp_storage.Alias()), d_in(d_in), d_flags_in(d_flags_in), d_selected_out(d_selected_out), select_op(select_op), inequality_op(equality_op), num_items(num_items) {} 
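    //---------------------------------------------------------------------
    // Host-side usage (a minimal sketch for orientation; buffer names such
    // as d_in, d_out and d_num_selected_out are placeholders, and
    // allocation/error checking are omitted). Device-wide entry points
    // such as cub::DeviceSelect::If / ::Flagged / ::Unique and
    // cub::DevicePartition ultimately drive this agent:
    //
    //     struct NonNegative {
    //         __host__ __device__ bool operator()(const float &x) const
    //         { return x >= 0.0f; }
    //     };
    //
    //     size_t temp_bytes = 0;
    //     void  *d_temp     = NULL;
    //     // First call computes the required temporary storage size ...
    //     cub::DeviceSelect::If(d_temp, temp_bytes, d_in, d_out,
    //         d_num_selected_out, num_items, NonNegative());
    //     cudaMalloc(&d_temp, temp_bytes);
    //     // ... second call performs the selection.
    //     cub::DeviceSelect::If(d_temp, temp_bytes, d_in, d_out,
    //         d_num_selected_out, num_items, NonNegative());
    //---------------------------------------------------------------------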
//--------------------------------------------------------------------- // Utility methods for initializing the selections //--------------------------------------------------------------------- /** * Initialize selections (specialized for selection operator) */ template __device__ __forceinline__ void InitializeSelections( OffsetT /*tile_offset*/, OffsetT num_tile_items, OutputT (&items)[ITEMS_PER_THREAD], OffsetT (&selection_flags)[ITEMS_PER_THREAD], Int2Type /*select_method*/) { #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { // Out-of-bounds items are selection_flags selection_flags[ITEM] = 1; if (!IS_LAST_TILE || (OffsetT(threadIdx.x * ITEMS_PER_THREAD) + ITEM < num_tile_items)) selection_flags[ITEM] = select_op(items[ITEM]); } } /** * Initialize selections (specialized for valid flags) */ template __device__ __forceinline__ void InitializeSelections( OffsetT tile_offset, OffsetT num_tile_items, OutputT (&/*items*/)[ITEMS_PER_THREAD], OffsetT (&selection_flags)[ITEMS_PER_THREAD], Int2Type /*select_method*/) { CTA_SYNC(); FlagT flags[ITEMS_PER_THREAD]; if (IS_LAST_TILE) { // Out-of-bounds items are selection_flags BlockLoadFlags(temp_storage.load_flags).Load(d_flags_in + tile_offset, flags, num_tile_items, 1); } else { BlockLoadFlags(temp_storage.load_flags).Load(d_flags_in + tile_offset, flags); } // Convert flag type to selection_flags type #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { selection_flags[ITEM] = flags[ITEM]; } } /** * Initialize selections (specialized for discontinuity detection) */ template __device__ __forceinline__ void InitializeSelections( OffsetT tile_offset, OffsetT num_tile_items, OutputT (&items)[ITEMS_PER_THREAD], OffsetT (&selection_flags)[ITEMS_PER_THREAD], Int2Type /*select_method*/) { if (IS_FIRST_TILE) { CTA_SYNC(); // Set head selection_flags. 
First tile sets the first flag for the first item BlockDiscontinuityT(temp_storage.discontinuity).FlagHeads(selection_flags, items, inequality_op); } else { OutputT tile_predecessor; if (threadIdx.x == 0) tile_predecessor = d_in[tile_offset - 1]; CTA_SYNC(); BlockDiscontinuityT(temp_storage.discontinuity).FlagHeads(selection_flags, items, inequality_op, tile_predecessor); } // Set selection flags for out-of-bounds items #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { // Set selection_flags for out-of-bounds items if ((IS_LAST_TILE) && (OffsetT(threadIdx.x * ITEMS_PER_THREAD) + ITEM >= num_tile_items)) selection_flags[ITEM] = 1; } } //--------------------------------------------------------------------- // Scatter utility methods //--------------------------------------------------------------------- /** * Scatter flagged items to output offsets (specialized for direct scattering) */ template __device__ __forceinline__ void ScatterDirect( OutputT (&items)[ITEMS_PER_THREAD], OffsetT (&selection_flags)[ITEMS_PER_THREAD], OffsetT (&selection_indices)[ITEMS_PER_THREAD], OffsetT num_selections) { // Scatter flagged items #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { if (selection_flags[ITEM]) { if ((!IS_LAST_TILE) || selection_indices[ITEM] < num_selections) { d_selected_out[selection_indices[ITEM]] = items[ITEM]; } } } } /** * Scatter flagged items to output offsets (specialized for two-phase scattering) */ template __device__ __forceinline__ void ScatterTwoPhase( OutputT (&items)[ITEMS_PER_THREAD], OffsetT (&selection_flags)[ITEMS_PER_THREAD], OffsetT (&selection_indices)[ITEMS_PER_THREAD], int /*num_tile_items*/, ///< Number of valid items in this tile int num_tile_selections, ///< Number of selections in this tile OffsetT num_selections_prefix, ///< Total number of selections prior to this tile OffsetT /*num_rejected_prefix*/, ///< Total number of rejections prior to this tile Int2Type /*is_keep_rejects*/) ///< Marker type indicating whether to keep rejected items in the second partition { CTA_SYNC(); // Compact and scatter items #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { int local_scatter_offset = selection_indices[ITEM] - num_selections_prefix; if (selection_flags[ITEM]) { temp_storage.raw_exchange.Alias()[local_scatter_offset] = items[ITEM]; } } CTA_SYNC(); for (int item = threadIdx.x; item < num_tile_selections; item += BLOCK_THREADS) { d_selected_out[num_selections_prefix + item] = temp_storage.raw_exchange.Alias()[item]; } } /** * Scatter flagged items to output offsets (specialized for two-phase scattering) */ template __device__ __forceinline__ void ScatterTwoPhase( OutputT (&items)[ITEMS_PER_THREAD], OffsetT (&selection_flags)[ITEMS_PER_THREAD], OffsetT (&selection_indices)[ITEMS_PER_THREAD], int num_tile_items, ///< Number of valid items in this tile int num_tile_selections, ///< Number of selections in this tile OffsetT num_selections_prefix, ///< Total number of selections prior to this tile OffsetT num_rejected_prefix, ///< Total number of rejections prior to this tile Int2Type /*is_keep_rejects*/) ///< Marker type indicating whether to keep rejected items in the second partition { CTA_SYNC(); int tile_num_rejections = num_tile_items - num_tile_selections; // Scatter items to shared memory (rejections first) #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { int item_idx = (threadIdx.x * ITEMS_PER_THREAD) + ITEM; int local_selection_idx = selection_indices[ITEM] - num_selections_prefix; int 
local_rejection_idx = item_idx - local_selection_idx; int local_scatter_offset = (selection_flags[ITEM]) ? tile_num_rejections + local_selection_idx : local_rejection_idx; temp_storage.raw_exchange.Alias()[local_scatter_offset] = items[ITEM]; } CTA_SYNC(); // Gather items from shared memory and scatter to global #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { int item_idx = (ITEM * BLOCK_THREADS) + threadIdx.x; int rejection_idx = item_idx; int selection_idx = item_idx - tile_num_rejections; OffsetT scatter_offset = (item_idx < tile_num_rejections) ? num_items - num_rejected_prefix - rejection_idx - 1 : num_selections_prefix + selection_idx; OutputT item = temp_storage.raw_exchange.Alias()[item_idx]; if (!IS_LAST_TILE || (item_idx < num_tile_items)) { d_selected_out[scatter_offset] = item; } } } /** * Scatter flagged items */ template __device__ __forceinline__ void Scatter( OutputT (&items)[ITEMS_PER_THREAD], OffsetT (&selection_flags)[ITEMS_PER_THREAD], OffsetT (&selection_indices)[ITEMS_PER_THREAD], int num_tile_items, ///< Number of valid items in this tile int num_tile_selections, ///< Number of selections in this tile OffsetT num_selections_prefix, ///< Total number of selections prior to this tile OffsetT num_rejected_prefix, ///< Total number of rejections prior to this tile OffsetT num_selections) ///< Total number of selections including this tile { // Do a two-phase scatter if (a) keeping both partitions or (b) two-phase is enabled and the average number of selection_flags items per thread is greater than one if (KEEP_REJECTS || (TWO_PHASE_SCATTER && (num_tile_selections > BLOCK_THREADS))) { ScatterTwoPhase( items, selection_flags, selection_indices, num_tile_items, num_tile_selections, num_selections_prefix, num_rejected_prefix, Int2Type()); } else { ScatterDirect( items, selection_flags, selection_indices, num_selections); } } //--------------------------------------------------------------------- // Cooperatively scan a device-wide sequence of tiles with other CTAs //--------------------------------------------------------------------- /** * Process first tile of input (dynamic chained scan). 
Returns the running count of selections (including this tile) */ template __device__ __forceinline__ OffsetT ConsumeFirstTile( int num_tile_items, ///< Number of input items comprising this tile OffsetT tile_offset, ///< Tile offset ScanTileStateT& tile_state) ///< Global tile state descriptor { OutputT items[ITEMS_PER_THREAD]; OffsetT selection_flags[ITEMS_PER_THREAD]; OffsetT selection_indices[ITEMS_PER_THREAD]; // Load items if (IS_LAST_TILE) BlockLoadT(temp_storage.load_items).Load(d_in + tile_offset, items, num_tile_items); else BlockLoadT(temp_storage.load_items).Load(d_in + tile_offset, items); // Initialize selection_flags InitializeSelections( tile_offset, num_tile_items, items, selection_flags, Int2Type()); CTA_SYNC(); // Exclusive scan of selection_flags OffsetT num_tile_selections; BlockScanT(temp_storage.scan).ExclusiveSum(selection_flags, selection_indices, num_tile_selections); if (threadIdx.x == 0) { // Update tile status if this is not the last tile if (!IS_LAST_TILE) tile_state.SetInclusive(0, num_tile_selections); } // Discount any out-of-bounds selections if (IS_LAST_TILE) num_tile_selections -= (TILE_ITEMS - num_tile_items); // Scatter flagged items Scatter( items, selection_flags, selection_indices, num_tile_items, num_tile_selections, 0, 0, num_tile_selections); return num_tile_selections; } /** * Process subsequent tile of input (dynamic chained scan). Returns the running count of selections (including this tile) */ template __device__ __forceinline__ OffsetT ConsumeSubsequentTile( int num_tile_items, ///< Number of input items comprising this tile int tile_idx, ///< Tile index OffsetT tile_offset, ///< Tile offset ScanTileStateT& tile_state) ///< Global tile state descriptor { OutputT items[ITEMS_PER_THREAD]; OffsetT selection_flags[ITEMS_PER_THREAD]; OffsetT selection_indices[ITEMS_PER_THREAD]; // Load items if (IS_LAST_TILE) BlockLoadT(temp_storage.load_items).Load(d_in + tile_offset, items, num_tile_items); else BlockLoadT(temp_storage.load_items).Load(d_in + tile_offset, items); // Initialize selection_flags InitializeSelections( tile_offset, num_tile_items, items, selection_flags, Int2Type()); CTA_SYNC(); // Exclusive scan of values and selection_flags TilePrefixCallbackOpT prefix_op(tile_state, temp_storage.prefix, cub::Sum(), tile_idx); BlockScanT(temp_storage.scan).ExclusiveSum(selection_flags, selection_indices, prefix_op); OffsetT num_tile_selections = prefix_op.GetBlockAggregate(); OffsetT num_selections = prefix_op.GetInclusivePrefix(); OffsetT num_selections_prefix = prefix_op.GetExclusivePrefix(); OffsetT num_rejected_prefix = (tile_idx * TILE_ITEMS) - num_selections_prefix; // Discount any out-of-bounds selections if (IS_LAST_TILE) { int num_discount = TILE_ITEMS - num_tile_items; num_selections -= num_discount; num_tile_selections -= num_discount; } // Scatter flagged items Scatter( items, selection_flags, selection_indices, num_tile_items, num_tile_selections, num_selections_prefix, num_rejected_prefix, num_selections); return num_selections; } /** * Process a tile of input */ template __device__ __forceinline__ OffsetT ConsumeTile( int num_tile_items, ///< Number of input items comprising this tile int tile_idx, ///< Tile index OffsetT tile_offset, ///< Tile offset ScanTileStateT& tile_state) ///< Global tile state descriptor { OffsetT num_selections; if (tile_idx == 0) { num_selections = ConsumeFirstTile(num_tile_items, tile_offset, tile_state); } else { num_selections = ConsumeSubsequentTile(num_tile_items, tile_idx, tile_offset, tile_state); } 
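// Note (descriptive comment, not part of the original CUB source): at this point
// num_selections holds the running, inclusive count of selected items up to and
// including this tile; ConsumeRange() writes this value from the last tile to
// *d_num_selected_out.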
return num_selections; } /** * Scan tiles of items as part of a dynamic chained scan */ template ///< Output iterator type for recording number of items selection_flags __device__ __forceinline__ void ConsumeRange( int num_tiles, ///< Total number of input tiles ScanTileStateT& tile_state, ///< Global tile state descriptor NumSelectedIteratorT d_num_selected_out) ///< Output total number selection_flags { // Blocks are launched in increasing order, so just assign one tile per block int tile_idx = (blockIdx.x * gridDim.y) + blockIdx.y; // Current tile index OffsetT tile_offset = tile_idx * TILE_ITEMS; // Global offset for the current tile if (tile_idx < num_tiles - 1) { // Not the last tile (full) ConsumeTile(TILE_ITEMS, tile_idx, tile_offset, tile_state); } else { // The last tile (possibly partially-full) OffsetT num_remaining = num_items - tile_offset; OffsetT num_selections = ConsumeTile(num_remaining, tile_idx, tile_offset, tile_state); if (threadIdx.x == 0) { // Output the total number of items selection_flags *d_num_selected_out = num_selections; } } } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/agent/agent_spmv_csrt.cuh000066400000000000000000000626351411340063500230160ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::AgentSpmv implements a stateful abstraction of CUDA thread blocks for participating in device-wide SpMV. 
*/ #pragma once #include #include "../util_type.cuh" #include "../block/block_reduce.cuh" #include "../block/block_scan.cuh" #include "../block/block_exchange.cuh" #include "../thread/thread_search.cuh" #include "../thread/thread_operators.cuh" #include "../iterator/cache_modified_input_iterator.cuh" #include "../iterator/counting_input_iterator.cuh" #include "../iterator/tex_ref_input_iterator.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * Tuning policy ******************************************************************************/ /** * Parameterizable tuning policy type for AgentSpmv */ template < int _BLOCK_THREADS, ///< Threads per thread block int _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) CacheLoadModifier _ROW_OFFSETS_SEARCH_LOAD_MODIFIER, ///< Cache load modifier for reading CSR row-offsets during search CacheLoadModifier _ROW_OFFSETS_LOAD_MODIFIER, ///< Cache load modifier for reading CSR row-offsets CacheLoadModifier _COLUMN_INDICES_LOAD_MODIFIER, ///< Cache load modifier for reading CSR column-indices CacheLoadModifier _VALUES_LOAD_MODIFIER, ///< Cache load modifier for reading CSR values CacheLoadModifier _VECTOR_VALUES_LOAD_MODIFIER, ///< Cache load modifier for reading vector values bool _DIRECT_LOAD_NONZEROS, ///< Whether to load nonzeros directly from global during sequential merging (vs. pre-staged through shared memory) BlockScanAlgorithm _SCAN_ALGORITHM> ///< The BlockScan algorithm to use struct AgentSpmvPolicy { enum { BLOCK_THREADS = _BLOCK_THREADS, ///< Threads per thread block ITEMS_PER_THREAD = _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) DIRECT_LOAD_NONZEROS = _DIRECT_LOAD_NONZEROS, ///< Whether to load nonzeros directly from global during sequential merging (pre-staged through shared memory) }; static const CacheLoadModifier ROW_OFFSETS_SEARCH_LOAD_MODIFIER = _ROW_OFFSETS_SEARCH_LOAD_MODIFIER; ///< Cache load modifier for reading CSR row-offsets static const CacheLoadModifier ROW_OFFSETS_LOAD_MODIFIER = _ROW_OFFSETS_LOAD_MODIFIER; ///< Cache load modifier for reading CSR row-offsets static const CacheLoadModifier COLUMN_INDICES_LOAD_MODIFIER = _COLUMN_INDICES_LOAD_MODIFIER; ///< Cache load modifier for reading CSR column-indices static const CacheLoadModifier VALUES_LOAD_MODIFIER = _VALUES_LOAD_MODIFIER; ///< Cache load modifier for reading CSR values static const CacheLoadModifier VECTOR_VALUES_LOAD_MODIFIER = _VECTOR_VALUES_LOAD_MODIFIER; ///< Cache load modifier for reading vector values static const BlockScanAlgorithm SCAN_ALGORITHM = _SCAN_ALGORITHM; ///< The BlockScan algorithm to use }; /****************************************************************************** * Thread block abstractions ******************************************************************************/ template < typename ValueT, ///< Matrix and vector value type typename OffsetT> ///< Signed integer type for sequence offsets struct SpmvParams { ValueT* d_values; ///< Pointer to the array of \p num_nonzeros values of the corresponding nonzero elements of matrix A. OffsetT* d_row_end_offsets; ///< Pointer to the array of \p m offsets demarcating the end of every row in \p d_column_indices and \p d_values OffsetT* d_column_indices; ///< Pointer to the array of \p num_nonzeros column-indices of the corresponding nonzero elements of matrix A. (Indices are zero-valued.) 
ValueT* d_vector_x; ///< Pointer to the array of \p num_cols values corresponding to the dense input vector x ValueT* d_vector_y; ///< Pointer to the array of \p num_rows values corresponding to the dense output vector y int num_rows; ///< Number of rows of matrix A. int num_cols; ///< Number of columns of matrix A. int num_nonzeros; ///< Number of nonzero elements of matrix A. ValueT alpha; ///< Alpha multiplicand ValueT beta; ///< Beta addend-multiplicand TexRefInputIterator t_vector_x; }; /** * \brief AgentSpmv implements a stateful abstraction of CUDA thread blocks for participating in device-wide SpMV. */ template < typename AgentSpmvPolicyT, ///< Parameterized AgentSpmvPolicy tuning policy type typename ValueT, ///< Matrix and vector value type typename OffsetT, ///< Signed integer type for sequence offsets bool HAS_ALPHA, ///< Whether the input parameter \p alpha is 1 bool HAS_BETA, ///< Whether the input parameter \p beta is 0 int PTX_ARCH = CUB_PTX_ARCH> ///< PTX compute capability struct AgentSpmv { //--------------------------------------------------------------------- // Types and constants //--------------------------------------------------------------------- /// Constants enum { BLOCK_THREADS = AgentSpmvPolicyT::BLOCK_THREADS, ITEMS_PER_THREAD = AgentSpmvPolicyT::ITEMS_PER_THREAD, TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, }; /// 2D merge path coordinate type typedef typename CubVector::Type CoordinateT; /// Input iterator wrapper types (for applying cache modifiers) typedef CacheModifiedInputIterator< AgentSpmvPolicyT::ROW_OFFSETS_SEARCH_LOAD_MODIFIER, OffsetT, OffsetT> RowOffsetsSearchIteratorT; typedef CacheModifiedInputIterator< AgentSpmvPolicyT::ROW_OFFSETS_LOAD_MODIFIER, OffsetT, OffsetT> RowOffsetsIteratorT; typedef CacheModifiedInputIterator< AgentSpmvPolicyT::COLUMN_INDICES_LOAD_MODIFIER, OffsetT, OffsetT> ColumnIndicesIteratorT; typedef CacheModifiedInputIterator< AgentSpmvPolicyT::VALUES_LOAD_MODIFIER, ValueT, OffsetT> ValueIteratorT; typedef CacheModifiedInputIterator< AgentSpmvPolicyT::VECTOR_VALUES_LOAD_MODIFIER, ValueT, OffsetT> VectorValueIteratorT; // Tuple type for scanning (pairs accumulated segment-value with segment-index) typedef KeyValuePair KeyValuePairT; // Reduce-value-by-key scan operator typedef ReduceByKeyOp ReduceBySegmentOpT; // BlockReduce specialization typedef BlockReduce< ValueT, BLOCK_THREADS, BLOCK_REDUCE_WARP_REDUCTIONS> BlockReduceT; // BlockScan specialization typedef BlockScan< KeyValuePairT, BLOCK_THREADS, AgentSpmvPolicyT::SCAN_ALGORITHM> BlockScanT; /// Merge item type (either a non-zero value or a row-end offset) union MergeItem { // Value type to pair with index type OffsetT (NullType if loading values directly during merge) typedef typename If::Type MergeValueT; OffsetT row_end_offset; MergeValueT nonzero; }; /// Shared memory type required by this thread block struct _TempStorage { union { CoordinateT tile_coord; OffsetT turnstile; }; union { // Smem needed for tile of merge items MergeItem merge_items[ITEMS_PER_THREAD + TILE_ITEMS + 1]; // Smem needed for block-wide reduction typename BlockReduceT::TempStorage reduce; // Smem needed for tile scanning typename BlockScanT::TempStorage scan; }; }; /// Temporary storage type (unionable) struct TempStorage : Uninitialized<_TempStorage> {}; //--------------------------------------------------------------------- // Per-thread fields //--------------------------------------------------------------------- _TempStorage& temp_storage; /// Reference to temp_storage SpmvParams& 
spmv_params; ValueIteratorT wd_values; ///< Wrapped pointer to the array of \p num_nonzeros values of the corresponding nonzero elements of matrix A. RowOffsetsIteratorT wd_row_end_offsets; ///< Wrapped Pointer to the array of \p m offsets demarcating the end of every row in \p d_column_indices and \p d_values ColumnIndicesIteratorT wd_column_indices; ///< Wrapped Pointer to the array of \p num_nonzeros column-indices of the corresponding nonzero elements of matrix A. (Indices are zero-valued.) VectorValueIteratorT wd_vector_x; ///< Wrapped Pointer to the array of \p num_cols values corresponding to the dense input vector x VectorValueIteratorT wd_vector_y; ///< Wrapped Pointer to the array of \p num_cols values corresponding to the dense input vector x //--------------------------------------------------------------------- // Interface //--------------------------------------------------------------------- /** * Constructor */ __device__ __forceinline__ AgentSpmv( TempStorage& temp_storage, ///< Reference to temp_storage SpmvParams& spmv_params) ///< SpMV input parameter bundle : temp_storage(temp_storage.Alias()), spmv_params(spmv_params), wd_values(spmv_params.d_values), wd_row_end_offsets(spmv_params.d_row_end_offsets), wd_column_indices(spmv_params.d_column_indices), wd_vector_x(spmv_params.d_vector_x), wd_vector_y(spmv_params.d_vector_y) {} /** * Consume a merge tile, specialized for direct-load of nonzeros * / __device__ __forceinline__ KeyValuePairT ConsumeTile( int tile_idx, CoordinateT tile_start_coord, CoordinateT tile_end_coord, Int2Type is_direct_load) ///< Marker type indicating whether to load nonzeros directly during path-discovery or beforehand in batch { int tile_num_rows = tile_end_coord.x - tile_start_coord.x; int tile_num_nonzeros = tile_end_coord.y - tile_start_coord.y; OffsetT* s_tile_row_end_offsets = &temp_storage.merge_items[0].row_end_offset; // Gather the row end-offsets for the merge tile into shared memory for (int item = threadIdx.x; item <= tile_num_rows; item += BLOCK_THREADS) { s_tile_row_end_offsets[item] = wd_row_end_offsets[tile_start_coord.x + item]; } __syncthreads(); // Search for the thread's starting coordinate within the merge tile CountingInputIterator tile_nonzero_indices(tile_start_coord.y); CoordinateT thread_start_coord; MergePathSearch( OffsetT(threadIdx.x * ITEMS_PER_THREAD), // Diagonal s_tile_row_end_offsets, // List A tile_nonzero_indices, // List B tile_num_rows, tile_num_nonzeros, thread_start_coord); __syncthreads(); // Perf-sync // Compute the thread's merge path segment CoordinateT thread_current_coord = thread_start_coord; KeyValuePairT scan_segment[ITEMS_PER_THREAD]; ValueT running_total = 0.0; #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { OffsetT nonzero_idx = CUB_MIN(tile_nonzero_indices[thread_current_coord.y], spmv_params.num_nonzeros - 1); OffsetT column_idx = wd_column_indices[nonzero_idx]; ValueT value = wd_values[nonzero_idx]; ValueT vector_value = wd_vector_x[column_idx]; ValueT nonzero = value * vector_value; OffsetT row_end_offset = s_tile_row_end_offsets[thread_current_coord.x]; if (tile_nonzero_indices[thread_current_coord.y] < row_end_offset) { // Move down (accumulate) running_total += nonzero; scan_segment[ITEM].value = running_total; scan_segment[ITEM].key = tile_num_rows; ++thread_current_coord.y; } else { // Move right (reset) scan_segment[ITEM].value = running_total; scan_segment[ITEM].key = thread_current_coord.x; running_total = 0.0; ++thread_current_coord.x; } } __syncthreads(); // 
Block-wide reduce-value-by-segment KeyValuePairT tile_carry; ReduceBySegmentOpT scan_op; KeyValuePairT scan_item; scan_item.value = running_total; scan_item.key = thread_current_coord.x; BlockScanT(temp_storage.scan).ExclusiveScan(scan_item, scan_item, scan_op, tile_carry); if (tile_num_rows > 0) { if (threadIdx.x == 0) scan_item.key = -1; // Direct scatter #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { if (scan_segment[ITEM].key < tile_num_rows) { if (scan_item.key == scan_segment[ITEM].key) scan_segment[ITEM].value = scan_item.value + scan_segment[ITEM].value; if (HAS_ALPHA) { scan_segment[ITEM].value *= spmv_params.alpha; } if (HAS_BETA) { // Update the output vector element ValueT addend = spmv_params.beta * wd_vector_y[tile_start_coord.x + scan_segment[ITEM].key]; scan_segment[ITEM].value += addend; } // Set the output vector element spmv_params.d_vector_y[tile_start_coord.x + scan_segment[ITEM].key] = scan_segment[ITEM].value; } } } // Return the tile's running carry-out return tile_carry; } */ /** * Consume a merge tile, specialized for indirect load of nonzeros * / __device__ __forceinline__ KeyValuePairT ConsumeTile( int tile_idx, CoordinateT tile_start_coord, CoordinateT tile_end_coord, Int2Type is_direct_load) ///< Marker type indicating whether to load nonzeros directly during path-discovery or beforehand in batch { int tile_num_rows = tile_end_coord.x - tile_start_coord.x; int tile_num_nonzeros = tile_end_coord.y - tile_start_coord.y; #if (CUB_PTX_ARCH >= 520) OffsetT* s_tile_row_end_offsets = &temp_storage.merge_items[0].row_end_offset; ValueT* s_tile_nonzeros = &temp_storage.merge_items[tile_num_rows + ITEMS_PER_THREAD].nonzero; // Gather the nonzeros for the merge tile into shared memory #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { int nonzero_idx = threadIdx.x + (ITEM * BLOCK_THREADS); ValueIteratorT a = wd_values + tile_start_coord.y + nonzero_idx; ColumnIndicesIteratorT ci = wd_column_indices + tile_start_coord.y + nonzero_idx; ValueT* s = s_tile_nonzeros + nonzero_idx; if (nonzero_idx < tile_num_nonzeros) { OffsetT column_idx = *ci; ValueT value = *a; ValueT vector_value = spmv_params.t_vector_x[column_idx]; vector_value = wd_vector_x[column_idx]; ValueT nonzero = value * vector_value; *s = nonzero; } } #else OffsetT* s_tile_row_end_offsets = &temp_storage.merge_items[0].row_end_offset; ValueT* s_tile_nonzeros = &temp_storage.merge_items[tile_num_rows + ITEMS_PER_THREAD].nonzero; // Gather the nonzeros for the merge tile into shared memory if (tile_num_nonzeros > 0) { #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { int nonzero_idx = threadIdx.x + (ITEM * BLOCK_THREADS); nonzero_idx = CUB_MIN(nonzero_idx, tile_num_nonzeros - 1); OffsetT column_idx = wd_column_indices[tile_start_coord.y + nonzero_idx]; ValueT value = wd_values[tile_start_coord.y + nonzero_idx]; ValueT vector_value = wd_vector_x[column_idx]; ValueT nonzero = value * vector_value; s_tile_nonzeros[nonzero_idx] = nonzero; } } #endif // Gather the row end-offsets for the merge tile into shared memory #pragma unroll 1 for (int item = threadIdx.x; item <= tile_num_rows; item += BLOCK_THREADS) { s_tile_row_end_offsets[item] = wd_row_end_offsets[tile_start_coord.x + item]; } __syncthreads(); // Search for the thread's starting coordinate within the merge tile CountingInputIterator tile_nonzero_indices(tile_start_coord.y); CoordinateT thread_start_coord; MergePathSearch( OffsetT(threadIdx.x * ITEMS_PER_THREAD), // Diagonal s_tile_row_end_offsets, // 
List A tile_nonzero_indices, // List B tile_num_rows, tile_num_nonzeros, thread_start_coord); __syncthreads(); // Perf-sync // Compute the thread's merge path segment CoordinateT thread_current_coord = thread_start_coord; KeyValuePairT scan_segment[ITEMS_PER_THREAD]; ValueT running_total = 0.0; OffsetT row_end_offset = s_tile_row_end_offsets[thread_current_coord.x]; ValueT nonzero = s_tile_nonzeros[thread_current_coord.y]; #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { if (tile_nonzero_indices[thread_current_coord.y] < row_end_offset) { // Move down (accumulate) scan_segment[ITEM].value = nonzero; running_total += nonzero; ++thread_current_coord.y; nonzero = s_tile_nonzeros[thread_current_coord.y]; } else { // Move right (reset) scan_segment[ITEM].value = 0.0; running_total = 0.0; ++thread_current_coord.x; row_end_offset = s_tile_row_end_offsets[thread_current_coord.x]; } scan_segment[ITEM].key = thread_current_coord.x; } __syncthreads(); // Block-wide reduce-value-by-segment KeyValuePairT tile_carry; ReduceBySegmentOpT scan_op; KeyValuePairT scan_item; scan_item.value = running_total; scan_item.key = thread_current_coord.x; BlockScanT(temp_storage.scan).ExclusiveScan(scan_item, scan_item, scan_op, tile_carry); if (threadIdx.x == 0) { scan_item.key = thread_start_coord.x; scan_item.value = 0.0; } if (tile_num_rows > 0) { __syncthreads(); // Scan downsweep and scatter ValueT* s_partials = &temp_storage.merge_items[0].nonzero; if (scan_item.key != scan_segment[0].key) { s_partials[scan_item.key] = scan_item.value; } else { scan_segment[0].value += scan_item.value; } #pragma unroll for (int ITEM = 1; ITEM < ITEMS_PER_THREAD; ++ITEM) { if (scan_segment[ITEM - 1].key != scan_segment[ITEM].key) { s_partials[scan_segment[ITEM - 1].key] = scan_segment[ITEM - 1].value; } else { scan_segment[ITEM].value += scan_segment[ITEM - 1].value; } } __syncthreads(); #pragma unroll 1 for (int item = threadIdx.x; item < tile_num_rows; item += BLOCK_THREADS) { spmv_params.d_vector_y[tile_start_coord.x + item] = s_partials[item]; } } // Return the tile's running carry-out return tile_carry; } */ /** * Consume input tile */ __device__ __forceinline__ void ConsumeTile( int merge_items_per_block, ///< [in] Number of merge tiles per block KeyValuePairT* d_tile_carry_pairs) ///< [out] Pointer to the temporary array carry-out dot product row-ids, one per block { // Read our starting coordinates if (threadIdx.x == 0) { // Search our starting coordinates OffsetT diagonal = blockIdx.x * merge_items_per_block; CoordinateT tile_coord; CountingInputIterator nonzero_indices(0); // Search the merge path MergePathSearch( diagonal, RowOffsetsSearchIteratorT(spmv_params.d_row_end_offsets), nonzero_indices, spmv_params.num_rows, spmv_params.num_nonzeros, tile_coord); temp_storage.tile_coord = tile_coord; } __syncthreads(); CoordinateT tile_start_coord = temp_storage.tile_coord; // Mooch __shared__ volatile OffsetT x; x = tile_start_coord.x; // Turnstile if (threadIdx.x == 0) { __threadfence(); temp_storage.turnstile = atomicAdd(spmv_params.d_row_end_offsets - 1, 1); } __syncthreads(); // Last block through turnstile does fixup if (temp_storage.turnstile == gridDim.x - 1) { if (threadIdx.x == 0) { spmv_params.d_row_end_offsets[-1] = 0; } } } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) 
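// Illustrative host-side sketch (not part of this header): the SpMV agents defined
// above are normally reached through cub::DeviceSpmv::CsrMV rather than instantiated
// directly. The helper below shows the usual two-call pattern (size query, then run)
// for y = A*x on a CSR matrix; the device pointers and matrix dimensions are assumed
// to have been allocated and filled by the caller.
#include <cub/cub.cuh>

cudaError_t csr_spmv(float* d_values,          // num_nonzeros matrix values
                     int*   d_row_offsets,     // num_rows + 1 CSR row offsets
                     int*   d_column_indices,  // num_nonzeros column indices
                     float* d_x,               // dense input vector (num_cols)
                     float* d_y,               // dense output vector (num_rows)
                     int num_rows, int num_cols, int num_nonzeros)
{
    void*  d_temp_storage     = NULL;
    size_t temp_storage_bytes = 0;

    // First call: query the amount of temporary device storage required.
    cub::DeviceSpmv::CsrMV(d_temp_storage, temp_storage_bytes,
                           d_values, d_row_offsets, d_column_indices,
                           d_x, d_y, num_rows, num_cols, num_nonzeros);

    cudaMalloc(&d_temp_storage, temp_storage_bytes);

    // Second call: run the merge-path SpMV implemented by AgentSpmv.
    cudaError_t error = cub::DeviceSpmv::CsrMV(d_temp_storage, temp_storage_bytes,
                                               d_values, d_row_offsets, d_column_indices,
                                               d_x, d_y, num_rows, num_cols, num_nonzeros);

    cudaFree(d_temp_storage);
    return error;
}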
relion-3.1.3/src/acc/cuda/cub/agent/agent_spmv_orig.cuh000066400000000000000000001070131411340063500227710ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::AgentSpmv implements a stateful abstraction of CUDA thread blocks for participating in device-wide SpMV. */ #pragma once #include #include "../util_type.cuh" #include "../block/block_reduce.cuh" #include "../block/block_scan.cuh" #include "../block/block_exchange.cuh" #include "../thread/thread_search.cuh" #include "../thread/thread_operators.cuh" #include "../iterator/cache_modified_input_iterator.cuh" #include "../iterator/counting_input_iterator.cuh" #include "../iterator/tex_ref_input_iterator.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * Tuning policy ******************************************************************************/ /** * Parameterizable tuning policy type for AgentSpmv */ template < int _BLOCK_THREADS, ///< Threads per thread block int _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) CacheLoadModifier _ROW_OFFSETS_SEARCH_LOAD_MODIFIER, ///< Cache load modifier for reading CSR row-offsets during search CacheLoadModifier _ROW_OFFSETS_LOAD_MODIFIER, ///< Cache load modifier for reading CSR row-offsets CacheLoadModifier _COLUMN_INDICES_LOAD_MODIFIER, ///< Cache load modifier for reading CSR column-indices CacheLoadModifier _VALUES_LOAD_MODIFIER, ///< Cache load modifier for reading CSR values CacheLoadModifier _VECTOR_VALUES_LOAD_MODIFIER, ///< Cache load modifier for reading vector values bool _DIRECT_LOAD_NONZEROS, ///< Whether to load nonzeros directly from global during sequential merging (vs. 
pre-staged through shared memory) BlockScanAlgorithm _SCAN_ALGORITHM> ///< The BlockScan algorithm to use struct AgentSpmvPolicy { enum { BLOCK_THREADS = _BLOCK_THREADS, ///< Threads per thread block ITEMS_PER_THREAD = _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) DIRECT_LOAD_NONZEROS = _DIRECT_LOAD_NONZEROS, ///< Whether to load nonzeros directly from global during sequential merging (pre-staged through shared memory) }; static const CacheLoadModifier ROW_OFFSETS_SEARCH_LOAD_MODIFIER = _ROW_OFFSETS_SEARCH_LOAD_MODIFIER; ///< Cache load modifier for reading CSR row-offsets static const CacheLoadModifier ROW_OFFSETS_LOAD_MODIFIER = _ROW_OFFSETS_LOAD_MODIFIER; ///< Cache load modifier for reading CSR row-offsets static const CacheLoadModifier COLUMN_INDICES_LOAD_MODIFIER = _COLUMN_INDICES_LOAD_MODIFIER; ///< Cache load modifier for reading CSR column-indices static const CacheLoadModifier VALUES_LOAD_MODIFIER = _VALUES_LOAD_MODIFIER; ///< Cache load modifier for reading CSR values static const CacheLoadModifier VECTOR_VALUES_LOAD_MODIFIER = _VECTOR_VALUES_LOAD_MODIFIER; ///< Cache load modifier for reading vector values static const BlockScanAlgorithm SCAN_ALGORITHM = _SCAN_ALGORITHM; ///< The BlockScan algorithm to use }; /****************************************************************************** * Thread block abstractions ******************************************************************************/ template < typename ValueT, ///< Matrix and vector value type typename OffsetT> ///< Signed integer type for sequence offsets struct SpmvParams { ValueT* d_values; ///< Pointer to the array of \p num_nonzeros values of the corresponding nonzero elements of matrix A. OffsetT* d_row_end_offsets; ///< Pointer to the array of \p m offsets demarcating the end of every row in \p d_column_indices and \p d_values OffsetT* d_column_indices; ///< Pointer to the array of \p num_nonzeros column-indices of the corresponding nonzero elements of matrix A. (Indices are zero-valued.) ValueT* d_vector_x; ///< Pointer to the array of \p num_cols values corresponding to the dense input vector x ValueT* d_vector_y; ///< Pointer to the array of \p num_rows values corresponding to the dense output vector y int num_rows; ///< Number of rows of matrix A. int num_cols; ///< Number of columns of matrix A. int num_nonzeros; ///< Number of nonzero elements of matrix A. ValueT alpha; ///< Alpha multiplicand ValueT beta; ///< Beta addend-multiplicand TexRefInputIterator t_vector_x; }; /** * \brief AgentSpmv implements a stateful abstraction of CUDA thread blocks for participating in device-wide SpMV. 
*/ template < typename AgentSpmvPolicyT, ///< Parameterized AgentSpmvPolicy tuning policy type typename ValueT, ///< Matrix and vector value type typename OffsetT, ///< Signed integer type for sequence offsets bool HAS_ALPHA, ///< Whether the input parameter \p alpha is 1 bool HAS_BETA, ///< Whether the input parameter \p beta is 0 int PTX_ARCH = CUB_PTX_ARCH> ///< PTX compute capability struct AgentSpmv { //--------------------------------------------------------------------- // Types and constants //--------------------------------------------------------------------- /// Constants enum { BLOCK_THREADS = AgentSpmvPolicyT::BLOCK_THREADS, ITEMS_PER_THREAD = AgentSpmvPolicyT::ITEMS_PER_THREAD, TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, }; /// 2D merge path coordinate type typedef typename CubVector::Type CoordinateT; /// Input iterator wrapper types (for applying cache modifiers) typedef CacheModifiedInputIterator< AgentSpmvPolicyT::ROW_OFFSETS_SEARCH_LOAD_MODIFIER, OffsetT, OffsetT> RowOffsetsSearchIteratorT; typedef CacheModifiedInputIterator< AgentSpmvPolicyT::ROW_OFFSETS_LOAD_MODIFIER, OffsetT, OffsetT> RowOffsetsIteratorT; typedef CacheModifiedInputIterator< AgentSpmvPolicyT::COLUMN_INDICES_LOAD_MODIFIER, OffsetT, OffsetT> ColumnIndicesIteratorT; typedef CacheModifiedInputIterator< AgentSpmvPolicyT::VALUES_LOAD_MODIFIER, ValueT, OffsetT> ValueIteratorT; typedef CacheModifiedInputIterator< AgentSpmvPolicyT::VECTOR_VALUES_LOAD_MODIFIER, ValueT, OffsetT> VectorValueIteratorT; // Tuple type for scanning (pairs accumulated segment-value with segment-index) typedef KeyValuePair KeyValuePairT; // Reduce-value-by-segment scan operator typedef ReduceByKeyOp ReduceBySegmentOpT; // BlockReduce specialization typedef BlockReduce< ValueT, BLOCK_THREADS, BLOCK_REDUCE_WARP_REDUCTIONS> BlockReduceT; // BlockScan specialization typedef BlockScan< KeyValuePairT, BLOCK_THREADS, AgentSpmvPolicyT::SCAN_ALGORITHM> BlockScanT; // BlockScan specialization typedef BlockScan< ValueT, BLOCK_THREADS, AgentSpmvPolicyT::SCAN_ALGORITHM> BlockPrefixSumT; // BlockExchange specialization typedef BlockExchange< ValueT, BLOCK_THREADS, ITEMS_PER_THREAD> BlockExchangeT; /// Merge item type (either a non-zero value or a row-end offset) union MergeItem { // Value type to pair with index type OffsetT (NullType if loading values directly during merge) typedef typename If::Type MergeValueT; OffsetT row_end_offset; MergeValueT nonzero; }; /// Shared memory type required by this thread block struct _TempStorage { CoordinateT tile_coords[2]; union Aliasable { // Smem needed for tile of merge items MergeItem merge_items[ITEMS_PER_THREAD + TILE_ITEMS + 1]; // Smem needed for block exchange typename BlockExchangeT::TempStorage exchange; // Smem needed for block-wide reduction typename BlockReduceT::TempStorage reduce; // Smem needed for tile scanning typename BlockScanT::TempStorage scan; // Smem needed for tile prefix sum typename BlockPrefixSumT::TempStorage prefix_sum; } aliasable; }; /// Temporary storage type (unionable) struct TempStorage : Uninitialized<_TempStorage> {}; //--------------------------------------------------------------------- // Per-thread fields //--------------------------------------------------------------------- _TempStorage& temp_storage; /// Reference to temp_storage SpmvParams& spmv_params; ValueIteratorT wd_values; ///< Wrapped pointer to the array of \p num_nonzeros values of the corresponding nonzero elements of matrix A. 
RowOffsetsIteratorT wd_row_end_offsets; ///< Wrapped Pointer to the array of \p m offsets demarcating the end of every row in \p d_column_indices and \p d_values ColumnIndicesIteratorT wd_column_indices; ///< Wrapped Pointer to the array of \p num_nonzeros column-indices of the corresponding nonzero elements of matrix A. (Indices are zero-valued.) VectorValueIteratorT wd_vector_x; ///< Wrapped Pointer to the array of \p num_cols values corresponding to the dense input vector x VectorValueIteratorT wd_vector_y; ///< Wrapped Pointer to the array of \p num_cols values corresponding to the dense input vector x //--------------------------------------------------------------------- // Interface //--------------------------------------------------------------------- /** * Constructor */ __device__ __forceinline__ AgentSpmv( TempStorage& temp_storage, ///< Reference to temp_storage SpmvParams& spmv_params) ///< SpMV input parameter bundle : temp_storage(temp_storage.Alias()), spmv_params(spmv_params), wd_values(spmv_params.d_values), wd_row_end_offsets(spmv_params.d_row_end_offsets), wd_column_indices(spmv_params.d_column_indices), wd_vector_x(spmv_params.d_vector_x), wd_vector_y(spmv_params.d_vector_y) {} /** * Consume a merge tile, specialized for direct-load of nonzeros */ __device__ __forceinline__ KeyValuePairT ConsumeTile( int tile_idx, CoordinateT tile_start_coord, CoordinateT tile_end_coord, Int2Type is_direct_load) ///< Marker type indicating whether to load nonzeros directly during path-discovery or beforehand in batch { int tile_num_rows = tile_end_coord.x - tile_start_coord.x; int tile_num_nonzeros = tile_end_coord.y - tile_start_coord.y; OffsetT* s_tile_row_end_offsets = &temp_storage.aliasable.merge_items[0].row_end_offset; // Gather the row end-offsets for the merge tile into shared memory for (int item = threadIdx.x; item <= tile_num_rows; item += BLOCK_THREADS) { s_tile_row_end_offsets[item] = wd_row_end_offsets[tile_start_coord.x + item]; } CTA_SYNC(); // Search for the thread's starting coordinate within the merge tile CountingInputIterator tile_nonzero_indices(tile_start_coord.y); CoordinateT thread_start_coord; MergePathSearch( OffsetT(threadIdx.x * ITEMS_PER_THREAD), // Diagonal s_tile_row_end_offsets, // List A tile_nonzero_indices, // List B tile_num_rows, tile_num_nonzeros, thread_start_coord); CTA_SYNC(); // Perf-sync // Compute the thread's merge path segment CoordinateT thread_current_coord = thread_start_coord; KeyValuePairT scan_segment[ITEMS_PER_THREAD]; ValueT running_total = 0.0; #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { OffsetT nonzero_idx = CUB_MIN(tile_nonzero_indices[thread_current_coord.y], spmv_params.num_nonzeros - 1); OffsetT column_idx = wd_column_indices[nonzero_idx]; ValueT value = wd_values[nonzero_idx]; ValueT vector_value = spmv_params.t_vector_x[column_idx]; #if (CUB_PTX_ARCH >= 350) vector_value = wd_vector_x[column_idx]; #endif ValueT nonzero = value * vector_value; OffsetT row_end_offset = s_tile_row_end_offsets[thread_current_coord.x]; if (tile_nonzero_indices[thread_current_coord.y] < row_end_offset) { // Move down (accumulate) running_total += nonzero; scan_segment[ITEM].value = running_total; scan_segment[ITEM].key = tile_num_rows; ++thread_current_coord.y; } else { // Move right (reset) scan_segment[ITEM].value = running_total; scan_segment[ITEM].key = thread_current_coord.x; running_total = 0.0; ++thread_current_coord.x; } } CTA_SYNC(); // Block-wide reduce-value-by-segment KeyValuePairT tile_carry; 
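// Note (descriptive comment, not part of the original CUB source): the exclusive
// block-wide scan below stitches together the per-thread partial sums produced
// along the merge path. Each thread's running_total is keyed by the row it ended
// on, so rows that span several threads are completed within this tile, while the
// partial sum of the row that spills past the tile boundary is returned in
// tile_carry and fixed up afterwards by a separate pass over the per-tile carries.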
ReduceBySegmentOpT scan_op; KeyValuePairT scan_item; scan_item.value = running_total; scan_item.key = thread_current_coord.x; BlockScanT(temp_storage.aliasable.scan).ExclusiveScan(scan_item, scan_item, scan_op, tile_carry); if (tile_num_rows > 0) { if (threadIdx.x == 0) scan_item.key = -1; // Direct scatter #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { if (scan_segment[ITEM].key < tile_num_rows) { if (scan_item.key == scan_segment[ITEM].key) scan_segment[ITEM].value = scan_item.value + scan_segment[ITEM].value; if (HAS_ALPHA) { scan_segment[ITEM].value *= spmv_params.alpha; } if (HAS_BETA) { // Update the output vector element ValueT addend = spmv_params.beta * wd_vector_y[tile_start_coord.x + scan_segment[ITEM].key]; scan_segment[ITEM].value += addend; } // Set the output vector element spmv_params.d_vector_y[tile_start_coord.x + scan_segment[ITEM].key] = scan_segment[ITEM].value; } } } // Return the tile's running carry-out return tile_carry; } /** * Consume a merge tile, specialized for indirect load of nonzeros */ __device__ __forceinline__ KeyValuePairT ConsumeTile( int tile_idx, CoordinateT tile_start_coord, CoordinateT tile_end_coord, Int2Type is_direct_load) ///< Marker type indicating whether to load nonzeros directly during path-discovery or beforehand in batch { int tile_num_rows = tile_end_coord.x - tile_start_coord.x; int tile_num_nonzeros = tile_end_coord.y - tile_start_coord.y; #if (CUB_PTX_ARCH >= 520) /* OffsetT* s_tile_row_end_offsets = &temp_storage.merge_items[tile_num_nonzeros].row_end_offset; ValueT* s_tile_nonzeros = &temp_storage.merge_items[0].nonzero; OffsetT col_indices[ITEMS_PER_THREAD]; ValueT mat_values[ITEMS_PER_THREAD]; int nonzero_indices[ITEMS_PER_THREAD]; // Gather the nonzeros for the merge tile into shared memory #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { nonzero_indices[ITEM] = threadIdx.x + (ITEM * BLOCK_THREADS); ValueIteratorT a = wd_values + tile_start_coord.y + nonzero_indices[ITEM]; ColumnIndicesIteratorT ci = wd_column_indices + tile_start_coord.y + nonzero_indices[ITEM]; col_indices[ITEM] = (nonzero_indices[ITEM] < tile_num_nonzeros) ? *ci : 0; mat_values[ITEM] = (nonzero_indices[ITEM] < tile_num_nonzeros) ? 
*a : 0.0; } CTA_SYNC(); #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { VectorValueIteratorT x = wd_vector_x + col_indices[ITEM]; mat_values[ITEM] *= *x; } CTA_SYNC(); #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { ValueT *s = s_tile_nonzeros + nonzero_indices[ITEM]; *s = mat_values[ITEM]; } CTA_SYNC(); */ OffsetT* s_tile_row_end_offsets = &temp_storage.merge_items[0].row_end_offset; ValueT* s_tile_nonzeros = &temp_storage.merge_items[tile_num_rows + ITEMS_PER_THREAD].nonzero; // Gather the nonzeros for the merge tile into shared memory #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { int nonzero_idx = threadIdx.x + (ITEM * BLOCK_THREADS); ValueIteratorT a = wd_values + tile_start_coord.y + nonzero_idx; ColumnIndicesIteratorT ci = wd_column_indices + tile_start_coord.y + nonzero_idx; ValueT* s = s_tile_nonzeros + nonzero_idx; if (nonzero_idx < tile_num_nonzeros) { OffsetT column_idx = *ci; ValueT value = *a; ValueT vector_value = spmv_params.t_vector_x[column_idx]; vector_value = wd_vector_x[column_idx]; ValueT nonzero = value * vector_value; *s = nonzero; } } #else OffsetT* s_tile_row_end_offsets = &temp_storage.aliasable.merge_items[0].row_end_offset; ValueT* s_tile_nonzeros = &temp_storage.aliasable.merge_items[tile_num_rows + ITEMS_PER_THREAD].nonzero; // Gather the nonzeros for the merge tile into shared memory if (tile_num_nonzeros > 0) { #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { int nonzero_idx = threadIdx.x + (ITEM * BLOCK_THREADS); nonzero_idx = CUB_MIN(nonzero_idx, tile_num_nonzeros - 1); OffsetT column_idx = wd_column_indices[tile_start_coord.y + nonzero_idx]; ValueT value = wd_values[tile_start_coord.y + nonzero_idx]; ValueT vector_value = spmv_params.t_vector_x[column_idx]; #if (CUB_PTX_ARCH >= 350) vector_value = wd_vector_x[column_idx]; #endif ValueT nonzero = value * vector_value; s_tile_nonzeros[nonzero_idx] = nonzero; } } #endif // Gather the row end-offsets for the merge tile into shared memory #pragma unroll 1 for (int item = threadIdx.x; item <= tile_num_rows; item += BLOCK_THREADS) { s_tile_row_end_offsets[item] = wd_row_end_offsets[tile_start_coord.x + item]; } CTA_SYNC(); // Search for the thread's starting coordinate within the merge tile CountingInputIterator tile_nonzero_indices(tile_start_coord.y); CoordinateT thread_start_coord; MergePathSearch( OffsetT(threadIdx.x * ITEMS_PER_THREAD), // Diagonal s_tile_row_end_offsets, // List A tile_nonzero_indices, // List B tile_num_rows, tile_num_nonzeros, thread_start_coord); CTA_SYNC(); // Perf-sync // Compute the thread's merge path segment CoordinateT thread_current_coord = thread_start_coord; KeyValuePairT scan_segment[ITEMS_PER_THREAD]; ValueT running_total = 0.0; OffsetT row_end_offset = s_tile_row_end_offsets[thread_current_coord.x]; ValueT nonzero = s_tile_nonzeros[thread_current_coord.y]; #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { if (tile_nonzero_indices[thread_current_coord.y] < row_end_offset) { // Move down (accumulate) scan_segment[ITEM].value = nonzero; running_total += nonzero; ++thread_current_coord.y; nonzero = s_tile_nonzeros[thread_current_coord.y]; } else { // Move right (reset) scan_segment[ITEM].value = 0.0; running_total = 0.0; ++thread_current_coord.x; row_end_offset = s_tile_row_end_offsets[thread_current_coord.x]; } scan_segment[ITEM].key = thread_current_coord.x; } CTA_SYNC(); // Block-wide reduce-value-by-segment KeyValuePairT tile_carry; ReduceBySegmentOpT scan_op; 
KeyValuePairT scan_item; scan_item.value = running_total; scan_item.key = thread_current_coord.x; BlockScanT(temp_storage.aliasable.scan).ExclusiveScan(scan_item, scan_item, scan_op, tile_carry); if (threadIdx.x == 0) { scan_item.key = thread_start_coord.x; scan_item.value = 0.0; } if (tile_num_rows > 0) { CTA_SYNC(); // Scan downsweep and scatter ValueT* s_partials = &temp_storage.aliasable.merge_items[0].nonzero; if (scan_item.key != scan_segment[0].key) { s_partials[scan_item.key] = scan_item.value; } else { scan_segment[0].value += scan_item.value; } #pragma unroll for (int ITEM = 1; ITEM < ITEMS_PER_THREAD; ++ITEM) { if (scan_segment[ITEM - 1].key != scan_segment[ITEM].key) { s_partials[scan_segment[ITEM - 1].key] = scan_segment[ITEM - 1].value; } else { scan_segment[ITEM].value += scan_segment[ITEM - 1].value; } } CTA_SYNC(); #pragma unroll 1 for (int item = threadIdx.x; item < tile_num_rows; item += BLOCK_THREADS) { spmv_params.d_vector_y[tile_start_coord.x + item] = s_partials[item]; } } // Return the tile's running carry-out return tile_carry; } /** * Consume a merge tile, specialized for indirect load of nonzeros * / template __device__ __forceinline__ KeyValuePairT ConsumeTile1( int tile_idx, CoordinateT tile_start_coord, CoordinateT tile_end_coord, IsDirectLoadT is_direct_load) ///< Marker type indicating whether to load nonzeros directly during path-discovery or beforehand in batch { int tile_num_rows = tile_end_coord.x - tile_start_coord.x; int tile_num_nonzeros = tile_end_coord.y - tile_start_coord.y; OffsetT* s_tile_row_end_offsets = &temp_storage.merge_items[0].row_end_offset; int warp_idx = threadIdx.x / WARP_THREADS; int lane_idx = LaneId(); // Gather the row end-offsets for the merge tile into shared memory #pragma unroll 1 for (int item = threadIdx.x; item <= tile_num_rows; item += BLOCK_THREADS) { s_tile_row_end_offsets[item] = wd_row_end_offsets[tile_start_coord.x + item]; } CTA_SYNC(); // Search for warp start/end coords if (lane_idx == 0) { MergePathSearch( OffsetT(warp_idx * ITEMS_PER_WARP), // Diagonal s_tile_row_end_offsets, // List A CountingInputIterator(tile_start_coord.y), // List B tile_num_rows, tile_num_nonzeros, temp_storage.warp_coords[warp_idx]); CoordinateT last = {tile_num_rows, tile_num_nonzeros}; temp_storage.warp_coords[WARPS] = last; } CTA_SYNC(); CoordinateT warp_coord = temp_storage.warp_coords[warp_idx]; CoordinateT warp_end_coord = temp_storage.warp_coords[warp_idx + 1]; OffsetT warp_nonzero_idx = tile_start_coord.y + warp_coord.y; // Consume whole rows #pragma unroll 1 for (; warp_coord.x < warp_end_coord.x; ++warp_coord.x) { ValueT row_total = 0.0; OffsetT row_end_offset = s_tile_row_end_offsets[warp_coord.x]; #pragma unroll 1 for (OffsetT nonzero_idx = warp_nonzero_idx + lane_idx; nonzero_idx < row_end_offset; nonzero_idx += WARP_THREADS) { OffsetT column_idx = wd_column_indices[nonzero_idx]; ValueT value = wd_values[nonzero_idx]; ValueT vector_value = wd_vector_x[column_idx]; row_total += value * vector_value; } // Warp reduce row_total = WarpReduceT(temp_storage.warp_reduce[warp_idx]).Sum(row_total); // Output if (lane_idx == 0) { spmv_params.d_vector_y[tile_start_coord.x + warp_coord.x] = row_total; } warp_nonzero_idx = row_end_offset; } // Consume partial portion of thread's last row if (warp_nonzero_idx < tile_start_coord.y + warp_end_coord.y) { ValueT row_total = 0.0; for (OffsetT nonzero_idx = warp_nonzero_idx + lane_idx; nonzero_idx < tile_start_coord.y + warp_end_coord.y; nonzero_idx += WARP_THREADS) { OffsetT column_idx = 
wd_column_indices[nonzero_idx]; ValueT value = wd_values[nonzero_idx]; ValueT vector_value = wd_vector_x[column_idx]; row_total += value * vector_value; } // Warp reduce row_total = WarpReduceT(temp_storage.warp_reduce[warp_idx]).Sum(row_total); // Output if (lane_idx == 0) { spmv_params.d_vector_y[tile_start_coord.x + warp_coord.x] = row_total; } } // Return the tile's running carry-out KeyValuePairT tile_carry(tile_num_rows, 0.0); return tile_carry; } */ /** * Consume a merge tile, specialized for indirect load of nonzeros * / __device__ __forceinline__ KeyValuePairT ConsumeTile2( int tile_idx, CoordinateT tile_start_coord, CoordinateT tile_end_coord, Int2Type is_direct_load) ///< Marker type indicating whether to load nonzeros directly during path-discovery or beforehand in batch { int tile_num_rows = tile_end_coord.x - tile_start_coord.x; int tile_num_nonzeros = tile_end_coord.y - tile_start_coord.y; ValueT* s_tile_nonzeros = &temp_storage.merge_items[0].nonzero; ValueT nonzeros[ITEMS_PER_THREAD]; // Gather the nonzeros for the merge tile into shared memory #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { int nonzero_idx = threadIdx.x + (ITEM * BLOCK_THREADS); nonzero_idx = CUB_MIN(nonzero_idx, tile_num_nonzeros - 1); OffsetT column_idx = wd_column_indices[tile_start_coord.y + nonzero_idx]; ValueT value = wd_values[tile_start_coord.y + nonzero_idx]; ValueT vector_value = spmv_params.t_vector_x[column_idx]; #if (CUB_PTX_ARCH >= 350) vector_value = wd_vector_x[column_idx]; #endif nonzeros[ITEM] = value * vector_value; } // Exchange striped->blocked BlockExchangeT(temp_storage.exchange).StripedToBlocked(nonzeros); CTA_SYNC(); // Compute an inclusive prefix sum BlockPrefixSumT(temp_storage.prefix_sum).InclusiveSum(nonzeros, nonzeros); CTA_SYNC(); if (threadIdx.x == 0) s_tile_nonzeros[0] = 0.0; // Scatter back to smem #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { int item_idx = (threadIdx.x * ITEMS_PER_THREAD) + ITEM + 1; s_tile_nonzeros[item_idx] = nonzeros[ITEM]; } CTA_SYNC(); // Gather the row end-offsets for the merge tile into shared memory #pragma unroll 1 for (int item = threadIdx.x; item < tile_num_rows; item += BLOCK_THREADS) { OffsetT start = CUB_MAX(wd_row_end_offsets[tile_start_coord.x + item - 1], tile_start_coord.y); OffsetT end = wd_row_end_offsets[tile_start_coord.x + item]; start -= tile_start_coord.y; end -= tile_start_coord.y; ValueT row_partial = s_tile_nonzeros[end] - s_tile_nonzeros[start]; spmv_params.d_vector_y[tile_start_coord.x + item] = row_partial; } // Get the tile's carry-out KeyValuePairT tile_carry; if (threadIdx.x == 0) { tile_carry.key = tile_num_rows; OffsetT start = CUB_MAX(wd_row_end_offsets[tile_end_coord.x - 1], tile_start_coord.y); start -= tile_start_coord.y; OffsetT end = tile_num_nonzeros; tile_carry.value = s_tile_nonzeros[end] - s_tile_nonzeros[start]; } // Return the tile's running carry-out return tile_carry; } */ /** * Consume input tile */ __device__ __forceinline__ void ConsumeTile( CoordinateT* d_tile_coordinates, ///< [in] Pointer to the temporary array of tile starting coordinates KeyValuePairT* d_tile_carry_pairs, ///< [out] Pointer to the temporary array carry-out dot product row-ids, one per block int num_merge_tiles) ///< [in] Number of merge tiles { int tile_idx = (blockIdx.x * gridDim.y) + blockIdx.y; // Current tile index if (tile_idx >= num_merge_tiles) return; // Read our starting coordinates if (threadIdx.x < 2) { if (d_tile_coordinates == NULL) { // Search our starting coordinates 
OffsetT diagonal = (tile_idx + threadIdx.x) * TILE_ITEMS; CoordinateT tile_coord; CountingInputIterator nonzero_indices(0); // Search the merge path MergePathSearch( diagonal, RowOffsetsSearchIteratorT(spmv_params.d_row_end_offsets), nonzero_indices, spmv_params.num_rows, spmv_params.num_nonzeros, tile_coord); temp_storage.tile_coords[threadIdx.x] = tile_coord; } else { temp_storage.tile_coords[threadIdx.x] = d_tile_coordinates[tile_idx + threadIdx.x]; } } CTA_SYNC(); CoordinateT tile_start_coord = temp_storage.tile_coords[0]; CoordinateT tile_end_coord = temp_storage.tile_coords[1]; // Consume multi-segment tile KeyValuePairT tile_carry = ConsumeTile( tile_idx, tile_start_coord, tile_end_coord, Int2Type()); // Output the tile's carry-out if (threadIdx.x == 0) { if (HAS_ALPHA) tile_carry.value *= spmv_params.alpha; tile_carry.key += tile_start_coord.x; d_tile_carry_pairs[tile_idx] = tile_carry; } } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/agent/agent_spmv_row_based.cuh000066400000000000000000000451631411340063500240050ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::AgentSpmv implements a stateful abstraction of CUDA thread blocks for participating in device-wide SpMV. 
*/ #pragma once #include #include "../util_type.cuh" #include "../block/block_reduce.cuh" #include "../block/block_scan.cuh" #include "../block/block_exchange.cuh" #include "../thread/thread_search.cuh" #include "../thread/thread_operators.cuh" #include "../iterator/cache_modified_input_iterator.cuh" #include "../iterator/counting_input_iterator.cuh" #include "../iterator/tex_ref_input_iterator.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * Tuning policy ******************************************************************************/ /** * Parameterizable tuning policy type for AgentSpmv */ template < int _BLOCK_THREADS, ///< Threads per thread block int _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) CacheLoadModifier _ROW_OFFSETS_SEARCH_LOAD_MODIFIER, ///< Cache load modifier for reading CSR row-offsets during search CacheLoadModifier _ROW_OFFSETS_LOAD_MODIFIER, ///< Cache load modifier for reading CSR row-offsets CacheLoadModifier _COLUMN_INDICES_LOAD_MODIFIER, ///< Cache load modifier for reading CSR column-indices CacheLoadModifier _VALUES_LOAD_MODIFIER, ///< Cache load modifier for reading CSR values CacheLoadModifier _VECTOR_VALUES_LOAD_MODIFIER, ///< Cache load modifier for reading vector values bool _DIRECT_LOAD_NONZEROS, ///< Whether to load nonzeros directly from global during sequential merging (vs. pre-staged through shared memory) BlockScanAlgorithm _SCAN_ALGORITHM> ///< The BlockScan algorithm to use struct AgentSpmvPolicy { enum { BLOCK_THREADS = _BLOCK_THREADS, ///< Threads per thread block ITEMS_PER_THREAD = _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) DIRECT_LOAD_NONZEROS = _DIRECT_LOAD_NONZEROS, ///< Whether to load nonzeros directly from global during sequential merging (pre-staged through shared memory) }; static const CacheLoadModifier ROW_OFFSETS_SEARCH_LOAD_MODIFIER = _ROW_OFFSETS_SEARCH_LOAD_MODIFIER; ///< Cache load modifier for reading CSR row-offsets static const CacheLoadModifier ROW_OFFSETS_LOAD_MODIFIER = _ROW_OFFSETS_LOAD_MODIFIER; ///< Cache load modifier for reading CSR row-offsets static const CacheLoadModifier COLUMN_INDICES_LOAD_MODIFIER = _COLUMN_INDICES_LOAD_MODIFIER; ///< Cache load modifier for reading CSR column-indices static const CacheLoadModifier VALUES_LOAD_MODIFIER = _VALUES_LOAD_MODIFIER; ///< Cache load modifier for reading CSR values static const CacheLoadModifier VECTOR_VALUES_LOAD_MODIFIER = _VECTOR_VALUES_LOAD_MODIFIER; ///< Cache load modifier for reading vector values static const BlockScanAlgorithm SCAN_ALGORITHM = _SCAN_ALGORITHM; ///< The BlockScan algorithm to use }; /****************************************************************************** * Thread block abstractions ******************************************************************************/ template < typename ValueT, ///< Matrix and vector value type typename OffsetT> ///< Signed integer type for sequence offsets struct SpmvParams { ValueT* d_values; ///< Pointer to the array of \p num_nonzeros values of the corresponding nonzero elements of matrix A. OffsetT* d_row_end_offsets; ///< Pointer to the array of \p m offsets demarcating the end of every row in \p d_column_indices and \p d_values OffsetT* d_column_indices; ///< Pointer to the array of \p num_nonzeros column-indices of the corresponding nonzero elements of matrix A. (Indices are zero-valued.) 
ValueT* d_vector_x; ///< Pointer to the array of \p num_cols values corresponding to the dense input vector x ValueT* d_vector_y; ///< Pointer to the array of \p num_rows values corresponding to the dense output vector y int num_rows; ///< Number of rows of matrix A. int num_cols; ///< Number of columns of matrix A. int num_nonzeros; ///< Number of nonzero elements of matrix A. ValueT alpha; ///< Alpha multiplicand ValueT beta; ///< Beta addend-multiplicand TexRefInputIterator t_vector_x; }; /** * \brief AgentSpmv implements a stateful abstraction of CUDA thread blocks for participating in device-wide SpMV. */ template < typename AgentSpmvPolicyT, ///< Parameterized AgentSpmvPolicy tuning policy type typename ValueT, ///< Matrix and vector value type typename OffsetT, ///< Signed integer type for sequence offsets bool HAS_ALPHA, ///< Whether the input parameter \p alpha is 1 bool HAS_BETA, ///< Whether the input parameter \p beta is 0 int PTX_ARCH = CUB_PTX_ARCH> ///< PTX compute capability struct AgentSpmv { //--------------------------------------------------------------------- // Types and constants //--------------------------------------------------------------------- /// Constants enum { BLOCK_THREADS = AgentSpmvPolicyT::BLOCK_THREADS, ITEMS_PER_THREAD = AgentSpmvPolicyT::ITEMS_PER_THREAD, TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, }; /// 2D merge path coordinate type typedef typename CubVector::Type CoordinateT; /// Input iterator wrapper types (for applying cache modifiers) typedef CacheModifiedInputIterator< AgentSpmvPolicyT::ROW_OFFSETS_SEARCH_LOAD_MODIFIER, OffsetT, OffsetT> RowOffsetsSearchIteratorT; typedef CacheModifiedInputIterator< AgentSpmvPolicyT::ROW_OFFSETS_LOAD_MODIFIER, OffsetT, OffsetT> RowOffsetsIteratorT; typedef CacheModifiedInputIterator< AgentSpmvPolicyT::COLUMN_INDICES_LOAD_MODIFIER, OffsetT, OffsetT> ColumnIndicesIteratorT; typedef CacheModifiedInputIterator< AgentSpmvPolicyT::VALUES_LOAD_MODIFIER, ValueT, OffsetT> ValueIteratorT; typedef CacheModifiedInputIterator< AgentSpmvPolicyT::VECTOR_VALUES_LOAD_MODIFIER, ValueT, OffsetT> VectorValueIteratorT; // Tuple type for scanning (pairs accumulated segment-value with segment-index) typedef KeyValuePair KeyValuePairT; // Reduce-value-by-segment scan operator typedef ReduceBySegmentOp ReduceBySegmentOpT; // Prefix functor type typedef BlockScanRunningPrefixOp PrefixOpT; // BlockScan specialization typedef BlockScan< KeyValuePairT, BLOCK_THREADS, AgentSpmvPolicyT::SCAN_ALGORITHM> BlockScanT; /// Shared memory type required by this thread block struct _TempStorage { OffsetT tile_nonzero_idx; OffsetT tile_nonzero_idx_end; // Smem needed for tile scanning typename BlockScanT::TempStorage scan; // Smem needed for tile of merge items ValueT nonzeros[TILE_ITEMS + 1]; }; /// Temporary storage type (unionable) struct TempStorage : Uninitialized<_TempStorage> {}; //--------------------------------------------------------------------- // Per-thread fields //--------------------------------------------------------------------- _TempStorage& temp_storage; /// Reference to temp_storage SpmvParams& spmv_params; ValueIteratorT wd_values; ///< Wrapped pointer to the array of \p num_nonzeros values of the corresponding nonzero elements of matrix A. 
RowOffsetsIteratorT wd_row_end_offsets; ///< Wrapped Pointer to the array of \p m offsets demarcating the end of every row in \p d_column_indices and \p d_values ColumnIndicesIteratorT wd_column_indices; ///< Wrapped Pointer to the array of \p num_nonzeros column-indices of the corresponding nonzero elements of matrix A. (Indices are zero-valued.) VectorValueIteratorT wd_vector_x; ///< Wrapped Pointer to the array of \p num_cols values corresponding to the dense input vector x VectorValueIteratorT wd_vector_y; ///< Wrapped Pointer to the array of \p num_cols values corresponding to the dense input vector x //--------------------------------------------------------------------- // Interface //--------------------------------------------------------------------- /** * Constructor */ __device__ __forceinline__ AgentSpmv( TempStorage& temp_storage, ///< Reference to temp_storage SpmvParams& spmv_params) ///< SpMV input parameter bundle : temp_storage(temp_storage.Alias()), spmv_params(spmv_params), wd_values(spmv_params.d_values), wd_row_end_offsets(spmv_params.d_row_end_offsets), wd_column_indices(spmv_params.d_column_indices), wd_vector_x(spmv_params.d_vector_x), wd_vector_y(spmv_params.d_vector_y) {} __device__ __forceinline__ void InitNan(double& nan_token) { long long NAN_BITS = 0xFFF0000000000001; nan_token = reinterpret_cast(NAN_BITS); // ValueT(0.0) / ValueT(0.0); } __device__ __forceinline__ void InitNan(float& nan_token) { int NAN_BITS = 0xFF800001; nan_token = reinterpret_cast(NAN_BITS); // ValueT(0.0) / ValueT(0.0); } /** * */ template __device__ __forceinline__ void ConsumeStrip( PrefixOpT& prefix_op, ReduceBySegmentOpT& scan_op, ValueT& row_total, ValueT& row_start, OffsetT& tile_nonzero_idx, OffsetT tile_nonzero_idx_end, OffsetT row_nonzero_idx, OffsetT row_nonzero_idx_end) { ValueT NAN_TOKEN; InitNan(NAN_TOKEN); // // Gather a strip of nonzeros into shared memory // #pragma unroll for (int ITEM = 0; ITEM < NNZ_PER_THREAD; ++ITEM) { ValueT nonzero = 0.0; OffsetT local_nonzero_idx = (ITEM * BLOCK_THREADS) + threadIdx.x; OffsetT nonzero_idx = tile_nonzero_idx + local_nonzero_idx; bool in_range = nonzero_idx < tile_nonzero_idx_end; OffsetT nonzero_idx2 = (in_range) ? nonzero_idx : tile_nonzero_idx_end - 1; OffsetT column_idx = wd_column_indices[nonzero_idx2]; ValueT value = wd_values[nonzero_idx2]; ValueT vector_value = wd_vector_x[column_idx]; nonzero = value * vector_value; if (!in_range) nonzero = 0.0; temp_storage.nonzeros[local_nonzero_idx] = nonzero; } __syncthreads(); // // Swap in NANs at local row start offsets // OffsetT local_row_nonzero_idx = row_nonzero_idx - tile_nonzero_idx; if ((local_row_nonzero_idx >= 0) && (local_row_nonzero_idx < TILE_ITEMS)) { // Thread's row starts in this strip row_start = temp_storage.nonzeros[local_row_nonzero_idx]; temp_storage.nonzeros[local_row_nonzero_idx] = NAN_TOKEN; } __syncthreads(); // // Segmented scan // // Read strip of nonzeros into thread-blocked order, setup segment flags KeyValuePairT scan_items[NNZ_PER_THREAD]; for (int ITEM = 0; ITEM < NNZ_PER_THREAD; ++ITEM) { int local_nonzero_idx = (threadIdx.x * NNZ_PER_THREAD) + ITEM; ValueT value = temp_storage.nonzeros[local_nonzero_idx]; bool is_nan = (value != value); scan_items[ITEM].value = (is_nan) ? 
0.0 : value; scan_items[ITEM].key = is_nan; } KeyValuePairT tile_aggregate; KeyValuePairT scan_items_out[NNZ_PER_THREAD]; BlockScanT(temp_storage.scan).ExclusiveScan(scan_items, scan_items_out, scan_op, tile_aggregate, prefix_op); // Save the inclusive sum for the last row if (threadIdx.x == 0) { temp_storage.nonzeros[TILE_ITEMS] = prefix_op.running_total.value; } // Store segment totals for (int ITEM = 0; ITEM < NNZ_PER_THREAD; ++ITEM) { int local_nonzero_idx = (threadIdx.x * NNZ_PER_THREAD) + ITEM; if (scan_items[ITEM].key) temp_storage.nonzeros[local_nonzero_idx] = scan_items_out[ITEM].value; } __syncthreads(); // // Update row totals // OffsetT local_row_nonzero_idx_end = row_nonzero_idx_end - tile_nonzero_idx; if ((local_row_nonzero_idx_end >= 0) && (local_row_nonzero_idx_end < TILE_ITEMS)) { // Thread's row ends in this strip row_total = temp_storage.nonzeros[local_row_nonzero_idx_end]; } tile_nonzero_idx += NNZ_PER_THREAD * BLOCK_THREADS; } /** * Consume input tile */ __device__ __forceinline__ void ConsumeTile( int tile_idx, int rows_per_tile) { // // Read in tile of row ranges // // Row range for the thread block OffsetT tile_row_idx = tile_idx * rows_per_tile; OffsetT tile_row_idx_end = CUB_MIN(tile_row_idx + rows_per_tile, spmv_params.num_rows); // Thread's row OffsetT row_idx = tile_row_idx + threadIdx.x; ValueT row_total = 0.0; ValueT row_start = 0.0; // Nonzero range for the thread's row OffsetT row_nonzero_idx = -1; OffsetT row_nonzero_idx_end = -1; if (row_idx < tile_row_idx_end) { row_nonzero_idx = wd_row_end_offsets[row_idx - 1]; row_nonzero_idx_end = wd_row_end_offsets[row_idx]; // Share block's starting nonzero offset if (threadIdx.x == 0) temp_storage.tile_nonzero_idx = row_nonzero_idx; // Share block's ending nonzero offset if (row_idx == tile_row_idx_end - 1) temp_storage.tile_nonzero_idx_end = row_nonzero_idx_end; // Zero-length rows don't participate if (row_nonzero_idx == row_nonzero_idx_end) { row_nonzero_idx = -1; row_nonzero_idx_end = -1; } } __syncthreads(); // // Process strips of nonzeros // // Nonzero range for the thread block OffsetT tile_nonzero_idx = temp_storage.tile_nonzero_idx; OffsetT tile_nonzero_idx_end = temp_storage.tile_nonzero_idx_end; KeyValuePairT tile_prefix(0, 0.0); ReduceBySegmentOpT scan_op; PrefixOpT prefix_op(tile_prefix, scan_op); #pragma unroll 1 while (tile_nonzero_idx < tile_nonzero_idx_end) { ConsumeStrip(prefix_op, scan_op, row_total, row_start, tile_nonzero_idx, tile_nonzero_idx_end, row_nonzero_idx, row_nonzero_idx_end); __syncthreads(); } // // Output to y // if (row_idx < tile_row_idx_end) { if (row_nonzero_idx_end == tile_nonzero_idx_end) { // Last row grabs the inclusive sum row_total = temp_storage.nonzeros[TILE_ITEMS]; } spmv_params.d_vector_y[row_idx] = row_start + row_total; } } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/agent/single_pass_scan_operators.cuh000066400000000000000000000654641411340063500252340ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. 
* * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * Callback operator types for supplying BlockScan prefixes */ #pragma once #include #include "../thread/thread_load.cuh" #include "../thread/thread_store.cuh" #include "../warp/warp_reduce.cuh" #include "../util_arch.cuh" #include "../util_device.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * Prefix functor type for maintaining a running prefix while scanning a * region independent of other thread blocks ******************************************************************************/ /** * Stateful callback operator type for supplying BlockScan prefixes. * Maintains a running prefix that can be applied to consecutive * BlockScan operations. */ template < typename T, ///< BlockScan value type typename ScanOpT> ///< Wrapped scan operator type struct BlockScanRunningPrefixOp { ScanOpT op; ///< Wrapped scan operator T running_total; ///< Running block-wide prefix /// Constructor __device__ __forceinline__ BlockScanRunningPrefixOp(ScanOpT op) : op(op) {} /// Constructor __device__ __forceinline__ BlockScanRunningPrefixOp( T starting_prefix, ScanOpT op) : op(op), running_total(starting_prefix) {} /** * Prefix callback operator. Returns the block-wide running_total in thread-0. */ __device__ __forceinline__ T operator()( const T &block_aggregate) ///< The aggregate sum of the BlockScan inputs { T retval = running_total; running_total = op(running_total, block_aggregate); return retval; } }; /****************************************************************************** * Generic tile status interface types for block-cooperative scans ******************************************************************************/ /** * Enumerations of tile status */ enum ScanTileStatus { SCAN_TILE_OOB, // Out-of-bounds (e.g., padding) SCAN_TILE_INVALID = 99, // Not yet processed SCAN_TILE_PARTIAL, // Tile aggregate is available SCAN_TILE_INCLUSIVE, // Inclusive tile prefix is available }; /** * Tile status interface. 
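 *
 * A tile publishes its tile aggregate as SCAN_TILE_PARTIAL and, once known, its
 * inclusive prefix as SCAN_TILE_INCLUSIVE; unprocessed tiles read back as
 * SCAN_TILE_INVALID, and a leading window of SCAN_TILE_OOB entries pads the front of
 * the descriptor array (see InitializeStatus, SetPartial, SetInclusive and WaitForValid below).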
*/ template < typename T, bool SINGLE_WORD = Traits::PRIMITIVE> struct ScanTileState; /** * Tile status interface specialized for scan status and value types * that can be combined into one machine word that can be * read/written coherently in a single access. */ template struct ScanTileState { // Status word type typedef typename If<(sizeof(T) == 8), long long, typename If<(sizeof(T) == 4), int, typename If<(sizeof(T) == 2), short, char>::Type>::Type>::Type StatusWord; // Unit word type typedef typename If<(sizeof(T) == 8), longlong2, typename If<(sizeof(T) == 4), int2, typename If<(sizeof(T) == 2), int, uchar2>::Type>::Type>::Type TxnWord; // Device word type struct TileDescriptor { StatusWord status; T value; }; // Constants enum { TILE_STATUS_PADDING = CUB_PTX_WARP_THREADS, }; // Device storage TxnWord *d_tile_descriptors; /// Constructor __host__ __device__ __forceinline__ ScanTileState() : d_tile_descriptors(NULL) {} /// Initializer __host__ __device__ __forceinline__ cudaError_t Init( int /*num_tiles*/, ///< [in] Number of tiles void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t /*temp_storage_bytes*/) ///< [in] Size in bytes of \t d_temp_storage allocation { d_tile_descriptors = reinterpret_cast(d_temp_storage); return cudaSuccess; } /** * Compute device memory needed for tile status */ __host__ __device__ __forceinline__ static cudaError_t AllocationSize( int num_tiles, ///< [in] Number of tiles size_t &temp_storage_bytes) ///< [out] Size in bytes of \t d_temp_storage allocation { temp_storage_bytes = (num_tiles + TILE_STATUS_PADDING) * sizeof(TileDescriptor); // bytes needed for tile status descriptors return cudaSuccess; } /** * Initialize (from device) */ __device__ __forceinline__ void InitializeStatus(int num_tiles) { int tile_idx = (blockIdx.x * blockDim.x) + threadIdx.x; TxnWord val = TxnWord(); TileDescriptor *descriptor = reinterpret_cast(&val); if (tile_idx < num_tiles) { // Not-yet-set descriptor->status = StatusWord(SCAN_TILE_INVALID); d_tile_descriptors[TILE_STATUS_PADDING + tile_idx] = val; } if ((blockIdx.x == 0) && (threadIdx.x < TILE_STATUS_PADDING)) { // Padding descriptor->status = StatusWord(SCAN_TILE_OOB); d_tile_descriptors[threadIdx.x] = val; } } /** * Update the specified tile's inclusive value and corresponding status */ __device__ __forceinline__ void SetInclusive(int tile_idx, T tile_inclusive) { TileDescriptor tile_descriptor; tile_descriptor.status = SCAN_TILE_INCLUSIVE; tile_descriptor.value = tile_inclusive; TxnWord alias; *reinterpret_cast(&alias) = tile_descriptor; ThreadStore(d_tile_descriptors + TILE_STATUS_PADDING + tile_idx, alias); } /** * Update the specified tile's partial value and corresponding status */ __device__ __forceinline__ void SetPartial(int tile_idx, T tile_partial) { TileDescriptor tile_descriptor; tile_descriptor.status = SCAN_TILE_PARTIAL; tile_descriptor.value = tile_partial; TxnWord alias; *reinterpret_cast(&alias) = tile_descriptor; ThreadStore(d_tile_descriptors + TILE_STATUS_PADDING + tile_idx, alias); } /** * Wait for the corresponding tile to become non-invalid */ __device__ __forceinline__ void WaitForValid( int tile_idx, StatusWord &status, T &value) { TileDescriptor tile_descriptor; do { __threadfence_block(); // prevent hoisting loads from loop TxnWord alias = ThreadLoad(d_tile_descriptors + TILE_STATUS_PADDING + tile_idx); tile_descriptor = reinterpret_cast(alias); } while 
(WARP_ANY((tile_descriptor.status == SCAN_TILE_INVALID), 0xffffffff)); status = tile_descriptor.status; value = tile_descriptor.value; } }; /** * Tile status interface specialized for scan status and value types that * cannot be combined into one machine word. */ template struct ScanTileState { // Status word type typedef char StatusWord; // Constants enum { TILE_STATUS_PADDING = CUB_PTX_WARP_THREADS, }; // Device storage StatusWord *d_tile_status; T *d_tile_partial; T *d_tile_inclusive; /// Constructor __host__ __device__ __forceinline__ ScanTileState() : d_tile_status(NULL), d_tile_partial(NULL), d_tile_inclusive(NULL) {} /// Initializer __host__ __device__ __forceinline__ cudaError_t Init( int num_tiles, ///< [in] Number of tiles void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t temp_storage_bytes) ///< [in] Size in bytes of \t d_temp_storage allocation { cudaError_t error = cudaSuccess; do { void* allocations[3]; size_t allocation_sizes[3]; allocation_sizes[0] = (num_tiles + TILE_STATUS_PADDING) * sizeof(StatusWord); // bytes needed for tile status descriptors allocation_sizes[1] = (num_tiles + TILE_STATUS_PADDING) * sizeof(Uninitialized); // bytes needed for partials allocation_sizes[2] = (num_tiles + TILE_STATUS_PADDING) * sizeof(Uninitialized); // bytes needed for inclusives // Compute allocation pointers into the single storage blob if (CubDebug(error = AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes))) break; // Alias the offsets d_tile_status = reinterpret_cast(allocations[0]); d_tile_partial = reinterpret_cast(allocations[1]); d_tile_inclusive = reinterpret_cast(allocations[2]); } while (0); return error; } /** * Compute device memory needed for tile status */ __host__ __device__ __forceinline__ static cudaError_t AllocationSize( int num_tiles, ///< [in] Number of tiles size_t &temp_storage_bytes) ///< [out] Size in bytes of \t d_temp_storage allocation { // Specify storage allocation requirements size_t allocation_sizes[3]; allocation_sizes[0] = (num_tiles + TILE_STATUS_PADDING) * sizeof(StatusWord); // bytes needed for tile status descriptors allocation_sizes[1] = (num_tiles + TILE_STATUS_PADDING) * sizeof(Uninitialized); // bytes needed for partials allocation_sizes[2] = (num_tiles + TILE_STATUS_PADDING) * sizeof(Uninitialized); // bytes needed for inclusives // Set the necessary size of the blob void* allocations[3]; return CubDebug(AliasTemporaries(NULL, temp_storage_bytes, allocations, allocation_sizes)); } /** * Initialize (from device) */ __device__ __forceinline__ void InitializeStatus(int num_tiles) { int tile_idx = (blockIdx.x * blockDim.x) + threadIdx.x; if (tile_idx < num_tiles) { // Not-yet-set d_tile_status[TILE_STATUS_PADDING + tile_idx] = StatusWord(SCAN_TILE_INVALID); } if ((blockIdx.x == 0) && (threadIdx.x < TILE_STATUS_PADDING)) { // Padding d_tile_status[threadIdx.x] = StatusWord(SCAN_TILE_OOB); } } /** * Update the specified tile's inclusive value and corresponding status */ __device__ __forceinline__ void SetInclusive(int tile_idx, T tile_inclusive) { // Update tile inclusive value ThreadStore(d_tile_inclusive + TILE_STATUS_PADDING + tile_idx, tile_inclusive); // Fence __threadfence(); // Update tile status ThreadStore(d_tile_status + TILE_STATUS_PADDING + tile_idx, StatusWord(SCAN_TILE_INCLUSIVE)); } /** * Update the specified tile's partial value and corresponding status */ __device__ 
__forceinline__ void SetPartial(int tile_idx, T tile_partial) { // Update tile partial value ThreadStore(d_tile_partial + TILE_STATUS_PADDING + tile_idx, tile_partial); // Fence __threadfence(); // Update tile status ThreadStore(d_tile_status + TILE_STATUS_PADDING + tile_idx, StatusWord(SCAN_TILE_PARTIAL)); } /** * Wait for the corresponding tile to become non-invalid */ __device__ __forceinline__ void WaitForValid( int tile_idx, StatusWord &status, T &value) { do { status = ThreadLoad(d_tile_status + TILE_STATUS_PADDING + tile_idx); __threadfence(); // prevent hoisting loads from loop or loads below above this one } while (status == SCAN_TILE_INVALID); if (status == StatusWord(SCAN_TILE_PARTIAL)) value = ThreadLoad(d_tile_partial + TILE_STATUS_PADDING + tile_idx); else value = ThreadLoad(d_tile_inclusive + TILE_STATUS_PADDING + tile_idx); } }; /****************************************************************************** * ReduceByKey tile status interface types for block-cooperative scans ******************************************************************************/ /** * Tile status interface for reduction by key. * */ template < typename ValueT, typename KeyT, bool SINGLE_WORD = (Traits::PRIMITIVE) && (sizeof(ValueT) + sizeof(KeyT) < 16)> struct ReduceByKeyScanTileState; /** * Tile status interface for reduction by key, specialized for scan status and value types that * cannot be combined into one machine word. */ template < typename ValueT, typename KeyT> struct ReduceByKeyScanTileState : ScanTileState > { typedef ScanTileState > SuperClass; /// Constructor __host__ __device__ __forceinline__ ReduceByKeyScanTileState() : SuperClass() {} }; /** * Tile status interface for reduction by key, specialized for scan status and value types that * can be combined into one machine word that can be read/written coherently in a single access. */ template < typename ValueT, typename KeyT> struct ReduceByKeyScanTileState { typedef KeyValuePairKeyValuePairT; // Constants enum { PAIR_SIZE = sizeof(ValueT) + sizeof(KeyT), TXN_WORD_SIZE = 1 << Log2::VALUE, STATUS_WORD_SIZE = TXN_WORD_SIZE - PAIR_SIZE, TILE_STATUS_PADDING = CUB_PTX_WARP_THREADS, }; // Status word type typedef typename If<(STATUS_WORD_SIZE == 8), long long, typename If<(STATUS_WORD_SIZE == 4), int, typename If<(STATUS_WORD_SIZE == 2), short, char>::Type>::Type>::Type StatusWord; // Status word type typedef typename If<(TXN_WORD_SIZE == 16), longlong2, typename If<(TXN_WORD_SIZE == 8), long long, int>::Type>::Type TxnWord; // Device word type (for when sizeof(ValueT) == sizeof(KeyT)) struct TileDescriptorBigStatus { KeyT key; ValueT value; StatusWord status; }; // Device word type (for when sizeof(ValueT) != sizeof(KeyT)) struct TileDescriptorLittleStatus { ValueT value; StatusWord status; KeyT key; }; // Device word type typedef typename If< (sizeof(ValueT) == sizeof(KeyT)), TileDescriptorBigStatus, TileDescriptorLittleStatus>::Type TileDescriptor; // Device storage TxnWord *d_tile_descriptors; /// Constructor __host__ __device__ __forceinline__ ReduceByKeyScanTileState() : d_tile_descriptors(NULL) {} /// Initializer __host__ __device__ __forceinline__ cudaError_t Init( int /*num_tiles*/, ///< [in] Number of tiles void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. 
size_t /*temp_storage_bytes*/) ///< [in] Size in bytes of \t d_temp_storage allocation { d_tile_descriptors = reinterpret_cast(d_temp_storage); return cudaSuccess; } /** * Compute device memory needed for tile status */ __host__ __device__ __forceinline__ static cudaError_t AllocationSize( int num_tiles, ///< [in] Number of tiles size_t &temp_storage_bytes) ///< [out] Size in bytes of \t d_temp_storage allocation { temp_storage_bytes = (num_tiles + TILE_STATUS_PADDING) * sizeof(TileDescriptor); // bytes needed for tile status descriptors return cudaSuccess; } /** * Initialize (from device) */ __device__ __forceinline__ void InitializeStatus(int num_tiles) { int tile_idx = (blockIdx.x * blockDim.x) + threadIdx.x; TxnWord val = TxnWord(); TileDescriptor *descriptor = reinterpret_cast(&val); if (tile_idx < num_tiles) { // Not-yet-set descriptor->status = StatusWord(SCAN_TILE_INVALID); d_tile_descriptors[TILE_STATUS_PADDING + tile_idx] = val; } if ((blockIdx.x == 0) && (threadIdx.x < TILE_STATUS_PADDING)) { // Padding descriptor->status = StatusWord(SCAN_TILE_OOB); d_tile_descriptors[threadIdx.x] = val; } } /** * Update the specified tile's inclusive value and corresponding status */ __device__ __forceinline__ void SetInclusive(int tile_idx, KeyValuePairT tile_inclusive) { TileDescriptor tile_descriptor; tile_descriptor.status = SCAN_TILE_INCLUSIVE; tile_descriptor.value = tile_inclusive.value; tile_descriptor.key = tile_inclusive.key; TxnWord alias; *reinterpret_cast(&alias) = tile_descriptor; ThreadStore(d_tile_descriptors + TILE_STATUS_PADDING + tile_idx, alias); } /** * Update the specified tile's partial value and corresponding status */ __device__ __forceinline__ void SetPartial(int tile_idx, KeyValuePairT tile_partial) { TileDescriptor tile_descriptor; tile_descriptor.status = SCAN_TILE_PARTIAL; tile_descriptor.value = tile_partial.value; tile_descriptor.key = tile_partial.key; TxnWord alias; *reinterpret_cast(&alias) = tile_descriptor; ThreadStore(d_tile_descriptors + TILE_STATUS_PADDING + tile_idx, alias); } /** * Wait for the corresponding tile to become non-invalid */ __device__ __forceinline__ void WaitForValid( int tile_idx, StatusWord &status, KeyValuePairT &value) { // TxnWord alias = ThreadLoad(d_tile_descriptors + TILE_STATUS_PADDING + tile_idx); // TileDescriptor tile_descriptor = reinterpret_cast(alias); // // while (tile_descriptor.status == SCAN_TILE_INVALID) // { // __threadfence_block(); // prevent hoisting loads from loop // // alias = ThreadLoad(d_tile_descriptors + TILE_STATUS_PADDING + tile_idx); // tile_descriptor = reinterpret_cast(alias); // } // // status = tile_descriptor.status; // value.value = tile_descriptor.value; // value.key = tile_descriptor.key; TileDescriptor tile_descriptor; do { __threadfence_block(); // prevent hoisting loads from loop TxnWord alias = ThreadLoad(d_tile_descriptors + TILE_STATUS_PADDING + tile_idx); tile_descriptor = reinterpret_cast(alias); } while (WARP_ANY((tile_descriptor.status == SCAN_TILE_INVALID), 0xffffffff)); status = tile_descriptor.status; value.value = tile_descriptor.value; value.key = tile_descriptor.key; } }; /****************************************************************************** * Prefix call-back operator for coupling local block scan within a * block-cooperative scan ******************************************************************************/ /** * Stateful block-scan prefix functor. 
Provides the running prefix for * the current tile by using the call-back warp to wait on * aggregates/prefixes from predecessor tiles to become available. */ template < typename T, typename ScanOpT, typename ScanTileStateT, int PTX_ARCH = CUB_PTX_ARCH> struct TilePrefixCallbackOp { // Parameterized warp reduce typedef WarpReduce WarpReduceT; // Temporary storage type struct _TempStorage { typename WarpReduceT::TempStorage warp_reduce; T exclusive_prefix; T inclusive_prefix; T block_aggregate; }; // Alias wrapper allowing temporary storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; // Type of status word typedef typename ScanTileStateT::StatusWord StatusWord; // Fields _TempStorage& temp_storage; ///< Reference to a warp-reduction instance ScanTileStateT& tile_status; ///< Interface to tile status ScanOpT scan_op; ///< Binary scan operator int tile_idx; ///< The current tile index T exclusive_prefix; ///< Exclusive prefix for the tile T inclusive_prefix; ///< Inclusive prefix for the tile // Constructor __device__ __forceinline__ TilePrefixCallbackOp( ScanTileStateT &tile_status, TempStorage &temp_storage, ScanOpT scan_op, int tile_idx) : temp_storage(temp_storage.Alias()), tile_status(tile_status), scan_op(scan_op), tile_idx(tile_idx) {} // Block until all predecessors within the warp-wide window have non-invalid status __device__ __forceinline__ void ProcessWindow( int predecessor_idx, ///< Preceding tile index to inspect StatusWord &predecessor_status, ///< [out] Preceding tile status T &window_aggregate) ///< [out] Relevant partial reduction from this window of preceding tiles { T value; tile_status.WaitForValid(predecessor_idx, predecessor_status, value); // Perform a segmented reduction to get the prefix for the current window. // Use the swizzled scan operator because we are now scanning *down* towards thread0.
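        // Lanes whose predecessor tile has already published an inclusive prefix mark
        // segment tails, so the tail-segmented warp reduction below combines the values of
        // the closest predecessors up to and including the first tile in this window whose
        // inclusive prefix is known.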
int tail_flag = (predecessor_status == StatusWord(SCAN_TILE_INCLUSIVE)); window_aggregate = WarpReduceT(temp_storage.warp_reduce).TailSegmentedReduce( value, tail_flag, SwizzleScanOp(scan_op)); } // BlockScan prefix callback functor (called by the first warp) __device__ __forceinline__ T operator()(T block_aggregate) { // Update our status with our tile-aggregate if (threadIdx.x == 0) { temp_storage.block_aggregate = block_aggregate; tile_status.SetPartial(tile_idx, block_aggregate); } int predecessor_idx = tile_idx - threadIdx.x - 1; StatusWord predecessor_status; T window_aggregate; // Wait for the warp-wide window of predecessor tiles to become valid ProcessWindow(predecessor_idx, predecessor_status, window_aggregate); // The exclusive tile prefix starts out as the current window aggregate exclusive_prefix = window_aggregate; // Keep sliding the window back until we come across a tile whose inclusive prefix is known while (WARP_ALL((predecessor_status != StatusWord(SCAN_TILE_INCLUSIVE)), 0xffffffff)) { predecessor_idx -= CUB_PTX_WARP_THREADS; // Update exclusive tile prefix with the window prefix ProcessWindow(predecessor_idx, predecessor_status, window_aggregate); exclusive_prefix = scan_op(window_aggregate, exclusive_prefix); } // Compute the inclusive tile prefix and update the status for this tile if (threadIdx.x == 0) { inclusive_prefix = scan_op(exclusive_prefix, block_aggregate); tile_status.SetInclusive(tile_idx, inclusive_prefix); temp_storage.exclusive_prefix = exclusive_prefix; temp_storage.inclusive_prefix = inclusive_prefix; } // Return exclusive_prefix return exclusive_prefix; } // Get the exclusive prefix stored in temporary storage __device__ __forceinline__ T GetExclusivePrefix() { return temp_storage.exclusive_prefix; } // Get the inclusive prefix stored in temporary storage __device__ __forceinline__ T GetInclusivePrefix() { return temp_storage.inclusive_prefix; } // Get the block aggregate stored in temporary storage __device__ __forceinline__ T GetBlockAggregate() { return temp_storage.block_aggregate; } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/block/000077500000000000000000000000001411340063500170775ustar00rootroot00000000000000relion-3.1.3/src/acc/cuda/cub/block/block_adjacent_difference.cuh000066400000000000000000000606631411340063500247100ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * The cub::BlockDiscontinuity class provides [collective](index.html#sec0) methods for flagging discontinuities within an ordered set of items partitioned across a CUDA thread block. */ #pragma once #include "../util_type.cuh" #include "../util_ptx.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { template < typename T, int BLOCK_DIM_X, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH> class BlockAdjacentDifference { private: /****************************************************************************** * Constants and type definitions ******************************************************************************/ /// Constants enum { /// The thread block size in threads BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, }; /// Shared memory storage layout type (last element from each thread's input) struct _TempStorage { T first_items[BLOCK_THREADS]; T last_items[BLOCK_THREADS]; }; /****************************************************************************** * Utility methods ******************************************************************************/ /// Internal storage allocator __device__ __forceinline__ _TempStorage& PrivateStorage() { __shared__ _TempStorage private_storage; return private_storage; } /// Specialization for when FlagOp has third index param template ::HAS_PARAM> struct ApplyOp { // Apply flag operator static __device__ __forceinline__ T FlagT(FlagOp flag_op, const T &a, const T &b, int idx) { return flag_op(b, a, idx); } }; /// Specialization for when FlagOp does not have a third index param template struct ApplyOp { // Apply flag operator static __device__ __forceinline__ T FlagT(FlagOp flag_op, const T &a, const T &b, int /*idx*/) { return flag_op(b, a); } }; /// Templated unrolling of item comparison (inductive case) template struct Iterate { // Head flags template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> static __device__ __forceinline__ void FlagHeads( int linear_tid, FlagT (&flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items T (&preds)[ITEMS_PER_THREAD], ///< [out] Calling thread's predecessor items FlagOp flag_op) ///< [in] Binary boolean flag predicate { preds[ITERATION] = input[ITERATION - 1]; flags[ITERATION] = ApplyOp::FlagT( flag_op, preds[ITERATION], input[ITERATION], (linear_tid * ITEMS_PER_THREAD) + ITERATION); Iterate::FlagHeads(linear_tid, flags, input, preds, flag_op); } // Tail flags template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> static __device__ __forceinline__ void FlagTails( int linear_tid, FlagT (&flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items FlagOp flag_op) ///< [in] Binary boolean flag predicate { flags[ITERATION] = 
ApplyOp::FlagT( flag_op, input[ITERATION], input[ITERATION + 1], (linear_tid * ITEMS_PER_THREAD) + ITERATION + 1); Iterate::FlagTails(linear_tid, flags, input, flag_op); } }; /// Templated unrolling of item comparison (termination case) template struct Iterate { // Head flags template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> static __device__ __forceinline__ void FlagHeads( int /*linear_tid*/, FlagT (&/*flags*/)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags T (&/*input*/)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items T (&/*preds*/)[ITEMS_PER_THREAD], ///< [out] Calling thread's predecessor items FlagOp /*flag_op*/) ///< [in] Binary boolean flag predicate {} // Tail flags template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> static __device__ __forceinline__ void FlagTails( int /*linear_tid*/, FlagT (&/*flags*/)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags T (&/*input*/)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items FlagOp /*flag_op*/) ///< [in] Binary boolean flag predicate {} }; /****************************************************************************** * Thread fields ******************************************************************************/ /// Shared storage reference _TempStorage &temp_storage; /// Linear thread-id unsigned int linear_tid; public: /// \smemstorage{BlockDiscontinuity} struct TempStorage : Uninitialized<_TempStorage> {}; /******************************************************************//** * \name Collective constructors *********************************************************************/ //@{ /** * \brief Collective constructor using a private static allocation of shared memory as temporary storage. */ __device__ __forceinline__ BlockAdjacentDifference() : temp_storage(PrivateStorage()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} /** * \brief Collective constructor using the specified memory allocation as temporary storage. 
*/ __device__ __forceinline__ BlockAdjacentDifference( TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage : temp_storage(temp_storage.Alias()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} //@} end member group /******************************************************************//** * \name Head flag operations *********************************************************************/ //@{ #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> __device__ __forceinline__ void FlagHeads( FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items T (&preds)[ITEMS_PER_THREAD], ///< [out] Calling thread's predecessor items FlagOp flag_op) ///< [in] Binary boolean flag predicate { // Share last item temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1]; CTA_SYNC(); if (linear_tid == 0) { // Set flag for first thread-item (preds[0] is undefined) head_flags[0] = 1; } else { preds[0] = temp_storage.last_items[linear_tid - 1]; head_flags[0] = ApplyOp::FlagT(flag_op, preds[0], input[0], linear_tid * ITEMS_PER_THREAD); } // Set head_flags for remaining items Iterate<1, ITEMS_PER_THREAD>::FlagHeads(linear_tid, head_flags, input, preds, flag_op); } template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> __device__ __forceinline__ void FlagHeads( FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items T (&preds)[ITEMS_PER_THREAD], ///< [out] Calling thread's predecessor items FlagOp flag_op, ///< [in] Binary boolean flag predicate T tile_predecessor_item) ///< [in] [thread0 only] Item with which to compare the first tile item (input0 from thread0). { // Share last item temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1]; CTA_SYNC(); // Set flag for first thread-item preds[0] = (linear_tid == 0) ? tile_predecessor_item : // First thread temp_storage.last_items[linear_tid - 1]; head_flags[0] = ApplyOp::FlagT(flag_op, preds[0], input[0], linear_tid * ITEMS_PER_THREAD); // Set head_flags for remaining items Iterate<1, ITEMS_PER_THREAD>::FlagHeads(linear_tid, head_flags, input, preds, flag_op); } #endif // DOXYGEN_SHOULD_SKIP_THIS template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> __device__ __forceinline__ void FlagHeads( FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items FlagOp flag_op) ///< [in] Binary boolean flag predicate { T preds[ITEMS_PER_THREAD]; FlagHeads(head_flags, input, preds, flag_op); } template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> __device__ __forceinline__ void FlagHeads( FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items FlagOp flag_op, ///< [in] Binary boolean flag predicate T tile_predecessor_item) ///< [in] [thread0 only] Item with which to compare the first tile item (input0 from thread0). 
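    // Convenience overload: predecessor items are materialized into a local scratch array
    // and discarded; only the head flags are written back to the caller.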
{ T preds[ITEMS_PER_THREAD]; FlagHeads(head_flags, input, preds, flag_op, tile_predecessor_item); } template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> __device__ __forceinline__ void FlagTails( FlagT (&tail_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity tail_flags T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items FlagOp flag_op) ///< [in] Binary boolean flag predicate { // Share first item temp_storage.first_items[linear_tid] = input[0]; CTA_SYNC(); // Set flag for last thread-item tail_flags[ITEMS_PER_THREAD - 1] = (linear_tid == BLOCK_THREADS - 1) ? 1 : // Last thread ApplyOp::FlagT( flag_op, input[ITEMS_PER_THREAD - 1], temp_storage.first_items[linear_tid + 1], (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD); // Set tail_flags for remaining items Iterate<0, ITEMS_PER_THREAD - 1>::FlagTails(linear_tid, tail_flags, input, flag_op); } template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> __device__ __forceinline__ void FlagTails( FlagT (&tail_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity tail_flags T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items FlagOp flag_op, ///< [in] Binary boolean flag predicate T tile_successor_item) ///< [in] [threadBLOCK_THREADS-1 only] Item with which to compare the last tile item (inputITEMS_PER_THREAD-1 from threadBLOCK_THREADS-1). { // Share first item temp_storage.first_items[linear_tid] = input[0]; CTA_SYNC(); // Set flag for last thread-item T successor_item = (linear_tid == BLOCK_THREADS - 1) ? tile_successor_item : // Last thread temp_storage.first_items[linear_tid + 1]; tail_flags[ITEMS_PER_THREAD - 1] = ApplyOp::FlagT( flag_op, input[ITEMS_PER_THREAD - 1], successor_item, (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD); // Set tail_flags for remaining items Iterate<0, ITEMS_PER_THREAD - 1>::FlagTails(linear_tid, tail_flags, input, flag_op); } template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> __device__ __forceinline__ void FlagHeadsAndTails( FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags FlagT (&tail_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity tail_flags T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items FlagOp flag_op) ///< [in] Binary boolean flag predicate { // Share first and last items temp_storage.first_items[linear_tid] = input[0]; temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1]; CTA_SYNC(); T preds[ITEMS_PER_THREAD]; // Set flag for first thread-item preds[0] = temp_storage.last_items[linear_tid - 1]; if (linear_tid == 0) { head_flags[0] = 1; } else { head_flags[0] = ApplyOp::FlagT( flag_op, preds[0], input[0], linear_tid * ITEMS_PER_THREAD); } // Set flag for last thread-item tail_flags[ITEMS_PER_THREAD - 1] = (linear_tid == BLOCK_THREADS - 1) ? 
1 : // Last thread ApplyOp::FlagT( flag_op, input[ITEMS_PER_THREAD - 1], temp_storage.first_items[linear_tid + 1], (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD); // Set head_flags for remaining items Iterate<1, ITEMS_PER_THREAD>::FlagHeads(linear_tid, head_flags, input, preds, flag_op); // Set tail_flags for remaining items Iterate<0, ITEMS_PER_THREAD - 1>::FlagTails(linear_tid, tail_flags, input, flag_op); } template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> __device__ __forceinline__ void FlagHeadsAndTails( FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags FlagT (&tail_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity tail_flags T tile_successor_item, ///< [in] [threadBLOCK_THREADS-1 only] Item with which to compare the last tile item (inputITEMS_PER_THREAD-1 from threadBLOCK_THREADS-1). T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items FlagOp flag_op) ///< [in] Binary boolean flag predicate { // Share first and last items temp_storage.first_items[linear_tid] = input[0]; temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1]; CTA_SYNC(); T preds[ITEMS_PER_THREAD]; // Set flag for first thread-item if (linear_tid == 0) { head_flags[0] = 1; } else { preds[0] = temp_storage.last_items[linear_tid - 1]; head_flags[0] = ApplyOp::FlagT( flag_op, preds[0], input[0], linear_tid * ITEMS_PER_THREAD); } // Set flag for last thread-item T successor_item = (linear_tid == BLOCK_THREADS - 1) ? tile_successor_item : // Last thread temp_storage.first_items[linear_tid + 1]; tail_flags[ITEMS_PER_THREAD - 1] = ApplyOp::FlagT( flag_op, input[ITEMS_PER_THREAD - 1], successor_item, (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD); // Set head_flags for remaining items Iterate<1, ITEMS_PER_THREAD>::FlagHeads(linear_tid, head_flags, input, preds, flag_op); // Set tail_flags for remaining items Iterate<0, ITEMS_PER_THREAD - 1>::FlagTails(linear_tid, tail_flags, input, flag_op); } template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> __device__ __forceinline__ void FlagHeadsAndTails( FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags T tile_predecessor_item, ///< [in] [thread0 only] Item with which to compare the first tile item (input0 from thread0). FlagT (&tail_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity tail_flags T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items FlagOp flag_op) ///< [in] Binary boolean flag predicate { // Share first and last items temp_storage.first_items[linear_tid] = input[0]; temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1]; CTA_SYNC(); T preds[ITEMS_PER_THREAD]; // Set flag for first thread-item preds[0] = (linear_tid == 0) ? tile_predecessor_item : // First thread temp_storage.last_items[linear_tid - 1]; head_flags[0] = ApplyOp::FlagT( flag_op, preds[0], input[0], linear_tid * ITEMS_PER_THREAD); // Set flag for last thread-item tail_flags[ITEMS_PER_THREAD - 1] = (linear_tid == BLOCK_THREADS - 1) ? 
1 : // Last thread ApplyOp::FlagT( flag_op, input[ITEMS_PER_THREAD - 1], temp_storage.first_items[linear_tid + 1], (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD); // Set head_flags for remaining items Iterate<1, ITEMS_PER_THREAD>::FlagHeads(linear_tid, head_flags, input, preds, flag_op); // Set tail_flags for remaining items Iterate<0, ITEMS_PER_THREAD - 1>::FlagTails(linear_tid, tail_flags, input, flag_op); } template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> __device__ __forceinline__ void FlagHeadsAndTails( FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags T tile_predecessor_item, ///< [in] [thread0 only] Item with which to compare the first tile item (input0 from thread0). FlagT (&tail_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity tail_flags T tile_successor_item, ///< [in] [threadBLOCK_THREADS-1 only] Item with which to compare the last tile item (inputITEMS_PER_THREAD-1 from threadBLOCK_THREADS-1). T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items FlagOp flag_op) ///< [in] Binary boolean flag predicate { // Share first and last items temp_storage.first_items[linear_tid] = input[0]; temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1]; CTA_SYNC(); T preds[ITEMS_PER_THREAD]; // Set flag for first thread-item preds[0] = (linear_tid == 0) ? tile_predecessor_item : // First thread temp_storage.last_items[linear_tid - 1]; head_flags[0] = ApplyOp::FlagT( flag_op, preds[0], input[0], linear_tid * ITEMS_PER_THREAD); // Set flag for last thread-item T successor_item = (linear_tid == BLOCK_THREADS - 1) ? tile_successor_item : // Last thread temp_storage.first_items[linear_tid + 1]; tail_flags[ITEMS_PER_THREAD - 1] = ApplyOp::FlagT( flag_op, input[ITEMS_PER_THREAD - 1], successor_item, (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD); // Set head_flags for remaining items Iterate<1, ITEMS_PER_THREAD>::FlagHeads(linear_tid, head_flags, input, preds, flag_op); // Set tail_flags for remaining items Iterate<0, ITEMS_PER_THREAD - 1>::FlagTails(linear_tid, tail_flags, input, flag_op); } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/block/block_discontinuity.cuh000066400000000000000000001524261411340063500236710ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * The cub::BlockDiscontinuity class provides [collective](index.html#sec0) methods for flagging discontinuities within an ordered set of items partitioned across a CUDA thread block. */ #pragma once #include "../util_type.cuh" #include "../util_ptx.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief The BlockDiscontinuity class provides [collective](index.html#sec0) methods for flagging discontinuities within an ordered set of items partitioned across a CUDA thread block. ![](discont_logo.png) * \ingroup BlockModule * * \tparam T The data type to be flagged. * \tparam BLOCK_DIM_X The thread block length in threads along the X dimension * \tparam BLOCK_DIM_Y [optional] The thread block length in threads along the Y dimension (default: 1) * \tparam BLOCK_DIM_Z [optional] The thread block length in threads along the Z dimension (default: 1) * \tparam PTX_ARCH [optional] \ptxversion * * \par Overview * - A set of "head flags" (or "tail flags") is often used to indicate corresponding items * that differ from their predecessors (or successors). For example, head flags are convenient * for demarcating disjoint data segments as part of a segmented scan or reduction. * - \blocked * * \par Performance Considerations * - \granularity * * \par A Simple Example * \blockcollective{BlockDiscontinuity} * \par * The code snippet below illustrates the head flagging of 512 integer items that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockDiscontinuity for a 1D block of 128 threads on type int * typedef cub::BlockDiscontinuity BlockDiscontinuity; * * // Allocate shared memory for BlockDiscontinuity * __shared__ typename BlockDiscontinuity::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... * * // Collectively compute head flags for discontinuities in the segment * int head_flags[4]; * BlockDiscontinuity(temp_storage).FlagHeads(head_flags, thread_data, cub::Inequality()); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is * { [0,0,1,1], [1,1,1,1], [2,3,3,3], [3,4,4,4], ... }. * The corresponding output \p head_flags in those threads will be * { [1,0,1,0], [0,0,0,0], [1,1,0,0], [0,1,0,0], ... }. 
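 * The leading flag is set unconditionally: the first item of thread0 has no
 * predecessor in the tile, so FlagHeads always flags it unless a \p tile_predecessor_item
 * is supplied.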
* * \par Performance Considerations * - Incurs zero bank conflicts for most types * */ template < typename T, int BLOCK_DIM_X, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH> class BlockDiscontinuity { private: /****************************************************************************** * Constants and type definitions ******************************************************************************/ /// Constants enum { /// The thread block size in threads BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, }; /// Shared memory storage layout type (last element from each thread's input) struct _TempStorage { T first_items[BLOCK_THREADS]; T last_items[BLOCK_THREADS]; }; /****************************************************************************** * Utility methods ******************************************************************************/ /// Internal storage allocator __device__ __forceinline__ _TempStorage& PrivateStorage() { __shared__ _TempStorage private_storage; return private_storage; } /// Specialization for when FlagOp has third index param template ::HAS_PARAM> struct ApplyOp { // Apply flag operator static __device__ __forceinline__ bool FlagT(FlagOp flag_op, const T &a, const T &b, int idx) { return flag_op(a, b, idx); } }; /// Specialization for when FlagOp does not have a third index param template struct ApplyOp { // Apply flag operator static __device__ __forceinline__ bool FlagT(FlagOp flag_op, const T &a, const T &b, int /*idx*/) { return flag_op(a, b); } }; /// Templated unrolling of item comparison (inductive case) template struct Iterate { // Head flags template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> static __device__ __forceinline__ void FlagHeads( int linear_tid, FlagT (&flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items T (&preds)[ITEMS_PER_THREAD], ///< [out] Calling thread's predecessor items FlagOp flag_op) ///< [in] Binary boolean flag predicate { preds[ITERATION] = input[ITERATION - 1]; flags[ITERATION] = ApplyOp::FlagT( flag_op, preds[ITERATION], input[ITERATION], (linear_tid * ITEMS_PER_THREAD) + ITERATION); Iterate::FlagHeads(linear_tid, flags, input, preds, flag_op); } // Tail flags template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> static __device__ __forceinline__ void FlagTails( int linear_tid, FlagT (&flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items FlagOp flag_op) ///< [in] Binary boolean flag predicate { flags[ITERATION] = ApplyOp::FlagT( flag_op, input[ITERATION], input[ITERATION + 1], (linear_tid * ITEMS_PER_THREAD) + ITERATION + 1); Iterate::FlagTails(linear_tid, flags, input, flag_op); } }; /// Templated unrolling of item comparison (termination case) template struct Iterate { // Head flags template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> static __device__ __forceinline__ void FlagHeads( int /*linear_tid*/, FlagT (&/*flags*/)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags T (&/*input*/)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items T (&/*preds*/)[ITEMS_PER_THREAD], ///< [out] Calling thread's predecessor items FlagOp /*flag_op*/) ///< [in] Binary boolean flag predicate {} // Tail flags template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> static __device__ __forceinline__ void FlagTails( int /*linear_tid*/, FlagT 
(&/*flags*/)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags T (&/*input*/)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items FlagOp /*flag_op*/) ///< [in] Binary boolean flag predicate {} }; /****************************************************************************** * Thread fields ******************************************************************************/ /// Shared storage reference _TempStorage &temp_storage; /// Linear thread-id unsigned int linear_tid; public: /// \smemstorage{BlockDiscontinuity} struct TempStorage : Uninitialized<_TempStorage> {}; /******************************************************************//** * \name Collective constructors *********************************************************************/ //@{ /** * \brief Collective constructor using a private static allocation of shared memory as temporary storage. */ __device__ __forceinline__ BlockDiscontinuity() : temp_storage(PrivateStorage()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} /** * \brief Collective constructor using the specified memory allocation as temporary storage. */ __device__ __forceinline__ BlockDiscontinuity( TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage : temp_storage(temp_storage.Alias()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} //@} end member group /******************************************************************//** * \name Head flag operations *********************************************************************/ //@{ #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> __device__ __forceinline__ void FlagHeads( FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items T (&preds)[ITEMS_PER_THREAD], ///< [out] Calling thread's predecessor items FlagOp flag_op) ///< [in] Binary boolean flag predicate { // Share last item temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1]; CTA_SYNC(); if (linear_tid == 0) { // Set flag for first thread-item (preds[0] is undefined) head_flags[0] = 1; } else { preds[0] = temp_storage.last_items[linear_tid - 1]; head_flags[0] = ApplyOp::FlagT(flag_op, preds[0], input[0], linear_tid * ITEMS_PER_THREAD); } // Set head_flags for remaining items Iterate<1, ITEMS_PER_THREAD>::FlagHeads(linear_tid, head_flags, input, preds, flag_op); } template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> __device__ __forceinline__ void FlagHeads( FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items T (&preds)[ITEMS_PER_THREAD], ///< [out] Calling thread's predecessor items FlagOp flag_op, ///< [in] Binary boolean flag predicate T tile_predecessor_item) ///< [in] [thread0 only] Item with which to compare the first tile item (input0 from thread0). { // Share last item temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1]; CTA_SYNC(); // Set flag for first thread-item preds[0] = (linear_tid == 0) ? 
tile_predecessor_item : // First thread temp_storage.last_items[linear_tid - 1]; head_flags[0] = ApplyOp::FlagT(flag_op, preds[0], input[0], linear_tid * ITEMS_PER_THREAD); // Set head_flags for remaining items Iterate<1, ITEMS_PER_THREAD>::FlagHeads(linear_tid, head_flags, input, preds, flag_op); } #endif // DOXYGEN_SHOULD_SKIP_THIS /** * \brief Sets head flags indicating discontinuities between items partitioned across the thread block, for which the first item has no reference and is always flagged. * * \par * - The flag head_flagsi is set for item * inputi when * flag_op(previous-item, inputi) * returns \p true (where previous-item is either the preceding item * in the same thread or the last item in the previous thread). * - For thread0, item input0 is always flagged. * - \blocked * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates the head-flagging of 512 integer items that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockDiscontinuity for a 1D block of 128 threads on type int * typedef cub::BlockDiscontinuity BlockDiscontinuity; * * // Allocate shared memory for BlockDiscontinuity * __shared__ typename BlockDiscontinuity::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... * * // Collectively compute head flags for discontinuities in the segment * int head_flags[4]; * BlockDiscontinuity(temp_storage).FlagHeads(head_flags, thread_data, cub::Inequality()); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is * { [0,0,1,1], [1,1,1,1], [2,3,3,3], [3,4,4,4], ... }. * The corresponding output \p head_flags in those threads will be * { [1,0,1,0], [0,0,0,0], [1,1,0,0], [0,1,0,0], ... }. * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam FlagT [inferred] The flag type (must be an integer type) * \tparam FlagOp [inferred] Binary predicate functor type having member T operator()(const T &a, const T &b) or member T operator()(const T &a, const T &b, unsigned int b_index), and returning \p true if a discontinuity exists between \p a and \p b, otherwise \p false. \p b_index is the rank of b in the aggregate tile of data. */ template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> __device__ __forceinline__ void FlagHeads( FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items FlagOp flag_op) ///< [in] Binary boolean flag predicate { T preds[ITEMS_PER_THREAD]; FlagHeads(head_flags, input, preds, flag_op); } /** * \brief Sets head flags indicating discontinuities between items partitioned across the thread block. * * \par * - The flag head_flagsi is set for item * inputi when * flag_op(previous-item, inputi) * returns \p true (where previous-item is either the preceding item * in the same thread or the last item in the previous thread). * - For thread0, item input0 is compared * against \p tile_predecessor_item. * - \blocked * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates the head-flagging of 512 integer items that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive items. 
* \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockDiscontinuity for a 1D block of 128 threads on type int * typedef cub::BlockDiscontinuity BlockDiscontinuity; * * // Allocate shared memory for BlockDiscontinuity * __shared__ typename BlockDiscontinuity::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... * * // Have thread0 obtain the predecessor item for the entire tile * int tile_predecessor_item; * if (threadIdx.x == 0) tile_predecessor_item == ... * * // Collectively compute head flags for discontinuities in the segment * int head_flags[4]; * BlockDiscontinuity(temp_storage).FlagHeads( * head_flags, thread_data, cub::Inequality(), tile_predecessor_item); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is * { [0,0,1,1], [1,1,1,1], [2,3,3,3], [3,4,4,4], ... }, * and that \p tile_predecessor_item is \p 0. The corresponding output \p head_flags in those threads will be * { [0,0,1,0], [0,0,0,0], [1,1,0,0], [0,1,0,0], ... }. * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam FlagT [inferred] The flag type (must be an integer type) * \tparam FlagOp [inferred] Binary predicate functor type having member T operator()(const T &a, const T &b) or member T operator()(const T &a, const T &b, unsigned int b_index), and returning \p true if a discontinuity exists between \p a and \p b, otherwise \p false. \p b_index is the rank of b in the aggregate tile of data. */ template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> __device__ __forceinline__ void FlagHeads( FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items FlagOp flag_op, ///< [in] Binary boolean flag predicate T tile_predecessor_item) ///< [in] [thread0 only] Item with which to compare the first tile item (input0 from thread0). { T preds[ITEMS_PER_THREAD]; FlagHeads(head_flags, input, preds, flag_op, tile_predecessor_item); } //@} end member group /******************************************************************//** * \name Tail flag operations *********************************************************************/ //@{ /** * \brief Sets tail flags indicating discontinuities between items partitioned across the thread block, for which the last item has no reference and is always flagged. * * \par * - The flag tail_flagsi is set for item * inputi when * flag_op(inputi, next-item) * returns \p true (where next-item is either the next item * in the same thread or the first item in the next thread). * - For threadBLOCK_THREADS-1, item * inputITEMS_PER_THREAD-1 is always flagged. * - \blocked * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates the tail-flagging of 512 integer items that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockDiscontinuity for a 1D block of 128 threads on type int * typedef cub::BlockDiscontinuity BlockDiscontinuity; * * // Allocate shared memory for BlockDiscontinuity * __shared__ typename BlockDiscontinuity::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... 
* * // Collectively compute tail flags for discontinuities in the segment * int tail_flags[4]; * BlockDiscontinuity(temp_storage).FlagTails(tail_flags, thread_data, cub::Inequality()); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is * { [0,0,1,1], [1,1,1,1], [2,3,3,3], ..., [124,125,125,125] }. * The corresponding output \p tail_flags in those threads will be * { [0,1,0,0], [0,0,0,1], [1,0,0,...], ..., [1,0,0,1] }. * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam FlagT [inferred] The flag type (must be an integer type) * \tparam FlagOp [inferred] Binary predicate functor type having member T operator()(const T &a, const T &b) or member T operator()(const T &a, const T &b, unsigned int b_index), and returning \p true if a discontinuity exists between \p a and \p b, otherwise \p false. \p b_index is the rank of b in the aggregate tile of data. */ template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> __device__ __forceinline__ void FlagTails( FlagT (&tail_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity tail_flags T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items FlagOp flag_op) ///< [in] Binary boolean flag predicate { // Share first item temp_storage.first_items[linear_tid] = input[0]; CTA_SYNC(); // Set flag for last thread-item tail_flags[ITEMS_PER_THREAD - 1] = (linear_tid == BLOCK_THREADS - 1) ? 1 : // Last thread ApplyOp::FlagT( flag_op, input[ITEMS_PER_THREAD - 1], temp_storage.first_items[linear_tid + 1], (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD); // Set tail_flags for remaining items Iterate<0, ITEMS_PER_THREAD - 1>::FlagTails(linear_tid, tail_flags, input, flag_op); } /** * \brief Sets tail flags indicating discontinuities between items partitioned across the thread block. * * \par * - The flag tail_flagsi is set for item * inputi when * flag_op(inputi, next-item) * returns \p true (where next-item is either the next item * in the same thread or the first item in the next thread). * - For threadBLOCK_THREADS-1, item * inputITEMS_PER_THREAD-1 is compared * against \p tile_successor_item. * - \blocked * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates the tail-flagging of 512 integer items that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockDiscontinuity for a 1D block of 128 threads on type int * typedef cub::BlockDiscontinuity BlockDiscontinuity; * * // Allocate shared memory for BlockDiscontinuity * __shared__ typename BlockDiscontinuity::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... * * // Have thread127 obtain the successor item for the entire tile * int tile_successor_item; * if (threadIdx.x == 127) tile_successor_item == ... * * // Collectively compute tail flags for discontinuities in the segment * int tail_flags[4]; * BlockDiscontinuity(temp_storage).FlagTails( * tail_flags, thread_data, cub::Inequality(), tile_successor_item); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is * { [0,0,1,1], [1,1,1,1], [2,3,3,3], ..., [124,125,125,125] } * and that \p tile_successor_item is \p 125. 
The corresponding output \p tail_flags in those threads will be * { [0,1,0,0], [0,0,0,1], [1,0,0,...], ..., [1,0,0,0] }. * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam FlagT [inferred] The flag type (must be an integer type) * \tparam FlagOp [inferred] Binary predicate functor type having member T operator()(const T &a, const T &b) or member T operator()(const T &a, const T &b, unsigned int b_index), and returning \p true if a discontinuity exists between \p a and \p b, otherwise \p false. \p b_index is the rank of b in the aggregate tile of data. */ template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> __device__ __forceinline__ void FlagTails( FlagT (&tail_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity tail_flags T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items FlagOp flag_op, ///< [in] Binary boolean flag predicate T tile_successor_item) ///< [in] [threadBLOCK_THREADS-1 only] Item with which to compare the last tile item (inputITEMS_PER_THREAD-1 from threadBLOCK_THREADS-1). { // Share first item temp_storage.first_items[linear_tid] = input[0]; CTA_SYNC(); // Set flag for last thread-item T successor_item = (linear_tid == BLOCK_THREADS - 1) ? tile_successor_item : // Last thread temp_storage.first_items[linear_tid + 1]; tail_flags[ITEMS_PER_THREAD - 1] = ApplyOp::FlagT( flag_op, input[ITEMS_PER_THREAD - 1], successor_item, (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD); // Set tail_flags for remaining items Iterate<0, ITEMS_PER_THREAD - 1>::FlagTails(linear_tid, tail_flags, input, flag_op); } //@} end member group /******************************************************************//** * \name Head & tail flag operations *********************************************************************/ //@{ /** * \brief Sets both head and tail flags indicating discontinuities between items partitioned across the thread block. * * \par * - The flag head_flagsi is set for item * inputi when * flag_op(previous-item, inputi) * returns \p true (where previous-item is either the preceding item * in the same thread or the last item in the previous thread). * - For thread0, item input0 is always flagged. * - The flag tail_flagsi is set for item * inputi when * flag_op(inputi, next-item) * returns \p true (where next-item is either the next item * in the same thread or the first item in the next thread). * - For threadBLOCK_THREADS-1, item * inputITEMS_PER_THREAD-1 is always flagged. * - \blocked * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates the head- and tail-flagging of 512 integer items that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockDiscontinuity for a 1D block of 128 threads on type int * typedef cub::BlockDiscontinuity BlockDiscontinuity; * * // Allocate shared memory for BlockDiscontinuity * __shared__ typename BlockDiscontinuity::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... 
* * // Collectively compute head and flags for discontinuities in the segment * int head_flags[4]; * int tail_flags[4]; * BlockDiscontinuity(temp_storage).FlagTails( * head_flags, tail_flags, thread_data, cub::Inequality()); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is * { [0,0,1,1], [1,1,1,1], [2,3,3,3], ..., [124,125,125,125] } * and that the tile_successor_item is \p 125. The corresponding output \p head_flags * in those threads will be { [1,0,1,0], [0,0,0,0], [1,1,0,0], [0,1,0,0], ... }. * and the corresponding output \p tail_flags in those threads will be * { [0,1,0,0], [0,0,0,1], [1,0,0,...], ..., [1,0,0,1] }. * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam FlagT [inferred] The flag type (must be an integer type) * \tparam FlagOp [inferred] Binary predicate functor type having member T operator()(const T &a, const T &b) or member T operator()(const T &a, const T &b, unsigned int b_index), and returning \p true if a discontinuity exists between \p a and \p b, otherwise \p false. \p b_index is the rank of b in the aggregate tile of data. */ template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> __device__ __forceinline__ void FlagHeadsAndTails( FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags FlagT (&tail_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity tail_flags T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items FlagOp flag_op) ///< [in] Binary boolean flag predicate { // Share first and last items temp_storage.first_items[linear_tid] = input[0]; temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1]; CTA_SYNC(); T preds[ITEMS_PER_THREAD]; // Set flag for first thread-item preds[0] = temp_storage.last_items[linear_tid - 1]; if (linear_tid == 0) { head_flags[0] = 1; } else { head_flags[0] = ApplyOp::FlagT( flag_op, preds[0], input[0], linear_tid * ITEMS_PER_THREAD); } // Set flag for last thread-item tail_flags[ITEMS_PER_THREAD - 1] = (linear_tid == BLOCK_THREADS - 1) ? 1 : // Last thread ApplyOp::FlagT( flag_op, input[ITEMS_PER_THREAD - 1], temp_storage.first_items[linear_tid + 1], (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD); // Set head_flags for remaining items Iterate<1, ITEMS_PER_THREAD>::FlagHeads(linear_tid, head_flags, input, preds, flag_op); // Set tail_flags for remaining items Iterate<0, ITEMS_PER_THREAD - 1>::FlagTails(linear_tid, tail_flags, input, flag_op); } /** * \brief Sets both head and tail flags indicating discontinuities between items partitioned across the thread block. * * \par * - The flag head_flagsi is set for item * inputi when * flag_op(previous-item, inputi) * returns \p true (where previous-item is either the preceding item * in the same thread or the last item in the previous thread). * - For thread0, item input0 is always flagged. * - The flag tail_flagsi is set for item * inputi when * flag_op(inputi, next-item) * returns \p true (where next-item is either the next item * in the same thread or the first item in the next thread). * - For threadBLOCK_THREADS-1, item * inputITEMS_PER_THREAD-1 is compared * against \p tile_predecessor_item. * - \blocked * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates the head- and tail-flagging of 512 integer items that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive items. 
* \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockDiscontinuity for a 1D block of 128 threads on type int * typedef cub::BlockDiscontinuity BlockDiscontinuity; * * // Allocate shared memory for BlockDiscontinuity * __shared__ typename BlockDiscontinuity::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... * * // Have thread127 obtain the successor item for the entire tile * int tile_successor_item; * if (threadIdx.x == 127) tile_successor_item == ... * * // Collectively compute head and flags for discontinuities in the segment * int head_flags[4]; * int tail_flags[4]; * BlockDiscontinuity(temp_storage).FlagTails( * head_flags, tail_flags, tile_successor_item, thread_data, cub::Inequality()); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is * { [0,0,1,1], [1,1,1,1], [2,3,3,3], ..., [124,125,125,125] } * and that the tile_successor_item is \p 125. The corresponding output \p head_flags * in those threads will be { [1,0,1,0], [0,0,0,0], [1,1,0,0], [0,1,0,0], ... }. * and the corresponding output \p tail_flags in those threads will be * { [0,1,0,0], [0,0,0,1], [1,0,0,...], ..., [1,0,0,0] }. * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam FlagT [inferred] The flag type (must be an integer type) * \tparam FlagOp [inferred] Binary predicate functor type having member T operator()(const T &a, const T &b) or member T operator()(const T &a, const T &b, unsigned int b_index), and returning \p true if a discontinuity exists between \p a and \p b, otherwise \p false. \p b_index is the rank of b in the aggregate tile of data. */ template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> __device__ __forceinline__ void FlagHeadsAndTails( FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags FlagT (&tail_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity tail_flags T tile_successor_item, ///< [in] [threadBLOCK_THREADS-1 only] Item with which to compare the last tile item (inputITEMS_PER_THREAD-1 from threadBLOCK_THREADS-1). T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items FlagOp flag_op) ///< [in] Binary boolean flag predicate { // Share first and last items temp_storage.first_items[linear_tid] = input[0]; temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1]; CTA_SYNC(); T preds[ITEMS_PER_THREAD]; // Set flag for first thread-item if (linear_tid == 0) { head_flags[0] = 1; } else { preds[0] = temp_storage.last_items[linear_tid - 1]; head_flags[0] = ApplyOp::FlagT( flag_op, preds[0], input[0], linear_tid * ITEMS_PER_THREAD); } // Set flag for last thread-item T successor_item = (linear_tid == BLOCK_THREADS - 1) ? tile_successor_item : // Last thread temp_storage.first_items[linear_tid + 1]; tail_flags[ITEMS_PER_THREAD - 1] = ApplyOp::FlagT( flag_op, input[ITEMS_PER_THREAD - 1], successor_item, (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD); // Set head_flags for remaining items Iterate<1, ITEMS_PER_THREAD>::FlagHeads(linear_tid, head_flags, input, preds, flag_op); // Set tail_flags for remaining items Iterate<0, ITEMS_PER_THREAD - 1>::FlagTails(linear_tid, tail_flags, input, flag_op); } /** * \brief Sets both head and tail flags indicating discontinuities between items partitioned across the thread block. 
* * \par * - The flag head_flagsi is set for item * inputi when * flag_op(previous-item, inputi) * returns \p true (where previous-item is either the preceding item * in the same thread or the last item in the previous thread). * - For thread0, item input0 is compared * against \p tile_predecessor_item. * - The flag tail_flagsi is set for item * inputi when * flag_op(inputi, next-item) * returns \p true (where next-item is either the next item * in the same thread or the first item in the next thread). * - For threadBLOCK_THREADS-1, item * inputITEMS_PER_THREAD-1 is always flagged. * - \blocked * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates the head- and tail-flagging of 512 integer items that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockDiscontinuity for a 1D block of 128 threads on type int * typedef cub::BlockDiscontinuity BlockDiscontinuity; * * // Allocate shared memory for BlockDiscontinuity * __shared__ typename BlockDiscontinuity::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... * * // Have thread0 obtain the predecessor item for the entire tile * int tile_predecessor_item; * if (threadIdx.x == 0) tile_predecessor_item == ... * * // Have thread127 obtain the successor item for the entire tile * int tile_successor_item; * if (threadIdx.x == 127) tile_successor_item == ... * * // Collectively compute head and flags for discontinuities in the segment * int head_flags[4]; * int tail_flags[4]; * BlockDiscontinuity(temp_storage).FlagTails( * head_flags, tile_predecessor_item, tail_flags, tile_successor_item, * thread_data, cub::Inequality()); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is * { [0,0,1,1], [1,1,1,1], [2,3,3,3], ..., [124,125,125,125] }, * that the \p tile_predecessor_item is \p 0, and that the * \p tile_successor_item is \p 125. The corresponding output \p head_flags * in those threads will be { [0,0,1,0], [0,0,0,0], [1,1,0,0], [0,1,0,0], ... }. * and the corresponding output \p tail_flags in those threads will be * { [0,1,0,0], [0,0,0,1], [1,0,0,...], ..., [1,0,0,1] }. * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam FlagT [inferred] The flag type (must be an integer type) * \tparam FlagOp [inferred] Binary predicate functor type having member T operator()(const T &a, const T &b) or member T operator()(const T &a, const T &b, unsigned int b_index), and returning \p true if a discontinuity exists between \p a and \p b, otherwise \p false. \p b_index is the rank of b in the aggregate tile of data. */ template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> __device__ __forceinline__ void FlagHeadsAndTails( FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags T tile_predecessor_item, ///< [in] [thread0 only] Item with which to compare the first tile item (input0 from thread0). 
FlagT (&tail_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity tail_flags T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items FlagOp flag_op) ///< [in] Binary boolean flag predicate { // Share first and last items temp_storage.first_items[linear_tid] = input[0]; temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1]; CTA_SYNC(); T preds[ITEMS_PER_THREAD]; // Set flag for first thread-item preds[0] = (linear_tid == 0) ? tile_predecessor_item : // First thread temp_storage.last_items[linear_tid - 1]; head_flags[0] = ApplyOp::FlagT( flag_op, preds[0], input[0], linear_tid * ITEMS_PER_THREAD); // Set flag for last thread-item tail_flags[ITEMS_PER_THREAD - 1] = (linear_tid == BLOCK_THREADS - 1) ? 1 : // Last thread ApplyOp::FlagT( flag_op, input[ITEMS_PER_THREAD - 1], temp_storage.first_items[linear_tid + 1], (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD); // Set head_flags for remaining items Iterate<1, ITEMS_PER_THREAD>::FlagHeads(linear_tid, head_flags, input, preds, flag_op); // Set tail_flags for remaining items Iterate<0, ITEMS_PER_THREAD - 1>::FlagTails(linear_tid, tail_flags, input, flag_op); } /** * \brief Sets both head and tail flags indicating discontinuities between items partitioned across the thread block. * * \par * - The flag head_flagsi is set for item * inputi when * flag_op(previous-item, inputi) * returns \p true (where previous-item is either the preceding item * in the same thread or the last item in the previous thread). * - For thread0, item input0 is compared * against \p tile_predecessor_item. * - The flag tail_flagsi is set for item * inputi when * flag_op(inputi, next-item) * returns \p true (where next-item is either the next item * in the same thread or the first item in the next thread). * - For threadBLOCK_THREADS-1, item * inputITEMS_PER_THREAD-1 is compared * against \p tile_successor_item. * - \blocked * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates the head- and tail-flagging of 512 integer items that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockDiscontinuity for a 1D block of 128 threads on type int * typedef cub::BlockDiscontinuity BlockDiscontinuity; * * // Allocate shared memory for BlockDiscontinuity * __shared__ typename BlockDiscontinuity::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... * * // Have thread0 obtain the predecessor item for the entire tile * int tile_predecessor_item; * if (threadIdx.x == 0) tile_predecessor_item == ... * * // Have thread127 obtain the successor item for the entire tile * int tile_successor_item; * if (threadIdx.x == 127) tile_successor_item == ... * * // Collectively compute head and flags for discontinuities in the segment * int head_flags[4]; * int tail_flags[4]; * BlockDiscontinuity(temp_storage).FlagTails( * head_flags, tile_predecessor_item, tail_flags, tile_successor_item, * thread_data, cub::Inequality()); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is * { [0,0,1,1], [1,1,1,1], [2,3,3,3], ..., [124,125,125,125] }, * that the \p tile_predecessor_item is \p 0, and that the * \p tile_successor_item is \p 125. 
The corresponding output \p head_flags * in those threads will be { [0,0,1,0], [0,0,0,0], [1,1,0,0], [0,1,0,0], ... }. * and the corresponding output \p tail_flags in those threads will be * { [0,1,0,0], [0,0,0,1], [1,0,0,...], ..., [1,0,0,0] }. * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam FlagT [inferred] The flag type (must be an integer type) * \tparam FlagOp [inferred] Binary predicate functor type having member T operator()(const T &a, const T &b) or member T operator()(const T &a, const T &b, unsigned int b_index), and returning \p true if a discontinuity exists between \p a and \p b, otherwise \p false. \p b_index is the rank of b in the aggregate tile of data. */ template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> __device__ __forceinline__ void FlagHeadsAndTails( FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags T tile_predecessor_item, ///< [in] [thread0 only] Item with which to compare the first tile item (input0 from thread0). FlagT (&tail_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity tail_flags T tile_successor_item, ///< [in] [threadBLOCK_THREADS-1 only] Item with which to compare the last tile item (inputITEMS_PER_THREAD-1 from threadBLOCK_THREADS-1). T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items FlagOp flag_op) ///< [in] Binary boolean flag predicate { // Share first and last items temp_storage.first_items[linear_tid] = input[0]; temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1]; CTA_SYNC(); T preds[ITEMS_PER_THREAD]; // Set flag for first thread-item preds[0] = (linear_tid == 0) ? tile_predecessor_item : // First thread temp_storage.last_items[linear_tid - 1]; head_flags[0] = ApplyOp::FlagT( flag_op, preds[0], input[0], linear_tid * ITEMS_PER_THREAD); // Set flag for last thread-item T successor_item = (linear_tid == BLOCK_THREADS - 1) ? tile_successor_item : // Last thread temp_storage.first_items[linear_tid + 1]; tail_flags[ITEMS_PER_THREAD - 1] = ApplyOp::FlagT( flag_op, input[ITEMS_PER_THREAD - 1], successor_item, (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD); // Set head_flags for remaining items Iterate<1, ITEMS_PER_THREAD>::FlagHeads(linear_tid, head_flags, input, preds, flag_op); // Set tail_flags for remaining items Iterate<0, ITEMS_PER_THREAD - 1>::FlagTails(linear_tid, tail_flags, input, flag_op); } //@} end member group }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/block/block_exchange.cuh000066400000000000000000001461211411340063500225410ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * The cub::BlockExchange class provides [collective](index.html#sec0) methods for rearranging data partitioned across a CUDA thread block. */ #pragma once #include "../util_ptx.cuh" #include "../util_arch.cuh" #include "../util_macro.cuh" #include "../util_type.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief The BlockExchange class provides [collective](index.html#sec0) methods for rearranging data partitioned across a CUDA thread block. ![](transpose_logo.png) * \ingroup BlockModule * * \tparam T The data type to be exchanged. * \tparam BLOCK_DIM_X The thread block length in threads along the X dimension * \tparam ITEMS_PER_THREAD The number of items partitioned onto each thread. * \tparam WARP_TIME_SLICING [optional] When \p true, only use enough shared memory for a single warp's worth of tile data, time-slicing the block-wide exchange over multiple synchronized rounds. Yields a smaller memory footprint at the expense of decreased parallelism. (Default: false) * \tparam BLOCK_DIM_Y [optional] The thread block length in threads along the Y dimension (default: 1) * \tparam BLOCK_DIM_Z [optional] The thread block length in threads along the Z dimension (default: 1) * \tparam PTX_ARCH [optional] \ptxversion * * \par Overview * - It is commonplace for blocks of threads to rearrange data items between * threads. For example, the device-accessible memory subsystem prefers access patterns * where data items are "striped" across threads (where consecutive threads access consecutive items), * yet most block-wide operations prefer a "blocked" partitioning of items across threads * (where consecutive items belong to a single thread). * - BlockExchange supports the following types of data exchanges: * - Transposing between [blocked](index.html#sec5sec3) and [striped](index.html#sec5sec3) arrangements * - Transposing between [blocked](index.html#sec5sec3) and [warp-striped](index.html#sec5sec3) arrangements * - Scattering ranked items to a [blocked arrangement](index.html#sec5sec3) * - Scattering ranked items to a [striped arrangement](index.html#sec5sec3) * - \rowmajor * * \par A Simple Example * \blockcollective{BlockExchange} * \par * The code snippet below illustrates the conversion from a "blocked" to a "striped" arrangement * of 512 integer items partitioned across 128 threads where each thread owns 4 items. 
* \par * \code * #include // or equivalently * * __global__ void ExampleKernel(int *d_data, ...) * { * // Specialize BlockExchange for a 1D block of 128 threads owning 4 integer items each * typedef cub::BlockExchange BlockExchange; * * // Allocate shared memory for BlockExchange * __shared__ typename BlockExchange::TempStorage temp_storage; * * // Load a tile of data striped across threads * int thread_data[4]; * cub::LoadDirectStriped<128>(threadIdx.x, d_data, thread_data); * * // Collectively exchange data into a blocked arrangement across threads * BlockExchange(temp_storage).StripedToBlocked(thread_data); * * \endcode * \par * Suppose the set of striped input \p thread_data across the block of threads is * { [0,128,256,384], [1,129,257,385], ..., [127,255,383,511] }. * The corresponding output \p thread_data in those threads will be * { [0,1,2,3], [4,5,6,7], [8,9,10,11], ..., [508,509,510,511] }. * * \par Performance Considerations * - Proper device-specific padding ensures zero bank conflicts for most types. * */ template < typename InputT, int BLOCK_DIM_X, int ITEMS_PER_THREAD, bool WARP_TIME_SLICING = false, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH> class BlockExchange { private: /****************************************************************************** * Constants ******************************************************************************/ /// Constants enum { /// The thread block size in threads BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, LOG_WARP_THREADS = CUB_LOG_WARP_THREADS(PTX_ARCH), WARP_THREADS = 1 << LOG_WARP_THREADS, WARPS = (BLOCK_THREADS + WARP_THREADS - 1) / WARP_THREADS, LOG_SMEM_BANKS = CUB_LOG_SMEM_BANKS(PTX_ARCH), SMEM_BANKS = 1 << LOG_SMEM_BANKS, TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, TIME_SLICES = (WARP_TIME_SLICING) ? WARPS : 1, TIME_SLICED_THREADS = (WARP_TIME_SLICING) ? CUB_MIN(BLOCK_THREADS, WARP_THREADS) : BLOCK_THREADS, TIME_SLICED_ITEMS = TIME_SLICED_THREADS * ITEMS_PER_THREAD, WARP_TIME_SLICED_THREADS = CUB_MIN(BLOCK_THREADS, WARP_THREADS), WARP_TIME_SLICED_ITEMS = WARP_TIME_SLICED_THREADS * ITEMS_PER_THREAD, // Insert padding to avoid bank conflicts during raking when items per thread is a power of two and > 4 (otherwise we can typically use 128b loads) INSERT_PADDING = (ITEMS_PER_THREAD > 4) && (PowerOfTwo::VALUE), PADDING_ITEMS = (INSERT_PADDING) ? 
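            // one padding slot is reserved per SMEM_BANKS items; offsets into the shared
            // buffer are later skewed by (offset >> LOG_SMEM_BANKS) to spread accesses over banks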
(TIME_SLICED_ITEMS >> LOG_SMEM_BANKS) : 0, }; /****************************************************************************** * Type definitions ******************************************************************************/ /// Shared memory storage layout type struct __align__(16) _TempStorage { InputT buff[TIME_SLICED_ITEMS + PADDING_ITEMS]; }; public: /// \smemstorage{BlockExchange} struct TempStorage : Uninitialized<_TempStorage> {}; private: /****************************************************************************** * Thread fields ******************************************************************************/ /// Shared storage reference _TempStorage &temp_storage; /// Linear thread-id unsigned int linear_tid; unsigned int lane_id; unsigned int warp_id; unsigned int warp_offset; /****************************************************************************** * Utility methods ******************************************************************************/ /// Internal storage allocator __device__ __forceinline__ _TempStorage& PrivateStorage() { __shared__ _TempStorage private_storage; return private_storage; } /** * Transposes data items from blocked arrangement to striped arrangement. Specialized for no timeslicing. */ template __device__ __forceinline__ void BlockedToStriped( InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between blocked and striped arrangements. OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items to exchange, converting between blocked and striped arrangements. Int2Type /*time_slicing*/) { #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = (linear_tid * ITEMS_PER_THREAD) + ITEM; if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; temp_storage.buff[item_offset] = input_items[ITEM]; } CTA_SYNC(); #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = int(ITEM * BLOCK_THREADS) + linear_tid; if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; output_items[ITEM] = temp_storage.buff[item_offset]; } } /** * Transposes data items from blocked arrangement to striped arrangement. Specialized for warp-timeslicing. */ template __device__ __forceinline__ void BlockedToStriped( InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between blocked and striped arrangements. OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items to exchange, converting between blocked and striped arrangements. 
Int2Type /*time_slicing*/) { InputT temp_items[ITEMS_PER_THREAD]; #pragma unroll for (int SLICE = 0; SLICE < TIME_SLICES; SLICE++) { const int SLICE_OFFSET = SLICE * TIME_SLICED_ITEMS; const int SLICE_OOB = SLICE_OFFSET + TIME_SLICED_ITEMS; CTA_SYNC(); if (warp_id == SLICE) { #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = (lane_id * ITEMS_PER_THREAD) + ITEM; if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; temp_storage.buff[item_offset] = input_items[ITEM]; } } CTA_SYNC(); #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { // Read a strip of items const int STRIP_OFFSET = ITEM * BLOCK_THREADS; const int STRIP_OOB = STRIP_OFFSET + BLOCK_THREADS; if ((SLICE_OFFSET < STRIP_OOB) && (SLICE_OOB > STRIP_OFFSET)) { int item_offset = STRIP_OFFSET + linear_tid - SLICE_OFFSET; if ((item_offset >= 0) && (item_offset < TIME_SLICED_ITEMS)) { if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; temp_items[ITEM] = temp_storage.buff[item_offset]; } } } } // Copy #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { output_items[ITEM] = temp_items[ITEM]; } } /** * Transposes data items from blocked arrangement to warp-striped arrangement. Specialized for no timeslicing */ template __device__ __forceinline__ void BlockedToWarpStriped( InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between blocked and striped arrangements. OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items to exchange, converting between blocked and striped arrangements. Int2Type /*time_slicing*/) { #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = warp_offset + ITEM + (lane_id * ITEMS_PER_THREAD); if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; temp_storage.buff[item_offset] = input_items[ITEM]; } WARP_SYNC(0xffffffff); #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = warp_offset + (ITEM * WARP_TIME_SLICED_THREADS) + lane_id; if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; output_items[ITEM] = temp_storage.buff[item_offset]; } } /** * Transposes data items from blocked arrangement to warp-striped arrangement. Specialized for warp-timeslicing */ template __device__ __forceinline__ void BlockedToWarpStriped( InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between blocked and striped arrangements. OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items to exchange, converting between blocked and striped arrangements. 
Int2Type /*time_slicing*/) { if (warp_id == 0) { #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = ITEM + (lane_id * ITEMS_PER_THREAD); if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; temp_storage.buff[item_offset] = input_items[ITEM]; } WARP_SYNC(0xffffffff); #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = (ITEM * WARP_TIME_SLICED_THREADS) + lane_id; if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; output_items[ITEM] = temp_storage.buff[item_offset]; } } #pragma unroll for (unsigned int SLICE = 1; SLICE < TIME_SLICES; ++SLICE) { CTA_SYNC(); if (warp_id == SLICE) { #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = ITEM + (lane_id * ITEMS_PER_THREAD); if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; temp_storage.buff[item_offset] = input_items[ITEM]; } WARP_SYNC(0xffffffff); #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = (ITEM * WARP_TIME_SLICED_THREADS) + lane_id; if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; output_items[ITEM] = temp_storage.buff[item_offset]; } } } } /** * Transposes data items from striped arrangement to blocked arrangement. Specialized for no timeslicing. */ template __device__ __forceinline__ void StripedToBlocked( InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between blocked and striped arrangements. OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items to exchange, converting between blocked and striped arrangements. Int2Type /*time_slicing*/) { #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = int(ITEM * BLOCK_THREADS) + linear_tid; if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; temp_storage.buff[item_offset] = input_items[ITEM]; } CTA_SYNC(); // No timeslicing #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = (linear_tid * ITEMS_PER_THREAD) + ITEM; if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; output_items[ITEM] = temp_storage.buff[item_offset]; } } /** * Transposes data items from striped arrangement to blocked arrangement. Specialized for warp-timeslicing. */ template __device__ __forceinline__ void StripedToBlocked( InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between blocked and striped arrangements. OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items to exchange, converting between blocked and striped arrangements. 
Int2Type /*time_slicing*/) { // Warp time-slicing InputT temp_items[ITEMS_PER_THREAD]; #pragma unroll for (int SLICE = 0; SLICE < TIME_SLICES; SLICE++) { const int SLICE_OFFSET = SLICE * TIME_SLICED_ITEMS; const int SLICE_OOB = SLICE_OFFSET + TIME_SLICED_ITEMS; CTA_SYNC(); #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { // Write a strip of items const int STRIP_OFFSET = ITEM * BLOCK_THREADS; const int STRIP_OOB = STRIP_OFFSET + BLOCK_THREADS; if ((SLICE_OFFSET < STRIP_OOB) && (SLICE_OOB > STRIP_OFFSET)) { int item_offset = STRIP_OFFSET + linear_tid - SLICE_OFFSET; if ((item_offset >= 0) && (item_offset < TIME_SLICED_ITEMS)) { if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; temp_storage.buff[item_offset] = input_items[ITEM]; } } } CTA_SYNC(); if (warp_id == SLICE) { #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = (lane_id * ITEMS_PER_THREAD) + ITEM; if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; temp_items[ITEM] = temp_storage.buff[item_offset]; } } } // Copy #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { output_items[ITEM] = temp_items[ITEM]; } } /** * Transposes data items from warp-striped arrangement to blocked arrangement. Specialized for no timeslicing */ template __device__ __forceinline__ void WarpStripedToBlocked( InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between blocked and striped arrangements. OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items to exchange, converting between blocked and striped arrangements. Int2Type /*time_slicing*/) { #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = warp_offset + (ITEM * WARP_TIME_SLICED_THREADS) + lane_id; if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; temp_storage.buff[item_offset] = input_items[ITEM]; } WARP_SYNC(0xffffffff); #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = warp_offset + ITEM + (lane_id * ITEMS_PER_THREAD); if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; output_items[ITEM] = temp_storage.buff[item_offset]; } } /** * Transposes data items from warp-striped arrangement to blocked arrangement. Specialized for warp-timeslicing */ template __device__ __forceinline__ void WarpStripedToBlocked( InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between blocked and striped arrangements. OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items to exchange, converting between blocked and striped arrangements. Int2Type /*time_slicing*/) { #pragma unroll for (unsigned int SLICE = 0; SLICE < TIME_SLICES; ++SLICE) { CTA_SYNC(); if (warp_id == SLICE) { #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = (ITEM * WARP_TIME_SLICED_THREADS) + lane_id; if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; temp_storage.buff[item_offset] = input_items[ITEM]; } WARP_SYNC(0xffffffff); #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = ITEM + (lane_id * ITEMS_PER_THREAD); if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; output_items[ITEM] = temp_storage.buff[item_offset]; } } } } /** * Exchanges data items annotated by rank into blocked arrangement. Specialized for no timeslicing. 
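     *
     * Each thread scatters its items into shared memory at the offsets given by \p ranks
     * (with optional bank-conflict padding), the block synchronizes, and each thread then
     * gathers the blocked segment starting at (linear_tid * ITEMS_PER_THREAD). For example,
     * with two threads owning two items each, inputs {a,b},{c,d} and ranks {1,0},{3,2}
     * yield outputs {b,a},{d,c}.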
*/ template __device__ __forceinline__ void ScatterToBlocked( InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between blocked and striped arrangements. OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items to exchange, converting between blocked and striped arrangements. OffsetT ranks[ITEMS_PER_THREAD], ///< [in] Corresponding scatter ranks Int2Type /*time_slicing*/) { #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = ranks[ITEM]; if (INSERT_PADDING) item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); temp_storage.buff[item_offset] = input_items[ITEM]; } CTA_SYNC(); #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = (linear_tid * ITEMS_PER_THREAD) + ITEM; if (INSERT_PADDING) item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); output_items[ITEM] = temp_storage.buff[item_offset]; } } /** * Exchanges data items annotated by rank into blocked arrangement. Specialized for warp-timeslicing. */ template __device__ __forceinline__ void ScatterToBlocked( InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between blocked and striped arrangements. OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items to exchange, converting between blocked and striped arrangements. OffsetT ranks[ITEMS_PER_THREAD], ///< [in] Corresponding scatter ranks Int2Type /*time_slicing*/) { InputT temp_items[ITEMS_PER_THREAD]; #pragma unroll for (int SLICE = 0; SLICE < TIME_SLICES; SLICE++) { CTA_SYNC(); const int SLICE_OFFSET = TIME_SLICED_ITEMS * SLICE; #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = ranks[ITEM] - SLICE_OFFSET; if ((item_offset >= 0) && (item_offset < WARP_TIME_SLICED_ITEMS)) { if (INSERT_PADDING) item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); temp_storage.buff[item_offset] = input_items[ITEM]; } } CTA_SYNC(); if (warp_id == SLICE) { #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = (lane_id * ITEMS_PER_THREAD) + ITEM; if (INSERT_PADDING) item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); temp_items[ITEM] = temp_storage.buff[item_offset]; } } } // Copy #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { output_items[ITEM] = temp_items[ITEM]; } } /** * Exchanges data items annotated by rank into striped arrangement. Specialized for no timeslicing. */ template __device__ __forceinline__ void ScatterToStriped( InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between blocked and striped arrangements. OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items to exchange, converting between blocked and striped arrangements. OffsetT ranks[ITEMS_PER_THREAD], ///< [in] Corresponding scatter ranks Int2Type /*time_slicing*/) { #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = ranks[ITEM]; if (INSERT_PADDING) item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); temp_storage.buff[item_offset] = input_items[ITEM]; } CTA_SYNC(); #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = int(ITEM * BLOCK_THREADS) + linear_tid; if (INSERT_PADDING) item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); output_items[ITEM] = temp_storage.buff[item_offset]; } } /** * Exchanges data items annotated by rank into striped arrangement. Specialized for warp-timeslicing. 
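     *
     * The exchange proceeds one warp-sized slice at a time: in each round, only items whose
     * ranks fall inside the current slice are scattered into the reduced shared buffer, every
     * thread then collects the portion of its striped output that lies in that slice, and the
     * per-thread results are staged in registers until all rounds have completed.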
*/ template __device__ __forceinline__ void ScatterToStriped( InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between blocked and striped arrangements. OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items to exchange, converting between blocked and striped arrangements. OffsetT ranks[ITEMS_PER_THREAD], ///< [in] Corresponding scatter ranks Int2Type /*time_slicing*/) { InputT temp_items[ITEMS_PER_THREAD]; #pragma unroll for (int SLICE = 0; SLICE < TIME_SLICES; SLICE++) { const int SLICE_OFFSET = SLICE * TIME_SLICED_ITEMS; const int SLICE_OOB = SLICE_OFFSET + TIME_SLICED_ITEMS; CTA_SYNC(); #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = ranks[ITEM] - SLICE_OFFSET; if ((item_offset >= 0) && (item_offset < WARP_TIME_SLICED_ITEMS)) { if (INSERT_PADDING) item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); temp_storage.buff[item_offset] = input_items[ITEM]; } } CTA_SYNC(); #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { // Read a strip of items const int STRIP_OFFSET = ITEM * BLOCK_THREADS; const int STRIP_OOB = STRIP_OFFSET + BLOCK_THREADS; if ((SLICE_OFFSET < STRIP_OOB) && (SLICE_OOB > STRIP_OFFSET)) { int item_offset = STRIP_OFFSET + linear_tid - SLICE_OFFSET; if ((item_offset >= 0) && (item_offset < TIME_SLICED_ITEMS)) { if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; temp_items[ITEM] = temp_storage.buff[item_offset]; } } } } // Copy #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { output_items[ITEM] = temp_items[ITEM]; } } public: /******************************************************************//** * \name Collective constructors *********************************************************************/ //@{ /** * \brief Collective constructor using a private static allocation of shared memory as temporary storage. */ __device__ __forceinline__ BlockExchange() : temp_storage(PrivateStorage()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)), warp_id((WARPS == 1) ? 0 : linear_tid / WARP_THREADS), lane_id(LaneId()), warp_offset(warp_id * WARP_TIME_SLICED_ITEMS) {} /** * \brief Collective constructor using the specified memory allocation as temporary storage. */ __device__ __forceinline__ BlockExchange( TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage : temp_storage(temp_storage.Alias()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)), lane_id(LaneId()), warp_id((WARPS == 1) ? 0 : linear_tid / WARP_THREADS), warp_offset(warp_id * WARP_TIME_SLICED_ITEMS) {} //@} end member group /******************************************************************//** * \name Structured exchanges *********************************************************************/ //@{ /** * \brief Transposes data items from striped arrangement to blocked arrangement. * * \par * - \smemreuse * * \par Snippet * The code snippet below illustrates the conversion from a "striped" to a "blocked" arrangement * of 512 integer items partitioned across 128 threads where each thread owns 4 items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(int *d_data, ...) 
* { * // Specialize BlockExchange for a 1D block of 128 threads owning 4 integer items each * typedef cub::BlockExchange BlockExchange; * * // Allocate shared memory for BlockExchange * __shared__ typename BlockExchange::TempStorage temp_storage; * * // Load a tile of ordered data into a striped arrangement across block threads * int thread_data[4]; * cub::LoadDirectStriped<128>(threadIdx.x, d_data, thread_data); * * // Collectively exchange data into a blocked arrangement across threads * BlockExchange(temp_storage).StripedToBlocked(thread_data, thread_data); * * \endcode * \par * Suppose the set of striped input \p thread_data across the block of threads is * { [0,128,256,384], [1,129,257,385], ..., [127,255,383,511] } after loading from device-accessible memory. * The corresponding output \p thread_data in those threads will be * { [0,1,2,3], [4,5,6,7], [8,9,10,11], ..., [508,509,510,511] }. * */ template __device__ __forceinline__ void StripedToBlocked( InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between striped and blocked arrangements. OutputT output_items[ITEMS_PER_THREAD]) ///< [out] Items from exchange, converting between striped and blocked arrangements. { StripedToBlocked(input_items, output_items, Int2Type()); } /** * \brief Transposes data items from blocked arrangement to striped arrangement. * * \par * - \smemreuse * * \par Snippet * The code snippet below illustrates the conversion from a "blocked" to a "striped" arrangement * of 512 integer items partitioned across 128 threads where each thread owns 4 items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(int *d_data, ...) * { * // Specialize BlockExchange for a 1D block of 128 threads owning 4 integer items each * typedef cub::BlockExchange BlockExchange; * * // Allocate shared memory for BlockExchange * __shared__ typename BlockExchange::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... * * // Collectively exchange data into a striped arrangement across threads * BlockExchange(temp_storage).BlockedToStriped(thread_data, thread_data); * * // Store data striped across block threads into an ordered tile * cub::StoreDirectStriped(threadIdx.x, d_data, thread_data); * * \endcode * \par * Suppose the set of blocked input \p thread_data across the block of threads is * { [0,1,2,3], [4,5,6,7], [8,9,10,11], ..., [508,509,510,511] }. * The corresponding output \p thread_data in those threads will be * { [0,128,256,384], [1,129,257,385], ..., [127,255,383,511] } in * preparation for storing to device-accessible memory. * */ template __device__ __forceinline__ void BlockedToStriped( InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between striped and blocked arrangements. OutputT output_items[ITEMS_PER_THREAD]) ///< [out] Items from exchange, converting between striped and blocked arrangements. { BlockedToStriped(input_items, output_items, Int2Type()); } /** * \brief Transposes data items from warp-striped arrangement to blocked arrangement. * * \par * - \smemreuse * * \par Snippet * The code snippet below illustrates the conversion from a "warp-striped" to a "blocked" arrangement * of 512 integer items partitioned across 128 threads where each thread owns 4 items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(int *d_data, ...) 
* { * // Specialize BlockExchange for a 1D block of 128 threads owning 4 integer items each * typedef cub::BlockExchange BlockExchange; * * // Allocate shared memory for BlockExchange * __shared__ typename BlockExchange::TempStorage temp_storage; * * // Load a tile of ordered data into a warp-striped arrangement across warp threads * int thread_data[4]; * cub::LoadSWarptriped(threadIdx.x, d_data, thread_data); * * // Collectively exchange data into a blocked arrangement across threads * BlockExchange(temp_storage).WarpStripedToBlocked(thread_data); * * \endcode * \par * Suppose the set of warp-striped input \p thread_data across the block of threads is * { [0,32,64,96], [1,33,65,97], [2,34,66,98], ..., [415,447,479,511] } * after loading from device-accessible memory. (The first 128 items are striped across * the first warp of 32 threads, the second 128 items are striped across the second warp, etc.) * The corresponding output \p thread_data in those threads will be * { [0,1,2,3], [4,5,6,7], [8,9,10,11], ..., [508,509,510,511] }. * */ template __device__ __forceinline__ void WarpStripedToBlocked( InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between striped and blocked arrangements. OutputT output_items[ITEMS_PER_THREAD]) ///< [out] Items from exchange, converting between striped and blocked arrangements. { WarpStripedToBlocked(input_items, output_items, Int2Type()); } /** * \brief Transposes data items from blocked arrangement to warp-striped arrangement. * * \par * - \smemreuse * * \par Snippet * The code snippet below illustrates the conversion from a "blocked" to a "warp-striped" arrangement * of 512 integer items partitioned across 128 threads where each thread owns 4 items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(int *d_data, ...) * { * // Specialize BlockExchange for a 1D block of 128 threads owning 4 integer items each * typedef cub::BlockExchange BlockExchange; * * // Allocate shared memory for BlockExchange * __shared__ typename BlockExchange::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... * * // Collectively exchange data into a warp-striped arrangement across threads * BlockExchange(temp_storage).BlockedToWarpStriped(thread_data, thread_data); * * // Store data striped across warp threads into an ordered tile * cub::StoreDirectStriped(threadIdx.x, d_data, thread_data); * * \endcode * \par * Suppose the set of blocked input \p thread_data across the block of threads is * { [0,1,2,3], [4,5,6,7], [8,9,10,11], ..., [508,509,510,511] }. * The corresponding output \p thread_data in those threads will be * { [0,32,64,96], [1,33,65,97], [2,34,66,98], ..., [415,447,479,511] } * in preparation for storing to device-accessible memory. (The first 128 items are striped across * the first warp of 32 threads, the second 128 items are striped across the second warp, etc.) * */ template __device__ __forceinline__ void BlockedToWarpStriped( InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between striped and blocked arrangements. OutputT output_items[ITEMS_PER_THREAD]) ///< [out] Items from exchange, converting between striped and blocked arrangements. 
{ BlockedToWarpStriped(input_items, output_items, Int2Type()); } //@} end member group /******************************************************************//** * \name Scatter exchanges *********************************************************************/ //@{ /** * \brief Exchanges data items annotated by rank into blocked arrangement. * * \par * - \smemreuse * * \tparam OffsetT [inferred] Signed integer type for local offsets */ template __device__ __forceinline__ void ScatterToBlocked( InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between striped and blocked arrangements. OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items from exchange, converting between striped and blocked arrangements. OffsetT ranks[ITEMS_PER_THREAD]) ///< [in] Corresponding scatter ranks { ScatterToBlocked(input_items, output_items, ranks, Int2Type()); } /** * \brief Exchanges data items annotated by rank into striped arrangement. * * \par * - \smemreuse * * \tparam OffsetT [inferred] Signed integer type for local offsets */ template __device__ __forceinline__ void ScatterToStriped( InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between striped and blocked arrangements. OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items from exchange, converting between striped and blocked arrangements. OffsetT ranks[ITEMS_PER_THREAD]) ///< [in] Corresponding scatter ranks { ScatterToStriped(input_items, output_items, ranks, Int2Type()); } /** * \brief Exchanges data items annotated by rank into striped arrangement. Items with rank -1 are not exchanged. * * \par * - \smemreuse * * \tparam OffsetT [inferred] Signed integer type for local offsets */ template __device__ __forceinline__ void ScatterToStripedGuarded( InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between striped and blocked arrangements. OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items from exchange, converting between striped and blocked arrangements. OffsetT ranks[ITEMS_PER_THREAD]) ///< [in] Corresponding scatter ranks { #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = ranks[ITEM]; if (INSERT_PADDING) item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); if (ranks[ITEM] >= 0) temp_storage.buff[item_offset] = input_items[ITEM]; } CTA_SYNC(); #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = int(ITEM * BLOCK_THREADS) + linear_tid; if (INSERT_PADDING) item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); output_items[ITEM] = temp_storage.buff[item_offset]; } } /** * \brief Exchanges valid data items annotated by rank into striped arrangement. * * \par * - \smemreuse * * \tparam OffsetT [inferred] Signed integer type for local offsets * \tparam ValidFlag [inferred] FlagT type denoting which items are valid */ template __device__ __forceinline__ void ScatterToStripedFlagged( InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between striped and blocked arrangements. OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items from exchange, converting between striped and blocked arrangements. 
OffsetT ranks[ITEMS_PER_THREAD], ///< [in] Corresponding scatter ranks ValidFlag is_valid[ITEMS_PER_THREAD]) ///< [in] Corresponding flag denoting item validity { #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = ranks[ITEM]; if (INSERT_PADDING) item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); if (is_valid[ITEM]) temp_storage.buff[item_offset] = input_items[ITEM]; } CTA_SYNC(); #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = int(ITEM * BLOCK_THREADS) + linear_tid; if (INSERT_PADDING) item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); output_items[ITEM] = temp_storage.buff[item_offset]; } } //@} end member group #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document __device__ __forceinline__ void StripedToBlocked( InputT items[ITEMS_PER_THREAD]) ///< [in-out] Items to exchange, converting between striped and blocked arrangements. { StripedToBlocked(items, items); } __device__ __forceinline__ void BlockedToStriped( InputT items[ITEMS_PER_THREAD]) ///< [in-out] Items to exchange, converting between striped and blocked arrangements. { BlockedToStriped(items, items); } __device__ __forceinline__ void WarpStripedToBlocked( InputT items[ITEMS_PER_THREAD]) ///< [in-out] Items to exchange, converting between striped and blocked arrangements. { WarpStripedToBlocked(items, items); } __device__ __forceinline__ void BlockedToWarpStriped( InputT items[ITEMS_PER_THREAD]) ///< [in-out] Items to exchange, converting between striped and blocked arrangements. { BlockedToWarpStriped(items, items); } template __device__ __forceinline__ void ScatterToBlocked( InputT items[ITEMS_PER_THREAD], ///< [in-out] Items to exchange, converting between striped and blocked arrangements. OffsetT ranks[ITEMS_PER_THREAD]) ///< [in] Corresponding scatter ranks { ScatterToBlocked(items, items, ranks); } template __device__ __forceinline__ void ScatterToStriped( InputT items[ITEMS_PER_THREAD], ///< [in-out] Items to exchange, converting between striped and blocked arrangements. OffsetT ranks[ITEMS_PER_THREAD]) ///< [in] Corresponding scatter ranks { ScatterToStriped(items, items, ranks); } template __device__ __forceinline__ void ScatterToStripedGuarded( InputT items[ITEMS_PER_THREAD], ///< [in-out] Items to exchange, converting between striped and blocked arrangements. OffsetT ranks[ITEMS_PER_THREAD]) ///< [in] Corresponding scatter ranks { ScatterToStripedGuarded(items, items, ranks); } template __device__ __forceinline__ void ScatterToStripedFlagged( InputT items[ITEMS_PER_THREAD], ///< [in-out] Items to exchange, converting between striped and blocked arrangements. 
OffsetT ranks[ITEMS_PER_THREAD], ///< [in] Corresponding scatter ranks ValidFlag is_valid[ITEMS_PER_THREAD]) ///< [in] Corresponding flag denoting item validity { ScatterToStriped(items, items, ranks, is_valid); } #endif // DOXYGEN_SHOULD_SKIP_THIS }; #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document template < typename T, int ITEMS_PER_THREAD, int LOGICAL_WARP_THREADS = CUB_PTX_WARP_THREADS, int PTX_ARCH = CUB_PTX_ARCH> class WarpExchange { private: /****************************************************************************** * Constants ******************************************************************************/ /// Constants enum { // Whether the logical warp size and the PTX warp size coincide IS_ARCH_WARP = (LOGICAL_WARP_THREADS == CUB_WARP_THREADS(PTX_ARCH)), WARP_ITEMS = (ITEMS_PER_THREAD * LOGICAL_WARP_THREADS) + 1, LOG_SMEM_BANKS = CUB_LOG_SMEM_BANKS(PTX_ARCH), SMEM_BANKS = 1 << LOG_SMEM_BANKS, // Insert padding if the number of items per thread is a power of two and > 4 (otherwise we can typically use 128b loads) INSERT_PADDING = (ITEMS_PER_THREAD > 4) && (PowerOfTwo::VALUE), PADDING_ITEMS = (INSERT_PADDING) ? (WARP_ITEMS >> LOG_SMEM_BANKS) : 0, }; /****************************************************************************** * Type definitions ******************************************************************************/ /// Shared memory storage layout type struct _TempStorage { T buff[WARP_ITEMS + PADDING_ITEMS]; }; public: /// \smemstorage{WarpExchange} struct TempStorage : Uninitialized<_TempStorage> {}; private: /****************************************************************************** * Thread fields ******************************************************************************/ _TempStorage &temp_storage; int lane_id; public: /****************************************************************************** * Construction ******************************************************************************/ /// Constructor __device__ __forceinline__ WarpExchange( TempStorage &temp_storage) : temp_storage(temp_storage.Alias()), lane_id(IS_ARCH_WARP ? LaneId() : LaneId() % LOGICAL_WARP_THREADS) {} /****************************************************************************** * Interface ******************************************************************************/ /** * \brief Exchanges valid data items annotated by rank into striped arrangement. * * \par * - \smemreuse * * \tparam OffsetT [inferred] Signed integer type for local offsets */ template __device__ __forceinline__ void ScatterToStriped( T items[ITEMS_PER_THREAD], ///< [in-out] Items to exchange OffsetT ranks[ITEMS_PER_THREAD]) ///< [in] Corresponding scatter ranks { #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { if (INSERT_PADDING) ranks[ITEM] = SHR_ADD(ranks[ITEM], LOG_SMEM_BANKS, ranks[ITEM]); temp_storage.buff[ranks[ITEM]] = items[ITEM]; } WARP_SYNC(0xffffffff); #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = (ITEM * LOGICAL_WARP_THREADS) + lane_id; if (INSERT_PADDING) item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); items[ITEM] = temp_storage.buff[item_offset]; } } }; #endif // DOXYGEN_SHOULD_SKIP_THIS } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/block/block_histogram.cuh000066400000000000000000000376431411340063500227640ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. 
All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * The cub::BlockHistogram class provides [collective](index.html#sec0) methods for constructing block-wide histograms from data samples partitioned across a CUDA thread block. */ #pragma once #include "specializations/block_histogram_sort.cuh" #include "specializations/block_histogram_atomic.cuh" #include "../util_ptx.cuh" #include "../util_arch.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * Algorithmic variants ******************************************************************************/ /** * \brief BlockHistogramAlgorithm enumerates alternative algorithms for the parallel construction of block-wide histograms. */ enum BlockHistogramAlgorithm { /** * \par Overview * Sorting followed by differentiation. Execution is comprised of two phases: * -# Sort the data using efficient radix sort * -# Look for "runs" of same-valued keys by detecting discontinuities; the run-lengths are histogram bin counts. * * \par Performance Considerations * Delivers consistent throughput regardless of sample bin distribution. */ BLOCK_HISTO_SORT, /** * \par Overview * Use atomic addition to update byte counts directly * * \par Performance Considerations * Performance is strongly tied to the hardware implementation of atomic * addition, and may be significantly degraded for non uniformly-random * input distributions where many concurrent updates are likely to be * made to the same bin counter. 
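// The choice between these strategies is made through BlockHistogram's
// optional ALGORITHM template parameter (declared below, defaulting to
// cub::BLOCK_HISTO_SORT). A brief sketch, assuming the conventional CUB
// include path and the 128-thread / 4-sample / 256-bin configuration used
// by the examples in this file (typedef names are illustrative):
#include <cub/cub.cuh>

// Sort-then-differentiate histogram (the default strategy)
typedef cub::BlockHistogram<unsigned char, 128, 4, 256> SortBasedBlockHistogram;

// Shared-memory atomic-add histogram; on devices without the required
// atomics this silently falls back to BLOCK_HISTO_SORT (see SAFE_ALGORITHM below)
typedef cub::BlockHistogram<unsigned char, 128, 4, 256, cub::BLOCK_HISTO_ATOMIC> AtomicBlockHistogram;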
*/ BLOCK_HISTO_ATOMIC, }; /****************************************************************************** * Block histogram ******************************************************************************/ /** * \brief The BlockHistogram class provides [collective](index.html#sec0) methods for constructing block-wide histograms from data samples partitioned across a CUDA thread block. ![](histogram_logo.png) * \ingroup BlockModule * * \tparam T The sample type being histogrammed (must be castable to an integer bin identifier) * \tparam BLOCK_DIM_X The thread block length in threads along the X dimension * \tparam ITEMS_PER_THREAD The number of items per thread * \tparam BINS The number bins within the histogram * \tparam ALGORITHM [optional] cub::BlockHistogramAlgorithm enumerator specifying the underlying algorithm to use (default: cub::BLOCK_HISTO_SORT) * \tparam BLOCK_DIM_Y [optional] The thread block length in threads along the Y dimension (default: 1) * \tparam BLOCK_DIM_Z [optional] The thread block length in threads along the Z dimension (default: 1) * \tparam PTX_ARCH [optional] \ptxversion * * \par Overview * - A histogram * counts the number of observations that fall into each of the disjoint categories (known as bins). * - BlockHistogram can be optionally specialized to use different algorithms: * -# cub::BLOCK_HISTO_SORT. Sorting followed by differentiation. [More...](\ref cub::BlockHistogramAlgorithm) * -# cub::BLOCK_HISTO_ATOMIC. Use atomic addition to update byte counts directly. [More...](\ref cub::BlockHistogramAlgorithm) * * \par Performance Considerations * - \granularity * * \par A Simple Example * \blockcollective{BlockHistogram} * \par * The code snippet below illustrates a 256-bin histogram of 512 integer samples that * are partitioned across 128 threads where each thread owns 4 samples. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize a 256-bin BlockHistogram type for a 1D block of 128 threads having 4 character samples each * typedef cub::BlockHistogram BlockHistogram; * * // Allocate shared memory for BlockHistogram * __shared__ typename BlockHistogram::TempStorage temp_storage; * * // Allocate shared memory for block-wide histogram bin counts * __shared__ unsigned int smem_histogram[256]; * * // Obtain input samples per thread * unsigned char data[4]; * ... * * // Compute the block-wide histogram * BlockHistogram(temp_storage).Histogram(data, smem_histogram); * * \endcode * * \par Performance and Usage Considerations * - The histogram output can be constructed in shared or device-accessible memory * - See cub::BlockHistogramAlgorithm for performance details regarding algorithmic alternatives * */ template < typename T, int BLOCK_DIM_X, int ITEMS_PER_THREAD, int BINS, BlockHistogramAlgorithm ALGORITHM = BLOCK_HISTO_SORT, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH> class BlockHistogram { private: /****************************************************************************** * Constants and type definitions ******************************************************************************/ /// Constants enum { /// The thread block size in threads BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, }; /** * Ensure the template parameterization meets the requirements of the * targeted device architecture. BLOCK_HISTO_ATOMIC can only be used * on version SM120 or later. Otherwise BLOCK_HISTO_SORT is used * regardless. 
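// A compilable sketch of the block-wide histogram usage illustrated in the
// class documentation above, assuming the conventional CUB include path, a
// single 1D block of 128 threads each owning 4 unsigned char samples, and
// 256 bins (kernel and buffer names are illustrative):
#include <cub/cub.cuh>

__global__ void ExampleHistogramKernel(const unsigned char *d_samples, unsigned int *d_histogram)
{
    // Specialize a 256-bin BlockHistogram for 128 threads with 4 samples each
    typedef cub::BlockHistogram<unsigned char, 128, 4, 256> BlockHistogram;

    // Shared memory for the collective and for the block-wide bin counts
    __shared__ typename BlockHistogram::TempStorage temp_storage;
    __shared__ unsigned int smem_histogram[256];

    // Obtain this thread's 4 consecutive input samples
    unsigned char data[4];
    for (int i = 0; i < 4; ++i)
        data[i] = d_samples[(threadIdx.x * 4) + i];

    // Compute the block-wide histogram (zero-initializes, then composites)
    BlockHistogram(temp_storage).Histogram(data, smem_histogram);

    // Publish the shared-memory counts to global memory
    __syncthreads();
    for (int bin = threadIdx.x; bin < 256; bin += 128)
        d_histogram[bin] = smem_histogram[bin];
}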
*/ static const BlockHistogramAlgorithm SAFE_ALGORITHM = ((ALGORITHM == BLOCK_HISTO_ATOMIC) && (PTX_ARCH < 120)) ? BLOCK_HISTO_SORT : ALGORITHM; /// Internal specialization. typedef typename If<(SAFE_ALGORITHM == BLOCK_HISTO_SORT), BlockHistogramSort, BlockHistogramAtomic >::Type InternalBlockHistogram; /// Shared memory storage layout type for BlockHistogram typedef typename InternalBlockHistogram::TempStorage _TempStorage; /****************************************************************************** * Thread fields ******************************************************************************/ /// Shared storage reference _TempStorage &temp_storage; /// Linear thread-id unsigned int linear_tid; /****************************************************************************** * Utility methods ******************************************************************************/ /// Internal storage allocator __device__ __forceinline__ _TempStorage& PrivateStorage() { __shared__ _TempStorage private_storage; return private_storage; } public: /// \smemstorage{BlockHistogram} struct TempStorage : Uninitialized<_TempStorage> {}; /******************************************************************//** * \name Collective constructors *********************************************************************/ //@{ /** * \brief Collective constructor using a private static allocation of shared memory as temporary storage. */ __device__ __forceinline__ BlockHistogram() : temp_storage(PrivateStorage()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} /** * \brief Collective constructor using the specified memory allocation as temporary storage. */ __device__ __forceinline__ BlockHistogram( TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage : temp_storage(temp_storage.Alias()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} //@} end member group /******************************************************************//** * \name Histogram operations *********************************************************************/ //@{ /** * \brief Initialize the shared histogram counters to zero. * * \par Snippet * The code snippet below illustrates a the initialization and update of a * histogram of 512 integer samples that are partitioned across 128 threads * where each thread owns 4 samples. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize a 256-bin BlockHistogram type for a 1D block of 128 threads having 4 character samples each * typedef cub::BlockHistogram BlockHistogram; * * // Allocate shared memory for BlockHistogram * __shared__ typename BlockHistogram::TempStorage temp_storage; * * // Allocate shared memory for block-wide histogram bin counts * __shared__ unsigned int smem_histogram[256]; * * // Obtain input samples per thread * unsigned char thread_samples[4]; * ... 
* * // Initialize the block-wide histogram * BlockHistogram(temp_storage).InitHistogram(smem_histogram); * * // Update the block-wide histogram * BlockHistogram(temp_storage).Composite(thread_samples, smem_histogram); * * \endcode * * \tparam CounterT [inferred] Histogram counter type */ template __device__ __forceinline__ void InitHistogram(CounterT histogram[BINS]) { // Initialize histogram bin counts to zeros int histo_offset = 0; #pragma unroll for(; histo_offset + BLOCK_THREADS <= BINS; histo_offset += BLOCK_THREADS) { histogram[histo_offset + linear_tid] = 0; } // Finish up with guarded initialization if necessary if ((BINS % BLOCK_THREADS != 0) && (histo_offset + linear_tid < BINS)) { histogram[histo_offset + linear_tid] = 0; } } /** * \brief Constructs a block-wide histogram in shared/device-accessible memory. Each thread contributes an array of input elements. * * \par * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates a 256-bin histogram of 512 integer samples that * are partitioned across 128 threads where each thread owns 4 samples. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize a 256-bin BlockHistogram type for a 1D block of 128 threads having 4 character samples each * typedef cub::BlockHistogram BlockHistogram; * * // Allocate shared memory for BlockHistogram * __shared__ typename BlockHistogram::TempStorage temp_storage; * * // Allocate shared memory for block-wide histogram bin counts * __shared__ unsigned int smem_histogram[256]; * * // Obtain input samples per thread * unsigned char thread_samples[4]; * ... * * // Compute the block-wide histogram * BlockHistogram(temp_storage).Histogram(thread_samples, smem_histogram); * * \endcode * * \tparam CounterT [inferred] Histogram counter type */ template < typename CounterT > __device__ __forceinline__ void Histogram( T (&items)[ITEMS_PER_THREAD], ///< [in] Calling thread's input values to histogram CounterT histogram[BINS]) ///< [out] Reference to shared/device-accessible memory histogram { // Initialize histogram bin counts to zeros InitHistogram(histogram); CTA_SYNC(); // Composite the histogram InternalBlockHistogram(temp_storage).Composite(items, histogram); } /** * \brief Updates an existing block-wide histogram in shared/device-accessible memory. Each thread composites an array of input elements. * * \par * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates a the initialization and update of a * histogram of 512 integer samples that are partitioned across 128 threads * where each thread owns 4 samples. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize a 256-bin BlockHistogram type for a 1D block of 128 threads having 4 character samples each * typedef cub::BlockHistogram BlockHistogram; * * // Allocate shared memory for BlockHistogram * __shared__ typename BlockHistogram::TempStorage temp_storage; * * // Allocate shared memory for block-wide histogram bin counts * __shared__ unsigned int smem_histogram[256]; * * // Obtain input samples per thread * unsigned char thread_samples[4]; * ... 
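// The InitHistogram/Composite pair documented in this and the preceding
// snippet allows the bin counts to be accumulated over several tiles of
// input before they are consumed. A sketch of that two-phase pattern, with
// the same 128-thread / 4-sample / 256-bin configuration and an illustrative
// tiles_per_block argument:
#include <cub/cub.cuh>

__global__ void ExampleCompositeKernel(const unsigned char *d_samples, int tiles_per_block, unsigned int *d_histogram)
{
    typedef cub::BlockHistogram<unsigned char, 128, 4, 256> BlockHistogram;

    __shared__ typename BlockHistogram::TempStorage temp_storage;
    __shared__ unsigned int smem_histogram[256];

    // Zero the bin counts once
    BlockHistogram(temp_storage).InitHistogram(smem_histogram);
    __syncthreads();

    // Accumulate one 512-sample tile per iteration
    for (int tile = 0; tile < tiles_per_block; ++tile)
    {
        const unsigned char *tile_ptr = d_samples + (tile * 128 * 4);
        unsigned char thread_samples[4];
        for (int i = 0; i < 4; ++i)
            thread_samples[i] = tile_ptr[(threadIdx.x * 4) + i];

        // Update the existing histogram; barrier before temp_storage is reused
        BlockHistogram(temp_storage).Composite(thread_samples, smem_histogram);
        __syncthreads();
    }

    // Publish the block-wide counts
    for (int bin = threadIdx.x; bin < 256; bin += 128)
        d_histogram[bin] = smem_histogram[bin];
}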
* * // Initialize the block-wide histogram * BlockHistogram(temp_storage).InitHistogram(smem_histogram); * * // Update the block-wide histogram * BlockHistogram(temp_storage).Composite(thread_samples, smem_histogram); * * \endcode * * \tparam CounterT [inferred] Histogram counter type */ template < typename CounterT > __device__ __forceinline__ void Composite( T (&items)[ITEMS_PER_THREAD], ///< [in] Calling thread's input values to histogram CounterT histogram[BINS]) ///< [out] Reference to shared/device-accessible memory histogram { InternalBlockHistogram(temp_storage).Composite(items, histogram); } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/block/block_load.cuh000066400000000000000000001540311411340063500216750ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * Operations for reading linear tiles of data into the CUDA thread block. */ #pragma once #include #include "block_exchange.cuh" #include "../iterator/cache_modified_input_iterator.cuh" #include "../util_ptx.cuh" #include "../util_macro.cuh" #include "../util_type.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup UtilIo * @{ */ /******************************************************************//** * \name Blocked arrangement I/O (direct) *********************************************************************/ //@{ /** * \brief Load a linear segment of items into a blocked arrangement across the thread block. * * \blocked * * \tparam T [inferred] The data type to load. * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam InputIteratorT [inferred] The random-access iterator type for input \iterator. 
*/ template < typename InputT, int ITEMS_PER_THREAD, typename InputIteratorT> __device__ __forceinline__ void LoadDirectBlocked( int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load { InputIteratorT thread_itr = block_itr + (linear_tid * ITEMS_PER_THREAD); // Load directly in thread-blocked order #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { items[ITEM] = thread_itr[ITEM]; } } /** * \brief Load a linear segment of items into a blocked arrangement across the thread block, guarded by range. * * \blocked * * \tparam T [inferred] The data type to load. * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam InputIteratorT [inferred] The random-access iterator type for input \iterator. */ template < typename InputT, int ITEMS_PER_THREAD, typename InputIteratorT> __device__ __forceinline__ void LoadDirectBlocked( int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load int valid_items) ///< [in] Number of valid items to load { InputIteratorT thread_itr = block_itr + (linear_tid * ITEMS_PER_THREAD); #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { if ((linear_tid * ITEMS_PER_THREAD) + ITEM < valid_items) { items[ITEM] = thread_itr[ITEM]; } } } /** * \brief Load a linear segment of items into a blocked arrangement across the thread block, guarded by range, with a fall-back assignment of out-of-bound elements.. * * \blocked * * \tparam T [inferred] The data type to load. * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam InputIteratorT [inferred] The random-access iterator type for input \iterator. 
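// A short sketch of the guarded LoadDirectBlocked overload above, assuming a
// 1D block of 128 threads, 4 ints per thread, and a num_items argument giving
// the total number of valid input elements (names are illustrative):
#include <cub/cub.cuh>

__global__ void ExampleBlockedLoadKernel(const int *d_in, int num_items)
{
    int thread_data[4];

    // Number of valid items remaining in this block's 512-item tile
    int block_offset = blockIdx.x * 128 * 4;
    int valid_items  = num_items - block_offset;

    // Each thread reads up to 4 consecutive items; reads beyond num_items are
    // skipped, leaving the corresponding thread_data slots unmodified
    cub::LoadDirectBlocked(threadIdx.x, d_in + block_offset, thread_data, valid_items);
}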
*/ template < typename InputT, typename DefaultT, int ITEMS_PER_THREAD, typename InputIteratorT> __device__ __forceinline__ void LoadDirectBlocked( int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load int valid_items, ///< [in] Number of valid items to load DefaultT oob_default) ///< [in] Default value to assign out-of-bound items { #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) items[ITEM] = oob_default; LoadDirectBlocked(linear_tid, block_itr, items, valid_items); } #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document /** * Internal implementation for load vectorization */ template < CacheLoadModifier MODIFIER, typename T, int ITEMS_PER_THREAD> __device__ __forceinline__ void InternalLoadDirectBlockedVectorized( int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) T *block_ptr, ///< [in] Input pointer for loading from T (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load { // Biggest memory access word that T is a whole multiple of typedef typename UnitWord::DeviceWord DeviceWord; enum { TOTAL_WORDS = sizeof(items) / sizeof(DeviceWord), VECTOR_SIZE = (TOTAL_WORDS % 4 == 0) ? 4 : (TOTAL_WORDS % 2 == 0) ? 2 : 1, VECTORS_PER_THREAD = TOTAL_WORDS / VECTOR_SIZE, }; // Vector type typedef typename CubVector::Type Vector; // Vector items Vector vec_items[VECTORS_PER_THREAD]; // Aliased input ptr Vector* vec_ptr = reinterpret_cast(block_ptr) + (linear_tid * VECTORS_PER_THREAD); // Load directly in thread-blocked order #pragma unroll for (int ITEM = 0; ITEM < VECTORS_PER_THREAD; ITEM++) { vec_items[ITEM] = ThreadLoad(vec_ptr + ITEM); } // Copy #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { items[ITEM] = *(reinterpret_cast(vec_items) + ITEM); } } #endif // DOXYGEN_SHOULD_SKIP_THIS /** * \brief Load a linear segment of items into a blocked arrangement across the thread block. * * \blocked * * The input offset (\p block_ptr + \p block_offset) must be quad-item aligned * * The following conditions will prevent vectorization and loading will fall back to cub::BLOCK_LOAD_DIRECT: * - \p ITEMS_PER_THREAD is odd * - The data type \p T is not a built-in primitive or CUDA vector type (e.g., \p short, \p int2, \p double, \p float2, etc.) * * \tparam T [inferred] The data type to load. * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. */ template < typename T, int ITEMS_PER_THREAD> __device__ __forceinline__ void LoadDirectBlockedVectorized( int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) T *block_ptr, ///< [in] Input pointer for loading from T (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load { InternalLoadDirectBlockedVectorized(linear_tid, block_ptr, items); } //@} end member group /******************************************************************//** * \name Striped arrangement I/O (direct) *********************************************************************/ //@{ /** * \brief Load a linear segment of items into a striped arrangement across the thread block. * * \striped * * \tparam BLOCK_THREADS The thread block size in threads * \tparam T [inferred] The data type to load. 
* \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam InputIteratorT [inferred] The random-access iterator type for input \iterator. */ template < int BLOCK_THREADS, typename InputT, int ITEMS_PER_THREAD, typename InputIteratorT> __device__ __forceinline__ void LoadDirectStriped( int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load { InputIteratorT thread_itr = block_itr + linear_tid; #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { items[ITEM] = thread_itr[ITEM * BLOCK_THREADS]; } } /** * \brief Load a linear segment of items into a striped arrangement across the thread block, guarded by range * * \striped * * \tparam BLOCK_THREADS The thread block size in threads * \tparam T [inferred] The data type to load. * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam InputIteratorT [inferred] The random-access iterator type for input \iterator. */ template < int BLOCK_THREADS, typename InputT, int ITEMS_PER_THREAD, typename InputIteratorT> __device__ __forceinline__ void LoadDirectStriped( int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load int valid_items) ///< [in] Number of valid items to load { InputIteratorT thread_itr = block_itr + linear_tid; #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { if (linear_tid + (ITEM * BLOCK_THREADS) < valid_items) { items[ITEM] = thread_itr[ITEM * BLOCK_THREADS]; } } } /** * \brief Load a linear segment of items into a striped arrangement across the thread block, guarded by range, with a fall-back assignment of out-of-bound elements. * * \striped * * \tparam BLOCK_THREADS The thread block size in threads * \tparam T [inferred] The data type to load. * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam InputIteratorT [inferred] The random-access iterator type for input \iterator. */ template < int BLOCK_THREADS, typename InputT, typename DefaultT, int ITEMS_PER_THREAD, typename InputIteratorT> __device__ __forceinline__ void LoadDirectStriped( int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load int valid_items, ///< [in] Number of valid items to load DefaultT oob_default) ///< [in] Default value to assign out-of-bound items { #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) items[ITEM] = oob_default; LoadDirectStriped(linear_tid, block_itr, items, valid_items); } //@} end member group /******************************************************************//** * \name Warp-striped arrangement I/O (direct) *********************************************************************/ //@{ /** * \brief Load a linear segment of items into a warp-striped arrangement across the thread block. 
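// A short sketch of the striped loads above: consecutive threads read
// consecutive elements, which keeps the global-memory accesses coalesced.
// Assuming 128 threads and 4 ints per thread (names are illustrative):
#include <cub/cub.cuh>

__global__ void ExampleStripedLoadKernel(const int *d_in, int num_items)
{
    int thread_data[4];
    int block_offset = blockIdx.x * 128 * 4;

    // Guarded striped load: thread t receives tile elements t, t+128, t+256
    // and t+384; out-of-range slots are filled with the default value 0
    cub::LoadDirectStriped<128>(threadIdx.x, d_in + block_offset, thread_data,
                                num_items - block_offset, 0);
}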
* * \warpstriped * * \par Usage Considerations * The number of threads in the thread block must be a multiple of the architecture's warp size. * * \tparam T [inferred] The data type to load. * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam InputIteratorT [inferred] The random-access iterator type for input \iterator. */ template < typename InputT, int ITEMS_PER_THREAD, typename InputIteratorT> __device__ __forceinline__ void LoadDirectWarpStriped( int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load { int tid = linear_tid & (CUB_PTX_WARP_THREADS - 1); int wid = linear_tid >> CUB_PTX_LOG_WARP_THREADS; int warp_offset = wid * CUB_PTX_WARP_THREADS * ITEMS_PER_THREAD; InputIteratorT thread_itr = block_itr + warp_offset + tid ; // Load directly in warp-striped order #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { items[ITEM] = thread_itr[(ITEM * CUB_PTX_WARP_THREADS)]; } } /** * \brief Load a linear segment of items into a warp-striped arrangement across the thread block, guarded by range * * \warpstriped * * \par Usage Considerations * The number of threads in the thread block must be a multiple of the architecture's warp size. * * \tparam T [inferred] The data type to load. * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam InputIteratorT [inferred] The random-access iterator type for input \iterator. */ template < typename InputT, int ITEMS_PER_THREAD, typename InputIteratorT> __device__ __forceinline__ void LoadDirectWarpStriped( int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load int valid_items) ///< [in] Number of valid items to load { int tid = linear_tid & (CUB_PTX_WARP_THREADS - 1); int wid = linear_tid >> CUB_PTX_LOG_WARP_THREADS; int warp_offset = wid * CUB_PTX_WARP_THREADS * ITEMS_PER_THREAD; InputIteratorT thread_itr = block_itr + warp_offset + tid ; // Load directly in warp-striped order #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { if (warp_offset + tid + (ITEM * CUB_PTX_WARP_THREADS) < valid_items) { items[ITEM] = thread_itr[(ITEM * CUB_PTX_WARP_THREADS)]; } } } /** * \brief Load a linear segment of items into a warp-striped arrangement across the thread block, guarded by range, with a fall-back assignment of out-of-bound elements. * * \warpstriped * * \par Usage Considerations * The number of threads in the thread block must be a multiple of the architecture's warp size. * * \tparam T [inferred] The data type to load. * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam InputIteratorT [inferred] The random-access iterator type for input \iterator. 
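// A sketch combining the warp-striped load above with the WarpStripedToBlocked
// transpose from block_exchange.cuh: each warp reads a contiguous strip of the
// tile (coalesced), after which the items are reordered so that every thread
// holds consecutive elements. Assuming 128 threads (a multiple of the 32-thread
// warp size, as required) and 4 ints per thread:
#include <cub/cub.cuh>

__global__ void ExampleWarpStripedKernel(const int *d_in)
{
    typedef cub::BlockExchange<int, 128, 4> BlockExchange;
    __shared__ typename BlockExchange::TempStorage temp_storage;

    int thread_data[4];
    int block_offset = blockIdx.x * 128 * 4;

    // Warp w reads the 128-item strip starting at w * 128 of this block's tile
    cub::LoadDirectWarpStriped(threadIdx.x, d_in + block_offset, thread_data);

    // Locally transpose into a blocked arrangement
    BlockExchange(temp_storage).WarpStripedToBlocked(thread_data, thread_data);
}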
*/ template < typename InputT, typename DefaultT, int ITEMS_PER_THREAD, typename InputIteratorT> __device__ __forceinline__ void LoadDirectWarpStriped( int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load int valid_items, ///< [in] Number of valid items to load DefaultT oob_default) ///< [in] Default value to assign out-of-bound items { // Load directly in warp-striped order #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) items[ITEM] = oob_default; LoadDirectWarpStriped(linear_tid, block_itr, items, valid_items); } //@} end member group /** @} */ // end group UtilIo //----------------------------------------------------------------------------- // Generic BlockLoad abstraction //----------------------------------------------------------------------------- /** * \brief cub::BlockLoadAlgorithm enumerates alternative algorithms for cub::BlockLoad to read a linear segment of data from memory into a blocked arrangement across a CUDA thread block. */ /** * \brief cub::BlockLoadAlgorithm enumerates alternative algorithms for cub::BlockLoad to read a linear segment of data from memory into a blocked arrangement across a CUDA thread block. */ enum BlockLoadAlgorithm { /** * \par Overview * * A [blocked arrangement](index.html#sec5sec3) of data is read * directly from memory. * * \par Performance Considerations * - The utilization of memory transactions (coalescing) decreases as the * access stride between threads increases (i.e., the number items per thread). */ BLOCK_LOAD_DIRECT, /** * \par Overview * * A [blocked arrangement](index.html#sec5sec3) of data is read * from memory using CUDA's built-in vectorized loads as a coalescing optimization. * For example, ld.global.v4.s32 instructions will be generated * when \p T = \p int and \p ITEMS_PER_THREAD % 4 == 0. * * \par Performance Considerations * - The utilization of memory transactions (coalescing) remains high until the the * access stride between threads (i.e., the number items per thread) exceeds the * maximum vector load width (typically 4 items or 64B, whichever is lower). * - The following conditions will prevent vectorization and loading will fall back to cub::BLOCK_LOAD_DIRECT: * - \p ITEMS_PER_THREAD is odd * - The \p InputIteratorTis not a simple pointer type * - The block input offset is not quadword-aligned * - The data type \p T is not a built-in primitive or CUDA vector type (e.g., \p short, \p int2, \p double, \p float2, etc.) */ BLOCK_LOAD_VECTORIZE, /** * \par Overview * * A [striped arrangement](index.html#sec5sec3) of data is read * efficiently from memory and then locally transposed into a * [blocked arrangement](index.html#sec5sec3). * * \par Performance Considerations * - The utilization of memory transactions (coalescing) remains high regardless * of items loaded per thread. * - The local reordering incurs slightly longer latencies and throughput than the * direct cub::BLOCK_LOAD_DIRECT and cub::BLOCK_LOAD_VECTORIZE alternatives. */ BLOCK_LOAD_TRANSPOSE, /** * \par Overview * * A [warp-striped arrangement](index.html#sec5sec3) of data is * read efficiently from memory and then locally transposed into a * [blocked arrangement](index.html#sec5sec3). 
* * \par Usage Considerations * - BLOCK_THREADS must be a multiple of WARP_THREADS * * \par Performance Considerations * - The utilization of memory transactions (coalescing) remains high regardless * of items loaded per thread. * - The local reordering incurs slightly larger latencies than the * direct cub::BLOCK_LOAD_DIRECT and cub::BLOCK_LOAD_VECTORIZE alternatives. * - Provisions more shared storage, but incurs smaller latencies than the * BLOCK_LOAD_WARP_TRANSPOSE_TIMESLICED alternative. */ BLOCK_LOAD_WARP_TRANSPOSE, /** * \par Overview * * Like \p BLOCK_LOAD_WARP_TRANSPOSE, a [warp-striped arrangement](index.html#sec5sec3) * of data is read directly from memory and then is locally transposed into a * [blocked arrangement](index.html#sec5sec3). To reduce the shared memory * requirement, only one warp's worth of shared memory is provisioned and is * subsequently time-sliced among warps. * * \par Usage Considerations * - BLOCK_THREADS must be a multiple of WARP_THREADS * * \par Performance Considerations * - The utilization of memory transactions (coalescing) remains high regardless * of items loaded per thread. * - Provisions less shared memory temporary storage, but incurs larger * latencies than the BLOCK_LOAD_WARP_TRANSPOSE alternative. */ BLOCK_LOAD_WARP_TRANSPOSE_TIMESLICED, }; /** * \brief The BlockLoad class provides [collective](index.html#sec0) data movement methods for loading a linear segment of items from memory into a [blocked arrangement](index.html#sec5sec3) across a CUDA thread block. ![](block_load_logo.png) * \ingroup BlockModule * \ingroup UtilIo * * \tparam InputT The data type to read into (which must be convertible from the input iterator's value type). * \tparam BLOCK_DIM_X The thread block length in threads along the X dimension * \tparam ITEMS_PER_THREAD The number of consecutive items partitioned onto each thread. * \tparam ALGORITHM [optional] cub::BlockLoadAlgorithm tuning policy. default: cub::BLOCK_LOAD_DIRECT. * \tparam WARP_TIME_SLICING [optional] Whether or not only one warp's worth of shared memory should be allocated and time-sliced among block-warps during any load-related data transpositions (versus each warp having its own storage). (default: false) * \tparam BLOCK_DIM_Y [optional] The thread block length in threads along the Y dimension (default: 1) * \tparam BLOCK_DIM_Z [optional] The thread block length in threads along the Z dimension (default: 1) * \tparam PTX_ARCH [optional] \ptxversion * * \par Overview * - The BlockLoad class provides a single data movement abstraction that can be specialized * to implement different cub::BlockLoadAlgorithm strategies. This facilitates different * performance policies for different architectures, data types, granularity sizes, etc. * - BlockLoad can be optionally specialized by different data movement strategies: * -# cub::BLOCK_LOAD_DIRECT. A [blocked arrangement](index.html#sec5sec3) * of data is read directly from memory. [More...](\ref cub::BlockLoadAlgorithm) * -# cub::BLOCK_LOAD_VECTORIZE. A [blocked arrangement](index.html#sec5sec3) * of data is read directly from memory using CUDA's built-in vectorized loads as a * coalescing optimization. [More...](\ref cub::BlockLoadAlgorithm) * -# cub::BLOCK_LOAD_TRANSPOSE. A [striped arrangement](index.html#sec5sec3) * of data is read directly from memory and is then locally transposed into a * [blocked arrangement](index.html#sec5sec3). [More...](\ref cub::BlockLoadAlgorithm) * -# cub::BLOCK_LOAD_WARP_TRANSPOSE. 
A [warp-striped arrangement](index.html#sec5sec3) * of data is read directly from memory and is then locally transposed into a * [blocked arrangement](index.html#sec5sec3). [More...](\ref cub::BlockLoadAlgorithm) * -# cub::BLOCK_LOAD_WARP_TRANSPOSE_TIMESLICED,. A [warp-striped arrangement](index.html#sec5sec3) * of data is read directly from memory and is then locally transposed into a * [blocked arrangement](index.html#sec5sec3) one warp at a time. [More...](\ref cub::BlockLoadAlgorithm) * - \rowmajor * * \par A Simple Example * \blockcollective{BlockLoad} * \par * The code snippet below illustrates the loading of a linear * segment of 512 integers into a "blocked" arrangement across 128 threads where each * thread owns 4 consecutive items. The load is specialized for \p BLOCK_LOAD_WARP_TRANSPOSE, * meaning memory references are efficiently coalesced using a warp-striped access * pattern (after which items are locally reordered among threads). * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(int *d_data, ...) * { * // Specialize BlockLoad for a 1D block of 128 threads owning 4 integer items each * typedef cub::BlockLoad BlockLoad; * * // Allocate shared memory for BlockLoad * __shared__ typename BlockLoad::TempStorage temp_storage; * * // Load a segment of consecutive items that are blocked across threads * int thread_data[4]; * BlockLoad(temp_storage).Load(d_data, thread_data); * * \endcode * \par * Suppose the input \p d_data is 0, 1, 2, 3, 4, 5, .... * The set of \p thread_data across the block of threads in those threads will be * { [0,1,2,3], [4,5,6,7], ..., [508,509,510,511] }. * */ template < typename InputT, int BLOCK_DIM_X, int ITEMS_PER_THREAD, BlockLoadAlgorithm ALGORITHM = BLOCK_LOAD_DIRECT, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH> class BlockLoad { private: /****************************************************************************** * Constants and typed definitions ******************************************************************************/ /// Constants enum { /// The thread block size in threads BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, }; /****************************************************************************** * Algorithmic variants ******************************************************************************/ /// Load helper template struct LoadInternal; /** * BLOCK_LOAD_DIRECT specialization of load helper */ template struct LoadInternal { /// Shared memory storage layout type typedef NullType TempStorage; /// Linear thread-id int linear_tid; /// Constructor __device__ __forceinline__ LoadInternal( TempStorage &/*temp_storage*/, int linear_tid) : linear_tid(linear_tid) {} /// Load a linear segment of items from memory template __device__ __forceinline__ void Load( InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load { LoadDirectBlocked(linear_tid, block_itr, items); } /// Load a linear segment of items from memory, guarded by range template __device__ __forceinline__ void Load( InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load int valid_items) ///< [in] Number of valid items to load { LoadDirectBlocked(linear_tid, block_itr, items, valid_items); } /// Load a linear segment of items from memory, guarded by range, with a fall-back assignment of out-of-bound elements template __device__ 
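// A compilable sketch of the BlockLoad usage illustrated in the class
// documentation above, assuming the conventional CUB include path and, as in
// that snippet, a 1D block of 128 threads owning 4 integer items each with the
// BLOCK_LOAD_WARP_TRANSPOSE policy (kernel and buffer names are illustrative):
#include <cub/cub.cuh>

__global__ void ExampleBlockLoadKernel(const int *d_data, int num_items)
{
    // Specialize BlockLoad for a 1D block of 128 threads owning 4 integer items each
    typedef cub::BlockLoad<int, 128, 4, cub::BLOCK_LOAD_WARP_TRANSPOSE> BlockLoad;

    // Allocate shared memory for BlockLoad
    __shared__ typename BlockLoad::TempStorage temp_storage;

    // Load a tile of consecutive items into a blocked arrangement across threads,
    // guarded by the number of valid items remaining in this block's tile
    int thread_data[4];
    int block_offset = blockIdx.x * 128 * 4;
    BlockLoad(temp_storage).Load(d_data + block_offset, thread_data,
                                 num_items - block_offset);
}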
__forceinline__ void Load( InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load int valid_items, ///< [in] Number of valid items to load DefaultT oob_default) ///< [in] Default value to assign out-of-bound items { LoadDirectBlocked(linear_tid, block_itr, items, valid_items, oob_default); } }; /** * BLOCK_LOAD_VECTORIZE specialization of load helper */ template struct LoadInternal { /// Shared memory storage layout type typedef NullType TempStorage; /// Linear thread-id int linear_tid; /// Constructor __device__ __forceinline__ LoadInternal( TempStorage &/*temp_storage*/, int linear_tid) : linear_tid(linear_tid) {} /// Load a linear segment of items from memory, specialized for native pointer types (attempts vectorization) template __device__ __forceinline__ void Load( InputT *block_ptr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load { InternalLoadDirectBlockedVectorized(linear_tid, block_ptr, items); } /// Load a linear segment of items from memory, specialized for native pointer types (attempts vectorization) template __device__ __forceinline__ void Load( const InputT *block_ptr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load { InternalLoadDirectBlockedVectorized(linear_tid, block_ptr, items); } /// Load a linear segment of items from memory, specialized for native pointer types (attempts vectorization) template < CacheLoadModifier MODIFIER, typename ValueType, typename OffsetT> __device__ __forceinline__ void Load( CacheModifiedInputIterator block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load { InternalLoadDirectBlockedVectorized(linear_tid, block_itr.ptr, items); } /// Load a linear segment of items from memory, specialized for opaque input iterators (skips vectorization) template __device__ __forceinline__ void Load( _InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load { LoadDirectBlocked(linear_tid, block_itr, items); } /// Load a linear segment of items from memory, guarded by range (skips vectorization) template __device__ __forceinline__ void Load( InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load int valid_items) ///< [in] Number of valid items to load { LoadDirectBlocked(linear_tid, block_itr, items, valid_items); } /// Load a linear segment of items from memory, guarded by range, with a fall-back assignment of out-of-bound elements (skips vectorization) template __device__ __forceinline__ void Load( InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load int valid_items, ///< [in] Number of valid items to load DefaultT oob_default) ///< [in] Default value to assign out-of-bound items { LoadDirectBlocked(linear_tid, block_itr, items, valid_items, oob_default); } }; /** * BLOCK_LOAD_TRANSPOSE specialization of load helper */ template struct LoadInternal { // BlockExchange utility type for keys typedef BlockExchange BlockExchange; /// Shared memory storage layout type struct _TempStorage : BlockExchange::TempStorage { /// Temporary storage for partially-full 
block guard volatile int valid_items; }; /// Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; /// Thread reference to shared storage _TempStorage &temp_storage; /// Linear thread-id int linear_tid; /// Constructor __device__ __forceinline__ LoadInternal( TempStorage &temp_storage, int linear_tid) : temp_storage(temp_storage.Alias()), linear_tid(linear_tid) {} /// Load a linear segment of items from memory template __device__ __forceinline__ void Load( InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load{ { LoadDirectStriped(linear_tid, block_itr, items); BlockExchange(temp_storage).StripedToBlocked(items, items); } /// Load a linear segment of items from memory, guarded by range template __device__ __forceinline__ void Load( InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load int valid_items) ///< [in] Number of valid items to load { if (linear_tid == 0) temp_storage.valid_items = valid_items; // Move through volatile smem as a workaround to prevent RF spilling on subsequent loads CTA_SYNC(); LoadDirectStriped(linear_tid, block_itr, items, temp_storage.valid_items); BlockExchange(temp_storage).StripedToBlocked(items, items); } /// Load a linear segment of items from memory, guarded by range, with a fall-back assignment of out-of-bound elements template __device__ __forceinline__ void Load( InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load int valid_items, ///< [in] Number of valid items to load DefaultT oob_default) ///< [in] Default value to assign out-of-bound items { if (linear_tid == 0) temp_storage.valid_items = valid_items; // Move through volatile smem as a workaround to prevent RF spilling on subsequent loads CTA_SYNC(); LoadDirectStriped(linear_tid, block_itr, items, temp_storage.valid_items, oob_default); BlockExchange(temp_storage).StripedToBlocked(items, items); } }; /** * BLOCK_LOAD_WARP_TRANSPOSE specialization of load helper */ template struct LoadInternal { enum { WARP_THREADS = CUB_WARP_THREADS(PTX_ARCH) }; // Assert BLOCK_THREADS must be a multiple of WARP_THREADS CUB_STATIC_ASSERT((BLOCK_THREADS % WARP_THREADS == 0), "BLOCK_THREADS must be a multiple of WARP_THREADS"); // BlockExchange utility type for keys typedef BlockExchange BlockExchange; /// Shared memory storage layout type struct _TempStorage : BlockExchange::TempStorage { /// Temporary storage for partially-full block guard volatile int valid_items; }; /// Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; /// Thread reference to shared storage _TempStorage &temp_storage; /// Linear thread-id int linear_tid; /// Constructor __device__ __forceinline__ LoadInternal( TempStorage &temp_storage, int linear_tid) : temp_storage(temp_storage.Alias()), linear_tid(linear_tid) {} /// Load a linear segment of items from memory template __device__ __forceinline__ void Load( InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load{ { LoadDirectWarpStriped(linear_tid, block_itr, items); BlockExchange(temp_storage).WarpStripedToBlocked(items, items); } /// Load a linear segment of items from memory, guarded by range template __device__ 
__forceinline__ void Load( InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load int valid_items) ///< [in] Number of valid items to load { if (linear_tid == 0) temp_storage.valid_items = valid_items; // Move through volatile smem as a workaround to prevent RF spilling on subsequent loads CTA_SYNC(); LoadDirectWarpStriped(linear_tid, block_itr, items, temp_storage.valid_items); BlockExchange(temp_storage).WarpStripedToBlocked(items, items); } /// Load a linear segment of items from memory, guarded by range, with a fall-back assignment of out-of-bound elements template __device__ __forceinline__ void Load( InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load int valid_items, ///< [in] Number of valid items to load DefaultT oob_default) ///< [in] Default value to assign out-of-bound items { if (linear_tid == 0) temp_storage.valid_items = valid_items; // Move through volatile smem as a workaround to prevent RF spilling on subsequent loads CTA_SYNC(); LoadDirectWarpStriped(linear_tid, block_itr, items, temp_storage.valid_items, oob_default); BlockExchange(temp_storage).WarpStripedToBlocked(items, items); } }; /** * BLOCK_LOAD_WARP_TRANSPOSE_TIMESLICED specialization of load helper */ template struct LoadInternal { enum { WARP_THREADS = CUB_WARP_THREADS(PTX_ARCH) }; // Assert BLOCK_THREADS must be a multiple of WARP_THREADS CUB_STATIC_ASSERT((BLOCK_THREADS % WARP_THREADS == 0), "BLOCK_THREADS must be a multiple of WARP_THREADS"); // BlockExchange utility type for keys typedef BlockExchange BlockExchange; /// Shared memory storage layout type struct _TempStorage : BlockExchange::TempStorage { /// Temporary storage for partially-full block guard volatile int valid_items; }; /// Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; /// Thread reference to shared storage _TempStorage &temp_storage; /// Linear thread-id int linear_tid; /// Constructor __device__ __forceinline__ LoadInternal( TempStorage &temp_storage, int linear_tid) : temp_storage(temp_storage.Alias()), linear_tid(linear_tid) {} /// Load a linear segment of items from memory template __device__ __forceinline__ void Load( InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load{ { LoadDirectWarpStriped(linear_tid, block_itr, items); BlockExchange(temp_storage).WarpStripedToBlocked(items, items); } /// Load a linear segment of items from memory, guarded by range template __device__ __forceinline__ void Load( InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load int valid_items) ///< [in] Number of valid items to load { if (linear_tid == 0) temp_storage.valid_items = valid_items; // Move through volatile smem as a workaround to prevent RF spilling on subsequent loads CTA_SYNC(); LoadDirectWarpStriped(linear_tid, block_itr, items, temp_storage.valid_items); BlockExchange(temp_storage).WarpStripedToBlocked(items, items); } /// Load a linear segment of items from memory, guarded by range, with a fall-back assignment of out-of-bound elements template __device__ __forceinline__ void Load( InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD], ///< 
[out] Data to load int valid_items, ///< [in] Number of valid items to load DefaultT oob_default) ///< [in] Default value to assign out-of-bound items { if (linear_tid == 0) temp_storage.valid_items = valid_items; // Move through volatile smem as a workaround to prevent RF spilling on subsequent loads CTA_SYNC(); LoadDirectWarpStriped(linear_tid, block_itr, items, temp_storage.valid_items, oob_default); BlockExchange(temp_storage).WarpStripedToBlocked(items, items); } }; /****************************************************************************** * Type definitions ******************************************************************************/ /// Internal load implementation to use typedef LoadInternal InternalLoad; /// Shared memory storage layout type typedef typename InternalLoad::TempStorage _TempStorage; /****************************************************************************** * Utility methods ******************************************************************************/ /// Internal storage allocator __device__ __forceinline__ _TempStorage& PrivateStorage() { __shared__ _TempStorage private_storage; return private_storage; } /****************************************************************************** * Thread fields ******************************************************************************/ /// Thread reference to shared storage _TempStorage &temp_storage; /// Linear thread-id int linear_tid; public: /// \smemstorage{BlockLoad} struct TempStorage : Uninitialized<_TempStorage> {}; /******************************************************************//** * \name Collective constructors *********************************************************************/ //@{ /** * \brief Collective constructor using a private static allocation of shared memory as temporary storage. */ __device__ __forceinline__ BlockLoad() : temp_storage(PrivateStorage()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} /** * \brief Collective constructor using the specified memory allocation as temporary storage. */ __device__ __forceinline__ BlockLoad( TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage : temp_storage(temp_storage.Alias()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} //@} end member group /******************************************************************//** * \name Data movement *********************************************************************/ //@{ /** * \brief Load a linear segment of items from memory. * * \par * - \blocked * - \smemreuse * * \par Snippet * The code snippet below illustrates the loading of a linear * segment of 512 integers into a "blocked" arrangement across 128 threads where each * thread owns 4 consecutive items. The load is specialized for \p BLOCK_LOAD_WARP_TRANSPOSE, * meaning memory references are efficiently coalesced using a warp-striped access * pattern (after which items are locally reordered among threads). * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(int *d_data, ...) * { * // Specialize BlockLoad for a 1D block of 128 threads owning 4 integer items each * typedef cub::BlockLoad BlockLoad; * * // Allocate shared memory for BlockLoad * __shared__ typename BlockLoad::TempStorage temp_storage; * * // Load a segment of consecutive items that are blocked across threads * int thread_data[4]; * BlockLoad(temp_storage).Load(d_data, thread_data); * * \endcode * \par * Suppose the input \p d_data is 0, 1, 2, 3, 4, 5, .... 
* The set of \p thread_data across the block of threads in those threads will be * { [0,1,2,3], [4,5,6,7], ..., [508,509,510,511] }. * */ template __device__ __forceinline__ void Load( InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load { InternalLoad(temp_storage, linear_tid).Load(block_itr, items); } /** * \brief Load a linear segment of items from memory, guarded by range. * * \par * - \blocked * - \smemreuse * * \par Snippet * The code snippet below illustrates the guarded loading of a linear * segment of 512 integers into a "blocked" arrangement across 128 threads where each * thread owns 4 consecutive items. The load is specialized for \p BLOCK_LOAD_WARP_TRANSPOSE, * meaning memory references are efficiently coalesced using a warp-striped access * pattern (after which items are locally reordered among threads). * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(int *d_data, int valid_items, ...) * { * // Specialize BlockLoad for a 1D block of 128 threads owning 4 integer items each * typedef cub::BlockLoad BlockLoad; * * // Allocate shared memory for BlockLoad * __shared__ typename BlockLoad::TempStorage temp_storage; * * // Load a segment of consecutive items that are blocked across threads * int thread_data[4]; * BlockLoad(temp_storage).Load(d_data, thread_data, valid_items); * * \endcode * \par * Suppose the input \p d_data is 0, 1, 2, 3, 4, 5, 6... and \p valid_items is \p 5. * The set of \p thread_data across the block of threads in those threads will be * { [0,1,2,3], [4,?,?,?], ..., [?,?,?,?] }, with only the first two threads * being unmasked to load portions of valid data (and other items remaining unassigned). * */ template __device__ __forceinline__ void Load( InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load int valid_items) ///< [in] Number of valid items to load { InternalLoad(temp_storage, linear_tid).Load(block_itr, items, valid_items); } /** * \brief Load a linear segment of items from memory, guarded by range, with a fall-back assignment of out-of-bound elements * * \par * - \blocked * - \smemreuse * * \par Snippet * The code snippet below illustrates the guarded loading of a linear * segment of 512 integers into a "blocked" arrangement across 128 threads where each * thread owns 4 consecutive items. The load is specialized for \p BLOCK_LOAD_WARP_TRANSPOSE, * meaning memory references are efficiently coalesced using a warp-striped access * pattern (after which items are locally reordered among threads). * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(int *d_data, int valid_items, ...) * { * // Specialize BlockLoad for a 1D block of 128 threads owning 4 integer items each * typedef cub::BlockLoad BlockLoad; * * // Allocate shared memory for BlockLoad * __shared__ typename BlockLoad::TempStorage temp_storage; * * // Load a segment of consecutive items that are blocked across threads * int thread_data[4]; * BlockLoad(temp_storage).Load(d_data, thread_data, valid_items, -1); * * \endcode * \par * Suppose the input \p d_data is 0, 1, 2, 3, 4, 5, 6..., * \p valid_items is \p 5, and the out-of-bounds default is \p -1. 
* The set of \p thread_data across the block of threads in those threads will be * { [0,1,2,3], [4,-1,-1,-1], ..., [-1,-1,-1,-1] }, with only the first two threads * being unmasked to load portions of valid data (and other items are assigned \p -1) * */ template __device__ __forceinline__ void Load( InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load int valid_items, ///< [in] Number of valid items to load DefaultT oob_default) ///< [in] Default value to assign out-of-bound items { InternalLoad(temp_storage, linear_tid).Load(block_itr, items, valid_items, oob_default); } //@} end member group }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/block/block_radix_rank.cuh000066400000000000000000000612361411340063500231040ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::BlockRadixRank provides operations for ranking unsigned integer types within a CUDA thread block */ #pragma once #include #include "../thread/thread_reduce.cuh" #include "../thread/thread_scan.cuh" #include "../block/block_scan.cuh" #include "../util_ptx.cuh" #include "../util_arch.cuh" #include "../util_type.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief BlockRadixRank provides operations for ranking unsigned integer types within a CUDA thread block. 
* \ingroup BlockModule * * \tparam BLOCK_DIM_X The thread block length in threads along the X dimension * \tparam RADIX_BITS The number of radix bits per digit place * \tparam IS_DESCENDING Whether or not the sorted-order is high-to-low * \tparam MEMOIZE_OUTER_SCAN [optional] Whether or not to buffer outer raking scan partials to incur fewer shared memory reads at the expense of higher register pressure (default: true for architectures SM35 and newer, false otherwise). See BlockScanAlgorithm::BLOCK_SCAN_RAKING_MEMOIZE for more details. * \tparam INNER_SCAN_ALGORITHM [optional] The cub::BlockScanAlgorithm algorithm to use (default: cub::BLOCK_SCAN_WARP_SCANS) * \tparam SMEM_CONFIG [optional] Shared memory bank mode (default: \p cudaSharedMemBankSizeFourByte) * \tparam BLOCK_DIM_Y [optional] The thread block length in threads along the Y dimension (default: 1) * \tparam BLOCK_DIM_Z [optional] The thread block length in threads along the Z dimension (default: 1) * \tparam PTX_ARCH [optional] \ptxversion * * \par Overview * Blah... * - Keys must be in a form suitable for radix ranking (i.e., unsigned bits). * - \blocked * * \par Performance Considerations * - \granularity * * \par Examples * \par * - Example 1: Simple radix rank of 32-bit integer keys * \code * #include * * template * __global__ void ExampleKernel(...) * { * * \endcode */ template < int BLOCK_DIM_X, int RADIX_BITS, bool IS_DESCENDING, bool MEMOIZE_OUTER_SCAN = (CUB_PTX_ARCH >= 350) ? true : false, BlockScanAlgorithm INNER_SCAN_ALGORITHM = BLOCK_SCAN_WARP_SCANS, cudaSharedMemConfig SMEM_CONFIG = cudaSharedMemBankSizeFourByte, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH> class BlockRadixRank { private: /****************************************************************************** * Type definitions and constants ******************************************************************************/ // Integer type for digit counters (to be packed into words of type PackedCounters) typedef unsigned short DigitCounter; // Integer type for packing DigitCounters into columns of shared memory banks typedef typename If<(SMEM_CONFIG == cudaSharedMemBankSizeEightByte), unsigned long long, unsigned int>::Type PackedCounter; enum { // The thread block size in threads BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, RADIX_DIGITS = 1 << RADIX_BITS, LOG_WARP_THREADS = CUB_LOG_WARP_THREADS(PTX_ARCH), WARP_THREADS = 1 << LOG_WARP_THREADS, WARPS = (BLOCK_THREADS + WARP_THREADS - 1) / WARP_THREADS, BYTES_PER_COUNTER = sizeof(DigitCounter), LOG_BYTES_PER_COUNTER = Log2::VALUE, PACKING_RATIO = sizeof(PackedCounter) / sizeof(DigitCounter), LOG_PACKING_RATIO = Log2::VALUE, LOG_COUNTER_LANES = CUB_MAX((RADIX_BITS - LOG_PACKING_RATIO), 0), // Always at least one lane COUNTER_LANES = 1 << LOG_COUNTER_LANES, // The number of packed counters per thread (plus one for padding) PADDED_COUNTER_LANES = COUNTER_LANES + 1, RAKING_SEGMENT = PADDED_COUNTER_LANES, }; public: enum { /// Number of bin-starting offsets tracked per thread BINS_TRACKED_PER_THREAD = CUB_MAX(1, RADIX_DIGITS / BLOCK_THREADS), }; private: /// BlockScan type typedef BlockScan< PackedCounter, BLOCK_DIM_X, INNER_SCAN_ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH> BlockScan; /// Shared memory storage layout type for BlockRadixRank struct __align__(16) _TempStorage { union Aliasable { DigitCounter digit_counters[PADDED_COUNTER_LANES][BLOCK_THREADS][PACKING_RATIO]; PackedCounter raking_grid[BLOCK_THREADS][RAKING_SEGMENT]; } aliasable; // Storage for scanning local 
ranks typename BlockScan::TempStorage block_scan; }; /****************************************************************************** * Thread fields ******************************************************************************/ /// Shared storage reference _TempStorage &temp_storage; /// Linear thread-id unsigned int linear_tid; /// Copy of raking segment, promoted to registers PackedCounter cached_segment[RAKING_SEGMENT]; /****************************************************************************** * Utility methods ******************************************************************************/ /** * Internal storage allocator */ __device__ __forceinline__ _TempStorage& PrivateStorage() { __shared__ _TempStorage private_storage; return private_storage; } /** * Performs upsweep raking reduction, returning the aggregate */ __device__ __forceinline__ PackedCounter Upsweep() { PackedCounter *smem_raking_ptr = temp_storage.aliasable.raking_grid[linear_tid]; PackedCounter *raking_ptr; if (MEMOIZE_OUTER_SCAN) { // Copy data into registers #pragma unroll for (int i = 0; i < RAKING_SEGMENT; i++) { cached_segment[i] = smem_raking_ptr[i]; } raking_ptr = cached_segment; } else { raking_ptr = smem_raking_ptr; } return internal::ThreadReduce(raking_ptr, Sum()); } /// Performs exclusive downsweep raking scan __device__ __forceinline__ void ExclusiveDownsweep( PackedCounter raking_partial) { PackedCounter *smem_raking_ptr = temp_storage.aliasable.raking_grid[linear_tid]; PackedCounter *raking_ptr = (MEMOIZE_OUTER_SCAN) ? cached_segment : smem_raking_ptr; // Exclusive raking downsweep scan internal::ThreadScanExclusive(raking_ptr, raking_ptr, Sum(), raking_partial); if (MEMOIZE_OUTER_SCAN) { // Copy data back to smem #pragma unroll for (int i = 0; i < RAKING_SEGMENT; i++) { smem_raking_ptr[i] = cached_segment[i]; } } } /** * Reset shared memory digit counters */ __device__ __forceinline__ void ResetCounters() { // Reset shared memory digit counters #pragma unroll for (int LANE = 0; LANE < PADDED_COUNTER_LANES; LANE++) { *((PackedCounter*) temp_storage.aliasable.digit_counters[LANE][linear_tid]) = 0; } } /** * Block-scan prefix callback */ struct PrefixCallBack { __device__ __forceinline__ PackedCounter operator()(PackedCounter block_aggregate) { PackedCounter block_prefix = 0; // Propagate totals in packed fields #pragma unroll for (int PACKED = 1; PACKED < PACKING_RATIO; PACKED++) { block_prefix += block_aggregate << (sizeof(DigitCounter) * 8 * PACKED); } return block_prefix; } }; /** * Scan shared memory digit counters. */ __device__ __forceinline__ void ScanCounters() { // Upsweep scan PackedCounter raking_partial = Upsweep(); // Compute exclusive sum PackedCounter exclusive_partial; PrefixCallBack prefix_call_back; BlockScan(temp_storage.block_scan).ExclusiveSum(raking_partial, exclusive_partial, prefix_call_back); // Downsweep scan with exclusive partial ExclusiveDownsweep(exclusive_partial); } public: /// \smemstorage{BlockScan} struct TempStorage : Uninitialized<_TempStorage> {}; /******************************************************************//** * \name Collective constructors *********************************************************************/ //@{ /** * \brief Collective constructor using a private static allocation of shared memory as temporary storage. 
*/ __device__ __forceinline__ BlockRadixRank() : temp_storage(PrivateStorage()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} /** * \brief Collective constructor using the specified memory allocation as temporary storage. */ __device__ __forceinline__ BlockRadixRank( TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage : temp_storage(temp_storage.Alias()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} //@} end member group /******************************************************************//** * \name Raking *********************************************************************/ //@{ /** * \brief Rank keys. */ template < typename UnsignedBits, int KEYS_PER_THREAD> __device__ __forceinline__ void RankKeys( UnsignedBits (&keys)[KEYS_PER_THREAD], ///< [in] Keys for this tile int (&ranks)[KEYS_PER_THREAD], ///< [out] For each key, the local rank within the tile int current_bit, ///< [in] The least-significant bit position of the current digit to extract int num_bits) ///< [in] The number of bits in the current digit { DigitCounter thread_prefixes[KEYS_PER_THREAD]; // For each key, the count of previous keys in this tile having the same digit DigitCounter* digit_counters[KEYS_PER_THREAD]; // For each key, the byte-offset of its corresponding digit counter in smem // Reset shared memory digit counters ResetCounters(); #pragma unroll for (int ITEM = 0; ITEM < KEYS_PER_THREAD; ++ITEM) { // Get digit unsigned int digit = BFE(keys[ITEM], current_bit, num_bits); // Get sub-counter unsigned int sub_counter = digit >> LOG_COUNTER_LANES; // Get counter lane unsigned int counter_lane = digit & (COUNTER_LANES - 1); if (IS_DESCENDING) { sub_counter = PACKING_RATIO - 1 - sub_counter; counter_lane = COUNTER_LANES - 1 - counter_lane; } // Pointer to smem digit counter digit_counters[ITEM] = &temp_storage.aliasable.digit_counters[counter_lane][linear_tid][sub_counter]; // Load thread-exclusive prefix thread_prefixes[ITEM] = *digit_counters[ITEM]; // Store inclusive prefix *digit_counters[ITEM] = thread_prefixes[ITEM] + 1; } CTA_SYNC(); // Scan shared memory counters ScanCounters(); CTA_SYNC(); // Extract the local ranks of each key for (int ITEM = 0; ITEM < KEYS_PER_THREAD; ++ITEM) { // Add in thread block exclusive prefix ranks[ITEM] = thread_prefixes[ITEM] + *digit_counters[ITEM]; } } /** * \brief Rank keys. For the lower \p RADIX_DIGITS threads, digit counts for each digit are provided for the corresponding thread. */ template < typename UnsignedBits, int KEYS_PER_THREAD> __device__ __forceinline__ void RankKeys( UnsignedBits (&keys)[KEYS_PER_THREAD], ///< [in] Keys for this tile int (&ranks)[KEYS_PER_THREAD], ///< [out] For each key, the local rank within the tile (out parameter) int current_bit, ///< [in] The least-significant bit position of the current digit to extract int num_bits, ///< [in] The number of bits in the current digit int (&exclusive_digit_prefix)[BINS_TRACKED_PER_THREAD]) ///< [out] The exclusive prefix sum for the digits [(threadIdx.x * BINS_TRACKED_PER_THREAD) ... (threadIdx.x * BINS_TRACKED_PER_THREAD) + BINS_TRACKED_PER_THREAD - 1] { // Rank keys RankKeys(keys, ranks, current_bit, num_bits); // Get the inclusive and exclusive digit totals corresponding to the calling thread. 
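        // Each thread reports the exclusive digit prefix for the BINS_TRACKED_PER_THREAD
        // consecutive digits starting at (linear_tid * BINS_TRACKED_PER_THREAD).
        // For descending sorts the bin index is mirrored; all counts are read from
        // counter column 0 of the shared-memory counter grid.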
#pragma unroll for (int track = 0; track < BINS_TRACKED_PER_THREAD; ++track) { int bin_idx = (linear_tid * BINS_TRACKED_PER_THREAD) + track; if ((BLOCK_THREADS == RADIX_DIGITS) || (bin_idx < RADIX_DIGITS)) { if (IS_DESCENDING) bin_idx = RADIX_DIGITS - bin_idx - 1; // Obtain ex/inclusive digit counts. (Unfortunately these all reside in the // first counter column, resulting in unavoidable bank conflicts.) unsigned int counter_lane = (bin_idx & (COUNTER_LANES - 1)); unsigned int sub_counter = bin_idx >> (LOG_COUNTER_LANES); exclusive_digit_prefix[track] = temp_storage.aliasable.digit_counters[counter_lane][0][sub_counter]; } } } }; /** * Radix-rank using match.any */ template < int BLOCK_DIM_X, int RADIX_BITS, bool IS_DESCENDING, BlockScanAlgorithm INNER_SCAN_ALGORITHM = BLOCK_SCAN_WARP_SCANS, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH> class BlockRadixRankMatch { private: /****************************************************************************** * Type definitions and constants ******************************************************************************/ typedef int32_t RankT; typedef int32_t DigitCounterT; enum { // The thread block size in threads BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, RADIX_DIGITS = 1 << RADIX_BITS, LOG_WARP_THREADS = CUB_LOG_WARP_THREADS(PTX_ARCH), WARP_THREADS = 1 << LOG_WARP_THREADS, WARPS = (BLOCK_THREADS + WARP_THREADS - 1) / WARP_THREADS, PADDED_WARPS = ((WARPS & 0x1) == 0) ? WARPS + 1 : WARPS, COUNTERS = PADDED_WARPS * RADIX_DIGITS, RAKING_SEGMENT = (COUNTERS + BLOCK_THREADS - 1) / BLOCK_THREADS, PADDED_RAKING_SEGMENT = ((RAKING_SEGMENT & 0x1) == 0) ? RAKING_SEGMENT + 1 : RAKING_SEGMENT, }; public: enum { /// Number of bin-starting offsets tracked per thread BINS_TRACKED_PER_THREAD = CUB_MAX(1, RADIX_DIGITS / BLOCK_THREADS), }; private: /// BlockScan type typedef BlockScan< DigitCounterT, BLOCK_THREADS, INNER_SCAN_ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH> BlockScanT; /// Shared memory storage layout type for BlockRadixRank struct __align__(16) _TempStorage { typename BlockScanT::TempStorage block_scan; union __align__(16) Aliasable { volatile DigitCounterT warp_digit_counters[RADIX_DIGITS][PADDED_WARPS]; DigitCounterT raking_grid[BLOCK_THREADS][PADDED_RAKING_SEGMENT]; } aliasable; }; /****************************************************************************** * Thread fields ******************************************************************************/ /// Shared storage reference _TempStorage &temp_storage; /// Linear thread-id unsigned int linear_tid; public: /// \smemstorage{BlockScan} struct TempStorage : Uninitialized<_TempStorage> {}; /******************************************************************//** * \name Collective constructors *********************************************************************/ //@{ /** * \brief Collective constructor using the specified memory allocation as temporary storage. */ __device__ __forceinline__ BlockRadixRankMatch( TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage : temp_storage(temp_storage.Alias()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} //@} end member group /******************************************************************//** * \name Raking *********************************************************************/ //@{ /** * \brief Rank keys. 
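     *
     * \par Snippet
     * A minimal usage sketch, added for illustration only (the kernel context and
     * variable names are hypothetical and not part of the original documentation).
     * It ranks 4 keys per thread across a 1D block of 128 threads, one 4-bit digit
     * place at a time:
     * \code
     * typedef cub::BlockRadixRankMatch<128, 4, false> BlockRadixRankMatch;
     * __shared__ typename BlockRadixRankMatch::TempStorage temp_storage;
     *
     * unsigned int thread_keys[4];   // keys must already be in unsigned-bits form
     * int          thread_ranks[4];
     * ...
     * // Rank the digit in bits [0, 4) of each key
     * BlockRadixRankMatch(temp_storage).RankKeys(thread_keys, thread_ranks, 0, 4);
     * \endcode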
*/ template < typename UnsignedBits, int KEYS_PER_THREAD> __device__ __forceinline__ void RankKeys( UnsignedBits (&keys)[KEYS_PER_THREAD], ///< [in] Keys for this tile int (&ranks)[KEYS_PER_THREAD], ///< [out] For each key, the local rank within the tile int current_bit, ///< [in] The least-significant bit position of the current digit to extract int num_bits) ///< [in] The number of bits in the current digit { // Initialize shared digit counters #pragma unroll for (int ITEM = 0; ITEM < PADDED_RAKING_SEGMENT; ++ITEM) temp_storage.aliasable.raking_grid[linear_tid][ITEM] = 0; CTA_SYNC(); // Each warp will strip-mine its section of input, one strip at a time volatile DigitCounterT *digit_counters[KEYS_PER_THREAD]; uint32_t lane_id = LaneId(); uint32_t warp_id = linear_tid >> LOG_WARP_THREADS; uint32_t lane_mask_lt = LaneMaskLt(); #pragma unroll for (int ITEM = 0; ITEM < KEYS_PER_THREAD; ++ITEM) { // My digit uint32_t digit = BFE(keys[ITEM], current_bit, num_bits); if (IS_DESCENDING) digit = RADIX_DIGITS - digit - 1; // Mask of peers who have same digit as me uint32_t peer_mask = MatchAny(digit); // Pointer to smem digit counter for this key digit_counters[ITEM] = &temp_storage.aliasable.warp_digit_counters[digit][warp_id]; // Number of occurrences in previous strips DigitCounterT warp_digit_prefix = *digit_counters[ITEM]; // Warp-sync WARP_SYNC(0xFFFFFFFF); // Number of peers having same digit as me int32_t digit_count = __popc(peer_mask); // Number of lower-ranked peers having same digit seen so far int32_t peer_digit_prefix = __popc(peer_mask & lane_mask_lt); if (peer_digit_prefix == 0) { // First thread for each digit updates the shared warp counter *digit_counters[ITEM] = DigitCounterT(warp_digit_prefix + digit_count); } // Warp-sync WARP_SYNC(0xFFFFFFFF); // Number of prior keys having same digit ranks[ITEM] = warp_digit_prefix + DigitCounterT(peer_digit_prefix); } CTA_SYNC(); // Scan warp counters DigitCounterT scan_counters[PADDED_RAKING_SEGMENT]; #pragma unroll for (int ITEM = 0; ITEM < PADDED_RAKING_SEGMENT; ++ITEM) scan_counters[ITEM] = temp_storage.aliasable.raking_grid[linear_tid][ITEM]; BlockScanT(temp_storage.block_scan).ExclusiveSum(scan_counters, scan_counters); #pragma unroll for (int ITEM = 0; ITEM < PADDED_RAKING_SEGMENT; ++ITEM) temp_storage.aliasable.raking_grid[linear_tid][ITEM] = scan_counters[ITEM]; CTA_SYNC(); // Seed ranks with counter values from previous warps #pragma unroll for (int ITEM = 0; ITEM < KEYS_PER_THREAD; ++ITEM) ranks[ITEM] += *digit_counters[ITEM]; } /** * \brief Rank keys. For the lower \p RADIX_DIGITS threads, digit counts for each digit are provided for the corresponding thread. */ template < typename UnsignedBits, int KEYS_PER_THREAD> __device__ __forceinline__ void RankKeys( UnsignedBits (&keys)[KEYS_PER_THREAD], ///< [in] Keys for this tile int (&ranks)[KEYS_PER_THREAD], ///< [out] For each key, the local rank within the tile (out parameter) int current_bit, ///< [in] The least-significant bit position of the current digit to extract int num_bits, ///< [in] The number of bits in the current digit int (&exclusive_digit_prefix)[BINS_TRACKED_PER_THREAD]) ///< [out] The exclusive prefix sum for the digits [(threadIdx.x * BINS_TRACKED_PER_THREAD) ... 
(threadIdx.x * BINS_TRACKED_PER_THREAD) + BINS_TRACKED_PER_THREAD - 1] { RankKeys(keys, ranks, current_bit, num_bits); // Get exclusive count for each digit #pragma unroll for (int track = 0; track < BINS_TRACKED_PER_THREAD; ++track) { int bin_idx = (linear_tid * BINS_TRACKED_PER_THREAD) + track; if ((BLOCK_THREADS == RADIX_DIGITS) || (bin_idx < RADIX_DIGITS)) { if (IS_DESCENDING) bin_idx = RADIX_DIGITS - bin_idx - 1; exclusive_digit_prefix[track] = temp_storage.aliasable.warp_digit_counters[bin_idx][0]; } } } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/block/block_radix_sort.cuh000066400000000000000000001126071411340063500231370ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * The cub::BlockRadixSort class provides [collective](index.html#sec0) methods for radix sorting of items partitioned across a CUDA thread block. */ #pragma once #include "block_exchange.cuh" #include "block_radix_rank.cuh" #include "../util_ptx.cuh" #include "../util_arch.cuh" #include "../util_type.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief The BlockRadixSort class provides [collective](index.html#sec0) methods for sorting items partitioned across a CUDA thread block using a radix sorting method. 
![](sorting_logo.png) * \ingroup BlockModule * * \tparam KeyT KeyT type * \tparam BLOCK_DIM_X The thread block length in threads along the X dimension * \tparam ITEMS_PER_THREAD The number of items per thread * \tparam ValueT [optional] ValueT type (default: cub::NullType, which indicates a keys-only sort) * \tparam RADIX_BITS [optional] The number of radix bits per digit place (default: 4 bits) * \tparam MEMOIZE_OUTER_SCAN [optional] Whether or not to buffer outer raking scan partials to incur fewer shared memory reads at the expense of higher register pressure (default: true for architectures SM35 and newer, false otherwise). * \tparam INNER_SCAN_ALGORITHM [optional] The cub::BlockScanAlgorithm algorithm to use (default: cub::BLOCK_SCAN_WARP_SCANS) * \tparam SMEM_CONFIG [optional] Shared memory bank mode (default: \p cudaSharedMemBankSizeFourByte) * \tparam BLOCK_DIM_Y [optional] The thread block length in threads along the Y dimension (default: 1) * \tparam BLOCK_DIM_Z [optional] The thread block length in threads along the Z dimension (default: 1) * \tparam PTX_ARCH [optional] \ptxversion * * \par Overview * - The [radix sorting method](http://en.wikipedia.org/wiki/Radix_sort) arranges * items into ascending order. It relies upon a positional representation for * keys, i.e., each key is comprised of an ordered sequence of symbols (e.g., digits, * characters, etc.) specified from least-significant to most-significant. For a * given input sequence of keys and a set of rules specifying a total ordering * of the symbolic alphabet, the radix sorting method produces a lexicographic * ordering of those keys. * - BlockRadixSort can sort all of the built-in C++ numeric primitive types, e.g.: * unsigned char, \p int, \p double, etc. Within each key, the implementation treats fixed-length * bit-sequences of \p RADIX_BITS as radix digit places. Although the direct radix sorting * method can only be applied to unsigned integral types, BlockRadixSort * is able to sort signed and floating-point types via simple bit-wise transformations * that ensure lexicographic key ordering. * - \rowmajor * * \par Performance Considerations * - \granularity * * \par A Simple Example * \blockcollective{BlockRadixSort} * \par * The code snippet below illustrates a sort of 512 integer keys that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockRadixSort for a 1D block of 128 threads owning 4 integer items each * typedef cub::BlockRadixSort BlockRadixSort; * * // Allocate shared memory for BlockRadixSort * __shared__ typename BlockRadixSort::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_keys[4]; * ... * * // Collectively sort the keys * BlockRadixSort(temp_storage).Sort(thread_keys); * * ... * \endcode * \par * Suppose the set of input \p thread_keys across the block of threads is * { [0,511,1,510], [2,509,3,508], [4,507,5,506], ..., [254,257,255,256] }. The * corresponding output \p thread_keys in those threads will be * { [0,1,2,3], [4,5,6,7], [8,9,10,11], ..., [508,509,510,511] }. * */ template < typename KeyT, int BLOCK_DIM_X, int ITEMS_PER_THREAD, typename ValueT = NullType, int RADIX_BITS = 4, bool MEMOIZE_OUTER_SCAN = (CUB_PTX_ARCH >= 350) ? 
true : false, BlockScanAlgorithm INNER_SCAN_ALGORITHM = BLOCK_SCAN_WARP_SCANS, cudaSharedMemConfig SMEM_CONFIG = cudaSharedMemBankSizeFourByte, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH> class BlockRadixSort { private: /****************************************************************************** * Constants and type definitions ******************************************************************************/ enum { // The thread block size in threads BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, // Whether or not there are values to be trucked along with keys KEYS_ONLY = Equals::VALUE, }; // KeyT traits and unsigned bits type typedef Traits KeyTraits; typedef typename KeyTraits::UnsignedBits UnsignedBits; /// Ascending BlockRadixRank utility type typedef BlockRadixRank< BLOCK_DIM_X, RADIX_BITS, false, MEMOIZE_OUTER_SCAN, INNER_SCAN_ALGORITHM, SMEM_CONFIG, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH> AscendingBlockRadixRank; /// Descending BlockRadixRank utility type typedef BlockRadixRank< BLOCK_DIM_X, RADIX_BITS, true, MEMOIZE_OUTER_SCAN, INNER_SCAN_ALGORITHM, SMEM_CONFIG, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH> DescendingBlockRadixRank; /// BlockExchange utility type for keys typedef BlockExchange BlockExchangeKeys; /// BlockExchange utility type for values typedef BlockExchange BlockExchangeValues; /// Shared memory storage layout type union _TempStorage { typename AscendingBlockRadixRank::TempStorage asending_ranking_storage; typename DescendingBlockRadixRank::TempStorage descending_ranking_storage; typename BlockExchangeKeys::TempStorage exchange_keys; typename BlockExchangeValues::TempStorage exchange_values; }; /****************************************************************************** * Thread fields ******************************************************************************/ /// Shared storage reference _TempStorage &temp_storage; /// Linear thread-id unsigned int linear_tid; /****************************************************************************** * Utility methods ******************************************************************************/ /// Internal storage allocator __device__ __forceinline__ _TempStorage& PrivateStorage() { __shared__ _TempStorage private_storage; return private_storage; } /// Rank keys (specialized for ascending sort) __device__ __forceinline__ void RankKeys( UnsignedBits (&unsigned_keys)[ITEMS_PER_THREAD], int (&ranks)[ITEMS_PER_THREAD], int begin_bit, int pass_bits, Int2Type /*is_descending*/) { AscendingBlockRadixRank(temp_storage.asending_ranking_storage).RankKeys( unsigned_keys, ranks, begin_bit, pass_bits); } /// Rank keys (specialized for descending sort) __device__ __forceinline__ void RankKeys( UnsignedBits (&unsigned_keys)[ITEMS_PER_THREAD], int (&ranks)[ITEMS_PER_THREAD], int begin_bit, int pass_bits, Int2Type /*is_descending*/) { DescendingBlockRadixRank(temp_storage.descending_ranking_storage).RankKeys( unsigned_keys, ranks, begin_bit, pass_bits); } /// ExchangeValues (specialized for key-value sort, to-blocked arrangement) __device__ __forceinline__ void ExchangeValues( ValueT (&values)[ITEMS_PER_THREAD], int (&ranks)[ITEMS_PER_THREAD], Int2Type /*is_keys_only*/, Int2Type /*is_blocked*/) { CTA_SYNC(); // Exchange values through shared memory in blocked arrangement BlockExchangeValues(temp_storage.exchange_values).ScatterToBlocked(values, ranks); } /// ExchangeValues (specialized for key-value sort, to-striped arrangement) __device__ __forceinline__ void ExchangeValues( ValueT 
(&values)[ITEMS_PER_THREAD], int (&ranks)[ITEMS_PER_THREAD], Int2Type /*is_keys_only*/, Int2Type /*is_blocked*/) { CTA_SYNC(); // Exchange values through shared memory in blocked arrangement BlockExchangeValues(temp_storage.exchange_values).ScatterToStriped(values, ranks); } /// ExchangeValues (specialized for keys-only sort) template __device__ __forceinline__ void ExchangeValues( ValueT (&/*values*/)[ITEMS_PER_THREAD], int (&/*ranks*/)[ITEMS_PER_THREAD], Int2Type /*is_keys_only*/, Int2Type /*is_blocked*/) {} /// Sort blocked arrangement template __device__ __forceinline__ void SortBlocked( KeyT (&keys)[ITEMS_PER_THREAD], ///< Keys to sort ValueT (&values)[ITEMS_PER_THREAD], ///< Values to sort int begin_bit, ///< The beginning (least-significant) bit index needed for key comparison int end_bit, ///< The past-the-end (most-significant) bit index needed for key comparison Int2Type is_descending, ///< Tag whether is a descending-order sort Int2Type is_keys_only) ///< Tag whether is keys-only sort { UnsignedBits (&unsigned_keys)[ITEMS_PER_THREAD] = reinterpret_cast(keys); // Twiddle bits if necessary #pragma unroll for (int KEY = 0; KEY < ITEMS_PER_THREAD; KEY++) { unsigned_keys[KEY] = KeyTraits::TwiddleIn(unsigned_keys[KEY]); } // Radix sorting passes while (true) { int pass_bits = CUB_MIN(RADIX_BITS, end_bit - begin_bit); // Rank the blocked keys int ranks[ITEMS_PER_THREAD]; RankKeys(unsigned_keys, ranks, begin_bit, pass_bits, is_descending); begin_bit += RADIX_BITS; CTA_SYNC(); // Exchange keys through shared memory in blocked arrangement BlockExchangeKeys(temp_storage.exchange_keys).ScatterToBlocked(keys, ranks); // Exchange values through shared memory in blocked arrangement ExchangeValues(values, ranks, is_keys_only, Int2Type()); // Quit if done if (begin_bit >= end_bit) break; CTA_SYNC(); } // Untwiddle bits if necessary #pragma unroll for (int KEY = 0; KEY < ITEMS_PER_THREAD; KEY++) { unsigned_keys[KEY] = KeyTraits::TwiddleOut(unsigned_keys[KEY]); } } public: #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document /// Sort blocked -> striped arrangement template __device__ __forceinline__ void SortBlockedToStriped( KeyT (&keys)[ITEMS_PER_THREAD], ///< Keys to sort ValueT (&values)[ITEMS_PER_THREAD], ///< Values to sort int begin_bit, ///< The beginning (least-significant) bit index needed for key comparison int end_bit, ///< The past-the-end (most-significant) bit index needed for key comparison Int2Type is_descending, ///< Tag whether is a descending-order sort Int2Type is_keys_only) ///< Tag whether is keys-only sort { UnsignedBits (&unsigned_keys)[ITEMS_PER_THREAD] = reinterpret_cast(keys); // Twiddle bits if necessary #pragma unroll for (int KEY = 0; KEY < ITEMS_PER_THREAD; KEY++) { unsigned_keys[KEY] = KeyTraits::TwiddleIn(unsigned_keys[KEY]); } // Radix sorting passes while (true) { int pass_bits = CUB_MIN(RADIX_BITS, end_bit - begin_bit); // Rank the blocked keys int ranks[ITEMS_PER_THREAD]; RankKeys(unsigned_keys, ranks, begin_bit, pass_bits, is_descending); begin_bit += RADIX_BITS; CTA_SYNC(); // Check if this is the last pass if (begin_bit >= end_bit) { // Last pass exchanges keys through shared memory in striped arrangement BlockExchangeKeys(temp_storage.exchange_keys).ScatterToStriped(keys, ranks); // Last pass exchanges through shared memory in striped arrangement ExchangeValues(values, ranks, is_keys_only, Int2Type()); // Quit break; } // Exchange keys through shared memory in blocked arrangement BlockExchangeKeys(temp_storage.exchange_keys).ScatterToBlocked(keys, ranks); 
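            // For all but the final pass, keys (and values) are returned to a blocked
            // arrangement so that the next pass can rank them again; only the final
            // pass above scatters to a striped arrangement.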
// Exchange values through shared memory in blocked arrangement ExchangeValues(values, ranks, is_keys_only, Int2Type()); CTA_SYNC(); } // Untwiddle bits if necessary #pragma unroll for (int KEY = 0; KEY < ITEMS_PER_THREAD; KEY++) { unsigned_keys[KEY] = KeyTraits::TwiddleOut(unsigned_keys[KEY]); } } #endif // DOXYGEN_SHOULD_SKIP_THIS /// \smemstorage{BlockRadixSort} struct TempStorage : Uninitialized<_TempStorage> {}; /******************************************************************//** * \name Collective constructors *********************************************************************/ //@{ /** * \brief Collective constructor using a private static allocation of shared memory as temporary storage. */ __device__ __forceinline__ BlockRadixSort() : temp_storage(PrivateStorage()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} /** * \brief Collective constructor using the specified memory allocation as temporary storage. */ __device__ __forceinline__ BlockRadixSort( TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage : temp_storage(temp_storage.Alias()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} //@} end member group /******************************************************************//** * \name Sorting (blocked arrangements) *********************************************************************/ //@{ /** * \brief Performs an ascending block-wide radix sort over a [blocked arrangement](index.html#sec5sec3) of keys. * * \par * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates a sort of 512 integer keys that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive keys. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockRadixSort for a 1D block of 128 threads owning 4 integer keys each * typedef cub::BlockRadixSort BlockRadixSort; * * // Allocate shared memory for BlockRadixSort * __shared__ typename BlockRadixSort::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_keys[4]; * ... * * // Collectively sort the keys * BlockRadixSort(temp_storage).Sort(thread_keys); * * \endcode * \par * Suppose the set of input \p thread_keys across the block of threads is * { [0,511,1,510], [2,509,3,508], [4,507,5,506], ..., [254,257,255,256] }. * The corresponding output \p thread_keys in those threads will be * { [0,1,2,3], [4,5,6,7], [8,9,10,11], ..., [508,509,510,511] }. */ __device__ __forceinline__ void Sort( KeyT (&keys)[ITEMS_PER_THREAD], ///< [in-out] Keys to sort int begin_bit = 0, ///< [in] [optional] The beginning (least-significant) bit index needed for key comparison int end_bit = sizeof(KeyT) * 8) ///< [in] [optional] The past-the-end (most-significant) bit index needed for key comparison { NullType values[ITEMS_PER_THREAD]; SortBlocked(keys, values, begin_bit, end_bit, Int2Type(), Int2Type()); } /** * \brief Performs an ascending block-wide radix sort across a [blocked arrangement](index.html#sec5sec3) of keys and values. * * \par * - BlockRadixSort can only accommodate one associated tile of values. To "truck along" * more than one tile of values, simply perform a key-value sort of the keys paired * with a temporary value array that enumerates the key indices. The reordered indices * can then be used as a gather-vector for exchanging other associated tile data through * shared memory. 
* - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates a sort of 512 integer keys and values that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive pairs. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockRadixSort for a 1D block of 128 threads owning 4 integer keys and values each * typedef cub::BlockRadixSort BlockRadixSort; * * // Allocate shared memory for BlockRadixSort * __shared__ typename BlockRadixSort::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_keys[4]; * int thread_values[4]; * ... * * // Collectively sort the keys and values among block threads * BlockRadixSort(temp_storage).Sort(thread_keys, thread_values); * * \endcode * \par * Suppose the set of input \p thread_keys across the block of threads is * { [0,511,1,510], [2,509,3,508], [4,507,5,506], ..., [254,257,255,256] }. The * corresponding output \p thread_keys in those threads will be * { [0,1,2,3], [4,5,6,7], [8,9,10,11], ..., [508,509,510,511] }. * */ __device__ __forceinline__ void Sort( KeyT (&keys)[ITEMS_PER_THREAD], ///< [in-out] Keys to sort ValueT (&values)[ITEMS_PER_THREAD], ///< [in-out] Values to sort int begin_bit = 0, ///< [in] [optional] The beginning (least-significant) bit index needed for key comparison int end_bit = sizeof(KeyT) * 8) ///< [in] [optional] The past-the-end (most-significant) bit index needed for key comparison { SortBlocked(keys, values, begin_bit, end_bit, Int2Type(), Int2Type()); } /** * \brief Performs a descending block-wide radix sort over a [blocked arrangement](index.html#sec5sec3) of keys. * * \par * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates a sort of 512 integer keys that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive keys. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockRadixSort for a 1D block of 128 threads owning 4 integer keys each * typedef cub::BlockRadixSort BlockRadixSort; * * // Allocate shared memory for BlockRadixSort * __shared__ typename BlockRadixSort::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_keys[4]; * ... * * // Collectively sort the keys * BlockRadixSort(temp_storage).Sort(thread_keys); * * \endcode * \par * Suppose the set of input \p thread_keys across the block of threads is * { [0,511,1,510], [2,509,3,508], [4,507,5,506], ..., [254,257,255,256] }. * The corresponding output \p thread_keys in those threads will be * { [511,510,509,508], [11,10,9,8], [7,6,5,4], ..., [3,2,1,0] }. */ __device__ __forceinline__ void SortDescending( KeyT (&keys)[ITEMS_PER_THREAD], ///< [in-out] Keys to sort int begin_bit = 0, ///< [in] [optional] The beginning (least-significant) bit index needed for key comparison int end_bit = sizeof(KeyT) * 8) ///< [in] [optional] The past-the-end (most-significant) bit index needed for key comparison { NullType values[ITEMS_PER_THREAD]; SortBlocked(keys, values, begin_bit, end_bit, Int2Type(), Int2Type()); } /** * \brief Performs a descending block-wide radix sort across a [blocked arrangement](index.html#sec5sec3) of keys and values. * * \par * - BlockRadixSort can only accommodate one associated tile of values. 
To "truck along" * more than one tile of values, simply perform a key-value sort of the keys paired * with a temporary value array that enumerates the key indices. The reordered indices * can then be used as a gather-vector for exchanging other associated tile data through * shared memory. * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates a sort of 512 integer keys and values that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive pairs. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockRadixSort for a 1D block of 128 threads owning 4 integer keys and values each * typedef cub::BlockRadixSort BlockRadixSort; * * // Allocate shared memory for BlockRadixSort * __shared__ typename BlockRadixSort::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_keys[4]; * int thread_values[4]; * ... * * // Collectively sort the keys and values among block threads * BlockRadixSort(temp_storage).Sort(thread_keys, thread_values); * * \endcode * \par * Suppose the set of input \p thread_keys across the block of threads is * { [0,511,1,510], [2,509,3,508], [4,507,5,506], ..., [254,257,255,256] }. The * corresponding output \p thread_keys in those threads will be * { [511,510,509,508], [11,10,9,8], [7,6,5,4], ..., [3,2,1,0] }. * */ __device__ __forceinline__ void SortDescending( KeyT (&keys)[ITEMS_PER_THREAD], ///< [in-out] Keys to sort ValueT (&values)[ITEMS_PER_THREAD], ///< [in-out] Values to sort int begin_bit = 0, ///< [in] [optional] The beginning (least-significant) bit index needed for key comparison int end_bit = sizeof(KeyT) * 8) ///< [in] [optional] The past-the-end (most-significant) bit index needed for key comparison { SortBlocked(keys, values, begin_bit, end_bit, Int2Type(), Int2Type()); } //@} end member group /******************************************************************//** * \name Sorting (blocked arrangement -> striped arrangement) *********************************************************************/ //@{ /** * \brief Performs an ascending radix sort across a [blocked arrangement](index.html#sec5sec3) of keys, leaving them in a [striped arrangement](index.html#sec5sec3). * * \par * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates a sort of 512 integer keys that * are initially partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive keys. The final partitioning is striped. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockRadixSort for a 1D block of 128 threads owning 4 integer keys each * typedef cub::BlockRadixSort BlockRadixSort; * * // Allocate shared memory for BlockRadixSort * __shared__ typename BlockRadixSort::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_keys[4]; * ... * * // Collectively sort the keys * BlockRadixSort(temp_storage).SortBlockedToStriped(thread_keys); * * \endcode * \par * Suppose the set of input \p thread_keys across the block of threads is * { [0,511,1,510], [2,509,3,508], [4,507,5,506], ..., [254,257,255,256] }. The * corresponding output \p thread_keys in those threads will be * { [0,128,256,384], [1,129,257,385], [2,130,258,386], ..., [127,255,383,511] }. 
* */ __device__ __forceinline__ void SortBlockedToStriped( KeyT (&keys)[ITEMS_PER_THREAD], ///< [in-out] Keys to sort int begin_bit = 0, ///< [in] [optional] The beginning (least-significant) bit index needed for key comparison int end_bit = sizeof(KeyT) * 8) ///< [in] [optional] The past-the-end (most-significant) bit index needed for key comparison { NullType values[ITEMS_PER_THREAD]; SortBlockedToStriped(keys, values, begin_bit, end_bit, Int2Type(), Int2Type()); } /** * \brief Performs an ascending radix sort across a [blocked arrangement](index.html#sec5sec3) of keys and values, leaving them in a [striped arrangement](index.html#sec5sec3). * * \par * - BlockRadixSort can only accommodate one associated tile of values. To "truck along" * more than one tile of values, simply perform a key-value sort of the keys paired * with a temporary value array that enumerates the key indices. The reordered indices * can then be used as a gather-vector for exchanging other associated tile data through * shared memory. * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates a sort of 512 integer keys and values that * are initially partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive pairs. The final partitioning is striped. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockRadixSort for a 1D block of 128 threads owning 4 integer keys and values each * typedef cub::BlockRadixSort BlockRadixSort; * * // Allocate shared memory for BlockRadixSort * __shared__ typename BlockRadixSort::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_keys[4]; * int thread_values[4]; * ... * * // Collectively sort the keys and values among block threads * BlockRadixSort(temp_storage).SortBlockedToStriped(thread_keys, thread_values); * * \endcode * \par * Suppose the set of input \p thread_keys across the block of threads is * { [0,511,1,510], [2,509,3,508], [4,507,5,506], ..., [254,257,255,256] }. The * corresponding output \p thread_keys in those threads will be * { [0,128,256,384], [1,129,257,385], [2,130,258,386], ..., [127,255,383,511] }. * */ __device__ __forceinline__ void SortBlockedToStriped( KeyT (&keys)[ITEMS_PER_THREAD], ///< [in-out] Keys to sort ValueT (&values)[ITEMS_PER_THREAD], ///< [in-out] Values to sort int begin_bit = 0, ///< [in] [optional] The beginning (least-significant) bit index needed for key comparison int end_bit = sizeof(KeyT) * 8) ///< [in] [optional] The past-the-end (most-significant) bit index needed for key comparison { SortBlockedToStriped(keys, values, begin_bit, end_bit, Int2Type(), Int2Type()); } /** * \brief Performs a descending radix sort across a [blocked arrangement](index.html#sec5sec3) of keys, leaving them in a [striped arrangement](index.html#sec5sec3). * * \par * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates a sort of 512 integer keys that * are initially partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive keys. The final partitioning is striped. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) 
* { * // Specialize BlockRadixSort for a 1D block of 128 threads owning 4 integer keys each * typedef cub::BlockRadixSort BlockRadixSort; * * // Allocate shared memory for BlockRadixSort * __shared__ typename BlockRadixSort::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_keys[4]; * ... * * // Collectively sort the keys * BlockRadixSort(temp_storage).SortBlockedToStriped(thread_keys); * * \endcode * \par * Suppose the set of input \p thread_keys across the block of threads is * { [0,511,1,510], [2,509,3,508], [4,507,5,506], ..., [254,257,255,256] }. The * corresponding output \p thread_keys in those threads will be * { [511,383,255,127], [386,258,130,2], [385,257,128,1], ..., [384,256,128,0] }. * */ __device__ __forceinline__ void SortDescendingBlockedToStriped( KeyT (&keys)[ITEMS_PER_THREAD], ///< [in-out] Keys to sort int begin_bit = 0, ///< [in] [optional] The beginning (least-significant) bit index needed for key comparison int end_bit = sizeof(KeyT) * 8) ///< [in] [optional] The past-the-end (most-significant) bit index needed for key comparison { NullType values[ITEMS_PER_THREAD]; SortBlockedToStriped(keys, values, begin_bit, end_bit, Int2Type(), Int2Type()); } /** * \brief Performs a descending radix sort across a [blocked arrangement](index.html#sec5sec3) of keys and values, leaving them in a [striped arrangement](index.html#sec5sec3). * * \par * - BlockRadixSort can only accommodate one associated tile of values. To "truck along" * more than one tile of values, simply perform a key-value sort of the keys paired * with a temporary value array that enumerates the key indices. The reordered indices * can then be used as a gather-vector for exchanging other associated tile data through * shared memory. * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates a sort of 512 integer keys and values that * are initially partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive pairs. The final partitioning is striped. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockRadixSort for a 1D block of 128 threads owning 4 integer keys and values each * typedef cub::BlockRadixSort BlockRadixSort; * * // Allocate shared memory for BlockRadixSort * __shared__ typename BlockRadixSort::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_keys[4]; * int thread_values[4]; * ... * * // Collectively sort the keys and values among block threads * BlockRadixSort(temp_storage).SortBlockedToStriped(thread_keys, thread_values); * * \endcode * \par * Suppose the set of input \p thread_keys across the block of threads is * { [0,511,1,510], [2,509,3,508], [4,507,5,506], ..., [254,257,255,256] }. The * corresponding output \p thread_keys in those threads will be * { [511,383,255,127], [386,258,130,2], [385,257,128,1], ..., [384,256,128,0] }. 
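 *
 * \par
 * A minimal sketch of one common use of this variant, ranking scores from highest to
 * lowest while carrying their original indices along as values (the kernel and buffer
 * names ExampleRankKernel, d_scores and d_ranked_idx are assumptions):
 * \code
 * #include <cub/block/block_radix_sort.cuh>   // or equivalently <cub/cub.cuh>
 *
 * __global__ void ExampleRankKernel(const float *d_scores, int *d_ranked_idx)
 * {
 *     // Specialize BlockRadixSort for 128 threads owning 4 float keys and 4 int values each
 *     typedef cub::BlockRadixSort<float, 128, 4, int> BlockRadixSort;
 *     __shared__ typename BlockRadixSort::TempStorage temp_storage;
 *
 *     float thread_keys[4];
 *     int   thread_values[4];
 *     for (int i = 0; i < 4; ++i)
 *     {
 *         int idx = threadIdx.x * 4 + i;   // blocked arrangement
 *         thread_keys[i]   = d_scores[idx];
 *         thread_values[i] = idx;          // remember where each score came from
 *     }
 *
 *     // Highest score first; the result is left in a striped arrangement
 *     BlockRadixSort(temp_storage).SortDescendingBlockedToStriped(thread_keys, thread_values);
 *
 *     // Coalesced striped store of the ranked indices
 *     for (int i = 0; i < 4; ++i)
 *         d_ranked_idx[i * 128 + threadIdx.x] = thread_values[i];
 * }
 * \endcode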
* */ __device__ __forceinline__ void SortDescendingBlockedToStriped( KeyT (&keys)[ITEMS_PER_THREAD], ///< [in-out] Keys to sort ValueT (&values)[ITEMS_PER_THREAD], ///< [in-out] Values to sort int begin_bit = 0, ///< [in] [optional] The beginning (least-significant) bit index needed for key comparison int end_bit = sizeof(KeyT) * 8) ///< [in] [optional] The past-the-end (most-significant) bit index needed for key comparison { SortBlockedToStriped(keys, values, begin_bit, end_bit, Int2Type(), Int2Type()); } //@} end member group }; /** * \example example_block_radix_sort.cu */ } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/block/block_raking_layout.cuh000066400000000000000000000140451411340063500236260ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::BlockRakingLayout provides a conflict-free shared memory layout abstraction for warp-raking across thread block data. */ #pragma once #include "../util_macro.cuh" #include "../util_arch.cuh" #include "../util_type.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief BlockRakingLayout provides a conflict-free shared memory layout abstraction for 1D raking across thread block data. ![](raking.png) * \ingroup BlockModule * * \par Overview * This type facilitates a shared memory usage pattern where a block of CUDA * threads places elements into shared memory and then reduces the active * parallelism to one "raking" warp of threads for serially aggregating consecutive * sequences of shared items. Padding is inserted to eliminate bank conflicts * (for most data types). * * \tparam T The data type to be exchanged. * \tparam BLOCK_THREADS The thread block size in threads. 
* \tparam PTX_ARCH [optional] \ptxversion */ template < typename T, int BLOCK_THREADS, int PTX_ARCH = CUB_PTX_ARCH> struct BlockRakingLayout { //--------------------------------------------------------------------- // Constants and type definitions //--------------------------------------------------------------------- enum { /// The total number of elements that need to be cooperatively reduced SHARED_ELEMENTS = BLOCK_THREADS, /// Maximum number of warp-synchronous raking threads MAX_RAKING_THREADS = CUB_MIN(BLOCK_THREADS, CUB_WARP_THREADS(PTX_ARCH)), /// Number of raking elements per warp-synchronous raking thread (rounded up) SEGMENT_LENGTH = (SHARED_ELEMENTS + MAX_RAKING_THREADS - 1) / MAX_RAKING_THREADS, /// Never use a raking thread that will have no valid data (e.g., when BLOCK_THREADS is 62 and SEGMENT_LENGTH is 2, we should only use 31 raking threads) RAKING_THREADS = (SHARED_ELEMENTS + SEGMENT_LENGTH - 1) / SEGMENT_LENGTH, /// Whether we will have bank conflicts (technically we should find out if the GCD is > 1) HAS_CONFLICTS = (CUB_SMEM_BANKS(PTX_ARCH) % SEGMENT_LENGTH == 0), /// Degree of bank conflicts (e.g., 4-way) CONFLICT_DEGREE = (HAS_CONFLICTS) ? (MAX_RAKING_THREADS * SEGMENT_LENGTH) / CUB_SMEM_BANKS(PTX_ARCH) : 1, /// Pad each segment length with one element if segment length is not relatively prime to warp size and can't be optimized as a vector load USE_SEGMENT_PADDING = ((SEGMENT_LENGTH & 1) == 0) && (SEGMENT_LENGTH > 2), /// Total number of elements in the raking grid GRID_ELEMENTS = RAKING_THREADS * (SEGMENT_LENGTH + USE_SEGMENT_PADDING), /// Whether or not we need bounds checking during raking (the number of reduction elements is not a multiple of the number of raking threads) UNGUARDED = (SHARED_ELEMENTS % RAKING_THREADS == 0), }; /** * \brief Shared memory storage type */ struct __align__(16) _TempStorage { T buff[BlockRakingLayout::GRID_ELEMENTS]; }; /// Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; /** * \brief Returns the location for the calling thread to place data into the grid */ static __device__ __forceinline__ T* PlacementPtr( TempStorage &temp_storage, unsigned int linear_tid) { // Offset for partial unsigned int offset = linear_tid; // Add in one padding element for every segment if (USE_SEGMENT_PADDING > 0) { offset += offset / SEGMENT_LENGTH; } // Incorporating a block of padding partials every shared memory segment return temp_storage.Alias().buff + offset; } /** * \brief Returns the location for the calling thread to begin sequential raking */ static __device__ __forceinline__ T* RakingPtr( TempStorage &temp_storage, unsigned int linear_tid) { return temp_storage.Alias().buff + (linear_tid * (SEGMENT_LENGTH + USE_SEGMENT_PADDING)); } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/block/block_reduce.cuh000066400000000000000000000613241411340063500222270ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. 
* * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * The cub::BlockReduce class provides [collective](index.html#sec0) methods for computing a parallel reduction of items partitioned across a CUDA thread block. */ #pragma once #include "specializations/block_reduce_raking.cuh" #include "specializations/block_reduce_raking_commutative_only.cuh" #include "specializations/block_reduce_warp_reductions.cuh" #include "../util_ptx.cuh" #include "../util_type.cuh" #include "../thread/thread_operators.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * Algorithmic variants ******************************************************************************/ /** * BlockReduceAlgorithm enumerates alternative algorithms for parallel * reduction across a CUDA thread block. */ enum BlockReduceAlgorithm { /** * \par Overview * An efficient "raking" reduction algorithm that only supports commutative * reduction operators (true for most operations, e.g., addition). * * \par * Execution is comprised of three phases: * -# Upsweep sequential reduction in registers (if threads contribute more * than one input each). Threads in warps other than the first warp place * their partial reductions into shared memory. * -# Upsweep sequential reduction in shared memory. Threads within the first * warp continue to accumulate by raking across segments of shared partial reductions * -# A warp-synchronous Kogge-Stone style reduction within the raking warp. * * \par * \image html block_reduce.png *
* \p BLOCK_REDUCE_RAKING data flow for a hypothetical 16-thread thread block and 4-thread raking warp.
* * \par Performance Considerations * - This variant performs less communication than BLOCK_REDUCE_RAKING_NON_COMMUTATIVE * and is preferable when the reduction operator is commutative. This variant * applies fewer reduction operators than BLOCK_REDUCE_WARP_REDUCTIONS, and can provide higher overall * throughput across the GPU when suitably occupied. However, turn-around latency may be * higher than to BLOCK_REDUCE_WARP_REDUCTIONS and thus less-desirable * when the GPU is under-occupied. */ BLOCK_REDUCE_RAKING_COMMUTATIVE_ONLY, /** * \par Overview * An efficient "raking" reduction algorithm that supports commutative * (e.g., addition) and non-commutative (e.g., string concatenation) reduction * operators. \blocked. * * \par * Execution is comprised of three phases: * -# Upsweep sequential reduction in registers (if threads contribute more * than one input each). Each thread then places the partial reduction * of its item(s) into shared memory. * -# Upsweep sequential reduction in shared memory. Threads within a * single warp rake across segments of shared partial reductions. * -# A warp-synchronous Kogge-Stone style reduction within the raking warp. * * \par * \image html block_reduce.png *
* \p BLOCK_REDUCE_RAKING data flow for a hypothetical 16-thread thread block and 4-thread raking warp.
* * \par Performance Considerations * - This variant performs more communication than BLOCK_REDUCE_RAKING * and is only preferable when the reduction operator is non-commutative. This variant * applies fewer reduction operators than BLOCK_REDUCE_WARP_REDUCTIONS, and can provide higher overall * throughput across the GPU when suitably occupied. However, turn-around latency may be * higher than to BLOCK_REDUCE_WARP_REDUCTIONS and thus less-desirable * when the GPU is under-occupied. */ BLOCK_REDUCE_RAKING, /** * \par Overview * A quick "tiled warp-reductions" reduction algorithm that supports commutative * (e.g., addition) and non-commutative (e.g., string concatenation) reduction * operators. * * \par * Execution is comprised of four phases: * -# Upsweep sequential reduction in registers (if threads contribute more * than one input each). Each thread then places the partial reduction * of its item(s) into shared memory. * -# Compute a shallow, but inefficient warp-synchronous Kogge-Stone style * reduction within each warp. * -# A propagation phase where the warp reduction outputs in each warp are * updated with the aggregate from each preceding warp. * * \par * \image html block_scan_warpscans.png *
* \p BLOCK_REDUCE_WARP_REDUCTIONS data flow for a hypothetical 16-thread thread block and 4-thread raking warp.
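 *
 * \par
 * As a brief illustration, this variant (which is also the default) is requested
 * through the ALGORITHM template parameter of cub::BlockReduce, e.g. for a 1D block
 * of 128 \p int items (a minimal sketch, assuming that block shape):
 * \code
 * // Explicitly request the warp-reductions variant
 * typedef cub::BlockReduce<int, 128, cub::BLOCK_REDUCE_WARP_REDUCTIONS> BlockReduce;
 * \endcode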
* * \par Performance Considerations * - This variant applies more reduction operators than BLOCK_REDUCE_RAKING * or BLOCK_REDUCE_RAKING_NON_COMMUTATIVE, which may result in lower overall * throughput across the GPU. However turn-around latency may be lower and * thus useful when the GPU is under-occupied. */ BLOCK_REDUCE_WARP_REDUCTIONS, }; /****************************************************************************** * Block reduce ******************************************************************************/ /** * \brief The BlockReduce class provides [collective](index.html#sec0) methods for computing a parallel reduction of items partitioned across a CUDA thread block. ![](reduce_logo.png) * \ingroup BlockModule * * \tparam T Data type being reduced * \tparam BLOCK_DIM_X The thread block length in threads along the X dimension * \tparam ALGORITHM [optional] cub::BlockReduceAlgorithm enumerator specifying the underlying algorithm to use (default: cub::BLOCK_REDUCE_WARP_REDUCTIONS) * \tparam BLOCK_DIM_Y [optional] The thread block length in threads along the Y dimension (default: 1) * \tparam BLOCK_DIM_Z [optional] The thread block length in threads along the Z dimension (default: 1) * \tparam PTX_ARCH [optional] \ptxversion * * \par Overview * - A reduction (or fold) * uses a binary combining operator to compute a single aggregate from a list of input elements. * - \rowmajor * - BlockReduce can be optionally specialized by algorithm to accommodate different latency/throughput workload profiles: * -# cub::BLOCK_REDUCE_RAKING_COMMUTATIVE_ONLY. An efficient "raking" reduction algorithm that only supports commutative reduction operators. [More...](\ref cub::BlockReduceAlgorithm) * -# cub::BLOCK_REDUCE_RAKING. An efficient "raking" reduction algorithm that supports commutative and non-commutative reduction operators. [More...](\ref cub::BlockReduceAlgorithm) * -# cub::BLOCK_REDUCE_WARP_REDUCTIONS. A quick "tiled warp-reductions" reduction algorithm that supports commutative and non-commutative reduction operators. [More...](\ref cub::BlockReduceAlgorithm) * * \par Performance Considerations * - \granularity * - Very efficient (only one synchronization barrier). * - Incurs zero bank conflicts for most types * - Computation is slightly more efficient (i.e., having lower instruction overhead) for: * - Summation (vs. generic reduction) * - \p BLOCK_THREADS is a multiple of the architecture's warp size * - Every thread has a valid input (i.e., full vs. partial-tiles) * - See cub::BlockReduceAlgorithm for performance details regarding algorithmic alternatives * * \par A Simple Example * \blockcollective{BlockReduce} * \par * The code snippet below illustrates a sum reduction of 512 integer items that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockReduce for a 1D block of 128 threads on type int * typedef cub::BlockReduce BlockReduce; * * // Allocate shared memory for BlockReduce * __shared__ typename BlockReduce::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... 
* * // Compute the block-wide sum for thread0 * int aggregate = BlockReduce(temp_storage).Sum(thread_data); * * \endcode * */ template < typename T, int BLOCK_DIM_X, BlockReduceAlgorithm ALGORITHM = BLOCK_REDUCE_WARP_REDUCTIONS, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH> class BlockReduce { private: /****************************************************************************** * Constants and type definitions ******************************************************************************/ /// Constants enum { /// The thread block size in threads BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, }; typedef BlockReduceWarpReductions WarpReductions; typedef BlockReduceRakingCommutativeOnly RakingCommutativeOnly; typedef BlockReduceRaking Raking; /// Internal specialization type typedef typename If<(ALGORITHM == BLOCK_REDUCE_WARP_REDUCTIONS), WarpReductions, typename If<(ALGORITHM == BLOCK_REDUCE_RAKING_COMMUTATIVE_ONLY), RakingCommutativeOnly, Raking>::Type>::Type InternalBlockReduce; // BlockReduceRaking /// Shared memory storage layout type for BlockReduce typedef typename InternalBlockReduce::TempStorage _TempStorage; /****************************************************************************** * Utility methods ******************************************************************************/ /// Internal storage allocator __device__ __forceinline__ _TempStorage& PrivateStorage() { __shared__ _TempStorage private_storage; return private_storage; } /****************************************************************************** * Thread fields ******************************************************************************/ /// Shared storage reference _TempStorage &temp_storage; /// Linear thread-id unsigned int linear_tid; public: /// \smemstorage{BlockReduce} struct TempStorage : Uninitialized<_TempStorage> {}; /******************************************************************//** * \name Collective constructors *********************************************************************/ //@{ /** * \brief Collective constructor using a private static allocation of shared memory as temporary storage. */ __device__ __forceinline__ BlockReduce() : temp_storage(PrivateStorage()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} /** * \brief Collective constructor using the specified memory allocation as temporary storage. */ __device__ __forceinline__ BlockReduce( TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage : temp_storage(temp_storage.Alias()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} //@} end member group /******************************************************************//** * \name Generic reductions *********************************************************************/ //@{ /** * \brief Computes a block-wide reduction for thread0 using the specified binary reduction functor. Each thread contributes one input element. * * \par * - The return value is undefined in threads other than thread0. * - \rowmajor * - \smemreuse * * \par Snippet * The code snippet below illustrates a max reduction of 128 integer items that * are partitioned across 128 threads. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) 
* { * // Specialize BlockReduce for a 1D block of 128 threads on type int * typedef cub::BlockReduce BlockReduce; * * // Allocate shared memory for BlockReduce * __shared__ typename BlockReduce::TempStorage temp_storage; * * // Each thread obtains an input item * int thread_data; * ... * * // Compute the block-wide max for thread0 * int aggregate = BlockReduce(temp_storage).Reduce(thread_data, cub::Max()); * * \endcode * * \tparam ReductionOp [inferred] Binary reduction functor type having member T operator()(const T &a, const T &b) */ template __device__ __forceinline__ T Reduce( T input, ///< [in] Calling thread's input ReductionOp reduction_op) ///< [in] Binary reduction functor { return InternalBlockReduce(temp_storage).template Reduce(input, BLOCK_THREADS, reduction_op); } /** * \brief Computes a block-wide reduction for thread0 using the specified binary reduction functor. Each thread contributes an array of consecutive input elements. * * \par * - The return value is undefined in threads other than thread0. * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates a max reduction of 512 integer items that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockReduce for a 1D block of 128 threads on type int * typedef cub::BlockReduce BlockReduce; * * // Allocate shared memory for BlockReduce * __shared__ typename BlockReduce::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... * * // Compute the block-wide max for thread0 * int aggregate = BlockReduce(temp_storage).Reduce(thread_data, cub::Max()); * * \endcode * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam ReductionOp [inferred] Binary reduction functor type having member T operator()(const T &a, const T &b) */ template < int ITEMS_PER_THREAD, typename ReductionOp> __device__ __forceinline__ T Reduce( T (&inputs)[ITEMS_PER_THREAD], ///< [in] Calling thread's input segment ReductionOp reduction_op) ///< [in] Binary reduction functor { // Reduce partials T partial = internal::ThreadReduce(inputs, reduction_op); return Reduce(partial, reduction_op); } /** * \brief Computes a block-wide reduction for thread0 using the specified binary reduction functor. The first \p num_valid threads each contribute one input element. * * \par * - The return value is undefined in threads other than thread0. * - \rowmajor * - \smemreuse * * \par Snippet * The code snippet below illustrates a max reduction of a partially-full tile of integer items that * are partitioned across 128 threads. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(int num_valid, ...) * { * // Specialize BlockReduce for a 1D block of 128 threads on type int * typedef cub::BlockReduce BlockReduce; * * // Allocate shared memory for BlockReduce * __shared__ typename BlockReduce::TempStorage temp_storage; * * // Each thread obtains an input item * int thread_data; * if (threadIdx.x < num_valid) thread_data = ... 
* * // Compute the block-wide max for thread0 * int aggregate = BlockReduce(temp_storage).Reduce(thread_data, cub::Max(), num_valid); * * \endcode * * \tparam ReductionOp [inferred] Binary reduction functor type having member T operator()(const T &a, const T &b) */ template __device__ __forceinline__ T Reduce( T input, ///< [in] Calling thread's input ReductionOp reduction_op, ///< [in] Binary reduction functor int num_valid) ///< [in] Number of threads containing valid elements (may be less than BLOCK_THREADS) { // Determine if we scan skip bounds checking if (num_valid >= BLOCK_THREADS) { return InternalBlockReduce(temp_storage).template Reduce(input, num_valid, reduction_op); } else { return InternalBlockReduce(temp_storage).template Reduce(input, num_valid, reduction_op); } } //@} end member group /******************************************************************//** * \name Summation reductions *********************************************************************/ //@{ /** * \brief Computes a block-wide reduction for thread0 using addition (+) as the reduction operator. Each thread contributes one input element. * * \par * - The return value is undefined in threads other than thread0. * - \rowmajor * - \smemreuse * * \par Snippet * The code snippet below illustrates a sum reduction of 128 integer items that * are partitioned across 128 threads. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockReduce for a 1D block of 128 threads on type int * typedef cub::BlockReduce BlockReduce; * * // Allocate shared memory for BlockReduce * __shared__ typename BlockReduce::TempStorage temp_storage; * * // Each thread obtains an input item * int thread_data; * ... * * // Compute the block-wide sum for thread0 * int aggregate = BlockReduce(temp_storage).Sum(thread_data); * * \endcode * */ __device__ __forceinline__ T Sum( T input) ///< [in] Calling thread's input { return InternalBlockReduce(temp_storage).template Sum(input, BLOCK_THREADS); } /** * \brief Computes a block-wide reduction for thread0 using addition (+) as the reduction operator. Each thread contributes an array of consecutive input elements. * * \par * - The return value is undefined in threads other than thread0. * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates a sum reduction of 512 integer items that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockReduce for a 1D block of 128 threads on type int * typedef cub::BlockReduce BlockReduce; * * // Allocate shared memory for BlockReduce * __shared__ typename BlockReduce::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... * * // Compute the block-wide sum for thread0 * int aggregate = BlockReduce(temp_storage).Sum(thread_data); * * \endcode * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. */ template __device__ __forceinline__ T Sum( T (&inputs)[ITEMS_PER_THREAD]) ///< [in] Calling thread's input segment { // Reduce partials T partial = internal::ThreadReduce(inputs, cub::Sum()); return Sum(partial); } /** * \brief Computes a block-wide reduction for thread0 using addition (+) as the reduction operator. The first \p num_valid threads each contribute one input element. 
* * \par * - The return value is undefined in threads other than thread0. * - \rowmajor * - \smemreuse * * \par Snippet * The code snippet below illustrates a sum reduction of a partially-full tile of integer items that * are partitioned across 128 threads. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(int num_valid, ...) * { * // Specialize BlockReduce for a 1D block of 128 threads on type int * typedef cub::BlockReduce BlockReduce; * * // Allocate shared memory for BlockReduce * __shared__ typename BlockReduce::TempStorage temp_storage; * * // Each thread obtains an input item (up to num_items) * int thread_data; * if (threadIdx.x < num_valid) * thread_data = ... * * // Compute the block-wide sum for thread0 * int aggregate = BlockReduce(temp_storage).Sum(thread_data, num_valid); * * \endcode * */ __device__ __forceinline__ T Sum( T input, ///< [in] Calling thread's input int num_valid) ///< [in] Number of threads containing valid elements (may be less than BLOCK_THREADS) { // Determine if we scan skip bounds checking if (num_valid >= BLOCK_THREADS) { return InternalBlockReduce(temp_storage).template Sum(input, num_valid); } else { return InternalBlockReduce(temp_storage).template Sum(input, num_valid); } } //@} end member group }; /** * \example example_block_reduce.cu */ } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/block/block_scan.cuh000066400000000000000000003111021411340063500216740ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * The cub::BlockScan class provides [collective](index.html#sec0) methods for computing a parallel prefix sum/scan of items partitioned across a CUDA thread block. 
*/ #pragma once #include "specializations/block_scan_raking.cuh" #include "specializations/block_scan_warp_scans.cuh" #include "../util_arch.cuh" #include "../util_type.cuh" #include "../util_ptx.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * Algorithmic variants ******************************************************************************/ /** * \brief BlockScanAlgorithm enumerates alternative algorithms for cub::BlockScan to compute a parallel prefix scan across a CUDA thread block. */ enum BlockScanAlgorithm { /** * \par Overview * An efficient "raking reduce-then-scan" prefix scan algorithm. Execution is comprised of five phases: * -# Upsweep sequential reduction in registers (if threads contribute more than one input each). Each thread then places the partial reduction of its item(s) into shared memory. * -# Upsweep sequential reduction in shared memory. Threads within a single warp rake across segments of shared partial reductions. * -# A warp-synchronous Kogge-Stone style exclusive scan within the raking warp. * -# Downsweep sequential exclusive scan in shared memory. Threads within a single warp rake across segments of shared partial reductions, seeded with the warp-scan output. * -# Downsweep sequential scan in registers (if threads contribute more than one input), seeded with the raking scan output. * * \par * \image html block_scan_raking.png *
* \p BLOCK_SCAN_RAKING data flow for a hypothetical 16-thread thread block and 4-thread raking warp.
* * \par Performance Considerations * - Although this variant may suffer longer turnaround latencies when the * GPU is under-occupied, it can often provide higher overall throughput * across the GPU when suitably occupied. */ BLOCK_SCAN_RAKING, /** * \par Overview * Similar to cub::BLOCK_SCAN_RAKING, but with fewer shared memory reads at * the expense of higher register pressure. Raking threads preserve their * "upsweep" segment of values in registers while performing warp-synchronous * scan, allowing the "downsweep" not to re-read them from shared memory. */ BLOCK_SCAN_RAKING_MEMOIZE, /** * \par Overview * A quick "tiled warpscans" prefix scan algorithm. Execution is comprised of four phases: * -# Upsweep sequential reduction in registers (if threads contribute more than one input each). Each thread then places the partial reduction of its item(s) into shared memory. * -# Compute a shallow, but inefficient warp-synchronous Kogge-Stone style scan within each warp. * -# A propagation phase where the warp scan outputs in each warp are updated with the aggregate from each preceding warp. * -# Downsweep sequential scan in registers (if threads contribute more than one input), seeded with the raking scan output. * * \par * \image html block_scan_warpscans.png *
* \p BLOCK_SCAN_WARP_SCANS data flow for a hypothetical 16-thread thread block and 4-thread raking warp.
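 *
 * \par
 * As a brief illustration, this variant is requested through the ALGORITHM template
 * parameter of cub::BlockScan, e.g. for a 1D block of 128 \p int items (a minimal
 * sketch, assuming that block shape):
 * \code
 * // Explicitly request the tiled-warpscans variant
 * typedef cub::BlockScan<int, 128, cub::BLOCK_SCAN_WARP_SCANS> BlockScan;
 * \endcode
 * Note that BlockScan silently falls back to BLOCK_SCAN_RAKING when the thread block
 * size is not a multiple of the architectural warp size (see SAFE_ALGORITHM below).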
* * \par Performance Considerations * - Although this variant may suffer lower overall throughput across the * GPU because due to a heavy reliance on inefficient warpscans, it can * often provide lower turnaround latencies when the GPU is under-occupied. */ BLOCK_SCAN_WARP_SCANS, }; /****************************************************************************** * Block scan ******************************************************************************/ /** * \brief The BlockScan class provides [collective](index.html#sec0) methods for computing a parallel prefix sum/scan of items partitioned across a CUDA thread block. ![](block_scan_logo.png) * \ingroup BlockModule * * \tparam T Data type being scanned * \tparam BLOCK_DIM_X The thread block length in threads along the X dimension * \tparam ALGORITHM [optional] cub::BlockScanAlgorithm enumerator specifying the underlying algorithm to use (default: cub::BLOCK_SCAN_RAKING) * \tparam BLOCK_DIM_Y [optional] The thread block length in threads along the Y dimension (default: 1) * \tparam BLOCK_DIM_Z [optional] The thread block length in threads along the Z dimension (default: 1) * \tparam PTX_ARCH [optional] \ptxversion * * \par Overview * - Given a list of input elements and a binary reduction operator, a [prefix scan](http://en.wikipedia.org/wiki/Prefix_sum) * produces an output list where each element is computed to be the reduction * of the elements occurring earlier in the input list. Prefix sum * connotes a prefix scan with the addition operator. The term \em inclusive indicates * that the ith output reduction incorporates the ith input. * The term \em exclusive indicates the ith input is not incorporated into * the ith output reduction. * - \rowmajor * - BlockScan can be optionally specialized by algorithm to accommodate different workload profiles: * -# cub::BLOCK_SCAN_RAKING. An efficient (high throughput) "raking reduce-then-scan" prefix scan algorithm. [More...](\ref cub::BlockScanAlgorithm) * -# cub::BLOCK_SCAN_RAKING_MEMOIZE. Similar to cub::BLOCK_SCAN_RAKING, but having higher throughput at the expense of additional register pressure for intermediate storage. [More...](\ref cub::BlockScanAlgorithm) * -# cub::BLOCK_SCAN_WARP_SCANS. A quick (low latency) "tiled warpscans" prefix scan algorithm. [More...](\ref cub::BlockScanAlgorithm) * * \par Performance Considerations * - \granularity * - Uses special instructions when applicable (e.g., warp \p SHFL) * - Uses synchronization-free communication between warp lanes when applicable * - Invokes a minimal number of minimal block-wide synchronization barriers (only * one or two depending on algorithm selection) * - Incurs zero bank conflicts for most types * - Computation is slightly more efficient (i.e., having lower instruction overhead) for: * - Prefix sum variants (vs. generic scan) * - \blocksize * - See cub::BlockScanAlgorithm for performance details regarding algorithmic alternatives * * \par A Simple Example * \blockcollective{BlockScan} * \par * The code snippet below illustrates an exclusive prefix sum of 512 integer items that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) 
* { * // Specialize BlockScan for a 1D block of 128 threads on type int * typedef cub::BlockScan BlockScan; * * // Allocate shared memory for BlockScan * __shared__ typename BlockScan::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... * * // Collectively compute the block-wide exclusive prefix sum * BlockScan(temp_storage).ExclusiveSum(thread_data, thread_data); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is * {[1,1,1,1], [1,1,1,1], ..., [1,1,1,1]}. * The corresponding output \p thread_data in those threads will be * {[0,1,2,3], [4,5,6,7], ..., [508,509,510,511]}. * */ template < typename T, int BLOCK_DIM_X, BlockScanAlgorithm ALGORITHM = BLOCK_SCAN_RAKING, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH> class BlockScan { private: /****************************************************************************** * Constants and type definitions ******************************************************************************/ /// Constants enum { /// The thread block size in threads BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, }; /** * Ensure the template parameterization meets the requirements of the * specified algorithm. Currently, the BLOCK_SCAN_WARP_SCANS policy * cannot be used with thread block sizes not a multiple of the * architectural warp size. */ static const BlockScanAlgorithm SAFE_ALGORITHM = ((ALGORITHM == BLOCK_SCAN_WARP_SCANS) && (BLOCK_THREADS % CUB_WARP_THREADS(PTX_ARCH) != 0)) ? BLOCK_SCAN_RAKING : ALGORITHM; typedef BlockScanWarpScans WarpScans; typedef BlockScanRaking Raking; /// Define the delegate type for the desired algorithm typedef typename If<(SAFE_ALGORITHM == BLOCK_SCAN_WARP_SCANS), WarpScans, Raking>::Type InternalBlockScan; /// Shared memory storage layout type for BlockScan typedef typename InternalBlockScan::TempStorage _TempStorage; /****************************************************************************** * Thread fields ******************************************************************************/ /// Shared storage reference _TempStorage &temp_storage; /// Linear thread-id unsigned int linear_tid; /****************************************************************************** * Utility methods ******************************************************************************/ /// Internal storage allocator __device__ __forceinline__ _TempStorage& PrivateStorage() { __shared__ _TempStorage private_storage; return private_storage; } /****************************************************************************** * Public types ******************************************************************************/ public: /// \smemstorage{BlockScan} struct TempStorage : Uninitialized<_TempStorage> {}; /******************************************************************//** * \name Collective constructors *********************************************************************/ //@{ /** * \brief Collective constructor using a private static allocation of shared memory as temporary storage. */ __device__ __forceinline__ BlockScan() : temp_storage(PrivateStorage()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} /** * \brief Collective constructor using the specified memory allocation as temporary storage. 
*/ __device__ __forceinline__ BlockScan( TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage : temp_storage(temp_storage.Alias()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} //@} end member group /******************************************************************//** * \name Exclusive prefix sum operations *********************************************************************/ //@{ /** * \brief Computes an exclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes one input element. The value of 0 is applied as the initial value, and is assigned to \p output in thread0. * * \par * - \identityzero * - \rowmajor * - \smemreuse * * \par Snippet * The code snippet below illustrates an exclusive prefix sum of 128 integer items that * are partitioned across 128 threads. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockScan for a 1D block of 128 threads on type int * typedef cub::BlockScan BlockScan; * * // Allocate shared memory for BlockScan * __shared__ typename BlockScan::TempStorage temp_storage; * * // Obtain input item for each thread * int thread_data; * ... * * // Collectively compute the block-wide exclusive prefix sum * BlockScan(temp_storage).ExclusiveSum(thread_data, thread_data); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is 1, 1, ..., 1. The * corresponding output \p thread_data in those threads will be 0, 1, ..., 127. * */ __device__ __forceinline__ void ExclusiveSum( T input, ///< [in] Calling thread's input item T &output) ///< [out] Calling thread's output item (may be aliased to \p input) { T initial_value = 0; ExclusiveScan(input, output, initial_value, cub::Sum()); } /** * \brief Computes an exclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes one input element. The value of 0 is applied as the initial value, and is assigned to \p output in thread0. Also provides every thread with the block-wide \p block_aggregate of all inputs. * * \par * - \identityzero * - \rowmajor * - \smemreuse * * \par Snippet * The code snippet below illustrates an exclusive prefix sum of 128 integer items that * are partitioned across 128 threads. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockScan for a 1D block of 128 threads on type int * typedef cub::BlockScan BlockScan; * * // Allocate shared memory for BlockScan * __shared__ typename BlockScan::TempStorage temp_storage; * * // Obtain input item for each thread * int thread_data; * ... * * // Collectively compute the block-wide exclusive prefix sum * int block_aggregate; * BlockScan(temp_storage).ExclusiveSum(thread_data, thread_data, block_aggregate); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is 1, 1, ..., 1. The * corresponding output \p thread_data in those threads will be 0, 1, ..., 127. * Furthermore the value \p 128 will be stored in \p block_aggregate for all threads. 
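 *
 * \par
 * A minimal sketch of a typical use of \p block_aggregate, block-local stream
 * compaction: the exclusive sum of per-thread flags gives each thread its output
 * slot, and the aggregate is the number of selected items. The kernel and buffer
 * names (ExampleCompactKernel, d_in, d_out, d_block_counts) and the keep-positive
 * predicate are illustrative assumptions:
 * \code
 * #include <cub/block/block_scan.cuh>   // or equivalently <cub/cub.cuh>
 *
 * __global__ void ExampleCompactKernel(const int *d_in, int *d_out, int *d_block_counts)
 * {
 *     // Specialize BlockScan for a 1D block of 128 threads on type int
 *     typedef cub::BlockScan<int, 128> BlockScan;
 *     __shared__ typename BlockScan::TempStorage temp_storage;
 *     __shared__ int smem_out[128];
 *
 *     int item = d_in[blockIdx.x * 128 + threadIdx.x];
 *     int flag = (item > 0) ? 1 : 0;            // keep positive items (arbitrary predicate)
 *
 *     // Exclusive prefix sum of the flags; block_aggregate = number of kept items
 *     int offset, block_aggregate;
 *     BlockScan(temp_storage).ExclusiveSum(flag, offset, block_aggregate);
 *
 *     // Scatter the kept items to their compacted positions within the block
 *     if (flag)
 *         smem_out[offset] = item;
 *     __syncthreads();
 *
 *     // Write out the per-block count and the compacted items
 *     if (threadIdx.x == 0)
 *         d_block_counts[blockIdx.x] = block_aggregate;
 *     if (threadIdx.x < block_aggregate)
 *         d_out[blockIdx.x * 128 + threadIdx.x] = smem_out[threadIdx.x];
 * }
 * \endcode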
* */ __device__ __forceinline__ void ExclusiveSum( T input, ///< [in] Calling thread's input item T &output, ///< [out] Calling thread's output item (may be aliased to \p input) T &block_aggregate) ///< [out] block-wide aggregate reduction of input items { T initial_value = 0; ExclusiveScan(input, output, initial_value, cub::Sum(), block_aggregate); } /** * \brief Computes an exclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes one input element. Instead of using 0 as the block-wide prefix, the call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. * * \par * - \identityzero * - The \p block_prefix_callback_op functor must implement a member function T operator()(T block_aggregate). * The functor's input parameter \p block_aggregate is the same value also returned by the scan operation. * The functor will be invoked by the first warp of threads in the block, however only the return value from * lane0 is applied as the block-wide prefix. Can be stateful. * - \rowmajor * - \smemreuse * * \par Snippet * The code snippet below illustrates a single thread block that progressively * computes an exclusive prefix sum over multiple "tiles" of input using a * prefix functor to maintain a running total between block-wide scans. Each tile consists * of 128 integer items that are partitioned across 128 threads. * \par * \code * #include // or equivalently * * // A stateful callback functor that maintains a running prefix to be applied * // during consecutive scan operations. * struct BlockPrefixCallbackOp * { * // Running prefix * int running_total; * * // Constructor * __device__ BlockPrefixCallbackOp(int running_total) : running_total(running_total) {} * * // Callback operator to be entered by the first warp of threads in the block. * // Thread-0 is responsible for returning a value for seeding the block-wide scan. * __device__ int operator()(int block_aggregate) * { * int old_prefix = running_total; * running_total += block_aggregate; * return old_prefix; * } * }; * * __global__ void ExampleKernel(int *d_data, int num_items, ...) * { * // Specialize BlockScan for a 1D block of 128 threads * typedef cub::BlockScan BlockScan; * * // Allocate shared memory for BlockScan * __shared__ typename BlockScan::TempStorage temp_storage; * * // Initialize running total * BlockPrefixCallbackOp prefix_op(0); * * // Have the block iterate over segments of items * for (int block_offset = 0; block_offset < num_items; block_offset += 128) * { * // Load a segment of consecutive items that are blocked across threads * int thread_data = d_data[block_offset]; * * // Collectively compute the block-wide exclusive prefix sum * BlockScan(temp_storage).ExclusiveSum( * thread_data, thread_data, prefix_op); * CTA_SYNC(); * * // Store scanned items to output segment * d_data[block_offset] = thread_data; * } * \endcode * \par * Suppose the input \p d_data is 1, 1, 1, 1, 1, 1, 1, 1, .... * The corresponding output for the first segment will be 0, 1, ..., 127. * The output for the second segment will be 128, 129, ..., 255. 
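 *
 * \par
 * The running prefix does not have to start at zero; seeding the callback with a
 * pre-existing offset, for instance a hypothetical \p base_offset of items written
 * by an earlier pass, follows the same pattern:
 * \code
 * BlockPrefixCallbackOp prefix_op(base_offset);   // base_offset: assumed caller-supplied start
 * \endcode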
* * \tparam BlockPrefixCallbackOp [inferred] Call-back functor type having member T operator()(T block_aggregate) */ template __device__ __forceinline__ void ExclusiveSum( T input, ///< [in] Calling thread's input item T &output, ///< [out] Calling thread's output item (may be aliased to \p input) BlockPrefixCallbackOp &block_prefix_callback_op) ///< [in-out] [warp0 only] Call-back functor for specifying a block-wide prefix to be applied to the logical input sequence. { ExclusiveScan(input, output, cub::Sum(), block_prefix_callback_op); } //@} end member group /******************************************************************//** * \name Exclusive prefix sum operations (multiple data per thread) *********************************************************************/ //@{ /** * \brief Computes an exclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes an array of consecutive input elements. The value of 0 is applied as the initial value, and is assigned to \p output[0] in thread0. * * \par * - \identityzero * - \blocked * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates an exclusive prefix sum of 512 integer items that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockScan for a 1D block of 128 threads on type int * typedef cub::BlockScan BlockScan; * * // Allocate shared memory for BlockScan * __shared__ typename BlockScan::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... * * // Collectively compute the block-wide exclusive prefix sum * BlockScan(temp_storage).ExclusiveSum(thread_data, thread_data); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is { [1,1,1,1], [1,1,1,1], ..., [1,1,1,1] }. The * corresponding output \p thread_data in those threads will be { [0,1,2,3], [4,5,6,7], ..., [508,509,510,511] }. * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. */ template __device__ __forceinline__ void ExclusiveSum( T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items T (&output)[ITEMS_PER_THREAD]) ///< [out] Calling thread's output items (may be aliased to \p input) { T initial_value = 0; ExclusiveScan(input, output, initial_value, cub::Sum()); } /** * \brief Computes an exclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes an array of consecutive input elements. The value of 0 is applied as the initial value, and is assigned to \p output[0] in thread0. Also provides every thread with the block-wide \p block_aggregate of all inputs. * * \par * - \identityzero * - \blocked * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates an exclusive prefix sum of 512 integer items that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) 
* { * // Specialize BlockScan for a 1D block of 128 threads on type int * typedef cub::BlockScan BlockScan; * * // Allocate shared memory for BlockScan * __shared__ typename BlockScan::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... * * // Collectively compute the block-wide exclusive prefix sum * int block_aggregate; * BlockScan(temp_storage).ExclusiveSum(thread_data, thread_data, block_aggregate); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is { [1,1,1,1], [1,1,1,1], ..., [1,1,1,1] }. The * corresponding output \p thread_data in those threads will be { [0,1,2,3], [4,5,6,7], ..., [508,509,510,511] }. * Furthermore the value \p 512 will be stored in \p block_aggregate for all threads. * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. */ template __device__ __forceinline__ void ExclusiveSum( T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) T &block_aggregate) ///< [out] block-wide aggregate reduction of input items { // Reduce consecutive thread items in registers T initial_value = 0; ExclusiveScan(input, output, initial_value, cub::Sum(), block_aggregate); } /** * \brief Computes an exclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes an array of consecutive input elements. Instead of using 0 as the block-wide prefix, the call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. * * \par * - \identityzero * - The \p block_prefix_callback_op functor must implement a member function T operator()(T block_aggregate). * The functor's input parameter \p block_aggregate is the same value also returned by the scan operation. * The functor will be invoked by the first warp of threads in the block, however only the return value from * lane0 is applied as the block-wide prefix. Can be stateful. * - \blocked * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates a single thread block that progressively * computes an exclusive prefix sum over multiple "tiles" of input using a * prefix functor to maintain a running total between block-wide scans. Each tile consists * of 512 integer items that are partitioned in a [blocked arrangement](index.html#sec5sec3) * across 128 threads where each thread owns 4 consecutive items. * \par * \code * #include // or equivalently * * // A stateful callback functor that maintains a running prefix to be applied * // during consecutive scan operations. * struct BlockPrefixCallbackOp * { * // Running prefix * int running_total; * * // Constructor * __device__ BlockPrefixCallbackOp(int running_total) : running_total(running_total) {} * * // Callback operator to be entered by the first warp of threads in the block. * // Thread-0 is responsible for returning a value for seeding the block-wide scan. * __device__ int operator()(int block_aggregate) * { * int old_prefix = running_total; * running_total += block_aggregate; * return old_prefix; * } * }; * * __global__ void ExampleKernel(int *d_data, int num_items, ...) 
* { * // Specialize BlockLoad, BlockStore, and BlockScan for a 1D block of 128 threads, 4 ints per thread * typedef cub::BlockLoad BlockLoad; * typedef cub::BlockStore BlockStore; * typedef cub::BlockScan BlockScan; * * // Allocate aliased shared memory for BlockLoad, BlockStore, and BlockScan * __shared__ union { * typename BlockLoad::TempStorage load; * typename BlockScan::TempStorage scan; * typename BlockStore::TempStorage store; * } temp_storage; * * // Initialize running total * BlockPrefixCallbackOp prefix_op(0); * * // Have the block iterate over segments of items * for (int block_offset = 0; block_offset < num_items; block_offset += 128 * 4) * { * // Load a segment of consecutive items that are blocked across threads * int thread_data[4]; * BlockLoad(temp_storage.load).Load(d_data + block_offset, thread_data); * CTA_SYNC(); * * // Collectively compute the block-wide exclusive prefix sum * int block_aggregate; * BlockScan(temp_storage.scan).ExclusiveSum( * thread_data, thread_data, prefix_op); * CTA_SYNC(); * * // Store scanned items to output segment * BlockStore(temp_storage.store).Store(d_data + block_offset, thread_data); * CTA_SYNC(); * } * \endcode * \par * Suppose the input \p d_data is 1, 1, 1, 1, 1, 1, 1, 1, .... * The corresponding output for the first segment will be 0, 1, 2, 3, ..., 510, 511. * The output for the second segment will be 512, 513, 514, 515, ..., 1022, 1023. * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam BlockPrefixCallbackOp [inferred] Call-back functor type having member T operator()(T block_aggregate) */ template < int ITEMS_PER_THREAD, typename BlockPrefixCallbackOp> __device__ __forceinline__ void ExclusiveSum( T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) BlockPrefixCallbackOp &block_prefix_callback_op) ///< [in-out] [warp0 only] Call-back functor for specifying a block-wide prefix to be applied to the logical input sequence. { ExclusiveScan(input, output, cub::Sum(), block_prefix_callback_op); } //@} end member group // Exclusive prefix sums /******************************************************************//** * \name Exclusive prefix scan operations *********************************************************************/ //@{ /** * \brief Computes an exclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. * * \par * - Supports non-commutative scan operators. * - \rowmajor * - \smemreuse * * \par Snippet * The code snippet below illustrates an exclusive prefix max scan of 128 integer items that * are partitioned across 128 threads. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockScan for a 1D block of 128 threads on type int * typedef cub::BlockScan BlockScan; * * // Allocate shared memory for BlockScan * __shared__ typename BlockScan::TempStorage temp_storage; * * // Obtain input item for each thread * int thread_data; * ... * * // Collectively compute the block-wide exclusive prefix max scan * BlockScan(temp_storage).ExclusiveScan(thread_data, thread_data, INT_MIN, cub::Max()); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is 0, -1, 2, -3, ..., 126, -127. The * corresponding output \p thread_data in those threads will be INT_MIN, 0, 0, 2, ..., 124, 126. 
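 *
 * \par
 * A minimal sketch of the same interface with a different functor, an exclusive
 * running minimum seeded with INT_MAX (the kernel and buffer names
 * ExampleRunningMinKernel, d_in and d_out are assumptions):
 * \code
 * #include <climits>
 * #include <cub/block/block_scan.cuh>   // or equivalently <cub/cub.cuh>
 *
 * __global__ void ExampleRunningMinKernel(const int *d_in, int *d_out)
 * {
 *     // Specialize BlockScan for a 1D block of 128 threads on type int
 *     typedef cub::BlockScan<int, 128> BlockScan;
 *     __shared__ typename BlockScan::TempStorage temp_storage;
 *
 *     int thread_data = d_in[threadIdx.x];
 *
 *     // Exclusive running minimum: thread0 receives INT_MAX, thread i receives
 *     // the minimum of inputs 0..i-1
 *     BlockScan(temp_storage).ExclusiveScan(thread_data, thread_data, INT_MAX, cub::Min());
 *
 *     d_out[threadIdx.x] = thread_data;
 * }
 * \endcode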
* * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) */ template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input item T &output, ///< [out] Calling thread's output item (may be aliased to \p input) T initial_value, ///< [in] Initial value to seed the exclusive scan (and is assigned to \p output[0] in thread0) ScanOp scan_op) ///< [in] Binary scan functor { InternalBlockScan(temp_storage).ExclusiveScan(input, output, initial_value, scan_op); } /** * \brief Computes an exclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. * * \par * - Supports non-commutative scan operators. * - \rowmajor * - \smemreuse * * \par Snippet * The code snippet below illustrates an exclusive prefix max scan of 128 integer items that * are partitioned across 128 threads. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockScan for a 1D block of 128 threads on type int * typedef cub::BlockScan BlockScan; * * // Allocate shared memory for BlockScan * __shared__ typename BlockScan::TempStorage temp_storage; * * // Obtain input item for each thread * int thread_data; * ... * * // Collectively compute the block-wide exclusive prefix max scan * int block_aggregate; * BlockScan(temp_storage).ExclusiveScan(thread_data, thread_data, INT_MIN, cub::Max(), block_aggregate); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is 0, -1, 2, -3, ..., 126, -127. The * corresponding output \p thread_data in those threads will be INT_MIN, 0, 0, 2, ..., 124, 126. * Furthermore the value \p 126 will be stored in \p block_aggregate for all threads. * * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) */ template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input items T &output, ///< [out] Calling thread's output items (may be aliased to \p input) T initial_value, ///< [in] Initial value to seed the exclusive scan (and is assigned to \p output[0] in thread0) ScanOp scan_op, ///< [in] Binary scan functor T &block_aggregate) ///< [out] block-wide aggregate reduction of input items { InternalBlockScan(temp_storage).ExclusiveScan(input, output, initial_value, scan_op, block_aggregate); } /** * \brief Computes an exclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. the call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. * * \par * - The \p block_prefix_callback_op functor must implement a member function T operator()(T block_aggregate). * The functor's input parameter \p block_aggregate is the same value also returned by the scan operation. * The functor will be invoked by the first warp of threads in the block, however only the return value from * lane0 is applied as the block-wide prefix. Can be stateful. * - Supports non-commutative scan operators. 
* - \rowmajor * - \smemreuse * * \par Snippet * The code snippet below illustrates a single thread block that progressively * computes an exclusive prefix max scan over multiple "tiles" of input using a * prefix functor to maintain a running total between block-wide scans. Each tile consists * of 128 integer items that are partitioned across 128 threads. * \par * \code * #include // or equivalently * * // A stateful callback functor that maintains a running prefix to be applied * // during consecutive scan operations. * struct BlockPrefixCallbackOp * { * // Running prefix * int running_total; * * // Constructor * __device__ BlockPrefixCallbackOp(int running_total) : running_total(running_total) {} * * // Callback operator to be entered by the first warp of threads in the block. * // Thread-0 is responsible for returning a value for seeding the block-wide scan. * __device__ int operator()(int block_aggregate) * { * int old_prefix = running_total; * running_total = (block_aggregate > old_prefix) ? block_aggregate : old_prefix; * return old_prefix; * } * }; * * __global__ void ExampleKernel(int *d_data, int num_items, ...) * { * // Specialize BlockScan for a 1D block of 128 threads * typedef cub::BlockScan BlockScan; * * // Allocate shared memory for BlockScan * __shared__ typename BlockScan::TempStorage temp_storage; * * // Initialize running total * BlockPrefixCallbackOp prefix_op(INT_MIN); * * // Have the block iterate over segments of items * for (int block_offset = 0; block_offset < num_items; block_offset += 128) * { * // Load a segment of consecutive items that are blocked across threads * int thread_data = d_data[block_offset]; * * // Collectively compute the block-wide exclusive prefix max scan * BlockScan(temp_storage).ExclusiveScan( * thread_data, thread_data, INT_MIN, cub::Max(), prefix_op); * CTA_SYNC(); * * // Store scanned items to output segment * d_data[block_offset] = thread_data; * } * \endcode * \par * Suppose the input \p d_data is 0, -1, 2, -3, 4, -5, .... * The corresponding output for the first segment will be INT_MIN, 0, 0, 2, ..., 124, 126. * The output for the second segment will be 126, 128, 128, 130, ..., 252, 254. * * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) * \tparam BlockPrefixCallbackOp [inferred] Call-back functor type having member T operator()(T block_aggregate) */ template < typename ScanOp, typename BlockPrefixCallbackOp> __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input item T &output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan functor BlockPrefixCallbackOp &block_prefix_callback_op) ///< [in-out] [warp0 only] Call-back functor for specifying a block-wide prefix to be applied to the logical input sequence. { InternalBlockScan(temp_storage).ExclusiveScan(input, output, scan_op, block_prefix_callback_op); } //@} end member group // Inclusive prefix sums /******************************************************************//** * \name Exclusive prefix scan operations (multiple data per thread) *********************************************************************/ //@{ /** * \brief Computes an exclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes an array of consecutive input elements. * * \par * - Supports non-commutative scan operators. 
* - \blocked * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates an exclusive prefix max scan of 512 integer items that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockScan for a 1D block of 128 threads on type int * typedef cub::BlockScan BlockScan; * * // Allocate shared memory for BlockScan * __shared__ typename BlockScan::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... * * // Collectively compute the block-wide exclusive prefix max scan * BlockScan(temp_storage).ExclusiveScan(thread_data, thread_data, INT_MIN, cub::Max()); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is * { [0,-1,2,-3], [4,-5,6,-7], ..., [508,-509,510,-511] }. * The corresponding output \p thread_data in those threads will be * { [INT_MIN,0,0,2], [2,4,4,6], ..., [506,508,508,510] }. * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) */ template < int ITEMS_PER_THREAD, typename ScanOp> __device__ __forceinline__ void ExclusiveScan( T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) T initial_value, ///< [in] Initial value to seed the exclusive scan (and is assigned to \p output[0] in thread0) ScanOp scan_op) ///< [in] Binary scan functor { // Reduce consecutive thread items in registers T thread_prefix = internal::ThreadReduce(input, scan_op); // Exclusive thread block-scan ExclusiveScan(thread_prefix, thread_prefix, initial_value, scan_op); // Exclusive scan in registers with prefix as seed internal::ThreadScanExclusive(input, output, scan_op, thread_prefix); } /** * \brief Computes an exclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes an array of consecutive input elements. Also provides every thread with the block-wide \p block_aggregate of all inputs. * * \par * - Supports non-commutative scan operators. * - \blocked * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates an exclusive prefix max scan of 512 integer items that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockScan for a 1D block of 128 threads on type int * typedef cub::BlockScan BlockScan; * * // Allocate shared memory for BlockScan * __shared__ typename BlockScan::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... * * // Collectively compute the block-wide exclusive prefix max scan * int block_aggregate; * BlockScan(temp_storage).ExclusiveScan(thread_data, thread_data, INT_MIN, cub::Max(), block_aggregate); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is { [0,-1,2,-3], [4,-5,6,-7], ..., [508,-509,510,-511] }. The * corresponding output \p thread_data in those threads will be { [INT_MIN,0,0,2], [2,4,4,6], ..., [506,508,508,510] }. 
* Furthermore the value \p 510 will be stored in \p block_aggregate for all threads. * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) */ template < int ITEMS_PER_THREAD, typename ScanOp> __device__ __forceinline__ void ExclusiveScan( T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) T initial_value, ///< [in] Initial value to seed the exclusive scan (and is assigned to \p output[0] in thread0) ScanOp scan_op, ///< [in] Binary scan functor T &block_aggregate) ///< [out] block-wide aggregate reduction of input items { // Reduce consecutive thread items in registers T thread_prefix = internal::ThreadReduce(input, scan_op); // Exclusive thread block-scan ExclusiveScan(thread_prefix, thread_prefix, initial_value, scan_op, block_aggregate); // Exclusive scan in registers with prefix as seed internal::ThreadScanExclusive(input, output, scan_op, thread_prefix); } /** * \brief Computes an exclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes an array of consecutive input elements. the call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. * * \par * - The \p block_prefix_callback_op functor must implement a member function T operator()(T block_aggregate). * The functor's input parameter \p block_aggregate is the same value also returned by the scan operation. * The functor will be invoked by the first warp of threads in the block, however only the return value from * lane0 is applied as the block-wide prefix. Can be stateful. * - Supports non-commutative scan operators. * - \blocked * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates a single thread block that progressively * computes an exclusive prefix max scan over multiple "tiles" of input using a * prefix functor to maintain a running total between block-wide scans. Each tile consists * of 128 integer items that are partitioned across 128 threads. * \par * \code * #include // or equivalently * * // A stateful callback functor that maintains a running prefix to be applied * // during consecutive scan operations. * struct BlockPrefixCallbackOp * { * // Running prefix * int running_total; * * // Constructor * __device__ BlockPrefixCallbackOp(int running_total) : running_total(running_total) {} * * // Callback operator to be entered by the first warp of threads in the block. * // Thread-0 is responsible for returning a value for seeding the block-wide scan. * __device__ int operator()(int block_aggregate) * { * int old_prefix = running_total; * running_total = (block_aggregate > old_prefix) ? block_aggregate : old_prefix; * return old_prefix; * } * }; * * __global__ void ExampleKernel(int *d_data, int num_items, ...) 
* { * // Specialize BlockLoad, BlockStore, and BlockScan for a 1D block of 128 threads, 4 ints per thread * typedef cub::BlockLoad BlockLoad; * typedef cub::BlockStore BlockStore; * typedef cub::BlockScan BlockScan; * * // Allocate aliased shared memory for BlockLoad, BlockStore, and BlockScan * __shared__ union { * typename BlockLoad::TempStorage load; * typename BlockScan::TempStorage scan; * typename BlockStore::TempStorage store; * } temp_storage; * * // Initialize running total * BlockPrefixCallbackOp prefix_op(0); * * // Have the block iterate over segments of items * for (int block_offset = 0; block_offset < num_items; block_offset += 128 * 4) * { * // Load a segment of consecutive items that are blocked across threads * int thread_data[4]; * BlockLoad(temp_storage.load).Load(d_data + block_offset, thread_data); * CTA_SYNC(); * * // Collectively compute the block-wide exclusive prefix max scan * BlockScan(temp_storage.scan).ExclusiveScan( * thread_data, thread_data, INT_MIN, cub::Max(), prefix_op); * CTA_SYNC(); * * // Store scanned items to output segment * BlockStore(temp_storage.store).Store(d_data + block_offset, thread_data); * CTA_SYNC(); * } * \endcode * \par * Suppose the input \p d_data is 0, -1, 2, -3, 4, -5, .... * The corresponding output for the first segment will be INT_MIN, 0, 0, 2, 2, 4, ..., 508, 510. * The output for the second segment will be 510, 512, 512, 514, 514, 516, ..., 1020, 1022. * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) * \tparam BlockPrefixCallbackOp [inferred] Call-back functor type having member T operator()(T block_aggregate) */ template < int ITEMS_PER_THREAD, typename ScanOp, typename BlockPrefixCallbackOp> __device__ __forceinline__ void ExclusiveScan( T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan functor BlockPrefixCallbackOp &block_prefix_callback_op) ///< [in-out] [warp0 only] Call-back functor for specifying a block-wide prefix to be applied to the logical input sequence. { // Reduce consecutive thread items in registers T thread_prefix = internal::ThreadReduce(input, scan_op); // Exclusive thread block-scan ExclusiveScan(thread_prefix, thread_prefix, scan_op, block_prefix_callback_op); // Exclusive scan in registers with prefix as seed internal::ThreadScanExclusive(input, output, scan_op, thread_prefix); } //@} end member group #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document no-initial-value scans /******************************************************************//** * \name Exclusive prefix scan operations (no initial value, single datum per thread) *********************************************************************/ //@{ /** * \brief Computes an exclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. With no initial value, the output computed for thread0 is undefined. * * \par * - Supports non-commutative scan operators. 
* - \rowmajor * - \smemreuse * * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) */ template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input item T &output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op) ///< [in] Binary scan functor { InternalBlockScan(temp_storage).ExclusiveScan(input, output, scan_op); } /** * \brief Computes an exclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. With no initial value, the output computed for thread0 is undefined. * * \par * - Supports non-commutative scan operators. * - \rowmajor * - \smemreuse * * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) */ template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input item T &output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan functor T &block_aggregate) ///< [out] block-wide aggregate reduction of input items { InternalBlockScan(temp_storage).ExclusiveScan(input, output, scan_op, block_aggregate); } //@} end member group /******************************************************************//** * \name Exclusive prefix scan operations (no initial value, multiple data per thread) *********************************************************************/ //@{ /** * \brief Computes an exclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes an array of consecutive input elements. With no initial value, the output computed for thread0 is undefined. * * \par * - Supports non-commutative scan operators. * - \blocked * - \granularity * - \smemreuse * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) */ template < int ITEMS_PER_THREAD, typename ScanOp> __device__ __forceinline__ void ExclusiveScan( T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) ScanOp scan_op) ///< [in] Binary scan functor { // Reduce consecutive thread items in registers T thread_partial = internal::ThreadReduce(input, scan_op); // Exclusive thread block-scan ExclusiveScan(thread_partial, thread_partial, scan_op); // Exclusive scan in registers with prefix internal::ThreadScanExclusive(input, output, scan_op, thread_partial, (linear_tid != 0)); } /** * \brief Computes an exclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes an array of consecutive input elements. Also provides every thread with the block-wide \p block_aggregate of all inputs. With no initial value, the output computed for thread0 is undefined. * * \par * - Supports non-commutative scan operators. * - \blocked * - \granularity * - \smemreuse * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. 
* \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) */ template < int ITEMS_PER_THREAD, typename ScanOp> __device__ __forceinline__ void ExclusiveScan( T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan functor T &block_aggregate) ///< [out] block-wide aggregate reduction of input items { // Reduce consecutive thread items in registers T thread_partial = internal::ThreadReduce(input, scan_op); // Exclusive thread block-scan ExclusiveScan(thread_partial, thread_partial, scan_op, block_aggregate); // Exclusive scan in registers with prefix internal::ThreadScanExclusive(input, output, scan_op, thread_partial, (linear_tid != 0)); } //@} end member group #endif // DOXYGEN_SHOULD_SKIP_THIS // Do not document no-initial-value scans /******************************************************************//** * \name Inclusive prefix sum operations *********************************************************************/ //@{ /** * \brief Computes an inclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes one input element. * * \par * - \rowmajor * - \smemreuse * * \par Snippet * The code snippet below illustrates an inclusive prefix sum of 128 integer items that * are partitioned across 128 threads. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockScan for a 1D block of 128 threads on type int * typedef cub::BlockScan BlockScan; * * // Allocate shared memory for BlockScan * __shared__ typename BlockScan::TempStorage temp_storage; * * // Obtain input item for each thread * int thread_data; * ... * * // Collectively compute the block-wide inclusive prefix sum * BlockScan(temp_storage).InclusiveSum(thread_data, thread_data); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is 1, 1, ..., 1. The * corresponding output \p thread_data in those threads will be 1, 2, ..., 128. * */ __device__ __forceinline__ void InclusiveSum( T input, ///< [in] Calling thread's input item T &output) ///< [out] Calling thread's output item (may be aliased to \p input) { InclusiveScan(input, output, cub::Sum()); } /** * \brief Computes an inclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. * * \par * - \rowmajor * - \smemreuse * * \par Snippet * The code snippet below illustrates an inclusive prefix sum of 128 integer items that * are partitioned across 128 threads. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockScan for a 1D block of 128 threads on type int * typedef cub::BlockScan BlockScan; * * // Allocate shared memory for BlockScan * __shared__ typename BlockScan::TempStorage temp_storage; * * // Obtain input item for each thread * int thread_data; * ... * * // Collectively compute the block-wide inclusive prefix sum * int block_aggregate; * BlockScan(temp_storage).InclusiveSum(thread_data, thread_data, block_aggregate); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is 1, 1, ..., 1. The * corresponding output \p thread_data in those threads will be 1, 2, ..., 128. 
* Furthermore the value \p 128 will be stored in \p block_aggregate for all threads. * */ __device__ __forceinline__ void InclusiveSum( T input, ///< [in] Calling thread's input item T &output, ///< [out] Calling thread's output item (may be aliased to \p input) T &block_aggregate) ///< [out] block-wide aggregate reduction of input items { InclusiveScan(input, output, cub::Sum(), block_aggregate); } /** * \brief Computes an inclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes one input element. Instead of using 0 as the block-wide prefix, the call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. * * \par * - The \p block_prefix_callback_op functor must implement a member function T operator()(T block_aggregate). * The functor's input parameter \p block_aggregate is the same value also returned by the scan operation. * The functor will be invoked by the first warp of threads in the block, however only the return value from * lane0 is applied as the block-wide prefix. Can be stateful. * - \rowmajor * - \smemreuse * * \par Snippet * The code snippet below illustrates a single thread block that progressively * computes an inclusive prefix sum over multiple "tiles" of input using a * prefix functor to maintain a running total between block-wide scans. Each tile consists * of 128 integer items that are partitioned across 128 threads. * \par * \code * #include // or equivalently * * // A stateful callback functor that maintains a running prefix to be applied * // during consecutive scan operations. * struct BlockPrefixCallbackOp * { * // Running prefix * int running_total; * * // Constructor * __device__ BlockPrefixCallbackOp(int running_total) : running_total(running_total) {} * * // Callback operator to be entered by the first warp of threads in the block. * // Thread-0 is responsible for returning a value for seeding the block-wide scan. * __device__ int operator()(int block_aggregate) * { * int old_prefix = running_total; * running_total += block_aggregate; * return old_prefix; * } * }; * * __global__ void ExampleKernel(int *d_data, int num_items, ...) * { * // Specialize BlockScan for a 1D block of 128 threads * typedef cub::BlockScan BlockScan; * * // Allocate shared memory for BlockScan * __shared__ typename BlockScan::TempStorage temp_storage; * * // Initialize running total * BlockPrefixCallbackOp prefix_op(0); * * // Have the block iterate over segments of items * for (int block_offset = 0; block_offset < num_items; block_offset += 128) * { * // Load a segment of consecutive items that are blocked across threads * int thread_data = d_data[block_offset]; * * // Collectively compute the block-wide inclusive prefix sum * BlockScan(temp_storage).InclusiveSum( * thread_data, thread_data, prefix_op); * CTA_SYNC(); * * // Store scanned items to output segment * d_data[block_offset] = thread_data; * } * \endcode * \par * Suppose the input \p d_data is 1, 1, 1, 1, 1, 1, 1, 1, .... * The corresponding output for the first segment will be 1, 2, ..., 128. * The output for the second segment will be 129, 130, ..., 256. 
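     * \par
     * As a host-side usage sketch (not part of this class; the kernel name, grid shape, and data sizes are
     * assumed from the snippet above), such a kernel would be launched with a single 128-thread block so
     * that \p prefix_op carries the running total across every tile:
     * \code
     * int num_items = 1024;
     * int *d_data;
     * cudaMalloc(&d_data, num_items * sizeof(int));
     * // ... fill d_data on the device ...
     * ExampleKernel<<<1, 128>>>(d_data, num_items /*, ... *‌/);
     * cudaDeviceSynchronize();
     * \endcode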
* * \tparam BlockPrefixCallbackOp [inferred] Call-back functor type having member T operator()(T block_aggregate) */ template __device__ __forceinline__ void InclusiveSum( T input, ///< [in] Calling thread's input item T &output, ///< [out] Calling thread's output item (may be aliased to \p input) BlockPrefixCallbackOp &block_prefix_callback_op) ///< [in-out] [warp0 only] Call-back functor for specifying a block-wide prefix to be applied to the logical input sequence. { InclusiveScan(input, output, cub::Sum(), block_prefix_callback_op); } //@} end member group /******************************************************************//** * \name Inclusive prefix sum operations (multiple data per thread) *********************************************************************/ //@{ /** * \brief Computes an inclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes an array of consecutive input elements. * * \par * - \blocked * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates an inclusive prefix sum of 512 integer items that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockScan for a 1D block of 128 threads on type int * typedef cub::BlockScan BlockScan; * * // Allocate shared memory for BlockScan * __shared__ typename BlockScan::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... * * // Collectively compute the block-wide inclusive prefix sum * BlockScan(temp_storage).InclusiveSum(thread_data, thread_data); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is { [1,1,1,1], [1,1,1,1], ..., [1,1,1,1] }. The * corresponding output \p thread_data in those threads will be { [1,2,3,4], [5,6,7,8], ..., [509,510,511,512] }. * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. */ template __device__ __forceinline__ void InclusiveSum( T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items T (&output)[ITEMS_PER_THREAD]) ///< [out] Calling thread's output items (may be aliased to \p input) { if (ITEMS_PER_THREAD == 1) { InclusiveSum(input[0], output[0]); } else { // Reduce consecutive thread items in registers Sum scan_op; T thread_prefix = internal::ThreadReduce(input, scan_op); // Exclusive thread block-scan ExclusiveSum(thread_prefix, thread_prefix); // Inclusive scan in registers with prefix as seed internal::ThreadScanInclusive(input, output, scan_op, thread_prefix, (linear_tid != 0)); } } /** * \brief Computes an inclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes an array of consecutive input elements. Also provides every thread with the block-wide \p block_aggregate of all inputs. * * \par * - \blocked * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates an inclusive prefix sum of 512 integer items that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) 
* { * // Specialize BlockScan for a 1D block of 128 threads on type int * typedef cub::BlockScan BlockScan; * * // Allocate shared memory for BlockScan * __shared__ typename BlockScan::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... * * // Collectively compute the block-wide inclusive prefix sum * int block_aggregate; * BlockScan(temp_storage).InclusiveSum(thread_data, thread_data, block_aggregate); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is * { [1,1,1,1], [1,1,1,1], ..., [1,1,1,1] }. The * corresponding output \p thread_data in those threads will be * { [1,2,3,4], [5,6,7,8], ..., [509,510,511,512] }. * Furthermore the value \p 512 will be stored in \p block_aggregate for all threads. * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) */ template __device__ __forceinline__ void InclusiveSum( T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) T &block_aggregate) ///< [out] block-wide aggregate reduction of input items { if (ITEMS_PER_THREAD == 1) { InclusiveSum(input[0], output[0], block_aggregate); } else { // Reduce consecutive thread items in registers Sum scan_op; T thread_prefix = internal::ThreadReduce(input, scan_op); // Exclusive thread block-scan ExclusiveSum(thread_prefix, thread_prefix, block_aggregate); // Inclusive scan in registers with prefix as seed internal::ThreadScanInclusive(input, output, scan_op, thread_prefix, (linear_tid != 0)); } } /** * \brief Computes an inclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes an array of consecutive input elements. Instead of using 0 as the block-wide prefix, the call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. * * \par * - The \p block_prefix_callback_op functor must implement a member function T operator()(T block_aggregate). * The functor's input parameter \p block_aggregate is the same value also returned by the scan operation. * The functor will be invoked by the first warp of threads in the block, however only the return value from * lane0 is applied as the block-wide prefix. Can be stateful. * - \blocked * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates a single thread block that progressively * computes an inclusive prefix sum over multiple "tiles" of input using a * prefix functor to maintain a running total between block-wide scans. Each tile consists * of 512 integer items that are partitioned in a [blocked arrangement](index.html#sec5sec3) * across 128 threads where each thread owns 4 consecutive items. * \par * \code * #include // or equivalently * * // A stateful callback functor that maintains a running prefix to be applied * // during consecutive scan operations. 
 * struct BlockPrefixCallbackOp * { * // Running prefix * int running_total; * * // Constructor * __device__ BlockPrefixCallbackOp(int running_total) : running_total(running_total) {} * * // Callback operator to be entered by the first warp of threads in the block. * // Thread-0 is responsible for returning a value for seeding the block-wide scan. * __device__ int operator()(int block_aggregate) * { * int old_prefix = running_total; * running_total += block_aggregate; * return old_prefix; * } * }; * * __global__ void ExampleKernel(int *d_data, int num_items, ...) * { * // Specialize BlockLoad, BlockStore, and BlockScan for a 1D block of 128 threads, 4 ints per thread * typedef cub::BlockLoad BlockLoad; * typedef cub::BlockStore BlockStore; * typedef cub::BlockScan BlockScan; * * // Allocate aliased shared memory for BlockLoad, BlockStore, and BlockScan * __shared__ union { * typename BlockLoad::TempStorage load; * typename BlockScan::TempStorage scan; * typename BlockStore::TempStorage store; * } temp_storage; * * // Initialize running total * BlockPrefixCallbackOp prefix_op(0); * * // Have the block iterate over segments of items * for (int block_offset = 0; block_offset < num_items; block_offset += 128 * 4) * { * // Load a segment of consecutive items that are blocked across threads * int thread_data[4]; * BlockLoad(temp_storage.load).Load(d_data + block_offset, thread_data); * CTA_SYNC(); * * // Collectively compute the block-wide inclusive prefix sum * BlockScan(temp_storage.scan).InclusiveSum( * thread_data, thread_data, prefix_op); * CTA_SYNC(); * * // Store scanned items to output segment * BlockStore(temp_storage.store).Store(d_data + block_offset, thread_data); * CTA_SYNC(); * } * \endcode * \par * Suppose the input \p d_data is 1, 1, 1, 1, 1, 1, 1, 1, .... * The corresponding output for the first segment will be 1, 2, 3, 4, ..., 511, 512. * The output for the second segment will be 513, 514, 515, 516, ..., 1023, 1024. * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam BlockPrefixCallbackOp [inferred] Call-back functor type having member T operator()(T block_aggregate) */ template < int ITEMS_PER_THREAD, typename BlockPrefixCallbackOp> __device__ __forceinline__ void InclusiveSum( T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) BlockPrefixCallbackOp &block_prefix_callback_op) ///< [in-out] [warp0 only] Call-back functor for specifying a block-wide prefix to be applied to the logical input sequence. { if (ITEMS_PER_THREAD == 1) { InclusiveSum(input[0], output[0], block_prefix_callback_op); } else { // Reduce consecutive thread items in registers Sum scan_op; T thread_prefix = internal::ThreadReduce(input, scan_op); // Exclusive thread block-scan ExclusiveSum(thread_prefix, thread_prefix, block_prefix_callback_op); // Inclusive scan in registers with prefix as seed internal::ThreadScanInclusive(input, output, scan_op, thread_prefix); } } //@} end member group /******************************************************************//** * \name Inclusive prefix scan operations *********************************************************************/ //@{ /** * \brief Computes an inclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. * * \par * - Supports non-commutative scan operators.
* - \rowmajor * - \smemreuse * * \par Snippet * The code snippet below illustrates an inclusive prefix max scan of 128 integer items that * are partitioned across 128 threads. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockScan for a 1D block of 128 threads on type int * typedef cub::BlockScan BlockScan; * * // Allocate shared memory for BlockScan * __shared__ typename BlockScan::TempStorage temp_storage; * * // Obtain input item for each thread * int thread_data; * ... * * // Collectively compute the block-wide inclusive prefix max scan * BlockScan(temp_storage).InclusiveScan(thread_data, thread_data, cub::Max()); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is 0, -1, 2, -3, ..., 126, -127. The * corresponding output \p thread_data in those threads will be 0, 0, 2, 2, ..., 126, 126. * * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) */ template __device__ __forceinline__ void InclusiveScan( T input, ///< [in] Calling thread's input item T &output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op) ///< [in] Binary scan functor { InternalBlockScan(temp_storage).InclusiveScan(input, output, scan_op); } /** * \brief Computes an inclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. * * \par * - Supports non-commutative scan operators. * - \rowmajor * - \smemreuse * * \par Snippet * The code snippet below illustrates an inclusive prefix max scan of 128 integer items that * are partitioned across 128 threads. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockScan for a 1D block of 128 threads on type int * typedef cub::BlockScan BlockScan; * * // Allocate shared memory for BlockScan * __shared__ typename BlockScan::TempStorage temp_storage; * * // Obtain input item for each thread * int thread_data; * ... * * // Collectively compute the block-wide inclusive prefix max scan * int block_aggregate; * BlockScan(temp_storage).InclusiveScan(thread_data, thread_data, cub::Max(), block_aggregate); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is 0, -1, 2, -3, ..., 126, -127. The * corresponding output \p thread_data in those threads will be 0, 0, 2, 2, ..., 126, 126. * Furthermore the value \p 126 will be stored in \p block_aggregate for all threads. * * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) */ template __device__ __forceinline__ void InclusiveScan( T input, ///< [in] Calling thread's input item T &output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan functor T &block_aggregate) ///< [out] block-wide aggregate reduction of input items { InternalBlockScan(temp_storage).InclusiveScan(input, output, scan_op, block_aggregate); } /** * \brief Computes an inclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. the call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. 
Also provides every thread with the block-wide \p block_aggregate of all inputs. * * \par * - The \p block_prefix_callback_op functor must implement a member function T operator()(T block_aggregate). * The functor's input parameter \p block_aggregate is the same value also returned by the scan operation. * The functor will be invoked by the first warp of threads in the block, however only the return value from * lane0 is applied as the block-wide prefix. Can be stateful. * - Supports non-commutative scan operators. * - \rowmajor * - \smemreuse * * \par Snippet * The code snippet below illustrates a single thread block that progressively * computes an inclusive prefix max scan over multiple "tiles" of input using a * prefix functor to maintain a running total between block-wide scans. Each tile consists * of 128 integer items that are partitioned across 128 threads. * \par * \code * #include // or equivalently * * // A stateful callback functor that maintains a running prefix to be applied * // during consecutive scan operations. * struct BlockPrefixCallbackOp * { * // Running prefix * int running_total; * * // Constructor * __device__ BlockPrefixCallbackOp(int running_total) : running_total(running_total) {} * * // Callback operator to be entered by the first warp of threads in the block. * // Thread-0 is responsible for returning a value for seeding the block-wide scan. * __device__ int operator()(int block_aggregate) * { * int old_prefix = running_total; * running_total = (block_aggregate > old_prefix) ? block_aggregate : old_prefix; * return old_prefix; * } * }; * * __global__ void ExampleKernel(int *d_data, int num_items, ...) * { * // Specialize BlockScan for a 1D block of 128 threads * typedef cub::BlockScan BlockScan; * * // Allocate shared memory for BlockScan * __shared__ typename BlockScan::TempStorage temp_storage; * * // Initialize running total * BlockPrefixCallbackOp prefix_op(INT_MIN); * * // Have the block iterate over segments of items * for (int block_offset = 0; block_offset < num_items; block_offset += 128) * { * // Load a segment of consecutive items that are blocked across threads * int thread_data = d_data[block_offset]; * * // Collectively compute the block-wide inclusive prefix max scan * BlockScan(temp_storage).InclusiveScan( * thread_data, thread_data, cub::Max(), prefix_op); * CTA_SYNC(); * * // Store scanned items to output segment * d_data[block_offset] = thread_data; * } * \endcode * \par * Suppose the input \p d_data is 0, -1, 2, -3, 4, -5, .... * The corresponding output for the first segment will be 0, 0, 2, 2, ..., 126, 126. * The output for the second segment will be 128, 128, 130, 130, ..., 254, 254. * * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) * \tparam BlockPrefixCallbackOp [inferred] Call-back functor type having member T operator()(T block_aggregate) */ template < typename ScanOp, typename BlockPrefixCallbackOp> __device__ __forceinline__ void InclusiveScan( T input, ///< [in] Calling thread's input item T &output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan functor BlockPrefixCallbackOp &block_prefix_callback_op) ///< [in-out] [warp0 only] Call-back functor for specifying a block-wide prefix to be applied to the logical input sequence. 
{ InternalBlockScan(temp_storage).InclusiveScan(input, output, scan_op, block_prefix_callback_op); } //@} end member group /******************************************************************//** * \name Inclusive prefix scan operations (multiple data per thread) *********************************************************************/ //@{ /** * \brief Computes an inclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes an array of consecutive input elements. * * \par * - Supports non-commutative scan operators. * - \blocked * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates an inclusive prefix max scan of 512 integer items that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockScan for a 1D block of 128 threads on type int * typedef cub::BlockScan BlockScan; * * // Allocate shared memory for BlockScan * __shared__ typename BlockScan::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... * * // Collectively compute the block-wide inclusive prefix max scan * BlockScan(temp_storage).InclusiveScan(thread_data, thread_data, cub::Max()); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is { [0,-1,2,-3], [4,-5,6,-7], ..., [508,-509,510,-511] }. The * corresponding output \p thread_data in those threads will be { [0,0,2,2], [4,4,6,6], ..., [508,508,510,510] }. * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) */ template < int ITEMS_PER_THREAD, typename ScanOp> __device__ __forceinline__ void InclusiveScan( T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) ScanOp scan_op) ///< [in] Binary scan functor { if (ITEMS_PER_THREAD == 1) { InclusiveScan(input[0], output[0], scan_op); } else { // Reduce consecutive thread items in registers T thread_prefix = internal::ThreadReduce(input, scan_op); // Exclusive thread block-scan ExclusiveScan(thread_prefix, thread_prefix, scan_op); // Inclusive scan in registers with prefix as seed (first thread does not seed) internal::ThreadScanInclusive(input, output, scan_op, thread_prefix, (linear_tid != 0)); } } /** * \brief Computes an inclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes an array of consecutive input elements. Also provides every thread with the block-wide \p block_aggregate of all inputs. * * \par * - Supports non-commutative scan operators. * - \blocked * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates an inclusive prefix max scan of 512 integer items that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) 
* { * // Specialize BlockScan for a 1D block of 128 threads on type int * typedef cub::BlockScan BlockScan; * * // Allocate shared memory for BlockScan * __shared__ typename BlockScan::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... * * // Collectively compute the block-wide inclusive prefix max scan * int block_aggregate; * BlockScan(temp_storage).InclusiveScan(thread_data, thread_data, cub::Max(), block_aggregate); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is * { [0,-1,2,-3], [4,-5,6,-7], ..., [508,-509,510,-511] }. * The corresponding output \p thread_data in those threads will be * { [0,0,2,2], [4,4,6,6], ..., [508,508,510,510] }. * Furthermore the value \p 510 will be stored in \p block_aggregate for all threads. * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) */ template < int ITEMS_PER_THREAD, typename ScanOp> __device__ __forceinline__ void InclusiveScan( T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan functor T &block_aggregate) ///< [out] block-wide aggregate reduction of input items { if (ITEMS_PER_THREAD == 1) { InclusiveScan(input[0], output[0], scan_op, block_aggregate); } else { // Reduce consecutive thread items in registers T thread_prefix = internal::ThreadReduce(input, scan_op); // Exclusive thread block-scan (with no initial value) ExclusiveScan(thread_prefix, thread_prefix, scan_op, block_aggregate); // Inclusive scan in registers with prefix as seed (first thread does not seed) internal::ThreadScanInclusive(input, output, scan_op, thread_prefix, (linear_tid != 0)); } } /** * \brief Computes an inclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes an array of consecutive input elements. the call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. * * \par * - The \p block_prefix_callback_op functor must implement a member function T operator()(T block_aggregate). * The functor's input parameter \p block_aggregate is the same value also returned by the scan operation. * The functor will be invoked by the first warp of threads in the block, however only the return value from * lane0 is applied as the block-wide prefix. Can be stateful. * - Supports non-commutative scan operators. * - \blocked * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates a single thread block that progressively * computes an inclusive prefix max scan over multiple "tiles" of input using a * prefix functor to maintain a running total between block-wide scans. Each tile consists * of 128 integer items that are partitioned across 128 threads. * \par * \code * #include // or equivalently * * // A stateful callback functor that maintains a running prefix to be applied * // during consecutive scan operations. 
* struct BlockPrefixCallbackOp * { * // Running prefix * int running_total; * * // Constructor * __device__ BlockPrefixCallbackOp(int running_total) : running_total(running_total) {} * * // Callback operator to be entered by the first warp of threads in the block. * // Thread-0 is responsible for returning a value for seeding the block-wide scan. * __device__ int operator()(int block_aggregate) * { * int old_prefix = running_total; * running_total = (block_aggregate > old_prefix) ? block_aggregate : old_prefix; * return old_prefix; * } * }; * * __global__ void ExampleKernel(int *d_data, int num_items, ...) * { * // Specialize BlockLoad, BlockStore, and BlockScan for a 1D block of 128 threads, 4 ints per thread * typedef cub::BlockLoad BlockLoad; * typedef cub::BlockStore BlockStore; * typedef cub::BlockScan BlockScan; * * // Allocate aliased shared memory for BlockLoad, BlockStore, and BlockScan * __shared__ union { * typename BlockLoad::TempStorage load; * typename BlockScan::TempStorage scan; * typename BlockStore::TempStorage store; * } temp_storage; * * // Initialize running total * BlockPrefixCallbackOp prefix_op(0); * * // Have the block iterate over segments of items * for (int block_offset = 0; block_offset < num_items; block_offset += 128 * 4) * { * // Load a segment of consecutive items that are blocked across threads * int thread_data[4]; * BlockLoad(temp_storage.load).Load(d_data + block_offset, thread_data); * CTA_SYNC(); * * // Collectively compute the block-wide inclusive prefix max scan * BlockScan(temp_storage.scan).InclusiveScan( * thread_data, thread_data, cub::Max(), prefix_op); * CTA_SYNC(); * * // Store scanned items to output segment * BlockStore(temp_storage.store).Store(d_data + block_offset, thread_data); * CTA_SYNC(); * } * \endcode * \par * Suppose the input \p d_data is 0, -1, 2, -3, 4, -5, .... * The corresponding output for the first segment will be 0, 0, 2, 2, 4, 4, ..., 510, 510. * The output for the second segment will be 512, 512, 514, 514, 516, 516, ..., 1022, 1022. * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) * \tparam BlockPrefixCallbackOp [inferred] Call-back functor type having member T operator()(T block_aggregate) */ template < int ITEMS_PER_THREAD, typename ScanOp, typename BlockPrefixCallbackOp> __device__ __forceinline__ void InclusiveScan( T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan functor BlockPrefixCallbackOp &block_prefix_callback_op) ///< [in-out] [warp0 only] Call-back functor for specifying a block-wide prefix to be applied to the logical input sequence. 
{ if (ITEMS_PER_THREAD == 1) { InclusiveScan(input[0], output[0], scan_op, block_prefix_callback_op); } else { // Reduce consecutive thread items in registers T thread_prefix = internal::ThreadReduce(input, scan_op); // Exclusive thread block-scan ExclusiveScan(thread_prefix, thread_prefix, scan_op, block_prefix_callback_op); // Inclusive scan in registers with prefix as seed internal::ThreadScanInclusive(input, output, scan_op, thread_prefix); } } //@} end member group }; /** * \example example_block_scan.cu */ } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/block/block_shuffle.cuh000066400000000000000000000272741411340063500224220ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * The cub::BlockShuffle class provides [collective](index.html#sec0) methods for shuffling data partitioned across a CUDA thread block. */ #pragma once #include "../util_arch.cuh" #include "../util_ptx.cuh" #include "../util_macro.cuh" #include "../util_type.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief The BlockShuffle class provides [collective](index.html#sec0) methods for shuffling data partitioned across a CUDA thread block. * \ingroup BlockModule * * \tparam T The data type to be exchanged. * \tparam BLOCK_DIM_X The thread block length in threads along the X dimension * \tparam BLOCK_DIM_Y [optional] The thread block length in threads along the Y dimension (default: 1) * \tparam BLOCK_DIM_Z [optional] The thread block length in threads along the Z dimension (default: 1) * \tparam PTX_ARCH [optional] \ptxversion * * \par Overview * It is commonplace for blocks of threads to rearrange data items between * threads. 
The BlockShuffle abstraction allows threads to efficiently shift items * either (a) up to their successor or (b) down to their predecessor. * */ template < typename T, int BLOCK_DIM_X, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH> class BlockShuffle { private: /****************************************************************************** * Constants ******************************************************************************/ enum { BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, LOG_WARP_THREADS = CUB_LOG_WARP_THREADS(PTX_ARCH), WARP_THREADS = 1 << LOG_WARP_THREADS, WARPS = (BLOCK_THREADS + WARP_THREADS - 1) / WARP_THREADS, }; /****************************************************************************** * Type definitions ******************************************************************************/ /// Shared memory storage layout type (last element from each thread's input) struct _TempStorage { T prev[BLOCK_THREADS]; T next[BLOCK_THREADS]; }; public: /// \smemstorage{BlockShuffle} struct TempStorage : Uninitialized<_TempStorage> {}; private: /****************************************************************************** * Thread fields ******************************************************************************/ /// Shared storage reference _TempStorage &temp_storage; /// Linear thread-id unsigned int linear_tid; /****************************************************************************** * Utility methods ******************************************************************************/ /// Internal storage allocator __device__ __forceinline__ _TempStorage& PrivateStorage() { __shared__ _TempStorage private_storage; return private_storage; } public: /******************************************************************//** * \name Collective constructors *********************************************************************/ //@{ /** * \brief Collective constructor using a private static allocation of shared memory as temporary storage. */ __device__ __forceinline__ BlockShuffle() : temp_storage(PrivateStorage()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} /** * \brief Collective constructor using the specified memory allocation as temporary storage. */ __device__ __forceinline__ BlockShuffle( TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage : temp_storage(temp_storage.Alias()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} //@} end member group /******************************************************************//** * \name Shuffle movement *********************************************************************/ //@{ /** * \brief Each threadi obtains the \p input provided by threadi+distance. The offset \p distance may be negative. * * \par * - \smemreuse */ __device__ __forceinline__ void Offset( T input, ///< [in] The input item from the calling thread (threadi) T& output, ///< [out] The \p input item from the successor (or predecessor) thread threadi+distance (may be aliased to \p input). This value is only updated for for threadi when 0 <= (i + \p distance) < BLOCK_THREADS-1 int distance = 1) ///< [in] Offset distance (may be negative) { temp_storage[linear_tid].prev = input; CTA_SYNC(); if ((linear_tid + distance >= 0) && (linear_tid + distance < BLOCK_THREADS)) output = temp_storage[linear_tid + distance].prev; } /** * \brief Each threadi obtains the \p input provided by threadi+distance. 
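 *
 * \par
 * A minimal illustrative call (a sketch, not part of the original header; the
 * 128-thread block size and the names \p item / \p rotated are assumptions):
 * \code
 * cub::BlockShuffle<int, 128> shuffle;
 * int item = ...;                  // this thread's value
 * int rotated;
 * shuffle.Rotate(item, rotated);   // with the default distance of 1, thread i
 *                                  // receives the item of thread (i + 1) % 128
 * \endcode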
* * \par * - \smemreuse */ __device__ __forceinline__ void Rotate( T input, ///< [in] The calling thread's input item T& output, ///< [out] The \p input item from thread thread(i+distance>)% (may be aliased to \p input). This value is not updated for threadBLOCK_THREADS-1 unsigned int distance = 1) ///< [in] Offset distance (0 < \p distance < BLOCK_THREADS) { temp_storage[linear_tid].prev = input; CTA_SYNC(); unsigned int offset = threadIdx.x + distance; if (offset >= BLOCK_THREADS) offset -= BLOCK_THREADS; output = temp_storage[offset].prev; } /** * \brief The thread block rotates its [blocked arrangement](index.html#sec5sec3) of \p input items, shifting it up by one item * * \par * - \blocked * - \granularity * - \smemreuse */ template __device__ __forceinline__ void Up( T (&input)[ITEMS_PER_THREAD], ///< [in] The calling thread's input items T (&prev)[ITEMS_PER_THREAD]) ///< [out] The corresponding predecessor items (may be aliased to \p input). The item \p prev[0] is not updated for thread0. { temp_storage[linear_tid].prev = input[ITEMS_PER_THREAD - 1]; CTA_SYNC(); #pragma unroll for (int ITEM = ITEMS_PER_THREAD - 1; ITEM > 0; --ITEM) prev[ITEM] = input[ITEM - 1]; if (linear_tid > 0) prev[0] = temp_storage[linear_tid - 1].prev; } /** * \brief The thread block rotates its [blocked arrangement](index.html#sec5sec3) of \p input items, shifting it up by one item. All threads receive the \p input provided by threadBLOCK_THREADS-1. * * \par * - \blocked * - \granularity * - \smemreuse */ template __device__ __forceinline__ void Up( T (&input)[ITEMS_PER_THREAD], ///< [in] The calling thread's input items T (&prev)[ITEMS_PER_THREAD], ///< [out] The corresponding predecessor items (may be aliased to \p input). The item \p prev[0] is not updated for thread0. T &block_suffix) ///< [out] The item \p input[ITEMS_PER_THREAD-1] from threadBLOCK_THREADS-1, provided to all threads { Up(input, prev); block_suffix = temp_storage[BLOCK_THREADS - 1].prev; } /** * \brief The thread block rotates its [blocked arrangement](index.html#sec5sec3) of \p input items, shifting it down by one item * * \par * - \blocked * - \granularity * - \smemreuse */ template __device__ __forceinline__ void Down( T (&input)[ITEMS_PER_THREAD], ///< [in] The calling thread's input items T (&prev)[ITEMS_PER_THREAD]) ///< [out] The corresponding predecessor items (may be aliased to \p input). The value \p prev[0] is not updated for threadBLOCK_THREADS-1. { temp_storage[linear_tid].prev = input[ITEMS_PER_THREAD - 1]; CTA_SYNC(); #pragma unroll for (int ITEM = ITEMS_PER_THREAD - 1; ITEM > 0; --ITEM) prev[ITEM] = input[ITEM - 1]; if (linear_tid > 0) prev[0] = temp_storage[linear_tid - 1].prev; } /** * \brief The thread block rotates its [blocked arrangement](index.html#sec5sec3) of input items, shifting it down by one item. All threads receive \p input[0] provided by thread0. * * \par * - \blocked * - \granularity * - \smemreuse */ template __device__ __forceinline__ void Down( T (&input)[ITEMS_PER_THREAD], ///< [in] The calling thread's input items T (&prev)[ITEMS_PER_THREAD], ///< [out] The corresponding predecessor items (may be aliased to \p input). The value \p prev[0] is not updated for threadBLOCK_THREADS-1. 
T &block_prefix) ///< [out] The item \p input[0] from thread0, provided to all threads { Up(input, prev); block_prefix = temp_storage[BLOCK_THREADS - 1].prev; } //@} end member group }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/block/block_store.cuh000066400000000000000000001207511411340063500221140ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * Operations for writing linear segments of data from the CUDA thread block */ #pragma once #include #include "block_exchange.cuh" #include "../util_ptx.cuh" #include "../util_macro.cuh" #include "../util_type.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup UtilIo * @{ */ /******************************************************************//** * \name Blocked arrangement I/O (direct) *********************************************************************/ //@{ /** * \brief Store a blocked arrangement of items across a thread block into a linear segment of items. * * \blocked * * \tparam T [inferred] The data type to store. * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam OutputIteratorT [inferred] The random-access iterator type for output \iterator. 
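 *
 * \par
 * A minimal illustrative kernel (a sketch, not part of the original header; the
 * kernel name \p ExampleKernel and the pointer \p d_out are assumptions): each of
 * 128 threads writes its own 4 consecutive items in blocked order.
 * \code
 * __global__ void ExampleKernel(int *d_out, ...)
 * {
 *     int thread_data[4];
 *     ...
 *     cub::StoreDirectBlocked(threadIdx.x, d_out, thread_data);
 * }
 * \endcode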
*/ template < typename T, int ITEMS_PER_THREAD, typename OutputIteratorT> __device__ __forceinline__ void StoreDirectBlocked( int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to T (&items)[ITEMS_PER_THREAD]) ///< [in] Data to store { OutputIteratorT thread_itr = block_itr + (linear_tid * ITEMS_PER_THREAD); // Store directly in thread-blocked order #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { thread_itr[ITEM] = items[ITEM]; } } /** * \brief Store a blocked arrangement of items across a thread block into a linear segment of items, guarded by range * * \blocked * * \tparam T [inferred] The data type to store. * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam OutputIteratorT [inferred] The random-access iterator type for output \iterator. */ template < typename T, int ITEMS_PER_THREAD, typename OutputIteratorT> __device__ __forceinline__ void StoreDirectBlocked( int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to T (&items)[ITEMS_PER_THREAD], ///< [in] Data to store int valid_items) ///< [in] Number of valid items to write { OutputIteratorT thread_itr = block_itr + (linear_tid * ITEMS_PER_THREAD); // Store directly in thread-blocked order #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { if (ITEM + (linear_tid * ITEMS_PER_THREAD) < valid_items) { thread_itr[ITEM] = items[ITEM]; } } } /** * \brief Store a blocked arrangement of items across a thread block into a linear segment of items. * * \blocked * * The output offset (\p block_ptr + \p block_offset) must be quad-item aligned, * which is the default starting offset returned by \p cudaMalloc() * * \par * The following conditions will prevent vectorization and storing will fall back to cub::BLOCK_STORE_DIRECT: * - \p ITEMS_PER_THREAD is odd * - The data type \p T is not a built-in primitive or CUDA vector type (e.g., \p short, \p int2, \p double, \p float2, etc.) * * \tparam T [inferred] The data type to store. * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * */ template < typename T, int ITEMS_PER_THREAD> __device__ __forceinline__ void StoreDirectBlockedVectorized( int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) T *block_ptr, ///< [in] Input pointer for storing from T (&items)[ITEMS_PER_THREAD]) ///< [in] Data to store { enum { // Maximum CUDA vector size is 4 elements MAX_VEC_SIZE = CUB_MIN(4, ITEMS_PER_THREAD), // Vector size must be a power of two and an even divisor of the items per thread VEC_SIZE = ((((MAX_VEC_SIZE - 1) & MAX_VEC_SIZE) == 0) && ((ITEMS_PER_THREAD % MAX_VEC_SIZE) == 0)) ? 
MAX_VEC_SIZE : 1, VECTORS_PER_THREAD = ITEMS_PER_THREAD / VEC_SIZE, }; // Vector type typedef typename CubVector::Type Vector; // Alias global pointer Vector *block_ptr_vectors = reinterpret_cast(const_cast(block_ptr)); // Alias pointers (use "raw" array here which should get optimized away to prevent conservative PTXAS lmem spilling) Vector raw_vector[VECTORS_PER_THREAD]; T *raw_items = reinterpret_cast(raw_vector); // Copy #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { raw_items[ITEM] = items[ITEM]; } // Direct-store using vector types StoreDirectBlocked(linear_tid, block_ptr_vectors, raw_vector); } //@} end member group /******************************************************************//** * \name Striped arrangement I/O (direct) *********************************************************************/ //@{ /** * \brief Store a striped arrangement of data across the thread block into a linear segment of items. * * \striped * * \tparam BLOCK_THREADS The thread block size in threads * \tparam T [inferred] The data type to store. * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam OutputIteratorT [inferred] The random-access iterator type for output \iterator. */ template < int BLOCK_THREADS, typename T, int ITEMS_PER_THREAD, typename OutputIteratorT> __device__ __forceinline__ void StoreDirectStriped( int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to T (&items)[ITEMS_PER_THREAD]) ///< [in] Data to store { OutputIteratorT thread_itr = block_itr + linear_tid; // Store directly in striped order #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { thread_itr[(ITEM * BLOCK_THREADS)] = items[ITEM]; } } /** * \brief Store a striped arrangement of data across the thread block into a linear segment of items, guarded by range * * \striped * * \tparam BLOCK_THREADS The thread block size in threads * \tparam T [inferred] The data type to store. * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam OutputIteratorT [inferred] The random-access iterator type for output \iterator. */ template < int BLOCK_THREADS, typename T, int ITEMS_PER_THREAD, typename OutputIteratorT> __device__ __forceinline__ void StoreDirectStriped( int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to T (&items)[ITEMS_PER_THREAD], ///< [in] Data to store int valid_items) ///< [in] Number of valid items to write { OutputIteratorT thread_itr = block_itr + linear_tid; // Store directly in striped order #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { if ((ITEM * BLOCK_THREADS) + linear_tid < valid_items) { thread_itr[(ITEM * BLOCK_THREADS)] = items[ITEM]; } } } //@} end member group /******************************************************************//** * \name Warp-striped arrangement I/O (direct) *********************************************************************/ //@{ /** * \brief Store a warp-striped arrangement of data across the thread block into a linear segment of items. 
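 *
 * \par
 * Illustrative addressing (a sketch mirroring the implementation below): lane \e l
 * of warp \e w writes its item \e i to offset
 * \code
 * w * WARP_THREADS * ITEMS_PER_THREAD  +  i * WARP_THREADS  +  l
 * \endcode
 * so, for a fixed \e i, a warp's writes touch consecutive locations and coalesce well.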
* * \warpstriped * * \par Usage Considerations * The number of threads in the thread block must be a multiple of the architecture's warp size. * * \tparam T [inferred] The data type to store. * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam OutputIteratorT [inferred] The random-access iterator type for output \iterator. */ template < typename T, int ITEMS_PER_THREAD, typename OutputIteratorT> __device__ __forceinline__ void StoreDirectWarpStriped( int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to T (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load { int tid = linear_tid & (CUB_PTX_WARP_THREADS - 1); int wid = linear_tid >> CUB_PTX_LOG_WARP_THREADS; int warp_offset = wid * CUB_PTX_WARP_THREADS * ITEMS_PER_THREAD; OutputIteratorT thread_itr = block_itr + warp_offset + tid; // Store directly in warp-striped order #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { thread_itr[(ITEM * CUB_PTX_WARP_THREADS)] = items[ITEM]; } } /** * \brief Store a warp-striped arrangement of data across the thread block into a linear segment of items, guarded by range * * \warpstriped * * \par Usage Considerations * The number of threads in the thread block must be a multiple of the architecture's warp size. * * \tparam T [inferred] The data type to store. * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam OutputIteratorT [inferred] The random-access iterator type for output \iterator. */ template < typename T, int ITEMS_PER_THREAD, typename OutputIteratorT> __device__ __forceinline__ void StoreDirectWarpStriped( int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to T (&items)[ITEMS_PER_THREAD], ///< [in] Data to store int valid_items) ///< [in] Number of valid items to write { int tid = linear_tid & (CUB_PTX_WARP_THREADS - 1); int wid = linear_tid >> CUB_PTX_LOG_WARP_THREADS; int warp_offset = wid * CUB_PTX_WARP_THREADS * ITEMS_PER_THREAD; OutputIteratorT thread_itr = block_itr + warp_offset + tid; // Store directly in warp-striped order #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { if (warp_offset + tid + (ITEM * CUB_PTX_WARP_THREADS) < valid_items) { thread_itr[(ITEM * CUB_PTX_WARP_THREADS)] = items[ITEM]; } } } //@} end member group /** @} */ // end group UtilIo //----------------------------------------------------------------------------- // Generic BlockStore abstraction //----------------------------------------------------------------------------- /** * \brief cub::BlockStoreAlgorithm enumerates alternative algorithms for cub::BlockStore to write a blocked arrangement of items across a CUDA thread block to a linear segment of memory. */ enum BlockStoreAlgorithm { /** * \par Overview * * A [blocked arrangement](index.html#sec5sec3) of data is written * directly to memory. * * \par Performance Considerations * - The utilization of memory transactions (coalescing) decreases as the * access stride between threads increases (i.e., the number items per thread). 
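 *   As a worked illustration (following StoreDirectBlocked above): thread \e t writes
 *   its item \e i to offset <tt>t * ITEMS_PER_THREAD + i</tt>, so with 4 ints per
 *   thread the starting addresses of consecutive threads sit 16 bytes apart and a
 *   warp's first items span 512 bytes rather than a single 128-byte transaction.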
*/ BLOCK_STORE_DIRECT, /** * \par Overview * * A [blocked arrangement](index.html#sec5sec3) of data is written directly * to memory using CUDA's built-in vectorized stores as a coalescing optimization. * For example, st.global.v4.s32 instructions will be generated * when \p T = \p int and \p ITEMS_PER_THREAD % 4 == 0. * * \par Performance Considerations * - The utilization of memory transactions (coalescing) remains high until the the * access stride between threads (i.e., the number items per thread) exceeds the * maximum vector store width (typically 4 items or 64B, whichever is lower). * - The following conditions will prevent vectorization and writing will fall back to cub::BLOCK_STORE_DIRECT: * - \p ITEMS_PER_THREAD is odd * - The \p OutputIteratorT is not a simple pointer type * - The block output offset is not quadword-aligned * - The data type \p T is not a built-in primitive or CUDA vector type (e.g., \p short, \p int2, \p double, \p float2, etc.) */ BLOCK_STORE_VECTORIZE, /** * \par Overview * A [blocked arrangement](index.html#sec5sec3) is locally * transposed and then efficiently written to memory as a [striped arrangement](index.html#sec5sec3). * * \par Performance Considerations * - The utilization of memory transactions (coalescing) remains high regardless * of items written per thread. * - The local reordering incurs slightly longer latencies and throughput than the * direct cub::BLOCK_STORE_DIRECT and cub::BLOCK_STORE_VECTORIZE alternatives. */ BLOCK_STORE_TRANSPOSE, /** * \par Overview * A [blocked arrangement](index.html#sec5sec3) is locally * transposed and then efficiently written to memory as a * [warp-striped arrangement](index.html#sec5sec3) * * \par Usage Considerations * - BLOCK_THREADS must be a multiple of WARP_THREADS * * \par Performance Considerations * - The utilization of memory transactions (coalescing) remains high regardless * of items written per thread. * - The local reordering incurs slightly longer latencies and throughput than the * direct cub::BLOCK_STORE_DIRECT and cub::BLOCK_STORE_VECTORIZE alternatives. */ BLOCK_STORE_WARP_TRANSPOSE, /** * \par Overview * A [blocked arrangement](index.html#sec5sec3) is locally * transposed and then efficiently written to memory as a * [warp-striped arrangement](index.html#sec5sec3) * To reduce the shared memory requirement, only one warp's worth of shared * memory is provisioned and is subsequently time-sliced among warps. * * \par Usage Considerations * - BLOCK_THREADS must be a multiple of WARP_THREADS * * \par Performance Considerations * - The utilization of memory transactions (coalescing) remains high regardless * of items written per thread. * - Provisions less shared memory temporary storage, but incurs larger * latencies than the BLOCK_STORE_WARP_TRANSPOSE alternative. */ BLOCK_STORE_WARP_TRANSPOSE_TIMESLICED, }; /** * \brief The BlockStore class provides [collective](index.html#sec0) data movement methods for writing a [blocked arrangement](index.html#sec5sec3) of items partitioned across a CUDA thread block to a linear segment of memory. ![](block_store_logo.png) * \ingroup BlockModule * \ingroup UtilIo * * \tparam T The type of data to be written. * \tparam BLOCK_DIM_X The thread block length in threads along the X dimension * \tparam ITEMS_PER_THREAD The number of consecutive items partitioned onto each thread. * \tparam ALGORITHM [optional] cub::BlockStoreAlgorithm tuning policy enumeration. default: cub::BLOCK_STORE_DIRECT. 
* \tparam WARP_TIME_SLICING [optional] Whether or not only one warp's worth of shared memory should be allocated and time-sliced among block-warps during any load-related data transpositions (versus each warp having its own storage). (default: false) * \tparam BLOCK_DIM_Y [optional] The thread block length in threads along the Y dimension (default: 1) * \tparam BLOCK_DIM_Z [optional] The thread block length in threads along the Z dimension (default: 1) * \tparam PTX_ARCH [optional] \ptxversion * * \par Overview * - The BlockStore class provides a single data movement abstraction that can be specialized * to implement different cub::BlockStoreAlgorithm strategies. This facilitates different * performance policies for different architectures, data types, granularity sizes, etc. * - BlockStore can be optionally specialized by different data movement strategies: * -# cub::BLOCK_STORE_DIRECT. A [blocked arrangement](index.html#sec5sec3) of data is written * directly to memory. [More...](\ref cub::BlockStoreAlgorithm) * -# cub::BLOCK_STORE_VECTORIZE. A [blocked arrangement](index.html#sec5sec3) * of data is written directly to memory using CUDA's built-in vectorized stores as a * coalescing optimization. [More...](\ref cub::BlockStoreAlgorithm) * -# cub::BLOCK_STORE_TRANSPOSE. A [blocked arrangement](index.html#sec5sec3) * is locally transposed into a [striped arrangement](index.html#sec5sec3) which is * then written to memory. [More...](\ref cub::BlockStoreAlgorithm) * -# cub::BLOCK_STORE_WARP_TRANSPOSE. A [blocked arrangement](index.html#sec5sec3) * is locally transposed into a [warp-striped arrangement](index.html#sec5sec3) which is * then written to memory. [More...](\ref cub::BlockStoreAlgorithm) * - \rowmajor * * \par A Simple Example * \blockcollective{BlockStore} * \par * The code snippet below illustrates the storing of a "blocked" arrangement * of 512 integers across 128 threads (where each thread owns 4 consecutive items) * into a linear segment of memory. The store is specialized for \p BLOCK_STORE_WARP_TRANSPOSE, * meaning items are locally reordered among threads so that memory references will be * efficiently coalesced using a warp-striped access pattern. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(int *d_data, ...) * { * // Specialize BlockStore for a 1D block of 128 threads owning 4 integer items each * typedef cub::BlockStore BlockStore; * * // Allocate shared memory for BlockStore * __shared__ typename BlockStore::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... * * // Store items to linear memory * int thread_data[4]; * BlockStore(temp_storage).Store(d_data, thread_data); * * \endcode * \par * Suppose the set of \p thread_data across the block of threads is * { [0,1,2,3], [4,5,6,7], ..., [508,509,510,511] }. * The output \p d_data will be 0, 1, 2, 3, 4, 5, .... 
* */ template < typename T, int BLOCK_DIM_X, int ITEMS_PER_THREAD, BlockStoreAlgorithm ALGORITHM = BLOCK_STORE_DIRECT, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH> class BlockStore { private: /****************************************************************************** * Constants and typed definitions ******************************************************************************/ /// Constants enum { /// The thread block size in threads BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, }; /****************************************************************************** * Algorithmic variants ******************************************************************************/ /// Store helper template struct StoreInternal; /** * BLOCK_STORE_DIRECT specialization of store helper */ template struct StoreInternal { /// Shared memory storage layout type typedef NullType TempStorage; /// Linear thread-id int linear_tid; /// Constructor __device__ __forceinline__ StoreInternal( TempStorage &/*temp_storage*/, int linear_tid) : linear_tid(linear_tid) {} /// Store items into a linear segment of memory template __device__ __forceinline__ void Store( OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to T (&items)[ITEMS_PER_THREAD]) ///< [in] Data to store { StoreDirectBlocked(linear_tid, block_itr, items); } /// Store items into a linear segment of memory, guarded by range template __device__ __forceinline__ void Store( OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to T (&items)[ITEMS_PER_THREAD], ///< [in] Data to store int valid_items) ///< [in] Number of valid items to write { StoreDirectBlocked(linear_tid, block_itr, items, valid_items); } }; /** * BLOCK_STORE_VECTORIZE specialization of store helper */ template struct StoreInternal { /// Shared memory storage layout type typedef NullType TempStorage; /// Linear thread-id int linear_tid; /// Constructor __device__ __forceinline__ StoreInternal( TempStorage &/*temp_storage*/, int linear_tid) : linear_tid(linear_tid) {} /// Store items into a linear segment of memory, specialized for native pointer types (attempts vectorization) __device__ __forceinline__ void Store( T *block_ptr, ///< [in] The thread block's base output iterator for storing to T (&items)[ITEMS_PER_THREAD]) ///< [in] Data to store { StoreDirectBlockedVectorized(linear_tid, block_ptr, items); } /// Store items into a linear segment of memory, specialized for opaque input iterators (skips vectorization) template __device__ __forceinline__ void Store( OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to T (&items)[ITEMS_PER_THREAD]) ///< [in] Data to store { StoreDirectBlocked(linear_tid, block_itr, items); } /// Store items into a linear segment of memory, guarded by range template __device__ __forceinline__ void Store( OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to T (&items)[ITEMS_PER_THREAD], ///< [in] Data to store int valid_items) ///< [in] Number of valid items to write { StoreDirectBlocked(linear_tid, block_itr, items, valid_items); } }; /** * BLOCK_STORE_TRANSPOSE specialization of store helper */ template struct StoreInternal { // BlockExchange utility type for keys typedef BlockExchange BlockExchange; /// Shared memory storage layout type struct _TempStorage : BlockExchange::TempStorage { /// Temporary storage for partially-full block guard volatile int valid_items; }; /// Alias 
wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; /// Thread reference to shared storage _TempStorage &temp_storage; /// Linear thread-id int linear_tid; /// Constructor __device__ __forceinline__ StoreInternal( TempStorage &temp_storage, int linear_tid) : temp_storage(temp_storage.Alias()), linear_tid(linear_tid) {} /// Store items into a linear segment of memory template __device__ __forceinline__ void Store( OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to T (&items)[ITEMS_PER_THREAD]) ///< [in] Data to store { BlockExchange(temp_storage).BlockedToStriped(items); StoreDirectStriped(linear_tid, block_itr, items); } /// Store items into a linear segment of memory, guarded by range template __device__ __forceinline__ void Store( OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to T (&items)[ITEMS_PER_THREAD], ///< [in] Data to store int valid_items) ///< [in] Number of valid items to write { BlockExchange(temp_storage).BlockedToStriped(items); if (linear_tid == 0) temp_storage.valid_items = valid_items; // Move through volatile smem as a workaround to prevent RF spilling on subsequent loads CTA_SYNC(); StoreDirectStriped(linear_tid, block_itr, items, temp_storage.valid_items); } }; /** * BLOCK_STORE_WARP_TRANSPOSE specialization of store helper */ template struct StoreInternal { enum { WARP_THREADS = CUB_WARP_THREADS(PTX_ARCH) }; // Assert BLOCK_THREADS must be a multiple of WARP_THREADS CUB_STATIC_ASSERT((BLOCK_THREADS % WARP_THREADS == 0), "BLOCK_THREADS must be a multiple of WARP_THREADS"); // BlockExchange utility type for keys typedef BlockExchange BlockExchange; /// Shared memory storage layout type struct _TempStorage : BlockExchange::TempStorage { /// Temporary storage for partially-full block guard volatile int valid_items; }; /// Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; /// Thread reference to shared storage _TempStorage &temp_storage; /// Linear thread-id int linear_tid; /// Constructor __device__ __forceinline__ StoreInternal( TempStorage &temp_storage, int linear_tid) : temp_storage(temp_storage.Alias()), linear_tid(linear_tid) {} /// Store items into a linear segment of memory template __device__ __forceinline__ void Store( OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to T (&items)[ITEMS_PER_THREAD]) ///< [in] Data to store { BlockExchange(temp_storage).BlockedToWarpStriped(items); StoreDirectWarpStriped(linear_tid, block_itr, items); } /// Store items into a linear segment of memory, guarded by range template __device__ __forceinline__ void Store( OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to T (&items)[ITEMS_PER_THREAD], ///< [in] Data to store int valid_items) ///< [in] Number of valid items to write { BlockExchange(temp_storage).BlockedToWarpStriped(items); if (linear_tid == 0) temp_storage.valid_items = valid_items; // Move through volatile smem as a workaround to prevent RF spilling on subsequent loads CTA_SYNC(); StoreDirectWarpStriped(linear_tid, block_itr, items, temp_storage.valid_items); } }; /** * BLOCK_STORE_WARP_TRANSPOSE_TIMESLICED specialization of store helper */ template struct StoreInternal { enum { WARP_THREADS = CUB_WARP_THREADS(PTX_ARCH) }; // Assert BLOCK_THREADS must be a multiple of WARP_THREADS CUB_STATIC_ASSERT((BLOCK_THREADS % WARP_THREADS == 0), "BLOCK_THREADS must be a multiple 
of WARP_THREADS"); // BlockExchange utility type for keys typedef BlockExchange BlockExchange; /// Shared memory storage layout type struct _TempStorage : BlockExchange::TempStorage { /// Temporary storage for partially-full block guard volatile int valid_items; }; /// Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; /// Thread reference to shared storage _TempStorage &temp_storage; /// Linear thread-id int linear_tid; /// Constructor __device__ __forceinline__ StoreInternal( TempStorage &temp_storage, int linear_tid) : temp_storage(temp_storage.Alias()), linear_tid(linear_tid) {} /// Store items into a linear segment of memory template __device__ __forceinline__ void Store( OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to T (&items)[ITEMS_PER_THREAD]) ///< [in] Data to store { BlockExchange(temp_storage).BlockedToWarpStriped(items); StoreDirectWarpStriped(linear_tid, block_itr, items); } /// Store items into a linear segment of memory, guarded by range template __device__ __forceinline__ void Store( OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to T (&items)[ITEMS_PER_THREAD], ///< [in] Data to store int valid_items) ///< [in] Number of valid items to write { BlockExchange(temp_storage).BlockedToWarpStriped(items); if (linear_tid == 0) temp_storage.valid_items = valid_items; // Move through volatile smem as a workaround to prevent RF spilling on subsequent loads CTA_SYNC(); StoreDirectWarpStriped(linear_tid, block_itr, items, temp_storage.valid_items); } }; /****************************************************************************** * Type definitions ******************************************************************************/ /// Internal load implementation to use typedef StoreInternal InternalStore; /// Shared memory storage layout type typedef typename InternalStore::TempStorage _TempStorage; /****************************************************************************** * Utility methods ******************************************************************************/ /// Internal storage allocator __device__ __forceinline__ _TempStorage& PrivateStorage() { __shared__ _TempStorage private_storage; return private_storage; } /****************************************************************************** * Thread fields ******************************************************************************/ /// Thread reference to shared storage _TempStorage &temp_storage; /// Linear thread-id int linear_tid; public: /// \smemstorage{BlockStore} struct TempStorage : Uninitialized<_TempStorage> {}; /******************************************************************//** * \name Collective constructors *********************************************************************/ //@{ /** * \brief Collective constructor using a private static allocation of shared memory as temporary storage. */ __device__ __forceinline__ BlockStore() : temp_storage(PrivateStorage()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} /** * \brief Collective constructor using the specified memory allocation as temporary storage. 
*/ __device__ __forceinline__ BlockStore( TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage : temp_storage(temp_storage.Alias()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} //@} end member group /******************************************************************//** * \name Data movement *********************************************************************/ //@{ /** * \brief Store items into a linear segment of memory. * * \par * - \blocked * - \smemreuse * * \par Snippet * The code snippet below illustrates the storing of a "blocked" arrangement * of 512 integers across 128 threads (where each thread owns 4 consecutive items) * into a linear segment of memory. The store is specialized for \p BLOCK_STORE_WARP_TRANSPOSE, * meaning items are locally reordered among threads so that memory references will be * efficiently coalesced using a warp-striped access pattern. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(int *d_data, ...) * { * // Specialize BlockStore for a 1D block of 128 threads owning 4 integer items each * typedef cub::BlockStore BlockStore; * * // Allocate shared memory for BlockStore * __shared__ typename BlockStore::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... * * // Store items to linear memory * int thread_data[4]; * BlockStore(temp_storage).Store(d_data, thread_data); * * \endcode * \par * Suppose the set of \p thread_data across the block of threads is * { [0,1,2,3], [4,5,6,7], ..., [508,509,510,511] }. * The output \p d_data will be 0, 1, 2, 3, 4, 5, .... * */ template __device__ __forceinline__ void Store( OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to T (&items)[ITEMS_PER_THREAD]) ///< [in] Data to store { InternalStore(temp_storage, linear_tid).Store(block_itr, items); } /** * \brief Store items into a linear segment of memory, guarded by range. * * \par * - \blocked * - \smemreuse * * \par Snippet * The code snippet below illustrates the guarded storing of a "blocked" arrangement * of 512 integers across 128 threads (where each thread owns 4 consecutive items) * into a linear segment of memory. The store is specialized for \p BLOCK_STORE_WARP_TRANSPOSE, * meaning items are locally reordered among threads so that memory references will be * efficiently coalesced using a warp-striped access pattern. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(int *d_data, int valid_items, ...) * { * // Specialize BlockStore for a 1D block of 128 threads owning 4 integer items each * typedef cub::BlockStore BlockStore; * * // Allocate shared memory for BlockStore * __shared__ typename BlockStore::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... * * // Store items to linear memory * int thread_data[4]; * BlockStore(temp_storage).Store(d_data, thread_data, valid_items); * * \endcode * \par * Suppose the set of \p thread_data across the block of threads is * { [0,1,2,3], [4,5,6,7], ..., [508,509,510,511] } and \p valid_items is \p 5. * The output \p d_data will be 0, 1, 2, 3, 4, ?, ?, ?, ..., with * only the first two threads being unmasked to store portions of valid data. 
* */ template __device__ __forceinline__ void Store( OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to T (&items)[ITEMS_PER_THREAD], ///< [in] Data to store int valid_items) ///< [in] Number of valid items to write { InternalStore(temp_storage, linear_tid).Store(block_itr, items, valid_items); } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/block/specializations/000077500000000000000000000000001411340063500223005ustar00rootroot00000000000000relion-3.1.3/src/acc/cuda/cub/block/specializations/block_histogram_atomic.cuh000066400000000000000000000063251411340063500275120ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * The cub::BlockHistogramAtomic class provides atomic-based methods for constructing block-wide histograms from data samples partitioned across a CUDA thread block. */ #pragma once #include "../../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief The BlockHistogramAtomic class provides atomic-based methods for constructing block-wide histograms from data samples partitioned across a CUDA thread block. 
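 *
 * \par
 * Rough sketch of the strategy (the actual member is Composite() below): every
 * thread walks its own samples and issues one atomicAdd per sample into the bin
 * counters, e.g.
 * \code
 * for (int i = 0; i < ITEMS_PER_THREAD; ++i)
 *     atomicAdd(histogram + items[i], 1);
 * \endcode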
*/ template struct BlockHistogramAtomic { /// Shared memory storage layout type struct TempStorage {}; /// Constructor __device__ __forceinline__ BlockHistogramAtomic( TempStorage &temp_storage) {} /// Composite data onto an existing histogram template < typename T, typename CounterT, int ITEMS_PER_THREAD> __device__ __forceinline__ void Composite( T (&items)[ITEMS_PER_THREAD], ///< [in] Calling thread's input values to histogram CounterT histogram[BINS]) ///< [out] Reference to shared/device-accessible memory histogram { // Update histogram #pragma unroll for (int i = 0; i < ITEMS_PER_THREAD; ++i) { atomicAdd(histogram + items[i], 1); } } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/block/specializations/block_histogram_sort.cuh000066400000000000000000000200061411340063500272150ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * The cub::BlockHistogramSort class provides sorting-based methods for constructing block-wide histograms from data samples partitioned across a CUDA thread block. */ #pragma once #include "../../block/block_radix_sort.cuh" #include "../../block/block_discontinuity.cuh" #include "../../util_ptx.cuh" #include "../../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief The BlockHistogramSort class provides sorting-based methods for constructing block-wide histograms from data samples partitioned across a CUDA thread block. 
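 *
 * \par
 * Outline of the strategy (implemented by Composite() below): the tile of samples is
 * first sorted with BlockRadixSort, BlockDiscontinuity then flags where each run of
 * equal samples begins and ends, and every bin's count is recovered as
 * \code
 * count = run_end[bin] - run_begin[bin];
 * \endcode
 * before being added into the caller's histogram.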
*/ template < typename T, ///< Sample type int BLOCK_DIM_X, ///< The thread block length in threads along the X dimension int ITEMS_PER_THREAD, ///< The number of samples per thread int BINS, ///< The number of bins into which histogram samples may fall int BLOCK_DIM_Y, ///< The thread block length in threads along the Y dimension int BLOCK_DIM_Z, ///< The thread block length in threads along the Z dimension int PTX_ARCH> ///< The PTX compute capability for which to to specialize this collective struct BlockHistogramSort { /// Constants enum { /// The thread block size in threads BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, }; // Parameterize BlockRadixSort type for our thread block typedef BlockRadixSort< T, BLOCK_DIM_X, ITEMS_PER_THREAD, NullType, 4, (PTX_ARCH >= 350) ? true : false, BLOCK_SCAN_WARP_SCANS, cudaSharedMemBankSizeFourByte, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH> BlockRadixSortT; // Parameterize BlockDiscontinuity type for our thread block typedef BlockDiscontinuity< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH> BlockDiscontinuityT; /// Shared memory union _TempStorage { // Storage for sorting bin values typename BlockRadixSortT::TempStorage sort; struct { // Storage for detecting discontinuities in the tile of sorted bin values typename BlockDiscontinuityT::TempStorage flag; // Storage for noting begin/end offsets of bin runs in the tile of sorted bin values unsigned int run_begin[BINS]; unsigned int run_end[BINS]; }; }; /// Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; // Thread fields _TempStorage &temp_storage; unsigned int linear_tid; /// Constructor __device__ __forceinline__ BlockHistogramSort( TempStorage &temp_storage) : temp_storage(temp_storage.Alias()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} // Discontinuity functor struct DiscontinuityOp { // Reference to temp_storage _TempStorage &temp_storage; // Constructor __device__ __forceinline__ DiscontinuityOp(_TempStorage &temp_storage) : temp_storage(temp_storage) {} // Discontinuity predicate __device__ __forceinline__ bool operator()(const T &a, const T &b, int b_index) { if (a != b) { // Note the begin/end offsets in shared storage temp_storage.run_begin[b] = b_index; temp_storage.run_end[a] = b_index; return true; } else { return false; } } }; // Composite data onto an existing histogram template < typename CounterT > __device__ __forceinline__ void Composite( T (&items)[ITEMS_PER_THREAD], ///< [in] Calling thread's input values to histogram CounterT histogram[BINS]) ///< [out] Reference to shared/device-accessible memory histogram { enum { TILE_SIZE = BLOCK_THREADS * ITEMS_PER_THREAD }; // Sort bytes in blocked arrangement BlockRadixSortT(temp_storage.sort).Sort(items); CTA_SYNC(); // Initialize the shared memory's run_begin and run_end for each bin int histo_offset = 0; #pragma unroll for(; histo_offset + BLOCK_THREADS <= BINS; histo_offset += BLOCK_THREADS) { temp_storage.run_begin[histo_offset + linear_tid] = TILE_SIZE; temp_storage.run_end[histo_offset + linear_tid] = TILE_SIZE; } // Finish up with guarded initialization if necessary if ((BINS % BLOCK_THREADS != 0) && (histo_offset + linear_tid < BINS)) { temp_storage.run_begin[histo_offset + linear_tid] = TILE_SIZE; temp_storage.run_end[histo_offset + linear_tid] = TILE_SIZE; } CTA_SYNC(); int flags[ITEMS_PER_THREAD]; // unused // Compute head flags to demarcate contiguous runs of the same bin in the sorted tile DiscontinuityOp flag_op(temp_storage); 
BlockDiscontinuityT(temp_storage.flag).FlagHeads(flags, items, flag_op); // Update begin for first item if (linear_tid == 0) temp_storage.run_begin[items[0]] = 0; CTA_SYNC(); // Composite into histogram histo_offset = 0; #pragma unroll for(; histo_offset + BLOCK_THREADS <= BINS; histo_offset += BLOCK_THREADS) { int thread_offset = histo_offset + linear_tid; CounterT count = temp_storage.run_end[thread_offset] - temp_storage.run_begin[thread_offset]; histogram[thread_offset] += count; } // Finish up with guarded composition if necessary if ((BINS % BLOCK_THREADS != 0) && (histo_offset + linear_tid < BINS)) { int thread_offset = histo_offset + linear_tid; CounterT count = temp_storage.run_end[thread_offset] - temp_storage.run_begin[thread_offset]; histogram[thread_offset] += count; } } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/block/specializations/block_reduce_raking.cuh000066400000000000000000000225571411340063500267700ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::BlockReduceRaking provides raking-based methods of parallel reduction across a CUDA thread block. Supports non-commutative reduction operators. */ #pragma once #include "../../block/block_raking_layout.cuh" #include "../../warp/warp_reduce.cuh" #include "../../thread/thread_reduce.cuh" #include "../../util_ptx.cuh" #include "../../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief BlockReduceRaking provides raking-based methods of parallel reduction across a CUDA thread block. Supports non-commutative reduction operators. * * Supports non-commutative binary reduction operators. 
Unlike commutative * reduction operators (e.g., addition), the application of a non-commutative * reduction operator (e.g, string concatenation) across a sequence of inputs must * honor the relative ordering of items and partial reductions when applying the * reduction operator. * * Compared to the implementation of BlockReduceRaking (which does not support * non-commutative operators), this implementation requires a few extra * rounds of inter-thread communication. */ template < typename T, ///< Data type being reduced int BLOCK_DIM_X, ///< The thread block length in threads along the X dimension int BLOCK_DIM_Y, ///< The thread block length in threads along the Y dimension int BLOCK_DIM_Z, ///< The thread block length in threads along the Z dimension int PTX_ARCH> ///< The PTX compute capability for which to to specialize this collective struct BlockReduceRaking { /// Constants enum { /// The thread block size in threads BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, }; /// Layout type for padded thread block raking grid typedef BlockRakingLayout BlockRakingLayout; /// WarpReduce utility type typedef typename WarpReduce::InternalWarpReduce WarpReduce; /// Constants enum { /// Number of raking threads RAKING_THREADS = BlockRakingLayout::RAKING_THREADS, /// Number of raking elements per warp synchronous raking thread SEGMENT_LENGTH = BlockRakingLayout::SEGMENT_LENGTH, /// Cooperative work can be entirely warp synchronous WARP_SYNCHRONOUS = (RAKING_THREADS == BLOCK_THREADS), /// Whether or not warp-synchronous reduction should be unguarded (i.e., the warp-reduction elements is a power of two WARP_SYNCHRONOUS_UNGUARDED = PowerOfTwo::VALUE, /// Whether or not accesses into smem are unguarded RAKING_UNGUARDED = BlockRakingLayout::UNGUARDED, }; /// Shared memory storage layout type union _TempStorage { typename WarpReduce::TempStorage warp_storage; ///< Storage for warp-synchronous reduction typename BlockRakingLayout::TempStorage raking_grid; ///< Padded thread block raking grid }; /// Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; // Thread fields _TempStorage &temp_storage; unsigned int linear_tid; /// Constructor __device__ __forceinline__ BlockReduceRaking( TempStorage &temp_storage) : temp_storage(temp_storage.Alias()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} template __device__ __forceinline__ T RakingReduction( ReductionOp reduction_op, ///< [in] Binary scan operator T *raking_segment, T partial, ///< [in] [lane0 only] Warp-wide aggregate reduction of input items int num_valid, ///< [in] Number of valid elements (may be less than BLOCK_THREADS) Int2Type /*iteration*/) { // Update partial if addend is in range if ((IS_FULL_TILE && RAKING_UNGUARDED) || ((linear_tid * SEGMENT_LENGTH) + ITERATION < num_valid)) { T addend = raking_segment[ITERATION]; partial = reduction_op(partial, addend); } return RakingReduction(reduction_op, raking_segment, partial, num_valid, Int2Type()); } template __device__ __forceinline__ T RakingReduction( ReductionOp /*reduction_op*/, ///< [in] Binary scan operator T * /*raking_segment*/, T partial, ///< [in] [lane0 only] Warp-wide aggregate reduction of input items int /*num_valid*/, ///< [in] Number of valid elements (may be less than BLOCK_THREADS) Int2Type /*iteration*/) { return partial; } /// Computes a thread block-wide reduction using the specified reduction operator. The first num_valid threads each contribute one reduction partial. 
The return value is only valid for thread0. template < bool IS_FULL_TILE, typename ReductionOp> __device__ __forceinline__ T Reduce( T partial, ///< [in] Calling thread's input partial reductions int num_valid, ///< [in] Number of valid elements (may be less than BLOCK_THREADS) ReductionOp reduction_op) ///< [in] Binary reduction operator { if (WARP_SYNCHRONOUS) { // Short-circuit directly to warp synchronous reduction (unguarded if active threads is a power-of-two) partial = WarpReduce(temp_storage.warp_storage).template Reduce( partial, num_valid, reduction_op); } else { // Place partial into shared memory grid. *BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid) = partial; CTA_SYNC(); // Reduce parallelism to one warp if (linear_tid < RAKING_THREADS) { // Raking reduction in grid T *raking_segment = BlockRakingLayout::RakingPtr(temp_storage.raking_grid, linear_tid); partial = raking_segment[0]; partial = RakingReduction(reduction_op, raking_segment, partial, num_valid, Int2Type<1>()); partial = WarpReduce(temp_storage.warp_storage).template Reduce( partial, num_valid, reduction_op); } } return partial; } /// Computes a thread block-wide reduction using addition (+) as the reduction operator. The first num_valid threads each contribute one reduction partial. The return value is only valid for thread0. template __device__ __forceinline__ T Sum( T partial, ///< [in] Calling thread's input partial reductions int num_valid) ///< [in] Number of valid elements (may be less than BLOCK_THREADS) { cub::Sum reduction_op; return Reduce(partial, num_valid, reduction_op); } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/block/specializations/block_reduce_raking_commutative_only.cuh000066400000000000000000000202611411340063500324340ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ******************************************************************************/ /** * \file * cub::BlockReduceRakingCommutativeOnly provides raking-based methods of parallel reduction across a CUDA thread block. Does not support non-commutative reduction operators. */ #pragma once #include "block_reduce_raking.cuh" #include "../../warp/warp_reduce.cuh" #include "../../thread/thread_reduce.cuh" #include "../../util_ptx.cuh" #include "../../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief BlockReduceRakingCommutativeOnly provides raking-based methods of parallel reduction across a CUDA thread block. Does not support non-commutative reduction operators. Does not support block sizes that are not a multiple of the warp size. */ template < typename T, ///< Data type being reduced int BLOCK_DIM_X, ///< The thread block length in threads along the X dimension int BLOCK_DIM_Y, ///< The thread block length in threads along the Y dimension int BLOCK_DIM_Z, ///< The thread block length in threads along the Z dimension int PTX_ARCH> ///< The PTX compute capability for which to to specialize this collective struct BlockReduceRakingCommutativeOnly { /// Constants enum { /// The thread block size in threads BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, }; // The fall-back implementation to use when BLOCK_THREADS is not a multiple of the warp size or not all threads have valid values typedef BlockReduceRaking FallBack; /// Constants enum { /// Number of warp threads WARP_THREADS = CUB_WARP_THREADS(PTX_ARCH), /// Whether or not to use fall-back USE_FALLBACK = ((BLOCK_THREADS % WARP_THREADS != 0) || (BLOCK_THREADS <= WARP_THREADS)), /// Number of raking threads RAKING_THREADS = WARP_THREADS, /// Number of threads actually sharing items with the raking threads SHARING_THREADS = CUB_MAX(1, BLOCK_THREADS - RAKING_THREADS), /// Number of raking elements per warp synchronous raking thread SEGMENT_LENGTH = SHARING_THREADS / WARP_THREADS, }; /// WarpReduce utility type typedef WarpReduce WarpReduce; /// Layout type for padded thread block raking grid typedef BlockRakingLayout BlockRakingLayout; /// Shared memory storage layout type union _TempStorage { struct { typename WarpReduce::TempStorage warp_storage; ///< Storage for warp-synchronous reduction typename BlockRakingLayout::TempStorage raking_grid; ///< Padded thread block raking grid }; typename FallBack::TempStorage fallback_storage; ///< Fall-back storage for non-commutative block scan }; /// Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; // Thread fields _TempStorage &temp_storage; unsigned int linear_tid; /// Constructor __device__ __forceinline__ BlockReduceRakingCommutativeOnly( TempStorage &temp_storage) : temp_storage(temp_storage.Alias()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} /// Computes a thread block-wide reduction using addition (+) as the reduction operator. The first num_valid threads each contribute one reduction partial. The return value is only valid for thread0. 
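    /// \par Snippet
    /// A minimal usage sketch, assuming the public cub::BlockReduce front-end of this same
    /// library is used to reach this specialization via cub::BLOCK_REDUCE_RAKING_COMMUTATIVE_ONLY;
    /// the kernel name, 128-thread block size and device pointers are illustrative placeholders.
    /// \par
    /// \code
    /// #include <cub/cub.cuh>
    ///
    /// __global__ void ExampleBlockSumKernel(const int *d_in, int *d_block_sums)
    /// {
    ///     // Specialize BlockReduce for a 1D block of 128 threads of type int,
    ///     // requesting the commutative-only raking algorithm
    ///     typedef cub::BlockReduce<int, 128, cub::BLOCK_REDUCE_RAKING_COMMUTATIVE_ONLY> BlockReduce;
    ///
    ///     // Shared memory for the collective
    ///     __shared__ typename BlockReduce::TempStorage temp_storage;
    ///
    ///     // Each thread contributes one item (assumes a 128-thread launch)
    ///     int thread_item = d_in[blockIdx.x * 128 + threadIdx.x];
    ///
    ///     // Block-wide sum; the return value is only valid in thread0
    ///     int block_sum = BlockReduce(temp_storage).Sum(thread_item);
    ///
    ///     if (threadIdx.x == 0)
    ///         d_block_sums[blockIdx.x] = block_sum;
    /// }
    /// \endcode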
template __device__ __forceinline__ T Sum( T partial, ///< [in] Calling thread's input partial reductions int num_valid) ///< [in] Number of valid elements (may be less than BLOCK_THREADS) { if (USE_FALLBACK || !FULL_TILE) { return FallBack(temp_storage.fallback_storage).template Sum(partial, num_valid); } else { // Place partial into shared memory grid if (linear_tid >= RAKING_THREADS) *BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid - RAKING_THREADS) = partial; CTA_SYNC(); // Reduce parallelism to one warp if (linear_tid < RAKING_THREADS) { // Raking reduction in grid T *raking_segment = BlockRakingLayout::RakingPtr(temp_storage.raking_grid, linear_tid); partial = internal::ThreadReduce(raking_segment, cub::Sum(), partial); // Warpscan partial = WarpReduce(temp_storage.warp_storage).Sum(partial); } } return partial; } /// Computes a thread block-wide reduction using the specified reduction operator. The first num_valid threads each contribute one reduction partial. The return value is only valid for thread0. template < bool FULL_TILE, typename ReductionOp> __device__ __forceinline__ T Reduce( T partial, ///< [in] Calling thread's input partial reductions int num_valid, ///< [in] Number of valid elements (may be less than BLOCK_THREADS) ReductionOp reduction_op) ///< [in] Binary reduction operator { if (USE_FALLBACK || !FULL_TILE) { return FallBack(temp_storage.fallback_storage).template Reduce(partial, num_valid, reduction_op); } else { // Place partial into shared memory grid if (linear_tid >= RAKING_THREADS) *BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid - RAKING_THREADS) = partial; CTA_SYNC(); // Reduce parallelism to one warp if (linear_tid < RAKING_THREADS) { // Raking reduction in grid T *raking_segment = BlockRakingLayout::RakingPtr(temp_storage.raking_grid, linear_tid); partial = internal::ThreadReduce(raking_segment, reduction_op, partial); // Warpscan partial = WarpReduce(temp_storage.warp_storage).Reduce(partial, reduction_op); } } return partial; } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/block/specializations/block_reduce_warp_reductions.cuh000066400000000000000000000233261411340063500307200ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::BlockReduceWarpReductions provides variants of warp-reduction-based parallel reduction across a CUDA thread block. Supports non-commutative reduction operators. */ #pragma once #include "../../warp/warp_reduce.cuh" #include "../../util_ptx.cuh" #include "../../util_arch.cuh" #include "../../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief BlockReduceWarpReductions provides variants of warp-reduction-based parallel reduction across a CUDA thread block. Supports non-commutative reduction operators. */ template < typename T, ///< Data type being reduced int BLOCK_DIM_X, ///< The thread block length in threads along the X dimension int BLOCK_DIM_Y, ///< The thread block length in threads along the Y dimension int BLOCK_DIM_Z, ///< The thread block length in threads along the Z dimension int PTX_ARCH> ///< The PTX compute capability for which to to specialize this collective struct BlockReduceWarpReductions { /// Constants enum { /// The thread block size in threads BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, /// Number of warp threads WARP_THREADS = CUB_WARP_THREADS(PTX_ARCH), /// Number of active warps WARPS = (BLOCK_THREADS + WARP_THREADS - 1) / WARP_THREADS, /// The logical warp size for warp reductions LOGICAL_WARP_SIZE = CUB_MIN(BLOCK_THREADS, WARP_THREADS), /// Whether or not the logical warp size evenly divides the thread block size EVEN_WARP_MULTIPLE = (BLOCK_THREADS % LOGICAL_WARP_SIZE == 0) }; /// WarpReduce utility type typedef typename WarpReduce::InternalWarpReduce WarpReduce; /// Shared memory storage layout type struct _TempStorage { typename WarpReduce::TempStorage warp_reduce[WARPS]; ///< Buffer for warp-synchronous scan T warp_aggregates[WARPS]; ///< Shared totals from each warp-synchronous scan T block_prefix; ///< Shared prefix for the entire thread block }; /// Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; // Thread fields _TempStorage &temp_storage; unsigned int linear_tid; unsigned int warp_id; unsigned int lane_id; /// Constructor __device__ __forceinline__ BlockReduceWarpReductions( TempStorage &temp_storage) : temp_storage(temp_storage.Alias()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)), warp_id((WARPS == 1) ? 
0 : linear_tid / WARP_THREADS), lane_id(LaneId()) {} template __device__ __forceinline__ T ApplyWarpAggregates( ReductionOp reduction_op, ///< [in] Binary scan operator T warp_aggregate, ///< [in] [lane0 only] Warp-wide aggregate reduction of input items int num_valid, ///< [in] Number of valid elements (may be less than BLOCK_THREADS) Int2Type /*successor_warp*/) { if (FULL_TILE || (SUCCESSOR_WARP * LOGICAL_WARP_SIZE < num_valid)) { T addend = temp_storage.warp_aggregates[SUCCESSOR_WARP]; warp_aggregate = reduction_op(warp_aggregate, addend); } return ApplyWarpAggregates(reduction_op, warp_aggregate, num_valid, Int2Type()); } template __device__ __forceinline__ T ApplyWarpAggregates( ReductionOp /*reduction_op*/, ///< [in] Binary scan operator T warp_aggregate, ///< [in] [lane0 only] Warp-wide aggregate reduction of input items int /*num_valid*/, ///< [in] Number of valid elements (may be less than BLOCK_THREADS) Int2Type /*successor_warp*/) { return warp_aggregate; } /// Returns block-wide aggregate in thread0. template < bool FULL_TILE, typename ReductionOp> __device__ __forceinline__ T ApplyWarpAggregates( ReductionOp reduction_op, ///< [in] Binary scan operator T warp_aggregate, ///< [in] [lane0 only] Warp-wide aggregate reduction of input items int num_valid) ///< [in] Number of valid elements (may be less than BLOCK_THREADS) { // Share lane aggregates if (lane_id == 0) { temp_storage.warp_aggregates[warp_id] = warp_aggregate; } CTA_SYNC(); // Update total aggregate in warp 0, lane 0 if (linear_tid == 0) { warp_aggregate = ApplyWarpAggregates(reduction_op, warp_aggregate, num_valid, Int2Type<1>()); } return warp_aggregate; } /// Computes a thread block-wide reduction using addition (+) as the reduction operator. The first num_valid threads each contribute one reduction partial. The return value is only valid for thread0. template __device__ __forceinline__ T Sum( T input, ///< [in] Calling thread's input partial reductions int num_valid) ///< [in] Number of valid elements (may be less than BLOCK_THREADS) { cub::Sum reduction_op; unsigned int warp_offset = warp_id * LOGICAL_WARP_SIZE; unsigned int warp_num_valid = (FULL_TILE && EVEN_WARP_MULTIPLE) ? LOGICAL_WARP_SIZE : (warp_offset < num_valid) ? num_valid - warp_offset : 0; // Warp reduction in every warp T warp_aggregate = WarpReduce(temp_storage.warp_reduce[warp_id]).template Reduce<(FULL_TILE && EVEN_WARP_MULTIPLE), 1>( input, warp_num_valid, cub::Sum()); // Update outputs and block_aggregate with warp-wide aggregates from lane-0s return ApplyWarpAggregates(reduction_op, warp_aggregate, num_valid); } /// Computes a thread block-wide reduction using the specified reduction operator. The first num_valid threads each contribute one reduction partial. The return value is only valid for thread0. template < bool FULL_TILE, typename ReductionOp> __device__ __forceinline__ T Reduce( T input, ///< [in] Calling thread's input partial reductions int num_valid, ///< [in] Number of valid elements (may be less than BLOCK_THREADS) ReductionOp reduction_op) ///< [in] Binary reduction operator { unsigned int warp_offset = warp_id * LOGICAL_WARP_SIZE; unsigned int warp_num_valid = (FULL_TILE && EVEN_WARP_MULTIPLE) ? LOGICAL_WARP_SIZE : (warp_offset < static_cast(num_valid)) ? 
num_valid - warp_offset : 0; // Warp reduction in every warp T warp_aggregate = WarpReduce(temp_storage.warp_reduce[warp_id]).template Reduce<(FULL_TILE && EVEN_WARP_MULTIPLE), 1>( input, warp_num_valid, reduction_op); // Update outputs and block_aggregate with warp-wide aggregates from lane-0s return ApplyWarpAggregates(reduction_op, warp_aggregate, num_valid); } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/block/specializations/block_scan_raking.cuh000066400000000000000000000674541411340063500264520ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::BlockScanRaking provides variants of raking-based parallel prefix scan across a CUDA thread block. */ #pragma once #include "../../util_ptx.cuh" #include "../../util_arch.cuh" #include "../../block/block_raking_layout.cuh" #include "../../thread/thread_reduce.cuh" #include "../../thread/thread_scan.cuh" #include "../../warp/warp_scan.cuh" #include "../../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief BlockScanRaking provides variants of raking-based parallel prefix scan across a CUDA thread block. 
*/ template < typename T, ///< Data type being scanned int BLOCK_DIM_X, ///< The thread block length in threads along the X dimension int BLOCK_DIM_Y, ///< The thread block length in threads along the Y dimension int BLOCK_DIM_Z, ///< The thread block length in threads along the Z dimension bool MEMOIZE, ///< Whether or not to buffer outer raking scan partials to incur fewer shared memory reads at the expense of higher register pressure int PTX_ARCH> ///< The PTX compute capability for which to to specialize this collective struct BlockScanRaking { //--------------------------------------------------------------------- // Types and constants //--------------------------------------------------------------------- /// Constants enum { /// The thread block size in threads BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, }; /// Layout type for padded thread block raking grid typedef BlockRakingLayout BlockRakingLayout; /// Constants enum { /// Number of raking threads RAKING_THREADS = BlockRakingLayout::RAKING_THREADS, /// Number of raking elements per warp synchronous raking thread SEGMENT_LENGTH = BlockRakingLayout::SEGMENT_LENGTH, /// Cooperative work can be entirely warp synchronous WARP_SYNCHRONOUS = (BLOCK_THREADS == RAKING_THREADS), }; /// WarpScan utility type typedef WarpScan WarpScan; /// Shared memory storage layout type struct _TempStorage { typename WarpScan::TempStorage warp_scan; ///< Buffer for warp-synchronous scan typename BlockRakingLayout::TempStorage raking_grid; ///< Padded thread block raking grid T block_aggregate; ///< Block aggregate }; /// Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; //--------------------------------------------------------------------- // Per-thread fields //--------------------------------------------------------------------- // Thread fields _TempStorage &temp_storage; unsigned int linear_tid; T cached_segment[SEGMENT_LENGTH]; //--------------------------------------------------------------------- // Utility methods //--------------------------------------------------------------------- /// Templated reduction template __device__ __forceinline__ T GuardedReduce( T* raking_ptr, ///< [in] Input array ScanOp scan_op, ///< [in] Binary reduction operator T raking_partial, ///< [in] Prefix to seed reduction with Int2Type /*iteration*/) { if ((BlockRakingLayout::UNGUARDED) || (((linear_tid * SEGMENT_LENGTH) + ITERATION) < BLOCK_THREADS)) { T addend = raking_ptr[ITERATION]; raking_partial = scan_op(raking_partial, addend); } return GuardedReduce(raking_ptr, scan_op, raking_partial, Int2Type()); } /// Templated reduction (base case) template __device__ __forceinline__ T GuardedReduce( T* /*raking_ptr*/, ///< [in] Input array ScanOp /*scan_op*/, ///< [in] Binary reduction operator T raking_partial, ///< [in] Prefix to seed reduction with Int2Type /*iteration*/) { return raking_partial; } /// Templated copy template __device__ __forceinline__ void CopySegment( T* out, ///< [out] Out array T* in, ///< [in] Input array Int2Type /*iteration*/) { out[ITERATION] = in[ITERATION]; CopySegment(out, in, Int2Type()); } /// Templated copy (base case) __device__ __forceinline__ void CopySegment( T* /*out*/, ///< [out] Out array T* /*in*/, ///< [in] Input array Int2Type /*iteration*/) {} /// Performs upsweep raking reduction, returning the aggregate template __device__ __forceinline__ T Upsweep( ScanOp scan_op) { T *smem_raking_ptr = BlockRakingLayout::RakingPtr(temp_storage.raking_grid, linear_tid); // Read 
data into registers CopySegment(cached_segment, smem_raking_ptr, Int2Type<0>()); T raking_partial = cached_segment[0]; return GuardedReduce(cached_segment, scan_op, raking_partial, Int2Type<1>()); } /// Performs exclusive downsweep raking scan template __device__ __forceinline__ void ExclusiveDownsweep( ScanOp scan_op, T raking_partial, bool apply_prefix = true) { T *smem_raking_ptr = BlockRakingLayout::RakingPtr(temp_storage.raking_grid, linear_tid); // Read data back into registers if (!MEMOIZE) { CopySegment(cached_segment, smem_raking_ptr, Int2Type<0>()); } internal::ThreadScanExclusive(cached_segment, cached_segment, scan_op, raking_partial, apply_prefix); // Write data back to smem CopySegment(smem_raking_ptr, cached_segment, Int2Type<0>()); } /// Performs inclusive downsweep raking scan template __device__ __forceinline__ void InclusiveDownsweep( ScanOp scan_op, T raking_partial, bool apply_prefix = true) { T *smem_raking_ptr = BlockRakingLayout::RakingPtr(temp_storage.raking_grid, linear_tid); // Read data back into registers if (!MEMOIZE) { CopySegment(cached_segment, smem_raking_ptr, Int2Type<0>()); } internal::ThreadScanInclusive(cached_segment, cached_segment, scan_op, raking_partial, apply_prefix); // Write data back to smem CopySegment(smem_raking_ptr, cached_segment, Int2Type<0>()); } //--------------------------------------------------------------------- // Constructors //--------------------------------------------------------------------- /// Constructor __device__ __forceinline__ BlockScanRaking( TempStorage &temp_storage) : temp_storage(temp_storage.Alias()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} //--------------------------------------------------------------------- // Exclusive scans //--------------------------------------------------------------------- /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. With no initial value, the output computed for thread0 is undefined. template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input item T &exclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op) ///< [in] Binary scan operator { if (WARP_SYNCHRONOUS) { // Short-circuit directly to warp-synchronous scan WarpScan(temp_storage.warp_scan).ExclusiveScan(input, exclusive_output, scan_op); } else { // Place thread partial into shared memory raking grid T *placement_ptr = BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid); *placement_ptr = input; CTA_SYNC(); // Reduce parallelism down to just raking threads if (linear_tid < RAKING_THREADS) { // Raking upsweep reduction across shared partials T upsweep_partial = Upsweep(scan_op); // Warp-synchronous scan T exclusive_partial; WarpScan(temp_storage.warp_scan).ExclusiveScan(upsweep_partial, exclusive_partial, scan_op); // Exclusive raking downsweep scan ExclusiveDownsweep(scan_op, exclusive_partial, (linear_tid != 0)); } CTA_SYNC(); // Grab thread prefix from shared memory exclusive_output = *placement_ptr; } } /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. 
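    /// \par Snippet
    /// A minimal usage sketch, assuming this raking specialization is reached through the
    /// public cub::BlockScan front-end with cub::BLOCK_SCAN_RAKING; the kernel name,
    /// 128-thread block size and device pointers are illustrative placeholders.
    /// \par
    /// \code
    /// #include <cub/cub.cuh>
    /// #include <climits>
    ///
    /// __global__ void ExampleExclusiveMaxScanKernel(const int *d_in, int *d_out)
    /// {
    ///     // Specialize BlockScan for a 1D block of 128 threads of type int
    ///     typedef cub::BlockScan<int, 128, cub::BLOCK_SCAN_RAKING> BlockScan;
    ///     __shared__ typename BlockScan::TempStorage temp_storage;
    ///
    ///     int idx = blockIdx.x * 128 + threadIdx.x;   // assumes a 128-thread launch
    ///     int thread_data = d_in[idx];
    ///
    ///     // Exclusive block-wide max-scan, seeded with INT_MIN as the initial value
    ///     BlockScan(temp_storage).ExclusiveScan(thread_data, thread_data, INT_MIN, cub::Max());
    ///
    ///     d_out[idx] = thread_data;
    /// }
    /// \endcode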
template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input items T &output, ///< [out] Calling thread's output items (may be aliased to \p input) const T &initial_value, ///< [in] Initial value to seed the exclusive scan ScanOp scan_op) ///< [in] Binary scan operator { if (WARP_SYNCHRONOUS) { // Short-circuit directly to warp-synchronous scan WarpScan(temp_storage.warp_scan).ExclusiveScan(input, output, initial_value, scan_op); } else { // Place thread partial into shared memory raking grid T *placement_ptr = BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid); *placement_ptr = input; CTA_SYNC(); // Reduce parallelism down to just raking threads if (linear_tid < RAKING_THREADS) { // Raking upsweep reduction across shared partials T upsweep_partial = Upsweep(scan_op); // Exclusive Warp-synchronous scan T exclusive_partial; WarpScan(temp_storage.warp_scan).ExclusiveScan(upsweep_partial, exclusive_partial, initial_value, scan_op); // Exclusive raking downsweep scan ExclusiveDownsweep(scan_op, exclusive_partial); } CTA_SYNC(); // Grab exclusive partial from shared memory output = *placement_ptr; } } /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. With no initial value, the output computed for thread0 is undefined. template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input item T &output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan operator T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items { if (WARP_SYNCHRONOUS) { // Short-circuit directly to warp-synchronous scan WarpScan(temp_storage.warp_scan).ExclusiveScan(input, output, scan_op, block_aggregate); } else { // Place thread partial into shared memory raking grid T *placement_ptr = BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid); *placement_ptr = input; CTA_SYNC(); // Reduce parallelism down to just raking threads if (linear_tid < RAKING_THREADS) { // Raking upsweep reduction across shared partials T upsweep_partial= Upsweep(scan_op); // Warp-synchronous scan T inclusive_partial; T exclusive_partial; WarpScan(temp_storage.warp_scan).Scan(upsweep_partial, inclusive_partial, exclusive_partial, scan_op); // Exclusive raking downsweep scan ExclusiveDownsweep(scan_op, exclusive_partial, (linear_tid != 0)); // Broadcast aggregate to all threads if (linear_tid == RAKING_THREADS - 1) temp_storage.block_aggregate = inclusive_partial; } CTA_SYNC(); // Grab thread prefix from shared memory output = *placement_ptr; // Retrieve block aggregate block_aggregate = temp_storage.block_aggregate; } } /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. 
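    /// \par Snippet
    /// A minimal usage sketch of the aggregate-returning form, again assuming the public
    /// cub::BlockScan front-end; the blocked 4-items-per-thread layout and all names are
    /// illustrative placeholders.
    /// \par
    /// \code
    /// #include <cub/cub.cuh>
    /// #include <climits>
    ///
    /// __global__ void ExampleExclusiveScanAggregateKernel(const int *d_in, int *d_out, int *d_block_max)
    /// {
    ///     typedef cub::BlockScan<int, 128, cub::BLOCK_SCAN_RAKING> BlockScan;
    ///     __shared__ typename BlockScan::TempStorage temp_storage;
    ///
    ///     // Each thread owns 4 consecutive items of its block's segment (blocked arrangement)
    ///     int thread_data[4];
    ///     int base = (blockIdx.x * 128 + threadIdx.x) * 4;
    ///     for (int i = 0; i < 4; ++i)
    ///         thread_data[i] = d_in[base + i];
    ///
    ///     // Exclusive block-wide max-scan; block_aggregate receives the maximum over all
    ///     // 512 items of the block and is valid in every thread
    ///     int block_aggregate;
    ///     BlockScan(temp_storage).ExclusiveScan(thread_data, thread_data, INT_MIN, cub::Max(), block_aggregate);
    ///
    ///     for (int i = 0; i < 4; ++i)
    ///         d_out[base + i] = thread_data[i];
    ///     if (threadIdx.x == 0)
    ///         d_block_max[blockIdx.x] = block_aggregate;
    /// }
    /// \endcode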
template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input items T &output, ///< [out] Calling thread's output items (may be aliased to \p input) const T &initial_value, ///< [in] Initial value to seed the exclusive scan ScanOp scan_op, ///< [in] Binary scan operator T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items { if (WARP_SYNCHRONOUS) { // Short-circuit directly to warp-synchronous scan WarpScan(temp_storage.warp_scan).ExclusiveScan(input, output, initial_value, scan_op, block_aggregate); } else { // Place thread partial into shared memory raking grid T *placement_ptr = BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid); *placement_ptr = input; CTA_SYNC(); // Reduce parallelism down to just raking threads if (linear_tid < RAKING_THREADS) { // Raking upsweep reduction across shared partials T upsweep_partial = Upsweep(scan_op); // Warp-synchronous scan T exclusive_partial; WarpScan(temp_storage.warp_scan).ExclusiveScan(upsweep_partial, exclusive_partial, initial_value, scan_op, block_aggregate); // Exclusive raking downsweep scan ExclusiveDownsweep(scan_op, exclusive_partial); // Broadcast aggregate to other threads if (linear_tid == 0) temp_storage.block_aggregate = block_aggregate; } CTA_SYNC(); // Grab exclusive partial from shared memory output = *placement_ptr; // Retrieve block aggregate block_aggregate = temp_storage.block_aggregate; } } /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. the call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. template < typename ScanOp, typename BlockPrefixCallbackOp> __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input item T &output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan operator BlockPrefixCallbackOp &block_prefix_callback_op) ///< [in-out] [warp0 only] Call-back functor for specifying a thread block-wide prefix to be applied to all inputs. 
{ if (WARP_SYNCHRONOUS) { // Short-circuit directly to warp-synchronous scan T block_aggregate; WarpScan warp_scan(temp_storage.warp_scan); warp_scan.ExclusiveScan(input, output, scan_op, block_aggregate); // Obtain warp-wide prefix in lane0, then broadcast to other lanes T block_prefix = block_prefix_callback_op(block_aggregate); block_prefix = warp_scan.Broadcast(block_prefix, 0); output = scan_op(block_prefix, output); if (linear_tid == 0) output = block_prefix; } else { // Place thread partial into shared memory raking grid T *placement_ptr = BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid); *placement_ptr = input; CTA_SYNC(); // Reduce parallelism down to just raking threads if (linear_tid < RAKING_THREADS) { WarpScan warp_scan(temp_storage.warp_scan); // Raking upsweep reduction across shared partials T upsweep_partial = Upsweep(scan_op); // Warp-synchronous scan T exclusive_partial, block_aggregate; warp_scan.ExclusiveScan(upsweep_partial, exclusive_partial, scan_op, block_aggregate); // Obtain block-wide prefix in lane0, then broadcast to other lanes T block_prefix = block_prefix_callback_op(block_aggregate); block_prefix = warp_scan.Broadcast(block_prefix, 0); // Update prefix with warpscan exclusive partial T downsweep_prefix = scan_op(block_prefix, exclusive_partial); if (linear_tid == 0) downsweep_prefix = block_prefix; // Exclusive raking downsweep scan ExclusiveDownsweep(scan_op, downsweep_prefix); } CTA_SYNC(); // Grab thread prefix from shared memory output = *placement_ptr; } } //--------------------------------------------------------------------- // Inclusive scans //--------------------------------------------------------------------- /// Computes an inclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. template __device__ __forceinline__ void InclusiveScan( T input, ///< [in] Calling thread's input item T &output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op) ///< [in] Binary scan operator { if (WARP_SYNCHRONOUS) { // Short-circuit directly to warp-synchronous scan WarpScan(temp_storage.warp_scan).InclusiveScan(input, output, scan_op); } else { // Place thread partial into shared memory raking grid T *placement_ptr = BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid); *placement_ptr = input; CTA_SYNC(); // Reduce parallelism down to just raking threads if (linear_tid < RAKING_THREADS) { // Raking upsweep reduction across shared partials T upsweep_partial = Upsweep(scan_op); // Exclusive Warp-synchronous scan T exclusive_partial; WarpScan(temp_storage.warp_scan).ExclusiveScan(upsweep_partial, exclusive_partial, scan_op); // Inclusive raking downsweep scan InclusiveDownsweep(scan_op, exclusive_partial, (linear_tid != 0)); } CTA_SYNC(); // Grab thread prefix from shared memory output = *placement_ptr; } } /// Computes an inclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. 
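    /// \par Snippet
    /// A minimal usage sketch of an inclusive scan, assuming the public cub::BlockScan front-end;
    /// cub::BLOCK_SCAN_RAKING_MEMOIZE selects the register-caching (MEMOIZE) flavour of this
    /// raking implementation. All names are illustrative placeholders.
    /// \par
    /// \code
    /// #include <cub/cub.cuh>
    ///
    /// __global__ void ExampleInclusiveMaxScanKernel(const int *d_in, int *d_out)
    /// {
    ///     typedef cub::BlockScan<int, 128, cub::BLOCK_SCAN_RAKING_MEMOIZE> BlockScan;
    ///     __shared__ typename BlockScan::TempStorage temp_storage;
    ///
    ///     int idx = blockIdx.x * 128 + threadIdx.x;   // assumes a 128-thread launch
    ///     int thread_data = d_in[idx];
    ///
    ///     // Inclusive block-wide max-scan (each thread's output includes its own input)
    ///     BlockScan(temp_storage).InclusiveScan(thread_data, thread_data, cub::Max());
    ///
    ///     d_out[idx] = thread_data;
    /// }
    /// \endcode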
template __device__ __forceinline__ void InclusiveScan( T input, ///< [in] Calling thread's input item T &output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan operator T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items { if (WARP_SYNCHRONOUS) { // Short-circuit directly to warp-synchronous scan WarpScan(temp_storage.warp_scan).InclusiveScan(input, output, scan_op, block_aggregate); } else { // Place thread partial into shared memory raking grid T *placement_ptr = BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid); *placement_ptr = input; CTA_SYNC(); // Reduce parallelism down to just raking threads if (linear_tid < RAKING_THREADS) { // Raking upsweep reduction across shared partials T upsweep_partial = Upsweep(scan_op); // Warp-synchronous scan T inclusive_partial; T exclusive_partial; WarpScan(temp_storage.warp_scan).Scan(upsweep_partial, inclusive_partial, exclusive_partial, scan_op); // Inclusive raking downsweep scan InclusiveDownsweep(scan_op, exclusive_partial, (linear_tid != 0)); // Broadcast aggregate to all threads if (linear_tid == RAKING_THREADS - 1) temp_storage.block_aggregate = inclusive_partial; } CTA_SYNC(); // Grab thread prefix from shared memory output = *placement_ptr; // Retrieve block aggregate block_aggregate = temp_storage.block_aggregate; } } /// Computes an inclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. the call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. template < typename ScanOp, typename BlockPrefixCallbackOp> __device__ __forceinline__ void InclusiveScan( T input, ///< [in] Calling thread's input item T &output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan operator BlockPrefixCallbackOp &block_prefix_callback_op) ///< [in-out] [warp0 only] Call-back functor for specifying a thread block-wide prefix to be applied to all inputs. 
{ if (WARP_SYNCHRONOUS) { // Short-circuit directly to warp-synchronous scan T block_aggregate; WarpScan warp_scan(temp_storage.warp_scan); warp_scan.InclusiveScan(input, output, scan_op, block_aggregate); // Obtain warp-wide prefix in lane0, then broadcast to other lanes T block_prefix = block_prefix_callback_op(block_aggregate); block_prefix = warp_scan.Broadcast(block_prefix, 0); // Update prefix with exclusive warpscan partial output = scan_op(block_prefix, output); } else { // Place thread partial into shared memory raking grid T *placement_ptr = BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid); *placement_ptr = input; CTA_SYNC(); // Reduce parallelism down to just raking threads if (linear_tid < RAKING_THREADS) { WarpScan warp_scan(temp_storage.warp_scan); // Raking upsweep reduction across shared partials T upsweep_partial = Upsweep(scan_op); // Warp-synchronous scan T exclusive_partial, block_aggregate; warp_scan.ExclusiveScan(upsweep_partial, exclusive_partial, scan_op, block_aggregate); // Obtain block-wide prefix in lane0, then broadcast to other lanes T block_prefix = block_prefix_callback_op(block_aggregate); block_prefix = warp_scan.Broadcast(block_prefix, 0); // Update prefix with warpscan exclusive partial T downsweep_prefix = scan_op(block_prefix, exclusive_partial); if (linear_tid == 0) downsweep_prefix = block_prefix; // Inclusive raking downsweep scan InclusiveDownsweep(scan_op, downsweep_prefix); } CTA_SYNC(); // Grab thread prefix from shared memory output = *placement_ptr; } } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/block/specializations/block_scan_warp_scans.cuh000066400000000000000000000452551411340063500273320ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ******************************************************************************/ /** * \file * cub::BlockScanWarpscans provides warpscan-based variants of parallel prefix scan across a CUDA thread block. */ #pragma once #include "../../util_arch.cuh" #include "../../util_ptx.cuh" #include "../../warp/warp_scan.cuh" #include "../../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief BlockScanWarpScans provides warpscan-based variants of parallel prefix scan across a CUDA thread block. */ template < typename T, int BLOCK_DIM_X, ///< The thread block length in threads along the X dimension int BLOCK_DIM_Y, ///< The thread block length in threads along the Y dimension int BLOCK_DIM_Z, ///< The thread block length in threads along the Z dimension int PTX_ARCH> ///< The PTX compute capability for which to to specialize this collective struct BlockScanWarpScans { //--------------------------------------------------------------------- // Types and constants //--------------------------------------------------------------------- /// Constants enum { /// Number of warp threads WARP_THREADS = CUB_WARP_THREADS(PTX_ARCH), /// The thread block size in threads BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, /// Number of active warps WARPS = (BLOCK_THREADS + WARP_THREADS - 1) / WARP_THREADS, }; /// WarpScan utility type typedef WarpScan WarpScanT; /// WarpScan utility type typedef WarpScan WarpAggregateScan; /// Shared memory storage layout type struct __align__(32) _TempStorage { T warp_aggregates[WARPS]; typename WarpScanT::TempStorage warp_scan[WARPS]; ///< Buffer for warp-synchronous scans T block_prefix; ///< Shared prefix for the entire thread block }; /// Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; //--------------------------------------------------------------------- // Per-thread fields //--------------------------------------------------------------------- // Thread fields _TempStorage &temp_storage; unsigned int linear_tid; unsigned int warp_id; unsigned int lane_id; //--------------------------------------------------------------------- // Constructors //--------------------------------------------------------------------- /// Constructor __device__ __forceinline__ BlockScanWarpScans( TempStorage &temp_storage) : temp_storage(temp_storage.Alias()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)), warp_id((WARPS == 1) ? 
0 : linear_tid / WARP_THREADS), lane_id(LaneId()) {} //--------------------------------------------------------------------- // Utility methods //--------------------------------------------------------------------- template __device__ __forceinline__ void ApplyWarpAggregates( T &warp_prefix, ///< [out] The calling thread's partial reduction ScanOp scan_op, ///< [in] Binary scan operator T &block_aggregate, ///< [out] Threadblock-wide aggregate reduction of input items Int2Type /*addend_warp*/) { if (warp_id == WARP) warp_prefix = block_aggregate; T addend = temp_storage.warp_aggregates[WARP]; block_aggregate = scan_op(block_aggregate, addend); ApplyWarpAggregates(warp_prefix, scan_op, block_aggregate, Int2Type()); } template __device__ __forceinline__ void ApplyWarpAggregates( T &/*warp_prefix*/, ///< [out] The calling thread's partial reduction ScanOp /*scan_op*/, ///< [in] Binary scan operator T &/*block_aggregate*/, ///< [out] Threadblock-wide aggregate reduction of input items Int2Type /*addend_warp*/) {} /// Use the warp-wide aggregates to compute the calling warp's prefix. Also returns block-wide aggregate in all threads. template __device__ __forceinline__ T ComputeWarpPrefix( ScanOp scan_op, ///< [in] Binary scan operator T warp_aggregate, ///< [in] [laneWARP_THREADS - 1 only] Warp-wide aggregate reduction of input items T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items { // Last lane in each warp shares its warp-aggregate if (lane_id == WARP_THREADS - 1) temp_storage.warp_aggregates[warp_id] = warp_aggregate; CTA_SYNC(); // Accumulate block aggregates and save the one that is our warp's prefix T warp_prefix; block_aggregate = temp_storage.warp_aggregates[0]; // Use template unrolling (since the PTX backend can't handle unrolling it for SM1x) ApplyWarpAggregates(warp_prefix, scan_op, block_aggregate, Int2Type<1>()); /* #pragma unroll for (int WARP = 1; WARP < WARPS; ++WARP) { if (warp_id == WARP) warp_prefix = block_aggregate; T addend = temp_storage.warp_aggregates[WARP]; block_aggregate = scan_op(block_aggregate, addend); } */ return warp_prefix; } /// Use the warp-wide aggregates and initial-value to compute the calling warp's prefix. Also returns block-wide aggregate in all threads. template __device__ __forceinline__ T ComputeWarpPrefix( ScanOp scan_op, ///< [in] Binary scan operator T warp_aggregate, ///< [in] [laneWARP_THREADS - 1 only] Warp-wide aggregate reduction of input items T &block_aggregate, ///< [out] Threadblock-wide aggregate reduction of input items const T &initial_value) ///< [in] Initial value to seed the exclusive scan { T warp_prefix = ComputeWarpPrefix(scan_op, warp_aggregate, block_aggregate); warp_prefix = scan_op(initial_value, warp_prefix); if (warp_id == 0) warp_prefix = initial_value; return warp_prefix; } //--------------------------------------------------------------------- // Exclusive scans //--------------------------------------------------------------------- /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. With no initial value, the output computed for thread0 is undefined. template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input item T &exclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op) ///< [in] Binary scan operator { // Compute block-wide exclusive scan. The exclusive output from tid0 is invalid. 
T block_aggregate; ExclusiveScan(input, exclusive_output, scan_op, block_aggregate); } /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input items T &exclusive_output, ///< [out] Calling thread's output items (may be aliased to \p input) const T &initial_value, ///< [in] Initial value to seed the exclusive scan ScanOp scan_op) ///< [in] Binary scan operator { T block_aggregate; ExclusiveScan(input, exclusive_output, initial_value, scan_op, block_aggregate); } /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. With no initial value, the output computed for thread0 is undefined. template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input item T &exclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan operator T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items { // Compute warp scan in each warp. The exclusive output from each lane0 is invalid. T inclusive_output; WarpScanT(temp_storage.warp_scan[warp_id]).Scan(input, inclusive_output, exclusive_output, scan_op); // Compute the warp-wide prefix and block-wide aggregate for each warp. Warp prefix for warp0 is invalid. T warp_prefix = ComputeWarpPrefix(scan_op, inclusive_output, block_aggregate); // Apply warp prefix to our lane's partial if (warp_id != 0) { exclusive_output = scan_op(warp_prefix, exclusive_output); if (lane_id == 0) exclusive_output = warp_prefix; } } /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input items T &exclusive_output, ///< [out] Calling thread's output items (may be aliased to \p input) const T &initial_value, ///< [in] Initial value to seed the exclusive scan ScanOp scan_op, ///< [in] Binary scan operator T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items { // Compute warp scan in each warp. The exclusive output from each lane0 is invalid. T inclusive_output; WarpScanT(temp_storage.warp_scan[warp_id]).Scan(input, inclusive_output, exclusive_output, scan_op); // Compute the warp-wide prefix and block-wide aggregate for each warp T warp_prefix = ComputeWarpPrefix(scan_op, inclusive_output, block_aggregate, initial_value); // Apply warp prefix to our lane's partial exclusive_output = scan_op(warp_prefix, exclusive_output); if (lane_id == 0) exclusive_output = warp_prefix; } /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. the call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. 
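    /// \par Snippet
    /// A minimal usage sketch of the prefix-callback form for scanning a long segment tile by
    /// tile, assuming the public cub::BlockScan front-end; the functor, kernel name and tile
    /// size are illustrative placeholders.
    /// \par
    /// \code
    /// #include <cub/cub.cuh>
    ///
    /// // Stateful functor keeping a running total across tiles; the first warp invokes it once
    /// // per tile and the value returned by lane0 seeds that tile's exclusive scan
    /// struct RunningTotalOp
    /// {
    ///     int running_total;
    ///     __device__ RunningTotalOp(int initial) : running_total(initial) {}
    ///     __device__ int operator()(int block_aggregate)
    ///     {
    ///         int old_total = running_total;
    ///         running_total += block_aggregate;
    ///         return old_total;   // becomes the prefix for the current tile
    ///     }
    /// };
    ///
    /// __global__ void ExampleTiledExclusiveSumKernel(int *d_data, int num_items)
    /// {
    ///     typedef cub::BlockScan<int, 128, cub::BLOCK_SCAN_WARP_SCANS> BlockScan;
    ///     __shared__ typename BlockScan::TempStorage temp_storage;
    ///
    ///     RunningTotalOp prefix_op(0);
    ///
    ///     // A single 128-thread block walks the whole array, 128 items per tile
    ///     // (assumes num_items is a multiple of 128)
    ///     for (int tile_base = 0; tile_base < num_items; tile_base += 128)
    ///     {
    ///         int item = d_data[tile_base + threadIdx.x];
    ///         BlockScan(temp_storage).ExclusiveSum(item, item, prefix_op);
    ///         d_data[tile_base + threadIdx.x] = item;
    ///
    ///         __syncthreads();    // barrier before temp_storage is reused for the next tile
    ///     }
    /// }
    /// \endcode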
template < typename ScanOp, typename BlockPrefixCallbackOp> __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input item T &exclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan operator BlockPrefixCallbackOp &block_prefix_callback_op) ///< [in-out] [warp0 only] Call-back functor for specifying a thread block-wide prefix to be applied to all inputs. { // Compute block-wide exclusive scan. The exclusive output from tid0 is invalid. T block_aggregate; ExclusiveScan(input, exclusive_output, scan_op, block_aggregate); // Use the first warp to determine the thread block prefix, returning the result in lane0 if (warp_id == 0) { T block_prefix = block_prefix_callback_op(block_aggregate); if (lane_id == 0) { // Share the prefix with all threads temp_storage.block_prefix = block_prefix; exclusive_output = block_prefix; // The block prefix is the exclusive output for tid0 } } CTA_SYNC(); // Incorporate thread block prefix into outputs T block_prefix = temp_storage.block_prefix; if (linear_tid > 0) { exclusive_output = scan_op(block_prefix, exclusive_output); } } //--------------------------------------------------------------------- // Inclusive scans //--------------------------------------------------------------------- /// Computes an inclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. template __device__ __forceinline__ void InclusiveScan( T input, ///< [in] Calling thread's input item T &inclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op) ///< [in] Binary scan operator { T block_aggregate; InclusiveScan(input, inclusive_output, scan_op, block_aggregate); } /// Computes an inclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. template __device__ __forceinline__ void InclusiveScan( T input, ///< [in] Calling thread's input item T &inclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan operator T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items { WarpScanT(temp_storage.warp_scan[warp_id]).InclusiveScan(input, inclusive_output, scan_op); // Compute the warp-wide prefix and block-wide aggregate for each warp. Warp prefix for warp0 is invalid. T warp_prefix = ComputeWarpPrefix(scan_op, inclusive_output, block_aggregate); // Apply warp prefix to our lane's partial if (warp_id != 0) { inclusive_output = scan_op(warp_prefix, inclusive_output); } } /// Computes an inclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. the call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. 
template < typename ScanOp, typename BlockPrefixCallbackOp> __device__ __forceinline__ void InclusiveScan( T input, ///< [in] Calling thread's input item T &exclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan operator BlockPrefixCallbackOp &block_prefix_callback_op) ///< [in-out] [warp0 only] Call-back functor for specifying a thread block-wide prefix to be applied to all inputs. { T block_aggregate; InclusiveScan(input, exclusive_output, scan_op, block_aggregate); // Use the first warp to determine the thread block prefix, returning the result in lane0 if (warp_id == 0) { T block_prefix = block_prefix_callback_op(block_aggregate); if (lane_id == 0) { // Share the prefix with all threads temp_storage.block_prefix = block_prefix; } } CTA_SYNC(); // Incorporate thread block prefix into outputs T block_prefix = temp_storage.block_prefix; exclusive_output = scan_op(block_prefix, exclusive_output); } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/block/specializations/block_scan_warp_scans2.cuh000066400000000000000000000507251411340063500274120ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::BlockScanWarpscans provides warpscan-based variants of parallel prefix scan across a CUDA thread block. */ #pragma once #include "../../util_arch.cuh" #include "../../util_ptx.cuh" #include "../../warp/warp_scan.cuh" #include "../../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief BlockScanWarpScans provides warpscan-based variants of parallel prefix scan across a CUDA thread block. 
*/ template < typename T, int BLOCK_DIM_X, ///< The thread block length in threads along the X dimension int BLOCK_DIM_Y, ///< The thread block length in threads along the Y dimension int BLOCK_DIM_Z, ///< The thread block length in threads along the Z dimension int PTX_ARCH> ///< The PTX compute capability for which to to specialize this collective struct BlockScanWarpScans { //--------------------------------------------------------------------- // Types and constants //--------------------------------------------------------------------- /// Constants enum { /// Number of warp threads WARP_THREADS = CUB_WARP_THREADS(PTX_ARCH), /// The thread block size in threads BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, /// Number of active warps WARPS = (BLOCK_THREADS + WARP_THREADS - 1) / WARP_THREADS, }; /// WarpScan utility type typedef WarpScan WarpScanT; /// WarpScan utility type typedef WarpScan WarpAggregateScanT; /// Shared memory storage layout type struct _TempStorage { typename WarpAggregateScanT::TempStorage inner_scan[WARPS]; ///< Buffer for warp-synchronous scans typename WarpScanT::TempStorage warp_scan[WARPS]; ///< Buffer for warp-synchronous scans T warp_aggregates[WARPS]; T block_prefix; ///< Shared prefix for the entire thread block }; /// Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; //--------------------------------------------------------------------- // Per-thread fields //--------------------------------------------------------------------- // Thread fields _TempStorage &temp_storage; unsigned int linear_tid; unsigned int warp_id; unsigned int lane_id; //--------------------------------------------------------------------- // Constructors //--------------------------------------------------------------------- /// Constructor __device__ __forceinline__ BlockScanWarpScans( TempStorage &temp_storage) : temp_storage(temp_storage.Alias()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)), warp_id((WARPS == 1) ? 0 : linear_tid / WARP_THREADS), lane_id(LaneId()) {} //--------------------------------------------------------------------- // Utility methods //--------------------------------------------------------------------- template __device__ __forceinline__ void ApplyWarpAggregates( T &warp_prefix, ///< [out] The calling thread's partial reduction ScanOp scan_op, ///< [in] Binary scan operator T &block_aggregate, ///< [out] Threadblock-wide aggregate reduction of input items Int2Type addend_warp) { if (warp_id == WARP) warp_prefix = block_aggregate; T addend = temp_storage.warp_aggregates[WARP]; block_aggregate = scan_op(block_aggregate, addend); ApplyWarpAggregates(warp_prefix, scan_op, block_aggregate, Int2Type()); } template __device__ __forceinline__ void ApplyWarpAggregates( T &warp_prefix, ///< [out] The calling thread's partial reduction ScanOp scan_op, ///< [in] Binary scan operator T &block_aggregate, ///< [out] Threadblock-wide aggregate reduction of input items Int2Type addend_warp) {} /// Use the warp-wide aggregates to compute the calling warp's prefix. Also returns block-wide aggregate in all threads. 
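    /// For example (illustrative values): with WARPS == 4, cub::Sum() as \p scan_op and
    /// per-warp aggregates {3, 1, 4, 1}, warps 1..3 obtain the prefixes 3, 4 and 8
    /// respectively (warp0's prefix is left unassigned), and \p block_aggregate becomes 9
    /// in every thread.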
template __device__ __forceinline__ T ComputeWarpPrefix( ScanOp scan_op, ///< [in] Binary scan operator T warp_aggregate, ///< [in] [laneWARP_THREADS - 1 only] Warp-wide aggregate reduction of input items T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items { // Last lane in each warp shares its warp-aggregate if (lane_id == WARP_THREADS - 1) temp_storage.warp_aggregates[warp_id] = warp_aggregate; CTA_SYNC(); // Accumulate block aggregates and save the one that is our warp's prefix T warp_prefix; block_aggregate = temp_storage.warp_aggregates[0]; // Use template unrolling (since the PTX backend can't handle unrolling it for SM1x) ApplyWarpAggregates(warp_prefix, scan_op, block_aggregate, Int2Type<1>()); /* #pragma unroll for (int WARP = 1; WARP < WARPS; ++WARP) { if (warp_id == WARP) warp_prefix = block_aggregate; T addend = temp_storage.warp_aggregates[WARP]; block_aggregate = scan_op(block_aggregate, addend); } */ return warp_prefix; } /// Use the warp-wide aggregates and initial-value to compute the calling warp's prefix. Also returns block-wide aggregate in all threads. template __device__ __forceinline__ T ComputeWarpPrefix( ScanOp scan_op, ///< [in] Binary scan operator T warp_aggregate, ///< [in] [laneWARP_THREADS - 1 only] Warp-wide aggregate reduction of input items T &block_aggregate, ///< [out] Threadblock-wide aggregate reduction of input items const T &initial_value) ///< [in] Initial value to seed the exclusive scan { T warp_prefix = ComputeWarpPrefix(scan_op, warp_aggregate, block_aggregate); warp_prefix = scan_op(initial_value, warp_prefix); if (warp_id == 0) warp_prefix = initial_value; return warp_prefix; } //--------------------------------------------------------------------- // Exclusive scans //--------------------------------------------------------------------- /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. With no initial value, the output computed for thread0 is undefined. template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input item T &exclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op) ///< [in] Binary scan operator { // Compute block-wide exclusive scan. The exclusive output from tid0 is invalid. T block_aggregate; ExclusiveScan(input, exclusive_output, scan_op, block_aggregate); } /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input items T &exclusive_output, ///< [out] Calling thread's output items (may be aliased to \p input) const T &initial_value, ///< [in] Initial value to seed the exclusive scan ScanOp scan_op) ///< [in] Binary scan operator { T block_aggregate; ExclusiveScan(input, exclusive_output, initial_value, scan_op, block_aggregate); } /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. With no initial value, the output computed for thread0 is undefined. 
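    /// \par Snippet
    /// A minimal stream-compaction sketch using the returned aggregate, assuming the public
    /// cub::BlockScan front-end (this file is an alternative warp-scans implementation; which
    /// specialization the front-end instantiates depends on the requested algorithm and
    /// architecture). All names are illustrative placeholders.
    /// \par
    /// \code
    /// #include <cub/cub.cuh>
    ///
    /// __global__ void ExampleCompactPositivesKernel(const int *d_in, int *d_out, int *d_block_counts)
    /// {
    ///     typedef cub::BlockScan<int, 128, cub::BLOCK_SCAN_WARP_SCANS> BlockScan;
    ///     __shared__ typename BlockScan::TempStorage temp_storage;
    ///
    ///     int idx = blockIdx.x * 128 + threadIdx.x;   // assumes a 128-thread launch
    ///     int value = d_in[idx];
    ///     int flag = (value > 0) ? 1 : 0;
    ///
    ///     // Exclusive prefix sum of the selection flags; block_aggregate is the number of
    ///     // selected items in this block and is valid in every thread
    ///     int scatter_offset, block_aggregate;
    ///     BlockScan(temp_storage).ExclusiveSum(flag, scatter_offset, block_aggregate);
    ///
    ///     if (flag)
    ///         d_out[blockIdx.x * 128 + scatter_offset] = value;
    ///     if (threadIdx.x == 0)
    ///         d_block_counts[blockIdx.x] = block_aggregate;
    /// }
    /// \endcode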
template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input item T &exclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan operator T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items { WarpScanT my_warp_scan(temp_storage.warp_scan[warp_id]); // Compute warp scan in each warp. The exclusive output from each lane0 is invalid. T inclusive_output; my_warp_scan.Scan(input, inclusive_output, exclusive_output, scan_op); // Compute the warp-wide prefix and block-wide aggregate for each warp. Warp prefix for warp0 is invalid. // T warp_prefix = ComputeWarpPrefix(scan_op, inclusive_output, block_aggregate); //-------------------------------------------------- // Last lane in each warp shares its warp-aggregate if (lane_id == WARP_THREADS - 1) temp_storage.warp_aggregates[warp_id] = inclusive_output; CTA_SYNC(); // Get the warp scan partial T warp_inclusive, warp_prefix; if (lane_id < WARPS) { // Scan the warpscan partials T warp_val = temp_storage.warp_aggregates[lane_id]; WarpAggregateScanT(temp_storage.inner_scan[warp_id]).Scan(warp_val, warp_inclusive, warp_prefix, scan_op); } warp_prefix = my_warp_scan.Broadcast(warp_prefix, warp_id); block_aggregate = my_warp_scan.Broadcast(warp_inclusive, WARPS - 1); //-------------------------------------------------- // Apply warp prefix to our lane's partial if (warp_id != 0) { exclusive_output = scan_op(warp_prefix, exclusive_output); if (lane_id == 0) exclusive_output = warp_prefix; } } /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input items T &exclusive_output, ///< [out] Calling thread's output items (may be aliased to \p input) const T &initial_value, ///< [in] Initial value to seed the exclusive scan ScanOp scan_op, ///< [in] Binary scan operator T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items { WarpScanT my_warp_scan(temp_storage.warp_scan[warp_id]); // Compute warp scan in each warp. The exclusive output from each lane0 is invalid. T inclusive_output; my_warp_scan.Scan(input, inclusive_output, exclusive_output, scan_op); // Compute the warp-wide prefix and block-wide aggregate for each warp // T warp_prefix = ComputeWarpPrefix(scan_op, inclusive_output, block_aggregate, initial_value); //-------------------------------------------------- // Last lane in each warp shares its warp-aggregate if (lane_id == WARP_THREADS - 1) temp_storage.warp_aggregates[warp_id] = inclusive_output; CTA_SYNC(); // Get the warp scan partial T warp_inclusive, warp_prefix; if (lane_id < WARPS) { // Scan the warpscan partials T warp_val = temp_storage.warp_aggregates[lane_id]; WarpAggregateScanT(temp_storage.inner_scan[warp_id]).Scan(warp_val, warp_inclusive, warp_prefix, initial_value, scan_op); } warp_prefix = my_warp_scan.Broadcast(warp_prefix, warp_id); block_aggregate = my_warp_scan.Broadcast(warp_inclusive, WARPS - 1); //-------------------------------------------------- // Apply warp prefix to our lane's partial exclusive_output = scan_op(warp_prefix, exclusive_output); if (lane_id == 0) exclusive_output = warp_prefix; } /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. 
Each thread contributes one input element. the call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. template < typename ScanOp, typename BlockPrefixCallbackOp> __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input item T &exclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan operator BlockPrefixCallbackOp &block_prefix_callback_op) ///< [in-out] [warp0 only] Call-back functor for specifying a thread block-wide prefix to be applied to all inputs. { // Compute block-wide exclusive scan. The exclusive output from tid0 is invalid. T block_aggregate; ExclusiveScan(input, exclusive_output, scan_op, block_aggregate); // Use the first warp to determine the thread block prefix, returning the result in lane0 if (warp_id == 0) { T block_prefix = block_prefix_callback_op(block_aggregate); if (lane_id == 0) { // Share the prefix with all threads temp_storage.block_prefix = block_prefix; exclusive_output = block_prefix; // The block prefix is the exclusive output for tid0 } } CTA_SYNC(); // Incorporate thread block prefix into outputs T block_prefix = temp_storage.block_prefix; if (linear_tid > 0) { exclusive_output = scan_op(block_prefix, exclusive_output); } } //--------------------------------------------------------------------- // Inclusive scans //--------------------------------------------------------------------- /// Computes an inclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. template __device__ __forceinline__ void InclusiveScan( T input, ///< [in] Calling thread's input item T &inclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op) ///< [in] Binary scan operator { T block_aggregate; InclusiveScan(input, inclusive_output, scan_op, block_aggregate); } /// Computes an inclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. template __device__ __forceinline__ void InclusiveScan( T input, ///< [in] Calling thread's input item T &inclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan operator T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items { WarpScanT(temp_storage.warp_scan[warp_id]).InclusiveScan(input, inclusive_output, scan_op); // Compute the warp-wide prefix and block-wide aggregate for each warp. Warp prefix for warp0 is invalid. T warp_prefix = ComputeWarpPrefix(scan_op, inclusive_output, block_aggregate); // Apply warp prefix to our lane's partial if (warp_id != 0) { inclusive_output = scan_op(warp_prefix, inclusive_output); } } /// Computes an inclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. the call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. 
Also provides every thread with the block-wide \p block_aggregate of all inputs. template < typename ScanOp, typename BlockPrefixCallbackOp> __device__ __forceinline__ void InclusiveScan( T input, ///< [in] Calling thread's input item T &exclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan operator BlockPrefixCallbackOp &block_prefix_callback_op) ///< [in-out] [warp0 only] Call-back functor for specifying a thread block-wide prefix to be applied to all inputs. { T block_aggregate; InclusiveScan(input, exclusive_output, scan_op, block_aggregate); // Use the first warp to determine the thread block prefix, returning the result in lane0 if (warp_id == 0) { T block_prefix = block_prefix_callback_op(block_aggregate); if (lane_id == 0) { // Share the prefix with all threads temp_storage.block_prefix = block_prefix; } } CTA_SYNC(); // Incorporate thread block prefix into outputs T block_prefix = temp_storage.block_prefix; exclusive_output = scan_op(block_prefix, exclusive_output); } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/block/specializations/block_scan_warp_scans3.cuh000066400000000000000000000460071411340063500274110ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::BlockScanWarpscans provides warpscan-based variants of parallel prefix scan across a CUDA thread block. */ #pragma once #include "../../util_arch.cuh" #include "../../util_ptx.cuh" #include "../../warp/warp_scan.cuh" #include "../../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief BlockScanWarpScans provides warpscan-based variants of parallel prefix scan across a CUDA thread block. 
*/ template < typename T, int BLOCK_DIM_X, ///< The thread block length in threads along the X dimension int BLOCK_DIM_Y, ///< The thread block length in threads along the Y dimension int BLOCK_DIM_Z, ///< The thread block length in threads along the Z dimension int PTX_ARCH> ///< The PTX compute capability for which to to specialize this collective struct BlockScanWarpScans { //--------------------------------------------------------------------- // Types and constants //--------------------------------------------------------------------- /// Constants enum { /// The thread block size in threads BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, /// Number of warp threads INNER_WARP_THREADS = CUB_WARP_THREADS(PTX_ARCH), OUTER_WARP_THREADS = BLOCK_THREADS / INNER_WARP_THREADS, /// Number of outer scan warps OUTER_WARPS = INNER_WARP_THREADS }; /// Outer WarpScan utility type typedef WarpScan OuterWarpScanT; /// Inner WarpScan utility type typedef WarpScan InnerWarpScanT; typedef typename OuterWarpScanT::TempStorage OuterScanArray[OUTER_WARPS]; /// Shared memory storage layout type struct _TempStorage { union Aliasable { Uninitialized outer_warp_scan; ///< Buffer for warp-synchronous outer scans typename InnerWarpScanT::TempStorage inner_warp_scan; ///< Buffer for warp-synchronous inner scan } aliasable; T warp_aggregates[OUTER_WARPS]; T block_aggregate; ///< Shared prefix for the entire thread block }; /// Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; //--------------------------------------------------------------------- // Per-thread fields //--------------------------------------------------------------------- // Thread fields _TempStorage &temp_storage; unsigned int linear_tid; unsigned int warp_id; unsigned int lane_id; //--------------------------------------------------------------------- // Constructors //--------------------------------------------------------------------- /// Constructor __device__ __forceinline__ BlockScanWarpScans( TempStorage &temp_storage) : temp_storage(temp_storage.Alias()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)), warp_id((OUTER_WARPS == 1) ? 0 : linear_tid / OUTER_WARP_THREADS), lane_id((OUTER_WARPS == 1) ? linear_tid : linear_tid % OUTER_WARP_THREADS) {} //--------------------------------------------------------------------- // Exclusive scans //--------------------------------------------------------------------- /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. With no initial value, the output computed for thread0 is undefined. template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input item T &exclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op) ///< [in] Binary scan operator { // Compute block-wide exclusive scan. The exclusive output from tid0 is invalid. T block_aggregate; ExclusiveScan(input, exclusive_output, scan_op, block_aggregate); } /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. 
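    /*
     * A small sketch (illustrative only) of the thread decomposition used by
     * this variant: the block is split into OUTER_WARPS groups of
     * OUTER_WARP_THREADS threads each, so that the per-group aggregates can be
     * scanned by a single inner warp.  For 32-thread hardware warps and a
     * 256-thread block:
     *
     *     constexpr int kWarpThreads      = 32;                            // CUB_WARP_THREADS(PTX_ARCH) on current GPUs
     *     constexpr int kBlockThreads     = 256;                           // BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z
     *     constexpr int kOuterWarpThreads = kBlockThreads / kWarpThreads;  // 8 threads per outer scan group
     *     constexpr int kOuterWarps       = kWarpThreads;                  // 32 groups, one aggregate per inner-warp lane
     *
     *     static_assert(kOuterWarps * kOuterWarpThreads == kBlockThreads,
     *                   "this sketch assumes the block size is a multiple of the warp width");
     */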
template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input items T &exclusive_output, ///< [out] Calling thread's output items (may be aliased to \p input) const T &initial_value, ///< [in] Initial value to seed the exclusive scan ScanOp scan_op) ///< [in] Binary scan operator { T block_aggregate; ExclusiveScan(input, exclusive_output, initial_value, scan_op, block_aggregate); } /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. With no initial value, the output computed for thread0 is undefined. template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input item T &exclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan operator T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items { // Compute warp scan in each warp. The exclusive output from each lane0 is invalid. T inclusive_output; OuterWarpScanT(temp_storage.aliasable.outer_warp_scan.Alias()[warp_id]).Scan( input, inclusive_output, exclusive_output, scan_op); // Share outer warp total if (lane_id == OUTER_WARP_THREADS - 1) temp_storage.warp_aggregates[warp_id] = inclusive_output; CTA_SYNC(); if (linear_tid < INNER_WARP_THREADS) { T outer_warp_input = temp_storage.warp_aggregates[linear_tid]; T outer_warp_exclusive; InnerWarpScanT(temp_storage.aliasable.inner_warp_scan).ExclusiveScan( outer_warp_input, outer_warp_exclusive, scan_op, block_aggregate); temp_storage.block_aggregate = block_aggregate; temp_storage.warp_aggregates[linear_tid] = outer_warp_exclusive; } CTA_SYNC(); if (warp_id != 0) { // Retrieve block aggregate block_aggregate = temp_storage.block_aggregate; // Apply warp prefix to our lane's partial T outer_warp_exclusive = temp_storage.warp_aggregates[warp_id]; exclusive_output = scan_op(outer_warp_exclusive, exclusive_output); if (lane_id == 0) exclusive_output = outer_warp_exclusive; } } /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input items T &exclusive_output, ///< [out] Calling thread's output items (may be aliased to \p input) const T &initial_value, ///< [in] Initial value to seed the exclusive scan ScanOp scan_op, ///< [in] Binary scan operator T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items { // Compute warp scan in each warp. The exclusive output from each lane0 is invalid. 
T inclusive_output; OuterWarpScanT(temp_storage.aliasable.outer_warp_scan.Alias()[warp_id]).Scan( input, inclusive_output, exclusive_output, scan_op); // Share outer warp total if (lane_id == OUTER_WARP_THREADS - 1) { temp_storage.warp_aggregates[warp_id] = inclusive_output; } CTA_SYNC(); if (linear_tid < INNER_WARP_THREADS) { T outer_warp_input = temp_storage.warp_aggregates[linear_tid]; T outer_warp_exclusive; InnerWarpScanT(temp_storage.aliasable.inner_warp_scan).ExclusiveScan( outer_warp_input, outer_warp_exclusive, initial_value, scan_op, block_aggregate); temp_storage.block_aggregate = block_aggregate; temp_storage.warp_aggregates[linear_tid] = outer_warp_exclusive; } CTA_SYNC(); // Retrieve block aggregate block_aggregate = temp_storage.block_aggregate; // Apply warp prefix to our lane's partial T outer_warp_exclusive = temp_storage.warp_aggregates[warp_id]; exclusive_output = scan_op(outer_warp_exclusive, exclusive_output); if (lane_id == 0) exclusive_output = outer_warp_exclusive; } /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. The call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. template < typename ScanOp, typename BlockPrefixCallbackOp> __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input item T &exclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan operator BlockPrefixCallbackOp &block_prefix_callback_op) ///< [in-out] [warp0 only] Call-back functor for specifying a thread block-wide prefix to be applied to all inputs. { // Compute warp scan in each warp. The exclusive output from each lane0 is invalid. T inclusive_output; OuterWarpScanT(temp_storage.aliasable.outer_warp_scan.Alias()[warp_id]).Scan( input, inclusive_output, exclusive_output, scan_op); // Share outer warp total if (lane_id == OUTER_WARP_THREADS - 1) temp_storage.warp_aggregates[warp_id] = inclusive_output; CTA_SYNC(); if (linear_tid < INNER_WARP_THREADS) { InnerWarpScanT inner_scan(temp_storage.aliasable.inner_warp_scan); T upsweep = temp_storage.warp_aggregates[linear_tid]; T downsweep_prefix, block_aggregate; inner_scan.ExclusiveScan(upsweep, downsweep_prefix, scan_op, block_aggregate); // Use callback functor to get block prefix in lane0 and then broadcast to other lanes T block_prefix = block_prefix_callback_op(block_aggregate); block_prefix = inner_scan.Broadcast(block_prefix, 0); downsweep_prefix = scan_op(block_prefix, downsweep_prefix); if (linear_tid == 0) downsweep_prefix = block_prefix; temp_storage.warp_aggregates[linear_tid] = downsweep_prefix; } CTA_SYNC(); // Apply warp prefix to our lane's partial (or assign it if partial is invalid) T outer_warp_exclusive = temp_storage.warp_aggregates[warp_id]; exclusive_output = scan_op(outer_warp_exclusive, exclusive_output); if (lane_id == 0) exclusive_output = outer_warp_exclusive; } //--------------------------------------------------------------------- // Inclusive scans //--------------------------------------------------------------------- /// Computes an inclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. 
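    /*
     * A minimal sketch of the prefix-callback pattern served by the
     * ExclusiveScan overload above, used when scanning a sequence that spans
     * several consecutive tiles (assuming a 128-thread block, int items, and
     * num_items a multiple of 128; the struct and kernel names are
     * illustrative):
     *
     *     #include <cub/cub.cuh>
     *
     *     // Stateful functor holding the running total across tiles.  The first
     *     // warp invokes it once per tile; lane0's return value seeds the scan.
     *     struct RunningPrefixOp
     *     {
     *         int running_total;
     *         __device__ RunningPrefixOp(int initial) : running_total(initial) {}
     *         __device__ int operator()(int block_aggregate)
     *         {
     *             int old_prefix  = running_total;
     *             running_total  += block_aggregate;
     *             return old_prefix;    // exclusive prefix for the current tile
     *         }
     *     };
     *
     *     __global__ void ExampleTiledScanKernel(int *d_data, int num_items)
     *     {
     *         typedef cub::BlockScan<int, 128> BlockScanT;
     *         __shared__ typename BlockScanT::TempStorage temp_storage;
     *
     *         RunningPrefixOp prefix_op(0);
     *         for (int tile = 0; tile < num_items; tile += 128)
     *         {
     *             int thread_data = d_data[tile + threadIdx.x];
     *             BlockScanT(temp_storage).ExclusiveSum(thread_data, thread_data, prefix_op);
     *             __syncthreads();      // temp_storage is reused across tiles
     *             d_data[tile + threadIdx.x] = thread_data;
     *         }
     *     }
     */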
template __device__ __forceinline__ void InclusiveScan( T input, ///< [in] Calling thread's input item T &inclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op) ///< [in] Binary scan operator { T block_aggregate; InclusiveScan(input, inclusive_output, scan_op, block_aggregate); } /// Computes an inclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. template __device__ __forceinline__ void InclusiveScan( T input, ///< [in] Calling thread's input item T &inclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan operator T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items { // Compute warp scan in each warp. The exclusive output from each lane0 is invalid. OuterWarpScanT(temp_storage.aliasable.outer_warp_scan.Alias()[warp_id]).InclusiveScan( input, inclusive_output, scan_op); // Share outer warp total if (lane_id == OUTER_WARP_THREADS - 1) temp_storage.warp_aggregates[warp_id] = inclusive_output; CTA_SYNC(); if (linear_tid < INNER_WARP_THREADS) { T outer_warp_input = temp_storage.warp_aggregates[linear_tid]; T outer_warp_exclusive; InnerWarpScanT(temp_storage.aliasable.inner_warp_scan).ExclusiveScan( outer_warp_input, outer_warp_exclusive, scan_op, block_aggregate); temp_storage.block_aggregate = block_aggregate; temp_storage.warp_aggregates[linear_tid] = outer_warp_exclusive; } CTA_SYNC(); if (warp_id != 0) { // Retrieve block aggregate block_aggregate = temp_storage.block_aggregate; // Apply warp prefix to our lane's partial T outer_warp_exclusive = temp_storage.warp_aggregates[warp_id]; inclusive_output = scan_op(outer_warp_exclusive, inclusive_output); } } /// Computes an inclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. the call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. template < typename ScanOp, typename BlockPrefixCallbackOp> __device__ __forceinline__ void InclusiveScan( T input, ///< [in] Calling thread's input item T &inclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan operator BlockPrefixCallbackOp &block_prefix_callback_op) ///< [in-out] [warp0 only] Call-back functor for specifying a thread block-wide prefix to be applied to all inputs. { // Compute warp scan in each warp. The exclusive output from each lane0 is invalid. 
OuterWarpScanT(temp_storage.aliasable.outer_warp_scan.Alias()[warp_id]).InclusiveScan( input, inclusive_output, scan_op); // Share outer warp total if (lane_id == OUTER_WARP_THREADS - 1) temp_storage.warp_aggregates[warp_id] = inclusive_output; CTA_SYNC(); if (linear_tid < INNER_WARP_THREADS) { InnerWarpScanT inner_scan(temp_storage.aliasable.inner_warp_scan); T upsweep = temp_storage.warp_aggregates[linear_tid]; T downsweep_prefix, block_aggregate; inner_scan.ExclusiveScan(upsweep, downsweep_prefix, scan_op, block_aggregate); // Use callback functor to get block prefix in lane0 and then broadcast to other lanes T block_prefix = block_prefix_callback_op(block_aggregate); block_prefix = inner_scan.Broadcast(block_prefix, 0); downsweep_prefix = scan_op(block_prefix, downsweep_prefix); if (linear_tid == 0) downsweep_prefix = block_prefix; temp_storage.warp_aggregates[linear_tid] = downsweep_prefix; } CTA_SYNC(); // Apply warp prefix to our lane's partial T outer_warp_exclusive = temp_storage.warp_aggregates[warp_id]; inclusive_output = scan_op(outer_warp_exclusive, inclusive_output); } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/cub.cuh000066400000000000000000000070521411340063500172630ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ******************************************************************************/ /** * \file * CUB umbrella include file */ #pragma once // Block #include "block/block_histogram.cuh" #include "block/block_discontinuity.cuh" #include "block/block_exchange.cuh" #include "block/block_load.cuh" #include "block/block_radix_rank.cuh" #include "block/block_radix_sort.cuh" #include "block/block_reduce.cuh" #include "block/block_scan.cuh" #include "block/block_store.cuh" //#include "block/block_shift.cuh" // Device #include "device/device_histogram.cuh" #include "device/device_partition.cuh" #include "device/device_radix_sort.cuh" #include "device/device_reduce.cuh" #include "device/device_run_length_encode.cuh" #include "device/device_scan.cuh" #include "device/device_segmented_radix_sort.cuh" #include "device/device_segmented_reduce.cuh" #include "device/device_select.cuh" #include "device/device_spmv.cuh" // Grid //#include "grid/grid_barrier.cuh" #include "grid/grid_even_share.cuh" #include "grid/grid_mapping.cuh" #include "grid/grid_queue.cuh" // Thread #include "thread/thread_load.cuh" #include "thread/thread_operators.cuh" #include "thread/thread_reduce.cuh" #include "thread/thread_scan.cuh" #include "thread/thread_store.cuh" // Warp #include "warp/warp_reduce.cuh" #include "warp/warp_scan.cuh" // Iterator #include "iterator/arg_index_input_iterator.cuh" #include "iterator/cache_modified_input_iterator.cuh" #include "iterator/cache_modified_output_iterator.cuh" #include "iterator/constant_input_iterator.cuh" #include "iterator/counting_input_iterator.cuh" #include "iterator/tex_obj_input_iterator.cuh" #include "iterator/tex_ref_input_iterator.cuh" #include "iterator/transform_input_iterator.cuh" // Util #include "util_arch.cuh" #include "util_debug.cuh" #include "util_device.cuh" #include "util_macro.cuh" #include "util_ptx.cuh" #include "util_type.cuh" relion-3.1.3/src/acc/cuda/cub/device/000077500000000000000000000000001411340063500172445ustar00rootroot00000000000000relion-3.1.3/src/acc/cuda/cub/device/device_histogram.cuh000066400000000000000000001521131411340063500232640ustar00rootroot00000000000000 /****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::DeviceHistogram provides device-wide parallel operations for constructing histogram(s) from a sequence of samples data residing within device-accessible memory. */ #pragma once #include #include #include #include "dispatch/dispatch_histogram.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief DeviceHistogram provides device-wide parallel operations for constructing histogram(s) from a sequence of samples data residing within device-accessible memory. ![](histogram_logo.png) * \ingroup SingleModule * * \par Overview * A histogram * counts the number of observations that fall into each of the disjoint categories (known as bins). * * \par Usage Considerations * \cdp_class{DeviceHistogram} * */ struct DeviceHistogram { /******************************************************************//** * \name Evenly-segmented bin ranges *********************************************************************/ //@{ /** * \brief Computes an intensity histogram from a sequence of data samples using equal-width bins. * * \par * - The number of histogram bins is (\p num_levels - 1) * - All bins comprise the same width of sample values: (\p upper_level - \p lower_level) / (\p num_levels - 1) * - \devicestorage * * \par Snippet * The code snippet below illustrates the computation of a six-bin histogram * from a sequence of float samples * * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input samples and * // output histogram * int num_samples; // e.g., 10 * float* d_samples; // e.g., [2.2, 6.0, 7.1, 2.9, 3.5, 0.3, 2.9, 2.0, 6.1, 999.5] * int* d_histogram; // e.g., [ -, -, -, -, -, -, -, -] * int num_levels; // e.g., 7 (seven level boundaries for six bins) * float lower_level; // e.g., 0.0 (lower sample value boundary of lowest bin) * float upper_level; // e.g., 12.0 (upper sample value boundary of upper bin) * ... * * // Determine temporary device storage requirements * void* d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceHistogram::HistogramEven(d_temp_storage, temp_storage_bytes, * d_samples, d_histogram, num_levels, lower_level, upper_level, num_samples); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Compute histograms * cub::DeviceHistogram::HistogramEven(d_temp_storage, temp_storage_bytes, * d_samples, d_histogram, num_levels, lower_level, upper_level, num_samples); * * // d_histogram <-- [1, 0, 5, 0, 3, 0, 0, 0]; * * \endcode * * \tparam SampleIteratorT [inferred] Random-access input iterator type for reading input samples. \iterator * \tparam CounterT [inferred] Integer type for histogram bin counters * \tparam LevelT [inferred] Type for specifying boundaries (levels) * \tparam OffsetT [inferred] Signed integer type for sequence offsets, list lengths, pointer differences, etc. 
\offset_size1 */ template < typename SampleIteratorT, typename CounterT, typename LevelT, typename OffsetT> CUB_RUNTIME_FUNCTION static cudaError_t HistogramEven( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation SampleIteratorT d_samples, ///< [in] The pointer to the input sequence of data samples. CounterT* d_histogram, ///< [out] The pointer to the histogram counter output array of length num_levels - 1. int num_levels, ///< [in] The number of boundaries (levels) for delineating histogram samples. Implies that the number of bins is num_levels - 1. LevelT lower_level, ///< [in] The lower sample value bound (inclusive) for the lowest histogram bin. LevelT upper_level, ///< [in] The upper sample value bound (exclusive) for the highest histogram bin. OffsetT num_samples, ///< [in] The number of input samples (i.e., the length of \p d_samples) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. { /// The sample value type of the input iterator typedef typename std::iterator_traits::value_type SampleT; CounterT* d_histogram1[1] = {d_histogram}; int num_levels1[1] = {num_levels}; LevelT lower_level1[1] = {lower_level}; LevelT upper_level1[1] = {upper_level}; return MultiHistogramEven<1, 1>( d_temp_storage, temp_storage_bytes, d_samples, d_histogram1, num_levels1, lower_level1, upper_level1, num_samples, 1, sizeof(SampleT) * num_samples, stream, debug_synchronous); } /** * \brief Computes an intensity histogram from a sequence of data samples using equal-width bins. * * \par * - A two-dimensional region of interest within \p d_samples can be specified * using the \p num_row_samples, num_rows, and \p row_stride_bytes parameters. * - The row stride must be a whole multiple of the sample data type * size, i.e., (row_stride_bytes % sizeof(SampleT)) == 0. * - The number of histogram bins is (\p num_levels - 1) * - All bins comprise the same width of sample values: (\p upper_level - \p lower_level) / (\p num_levels - 1) * - \devicestorage * * \par Snippet * The code snippet below illustrates the computation of a six-bin histogram * from a 2x5 region of interest within a flattened 2x7 array of float samples. * * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input samples and * // output histogram * int num_row_samples; // e.g., 5 * int num_rows; // e.g., 2; * size_t row_stride_bytes; // e.g., 7 * sizeof(float) * float* d_samples; // e.g., [2.2, 6.0, 7.1, 2.9, 3.5, -, -, * // 0.3, 2.9, 2.0, 6.1, 999.5, -, -] * int* d_histogram; // e.g., [ -, -, -, -, -, -, -, -] * int num_levels; // e.g., 7 (seven level boundaries for six bins) * float lower_level; // e.g., 0.0 (lower sample value boundary of lowest bin) * float upper_level; // e.g., 12.0 (upper sample value boundary of upper bin) * ... 
* * // Determine temporary device storage requirements * void* d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceHistogram::HistogramEven(d_temp_storage, temp_storage_bytes, * d_samples, d_histogram, num_levels, lower_level, upper_level, * num_row_samples, num_rows, row_stride_bytes); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Compute histograms * cub::DeviceHistogram::HistogramEven(d_temp_storage, temp_storage_bytes, d_samples, d_histogram, * d_samples, d_histogram, num_levels, lower_level, upper_level, * num_row_samples, num_rows, row_stride_bytes); * * // d_histogram <-- [1, 0, 5, 0, 3, 0, 0, 0]; * * \endcode * * \tparam SampleIteratorT [inferred] Random-access input iterator type for reading input samples. \iterator * \tparam CounterT [inferred] Integer type for histogram bin counters * \tparam LevelT [inferred] Type for specifying boundaries (levels) * \tparam OffsetT [inferred] Signed integer type for sequence offsets, list lengths, pointer differences, etc. \offset_size1 */ template < typename SampleIteratorT, typename CounterT, typename LevelT, typename OffsetT> CUB_RUNTIME_FUNCTION static cudaError_t HistogramEven( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation SampleIteratorT d_samples, ///< [in] The pointer to the input sequence of data samples. CounterT* d_histogram, ///< [out] The pointer to the histogram counter output array of length num_levels - 1. int num_levels, ///< [in] The number of boundaries (levels) for delineating histogram samples. Implies that the number of bins is num_levels - 1. LevelT lower_level, ///< [in] The lower sample value bound (inclusive) for the lowest histogram bin. LevelT upper_level, ///< [in] The upper sample value bound (exclusive) for the highest histogram bin. OffsetT num_row_samples, ///< [in] The number of data samples per row in the region of interest OffsetT num_rows, ///< [in] The number of rows in the region of interest size_t row_stride_bytes, ///< [in] The number of bytes between starts of consecutive rows in the region of interest cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. { CounterT* d_histogram1[1] = {d_histogram}; int num_levels1[1] = {num_levels}; LevelT lower_level1[1] = {lower_level}; LevelT upper_level1[1] = {upper_level}; return MultiHistogramEven<1, 1>( d_temp_storage, temp_storage_bytes, d_samples, d_histogram1, num_levels1, lower_level1, upper_level1, num_row_samples, num_rows, row_stride_bytes, stream, debug_synchronous); } /** * \brief Computes per-channel intensity histograms from a sequence of multi-channel "pixel" data samples using equal-width bins. * * \par * - The input is a sequence of pixel structures, where each pixel comprises * a record of \p NUM_CHANNELS consecutive data samples (e.g., an RGBA pixel). * - Of the \p NUM_CHANNELS specified, the function will only compute histograms * for the first \p NUM_ACTIVE_CHANNELS (e.g., only RGB histograms from RGBA * pixel samples). * - The number of histogram bins for channeli is num_levels[i] - 1. 
* - For channeli, the range of values for all histogram bins * have the same width: (upper_level[i] - lower_level[i]) / ( num_levels[i] - 1) * - \devicestorage * * \par Snippet * The code snippet below illustrates the computation of three 256-bin RGB histograms * from a quad-channel sequence of RGBA pixels (8 bits per channel per pixel) * * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input samples * // and output histograms * int num_pixels; // e.g., 5 * unsigned char* d_samples; // e.g., [(2, 6, 7, 5), (3, 0, 2, 1), (7, 0, 6, 2), * // (0, 6, 7, 5), (3, 0, 2, 6)] * int* d_histogram[3]; // e.g., three device pointers to three device buffers, * // each allocated with 256 integer counters * int num_levels[3]; // e.g., {257, 257, 257}; * unsigned int lower_level[3]; // e.g., {0, 0, 0}; * unsigned int upper_level[3]; // e.g., {256, 256, 256}; * ... * * // Determine temporary device storage requirements * void* d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceHistogram::MultiHistogramEven<4, 3>(d_temp_storage, temp_storage_bytes, * d_samples, d_histogram, num_levels, lower_level, upper_level, num_pixels); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Compute histograms * cub::DeviceHistogram::MultiHistogramEven<4, 3>(d_temp_storage, temp_storage_bytes, * d_samples, d_histogram, num_levels, lower_level, upper_level, num_pixels); * * // d_histogram <-- [ [1, 0, 1, 2, 0, 0, 0, 1, 0, 0, 0, ..., 0], * // [0, 3, 0, 0, 0, 0, 2, 0, 0, 0, 0, ..., 0], * // [0, 0, 2, 0, 0, 0, 1, 2, 0, 0, 0, ..., 0] ] * * \endcode * * \tparam NUM_CHANNELS Number of channels interleaved in the input data (may be greater than the number of channels being actively histogrammed) * \tparam NUM_ACTIVE_CHANNELS [inferred] Number of channels actively being histogrammed * \tparam SampleIteratorT [inferred] Random-access input iterator type for reading input samples. \iterator * \tparam CounterT [inferred] Integer type for histogram bin counters * \tparam LevelT [inferred] Type for specifying boundaries (levels) * \tparam OffsetT [inferred] Signed integer type for sequence offsets, list lengths, pointer differences, etc. \offset_size1 */ template < int NUM_CHANNELS, int NUM_ACTIVE_CHANNELS, typename SampleIteratorT, typename CounterT, typename LevelT, typename OffsetT> CUB_RUNTIME_FUNCTION static cudaError_t MultiHistogramEven( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation SampleIteratorT d_samples, ///< [in] The pointer to the multi-channel input sequence of data samples. The samples from different channels are assumed to be interleaved (e.g., an array of 32-bit pixels where each pixel consists of four RGBA 8-bit samples). CounterT* d_histogram[NUM_ACTIVE_CHANNELS], ///< [out] The pointers to the histogram counter output arrays, one for each active channel. For channeli, the allocation length of d_histogram[i] should be num_levels[i] - 1. int num_levels[NUM_ACTIVE_CHANNELS], ///< [in] The number of boundaries (levels) for delineating histogram samples in each active channel. Implies that the number of bins for channeli is num_levels[i] - 1. 
LevelT lower_level[NUM_ACTIVE_CHANNELS], ///< [in] The lower sample value bound (inclusive) for the lowest histogram bin in each active channel. LevelT upper_level[NUM_ACTIVE_CHANNELS], ///< [in] The upper sample value bound (exclusive) for the highest histogram bin in each active channel. OffsetT num_pixels, ///< [in] The number of multi-channel pixels (i.e., the length of \p d_samples / NUM_CHANNELS) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. { /// The sample value type of the input iterator typedef typename std::iterator_traits::value_type SampleT; return MultiHistogramEven( d_temp_storage, temp_storage_bytes, d_samples, d_histogram, num_levels, lower_level, upper_level, num_pixels, 1, sizeof(SampleT) * NUM_CHANNELS * num_pixels, stream, debug_synchronous); } /** * \brief Computes per-channel intensity histograms from a sequence of multi-channel "pixel" data samples using equal-width bins. * * \par * - The input is a sequence of pixel structures, where each pixel comprises * a record of \p NUM_CHANNELS consecutive data samples (e.g., an RGBA pixel). * - Of the \p NUM_CHANNELS specified, the function will only compute histograms * for the first \p NUM_ACTIVE_CHANNELS (e.g., only RGB histograms from RGBA * pixel samples). * - A two-dimensional region of interest within \p d_samples can be specified * using the \p num_row_samples, num_rows, and \p row_stride_bytes parameters. * - The row stride must be a whole multiple of the sample data type * size, i.e., (row_stride_bytes % sizeof(SampleT)) == 0. * - The number of histogram bins for channeli is num_levels[i] - 1. * - For channeli, the range of values for all histogram bins * have the same width: (upper_level[i] - lower_level[i]) / ( num_levels[i] - 1) * - \devicestorage * * \par Snippet * The code snippet below illustrates the computation of three 256-bin RGB histograms from a 2x3 region of * interest of within a flattened 2x4 array of quad-channel RGBA pixels (8 bits per channel per pixel). * * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input samples * // and output histograms * int num_row_pixels; // e.g., 3 * int num_rows; // e.g., 2 * size_t row_stride_bytes; // e.g., 4 * sizeof(unsigned char) * NUM_CHANNELS * unsigned char* d_samples; // e.g., [(2, 6, 7, 5), (3, 0, 2, 1), (7, 0, 6, 2), (-, -, -, -), * // (0, 6, 7, 5), (3, 0, 2, 6), (1, 1, 1, 1), (-, -, -, -)] * int* d_histogram[3]; // e.g., three device pointers to three device buffers, * // each allocated with 256 integer counters * int num_levels[3]; // e.g., {257, 257, 257}; * unsigned int lower_level[3]; // e.g., {0, 0, 0}; * unsigned int upper_level[3]; // e.g., {256, 256, 256}; * ... 
* * // Determine temporary device storage requirements * void* d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceHistogram::MultiHistogramEven<4, 3>(d_temp_storage, temp_storage_bytes, * d_samples, d_histogram, num_levels, lower_level, upper_level, * num_row_pixels, num_rows, row_stride_bytes); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Compute histograms * cub::DeviceHistogram::MultiHistogramEven<4, 3>(d_temp_storage, temp_storage_bytes, * d_samples, d_histogram, num_levels, lower_level, upper_level, * num_row_pixels, num_rows, row_stride_bytes); * * // d_histogram <-- [ [1, 1, 1, 2, 0, 0, 0, 1, 0, 0, 0, ..., 0], * // [0, 4, 0, 0, 0, 0, 2, 0, 0, 0, 0, ..., 0], * // [0, 1, 2, 0, 0, 0, 1, 2, 0, 0, 0, ..., 0] ] * * \endcode * * \tparam NUM_CHANNELS Number of channels interleaved in the input data (may be greater than the number of channels being actively histogrammed) * \tparam NUM_ACTIVE_CHANNELS [inferred] Number of channels actively being histogrammed * \tparam SampleIteratorT [inferred] Random-access input iterator type for reading input samples. \iterator * \tparam CounterT [inferred] Integer type for histogram bin counters * \tparam LevelT [inferred] Type for specifying boundaries (levels) * \tparam OffsetT [inferred] Signed integer type for sequence offsets, list lengths, pointer differences, etc. \offset_size1 */ template < int NUM_CHANNELS, int NUM_ACTIVE_CHANNELS, typename SampleIteratorT, typename CounterT, typename LevelT, typename OffsetT> CUB_RUNTIME_FUNCTION static cudaError_t MultiHistogramEven( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation SampleIteratorT d_samples, ///< [in] The pointer to the multi-channel input sequence of data samples. The samples from different channels are assumed to be interleaved (e.g., an array of 32-bit pixels where each pixel consists of four RGBA 8-bit samples). CounterT* d_histogram[NUM_ACTIVE_CHANNELS], ///< [out] The pointers to the histogram counter output arrays, one for each active channel. For channeli, the allocation length of d_histogram[i] should be num_levels[i] - 1. int num_levels[NUM_ACTIVE_CHANNELS], ///< [in] The number of boundaries (levels) for delineating histogram samples in each active channel. Implies that the number of bins for channeli is num_levels[i] - 1. LevelT lower_level[NUM_ACTIVE_CHANNELS], ///< [in] The lower sample value bound (inclusive) for the lowest histogram bin in each active channel. LevelT upper_level[NUM_ACTIVE_CHANNELS], ///< [in] The upper sample value bound (exclusive) for the highest histogram bin in each active channel. OffsetT num_row_pixels, ///< [in] The number of multi-channel pixels per row in the region of interest OffsetT num_rows, ///< [in] The number of rows in the region of interest size_t row_stride_bytes, ///< [in] The number of bytes between starts of consecutive rows in the region of interest cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. 
{ /// The sample value type of the input iterator typedef typename std::iterator_traits::value_type SampleT; Int2Type is_byte_sample; if ((sizeof(OffsetT) > sizeof(int)) && ((unsigned long long) (num_rows * row_stride_bytes) < (unsigned long long) std::numeric_limits::max())) { // Down-convert OffsetT data type return DipatchHistogram::DispatchEven( d_temp_storage, temp_storage_bytes, d_samples, d_histogram, num_levels, lower_level, upper_level, (int) num_row_pixels, (int) num_rows, (int) (row_stride_bytes / sizeof(SampleT)), stream, debug_synchronous, is_byte_sample); } return DipatchHistogram::DispatchEven( d_temp_storage, temp_storage_bytes, d_samples, d_histogram, num_levels, lower_level, upper_level, num_row_pixels, num_rows, (OffsetT) (row_stride_bytes / sizeof(SampleT)), stream, debug_synchronous, is_byte_sample); } //@} end member group /******************************************************************//** * \name Custom bin ranges *********************************************************************/ //@{ /** * \brief Computes an intensity histogram from a sequence of data samples using the specified bin boundary levels. * * \par * - The number of histogram bins is (\p num_levels - 1) * - The value range for bini is [level[i], level[i+1]) * - \devicestorage * * \par Snippet * The code snippet below illustrates the computation of an six-bin histogram * from a sequence of float samples * * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input samples and * // output histogram * int num_samples; // e.g., 10 * float* d_samples; // e.g., [2.2, 6.0, 7.1, 2.9, 3.5, 0.3, 2.9, 2.0, 6.1, 999.5] * int* d_histogram; // e.g., [ -, -, -, -, -, -, -, -] * int num_levels // e.g., 7 (seven level boundaries for six bins) * float* d_levels; // e.g., [0.0, 2.0, 4.0, 6.0, 8.0, 12.0, 16.0] * ... * * // Determine temporary device storage requirements * void* d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceHistogram::HistogramRange(d_temp_storage, temp_storage_bytes, * d_samples, d_histogram, num_levels, d_levels, num_samples); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Compute histograms * cub::DeviceHistogram::HistogramRange(d_temp_storage, temp_storage_bytes, * d_samples, d_histogram, num_levels, d_levels, num_samples); * * // d_histogram <-- [1, 0, 5, 0, 3, 0, 0, 0]; * * \endcode * * \tparam SampleIteratorT [inferred] Random-access input iterator type for reading input samples. \iterator * \tparam CounterT [inferred] Integer type for histogram bin counters * \tparam LevelT [inferred] Type for specifying boundaries (levels) * \tparam OffsetT [inferred] Signed integer type for sequence offsets, list lengths, pointer differences, etc. \offset_size1 */ template < typename SampleIteratorT, typename CounterT, typename LevelT, typename OffsetT> CUB_RUNTIME_FUNCTION static cudaError_t HistogramRange( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation SampleIteratorT d_samples, ///< [in] The pointer to the input sequence of data samples. CounterT* d_histogram, ///< [out] The pointer to the histogram counter output array of length num_levels - 1. int num_levels, ///< [in] The number of boundaries (levels) for delineating histogram samples. 
Implies that the number of bins is num_levels - 1. LevelT* d_levels, ///< [in] The pointer to the array of boundaries (levels). Bin ranges are defined by consecutive boundary pairings: lower sample value boundaries are inclusive and upper sample value boundaries are exclusive. OffsetT num_samples, ///< [in] The number of data samples per row in the region of interest cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. { /// The sample value type of the input iterator typedef typename std::iterator_traits::value_type SampleT; CounterT* d_histogram1[1] = {d_histogram}; int num_levels1[1] = {num_levels}; LevelT* d_levels1[1] = {d_levels}; return MultiHistogramRange<1, 1>( d_temp_storage, temp_storage_bytes, d_samples, d_histogram1, num_levels1, d_levels1, num_samples, 1, sizeof(SampleT) * num_samples, stream, debug_synchronous); } /** * \brief Computes an intensity histogram from a sequence of data samples using the specified bin boundary levels. * * \par * - A two-dimensional region of interest within \p d_samples can be specified * using the \p num_row_samples, num_rows, and \p row_stride_bytes parameters. * - The row stride must be a whole multiple of the sample data type * size, i.e., (row_stride_bytes % sizeof(SampleT)) == 0. * - The number of histogram bins is (\p num_levels - 1) * - The value range for bini is [level[i], level[i+1]) * - \devicestorage * * \par Snippet * The code snippet below illustrates the computation of a six-bin histogram * from a 2x5 region of interest within a flattened 2x7 array of float samples. * * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input samples and * // output histogram * int num_row_samples; // e.g., 5 * int num_rows; // e.g., 2; * int row_stride_bytes; // e.g., 7 * sizeof(float) * float* d_samples; // e.g., [2.2, 6.0, 7.1, 2.9, 3.5, -, -, * // 0.3, 2.9, 2.0, 6.1, 999.5, -, -] * int* d_histogram; // e.g., [ , , , , , , , ] * int num_levels // e.g., 7 (seven level boundaries for six bins) * float *d_levels; // e.g., [0.0, 2.0, 4.0, 6.0, 8.0, 12.0, 16.0] * ... * * // Determine temporary device storage requirements * void* d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceHistogram::HistogramRange(d_temp_storage, temp_storage_bytes, * d_samples, d_histogram, num_levels, d_levels, * num_row_samples, num_rows, row_stride_bytes); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Compute histograms * cub::DeviceHistogram::HistogramRange(d_temp_storage, temp_storage_bytes, * d_samples, d_histogram, num_levels, d_levels, * num_row_samples, num_rows, row_stride_bytes); * * // d_histogram <-- [1, 0, 5, 0, 3, 0, 0, 0]; * * \endcode * * \tparam SampleIteratorT [inferred] Random-access input iterator type for reading input samples. \iterator * \tparam CounterT [inferred] Integer type for histogram bin counters * \tparam LevelT [inferred] Type for specifying boundaries (levels) * \tparam OffsetT [inferred] Signed integer type for sequence offsets, list lengths, pointer differences, etc. 
\offset_size1 */ template < typename SampleIteratorT, typename CounterT, typename LevelT, typename OffsetT> CUB_RUNTIME_FUNCTION static cudaError_t HistogramRange( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation SampleIteratorT d_samples, ///< [in] The pointer to the input sequence of data samples. CounterT* d_histogram, ///< [out] The pointer to the histogram counter output array of length num_levels - 1. int num_levels, ///< [in] The number of boundaries (levels) for delineating histogram samples. Implies that the number of bins is num_levels - 1. LevelT* d_levels, ///< [in] The pointer to the array of boundaries (levels). Bin ranges are defined by consecutive boundary pairings: lower sample value boundaries are inclusive and upper sample value boundaries are exclusive. OffsetT num_row_samples, ///< [in] The number of data samples per row in the region of interest OffsetT num_rows, ///< [in] The number of rows in the region of interest size_t row_stride_bytes, ///< [in] The number of bytes between starts of consecutive rows in the region of interest cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. { CounterT* d_histogram1[1] = {d_histogram}; int num_levels1[1] = {num_levels}; LevelT* d_levels1[1] = {d_levels}; return MultiHistogramRange<1, 1>( d_temp_storage, temp_storage_bytes, d_samples, d_histogram1, num_levels1, d_levels1, num_row_samples, num_rows, row_stride_bytes, stream, debug_synchronous); } /** * \brief Computes per-channel intensity histograms from a sequence of multi-channel "pixel" data samples using the specified bin boundary levels. * * \par * - The input is a sequence of pixel structures, where each pixel comprises * a record of \p NUM_CHANNELS consecutive data samples (e.g., an RGBA pixel). * - Of the \p NUM_CHANNELS specified, the function will only compute histograms * for the first \p NUM_ACTIVE_CHANNELS (e.g., RGB histograms from RGBA * pixel samples). * - The number of histogram bins for channeli is num_levels[i] - 1. * - For channeli, the range of values for all histogram bins * have the same width: (upper_level[i] - lower_level[i]) / ( num_levels[i] - 1) * - \devicestorage * * \par Snippet * The code snippet below illustrates the computation of three 4-bin RGB histograms * from a quad-channel sequence of RGBA pixels (8 bits per channel per pixel) * * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input samples * // and output histograms * int num_pixels; // e.g., 5 * unsigned char *d_samples; // e.g., [(2, 6, 7, 5),(3, 0, 2, 1),(7, 0, 6, 2), * // (0, 6, 7, 5),(3, 0, 2, 6)] * unsigned int *d_histogram[3]; // e.g., [[ -, -, -, -],[ -, -, -, -],[ -, -, -, -]]; * int num_levels[3]; // e.g., {5, 5, 5}; * unsigned int *d_levels[3]; // e.g., [ [0, 2, 4, 6, 8], * // [0, 2, 4, 6, 8], * // [0, 2, 4, 6, 8] ]; * ... 
* * // Determine temporary device storage requirements * void* d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceHistogram::MultiHistogramRange<4, 3>(d_temp_storage, temp_storage_bytes, * d_samples, d_histogram, num_levels, d_levels, num_pixels); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Compute histograms * cub::DeviceHistogram::MultiHistogramRange<4, 3>(d_temp_storage, temp_storage_bytes, * d_samples, d_histogram, num_levels, d_levels, num_pixels); * * // d_histogram <-- [ [1, 3, 0, 1], * // [3, 0, 0, 2], * // [0, 2, 0, 3] ] * * \endcode * * \tparam NUM_CHANNELS Number of channels interleaved in the input data (may be greater than the number of channels being actively histogrammed) * \tparam NUM_ACTIVE_CHANNELS [inferred] Number of channels actively being histogrammed * \tparam SampleIteratorT [inferred] Random-access input iterator type for reading input samples. \iterator * \tparam CounterT [inferred] Integer type for histogram bin counters * \tparam LevelT [inferred] Type for specifying boundaries (levels) * \tparam OffsetT [inferred] Signed integer type for sequence offsets, list lengths, pointer differences, etc. \offset_size1 */ template < int NUM_CHANNELS, int NUM_ACTIVE_CHANNELS, typename SampleIteratorT, typename CounterT, typename LevelT, typename OffsetT> CUB_RUNTIME_FUNCTION static cudaError_t MultiHistogramRange( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation SampleIteratorT d_samples, ///< [in] The pointer to the multi-channel input sequence of data samples. The samples from different channels are assumed to be interleaved (e.g., an array of 32-bit pixels where each pixel consists of four RGBA 8-bit samples). CounterT* d_histogram[NUM_ACTIVE_CHANNELS], ///< [out] The pointers to the histogram counter output arrays, one for each active channel. For channeli, the allocation length of d_histogram[i] should be num_levels[i] - 1. int num_levels[NUM_ACTIVE_CHANNELS], ///< [in] The number of boundaries (levels) for delineating histogram samples in each active channel. Implies that the number of bins for channeli is num_levels[i] - 1. LevelT* d_levels[NUM_ACTIVE_CHANNELS], ///< [in] The pointers to the arrays of boundaries (levels), one for each active channel. Bin ranges are defined by consecutive boundary pairings: lower sample value boundaries are inclusive and upper sample value boundaries are exclusive. OffsetT num_pixels, ///< [in] The number of multi-channel pixels (i.e., the length of \p d_samples / NUM_CHANNELS) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. { /// The sample value type of the input iterator typedef typename std::iterator_traits::value_type SampleT; return MultiHistogramRange( d_temp_storage, temp_storage_bytes, d_samples, d_histogram, num_levels, d_levels, num_pixels, 1, sizeof(SampleT) * NUM_CHANNELS * num_pixels, stream, debug_synchronous); } /** * \brief Computes per-channel intensity histograms from a sequence of multi-channel "pixel" data samples using the specified bin boundary levels. 
* * \par * - The input is a sequence of pixel structures, where each pixel comprises * a record of \p NUM_CHANNELS consecutive data samples (e.g., an RGBA pixel). * - Of the \p NUM_CHANNELS specified, the function will only compute histograms * for the first \p NUM_ACTIVE_CHANNELS (e.g., RGB histograms from RGBA * pixel samples). * - A two-dimensional region of interest within \p d_samples can be specified * using the \p num_row_samples, num_rows, and \p row_stride_bytes parameters. * - The row stride must be a whole multiple of the sample data type * size, i.e., (row_stride_bytes % sizeof(SampleT)) == 0. * - The number of histogram bins for channeli is num_levels[i] - 1. * - For channeli, the range of values for all histogram bins * have the same width: (upper_level[i] - lower_level[i]) / ( num_levels[i] - 1) * - \devicestorage * * \par Snippet * The code snippet below illustrates the computation of three 4-bin RGB histograms from a 2x3 region of * interest of within a flattened 2x4 array of quad-channel RGBA pixels (8 bits per channel per pixel). * * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input samples * // and output histograms * int num_row_pixels; // e.g., 3 * int num_rows; // e.g., 2 * size_t row_stride_bytes; // e.g., 4 * sizeof(unsigned char) * NUM_CHANNELS * unsigned char* d_samples; // e.g., [(2, 6, 7, 5),(3, 0, 2, 1),(1, 1, 1, 1),(-, -, -, -), * // (7, 0, 6, 2),(0, 6, 7, 5),(3, 0, 2, 6),(-, -, -, -)] * int* d_histogram[3]; // e.g., [[ -, -, -, -],[ -, -, -, -],[ -, -, -, -]]; * int num_levels[3]; // e.g., {5, 5, 5}; * unsigned int* d_levels[3]; // e.g., [ [0, 2, 4, 6, 8], * // [0, 2, 4, 6, 8], * // [0, 2, 4, 6, 8] ]; * ... * * // Determine temporary device storage requirements * void* d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceHistogram::MultiHistogramRange<4, 3>(d_temp_storage, temp_storage_bytes, * d_samples, d_histogram, num_levels, d_levels, num_row_pixels, num_rows, row_stride_bytes); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Compute histograms * cub::DeviceHistogram::MultiHistogramRange<4, 3>(d_temp_storage, temp_storage_bytes, * d_samples, d_histogram, num_levels, d_levels, num_row_pixels, num_rows, row_stride_bytes); * * // d_histogram <-- [ [2, 3, 0, 1], * // [3, 0, 0, 2], * // [1, 2, 0, 3] ] * * \endcode * * \tparam NUM_CHANNELS Number of channels interleaved in the input data (may be greater than the number of channels being actively histogrammed) * \tparam NUM_ACTIVE_CHANNELS [inferred] Number of channels actively being histogrammed * \tparam SampleIteratorT [inferred] Random-access input iterator type for reading input samples. \iterator * \tparam CounterT [inferred] Integer type for histogram bin counters * \tparam LevelT [inferred] Type for specifying boundaries (levels) * \tparam OffsetT [inferred] Signed integer type for sequence offsets, list lengths, pointer differences, etc. \offset_size1 */ template < int NUM_CHANNELS, int NUM_ACTIVE_CHANNELS, typename SampleIteratorT, typename CounterT, typename LevelT, typename OffsetT> CUB_RUNTIME_FUNCTION static cudaError_t MultiHistogramRange( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. 
size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation SampleIteratorT d_samples, ///< [in] The pointer to the multi-channel input sequence of data samples. The samples from different channels are assumed to be interleaved (e.g., an array of 32-bit pixels where each pixel consists of four RGBA 8-bit samples). CounterT* d_histogram[NUM_ACTIVE_CHANNELS], ///< [out] The pointers to the histogram counter output arrays, one for each active channel. For channeli, the allocation length of d_histogram[i] should be num_levels[i] - 1. int num_levels[NUM_ACTIVE_CHANNELS], ///< [in] The number of boundaries (levels) for delineating histogram samples in each active channel. Implies that the number of bins for channeli is num_levels[i] - 1. LevelT* d_levels[NUM_ACTIVE_CHANNELS], ///< [in] The pointers to the arrays of boundaries (levels), one for each active channel. Bin ranges are defined by consecutive boundary pairings: lower sample value boundaries are inclusive and upper sample value boundaries are exclusive. OffsetT num_row_pixels, ///< [in] The number of multi-channel pixels per row in the region of interest OffsetT num_rows, ///< [in] The number of rows in the region of interest size_t row_stride_bytes, ///< [in] The number of bytes between starts of consecutive rows in the region of interest cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. { /// The sample value type of the input iterator typedef typename std::iterator_traits::value_type SampleT; Int2Type is_byte_sample; if ((sizeof(OffsetT) > sizeof(int)) && ((unsigned long long) (num_rows * row_stride_bytes) < (unsigned long long) std::numeric_limits::max())) { // Down-convert OffsetT data type return DipatchHistogram::DispatchRange( d_temp_storage, temp_storage_bytes, d_samples, d_histogram, num_levels, d_levels, (int) num_row_pixels, (int) num_rows, (int) (row_stride_bytes / sizeof(SampleT)), stream, debug_synchronous, is_byte_sample); } return DipatchHistogram::DispatchRange( d_temp_storage, temp_storage_bytes, d_samples, d_histogram, num_levels, d_levels, num_row_pixels, num_rows, (OffsetT) (row_stride_bytes / sizeof(SampleT)), stream, debug_synchronous, is_byte_sample); } //@} end member group }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/device/device_partition.cuh000066400000000000000000000331031411340063500232750ustar00rootroot00000000000000 /****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::DevicePartition provides device-wide, parallel operations for partitioning sequences of data items residing within device-accessible memory. */ #pragma once #include #include #include "dispatch/dispatch_select_if.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief DevicePartition provides device-wide, parallel operations for partitioning sequences of data items residing within device-accessible memory. ![](partition_logo.png) * \ingroup SingleModule * * \par Overview * These operations apply a selection criterion to construct a partitioned output sequence from items selected/unselected from * a specified input sequence. * * \par Usage Considerations * \cdp_class{DevicePartition} * * \par Performance * \linear_performance{partition} * * \par * The following chart illustrates DevicePartition::If * performance across different CUDA architectures for \p int32 items, * where 50% of the items are randomly selected for the first partition. * \plots_below * * \image html partition_if_int32_50_percent.png * */ struct DevicePartition { /** * \brief Uses the \p d_flags sequence to split the corresponding items from \p d_in into a partitioned sequence \p d_out. The total number of items copied into the first partition is written to \p d_num_selected_out. ![](partition_flags_logo.png) * * \par * - The value type of \p d_flags must be castable to \p bool (e.g., \p bool, \p char, \p int, etc.). * - Copies of the selected items are compacted into \p d_out and maintain their original * relative ordering, however copies of the unselected items are compacted into the * rear of \p d_out in reverse order. * - \devicestorage * * \par Snippet * The code snippet below illustrates the compaction of items selected from an \p int device vector. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input, flags, and output * int num_items; // e.g., 8 * int *d_in; // e.g., [1, 2, 3, 4, 5, 6, 7, 8] * char *d_flags; // e.g., [1, 0, 0, 1, 0, 1, 1, 0] * int *d_out; // e.g., [ , , , , , , , ] * int *d_num_selected_out; // e.g., [ ] * ... 
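 * // (Expected behaviour for this sketch: the flags above select items 1, 4, 6, 7,
 * //  which are compacted to the front of d_out in their original order, while the
 * //  unselected items 2, 3, 5, 8 are written to the rear of d_out in reverse order,
 * //  giving [1, 4, 6, 7, 8, 5, 3, 2].)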
* * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DevicePartition::Flagged(d_temp_storage, temp_storage_bytes, d_in, d_flags, d_out, d_num_selected_out, num_items); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run selection * cub::DevicePartition::Flagged(d_temp_storage, temp_storage_bytes, d_in, d_flags, d_out, d_num_selected_out, num_items); * * // d_out <-- [1, 4, 6, 7, 8, 5, 3, 2] * // d_num_selected_out <-- [4] * * \endcode * * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items \iterator * \tparam FlagIterator [inferred] Random-access input iterator type for reading selection flags \iterator * \tparam OutputIteratorT [inferred] Random-access output iterator type for writing output items \iterator * \tparam NumSelectedIteratorT [inferred] Output iterator type for recording the number of items selected \iterator */ template < typename InputIteratorT, typename FlagIterator, typename OutputIteratorT, typename NumSelectedIteratorT> CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t Flagged( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items FlagIterator d_flags, ///< [in] Pointer to the input sequence of selection flags OutputIteratorT d_out, ///< [out] Pointer to the output sequence of partitioned data items NumSelectedIteratorT d_num_selected_out, ///< [out] Pointer to the output total number of items selected (i.e., the offset of the unselected partition) int num_items, ///< [in] Total number of items to select from cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. { typedef int OffsetT; // Signed integer type for global offsets typedef NullType SelectOp; // Selection op (not used) typedef NullType EqualityOp; // Equality operator (not used) return DispatchSelectIf::Dispatch( d_temp_storage, temp_storage_bytes, d_in, d_flags, d_out, d_num_selected_out, SelectOp(), EqualityOp(), num_items, stream, debug_synchronous); } /** * \brief Uses the \p select_op functor to split the corresponding items from \p d_in into a partitioned sequence \p d_out. The total number of items copied into the first partition is written to \p d_num_selected_out. ![](partition_logo.png) * * \par * - Copies of the selected items are compacted into \p d_out and maintain their original * relative ordering, however copies of the unselected items are compacted into the * rear of \p d_out in reverse order. * - \devicestorage * * \par Performance * The following charts illustrate saturated partition-if performance across different * CUDA architectures for \p int32 and \p int64 items, respectively. Items are * selected for the first partition with 50% probability. 
* * \image html partition_if_int32_50_percent.png * \image html partition_if_int64_50_percent.png * * \par * The following charts are similar, but 5% selection probability for the first partition: * * \image html partition_if_int32_5_percent.png * \image html partition_if_int64_5_percent.png * * \par Snippet * The code snippet below illustrates the compaction of items selected from an \p int device vector. * \par * \code * #include // or equivalently * * // Functor type for selecting values less than some criteria * struct LessThan * { * int compare; * * CUB_RUNTIME_FUNCTION __forceinline__ * LessThan(int compare) : compare(compare) {} * * CUB_RUNTIME_FUNCTION __forceinline__ * bool operator()(const int &a) const { * return (a < compare); * } * }; * * // Declare, allocate, and initialize device-accessible pointers for input and output * int num_items; // e.g., 8 * int *d_in; // e.g., [0, 2, 3, 9, 5, 2, 81, 8] * int *d_out; // e.g., [ , , , , , , , ] * int *d_num_selected_out; // e.g., [ ] * LessThan select_op(7); * ... * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceSelect::If(d_temp_storage, temp_storage_bytes, d_in, d_out, d_num_selected_out, num_items, select_op); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run selection * cub::DeviceSelect::If(d_temp_storage, temp_storage_bytes, d_in, d_out, d_num_selected_out, num_items, select_op); * * // d_out <-- [0, 2, 3, 5, 2, 8, 81, 9] * // d_num_selected_out <-- [5] * * \endcode * * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items \iterator * \tparam OutputIteratorT [inferred] Random-access output iterator type for writing output items \iterator * \tparam NumSelectedIteratorT [inferred] Output iterator type for recording the number of items selected \iterator * \tparam SelectOp [inferred] Selection functor type having member bool operator()(const T &a) */ template < typename InputIteratorT, typename OutputIteratorT, typename NumSelectedIteratorT, typename SelectOp> CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t If( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output sequence of partitioned data items NumSelectedIteratorT d_num_selected_out, ///< [out] Pointer to the output total number of items selected (i.e., the offset of the unselected partition) int num_items, ///< [in] Total number of items to select from SelectOp select_op, ///< [in] Unary selection operator cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. 
{ typedef int OffsetT; // Signed integer type for global offsets typedef NullType* FlagIterator; // FlagT iterator type (not used) typedef NullType EqualityOp; // Equality operator (not used) return DispatchSelectIf::Dispatch( d_temp_storage, temp_storage_bytes, d_in, NULL, d_out, d_num_selected_out, select_op, EqualityOp(), num_items, stream, debug_synchronous); } }; /** * \example example_device_partition_flagged.cu * \example example_device_partition_if.cu */ } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/device/device_radix_sort.cuh000066400000000000000000001225221411340063500234460ustar00rootroot00000000000000 /****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::DeviceRadixSort provides device-wide, parallel operations for computing a radix sort across a sequence of data items residing within device-accessible memory. */ #pragma once #include #include #include "dispatch/dispatch_radix_sort.cuh" #include "../util_arch.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief DeviceRadixSort provides device-wide, parallel operations for computing a radix sort across a sequence of data items residing within device-accessible memory. ![](sorting_logo.png) * \ingroup SingleModule * * \par Overview * The [radix sorting method](http://en.wikipedia.org/wiki/Radix_sort) arranges * items into ascending (or descending) order. The algorithm relies upon a positional representation for * keys, i.e., each key is comprised of an ordered sequence of symbols (e.g., digits, * characters, etc.) specified from least-significant to most-significant. 
For a * given input sequence of keys and a set of rules specifying a total ordering * of the symbolic alphabet, the radix sorting method produces a lexicographic * ordering of those keys. * * \par * DeviceRadixSort can sort all of the built-in C++ numeric primitive types, e.g.: * unsigned char, \p int, \p double, etc. Although the direct radix sorting * method can only be applied to unsigned integral types, DeviceRadixSort * is able to sort signed and floating-point types via simple bit-wise transformations * that ensure lexicographic key ordering. * * \par Usage Considerations * \cdp_class{DeviceRadixSort} * * \par Performance * \linear_performance{radix sort} The following chart illustrates DeviceRadixSort::SortKeys * performance across different CUDA architectures for uniform-random \p uint32 keys. * \plots_below * * \image html lsb_radix_sort_int32_keys.png * */ struct DeviceRadixSort { /******************************************************************//** * \name KeyT-value pairs *********************************************************************/ //@{ /** * \brief Sorts key-value pairs into ascending order. (~2N auxiliary storage required) * * \par * - The contents of the input data are not altered by the sorting operation * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. * - \devicestorageNP For sorting using only O(P) temporary storage, see the sorting interface using DoubleBuffer wrappers below. * - \devicestorage * * \par Performance * The following charts illustrate saturated sorting performance across different * CUDA architectures for uniform-random uint32,uint32 and * uint64,uint64 pairs, respectively. * * \image html lsb_radix_sort_int32_pairs.png * \image html lsb_radix_sort_int64_pairs.png * * \par Snippet * The code snippet below illustrates the sorting of a device vector of \p int keys * with associated vector of \p int values. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for sorting data * int num_items; // e.g., 7 * int *d_keys_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_keys_out; // e.g., [ ... ] * int *d_values_in; // e.g., [0, 1, 2, 3, 4, 5, 6] * int *d_values_out; // e.g., [ ... ] * ... * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceRadixSort::SortPairs(d_temp_storage, temp_storage_bytes, * d_keys_in, d_keys_out, d_values_in, d_values_out, num_items); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run sorting operation * cub::DeviceRadixSort::SortPairs(d_temp_storage, temp_storage_bytes, * d_keys_in, d_keys_out, d_values_in, d_values_out, num_items); * * // d_keys_out <-- [0, 3, 5, 6, 7, 8, 9] * // d_values_out <-- [5, 4, 3, 1, 2, 0, 6] * * \endcode * * \tparam KeyT [inferred] KeyT type * \tparam ValueT [inferred] ValueT type */ template < typename KeyT, typename ValueT> CUB_RUNTIME_FUNCTION static cudaError_t SortPairs( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. 
size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation const KeyT *d_keys_in, ///< [in] Pointer to the input data of key data to sort KeyT *d_keys_out, ///< [out] Pointer to the sorted output sequence of key data const ValueT *d_values_in, ///< [in] Pointer to the corresponding input sequence of associated value items ValueT *d_values_out, ///< [out] Pointer to the correspondingly-reordered output sequence of associated value items int num_items, ///< [in] Number of items to sort int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; DoubleBuffer d_keys(const_cast(d_keys_in), d_keys_out); DoubleBuffer d_values(const_cast(d_values_in), d_values_out); return DispatchRadixSort::Dispatch( d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, begin_bit, end_bit, false, stream, debug_synchronous); } /** * \brief Sorts key-value pairs into ascending order. (~N auxiliary storage required) * * \par * - The sorting operation is given a pair of key buffers and a corresponding * pair of associated value buffers. Each pair is managed by a DoubleBuffer * structure that indicates which of the two buffers is "current" (and thus * contains the input data to be sorted). * - The contents of both buffers within each pair may be altered by the sorting * operation. * - Upon completion, the sorting operation will update the "current" indicator * within each DoubleBuffer wrapper to reference which of the two buffers * now contains the sorted output sequence (a function of the number of key bits * specified and the targeted device architecture). * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. * - \devicestorageP * - \devicestorage * * \par Performance * The following charts illustrate saturated sorting performance across different * CUDA architectures for uniform-random uint32,uint32 and * uint64,uint64 pairs, respectively. * * \image html lsb_radix_sort_int32_pairs.png * \image html lsb_radix_sort_int64_pairs.png * * \par Snippet * The code snippet below illustrates the sorting of a device vector of \p int keys * with associated vector of \p int values. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for sorting data * int num_items; // e.g., 7 * int *d_key_buf; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_key_alt_buf; // e.g., [ ... ] * int *d_value_buf; // e.g., [0, 1, 2, 3, 4, 5, 6] * int *d_value_alt_buf; // e.g., [ ... ] * ... 
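 * // (Assumed for this sketch: all four buffers are device allocations holding
 * //  num_items elements; because this overload sorts in place, either buffer of
 * //  each pair may be overwritten during the operation.)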
* * // Create a set of DoubleBuffers to wrap pairs of device pointers * cub::DoubleBuffer d_keys(d_key_buf, d_key_alt_buf); * cub::DoubleBuffer d_values(d_value_buf, d_value_alt_buf); * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceRadixSort::SortPairs(d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run sorting operation * cub::DeviceRadixSort::SortPairs(d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items); * * // d_keys.Current() <-- [0, 3, 5, 6, 7, 8, 9] * // d_values.Current() <-- [5, 4, 3, 1, 2, 0, 6] * * \endcode * * \tparam KeyT [inferred] KeyT type * \tparam ValueT [inferred] ValueT type */ template < typename KeyT, typename ValueT> CUB_RUNTIME_FUNCTION static cudaError_t SortPairs( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation DoubleBuffer &d_keys, ///< [in,out] Reference to the double-buffer of keys whose "current" device-accessible buffer contains the unsorted input keys and, upon return, is updated to point to the sorted output keys DoubleBuffer &d_values, ///< [in,out] Double-buffer of values whose "current" device-accessible buffer contains the unsorted input values and, upon return, is updated to point to the sorted output values int num_items, ///< [in] Number of items to sort int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; return DispatchRadixSort::Dispatch( d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, begin_bit, end_bit, true, stream, debug_synchronous); } /** * \brief Sorts key-value pairs into descending order. (~2N auxiliary storage required). * * \par * - The contents of the input data are not altered by the sorting operation * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. * - \devicestorageNP For sorting using only O(P) temporary storage, see the sorting interface using DoubleBuffer wrappers below. * - \devicestorage * * \par Performance * Performance is similar to DeviceRadixSort::SortPairs. * * \par Snippet * The code snippet below illustrates the sorting of a device vector of \p int keys * with associated vector of \p int values. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for sorting data * int num_items; // e.g., 7 * int *d_keys_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_keys_out; // e.g., [ ... ] * int *d_values_in; // e.g., [0, 1, 2, 3, 4, 5, 6] * int *d_values_out; // e.g., [ ... ] * ... 
* * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceRadixSort::SortPairsDescending(d_temp_storage, temp_storage_bytes, * d_keys_in, d_keys_out, d_values_in, d_values_out, num_items); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run sorting operation * cub::DeviceRadixSort::SortPairsDescending(d_temp_storage, temp_storage_bytes, * d_keys_in, d_keys_out, d_values_in, d_values_out, num_items); * * // d_keys_out <-- [9, 8, 7, 6, 5, 3, 0] * // d_values_out <-- [6, 0, 2, 1, 3, 4, 5] * * \endcode * * \tparam KeyT [inferred] KeyT type * \tparam ValueT [inferred] ValueT type */ template < typename KeyT, typename ValueT> CUB_RUNTIME_FUNCTION static cudaError_t SortPairsDescending( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation const KeyT *d_keys_in, ///< [in] Pointer to the input data of key data to sort KeyT *d_keys_out, ///< [out] Pointer to the sorted output sequence of key data const ValueT *d_values_in, ///< [in] Pointer to the corresponding input sequence of associated value items ValueT *d_values_out, ///< [out] Pointer to the correspondingly-reordered output sequence of associated value items int num_items, ///< [in] Number of items to sort int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; DoubleBuffer d_keys(const_cast(d_keys_in), d_keys_out); DoubleBuffer d_values(const_cast(d_values_in), d_values_out); return DispatchRadixSort::Dispatch( d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, begin_bit, end_bit, false, stream, debug_synchronous); } /** * \brief Sorts key-value pairs into descending order. (~N auxiliary storage required). * * \par * - The sorting operation is given a pair of key buffers and a corresponding * pair of associated value buffers. Each pair is managed by a DoubleBuffer * structure that indicates which of the two buffers is "current" (and thus * contains the input data to be sorted). * - The contents of both buffers within each pair may be altered by the sorting * operation. * - Upon completion, the sorting operation will update the "current" indicator * within each DoubleBuffer wrapper to reference which of the two buffers * now contains the sorted output sequence (a function of the number of key bits * specified and the targeted device architecture). * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. * - \devicestorageP * - \devicestorage * * \par Performance * Performance is similar to DeviceRadixSort::SortPairs. 
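 *
 * \par
 * A minimal sketch, assuming a host array \p h_keys of \p num_items ints and the
 * buffer names used in the snippet below, of retrieving the result of this
 * in-place sort:
 * \code
 * // Either buffer of the pair may hold the sorted keys; DoubleBuffer::Current()
 * // reports which one does, so copy from it before releasing the allocations.
 * cudaMemcpy(h_keys, d_keys.Current(), num_items * sizeof(int), cudaMemcpyDeviceToHost);
 * cudaFree(d_key_buf);   cudaFree(d_key_alt_buf);
 * cudaFree(d_value_buf); cudaFree(d_value_alt_buf);
 * cudaFree(d_temp_storage);
 * \endcode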
* * \par Snippet * The code snippet below illustrates the sorting of a device vector of \p int keys * with associated vector of \p int values. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for sorting data * int num_items; // e.g., 7 * int *d_key_buf; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_key_alt_buf; // e.g., [ ... ] * int *d_value_buf; // e.g., [0, 1, 2, 3, 4, 5, 6] * int *d_value_alt_buf; // e.g., [ ... ] * ... * * // Create a set of DoubleBuffers to wrap pairs of device pointers * cub::DoubleBuffer d_keys(d_key_buf, d_key_alt_buf); * cub::DoubleBuffer d_values(d_value_buf, d_value_alt_buf); * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceRadixSort::SortPairsDescending(d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run sorting operation * cub::DeviceRadixSort::SortPairsDescending(d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items); * * // d_keys.Current() <-- [9, 8, 7, 6, 5, 3, 0] * // d_values.Current() <-- [6, 0, 2, 1, 3, 4, 5] * * \endcode * * \tparam KeyT [inferred] KeyT type * \tparam ValueT [inferred] ValueT type */ template < typename KeyT, typename ValueT> CUB_RUNTIME_FUNCTION static cudaError_t SortPairsDescending( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation DoubleBuffer &d_keys, ///< [in,out] Reference to the double-buffer of keys whose "current" device-accessible buffer contains the unsorted input keys and, upon return, is updated to point to the sorted output keys DoubleBuffer &d_values, ///< [in,out] Double-buffer of values whose "current" device-accessible buffer contains the unsorted input values and, upon return, is updated to point to the sorted output values int num_items, ///< [in] Number of items to sort int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; return DispatchRadixSort::Dispatch( d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, begin_bit, end_bit, true, stream, debug_synchronous); } //@} end member group /******************************************************************//** * \name Keys-only *********************************************************************/ //@{ /** * \brief Sorts keys into ascending order. (~2N auxiliary storage required) * * \par * - The contents of the input data are not altered by the sorting operation * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. 
* - \devicestorageNP For sorting using only O(P) temporary storage, see the sorting interface using DoubleBuffer wrappers below. * - \devicestorage * * \par Performance * The following charts illustrate saturated sorting performance across different * CUDA architectures for uniform-random \p uint32 and \p uint64 keys, respectively. * * \image html lsb_radix_sort_int32_keys.png * \image html lsb_radix_sort_int64_keys.png * * \par Snippet * The code snippet below illustrates the sorting of a device vector of \p int keys. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for sorting data * int num_items; // e.g., 7 * int *d_keys_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_keys_out; // e.g., [ ... ] * ... * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceRadixSort::SortKeys(d_temp_storage, temp_storage_bytes, d_keys_in, d_keys_out, num_items); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run sorting operation * cub::DeviceRadixSort::SortKeys(d_temp_storage, temp_storage_bytes, d_keys_in, d_keys_out, num_items); * * // d_keys_out <-- [0, 3, 5, 6, 7, 8, 9] * * \endcode * * \tparam KeyT [inferred] KeyT type */ template CUB_RUNTIME_FUNCTION static cudaError_t SortKeys( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation const KeyT *d_keys_in, ///< [in] Pointer to the input data of key data to sort KeyT *d_keys_out, ///< [out] Pointer to the sorted output sequence of key data int num_items, ///< [in] Number of items to sort int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; // Null value type DoubleBuffer d_keys(const_cast(d_keys_in), d_keys_out); DoubleBuffer d_values; return DispatchRadixSort::Dispatch( d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, begin_bit, end_bit, false, stream, debug_synchronous); } /** * \brief Sorts keys into ascending order. (~N auxiliary storage required). * * \par * - The sorting operation is given a pair of key buffers managed by a * DoubleBuffer structure that indicates which of the two buffers is * "current" (and thus contains the input data to be sorted). * - The contents of both buffers may be altered by the sorting operation. * - Upon completion, the sorting operation will update the "current" indicator * within the DoubleBuffer wrapper to reference which of the two buffers * now contains the sorted output sequence (a function of the number of key bits * specified and the targeted device architecture). * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. 
This can reduce overall sorting overhead and yield a corresponding performance improvement. * - \devicestorageP * - \devicestorage * * \par Performance * The following charts illustrate saturated sorting performance across different * CUDA architectures for uniform-random \p uint32 and \p uint64 keys, respectively. * * \image html lsb_radix_sort_int32_keys.png * \image html lsb_radix_sort_int64_keys.png * * \par Snippet * The code snippet below illustrates the sorting of a device vector of \p int keys. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for sorting data * int num_items; // e.g., 7 * int *d_key_buf; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_key_alt_buf; // e.g., [ ... ] * ... * * // Create a DoubleBuffer to wrap the pair of device pointers * cub::DoubleBuffer d_keys(d_key_buf, d_key_alt_buf); * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceRadixSort::SortKeys(d_temp_storage, temp_storage_bytes, d_keys, num_items); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run sorting operation * cub::DeviceRadixSort::SortKeys(d_temp_storage, temp_storage_bytes, d_keys, num_items); * * // d_keys.Current() <-- [0, 3, 5, 6, 7, 8, 9] * * \endcode * * \tparam KeyT [inferred] KeyT type */ template CUB_RUNTIME_FUNCTION static cudaError_t SortKeys( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation DoubleBuffer &d_keys, ///< [in,out] Reference to the double-buffer of keys whose "current" device-accessible buffer contains the unsorted input keys and, upon return, is updated to point to the sorted output keys int num_items, ///< [in] Number of items to sort int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; // Null value type DoubleBuffer d_values; return DispatchRadixSort::Dispatch( d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, begin_bit, end_bit, true, stream, debug_synchronous); } /** * \brief Sorts keys into descending order. (~2N auxiliary storage required). * * \par * - The contents of the input data are not altered by the sorting operation * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. * - \devicestorageNP For sorting using only O(P) temporary storage, see the sorting interface using DoubleBuffer wrappers below. * - \devicestorage * * \par Performance * Performance is similar to DeviceRadixSort::SortKeys. * * \par Snippet * The code snippet below illustrates the sorting of a device vector of \p int keys. 
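 *
 * \par
 * One note first: the snippet leaves the optional \p begin_bit / \p end_bit
 * arguments at their defaults. If the keys were known to occupy only their low
 * 16 bits, the same sort could, for example, be restricted to that bit range:
 * \code
 * // Hypothetical variant: only bits [0, 16) differentiate the keys
 * cub::DeviceRadixSort::SortKeysDescending(d_temp_storage, temp_storage_bytes,
 *                                          d_keys_in, d_keys_out, num_items, 0, 16);
 * \endcode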
* \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for sorting data * int num_items; // e.g., 7 * int *d_keys_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_keys_out; // e.g., [ ... ] * ... * * // Create a DoubleBuffer to wrap the pair of device pointers * cub::DoubleBuffer d_keys(d_key_buf, d_key_alt_buf); * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceRadixSort::SortKeysDescending(d_temp_storage, temp_storage_bytes, d_keys_in, d_keys_out, num_items); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run sorting operation * cub::DeviceRadixSort::SortKeysDescending(d_temp_storage, temp_storage_bytes, d_keys_in, d_keys_out, num_items); * * // d_keys_out <-- [9, 8, 7, 6, 5, 3, 0]s * * \endcode * * \tparam KeyT [inferred] KeyT type */ template CUB_RUNTIME_FUNCTION static cudaError_t SortKeysDescending( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation const KeyT *d_keys_in, ///< [in] Pointer to the input data of key data to sort KeyT *d_keys_out, ///< [out] Pointer to the sorted output sequence of key data int num_items, ///< [in] Number of items to sort int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; DoubleBuffer d_keys(const_cast(d_keys_in), d_keys_out); DoubleBuffer d_values; return DispatchRadixSort::Dispatch( d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, begin_bit, end_bit, false, stream, debug_synchronous); } /** * \brief Sorts keys into descending order. (~N auxiliary storage required). * * \par * - The sorting operation is given a pair of key buffers managed by a * DoubleBuffer structure that indicates which of the two buffers is * "current" (and thus contains the input data to be sorted). * - The contents of both buffers may be altered by the sorting operation. * - Upon completion, the sorting operation will update the "current" indicator * within the DoubleBuffer wrapper to reference which of the two buffers * now contains the sorted output sequence (a function of the number of key bits * specified and the targeted device architecture). * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. * - \devicestorageP * - \devicestorage * * \par Performance * Performance is similar to DeviceRadixSort::SortKeys. * * \par Snippet * The code snippet below illustrates the sorting of a device vector of \p int keys. 
* \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for sorting data * int num_items; // e.g., 7 * int *d_key_buf; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_key_alt_buf; // e.g., [ ... ] * ... * * // Create a DoubleBuffer to wrap the pair of device pointers * cub::DoubleBuffer d_keys(d_key_buf, d_key_alt_buf); * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceRadixSort::SortKeysDescending(d_temp_storage, temp_storage_bytes, d_keys, num_items); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run sorting operation * cub::DeviceRadixSort::SortKeysDescending(d_temp_storage, temp_storage_bytes, d_keys, num_items); * * // d_keys.Current() <-- [9, 8, 7, 6, 5, 3, 0] * * \endcode * * \tparam KeyT [inferred] KeyT type */ template CUB_RUNTIME_FUNCTION static cudaError_t SortKeysDescending( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation DoubleBuffer &d_keys, ///< [in,out] Reference to the double-buffer of keys whose "current" device-accessible buffer contains the unsorted input keys and, upon return, is updated to point to the sorted output keys int num_items, ///< [in] Number of items to sort int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; // Null value type DoubleBuffer d_values; return DispatchRadixSort::Dispatch( d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, begin_bit, end_bit, true, stream, debug_synchronous); } //@} end member group }; /** * \example example_device_radix_sort.cu */ } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/device/device_reduce.cuh000066400000000000000000001137701411340063500225440ustar00rootroot00000000000000 /****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::DeviceReduce provides device-wide, parallel operations for computing a reduction across a sequence of data items residing within device-accessible memory. */ #pragma once #include #include #include #include "../iterator/arg_index_input_iterator.cuh" #include "dispatch/dispatch_reduce.cuh" #include "dispatch/dispatch_reduce_by_key.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief DeviceReduce provides device-wide, parallel operations for computing a reduction across a sequence of data items residing within device-accessible memory. ![](reduce_logo.png) * \ingroup SingleModule * * \par Overview * A reduction (or fold) * uses a binary combining operator to compute a single aggregate from a sequence of input elements. * * \par Usage Considerations * \cdp_class{DeviceReduce} * * \par Performance * \linear_performance{reduction, reduce-by-key, and run-length encode} * * \par * The following chart illustrates DeviceReduce::Sum * performance across different CUDA architectures for \p int32 keys. * * \image html reduce_int32.png * * \par * The following chart illustrates DeviceReduce::ReduceByKey (summation) * performance across different CUDA architectures for \p fp32 * values. Segments are identified by \p int32 keys, and have lengths uniformly sampled from [1,1000]. * * \image html reduce_by_key_fp32_len_500.png * * \par * \plots_below * */ struct DeviceReduce { /** * \brief Computes a device-wide reduction using the specified binary \p reduction_op functor and initial value \p init. * * \par * - Does not support binary reduction operators that are non-commutative. * - Provides "run-to-run" determinism for pseudo-associative reduction * (e.g., addition of floating point types) on the same GPU device. * However, results for pseudo-associative reduction may be inconsistent * from one device to a another device of a different compute-capability * because CUB can employ different tile-sizing for different architectures. * - \devicestorage * * \par Snippet * The code snippet below illustrates a user-defined min-reduction of a device vector of \p int data elements. * \par * \code * #include // or equivalently * * // CustomMin functor * struct CustomMin * { * template * __device__ __forceinline__ * T operator()(const T &a, const T &b) const { * return (b < a) ? b : a; * } * }; * * // Declare, allocate, and initialize device-accessible pointers for input and output * int num_items; // e.g., 7 * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_out; // e.g., [-] * CustomMin min_op; * int init; // e.g., INT_MAX * ... 
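 * // (Note on the choice of init: INT_MAX is the identity element for this min
 * //  operation, so it cannot displace any genuine input value in the reduction.)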
* * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceReduce::Reduce(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, min_op, init); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run reduction * cub::DeviceReduce::Reduce(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, min_op, init); * * // d_out <-- [0] * * \endcode * * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items \iterator * \tparam OutputIteratorT [inferred] Output iterator type for recording the reduced aggregate \iterator * \tparam ReductionOpT [inferred] Binary reduction functor type having member T operator()(const T &a, const T &b) * \tparam T [inferred] Data element type that is convertible to the \p value type of \p InputIteratorT */ template < typename InputIteratorT, typename OutputIteratorT, typename ReductionOpT, typename T> CUB_RUNTIME_FUNCTION static cudaError_t Reduce( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output aggregate int num_items, ///< [in] Total number of input items (i.e., length of \p d_in) ReductionOpT reduction_op, ///< [in] Binary reduction functor T init, ///< [in] Initial value of the reduction cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; return DispatchReduce::Dispatch( d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, reduction_op, init, stream, debug_synchronous); } /** * \brief Computes a device-wide sum using the addition (\p +) operator. * * \par * - Uses \p 0 as the initial value of the reduction. * - Does not support \p + operators that are non-commutative.. * - Provides "run-to-run" determinism for pseudo-associative reduction * (e.g., addition of floating point types) on the same GPU device. * However, results for pseudo-associative reduction may be inconsistent * from one device to a another device of a different compute-capability * because CUB can employ different tile-sizing for different architectures. * - \devicestorage * * \par Performance * The following charts illustrate saturated sum-reduction performance across different * CUDA architectures for \p int32 and \p int64 items, respectively. * * \image html reduce_int32.png * \image html reduce_int64.png * * \par Snippet * The code snippet below illustrates the sum-reduction of a device vector of \p int data elements. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input and output * int num_items; // e.g., 7 * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_out; // e.g., [-] * ... 
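 * // --- Added explanatory note (not in the original example): the two calls
 * // below follow the usual two-phase CUB pattern -- the first call, made with
 * // d_temp_storage == NULL, only writes the required size into
 * // temp_storage_bytes and does no work; only the second call performs the sum.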
* * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run sum-reduction * cub::DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items); * * // d_out <-- [38] * * \endcode * * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items \iterator * \tparam OutputIteratorT [inferred] Output iterator type for recording the reduced aggregate \iterator */ template < typename InputIteratorT, typename OutputIteratorT> CUB_RUNTIME_FUNCTION static cudaError_t Sum( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output aggregate int num_items, ///< [in] Total number of input items (i.e., length of \p d_in) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; // The output value type typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? typename std::iterator_traits::value_type, // ... then the input iterator's value type, typename std::iterator_traits::value_type>::Type OutputT; // ... else the output iterator's value type return DispatchReduce::Dispatch( d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, cub::Sum(), OutputT(), // zero-initialize stream, debug_synchronous); } /** * \brief Computes a device-wide minimum using the less-than ('<') operator. * * \par * - Uses std::numeric_limits::max() as the initial value of the reduction. * - Does not support \p < operators that are non-commutative. * - Provides "run-to-run" determinism for pseudo-associative reduction * (e.g., addition of floating point types) on the same GPU device. * However, results for pseudo-associative reduction may be inconsistent * from one device to a another device of a different compute-capability * because CUB can employ different tile-sizing for different architectures. * - \devicestorage * * \par Snippet * The code snippet below illustrates the min-reduction of a device vector of \p int data elements. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input and output * int num_items; // e.g., 7 * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_out; // e.g., [-] * ... 
* * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceReduce::Min(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run min-reduction * cub::DeviceReduce::Min(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items); * * // d_out <-- [0] * * \endcode * * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items \iterator * \tparam OutputIteratorT [inferred] Output iterator type for recording the reduced aggregate \iterator */ template < typename InputIteratorT, typename OutputIteratorT> CUB_RUNTIME_FUNCTION static cudaError_t Min( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output aggregate int num_items, ///< [in] Total number of input items (i.e., length of \p d_in) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; // The input value type typedef typename std::iterator_traits::value_type InputT; return DispatchReduce::Dispatch( d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, cub::Min(), Traits::Max(), // replace with std::numeric_limits::max() when C++11 support is more prevalent stream, debug_synchronous); } /** * \brief Finds the first device-wide minimum using the less-than ('<') operator, also returning the index of that item. * * \par * - The output value type of \p d_out is cub::KeyValuePair (assuming the value type of \p d_in is \p T) * - The minimum is written to d_out.value and its offset in the input array is written to d_out.key. * - The {1, std::numeric_limits::max()} tuple is produced for zero-length inputs * - Does not support \p < operators that are non-commutative. * - Provides "run-to-run" determinism for pseudo-associative reduction * (e.g., addition of floating point types) on the same GPU device. * However, results for pseudo-associative reduction may be inconsistent * from one device to a another device of a different compute-capability * because CUB can employ different tile-sizing for different architectures. * - \devicestorage * * \par Snippet * The code snippet below illustrates the argmin-reduction of a device vector of \p int data elements. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input and output * int num_items; // e.g., 7 * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * KeyValuePair *d_out; // e.g., [{-,-}] * ... 
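 * // --- Hypothetical allocation sketch (not part of the original example): the
 * // output is a single offset/value pair, so it could be allocated as, e.g.,
 * //   cudaMalloc(&d_out, sizeof(cub::KeyValuePair<int, int>));
 * // after the reduction, d_out->key holds the offset of the minimum and
 * // d_out->value holds the minimum itself.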
* * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceReduce::ArgMin(d_temp_storage, temp_storage_bytes, d_in, d_argmin, num_items); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run argmin-reduction * cub::DeviceReduce::ArgMin(d_temp_storage, temp_storage_bytes, d_in, d_argmin, num_items); * * // d_out <-- [{5, 0}] * * \endcode * * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items (of some type \p T) \iterator * \tparam OutputIteratorT [inferred] Output iterator type for recording the reduced aggregate (having value type cub::KeyValuePair) \iterator */ template < typename InputIteratorT, typename OutputIteratorT> CUB_RUNTIME_FUNCTION static cudaError_t ArgMin( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output aggregate int num_items, ///< [in] Total number of input items (i.e., length of \p d_in) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; // The input type typedef typename std::iterator_traits::value_type InputValueT; // The output tuple type typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? KeyValuePair, // ... then the key value pair OffsetT + InputValueT typename std::iterator_traits::value_type>::Type OutputTupleT; // ... else the output iterator's value type // The output value type typedef typename OutputTupleT::Value OutputValueT; // Wrapped input iterator to produce index-value tuples typedef ArgIndexInputIterator ArgIndexInputIteratorT; ArgIndexInputIteratorT d_indexed_in(d_in); // Initial value OutputTupleT initial_value(1, Traits::Max()); // replace with std::numeric_limits::max() when C++11 support is more prevalent return DispatchReduce::Dispatch( d_temp_storage, temp_storage_bytes, d_indexed_in, d_out, num_items, cub::ArgMin(), initial_value, stream, debug_synchronous); } /** * \brief Computes a device-wide maximum using the greater-than ('>') operator. * * \par * - Uses std::numeric_limits::lowest() as the initial value of the reduction. * - Does not support \p > operators that are non-commutative. * - Provides "run-to-run" determinism for pseudo-associative reduction * (e.g., addition of floating point types) on the same GPU device. * However, results for pseudo-associative reduction may be inconsistent * from one device to a another device of a different compute-capability * because CUB can employ different tile-sizing for different architectures. * - \devicestorage * * \par Snippet * The code snippet below illustrates the max-reduction of a device vector of \p int data elements. 
* \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input and output * int num_items; // e.g., 7 * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_out; // e.g., [-] * ... * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceReduce::Max(d_temp_storage, temp_storage_bytes, d_in, d_max, num_items); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run max-reduction * cub::DeviceReduce::Max(d_temp_storage, temp_storage_bytes, d_in, d_max, num_items); * * // d_out <-- [9] * * \endcode * * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items \iterator * \tparam OutputIteratorT [inferred] Output iterator type for recording the reduced aggregate \iterator */ template < typename InputIteratorT, typename OutputIteratorT> CUB_RUNTIME_FUNCTION static cudaError_t Max( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output aggregate int num_items, ///< [in] Total number of input items (i.e., length of \p d_in) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; // The input value type typedef typename std::iterator_traits::value_type InputT; return DispatchReduce::Dispatch( d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, cub::Max(), Traits::Lowest(), // replace with std::numeric_limits::lowest() when C++11 support is more prevalent stream, debug_synchronous); } /** * \brief Finds the first device-wide maximum using the greater-than ('>') operator, also returning the index of that item * * \par * - The output value type of \p d_out is cub::KeyValuePair (assuming the value type of \p d_in is \p T) * - The maximum is written to d_out.value and its offset in the input array is written to d_out.key. * - The {1, std::numeric_limits::lowest()} tuple is produced for zero-length inputs * - Does not support \p > operators that are non-commutative. * - Provides "run-to-run" determinism for pseudo-associative reduction * (e.g., addition of floating point types) on the same GPU device. * However, results for pseudo-associative reduction may be inconsistent * from one device to a another device of a different compute-capability * because CUB can employ different tile-sizing for different architectures. * - \devicestorage * * \par Snippet * The code snippet below illustrates the argmax-reduction of a device vector of \p int data elements. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input and output * int num_items; // e.g., 7 * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * KeyValuePair *d_out; // e.g., [{-,-}] * ... 
* * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceReduce::ArgMax(d_temp_storage, temp_storage_bytes, d_in, d_argmax, num_items); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run argmax-reduction * cub::DeviceReduce::ArgMax(d_temp_storage, temp_storage_bytes, d_in, d_argmax, num_items); * * // d_out <-- [{6, 9}] * * \endcode * * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items (of some type \p T) \iterator * \tparam OutputIteratorT [inferred] Output iterator type for recording the reduced aggregate (having value type cub::KeyValuePair) \iterator */ template < typename InputIteratorT, typename OutputIteratorT> CUB_RUNTIME_FUNCTION static cudaError_t ArgMax( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output aggregate int num_items, ///< [in] Total number of input items (i.e., length of \p d_in) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; // The input type typedef typename std::iterator_traits::value_type InputValueT; // The output tuple type typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? KeyValuePair, // ... then the key value pair OffsetT + InputValueT typename std::iterator_traits::value_type>::Type OutputTupleT; // ... else the output iterator's value type // The output value type typedef typename OutputTupleT::Value OutputValueT; // Wrapped input iterator to produce index-value tuples typedef ArgIndexInputIterator ArgIndexInputIteratorT; ArgIndexInputIteratorT d_indexed_in(d_in); // Initial value OutputTupleT initial_value(1, Traits::Lowest()); // replace with std::numeric_limits::lowest() when C++11 support is more prevalent return DispatchReduce::Dispatch( d_temp_storage, temp_storage_bytes, d_indexed_in, d_out, num_items, cub::ArgMax(), initial_value, stream, debug_synchronous); } /** * \brief Reduces segments of values, where segments are demarcated by corresponding runs of identical keys. * * \par * This operation computes segmented reductions within \p d_values_in using * the specified binary \p reduction_op functor. The segments are identified by * "runs" of corresponding keys in \p d_keys_in, where runs are maximal ranges of * consecutive, identical keys. For the ith run encountered, * the first key of the run and the corresponding value aggregate of that run are * written to d_unique_out[i] and d_aggregates_out[i], * respectively. The total number of runs encountered is written to \p d_num_runs_out. * * \par * - The == equality operator is used to determine whether keys are equivalent * - Provides "run-to-run" determinism for pseudo-associative reduction * (e.g., addition of floating point types) on the same GPU device. 
* However, results for pseudo-associative reduction may be inconsistent * from one device to a another device of a different compute-capability * because CUB can employ different tile-sizing for different architectures. * - \devicestorage * * \par Performance * The following chart illustrates reduction-by-key (sum) performance across * different CUDA architectures for \p fp32 and \p fp64 values, respectively. Segments * are identified by \p int32 keys, and have lengths uniformly sampled from [1,1000]. * * \image html reduce_by_key_fp32_len_500.png * \image html reduce_by_key_fp64_len_500.png * * \par * The following charts are similar, but with segment lengths uniformly sampled from [1,10]: * * \image html reduce_by_key_fp32_len_5.png * \image html reduce_by_key_fp64_len_5.png * * \par Snippet * The code snippet below illustrates the segmented reduction of \p int values grouped * by runs of associated \p int keys. * \par * \code * #include // or equivalently * * // CustomMin functor * struct CustomMin * { * template * CUB_RUNTIME_FUNCTION __forceinline__ * T operator()(const T &a, const T &b) const { * return (b < a) ? b : a; * } * }; * * // Declare, allocate, and initialize device-accessible pointers for input and output * int num_items; // e.g., 8 * int *d_keys_in; // e.g., [0, 2, 2, 9, 5, 5, 5, 8] * int *d_values_in; // e.g., [0, 7, 1, 6, 2, 5, 3, 4] * int *d_unique_out; // e.g., [-, -, -, -, -, -, -, -] * int *d_aggregates_out; // e.g., [-, -, -, -, -, -, -, -] * int *d_num_runs_out; // e.g., [-] * CustomMin reduction_op; * ... * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceReduce::ReduceByKey(d_temp_storage, temp_storage_bytes, d_keys_in, d_unique_out, d_values_in, d_aggregates_out, d_num_runs_out, reduction_op, num_items); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run reduce-by-key * cub::DeviceReduce::ReduceByKey(d_temp_storage, temp_storage_bytes, d_keys_in, d_unique_out, d_values_in, d_aggregates_out, d_num_runs_out, reduction_op, num_items); * * // d_unique_out <-- [0, 2, 9, 5, 8] * // d_aggregates_out <-- [0, 1, 6, 2, 4] * // d_num_runs_out <-- [5] * * \endcode * * \tparam KeysInputIteratorT [inferred] Random-access input iterator type for reading input keys \iterator * \tparam UniqueOutputIteratorT [inferred] Random-access output iterator type for writing unique output keys \iterator * \tparam ValuesInputIteratorT [inferred] Random-access input iterator type for reading input values \iterator * \tparam AggregatesOutputIterator [inferred] Random-access output iterator type for writing output value aggregates \iterator * \tparam NumRunsOutputIteratorT [inferred] Output iterator type for recording the number of runs encountered \iterator * \tparam ReductionOpT [inferred] Binary reduction functor type having member T operator()(const T &a, const T &b) */ template < typename KeysInputIteratorT, typename UniqueOutputIteratorT, typename ValuesInputIteratorT, typename AggregatesOutputIteratorT, typename NumRunsOutputIteratorT, typename ReductionOpT> CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t ReduceByKey( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. 
size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation KeysInputIteratorT d_keys_in, ///< [in] Pointer to the input sequence of keys UniqueOutputIteratorT d_unique_out, ///< [out] Pointer to the output sequence of unique keys (one key per run) ValuesInputIteratorT d_values_in, ///< [in] Pointer to the input sequence of corresponding values AggregatesOutputIteratorT d_aggregates_out, ///< [out] Pointer to the output sequence of value aggregates (one aggregate per run) NumRunsOutputIteratorT d_num_runs_out, ///< [out] Pointer to total number of runs encountered (i.e., the length of d_unique_out) ReductionOpT reduction_op, ///< [in] Binary reduction functor int num_items, ///< [in] Total number of associated key+value pairs (i.e., the length of \p d_in_keys and \p d_in_values) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; // FlagT iterator type (not used) // Selection op (not used) // Default == operator typedef Equality EqualityOp; return DispatchReduceByKey::Dispatch( d_temp_storage, temp_storage_bytes, d_keys_in, d_unique_out, d_values_in, d_aggregates_out, d_num_runs_out, EqualityOp(), reduction_op, num_items, stream, debug_synchronous); } }; /** * \example example_device_reduce.cu */ } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/device/device_run_length_encode.cuh000066400000000000000000000347411411340063500247570ustar00rootroot00000000000000 /****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ******************************************************************************/ /** * \file * cub::DeviceRunLengthEncode provides device-wide, parallel operations for computing a run-length encoding across a sequence of data items residing within device-accessible memory. */ #pragma once #include #include #include "dispatch/dispatch_rle.cuh" #include "dispatch/dispatch_reduce_by_key.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief DeviceRunLengthEncode provides device-wide, parallel operations for demarcating "runs" of same-valued items within a sequence residing within device-accessible memory. ![](run_length_encode_logo.png) * \ingroup SingleModule * * \par Overview * A run-length encoding * computes a simple compressed representation of a sequence of input elements such that each * maximal "run" of consecutive same-valued data items is encoded as a single data value along with a * count of the elements in that run. * * \par Usage Considerations * \cdp_class{DeviceRunLengthEncode} * * \par Performance * \linear_performance{run-length encode} * * \par * The following chart illustrates DeviceRunLengthEncode::RunLengthEncode performance across * different CUDA architectures for \p int32 items. * Segments have lengths uniformly sampled from [1,1000]. * * \image html rle_int32_len_500.png * * \par * \plots_below * */ struct DeviceRunLengthEncode { /** * \brief Computes a run-length encoding of the sequence \p d_in. * * \par * - For the ith run encountered, the first key of the run and its length are written to * d_unique_out[i] and d_counts_out[i], * respectively. * - The total number of runs encountered is written to \p d_num_runs_out. * - The == equality operator is used to determine whether values are equivalent * - \devicestorage * * \par Performance * The following charts illustrate saturated encode performance across different * CUDA architectures for \p int32 and \p int64 items, respectively. Segments have * lengths uniformly sampled from [1,1000]. * * \image html rle_int32_len_500.png * \image html rle_int64_len_500.png * * \par * The following charts are similar, but with segment lengths uniformly sampled from [1,10]: * * \image html rle_int32_len_5.png * \image html rle_int64_len_5.png * * \par Snippet * The code snippet below illustrates the run-length encoding of a sequence of \p int values. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input and output * int num_items; // e.g., 8 * int *d_in; // e.g., [0, 2, 2, 9, 5, 5, 5, 8] * int *d_unique_out; // e.g., [ , , , , , , , ] * int *d_counts_out; // e.g., [ , , , , , , , ] * int *d_num_runs_out; // e.g., [ ] * ... 
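 * // --- Added note (not in the original example): the number of runs is not
 * // known until the encode completes, but it can never exceed num_items
 * // (every run holds at least one item), so sizing d_unique_out and
 * // d_counts_out for num_items elements, as above, is always sufficient.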
* * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceRunLengthEncode::Encode(d_temp_storage, temp_storage_bytes, d_in, d_unique_out, d_counts_out, d_num_runs_out, num_items); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run encoding * cub::DeviceRunLengthEncode::Encode(d_temp_storage, temp_storage_bytes, d_in, d_unique_out, d_counts_out, d_num_runs_out, num_items); * * // d_unique_out <-- [0, 2, 9, 5, 8] * // d_counts_out <-- [1, 2, 1, 3, 1] * // d_num_runs_out <-- [5] * * \endcode * * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items \iterator * \tparam UniqueOutputIteratorT [inferred] Random-access output iterator type for writing unique output items \iterator * \tparam LengthsOutputIteratorT [inferred] Random-access output iterator type for writing output counts \iterator * \tparam NumRunsOutputIteratorT [inferred] Output iterator type for recording the number of runs encountered \iterator */ template < typename InputIteratorT, typename UniqueOutputIteratorT, typename LengthsOutputIteratorT, typename NumRunsOutputIteratorT> CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t Encode( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of keys UniqueOutputIteratorT d_unique_out, ///< [out] Pointer to the output sequence of unique keys (one key per run) LengthsOutputIteratorT d_counts_out, ///< [out] Pointer to the output sequence of run-lengths (one count per run) NumRunsOutputIteratorT d_num_runs_out, ///< [out] Pointer to total number of runs int num_items, ///< [in] Total number of associated key+value pairs (i.e., the length of \p d_in_keys and \p d_in_values) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. { typedef int OffsetT; // Signed integer type for global offsets typedef NullType* FlagIterator; // FlagT iterator type (not used) typedef NullType SelectOp; // Selection op (not used) typedef Equality EqualityOp; // Default == operator typedef cub::Sum ReductionOp; // Value reduction operator // The lengths output value type typedef typename If<(Equals::value_type, void>::VALUE), // LengthT = (if output iterator's value type is void) ? OffsetT, // ... then the OffsetT type, typename std::iterator_traits::value_type>::Type LengthT; // ... else the output iterator's value type // Generator type for providing 1s values for run-length reduction typedef ConstantInputIterator LengthsInputIteratorT; return DispatchReduceByKey::Dispatch( d_temp_storage, temp_storage_bytes, d_in, d_unique_out, LengthsInputIteratorT((LengthT) 1), d_counts_out, d_num_runs_out, EqualityOp(), ReductionOp(), num_items, stream, debug_synchronous); } /** * \brief Enumerates the starting offsets and lengths of all non-trivial runs (of length > 1) of same-valued keys in the sequence \p d_in. 
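 * (For example, in the input [0, 2, 2, 9, 5, 5, 5, 8] used in the snippet
 * below, only the runs of 2s and 5s have length > 1, so two runs are
 * reported: offsets [1, 4] and lengths [2, 3].)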
* * \par * - For the ith non-trivial run, the run's starting offset * and its length are written to d_offsets_out[i] and * d_lengths_out[i], respectively. * - The total number of runs encountered is written to \p d_num_runs_out. * - The == equality operator is used to determine whether values are equivalent * - \devicestorage * * \par Performance * * \par Snippet * The code snippet below illustrates the identification of non-trivial runs within a sequence of \p int values. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input and output * int num_items; // e.g., 8 * int *d_in; // e.g., [0, 2, 2, 9, 5, 5, 5, 8] * int *d_offsets_out; // e.g., [ , , , , , , , ] * int *d_lengths_out; // e.g., [ , , , , , , , ] * int *d_num_runs_out; // e.g., [ ] * ... * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceRunLengthEncode::NonTrivialRuns(d_temp_storage, temp_storage_bytes, d_in, d_offsets_out, d_lengths_out, d_num_runs_out, num_items); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run encoding * cub::DeviceRunLengthEncode::NonTrivialRuns(d_temp_storage, temp_storage_bytes, d_in, d_offsets_out, d_lengths_out, d_num_runs_out, num_items); * * // d_offsets_out <-- [1, 4] * // d_lengths_out <-- [2, 3] * // d_num_runs_out <-- [2] * * \endcode * * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items \iterator * \tparam OffsetsOutputIteratorT [inferred] Random-access output iterator type for writing run-offset values \iterator * \tparam LengthsOutputIteratorT [inferred] Random-access output iterator type for writing run-length values \iterator * \tparam NumRunsOutputIteratorT [inferred] Output iterator type for recording the number of runs encountered \iterator */ template < typename InputIteratorT, typename OffsetsOutputIteratorT, typename LengthsOutputIteratorT, typename NumRunsOutputIteratorT> CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t NonTrivialRuns( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to input sequence of data items OffsetsOutputIteratorT d_offsets_out, ///< [out] Pointer to output sequence of run-offsets (one offset per non-trivial run) LengthsOutputIteratorT d_lengths_out, ///< [out] Pointer to output sequence of run-lengths (one count per non-trivial run) NumRunsOutputIteratorT d_num_runs_out, ///< [out] Pointer to total number of runs (i.e., length of \p d_offsets_out) int num_items, ///< [in] Total number of associated key+value pairs (i.e., the length of \p d_in_keys and \p d_in_values) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. 
{ typedef int OffsetT; // Signed integer type for global offsets typedef Equality EqualityOp; // Default == operator return DeviceRleDispatch::Dispatch( d_temp_storage, temp_storage_bytes, d_in, d_offsets_out, d_lengths_out, d_num_runs_out, EqualityOp(), num_items, stream, debug_synchronous); } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/device/device_scan.cuh000066400000000000000000000524321411340063500222160ustar00rootroot00000000000000 /****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::DeviceScan provides device-wide, parallel operations for computing a prefix scan across a sequence of data items residing within device-accessible memory. */ #pragma once #include #include #include "dispatch/dispatch_scan.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief DeviceScan provides device-wide, parallel operations for computing a prefix scan across a sequence of data items residing within device-accessible memory. ![](device_scan.png) * \ingroup SingleModule * * \par Overview * Given a sequence of input elements and a binary reduction operator, a [prefix scan](http://en.wikipedia.org/wiki/Prefix_sum) * produces an output sequence where each element is computed to be the reduction * of the elements occurring earlier in the input sequence. Prefix sum * connotes a prefix scan with the addition operator. The term \em inclusive indicates * that the ith output reduction incorporates the ith input. * The term \em exclusive indicates the ith input is not incorporated into * the ith output reduction. 
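 *
 * \par
 * As a small illustration, consistent with the snippets below: an exclusive
 * prefix sum of [8, 6, 7, 5, 3, 0, 9] is [0, 8, 14, 21, 26, 29, 29], whereas
 * the inclusive prefix sum of the same sequence is [8, 14, 21, 26, 29, 29, 38].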
* * \par * As of CUB 1.0.1 (2013), CUB's device-wide scan APIs have implemented our "decoupled look-back" algorithm * for performing global prefix scan with only a single pass through the * input data, as described in our 2016 technical report [1]. The central * idea is to leverage a small, constant factor of redundant work in order to overlap the latencies * of global prefix propagation with local computation. As such, our algorithm requires only * ~2n data movement (n inputs are read, n outputs are written), and typically * proceeds at "memcpy" speeds. * * \par * [1] [Duane Merrill and Michael Garland. "Single-pass Parallel Prefix Scan with Decoupled Look-back", NVIDIA Technical Report NVR-2016-002, 2016.](https://research.nvidia.com/publication/single-pass-parallel-prefix-scan-decoupled-look-back) * * \par Usage Considerations * \cdp_class{DeviceScan} * * \par Performance * \linear_performance{prefix scan} * * \par * The following chart illustrates DeviceScan::ExclusiveSum * performance across different CUDA architectures for \p int32 keys. * \plots_below * * \image html scan_int32.png * */ struct DeviceScan { /******************************************************************//** * \name Exclusive scans *********************************************************************/ //@{ /** * \brief Computes a device-wide exclusive prefix sum. The value of 0 is applied as the initial value, and is assigned to *d_out. * * \par * - Supports non-commutative sum operators. * - Provides "run-to-run" determinism for pseudo-associative reduction * (e.g., addition of floating point types) on the same GPU device. * However, results for pseudo-associative reduction may be inconsistent * from one device to a another device of a different compute-capability * because CUB can employ different tile-sizing for different architectures. * - \devicestorage * * \par Performance * The following charts illustrate saturated exclusive sum performance across different * CUDA architectures for \p int32 and \p int64 items, respectively. * * \image html scan_int32.png * \image html scan_int64.png * * \par Snippet * The code snippet below illustrates the exclusive prefix sum of an \p int device vector. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input and output * int num_items; // e.g., 7 * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_out; // e.g., [ , , , , , , ] * ... * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceScan::ExclusiveSum(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run exclusive prefix sum * cub::DeviceScan::ExclusiveSum(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items); * * // d_out s<-- [0, 8, 14, 21, 26, 29, 29] * * \endcode * * \tparam InputIteratorT [inferred] Random-access input iterator type for reading scan inputs \iterator * \tparam OutputIteratorT [inferred] Random-access output iterator type for writing scan outputs \iterator */ template < typename InputIteratorT, typename OutputIteratorT> CUB_RUNTIME_FUNCTION static cudaError_t ExclusiveSum( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. 
size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output sequence of data items int num_items, ///< [in] Total number of input items (i.e., the length of \p d_in) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; // The output value type typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? typename std::iterator_traits::value_type, // ... then the input iterator's value type, typename std::iterator_traits::value_type>::Type OutputT; // ... else the output iterator's value type // Initial value OutputT init_value = 0; return DispatchScan::Dispatch( d_temp_storage, temp_storage_bytes, d_in, d_out, Sum(), init_value, num_items, stream, debug_synchronous); } /** * \brief Computes a device-wide exclusive prefix scan using the specified binary \p scan_op functor. The \p init_value value is applied as the initial value, and is assigned to *d_out. * * \par * - Supports non-commutative scan operators. * - Provides "run-to-run" determinism for pseudo-associative reduction * (e.g., addition of floating point types) on the same GPU device. * However, results for pseudo-associative reduction may be inconsistent * from one device to a another device of a different compute-capability * because CUB can employ different tile-sizing for different architectures. * - \devicestorage * * \par Snippet * The code snippet below illustrates the exclusive prefix min-scan of an \p int device vector * \par * \code * #include // or equivalently * * // CustomMin functor * struct CustomMin * { * template * CUB_RUNTIME_FUNCTION __forceinline__ * T operator()(const T &a, const T &b) const { * return (b < a) ? b : a; * } * }; * * // Declare, allocate, and initialize device-accessible pointers for input and output * int num_items; // e.g., 7 * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_out; // e.g., [ , , , , , , ] * CustomMin min_op * ... 
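 * // --- Added note (not in the original example): the initial value passed
 * // below, (int) MAX_INT, behaves as an identity for the min functor here and
 * // is also the value assigned to *d_out, which is why the output sequence
 * // begins with 2147483647.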
* * // Determine temporary device storage requirements for exclusive prefix scan * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceScan::ExclusiveScan(d_temp_storage, temp_storage_bytes, d_in, d_out, min_op, (int) MAX_INT, num_items); * * // Allocate temporary storage for exclusive prefix scan * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run exclusive prefix min-scan * cub::DeviceScan::ExclusiveScan(d_temp_storage, temp_storage_bytes, d_in, d_out, min_op, (int) MAX_INT, num_items); * * // d_out <-- [2147483647, 8, 6, 6, 5, 3, 0] * * \endcode * * \tparam InputIteratorT [inferred] Random-access input iterator type for reading scan inputs \iterator * \tparam OutputIteratorT [inferred] Random-access output iterator type for writing scan outputs \iterator * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) * \tparam Identity [inferred] Type of the \p identity value used Binary scan functor type having member T operator()(const T &a, const T &b) */ template < typename InputIteratorT, typename OutputIteratorT, typename ScanOpT, typename InitValueT> CUB_RUNTIME_FUNCTION static cudaError_t ExclusiveScan( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output sequence of data items ScanOpT scan_op, ///< [in] Binary scan functor InitValueT init_value, ///< [in] Initial value to seed the exclusive scan (and is assigned to *d_out) int num_items, ///< [in] Total number of input items (i.e., the length of \p d_in) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; return DispatchScan::Dispatch( d_temp_storage, temp_storage_bytes, d_in, d_out, scan_op, init_value, num_items, stream, debug_synchronous); } //@} end member group /******************************************************************//** * \name Inclusive scans *********************************************************************/ //@{ /** * \brief Computes a device-wide inclusive prefix sum. * * \par * - Supports non-commutative sum operators. * - Provides "run-to-run" determinism for pseudo-associative reduction * (e.g., addition of floating point types) on the same GPU device. * However, results for pseudo-associative reduction may be inconsistent * from one device to a another device of a different compute-capability * because CUB can employ different tile-sizing for different architectures. * - \devicestorage * * \par Snippet * The code snippet below illustrates the inclusive prefix sum of an \p int device vector. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input and output * int num_items; // e.g., 7 * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_out; // e.g., [ , , , , , , ] * ... 
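 * // --- Added note (not in the original example): the last element of an
 * // inclusive prefix sum is the total of the whole sequence, so the final
 * // output value below (38) matches the aggregate that cub::DeviceReduce::Sum
 * // produces for the same input.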
* * // Determine temporary device storage requirements for inclusive prefix sum * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceScan::InclusiveSum(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items); * * // Allocate temporary storage for inclusive prefix sum * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run inclusive prefix sum * cub::DeviceScan::InclusiveSum(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items); * * // d_out <-- [8, 14, 21, 26, 29, 29, 38] * * \endcode * * \tparam InputIteratorT [inferred] Random-access input iterator type for reading scan inputs \iterator * \tparam OutputIteratorT [inferred] Random-access output iterator type for writing scan outputs \iterator */ template < typename InputIteratorT, typename OutputIteratorT> CUB_RUNTIME_FUNCTION static cudaError_t InclusiveSum( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output sequence of data items int num_items, ///< [in] Total number of input items (i.e., the length of \p d_in) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; return DispatchScan::Dispatch( d_temp_storage, temp_storage_bytes, d_in, d_out, Sum(), NullType(), num_items, stream, debug_synchronous); } /** * \brief Computes a device-wide inclusive prefix scan using the specified binary \p scan_op functor. * * \par * - Supports non-commutative scan operators. * - Provides "run-to-run" determinism for pseudo-associative reduction * (e.g., addition of floating point types) on the same GPU device. * However, results for pseudo-associative reduction may be inconsistent * from one device to a another device of a different compute-capability * because CUB can employ different tile-sizing for different architectures. * - \devicestorage * * \par Snippet * The code snippet below illustrates the inclusive prefix min-scan of an \p int device vector. * \par * \code * #include // or equivalently * * // CustomMin functor * struct CustomMin * { * template * CUB_RUNTIME_FUNCTION __forceinline__ * T operator()(const T &a, const T &b) const { * return (b < a) ? b : a; * } * }; * * // Declare, allocate, and initialize device-accessible pointers for input and output * int num_items; // e.g., 7 * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_out; // e.g., [ , , , , , , ] * CustomMin min_op; * ... 
* * // Determine temporary device storage requirements for inclusive prefix scan * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceScan::InclusiveScan(d_temp_storage, temp_storage_bytes, d_in, d_out, min_op, num_items); * * // Allocate temporary storage for inclusive prefix scan * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run inclusive prefix min-scan * cub::DeviceScan::InclusiveScan(d_temp_storage, temp_storage_bytes, d_in, d_out, min_op, num_items); * * // d_out <-- [8, 6, 6, 5, 3, 0, 0] * * \endcode * * \tparam InputIteratorT [inferred] Random-access input iterator type for reading scan inputs \iterator * \tparam OutputIteratorT [inferred] Random-access output iterator type for writing scan outputs \iterator * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) */ template < typename InputIteratorT, typename OutputIteratorT, typename ScanOpT> CUB_RUNTIME_FUNCTION static cudaError_t InclusiveScan( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output sequence of data items ScanOpT scan_op, ///< [in] Binary scan functor int num_items, ///< [in] Total number of input items (i.e., the length of \p d_in) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; return DispatchScan::Dispatch( d_temp_storage, temp_storage_bytes, d_in, d_out, scan_op, NullType(), num_items, stream, debug_synchronous); } //@} end member group }; /** * \example example_device_scan.cu */ } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/device/device_segmented_radix_sort.cuh000066400000000000000000001525431411340063500255070ustar00rootroot00000000000000 /****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::DeviceSegmentedRadixSort provides device-wide, parallel operations for computing a batched radix sort across multiple, non-overlapping sequences of data items residing within device-accessible memory. */ #pragma once #include #include #include "dispatch/dispatch_radix_sort.cuh" #include "../util_arch.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief DeviceSegmentedRadixSort provides device-wide, parallel operations for computing a batched radix sort across multiple, non-overlapping sequences of data items residing within device-accessible memory. ![](segmented_sorting_logo.png) * \ingroup SegmentedModule * * \par Overview * The [radix sorting method](http://en.wikipedia.org/wiki/Radix_sort) arranges * items into ascending (or descending) order. The algorithm relies upon a positional representation for * keys, i.e., each key is comprised of an ordered sequence of symbols (e.g., digits, * characters, etc.) specified from least-significant to most-significant. For a * given input sequence of keys and a set of rules specifying a total ordering * of the symbolic alphabet, the radix sorting method produces a lexicographic * ordering of those keys. * * \par * DeviceSegmentedRadixSort can sort all of the built-in C++ numeric primitive types, e.g.: * unsigned char, \p int, \p double, etc. Although the direct radix sorting * method can only be applied to unsigned integral types, DeviceSegmentedRadixSort * is able to sort signed and floating-point types via simple bit-wise transformations * that ensure lexicographic key ordering. * * \par Usage Considerations * \cdp_class{DeviceSegmentedRadixSort} * */ struct DeviceSegmentedRadixSort { /******************************************************************//** * \name Key-value pairs *********************************************************************/ //@{ /** * \brief Sorts segments of key-value pairs into ascending order. (~2N auxiliary storage required) * * \par * - The contents of the input data are not altered by the sorting operation * - When input a contiguous sequence of segments, a single sequence * \p segment_offsets (of length num_segments+1) can be aliased * for both the \p d_begin_offsets and \p d_end_offsets parameters (where * the latter is specified as segment_offsets+1). * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. * - \devicestorageNP For sorting using only O(P) temporary storage, see the sorting interface using DoubleBuffer wrappers below. * - \devicestorage * * \par Snippet * The code snippet below illustrates the batched sorting of three segments (with one zero-length segment) of \p int keys * with associated vector of \p int values. 
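 * Here d_offsets = [0, 3, 3, 7] and d_end_offsets is aliased to d_offsets + 1,
 * so segment i covers the half-open range [d_offsets[i], d_offsets[i+1]): the
 * three segments contain {8, 6, 7}, {} (empty) and {5, 3, 0, 9}, respectively.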
* \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for sorting data * int num_items; // e.g., 7 * int num_segments; // e.g., 3 * int *d_offsets; // e.g., [0, 3, 3, 7] * int *d_keys_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_keys_out; // e.g., [-, -, -, -, -, -, -] * int *d_values_in; // e.g., [0, 1, 2, 3, 4, 5, 6] * int *d_values_out; // e.g., [-, -, -, -, -, -, -] * ... * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceSegmentedRadixSort::SortPairs(d_temp_storage, temp_storage_bytes, * d_keys_in, d_keys_out, d_values_in, d_values_out, * num_items, num_segments, d_offsets, d_offsets + 1); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run sorting operation * cub::DeviceSegmentedRadixSort::SortPairs(d_temp_storage, temp_storage_bytes, * d_keys_in, d_keys_out, d_values_in, d_values_out, * num_items, num_segments, d_offsets, d_offsets + 1); * * // d_keys_out <-- [6, 7, 8, 0, 3, 5, 9] * // d_values_out <-- [1, 2, 0, 5, 4, 3, 6] * * \endcode * * \tparam KeyT [inferred] Key type * \tparam ValueT [inferred] Value type * \tparam OffsetIteratorT [inferred] Random-access input iterator type for reading segment offsets \iterator */ template < typename KeyT, typename ValueT, typename OffsetIteratorT> CUB_RUNTIME_FUNCTION static cudaError_t SortPairs( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation const KeyT *d_keys_in, ///< [in] %Device-accessible pointer to the input data of key data to sort KeyT *d_keys_out, ///< [out] %Device-accessible pointer to the sorted output sequence of key data const ValueT *d_values_in, ///< [in] %Device-accessible pointer to the corresponding input sequence of associated value items ValueT *d_values_out, ///< [out] %Device-accessible pointer to the correspondingly-reordered output sequence of associated value items int num_items, ///< [in] The total number of items to sort (across all segments) int num_segments, ///< [in] The number of segments that comprise the sorting data OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. 
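    // Note on the implementation that follows: this pointer-based overload wraps the
    // caller's key/value arrays in DoubleBuffer structures and forwards them to
    // DispatchSegmentedRadixSort with the overwrite-allowed flag passed as false (the
    // DoubleBuffer overloads pass true instead), which is why this variant leaves the
    // input data unaltered at the cost of the ~2N auxiliary storage noted in the brief.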
{ // Signed integer type for global offsets typedef int OffsetT; DoubleBuffer d_keys(const_cast(d_keys_in), d_keys_out); DoubleBuffer d_values(const_cast(d_values_in), d_values_out); return DispatchSegmentedRadixSort::Dispatch( d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, num_segments, d_begin_offsets, d_end_offsets, begin_bit, end_bit, false, stream, debug_synchronous); } /** * \brief Sorts segments of key-value pairs into ascending order. (~N auxiliary storage required) * * \par * - The sorting operation is given a pair of key buffers and a corresponding * pair of associated value buffers. Each pair is managed by a DoubleBuffer * structure that indicates which of the two buffers is "current" (and thus * contains the input data to be sorted). * - The contents of both buffers within each pair may be altered by the sorting * operation. * - Upon completion, the sorting operation will update the "current" indicator * within each DoubleBuffer wrapper to reference which of the two buffers * now contains the sorted output sequence (a function of the number of key bits * specified and the targeted device architecture). * - When input a contiguous sequence of segments, a single sequence * \p segment_offsets (of length num_segments+1) can be aliased * for both the \p d_begin_offsets and \p d_end_offsets parameters (where * the latter is specified as segment_offsets+1). * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. * - \devicestorageP * - \devicestorage * * \par Snippet * The code snippet below illustrates the batched sorting of three segments (with one zero-length segment) of \p int keys * with associated vector of \p int values. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for sorting data * int num_items; // e.g., 7 * int num_segments; // e.g., 3 * int *d_offsets; // e.g., [0, 3, 3, 7] * int *d_key_buf; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_key_alt_buf; // e.g., [-, -, -, -, -, -, -] * int *d_value_buf; // e.g., [0, 1, 2, 3, 4, 5, 6] * int *d_value_alt_buf; // e.g., [-, -, -, -, -, -, -] * ... * * // Create a set of DoubleBuffers to wrap pairs of device pointers * cub::DoubleBuffer d_keys(d_key_buf, d_key_alt_buf); * cub::DoubleBuffer d_values(d_value_buf, d_value_alt_buf); * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceSegmentedRadixSort::SortPairs(d_temp_storage, temp_storage_bytes, d_keys, d_values, * num_items, num_segments, d_offsets, d_offsets + 1); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run sorting operation * cub::DeviceSegmentedRadixSort::SortPairs(d_temp_storage, temp_storage_bytes, d_keys, d_values, * num_items, num_segments, d_offsets, d_offsets + 1); * * // d_keys.Current() <-- [6, 7, 8, 0, 3, 5, 9] * // d_values.Current() <-- [5, 4, 3, 1, 2, 0, 6] * * \endcode * * \tparam KeyT [inferred] Key type * \tparam ValueT [inferred] Value type * \tparam OffsetIteratorT [inferred] Random-access input iterator type for reading segment offsets \iterator */ template < typename KeyT, typename ValueT, typename OffsetIteratorT> CUB_RUNTIME_FUNCTION static cudaError_t SortPairs( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. 
When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation DoubleBuffer &d_keys, ///< [in,out] Reference to the double-buffer of keys whose "current" device-accessible buffer contains the unsorted input keys and, upon return, is updated to point to the sorted output keys DoubleBuffer &d_values, ///< [in,out] Double-buffer of values whose "current" device-accessible buffer contains the unsorted input values and, upon return, is updated to point to the sorted output values int num_items, ///< [in] The total number of items to sort (across all segments) int num_segments, ///< [in] The number of segments that comprise the sorting data OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; return DispatchSegmentedRadixSort::Dispatch( d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, num_segments, d_begin_offsets, d_end_offsets, begin_bit, end_bit, true, stream, debug_synchronous); } /** * \brief Sorts segments of key-value pairs into descending order. (~2N auxiliary storage required). * * \par * - The contents of the input data are not altered by the sorting operation * - When input a contiguous sequence of segments, a single sequence * \p segment_offsets (of length num_segments+1) can be aliased * for both the \p d_begin_offsets and \p d_end_offsets parameters (where * the latter is specified as segment_offsets+1). * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. * - \devicestorageNP For sorting using only O(P) temporary storage, see the sorting interface using DoubleBuffer wrappers below. * - \devicestorage * * \par Snippet * The code snippet below illustrates the batched sorting of three segments (with one zero-length segment) of \p int keys * with associated vector of \p int values. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for sorting data * int num_items; // e.g., 7 * int num_segments; // e.g., 3 * int *d_offsets; // e.g., [0, 3, 3, 7] * int *d_keys_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_keys_out; // e.g., [-, -, -, -, -, -, -] * int *d_values_in; // e.g., [0, 1, 2, 3, 4, 5, 6] * int *d_values_out; // e.g., [-, -, -, -, -, -, -] * ... 
* * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceSegmentedRadixSort::SortPairsDescending(d_temp_storage, temp_storage_bytes, * d_keys_in, d_keys_out, d_values_in, d_values_out, * num_items, num_segments, d_offsets, d_offsets + 1); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run sorting operation * cub::DeviceSegmentedRadixSort::SortPairsDescending(d_temp_storage, temp_storage_bytes, * d_keys_in, d_keys_out, d_values_in, d_values_out, * num_items, num_segments, d_offsets, d_offsets + 1); * * // d_keys_out <-- [8, 7, 6, 9, 5, 3, 0] * // d_values_out <-- [0, 2, 1, 6, 3, 4, 5] * * \endcode * * \tparam KeyT [inferred] Key type * \tparam ValueT [inferred] Value type * \tparam OffsetIteratorT [inferred] Random-access input iterator type for reading segment offsets \iterator */ template < typename KeyT, typename ValueT, typename OffsetIteratorT> CUB_RUNTIME_FUNCTION static cudaError_t SortPairsDescending( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation const KeyT *d_keys_in, ///< [in] %Device-accessible pointer to the input data of key data to sort KeyT *d_keys_out, ///< [out] %Device-accessible pointer to the sorted output sequence of key data const ValueT *d_values_in, ///< [in] %Device-accessible pointer to the corresponding input sequence of associated value items ValueT *d_values_out, ///< [out] %Device-accessible pointer to the correspondingly-reordered output sequence of associated value items int num_items, ///< [in] The total number of items to sort (across all segments) int num_segments, ///< [in] The number of segments that comprise the sorting data OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; DoubleBuffer d_keys(const_cast(d_keys_in), d_keys_out); DoubleBuffer d_values(const_cast(d_values_in), d_values_out); return DispatchSegmentedRadixSort::Dispatch( d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, num_segments, d_begin_offsets, d_end_offsets, begin_bit, end_bit, false, stream, debug_synchronous); } /** * \brief Sorts segments of key-value pairs into descending order. (~N auxiliary storage required). 
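 *
 * \par
 * After the second (sorting) call, results should be read through d_keys.Current() and
 * d_values.Current(): either of the two underlying buffers may end up holding the sorted
 * sequence, and the contents of the other buffer may have been altered. For example, keys
 * known to occupy only their low 20 bits can be sorted with begin_bit = 0 and end_bit = 20,
 * skipping radix passes over the constant high-order bits.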
* * \par * - The sorting operation is given a pair of key buffers and a corresponding * pair of associated value buffers. Each pair is managed by a DoubleBuffer * structure that indicates which of the two buffers is "current" (and thus * contains the input data to be sorted). * - The contents of both buffers within each pair may be altered by the sorting * operation. * - Upon completion, the sorting operation will update the "current" indicator * within each DoubleBuffer wrapper to reference which of the two buffers * now contains the sorted output sequence (a function of the number of key bits * specified and the targeted device architecture). * - When input a contiguous sequence of segments, a single sequence * \p segment_offsets (of length num_segments+1) can be aliased * for both the \p d_begin_offsets and \p d_end_offsets parameters (where * the latter is specified as segment_offsets+1). * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. * - \devicestorageP * - \devicestorage * * \par Snippet * The code snippet below illustrates the batched sorting of three segments (with one zero-length segment) of \p int keys * with associated vector of \p int values. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for sorting data * int num_items; // e.g., 7 * int num_segments; // e.g., 3 * int *d_offsets; // e.g., [0, 3, 3, 7] * int *d_key_buf; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_key_alt_buf; // e.g., [-, -, -, -, -, -, -] * int *d_value_buf; // e.g., [0, 1, 2, 3, 4, 5, 6] * int *d_value_alt_buf; // e.g., [-, -, -, -, -, -, -] * ... * * // Create a set of DoubleBuffers to wrap pairs of device pointers * cub::DoubleBuffer d_keys(d_key_buf, d_key_alt_buf); * cub::DoubleBuffer d_values(d_value_buf, d_value_alt_buf); * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceSegmentedRadixSort::SortPairsDescending(d_temp_storage, temp_storage_bytes, d_keys, d_values, * num_items, num_segments, d_offsets, d_offsets + 1); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run sorting operation * cub::DeviceSegmentedRadixSort::SortPairsDescending(d_temp_storage, temp_storage_bytes, d_keys, d_values, * num_items, num_segments, d_offsets, d_offsets + 1); * * // d_keys.Current() <-- [8, 7, 6, 9, 5, 3, 0] * // d_values.Current() <-- [0, 2, 1, 6, 3, 4, 5] * * \endcode * * \tparam KeyT [inferred] Key type * \tparam ValueT [inferred] Value type * \tparam OffsetIteratorT [inferred] Random-access input iterator type for reading segment offsets \iterator */ template < typename KeyT, typename ValueT, typename OffsetIteratorT> CUB_RUNTIME_FUNCTION static cudaError_t SortPairsDescending( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. 
size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation DoubleBuffer &d_keys, ///< [in,out] Reference to the double-buffer of keys whose "current" device-accessible buffer contains the unsorted input keys and, upon return, is updated to point to the sorted output keys DoubleBuffer &d_values, ///< [in,out] Double-buffer of values whose "current" device-accessible buffer contains the unsorted input values and, upon return, is updated to point to the sorted output values int num_items, ///< [in] The total number of items to sort (across all segments) int num_segments, ///< [in] The number of segments that comprise the sorting data OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; return DispatchSegmentedRadixSort::Dispatch( d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, num_segments, d_begin_offsets, d_end_offsets, begin_bit, end_bit, true, stream, debug_synchronous); } //@} end member group /******************************************************************//** * \name Keys-only *********************************************************************/ //@{ /** * \brief Sorts segments of keys into ascending order. (~2N auxiliary storage required) * * \par * - The contents of the input data are not altered by the sorting operation * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. * - When input a contiguous sequence of segments, a single sequence * \p segment_offsets (of length num_segments+1) can be aliased * for both the \p d_begin_offsets and \p d_end_offsets parameters (where * the latter is specified as segment_offsets+1). * - \devicestorageNP For sorting using only O(P) temporary storage, see the sorting interface using DoubleBuffer wrappers below. * - \devicestorage * * \par Snippet * The code snippet below illustrates the batched sorting of three segments (with one zero-length segment) of \p int keys. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for sorting data * int num_items; // e.g., 7 * int num_segments; // e.g., 3 * int *d_offsets; // e.g., [0, 3, 3, 7] * int *d_keys_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_keys_out; // e.g., [-, -, -, -, -, -, -] * ... 
* * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceSegmentedRadixSort::SortKeys(d_temp_storage, temp_storage_bytes, d_keys_in, d_keys_out, * num_items, num_segments, d_offsets, d_offsets + 1); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run sorting operation * cub::DeviceSegmentedRadixSort::SortKeys(d_temp_storage, temp_storage_bytes, d_keys_in, d_keys_out, * num_items, num_segments, d_offsets, d_offsets + 1); * * // d_keys_out <-- [6, 7, 8, 0, 3, 5, 9] * * \endcode * * \tparam KeyT [inferred] Key type * \tparam OffsetIteratorT [inferred] Random-access input iterator type for reading segment offsets \iterator */ template < typename KeyT, typename OffsetIteratorT> CUB_RUNTIME_FUNCTION static cudaError_t SortKeys( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation const KeyT *d_keys_in, ///< [in] %Device-accessible pointer to the input data of key data to sort KeyT *d_keys_out, ///< [out] %Device-accessible pointer to the sorted output sequence of key data int num_items, ///< [in] The total number of items to sort (across all segments) int num_segments, ///< [in] The number of segments that comprise the sorting data OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; // Null value type DoubleBuffer d_keys(const_cast(d_keys_in), d_keys_out); DoubleBuffer d_values; return DispatchSegmentedRadixSort::Dispatch( d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, num_segments, d_begin_offsets, d_end_offsets, begin_bit, end_bit, false, stream, debug_synchronous); } /** * \brief Sorts segments of keys into ascending order. (~N auxiliary storage required). * * \par * - The sorting operation is given a pair of key buffers managed by a * DoubleBuffer structure that indicates which of the two buffers is * "current" (and thus contains the input data to be sorted). * - The contents of both buffers may be altered by the sorting operation. 
* - Upon completion, the sorting operation will update the "current" indicator * within the DoubleBuffer wrapper to reference which of the two buffers * now contains the sorted output sequence (a function of the number of key bits * specified and the targeted device architecture). * - When input a contiguous sequence of segments, a single sequence * \p segment_offsets (of length num_segments+1) can be aliased * for both the \p d_begin_offsets and \p d_end_offsets parameters (where * the latter is specified as segment_offsets+1). * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. * - \devicestorageP * - \devicestorage * * \par Snippet * The code snippet below illustrates the batched sorting of three segments (with one zero-length segment) of \p int keys. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for sorting data * int num_items; // e.g., 7 * int num_segments; // e.g., 3 * int *d_offsets; // e.g., [0, 3, 3, 7] * int *d_key_buf; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_key_alt_buf; // e.g., [-, -, -, -, -, -, -] * ... * * // Create a DoubleBuffer to wrap the pair of device pointers * cub::DoubleBuffer d_keys(d_key_buf, d_key_alt_buf); * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceSegmentedRadixSort::SortKeys(d_temp_storage, temp_storage_bytes, d_keys, * num_items, num_segments, d_offsets, d_offsets + 1); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run sorting operation * cub::DeviceSegmentedRadixSort::SortKeys(d_temp_storage, temp_storage_bytes, d_keys, * num_items, num_segments, d_offsets, d_offsets + 1); * * // d_keys.Current() <-- [6, 7, 8, 0, 3, 5, 9] * * \endcode * * \tparam KeyT [inferred] Key type * \tparam OffsetIteratorT [inferred] Random-access input iterator type for reading segment offsets \iterator */ template < typename KeyT, typename OffsetIteratorT> CUB_RUNTIME_FUNCTION static cudaError_t SortKeys( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation DoubleBuffer &d_keys, ///< [in,out] Reference to the double-buffer of keys whose "current" device-accessible buffer contains the unsorted input keys and, upon return, is updated to point to the sorted output keys int num_items, ///< [in] The total number of items to sort (across all segments) int num_segments, ///< [in] The number of segments that comprise the sorting data OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. 
int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; // Null value type DoubleBuffer d_values; return DispatchSegmentedRadixSort::Dispatch( d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, num_segments, d_begin_offsets, d_end_offsets, begin_bit, end_bit, true, stream, debug_synchronous); } /** * \brief Sorts segments of keys into descending order. (~2N auxiliary storage required). * * \par * - The contents of the input data are not altered by the sorting operation * - When input a contiguous sequence of segments, a single sequence * \p segment_offsets (of length num_segments+1) can be aliased * for both the \p d_begin_offsets and \p d_end_offsets parameters (where * the latter is specified as segment_offsets+1). * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. * - \devicestorageNP For sorting using only O(P) temporary storage, see the sorting interface using DoubleBuffer wrappers below. * - \devicestorage * * \par Snippet * The code snippet below illustrates the batched sorting of three segments (with one zero-length segment) of \p int keys. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for sorting data * int num_items; // e.g., 7 * int num_segments; // e.g., 3 * int *d_offsets; // e.g., [0, 3, 3, 7] * int *d_keys_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_keys_out; // e.g., [-, -, -, -, -, -, -] * ... * * // Create a DoubleBuffer to wrap the pair of device pointers * cub::DoubleBuffer d_keys(d_key_buf, d_key_alt_buf); * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceSegmentedRadixSort::SortKeysDescending(d_temp_storage, temp_storage_bytes, d_keys_in, d_keys_out, * num_items, num_segments, d_offsets, d_offsets + 1); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run sorting operation * cub::DeviceSegmentedRadixSort::SortKeysDescending(d_temp_storage, temp_storage_bytes, d_keys_in, d_keys_out, * num_items, num_segments, d_offsets, d_offsets + 1); * * // d_keys_out <-- [8, 7, 6, 9, 5, 3, 0] * * \endcode * * \tparam KeyT [inferred] Key type * \tparam OffsetIteratorT [inferred] Random-access input iterator type for reading segment offsets \iterator */ template < typename KeyT, typename OffsetIteratorT> CUB_RUNTIME_FUNCTION static cudaError_t SortKeysDescending( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. 
size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation const KeyT *d_keys_in, ///< [in] %Device-accessible pointer to the input data of key data to sort KeyT *d_keys_out, ///< [out] %Device-accessible pointer to the sorted output sequence of key data int num_items, ///< [in] The total number of items to sort (across all segments) int num_segments, ///< [in] The number of segments that comprise the sorting data OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; DoubleBuffer d_keys(const_cast(d_keys_in), d_keys_out); DoubleBuffer d_values; return DispatchSegmentedRadixSort::Dispatch( d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, num_segments, d_begin_offsets, d_end_offsets, begin_bit, end_bit, false, stream, debug_synchronous); } /** * \brief Sorts segments of keys into descending order. (~N auxiliary storage required). * * \par * - The sorting operation is given a pair of key buffers managed by a * DoubleBuffer structure that indicates which of the two buffers is * "current" (and thus contains the input data to be sorted). * - The contents of both buffers may be altered by the sorting operation. * - Upon completion, the sorting operation will update the "current" indicator * within the DoubleBuffer wrapper to reference which of the two buffers * now contains the sorted output sequence (a function of the number of key bits * specified and the targeted device architecture). * - When input a contiguous sequence of segments, a single sequence * \p segment_offsets (of length num_segments+1) can be aliased * for both the \p d_begin_offsets and \p d_end_offsets parameters (where * the latter is specified as segment_offsets+1). * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. * - \devicestorageP * - \devicestorage * * \par Snippet * The code snippet below illustrates the batched sorting of three segments (with one zero-length segment) of \p int keys. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for sorting data * int num_items; // e.g., 7 * int num_segments; // e.g., 3 * int *d_offsets; // e.g., [0, 3, 3, 7] * int *d_key_buf; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_key_alt_buf; // e.g., [-, -, -, -, -, -, -] * ... 
* * // Create a DoubleBuffer to wrap the pair of device pointers * cub::DoubleBuffer d_keys(d_key_buf, d_key_alt_buf); * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceSegmentedRadixSort::SortKeysDescending(d_temp_storage, temp_storage_bytes, d_keys, * num_items, num_segments, d_offsets, d_offsets + 1); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run sorting operation * cub::DeviceSegmentedRadixSort::SortKeysDescending(d_temp_storage, temp_storage_bytes, d_keys, * num_items, num_segments, d_offsets, d_offsets + 1); * * // d_keys.Current() <-- [8, 7, 6, 9, 5, 3, 0] * * \endcode * * \tparam KeyT [inferred] Key type * \tparam OffsetIteratorT [inferred] Random-access input iterator type for reading segment offsets \iterator */ template < typename KeyT, typename OffsetIteratorT> CUB_RUNTIME_FUNCTION static cudaError_t SortKeysDescending( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation DoubleBuffer &d_keys, ///< [in,out] Reference to the double-buffer of keys whose "current" device-accessible buffer contains the unsorted input keys and, upon return, is updated to point to the sorted output keys int num_items, ///< [in] The total number of items to sort (across all segments) int num_segments, ///< [in] The number of segments that comprise the sorting data OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; // Null value type DoubleBuffer d_values; return DispatchSegmentedRadixSort::Dispatch( d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, num_segments, d_begin_offsets, d_end_offsets, begin_bit, end_bit, true, stream, debug_synchronous); } //@} end member group }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/device/device_segmented_reduce.cuh000066400000000000000000001072531411340063500245760ustar00rootroot00000000000000 /****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::DeviceSegmentedReduce provides device-wide, parallel operations for computing a batched reduction across multiple sequences of data items residing within device-accessible memory. */ #pragma once #include #include #include "../iterator/arg_index_input_iterator.cuh" #include "dispatch/dispatch_reduce.cuh" #include "dispatch/dispatch_reduce_by_key.cuh" #include "../util_type.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief DeviceSegmentedReduce provides device-wide, parallel operations for computing a reduction across multiple sequences of data items residing within device-accessible memory. ![](reduce_logo.png) * \ingroup SegmentedModule * * \par Overview * A reduction (or fold) * uses a binary combining operator to compute a single aggregate from a sequence of input elements. * * \par Usage Considerations * \cdp_class{DeviceSegmentedReduce} * */ struct DeviceSegmentedReduce { /** * \brief Computes a device-wide segmented reduction using the specified binary \p reduction_op functor. * * \par * - Does not support binary reduction operators that are non-commutative. * - When input a contiguous sequence of segments, a single sequence * \p segment_offsets (of length num_segments+1) can be aliased * for both the \p d_begin_offsets and \p d_end_offsets parameters (where * the latter is specified as segment_offsets+1). * - \devicestorage * * \par Snippet * The code snippet below illustrates a custom min-reduction of a device vector of \p int data elements. * \par * \code * #include // or equivalently * * // CustomMin functor * struct CustomMin * { * template * CUB_RUNTIME_FUNCTION __forceinline__ * T operator()(const T &a, const T &b) const { * return (b < a) ? 
b : a; * } * }; * * // Declare, allocate, and initialize device-accessible pointers for input and output * int num_segments; // e.g., 3 * int *d_offsets; // e.g., [0, 3, 3, 7] * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_out; // e.g., [-, -, -] * CustomMin min_op; * int initial_value; // e.g., INT_MAX * ... * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceSegmentedReduce::Reduce(d_temp_storage, temp_storage_bytes, d_in, d_out, * num_segments, d_offsets, d_offsets + 1, min_op, initial_value); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run reduction * cub::DeviceSegmentedReduce::Reduce(d_temp_storage, temp_storage_bytes, d_in, d_out, * num_segments, d_offsets, d_offsets + 1, min_op, initial_value); * * // d_out <-- [6, INT_MAX, 0] * * \endcode * * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items \iterator * \tparam OutputIteratorT [inferred] Output iterator type for recording the reduced aggregate \iterator * \tparam OffsetIteratorT [inferred] Random-access input iterator type for reading segment offsets \iterator * \tparam ReductionOp [inferred] Binary reduction functor type having member T operator()(const T &a, const T &b) * \tparam T [inferred] Data element type that is convertible to the \p value type of \p InputIteratorT */ template < typename InputIteratorT, typename OutputIteratorT, typename OffsetIteratorT, typename ReductionOp, typename T> CUB_RUNTIME_FUNCTION static cudaError_t Reduce( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output aggregate int num_segments, ///< [in] The number of segments that comprise the sorting data OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. ReductionOp reduction_op, ///< [in] Binary reduction functor T initial_value, ///< [in] Initial value of the reduction for each segment cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; return DispatchSegmentedReduce::Dispatch( d_temp_storage, temp_storage_bytes, d_in, d_out, num_segments, d_begin_offsets, d_end_offsets, reduction_op, initial_value, stream, debug_synchronous); } /** * \brief Computes a device-wide segmented sum using the addition ('+') operator. * * \par * - Uses \p 0 as the initial value of the reduction for each segment. 
* - When input a contiguous sequence of segments, a single sequence * \p segment_offsets (of length num_segments+1) can be aliased * for both the \p d_begin_offsets and \p d_end_offsets parameters (where * the latter is specified as segment_offsets+1). * - Does not support \p + operators that are non-commutative.. * - \devicestorage * * \par Snippet * The code snippet below illustrates the sum reduction of a device vector of \p int data elements. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input and output * int num_segments; // e.g., 3 * int *d_offsets; // e.g., [0, 3, 3, 7] * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_out; // e.g., [-, -, -] * ... * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceSegmentedReduce::Sum(d_temp_storage, temp_storage_bytes, d_in, d_out, * num_segments, d_offsets, d_offsets + 1); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run sum-reduction * cub::DeviceSegmentedReduce::Sum(d_temp_storage, temp_storage_bytes, d_in, d_out, * num_segments, d_offsets, d_offsets + 1); * * // d_out <-- [21, 0, 17] * * \endcode * * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items \iterator * \tparam OutputIteratorT [inferred] Output iterator type for recording the reduced aggregate \iterator * \tparam OffsetIteratorT [inferred] Random-access input iterator type for reading segment offsets \iterator */ template < typename InputIteratorT, typename OutputIteratorT, typename OffsetIteratorT> CUB_RUNTIME_FUNCTION static cudaError_t Sum( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output aggregate int num_segments, ///< [in] The number of segments that comprise the sorting data OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; // The output value type typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? typename std::iterator_traits::value_type, // ... then the input iterator's value type, typename std::iterator_traits::value_type>::Type OutputT; // ... 
else the output iterator's value type return DispatchSegmentedReduce::Dispatch( d_temp_storage, temp_storage_bytes, d_in, d_out, num_segments, d_begin_offsets, d_end_offsets, cub::Sum(), OutputT(), // zero-initialize stream, debug_synchronous); } /** * \brief Computes a device-wide segmented minimum using the less-than ('<') operator. * * \par * - Uses std::numeric_limits::max() as the initial value of the reduction for each segment. * - When input a contiguous sequence of segments, a single sequence * \p segment_offsets (of length num_segments+1) can be aliased * for both the \p d_begin_offsets and \p d_end_offsets parameters (where * the latter is specified as segment_offsets+1). * - Does not support \p < operators that are non-commutative. * - \devicestorage * * \par Snippet * The code snippet below illustrates the min-reduction of a device vector of \p int data elements. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input and output * int num_segments; // e.g., 3 * int *d_offsets; // e.g., [0, 3, 3, 7] * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_out; // e.g., [-, -, -] * ... * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceSegmentedReduce::Min(d_temp_storage, temp_storage_bytes, d_in, d_out, * num_segments, d_offsets, d_offsets + 1); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run min-reduction * cub::DeviceSegmentedReduce::Min(d_temp_storage, temp_storage_bytes, d_in, d_out, * num_segments, d_offsets, d_offsets + 1); * * // d_out <-- [6, INT_MAX, 0] * * \endcode * * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items \iterator * \tparam OutputIteratorT [inferred] Output iterator type for recording the reduced aggregate \iterator * \tparam OffsetIteratorT [inferred] Random-access input iterator type for reading segment offsets \iterator */ template < typename InputIteratorT, typename OutputIteratorT, typename OffsetIteratorT> CUB_RUNTIME_FUNCTION static cudaError_t Min( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output aggregate int num_segments, ///< [in] The number of segments that comprise the sorting data OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. 
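    // The body below dispatches cub::Min() with the input value type's maximum representable
    // value as the per-segment initial value (Traits<InputT>::Max(), standing in for
    // std::numeric_limits<InputT>::max()), which is why the zero-length segment in the
    // snippet above reduces to INT_MAX.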
{ // Signed integer type for global offsets typedef int OffsetT; // The input value type typedef typename std::iterator_traits::value_type InputT; return DispatchSegmentedReduce::Dispatch( d_temp_storage, temp_storage_bytes, d_in, d_out, num_segments, d_begin_offsets, d_end_offsets, cub::Min(), Traits::Max(), // replace with std::numeric_limits::max() when C++11 support is more prevalent stream, debug_synchronous); } /** * \brief Finds the first device-wide minimum in each segment using the less-than ('<') operator, also returning the in-segment index of that item. * * \par * - The output value type of \p d_out is cub::KeyValuePair (assuming the value type of \p d_in is \p T) * - The minimum of the ith segment is written to d_out[i].value and its offset in that segment is written to d_out[i].key. * - The {1, std::numeric_limits::max()} tuple is produced for zero-length inputs * - When input a contiguous sequence of segments, a single sequence * \p segment_offsets (of length num_segments+1) can be aliased * for both the \p d_begin_offsets and \p d_end_offsets parameters (where * the latter is specified as segment_offsets+1). * - Does not support \p < operators that are non-commutative. * - \devicestorage * * \par Snippet * The code snippet below illustrates the argmin-reduction of a device vector of \p int data elements. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input and output * int num_segments; // e.g., 3 * int *d_offsets; // e.g., [0, 3, 3, 7] * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * KeyValuePair *d_out; // e.g., [{-,-}, {-,-}, {-,-}] * ... * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceSegmentedReduce::ArgMin(d_temp_storage, temp_storage_bytes, d_in, d_out, * num_segments, d_offsets, d_offsets + 1); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run argmin-reduction * cub::DeviceSegmentedReduce::ArgMin(d_temp_storage, temp_storage_bytes, d_in, d_out, * num_segments, d_offsets, d_offsets + 1); * * // d_out <-- [{1,6}, {1,INT_MAX}, {2,0}] * * \endcode * * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items (of some type \p T) \iterator * \tparam OutputIteratorT [inferred] Output iterator type for recording the reduced aggregate (having value type KeyValuePair) \iterator * \tparam OffsetIteratorT [inferred] Random-access input iterator type for reading segment offsets \iterator */ template < typename InputIteratorT, typename OutputIteratorT, typename OffsetIteratorT> CUB_RUNTIME_FUNCTION static cudaError_t ArgMin( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. 
size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output aggregate int num_segments, ///< [in] The number of segments that comprise the sorting data OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; // The input type typedef typename std::iterator_traits::value_type InputValueT; // The output tuple type typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? KeyValuePair, // ... then the key value pair OffsetT + InputValueT typename std::iterator_traits::value_type>::Type OutputTupleT; // ... else the output iterator's value type // The output value type typedef typename OutputTupleT::Value OutputValueT; // Wrapped input iterator to produce index-value tuples typedef ArgIndexInputIterator ArgIndexInputIteratorT; ArgIndexInputIteratorT d_indexed_in(d_in); // Initial value OutputTupleT initial_value(1, Traits::Max()); // replace with std::numeric_limits::max() when C++11 support is more prevalent return DispatchSegmentedReduce::Dispatch( d_temp_storage, temp_storage_bytes, d_indexed_in, d_out, num_segments, d_begin_offsets, d_end_offsets, cub::ArgMin(), initial_value, stream, debug_synchronous); } /** * \brief Computes a device-wide segmented maximum using the greater-than ('>') operator. * * \par * - Uses std::numeric_limits::lowest() as the initial value of the reduction. * - When input a contiguous sequence of segments, a single sequence * \p segment_offsets (of length num_segments+1) can be aliased * for both the \p d_begin_offsets and \p d_end_offsets parameters (where * the latter is specified as segment_offsets+1). * - Does not support \p > operators that are non-commutative. * - \devicestorage * * \par Snippet * The code snippet below illustrates the max-reduction of a device vector of \p int data elements. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input and output * int num_segments; // e.g., 3 * int *d_offsets; // e.g., [0, 3, 3, 7] * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_out; // e.g., [-, -, -] * ... 
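 * // (here "..." stands for device allocation of the arrays above, e.g. with cudaMalloc,
 * //  and initialization of d_offsets and d_in with the example contents)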
* * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceSegmentedReduce::Max(d_temp_storage, temp_storage_bytes, d_in, d_out, * num_segments, d_offsets, d_offsets + 1); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run max-reduction * cub::DeviceSegmentedReduce::Max(d_temp_storage, temp_storage_bytes, d_in, d_out, * num_segments, d_offsets, d_offsets + 1); * * // d_out <-- [8, INT_MIN, 9] * * \endcode * * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items \iterator * \tparam OutputIteratorT [inferred] Output iterator type for recording the reduced aggregate \iterator * \tparam OffsetIteratorT [inferred] Random-access input iterator type for reading segment offsets \iterator */ template < typename InputIteratorT, typename OutputIteratorT, typename OffsetIteratorT> CUB_RUNTIME_FUNCTION static cudaError_t Max( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output aggregate int num_segments, ///< [in] The number of segments that comprise the sorting data OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; // The input value type typedef typename std::iterator_traits::value_type InputT; return DispatchSegmentedReduce::Dispatch( d_temp_storage, temp_storage_bytes, d_in, d_out, num_segments, d_begin_offsets, d_end_offsets, cub::Max(), Traits::Lowest(), // replace with std::numeric_limits::lowest() when C++11 support is more prevalent stream, debug_synchronous); } /** * \brief Finds the first device-wide maximum in each segment using the greater-than ('>') operator, also returning the in-segment index of that item * * \par * - The output value type of \p d_out is cub::KeyValuePair (assuming the value type of \p d_in is \p T) * - The maximum of the ith segment is written to d_out[i].value and its offset in that segment is written to d_out[i].key. * - The {1, std::numeric_limits::lowest()} tuple is produced for zero-length inputs * - When input a contiguous sequence of segments, a single sequence * \p segment_offsets (of length num_segments+1) can be aliased * for both the \p d_begin_offsets and \p d_end_offsets parameters (where * the latter is specified as segment_offsets+1). * - Does not support \p > operators that are non-commutative. 
* - \devicestorage * * \par Snippet * The code snippet below illustrates the argmax-reduction of a device vector of \p int data elements. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input and output * int num_segments; // e.g., 3 * int *d_offsets; // e.g., [0, 3, 3, 7] * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * KeyValuePair *d_out; // e.g., [{-,-}, {-,-}, {-,-}] * ... * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceSegmentedReduce::ArgMax(d_temp_storage, temp_storage_bytes, d_in, d_out, * num_segments, d_offsets, d_offsets + 1); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run argmax-reduction * cub::DeviceSegmentedReduce::ArgMax(d_temp_storage, temp_storage_bytes, d_in, d_out, * num_segments, d_offsets, d_offsets + 1); * * // d_out <-- [{0,8}, {1,INT_MIN}, {3,9}] * * \endcode * * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items (of some type \p T) \iterator * \tparam OutputIteratorT [inferred] Output iterator type for recording the reduced aggregate (having value type KeyValuePair) \iterator * \tparam OffsetIteratorT [inferred] Random-access input iterator type for reading segment offsets \iterator */ template < typename InputIteratorT, typename OutputIteratorT, typename OffsetIteratorT> CUB_RUNTIME_FUNCTION static cudaError_t ArgMax( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output aggregate int num_segments, ///< [in] The number of segments that comprise the sorting data OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; // The input type typedef typename std::iterator_traits::value_type InputValueT; // The output tuple type typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? KeyValuePair, // ... then the key value pair OffsetT + InputValueT typename std::iterator_traits::value_type>::Type OutputTupleT; // ... 
else the output iterator's value type // The output value type typedef typename OutputTupleT::Value OutputValueT; // Wrapped input iterator to produce index-value tuples typedef ArgIndexInputIterator ArgIndexInputIteratorT; ArgIndexInputIteratorT d_indexed_in(d_in); // Initial value OutputTupleT initial_value(1, Traits::Lowest()); // replace with std::numeric_limits::lowest() when C++11 support is more prevalent return DispatchSegmentedReduce::Dispatch( d_temp_storage, temp_storage_bytes, d_indexed_in, d_out, num_segments, d_begin_offsets, d_end_offsets, cub::ArgMax(), initial_value, stream, debug_synchronous); } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/device/device_select.cuh000066400000000000000000000446261411340063500225570ustar00rootroot00000000000000 /****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::DeviceSelect provides device-wide, parallel operations for compacting selected items from sequences of data items residing within device-accessible memory. */ #pragma once #include #include #include "dispatch/dispatch_select_if.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief DeviceSelect provides device-wide, parallel operations for compacting selected items from sequences of data items residing within device-accessible memory. ![](select_logo.png) * \ingroup SingleModule * * \par Overview * These operations apply a selection criterion to selectively copy * items from a specified input sequence to a compact output sequence. 
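 *
 * \par
 * (Editor's note -- not part of the original CUB documentation.) Every DeviceSelect
 * entry point below follows the same two-phase convention: a first call made with a
 * NULL \p d_temp_storage only writes the required allocation size to
 * \p temp_storage_bytes, and a second, otherwise identical call performs the actual
 * selection. Each call returns a \p cudaError_t worth checking. A minimal sketch of
 * that pattern (the device pointers are assumed to have been prepared by the caller):
 * \par
 * \code
 * void   *d_temp_storage     = NULL;
 * size_t  temp_storage_bytes = 0;
 * cudaError_t err = cub::DeviceSelect::Flagged(d_temp_storage, temp_storage_bytes,
 *     d_in, d_flags, d_out, d_num_selected_out, num_items);
 * if (err == cudaSuccess)
 *     err = cudaMalloc(&d_temp_storage, temp_storage_bytes);
 * if (err == cudaSuccess)
 *     err = cub::DeviceSelect::Flagged(d_temp_storage, temp_storage_bytes,
 *         d_in, d_flags, d_out, d_num_selected_out, num_items);
 * \endcode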
* * \par Usage Considerations * \cdp_class{DeviceSelect} * * \par Performance * \linear_performance{select-flagged, select-if, and select-unique} * * \par * The following chart illustrates DeviceSelect::If * performance across different CUDA architectures for \p int32 items, * where 50% of the items are randomly selected. * * \image html select_if_int32_50_percent.png * * \par * The following chart illustrates DeviceSelect::Unique * performance across different CUDA architectures for \p int32 items * where segments have lengths uniformly sampled from [1,1000]. * * \image html select_unique_int32_len_500.png * * \par * \plots_below * */ struct DeviceSelect { /** * \brief Uses the \p d_flags sequence to selectively copy the corresponding items from \p d_in into \p d_out. The total number of items selected is written to \p d_num_selected_out. ![](select_flags_logo.png) * * \par * - The value type of \p d_flags must be castable to \p bool (e.g., \p bool, \p char, \p int, etc.). * - Copies of the selected items are compacted into \p d_out and maintain their original relative ordering. * - \devicestorage * * \par Snippet * The code snippet below illustrates the compaction of items selected from an \p int device vector. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input, flags, and output * int num_items; // e.g., 8 * int *d_in; // e.g., [1, 2, 3, 4, 5, 6, 7, 8] * char *d_flags; // e.g., [1, 0, 0, 1, 0, 1, 1, 0] * int *d_out; // e.g., [ , , , , , , , ] * int *d_num_selected_out; // e.g., [ ] * ... * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceSelect::Flagged(d_temp_storage, temp_storage_bytes, d_in, d_flags, d_out, d_num_selected_out, num_items); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run selection * cub::DeviceSelect::Flagged(d_temp_storage, temp_storage_bytes, d_in, d_flags, d_out, d_num_selected_out, num_items); * * // d_out <-- [1, 4, 6, 7] * // d_num_selected_out <-- [4] * * \endcode * * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items \iterator * \tparam FlagIterator [inferred] Random-access input iterator type for reading selection flags \iterator * \tparam OutputIteratorT [inferred] Random-access output iterator type for writing selected items \iterator * \tparam NumSelectedIteratorT [inferred] Output iterator type for recording the number of items selected \iterator */ template < typename InputIteratorT, typename FlagIterator, typename OutputIteratorT, typename NumSelectedIteratorT> CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t Flagged( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. 
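/*
 * [Editor's note -- not part of the original CUB header] The number of items kept by
 * any DeviceSelect call is written to device memory (d_num_selected_out), so a small
 * copy back to the host is needed before the count can be used to size later work.
 * A minimal sketch, assuming the Flagged() example documented above has completed:
 *
 *   int h_num_selected = 0;
 *   cudaMemcpy(&h_num_selected, d_num_selected_out, sizeof(int), cudaMemcpyDeviceToHost);
 *   // h_num_selected == 4 for that example; d_out[0..3] holds [1, 4, 6, 7]
 */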
size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items FlagIterator d_flags, ///< [in] Pointer to the input sequence of selection flags OutputIteratorT d_out, ///< [out] Pointer to the output sequence of selected data items NumSelectedIteratorT d_num_selected_out, ///< [out] Pointer to the output total number of items selected (i.e., length of \p d_out) int num_items, ///< [in] Total number of input items (i.e., length of \p d_in) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. { typedef int OffsetT; // Signed integer type for global offsets typedef NullType SelectOp; // Selection op (not used) typedef NullType EqualityOp; // Equality operator (not used) return DispatchSelectIf::Dispatch( d_temp_storage, temp_storage_bytes, d_in, d_flags, d_out, d_num_selected_out, SelectOp(), EqualityOp(), num_items, stream, debug_synchronous); } /** * \brief Uses the \p select_op functor to selectively copy items from \p d_in into \p d_out. The total number of items selected is written to \p d_num_selected_out. ![](select_logo.png) * * \par * - Copies of the selected items are compacted into \p d_out and maintain their original relative ordering. * - \devicestorage * * \par Performance * The following charts illustrate saturated select-if performance across different * CUDA architectures for \p int32 and \p int64 items, respectively. Items are * selected with 50% probability. * * \image html select_if_int32_50_percent.png * \image html select_if_int64_50_percent.png * * \par * The following charts are similar, but 5% selection probability: * * \image html select_if_int32_5_percent.png * \image html select_if_int64_5_percent.png * * \par Snippet * The code snippet below illustrates the compaction of items selected from an \p int device vector. * \par * \code * #include // or equivalently * * // Functor type for selecting values less than some criteria * struct LessThan * { * int compare; * * CUB_RUNTIME_FUNCTION __forceinline__ * LessThan(int compare) : compare(compare) {} * * CUB_RUNTIME_FUNCTION __forceinline__ * bool operator()(const int &a) const { * return (a < compare); * } * }; * * // Declare, allocate, and initialize device-accessible pointers for input and output * int num_items; // e.g., 8 * int *d_in; // e.g., [0, 2, 3, 9, 5, 2, 81, 8] * int *d_out; // e.g., [ , , , , , , , ] * int *d_num_selected_out; // e.g., [ ] * LessThan select_op(7); * ... 
* * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceSelect::If(d_temp_storage, temp_storage_bytes, d_in, d_out, d_num_selected_out, num_items, select_op); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run selection * cub::DeviceSelect::If(d_temp_storage, temp_storage_bytes, d_in, d_out, d_num_selected_out, num_items, select_op); * * // d_out <-- [0, 2, 3, 5, 2] * // d_num_selected_out <-- [5] * * \endcode * * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items \iterator * \tparam OutputIteratorT [inferred] Random-access output iterator type for writing selected items \iterator * \tparam NumSelectedIteratorT [inferred] Output iterator type for recording the number of items selected \iterator * \tparam SelectOp [inferred] Selection operator type having member bool operator()(const T &a) */ template < typename InputIteratorT, typename OutputIteratorT, typename NumSelectedIteratorT, typename SelectOp> CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t If( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output sequence of selected data items NumSelectedIteratorT d_num_selected_out, ///< [out] Pointer to the output total number of items selected (i.e., length of \p d_out) int num_items, ///< [in] Total number of input items (i.e., length of \p d_in) SelectOp select_op, ///< [in] Unary selection operator cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. { typedef int OffsetT; // Signed integer type for global offsets typedef NullType* FlagIterator; // FlagT iterator type (not used) typedef NullType EqualityOp; // Equality operator (not used) return DispatchSelectIf::Dispatch( d_temp_storage, temp_storage_bytes, d_in, NULL, d_out, d_num_selected_out, select_op, EqualityOp(), num_items, stream, debug_synchronous); } /** * \brief Given an input sequence \p d_in having runs of consecutive equal-valued keys, only the first key from each run is selectively copied to \p d_out. The total number of items selected is written to \p d_num_selected_out. ![](unique_logo.png) * * \par * - The == equality operator is used to determine whether keys are equivalent * - Copies of the selected items are compacted into \p d_out and maintain their original relative ordering. * - \devicestorage * * \par Performance * The following charts illustrate saturated select-unique performance across different * CUDA architectures for \p int32 and \p int64 items, respectively. Segments have * lengths uniformly sampled from [1,1000]. 
* * \image html select_unique_int32_len_500.png * \image html select_unique_int64_len_500.png * * \par * The following charts are similar, but with segment lengths uniformly sampled from [1,10]: * * \image html select_unique_int32_len_5.png * \image html select_unique_int64_len_5.png * * \par Snippet * The code snippet below illustrates the compaction of items selected from an \p int device vector. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input and output * int num_items; // e.g., 8 * int *d_in; // e.g., [0, 2, 2, 9, 5, 5, 5, 8] * int *d_out; // e.g., [ , , , , , , , ] * int *d_num_selected_out; // e.g., [ ] * ... * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceSelect::Unique(d_temp_storage, temp_storage_bytes, d_in, d_out, d_num_selected_out, num_items); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run selection * cub::DeviceSelect::Unique(d_temp_storage, temp_storage_bytes, d_in, d_out, d_num_selected_out, num_items); * * // d_out <-- [0, 2, 9, 5, 8] * // d_num_selected_out <-- [5] * * \endcode * * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items \iterator * \tparam OutputIteratorT [inferred] Random-access output iterator type for writing selected items \iterator * \tparam NumSelectedIteratorT [inferred] Output iterator type for recording the number of items selected \iterator */ template < typename InputIteratorT, typename OutputIteratorT, typename NumSelectedIteratorT> CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t Unique( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output sequence of selected data items NumSelectedIteratorT d_num_selected_out, ///< [out] Pointer to the output total number of items selected (i.e., length of \p d_out) int num_items, ///< [in] Total number of input items (i.e., length of \p d_in) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. { typedef int OffsetT; // Signed integer type for global offsets typedef NullType* FlagIterator; // FlagT iterator type (not used) typedef NullType SelectOp; // Selection op (not used) typedef Equality EqualityOp; // Default == operator return DispatchSelectIf::Dispatch( d_temp_storage, temp_storage_bytes, d_in, NULL, d_out, d_num_selected_out, SelectOp(), EqualityOp(), num_items, stream, debug_synchronous); } }; /** * \example example_device_select_flagged.cu * \example example_device_select_if.cu * \example example_device_select_unique.cu */ } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/device/device_spmv.cuh000066400000000000000000000205131411340063500222520ustar00rootroot00000000000000 /****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. 
* Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::DeviceSpmv provides device-wide parallel operations for performing sparse-matrix * vector multiplication (SpMV). */ #pragma once #include #include #include #include "dispatch/dispatch_spmv_orig.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief DeviceSpmv provides device-wide parallel operations for performing sparse-matrix * dense-vector multiplication (SpMV). * \ingroup SingleModule * * \par Overview * The [SpMV computation](http://en.wikipedia.org/wiki/Sparse_matrix-vector_multiplication) * performs the matrix-vector operation * y = alpha*A*x + beta*y, * where: * - A is an mxn sparse matrix whose non-zero structure is specified in * [compressed-storage-row (CSR) format](http://en.wikipedia.org/wiki/Sparse_matrix#Compressed_row_Storage_.28CRS_or_CSR.29) * (i.e., three arrays: values, row_offsets, and column_indices) * - x and y are dense vectors * - alpha and beta are scalar multiplicands * * \par Usage Considerations * \cdp_class{DeviceSpmv} * */ struct DeviceSpmv { /******************************************************************//** * \name CSR matrix operations *********************************************************************/ //@{ /** * \brief This function performs the matrix-vector operation y = A*x. * * \par Snippet * The code snippet below illustrates SpMV upon a 9x9 CSR matrix A * representing a 3x3 lattice (24 non-zeros). 
* * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input matrix A, input vector x, * // and output vector y * int num_rows = 9; * int num_cols = 9; * int num_nonzeros = 24; * * float* d_values; // e.g., [1, 1, 1, 1, 1, 1, 1, 1, * // 1, 1, 1, 1, 1, 1, 1, 1, * // 1, 1, 1, 1, 1, 1, 1, 1] * * int* d_column_indices; // e.g., [1, 3, 0, 2, 4, 1, 5, 0, * // 4, 6, 1, 3, 5, 7, 2, 4, * // 8, 3, 7, 4, 6, 8, 5, 7] * * int* d_row_offsets; // e.g., [0, 2, 5, 7, 10, 14, 17, 19, 22, 24] * * float* d_vector_x; // e.g., [1, 1, 1, 1, 1, 1, 1, 1, 1] * float* d_vector_y; // e.g., [ , , , , , , , , ] * ... * * // Determine temporary device storage requirements * void* d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceSpmv::CsrMV(d_temp_storage, temp_storage_bytes, d_values, * d_row_offsets, d_column_indices, d_vector_x, d_vector_y, * num_rows, num_cols, num_nonzeros, alpha, beta); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run SpMV * cub::DeviceSpmv::CsrMV(d_temp_storage, temp_storage_bytes, d_values, * d_row_offsets, d_column_indices, d_vector_x, d_vector_y, * num_rows, num_cols, num_nonzeros, alpha, beta); * * // d_vector_y <-- [2, 3, 2, 3, 4, 3, 2, 3, 2] * * \endcode * * \tparam ValueT [inferred] Matrix and vector value type (e.g., /p float, /p double, etc.) */ template < typename ValueT> CUB_RUNTIME_FUNCTION static cudaError_t CsrMV( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation ValueT* d_values, ///< [in] Pointer to the array of \p num_nonzeros values of the corresponding nonzero elements of matrix A. int* d_row_offsets, ///< [in] Pointer to the array of \p m + 1 offsets demarcating the start of every row in \p d_column_indices and \p d_values (with the final entry being equal to \p num_nonzeros) int* d_column_indices, ///< [in] Pointer to the array of \p num_nonzeros column-indices of the corresponding nonzero elements of matrix A. (Indices are zero-valued.) ValueT* d_vector_x, ///< [in] Pointer to the array of \p num_cols values corresponding to the dense input vector x ValueT* d_vector_y, ///< [out] Pointer to the array of \p num_rows values corresponding to the dense output vector y int num_rows, ///< [in] number of rows of matrix A. int num_cols, ///< [in] number of columns of matrix A. int num_nonzeros, ///< [in] number of nonzero elements of matrix A. cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. 
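/*
 * [Editor's note -- not part of the original CUB header] The snippet in the comment
 * above passes trailing alpha and beta arguments, but CsrMV() takes no such
 * parameters: as the body below shows, it always computes y = A*x (alpha fixed to 1,
 * beta fixed to 0). A corrected two-phase call over the same example data would be:
 *
 *   #include <cub/cub.cuh>
 *
 *   void  *d_temp_storage     = NULL;
 *   size_t temp_storage_bytes = 0;
 *   cub::DeviceSpmv::CsrMV(d_temp_storage, temp_storage_bytes, d_values,
 *                          d_row_offsets, d_column_indices, d_vector_x, d_vector_y,
 *                          num_rows, num_cols, num_nonzeros);
 *   cudaMalloc(&d_temp_storage, temp_storage_bytes);
 *   cub::DeviceSpmv::CsrMV(d_temp_storage, temp_storage_bytes, d_values,
 *                          d_row_offsets, d_column_indices, d_vector_x, d_vector_y,
 *                          num_rows, num_cols, num_nonzeros);
 *   // d_vector_y <-- [2, 3, 2, 3, 4, 3, 2, 3, 2]
 */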
{ SpmvParams spmv_params; spmv_params.d_values = d_values; spmv_params.d_row_end_offsets = d_row_offsets + 1; spmv_params.d_column_indices = d_column_indices; spmv_params.d_vector_x = d_vector_x; spmv_params.d_vector_y = d_vector_y; spmv_params.num_rows = num_rows; spmv_params.num_cols = num_cols; spmv_params.num_nonzeros = num_nonzeros; spmv_params.alpha = 1.0; spmv_params.beta = 0.0; return DispatchSpmv::Dispatch( d_temp_storage, temp_storage_bytes, spmv_params, stream, debug_synchronous); } //@} end member group }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/device/dispatch/000077500000000000000000000000001411340063500210435ustar00rootroot00000000000000relion-3.1.3/src/acc/cuda/cub/device/dispatch/dispatch_histogram.cuh000066400000000000000000001576121411340063500254340ustar00rootroot00000000000000 /****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::DeviceHistogram provides device-wide parallel operations for constructing histogram(s) from a sequence of samples data residing within device-accessible memory. 
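 *
 * (Editor's note -- not part of the original CUB header.) This header implements the
 * dispatch layer behind the public cub::DeviceHistogram entry points (declared in the
 * companion device_histogram.cuh): DeviceHistogramInitKernel first zeroes the output
 * bins and resets the tile queue, after which DeviceHistogramSweepKernel accumulates
 * block-privatized histograms into the final counts. A minimal, caller-facing sketch
 * (with assumed sample data) of the even-binning path that reaches DispatchEven()
 * further below:
 *
 *   #include <cub/cub.cuh>
 *
 *   int     num_samples = 1 << 20;     // illustrative
 *   float  *d_samples;                 // num_samples values in [0, 12)
 *   int    *d_histogram;               // six bins
 *   int     num_levels  = 7;           // six evenly spaced bins => seven boundaries
 *   float   lower_level = 0.0f, upper_level = 12.0f;
 *
 *   void   *d_temp_storage     = NULL;
 *   size_t  temp_storage_bytes = 0;
 *   cub::DeviceHistogram::HistogramEven(d_temp_storage, temp_storage_bytes, d_samples,
 *       d_histogram, num_levels, lower_level, upper_level, num_samples);
 *   cudaMalloc(&d_temp_storage, temp_storage_bytes);
 *   cub::DeviceHistogram::HistogramEven(d_temp_storage, temp_storage_bytes, d_samples,
 *       d_histogram, num_levels, lower_level, upper_level, num_samples);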
*/ #pragma once #include #include #include #include "../../agent/agent_histogram.cuh" #include "../../util_debug.cuh" #include "../../util_device.cuh" #include "../../thread/thread_search.cuh" #include "../../grid/grid_queue.cuh" #include "../../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * Histogram kernel entry points *****************************************************************************/ /** * Histogram initialization kernel entry point */ template < int NUM_ACTIVE_CHANNELS, ///< Number of channels actively being histogrammed typename CounterT, ///< Integer type for counting sample occurrences per histogram bin typename OffsetT> ///< Signed integer type for global offsets __global__ void DeviceHistogramInitKernel( ArrayWrapper num_output_bins_wrapper, ///< Number of output histogram bins per channel ArrayWrapper d_output_histograms_wrapper, ///< Histogram counter data having logical dimensions CounterT[NUM_ACTIVE_CHANNELS][num_bins.array[CHANNEL]] GridQueue tile_queue) ///< Drain queue descriptor for dynamically mapping tile data onto thread blocks { if ((threadIdx.x == 0) && (blockIdx.x == 0)) tile_queue.ResetDrain(); int output_bin = (blockIdx.x * blockDim.x) + threadIdx.x; #pragma unroll for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) { if (output_bin < num_output_bins_wrapper.array[CHANNEL]) d_output_histograms_wrapper.array[CHANNEL][output_bin] = 0; } } /** * Histogram privatized sweep kernel entry point (multi-block). Computes privatized histograms, one per thread block. */ template < typename AgentHistogramPolicyT, ///< Parameterized AgentHistogramPolicy tuning policy type int PRIVATIZED_SMEM_BINS, ///< Maximum number of histogram bins per channel (e.g., up to 256) int NUM_CHANNELS, ///< Number of channels interleaved in the input data (may be greater than the number of channels being actively histogrammed) int NUM_ACTIVE_CHANNELS, ///< Number of channels actively being histogrammed typename SampleIteratorT, ///< The input iterator type. \iterator. 
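/*
 * [Editor's note -- not part of the original CUB header] Each thread block running this
 * sweep kernel accumulates into its own privatized histogram: in shared memory when the
 * per-channel bin count fits within PRIVATIZED_SMEM_BINS (capped at 256 by the dispatch
 * code below), otherwise in that block's slice of the global scratch allocation set up
 * by PrivatizedDispatch(). AgentHistogram::StoreOutput() then folds the private counts
 * into the final d_output_histograms bins.
 */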
typename CounterT, ///< Integer type for counting sample occurrences per histogram bin typename PrivatizedDecodeOpT, ///< The transform operator type for determining privatized counter indices from samples, one for each channel typename OutputDecodeOpT, ///< The transform operator type for determining output bin-ids from privatized counter indices, one for each channel typename OffsetT> ///< Signed integer type for global offsets __launch_bounds__ (int(AgentHistogramPolicyT::BLOCK_THREADS)) __global__ void DeviceHistogramSweepKernel( SampleIteratorT d_samples, ///< Input data to reduce ArrayWrapper num_output_bins_wrapper, ///< The number bins per final output histogram ArrayWrapper num_privatized_bins_wrapper, ///< The number bins per privatized histogram ArrayWrapper d_output_histograms_wrapper, ///< Reference to final output histograms ArrayWrapper d_privatized_histograms_wrapper, ///< Reference to privatized histograms ArrayWrapper output_decode_op_wrapper, ///< The transform operator for determining output bin-ids from privatized counter indices, one for each channel ArrayWrapper privatized_decode_op_wrapper, ///< The transform operator for determining privatized counter indices from samples, one for each channel OffsetT num_row_pixels, ///< The number of multi-channel pixels per row in the region of interest OffsetT num_rows, ///< The number of rows in the region of interest OffsetT row_stride_samples, ///< The number of samples between starts of consecutive rows in the region of interest int tiles_per_row, ///< Number of image tiles per row GridQueue tile_queue) ///< Drain queue descriptor for dynamically mapping tile data onto thread blocks { // Thread block type for compositing input tiles typedef AgentHistogram< AgentHistogramPolicyT, PRIVATIZED_SMEM_BINS, NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, PrivatizedDecodeOpT, OutputDecodeOpT, OffsetT> AgentHistogramT; // Shared memory for AgentHistogram __shared__ typename AgentHistogramT::TempStorage temp_storage; AgentHistogramT agent( temp_storage, d_samples, num_output_bins_wrapper.array, num_privatized_bins_wrapper.array, d_output_histograms_wrapper.array, d_privatized_histograms_wrapper.array, output_decode_op_wrapper.array, privatized_decode_op_wrapper.array); // Initialize counters agent.InitBinCounters(); // Consume input tiles agent.ConsumeTiles( num_row_pixels, num_rows, row_stride_samples, tiles_per_row, tile_queue); // Store output to global (if necessary) agent.StoreOutput(); } /****************************************************************************** * Dispatch ******************************************************************************/ /** * Utility class for dispatching the appropriately-tuned kernels for DeviceHistogram */ template < int NUM_CHANNELS, ///< Number of channels interleaved in the input data (may be greater than the number of channels being actively histogrammed) int NUM_ACTIVE_CHANNELS, ///< Number of channels actively being histogrammed typename SampleIteratorT, ///< Random-access input iterator type for reading input items \iterator typename CounterT, ///< Integer type for counting sample occurrences per histogram bin typename LevelT, ///< Type for specifying bin level boundaries typename OffsetT> ///< Signed integer type for global offsets struct DipatchHistogram { //--------------------------------------------------------------------- // Types and constants //--------------------------------------------------------------------- /// The sample value type of the input iterator 
typedef typename std::iterator_traits::value_type SampleT; enum { // Maximum number of bins per channel for which we will use a privatized smem strategy MAX_PRIVATIZED_SMEM_BINS = 256 }; //--------------------------------------------------------------------- // Transform functors for converting samples to bin-ids //--------------------------------------------------------------------- // Searches for bin given a list of bin-boundary levels template struct SearchTransform { LevelIteratorT d_levels; // Pointer to levels array int num_output_levels; // Number of levels in array // Initializer __host__ __device__ __forceinline__ void Init( LevelIteratorT d_levels, // Pointer to levels array int num_output_levels) // Number of levels in array { this->d_levels = d_levels; this->num_output_levels = num_output_levels; } // Method for converting samples to bin-ids template __host__ __device__ __forceinline__ void BinSelect(_SampleT sample, int &bin, bool valid) { /// Level iterator wrapper type typedef typename If::VALUE, CacheModifiedInputIterator, // Wrap the native input pointer with CacheModifiedInputIterator LevelIteratorT>::Type // Directly use the supplied input iterator type WrappedLevelIteratorT; WrappedLevelIteratorT wrapped_levels(d_levels); int num_bins = num_output_levels - 1; if (valid) { bin = UpperBound(wrapped_levels, num_output_levels, (LevelT) sample) - 1; if (bin >= num_bins) bin = -1; } } }; // Scales samples to evenly-spaced bins struct ScaleTransform { int num_bins; // Number of levels in array LevelT max; // Max sample level (exclusive) LevelT min; // Min sample level (inclusive) LevelT scale; // Bin scaling factor // Initializer template __host__ __device__ __forceinline__ void Init( int num_output_levels, // Number of levels in array _LevelT max, // Max sample level (exclusive) _LevelT min, // Min sample level (inclusive) _LevelT scale) // Bin scaling factor { this->num_bins = num_output_levels - 1; this->max = max; this->min = min; this->scale = scale; } // Initializer (float specialization) __host__ __device__ __forceinline__ void Init( int num_output_levels, // Number of levels in array float max, // Max sample level (exclusive) float min, // Min sample level (inclusive) float scale) // Bin scaling factor { this->num_bins = num_output_levels - 1; this->max = max; this->min = min; this->scale = float(1.0) / scale; } // Initializer (double specialization) __host__ __device__ __forceinline__ void Init( int num_output_levels, // Number of levels in array double max, // Max sample level (exclusive) double min, // Min sample level (inclusive) double scale) // Bin scaling factor { this->num_bins = num_output_levels - 1; this->max = max; this->min = min; this->scale = double(1.0) / scale; } // Method for converting samples to bin-ids template __host__ __device__ __forceinline__ void BinSelect(_SampleT sample, int &bin, bool valid) { LevelT level_sample = (LevelT) sample; if (valid && (level_sample >= min) && (level_sample < max)) bin = (int) ((level_sample - min) / scale); } // Method for converting samples to bin-ids (float specialization) template __host__ __device__ __forceinline__ void BinSelect(float sample, int &bin, bool valid) { LevelT level_sample = (LevelT) sample; if (valid && (level_sample >= min) && (level_sample < max)) bin = (int) ((level_sample - min) * scale); } // Method for converting samples to bin-ids (double specialization) template __host__ __device__ __forceinline__ void BinSelect(double sample, int &bin, bool valid) { LevelT level_sample = (LevelT) 
sample; if (valid && (level_sample >= min) && (level_sample < max)) bin = (int) ((level_sample - min) * scale); } }; // Pass-through bin transform operator struct PassThruTransform { // Method for converting samples to bin-ids template __host__ __device__ __forceinline__ void BinSelect(_SampleT sample, int &bin, bool valid) { if (valid) bin = (int) sample; } }; //--------------------------------------------------------------------- // Tuning policies //--------------------------------------------------------------------- template struct TScale { enum { V_SCALE = (sizeof(SampleT) + sizeof(int) - 1) / sizeof(int), VALUE = CUB_MAX((NOMINAL_ITEMS_PER_THREAD / NUM_ACTIVE_CHANNELS / V_SCALE), 1) }; }; /// SM11 struct Policy110 { // HistogramSweepPolicy typedef AgentHistogramPolicy< 512, (NUM_CHANNELS == 1) ? 8 : 2, BLOCK_LOAD_DIRECT, LOAD_DEFAULT, true, GMEM, false> HistogramSweepPolicy; }; /// SM20 struct Policy200 { // HistogramSweepPolicy typedef AgentHistogramPolicy< (NUM_CHANNELS == 1) ? 256 : 128, (NUM_CHANNELS == 1) ? 8 : 3, (NUM_CHANNELS == 1) ? BLOCK_LOAD_DIRECT : BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, true, SMEM, false> HistogramSweepPolicy; }; /// SM30 struct Policy300 { // HistogramSweepPolicy typedef AgentHistogramPolicy< 512, (NUM_CHANNELS == 1) ? 8 : 2, BLOCK_LOAD_DIRECT, LOAD_DEFAULT, true, GMEM, false> HistogramSweepPolicy; }; /// SM35 struct Policy350 { // HistogramSweepPolicy typedef AgentHistogramPolicy< 128, TScale<8>::VALUE, BLOCK_LOAD_DIRECT, LOAD_LDG, true, BLEND, true> HistogramSweepPolicy; }; /// SM50 struct Policy500 { // HistogramSweepPolicy typedef AgentHistogramPolicy< 384, TScale<16>::VALUE, BLOCK_LOAD_DIRECT, LOAD_LDG, true, SMEM, false> HistogramSweepPolicy; }; //--------------------------------------------------------------------- // Tuning policies of current PTX compiler pass //--------------------------------------------------------------------- #if (CUB_PTX_ARCH >= 500) typedef Policy500 PtxPolicy; #elif (CUB_PTX_ARCH >= 350) typedef Policy350 PtxPolicy; #elif (CUB_PTX_ARCH >= 300) typedef Policy300 PtxPolicy; #elif (CUB_PTX_ARCH >= 200) typedef Policy200 PtxPolicy; #else typedef Policy110 PtxPolicy; #endif // "Opaque" policies (whose parameterizations aren't reflected in the type signature) struct PtxHistogramSweepPolicy : PtxPolicy::HistogramSweepPolicy {}; //--------------------------------------------------------------------- // Utilities //--------------------------------------------------------------------- /** * Initialize kernel dispatch configurations with the policies corresponding to the PTX assembly we will use */ template CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t InitConfigs( int ptx_version, KernelConfig &histogram_sweep_config) { #if (CUB_PTX_ARCH > 0) // We're on the device, so initialize the kernel dispatch configurations with the current PTX policy return histogram_sweep_config.template Init(); #else // We're on the host, so lookup and initialize the kernel dispatch configurations with the policies that match the device's PTX version if (ptx_version >= 500) { return histogram_sweep_config.template Init(); } else if (ptx_version >= 350) { return histogram_sweep_config.template Init(); } else if (ptx_version >= 300) { return histogram_sweep_config.template Init(); } else if (ptx_version >= 200) { return histogram_sweep_config.template Init(); } else if (ptx_version >= 110) { return histogram_sweep_config.template Init(); } else { // No global atomic support return cudaErrorNotSupported; } #endif } /** * Kernel kernel dispatch 
configuration */ struct KernelConfig { int block_threads; int pixels_per_thread; template CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t Init() { block_threads = BlockPolicy::BLOCK_THREADS; pixels_per_thread = BlockPolicy::PIXELS_PER_THREAD; return cudaSuccess; } }; //--------------------------------------------------------------------- // Dispatch entrypoints //--------------------------------------------------------------------- /** * Privatization-based dispatch routine */ template < typename PrivatizedDecodeOpT, ///< The transform operator type for determining privatized counter indices from samples, one for each channel typename OutputDecodeOpT, ///< The transform operator type for determining output bin-ids from privatized counter indices, one for each channel typename DeviceHistogramInitKernelT, ///< Function type of cub::DeviceHistogramInitKernel typename DeviceHistogramSweepKernelT> ///< Function type of cub::DeviceHistogramSweepKernel CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t PrivatizedDispatch( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation SampleIteratorT d_samples, ///< [in] The pointer to the input sequence of sample items. The samples from different channels are assumed to be interleaved (e.g., an array of 32-bit pixels where each pixel consists of four RGBA 8-bit samples). CounterT* d_output_histograms[NUM_ACTIVE_CHANNELS], ///< [out] The pointers to the histogram counter output arrays, one for each active channel. For channeli, the allocation length of d_histograms[i] should be num_output_levels[i] - 1. int num_privatized_levels[NUM_ACTIVE_CHANNELS], ///< [in] The number of bin level boundaries for delineating histogram samples in each active channel. Implies that the number of bins for channeli is num_output_levels[i] - 1. PrivatizedDecodeOpT privatized_decode_op[NUM_ACTIVE_CHANNELS], ///< [in] Transform operators for determining bin-ids from samples, one for each channel int num_output_levels[NUM_ACTIVE_CHANNELS], ///< [in] The number of bin level boundaries for delineating histogram samples in each active channel. Implies that the number of bins for channeli is num_output_levels[i] - 1. OutputDecodeOpT output_decode_op[NUM_ACTIVE_CHANNELS], ///< [in] Transform operators for determining bin-ids from samples, one for each channel int max_num_output_bins, ///< [in] Maximum number of output bins in any channel OffsetT num_row_pixels, ///< [in] The number of multi-channel pixels per row in the region of interest OffsetT num_rows, ///< [in] The number of rows in the region of interest OffsetT row_stride_samples, ///< [in] The number of samples between starts of consecutive rows in the region of interest DeviceHistogramInitKernelT histogram_init_kernel, ///< [in] Kernel function pointer to parameterization of cub::DeviceHistogramInitKernel DeviceHistogramSweepKernelT histogram_sweep_kernel, ///< [in] Kernel function pointer to parameterization of cub::DeviceHistogramSweepKernel KernelConfig histogram_sweep_config, ///< [in] Dispatch parameters that match the policy that \p histogram_sweep_kernel was compiled for cudaStream_t stream, ///< [in] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous) ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. 
May cause significant slowdown. Default is \p false. { #ifndef CUB_RUNTIME_ENABLED // Kernel launch not supported from this device return CubDebug(cudaErrorNotSupported); #else cudaError error = cudaSuccess; do { // Get device ordinal int device_ordinal; if (CubDebug(error = cudaGetDevice(&device_ordinal))) break; // Get SM count int sm_count; if (CubDebug(error = cudaDeviceGetAttribute (&sm_count, cudaDevAttrMultiProcessorCount, device_ordinal))) break; // Get SM occupancy for histogram_sweep_kernel int histogram_sweep_sm_occupancy; if (CubDebug(error = MaxSmOccupancy( histogram_sweep_sm_occupancy, histogram_sweep_kernel, histogram_sweep_config.block_threads))) break; // Get device occupancy for histogram_sweep_kernel int histogram_sweep_occupancy = histogram_sweep_sm_occupancy * sm_count; if (num_row_pixels * NUM_CHANNELS == row_stride_samples) { // Treat as a single linear array of samples num_row_pixels *= num_rows; num_rows = 1; row_stride_samples = num_row_pixels * NUM_CHANNELS; } // Get grid dimensions, trying to keep total blocks ~histogram_sweep_occupancy int pixels_per_tile = histogram_sweep_config.block_threads * histogram_sweep_config.pixels_per_thread; int tiles_per_row = int(num_row_pixels + pixels_per_tile - 1) / pixels_per_tile; int blocks_per_row = CUB_MIN(histogram_sweep_occupancy, tiles_per_row); int blocks_per_col = (blocks_per_row > 0) ? int(CUB_MIN(histogram_sweep_occupancy / blocks_per_row, num_rows)) : 0; int num_thread_blocks = blocks_per_row * blocks_per_col; dim3 sweep_grid_dims; sweep_grid_dims.x = (unsigned int) blocks_per_row; sweep_grid_dims.y = (unsigned int) blocks_per_col; sweep_grid_dims.z = 1; // Temporary storage allocation requirements const int NUM_ALLOCATIONS = NUM_ACTIVE_CHANNELS + 1; void* allocations[NUM_ALLOCATIONS]; size_t allocation_sizes[NUM_ALLOCATIONS]; for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) allocation_sizes[CHANNEL] = size_t(num_thread_blocks) * (num_privatized_levels[CHANNEL] - 1) * sizeof(CounterT); allocation_sizes[NUM_ALLOCATIONS - 1] = GridQueue::AllocationSize(); // Alias the temporary allocations from the single storage blob (or compute the necessary size of the blob) if (CubDebug(error = AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes))) break; if (d_temp_storage == NULL) { // Return if the caller is simply requesting the size of the storage allocation break; } // Construct the grid queue descriptor GridQueue tile_queue(allocations[NUM_ALLOCATIONS - 1]); // Setup array wrapper for histogram channel output (because we can't pass static arrays as kernel parameters) ArrayWrapper d_output_histograms_wrapper; for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) d_output_histograms_wrapper.array[CHANNEL] = d_output_histograms[CHANNEL]; // Setup array wrapper for privatized per-block histogram channel output (because we can't pass static arrays as kernel parameters) ArrayWrapper d_privatized_histograms_wrapper; for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) d_privatized_histograms_wrapper.array[CHANNEL] = (CounterT*) allocations[CHANNEL]; // Setup array wrapper for sweep bin transforms (because we can't pass static arrays as kernel parameters) ArrayWrapper privatized_decode_op_wrapper; for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) privatized_decode_op_wrapper.array[CHANNEL] = privatized_decode_op[CHANNEL]; // Setup array wrapper for aggregation bin transforms (because we can't pass static arrays as kernel parameters) ArrayWrapper 
output_decode_op_wrapper; for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) output_decode_op_wrapper.array[CHANNEL] = output_decode_op[CHANNEL]; // Setup array wrapper for num privatized bins (because we can't pass static arrays as kernel parameters) ArrayWrapper num_privatized_bins_wrapper; for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) num_privatized_bins_wrapper.array[CHANNEL] = num_privatized_levels[CHANNEL] - 1; // Setup array wrapper for num output bins (because we can't pass static arrays as kernel parameters) ArrayWrapper num_output_bins_wrapper; for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) num_output_bins_wrapper.array[CHANNEL] = num_output_levels[CHANNEL] - 1; int histogram_init_block_threads = 256; int histogram_init_grid_dims = (max_num_output_bins + histogram_init_block_threads - 1) / histogram_init_block_threads; // Log DeviceHistogramInitKernel configuration if (debug_synchronous) _CubLog("Invoking DeviceHistogramInitKernel<<<%d, %d, 0, %lld>>>()\n", histogram_init_grid_dims, histogram_init_block_threads, (long long) stream); // Invoke histogram_init_kernel histogram_init_kernel<<>>( num_output_bins_wrapper, d_output_histograms_wrapper, tile_queue); // Return if empty problem if ((blocks_per_row == 0) || (blocks_per_col == 0)) break; // Log histogram_sweep_kernel configuration if (debug_synchronous) _CubLog("Invoking histogram_sweep_kernel<<<{%d, %d, %d}, %d, 0, %lld>>>(), %d pixels per thread, %d SM occupancy\n", sweep_grid_dims.x, sweep_grid_dims.y, sweep_grid_dims.z, histogram_sweep_config.block_threads, (long long) stream, histogram_sweep_config.pixels_per_thread, histogram_sweep_sm_occupancy); // Invoke histogram_sweep_kernel histogram_sweep_kernel<<>>( d_samples, num_output_bins_wrapper, num_privatized_bins_wrapper, d_output_histograms_wrapper, d_privatized_histograms_wrapper, output_decode_op_wrapper, privatized_decode_op_wrapper, num_row_pixels, num_rows, row_stride_samples, tiles_per_row, tile_queue); // Check for failure to launch if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; } while (0); return error; #endif // CUB_RUNTIME_ENABLED } /** * Dispatch routine for HistogramRange, specialized for sample types larger than 8bit */ CUB_RUNTIME_FUNCTION static cudaError_t DispatchRange( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation SampleIteratorT d_samples, ///< [in] The pointer to the multi-channel input sequence of data samples. The samples from different channels are assumed to be interleaved (e.g., an array of 32-bit pixels where each pixel consists of four RGBA 8-bit samples). CounterT* d_output_histograms[NUM_ACTIVE_CHANNELS], ///< [out] The pointers to the histogram counter output arrays, one for each active channel. For channeli, the allocation length of d_histograms[i] should be num_output_levels[i] - 1. int num_output_levels[NUM_ACTIVE_CHANNELS], ///< [in] The number of boundaries (levels) for delineating histogram samples in each active channel. Implies that the number of bins for channeli is num_output_levels[i] - 1. LevelT *d_levels[NUM_ACTIVE_CHANNELS], ///< [in] The pointers to the arrays of boundaries (levels), one for each active channel. 
Bin ranges are defined by consecutive boundary pairings: lower sample value boundaries are inclusive and upper sample value boundaries are exclusive. OffsetT num_row_pixels, ///< [in] The number of multi-channel pixels per row in the region of interest OffsetT num_rows, ///< [in] The number of rows in the region of interest OffsetT row_stride_samples, ///< [in] The number of samples between starts of consecutive rows in the region of interest cudaStream_t stream, ///< [in] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous, ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. Int2Type is_byte_sample) ///< [in] Marker type indicating whether or not SampleT is a 8b type { cudaError error = cudaSuccess; do { // Get PTX version int ptx_version; #if (CUB_PTX_ARCH == 0) if (CubDebug(error = PtxVersion(ptx_version))) break; #else ptx_version = CUB_PTX_ARCH; #endif // Get kernel dispatch configurations KernelConfig histogram_sweep_config; if (CubDebug(error = InitConfigs(ptx_version, histogram_sweep_config))) break; // Use the search transform op for converting samples to privatized bins typedef SearchTransform PrivatizedDecodeOpT; // Use the pass-thru transform op for converting privatized bins to output bins typedef PassThruTransform OutputDecodeOpT; PrivatizedDecodeOpT privatized_decode_op[NUM_ACTIVE_CHANNELS]; OutputDecodeOpT output_decode_op[NUM_ACTIVE_CHANNELS]; int max_levels = num_output_levels[0]; for (int channel = 0; channel < NUM_ACTIVE_CHANNELS; ++channel) { privatized_decode_op[channel].Init(d_levels[channel], num_output_levels[channel]); if (num_output_levels[channel] > max_levels) max_levels = num_output_levels[channel]; } int max_num_output_bins = max_levels - 1; // Dispatch if (max_num_output_bins > MAX_PRIVATIZED_SMEM_BINS) { // Too many bins to keep in shared memory. const int PRIVATIZED_SMEM_BINS = 0; if (CubDebug(error = PrivatizedDispatch( d_temp_storage, temp_storage_bytes, d_samples, d_output_histograms, num_output_levels, privatized_decode_op, num_output_levels, output_decode_op, max_num_output_bins, num_row_pixels, num_rows, row_stride_samples, DeviceHistogramInitKernel, DeviceHistogramSweepKernel, histogram_sweep_config, stream, debug_synchronous))) break; } else { // Dispatch shared-privatized approach const int PRIVATIZED_SMEM_BINS = MAX_PRIVATIZED_SMEM_BINS; if (CubDebug(error = PrivatizedDispatch( d_temp_storage, temp_storage_bytes, d_samples, d_output_histograms, num_output_levels, privatized_decode_op, num_output_levels, output_decode_op, max_num_output_bins, num_row_pixels, num_rows, row_stride_samples, DeviceHistogramInitKernel, DeviceHistogramSweepKernel, histogram_sweep_config, stream, debug_synchronous))) break; } } while (0); return error; } /** * Dispatch routine for HistogramRange, specialized for 8-bit sample types (computes 256-bin privatized histograms and then reduces to user-specified levels) */ CUB_RUNTIME_FUNCTION static cudaError_t DispatchRange( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation SampleIteratorT d_samples, ///< [in] The pointer to the multi-channel input sequence of data samples. 
The samples from different channels are assumed to be interleaved (e.g., an array of 32-bit pixels where each pixel consists of four RGBA 8-bit samples). CounterT* d_output_histograms[NUM_ACTIVE_CHANNELS], ///< [out] The pointers to the histogram counter output arrays, one for each active channel. For channeli, the allocation length of d_histograms[i] should be num_output_levels[i] - 1. int num_output_levels[NUM_ACTIVE_CHANNELS], ///< [in] The number of boundaries (levels) for delineating histogram samples in each active channel. Implies that the number of bins for channeli is num_output_levels[i] - 1. LevelT *d_levels[NUM_ACTIVE_CHANNELS], ///< [in] The pointers to the arrays of boundaries (levels), one for each active channel. Bin ranges are defined by consecutive boundary pairings: lower sample value boundaries are inclusive and upper sample value boundaries are exclusive. OffsetT num_row_pixels, ///< [in] The number of multi-channel pixels per row in the region of interest OffsetT num_rows, ///< [in] The number of rows in the region of interest OffsetT row_stride_samples, ///< [in] The number of samples between starts of consecutive rows in the region of interest cudaStream_t stream, ///< [in] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous, ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. Int2Type is_byte_sample) ///< [in] Marker type indicating whether or not SampleT is a 8b type { cudaError error = cudaSuccess; do { // Get PTX version int ptx_version; #if (CUB_PTX_ARCH == 0) if (CubDebug(error = PtxVersion(ptx_version))) break; #else ptx_version = CUB_PTX_ARCH; #endif // Get kernel dispatch configurations KernelConfig histogram_sweep_config; if (CubDebug(error = InitConfigs(ptx_version, histogram_sweep_config))) break; // Use the pass-thru transform op for converting samples to privatized bins typedef PassThruTransform PrivatizedDecodeOpT; // Use the search transform op for converting privatized bins to output bins typedef SearchTransform OutputDecodeOpT; int num_privatized_levels[NUM_ACTIVE_CHANNELS]; PrivatizedDecodeOpT privatized_decode_op[NUM_ACTIVE_CHANNELS]; OutputDecodeOpT output_decode_op[NUM_ACTIVE_CHANNELS]; int max_levels = num_output_levels[0]; // Maximum number of levels in any channel for (int channel = 0; channel < NUM_ACTIVE_CHANNELS; ++channel) { num_privatized_levels[channel] = 257; output_decode_op[channel].Init(d_levels[channel], num_output_levels[channel]); if (num_output_levels[channel] > max_levels) max_levels = num_output_levels[channel]; } int max_num_output_bins = max_levels - 1; const int PRIVATIZED_SMEM_BINS = 256; if (CubDebug(error = PrivatizedDispatch( d_temp_storage, temp_storage_bytes, d_samples, d_output_histograms, num_privatized_levels, privatized_decode_op, num_output_levels, output_decode_op, max_num_output_bins, num_row_pixels, num_rows, row_stride_samples, DeviceHistogramInitKernel, DeviceHistogramSweepKernel, histogram_sweep_config, stream, debug_synchronous))) break; } while (0); return error; } /** * Dispatch routine for HistogramEven, specialized for sample types larger than 8-bit */ CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t DispatchEven( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. 
size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation SampleIteratorT d_samples, ///< [in] The pointer to the input sequence of sample items. The samples from different channels are assumed to be interleaved (e.g., an array of 32-bit pixels where each pixel consists of four RGBA 8-bit samples). CounterT* d_output_histograms[NUM_ACTIVE_CHANNELS], ///< [out] The pointers to the histogram counter output arrays, one for each active channel. For channeli, the allocation length of d_histograms[i] should be num_output_levels[i] - 1. int num_output_levels[NUM_ACTIVE_CHANNELS], ///< [in] The number of bin level boundaries for delineating histogram samples in each active channel. Implies that the number of bins for channeli is num_output_levels[i] - 1. LevelT lower_level[NUM_ACTIVE_CHANNELS], ///< [in] The lower sample value bound (inclusive) for the lowest histogram bin in each active channel. LevelT upper_level[NUM_ACTIVE_CHANNELS], ///< [in] The upper sample value bound (exclusive) for the highest histogram bin in each active channel. OffsetT num_row_pixels, ///< [in] The number of multi-channel pixels per row in the region of interest OffsetT num_rows, ///< [in] The number of rows in the region of interest OffsetT row_stride_samples, ///< [in] The number of samples between starts of consecutive rows in the region of interest cudaStream_t stream, ///< [in] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous, ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. Int2Type is_byte_sample) ///< [in] Marker type indicating whether or not SampleT is a 8b type { cudaError error = cudaSuccess; do { // Get PTX version int ptx_version; #if (CUB_PTX_ARCH == 0) if (CubDebug(error = PtxVersion(ptx_version))) break; #else ptx_version = CUB_PTX_ARCH; #endif // Get kernel dispatch configurations KernelConfig histogram_sweep_config; if (CubDebug(error = InitConfigs(ptx_version, histogram_sweep_config))) break; // Use the scale transform op for converting samples to privatized bins typedef ScaleTransform PrivatizedDecodeOpT; // Use the pass-thru transform op for converting privatized bins to output bins typedef PassThruTransform OutputDecodeOpT; PrivatizedDecodeOpT privatized_decode_op[NUM_ACTIVE_CHANNELS]; OutputDecodeOpT output_decode_op[NUM_ACTIVE_CHANNELS]; int max_levels = num_output_levels[0]; for (int channel = 0; channel < NUM_ACTIVE_CHANNELS; ++channel) { int bins = num_output_levels[channel] - 1; LevelT scale = (upper_level[channel] - lower_level[channel]) / bins; privatized_decode_op[channel].Init(num_output_levels[channel], upper_level[channel], lower_level[channel], scale); if (num_output_levels[channel] > max_levels) max_levels = num_output_levels[channel]; } int max_num_output_bins = max_levels - 1; if (max_num_output_bins > MAX_PRIVATIZED_SMEM_BINS) { // Dispatch shared-privatized approach const int PRIVATIZED_SMEM_BINS = 0; if (CubDebug(error = PrivatizedDispatch( d_temp_storage, temp_storage_bytes, d_samples, d_output_histograms, num_output_levels, privatized_decode_op, num_output_levels, output_decode_op, max_num_output_bins, num_row_pixels, num_rows, row_stride_samples, DeviceHistogramInitKernel, DeviceHistogramSweepKernel, histogram_sweep_config, stream, debug_synchronous))) break; } else { // Dispatch shared-privatized approach const int PRIVATIZED_SMEM_BINS = MAX_PRIVATIZED_SMEM_BINS; if (CubDebug(error = 
PrivatizedDispatch( d_temp_storage, temp_storage_bytes, d_samples, d_output_histograms, num_output_levels, privatized_decode_op, num_output_levels, output_decode_op, max_num_output_bins, num_row_pixels, num_rows, row_stride_samples, DeviceHistogramInitKernel, DeviceHistogramSweepKernel, histogram_sweep_config, stream, debug_synchronous))) break; } } while (0); return error; } /** * Dispatch routine for HistogramEven, specialized for 8-bit sample types (computes 256-bin privatized histograms and then reduces to user-specified levels) */ CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t DispatchEven( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation SampleIteratorT d_samples, ///< [in] The pointer to the input sequence of sample items. The samples from different channels are assumed to be interleaved (e.g., an array of 32-bit pixels where each pixel consists of four RGBA 8-bit samples). CounterT* d_output_histograms[NUM_ACTIVE_CHANNELS], ///< [out] The pointers to the histogram counter output arrays, one for each active channel. For channeli, the allocation length of d_histograms[i] should be num_output_levels[i] - 1. int num_output_levels[NUM_ACTIVE_CHANNELS], ///< [in] The number of bin level boundaries for delineating histogram samples in each active channel. Implies that the number of bins for channeli is num_output_levels[i] - 1. LevelT lower_level[NUM_ACTIVE_CHANNELS], ///< [in] The lower sample value bound (inclusive) for the lowest histogram bin in each active channel. LevelT upper_level[NUM_ACTIVE_CHANNELS], ///< [in] The upper sample value bound (exclusive) for the highest histogram bin in each active channel. OffsetT num_row_pixels, ///< [in] The number of multi-channel pixels per row in the region of interest OffsetT num_rows, ///< [in] The number of rows in the region of interest OffsetT row_stride_samples, ///< [in] The number of samples between starts of consecutive rows in the region of interest cudaStream_t stream, ///< [in] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous, ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. 
Int2Type<true> is_byte_sample)          ///< [in] Marker type indicating whether or not SampleT is a 8b type
    {
        cudaError error = cudaSuccess;
        do
        {
            // Get PTX version
            int ptx_version;
        #if (CUB_PTX_ARCH == 0)
            if (CubDebug(error = PtxVersion(ptx_version))) break;
        #else
            ptx_version = CUB_PTX_ARCH;
        #endif

            // Get kernel dispatch configurations
            KernelConfig histogram_sweep_config;
            if (CubDebug(error = InitConfigs(ptx_version, histogram_sweep_config))) break;

            // Use the pass-thru transform op for converting samples to privatized bins
            typedef PassThruTransform PrivatizedDecodeOpT;

            // Use the scale transform op for converting privatized bins to output bins
            typedef ScaleTransform OutputDecodeOpT;

            int num_privatized_levels[NUM_ACTIVE_CHANNELS];
            PrivatizedDecodeOpT privatized_decode_op[NUM_ACTIVE_CHANNELS];
            OutputDecodeOpT output_decode_op[NUM_ACTIVE_CHANNELS];
            int max_levels = num_output_levels[0];

            for (int channel = 0; channel < NUM_ACTIVE_CHANNELS; ++channel)
            {
                num_privatized_levels[channel] = 257;

                int bins = num_output_levels[channel] - 1;
                LevelT scale = (upper_level[channel] - lower_level[channel]) / bins;
                output_decode_op[channel].Init(num_output_levels[channel], upper_level[channel], lower_level[channel], scale);

                if (num_output_levels[channel] > max_levels)
                    max_levels = num_output_levels[channel];
            }
            int max_num_output_bins = max_levels - 1;

            const int PRIVATIZED_SMEM_BINS = 256;

            if (CubDebug(error = PrivatizedDispatch(
                d_temp_storage, temp_storage_bytes, d_samples, d_output_histograms,
                num_privatized_levels, privatized_decode_op, num_output_levels, output_decode_op,
                max_num_output_bins, num_row_pixels, num_rows, row_stride_samples,
                DeviceHistogramInitKernel, DeviceHistogramSweepKernel,
                histogram_sweep_config, stream, debug_synchronous))) break;

        } while (0);

        return error;
    }

};


}               // CUB namespace
CUB_NS_POSTFIX  // Optional outer namespace(s)


relion-3.1.3/src/acc/cuda/cub/device/dispatch/dispatch_radix_sort.cuh

/******************************************************************************
 * Copyright (c) 2011, Duane Merrill. All rights reserved.
 * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of the NVIDIA CORPORATION nor the
 *       names of its contributors may be used to endorse or promote products
 *       derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.
 * IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 ******************************************************************************/

/**
 * \file
 * cub::DeviceRadixSort provides device-wide, parallel operations for computing a radix sort across a sequence of data items residing within device-accessible memory.
 */

#pragma once

#include <stdio.h>
#include <iterator>

#include "../../agent/agent_radix_sort_upsweep.cuh"
#include "../../agent/agent_radix_sort_downsweep.cuh"
#include "../../agent/agent_scan.cuh"
#include "../../block/block_radix_sort.cuh"
#include "../../grid/grid_even_share.cuh"
#include "../../util_type.cuh"
#include "../../util_debug.cuh"
#include "../../util_device.cuh"
#include "../../util_namespace.cuh"

/// Optional outer namespace(s)
CUB_NS_PREFIX

/// CUB namespace
namespace cub {

/******************************************************************************
 * Kernel entry points
 *****************************************************************************/

/**
 * Upsweep digit-counting kernel entry point (multi-block).  Computes privatized digit histograms, one per block.
 */
template <
    typename                ChainedPolicyT,     ///< Chained tuning policy
    bool                    ALT_DIGIT_BITS,     ///< Whether or not to use the alternate (lower-bits) policy
    bool                    IS_DESCENDING,      ///< Whether or not the sorted-order is high-to-low
    typename                KeyT,               ///< Key type
    typename                OffsetT>            ///< Signed integer type for global offsets
__launch_bounds__ (int((ALT_DIGIT_BITS) ?
    ChainedPolicyT::ActivePolicy::AltUpsweepPolicy::BLOCK_THREADS :
    ChainedPolicyT::ActivePolicy::UpsweepPolicy::BLOCK_THREADS))
__global__ void DeviceRadixSortUpsweepKernel(
    const KeyT              *d_keys,            ///< [in] Input keys buffer
    OffsetT                 *d_spine,           ///< [out] Privatized (per block) digit histograms (striped, i.e., 0s counts from each block, then 1s counts from each block, etc.)
    OffsetT                 /*num_items*/,      ///< [in] Total number of input data items
    int                     current_bit,        ///< [in] Bit position of current radix digit
    int                     num_bits,           ///< [in] Number of bits of current radix digit
    GridEvenShare<OffsetT>  even_share)         ///< [in] Even-share descriptor for mapping an equal number of tiles onto each thread block
{
    enum {
        TILE_ITEMS = ChainedPolicyT::ActivePolicy::AltUpsweepPolicy::BLOCK_THREADS *
                        ChainedPolicyT::ActivePolicy::AltUpsweepPolicy::ITEMS_PER_THREAD
    };

    // Parameterize AgentRadixSortUpsweep type for the current configuration
    typedef AgentRadixSortUpsweep<
            typename If<(ALT_DIGIT_BITS),
                typename ChainedPolicyT::ActivePolicy::AltUpsweepPolicy,
                typename ChainedPolicyT::ActivePolicy::UpsweepPolicy>::Type,
            KeyT,
            OffsetT>
        AgentRadixSortUpsweepT;

    // Shared memory storage
    __shared__ typename AgentRadixSortUpsweepT::TempStorage temp_storage;

    // Initialize GRID_MAPPING_RAKE even-share descriptor for this thread block
    even_share.template BlockInit<TILE_ITEMS, GRID_MAPPING_RAKE>();

    AgentRadixSortUpsweepT upsweep(temp_storage, d_keys, current_bit, num_bits);

    upsweep.ProcessRegion(even_share.block_offset, even_share.block_end);

    CTA_SYNC();

    // Write out digit counts (striped)
    upsweep.ExtractCounts(d_spine, gridDim.x, blockIdx.x);
}


/**
 * Spine scan kernel entry point (single-block).
Computes an exclusive prefix sum over the privatized digit histograms */ template < typename ChainedPolicyT, ///< Chained tuning policy typename OffsetT> ///< Signed integer type for global offsets __launch_bounds__ (int(ChainedPolicyT::ActivePolicy::ScanPolicy::BLOCK_THREADS), 1) __global__ void RadixSortScanBinsKernel( OffsetT *d_spine, ///< [in,out] Privatized (per block) digit histograms (striped, i.e., 0s counts from each block, then 1s counts from each block, etc.) int num_counts) ///< [in] Total number of bin-counts { // Parameterize the AgentScan type for the current configuration typedef AgentScan< typename ChainedPolicyT::ActivePolicy::ScanPolicy, OffsetT*, OffsetT*, cub::Sum, OffsetT, OffsetT> AgentScanT; // Shared memory storage __shared__ typename AgentScanT::TempStorage temp_storage; // Block scan instance AgentScanT block_scan(temp_storage, d_spine, d_spine, cub::Sum(), OffsetT(0)) ; // Process full input tiles int block_offset = 0; BlockScanRunningPrefixOp prefix_op(0, Sum()); while (block_offset + AgentScanT::TILE_ITEMS <= num_counts) { block_scan.template ConsumeTile(block_offset, prefix_op); block_offset += AgentScanT::TILE_ITEMS; } } /** * Downsweep pass kernel entry point (multi-block). Scatters keys (and values) into corresponding bins for the current digit place. */ template < typename ChainedPolicyT, ///< Chained tuning policy bool ALT_DIGIT_BITS, ///< Whether or not to use the alternate (lower-bits) policy bool IS_DESCENDING, ///< Whether or not the sorted-order is high-to-low typename KeyT, ///< Key type typename ValueT, ///< Value type typename OffsetT> ///< Signed integer type for global offsets __launch_bounds__ (int((ALT_DIGIT_BITS) ? ChainedPolicyT::ActivePolicy::AltDownsweepPolicy::BLOCK_THREADS : ChainedPolicyT::ActivePolicy::DownsweepPolicy::BLOCK_THREADS)) __global__ void DeviceRadixSortDownsweepKernel( const KeyT *d_keys_in, ///< [in] Input keys buffer KeyT *d_keys_out, ///< [in] Output keys buffer const ValueT *d_values_in, ///< [in] Input values buffer ValueT *d_values_out, ///< [in] Output values buffer OffsetT *d_spine, ///< [in] Scan of privatized (per block) digit histograms (striped, i.e., 0s counts from each block, then 1s counts from each block, etc.) OffsetT num_items, ///< [in] Total number of input data items int current_bit, ///< [in] Bit position of current radix digit int num_bits, ///< [in] Number of bits of current radix digit GridEvenShare even_share) ///< [in] Even-share descriptor for mapan equal number of tiles onto each thread block { enum { TILE_ITEMS = ChainedPolicyT::ActivePolicy::AltUpsweepPolicy::BLOCK_THREADS * ChainedPolicyT::ActivePolicy::AltUpsweepPolicy::ITEMS_PER_THREAD }; // Parameterize AgentRadixSortDownsweep type for the current configuration typedef AgentRadixSortDownsweep< typename If<(ALT_DIGIT_BITS), typename ChainedPolicyT::ActivePolicy::AltDownsweepPolicy, typename ChainedPolicyT::ActivePolicy::DownsweepPolicy>::Type, IS_DESCENDING, KeyT, ValueT, OffsetT> AgentRadixSortDownsweepT; // Shared memory storage __shared__ typename AgentRadixSortDownsweepT::TempStorage temp_storage; // Initialize even-share descriptor for this thread block even_share.template BlockInit(); // Process input tiles AgentRadixSortDownsweepT(temp_storage, num_items, d_spine, d_keys_in, d_keys_out, d_values_in, d_values_out, current_bit, num_bits).ProcessRegion( even_share.block_offset, even_share.block_end); } /** * Single pass kernel entry point (single-block). Fully sorts a tile of input. 
*/ template < typename ChainedPolicyT, ///< Chained tuning policy bool IS_DESCENDING, ///< Whether or not the sorted-order is high-to-low typename KeyT, ///< Key type typename ValueT, ///< Value type typename OffsetT> ///< Signed integer type for global offsets __launch_bounds__ (int(ChainedPolicyT::ActivePolicy::SingleTilePolicy::BLOCK_THREADS), 1) __global__ void DeviceRadixSortSingleTileKernel( const KeyT *d_keys_in, ///< [in] Input keys buffer KeyT *d_keys_out, ///< [in] Output keys buffer const ValueT *d_values_in, ///< [in] Input values buffer ValueT *d_values_out, ///< [in] Output values buffer OffsetT num_items, ///< [in] Total number of input data items int current_bit, ///< [in] Bit position of current radix digit int end_bit) ///< [in] The past-the-end (most-significant) bit index needed for key comparison { // Constants enum { BLOCK_THREADS = ChainedPolicyT::ActivePolicy::SingleTilePolicy::BLOCK_THREADS, ITEMS_PER_THREAD = ChainedPolicyT::ActivePolicy::SingleTilePolicy::ITEMS_PER_THREAD, KEYS_ONLY = Equals::VALUE, }; // BlockRadixSort type typedef BlockRadixSort< KeyT, BLOCK_THREADS, ITEMS_PER_THREAD, ValueT, ChainedPolicyT::ActivePolicy::SingleTilePolicy::RADIX_BITS, (ChainedPolicyT::ActivePolicy::SingleTilePolicy::RANK_ALGORITHM == RADIX_RANK_MEMOIZE), ChainedPolicyT::ActivePolicy::SingleTilePolicy::SCAN_ALGORITHM> BlockRadixSortT; // BlockLoad type (keys) typedef BlockLoad< KeyT, BLOCK_THREADS, ITEMS_PER_THREAD, ChainedPolicyT::ActivePolicy::SingleTilePolicy::LOAD_ALGORITHM> BlockLoadKeys; // BlockLoad type (values) typedef BlockLoad< ValueT, BLOCK_THREADS, ITEMS_PER_THREAD, ChainedPolicyT::ActivePolicy::SingleTilePolicy::LOAD_ALGORITHM> BlockLoadValues; // Unsigned word for key bits typedef typename Traits::UnsignedBits UnsignedBitsT; // Shared memory storage __shared__ union TempStorage { typename BlockRadixSortT::TempStorage sort; typename BlockLoadKeys::TempStorage load_keys; typename BlockLoadValues::TempStorage load_values; } temp_storage; // Keys and values for the block KeyT keys[ITEMS_PER_THREAD]; ValueT values[ITEMS_PER_THREAD]; // Get default (min/max) value for out-of-bounds keys UnsignedBitsT default_key_bits = (IS_DESCENDING) ? Traits::LOWEST_KEY : Traits::MAX_KEY; KeyT default_key = reinterpret_cast(default_key_bits); // Load keys BlockLoadKeys(temp_storage.load_keys).Load(d_keys_in, keys, num_items, default_key); CTA_SYNC(); // Load values if (!KEYS_ONLY) { BlockLoadValues(temp_storage.load_values).Load(d_values_in, values, num_items); CTA_SYNC(); } // Sort tile BlockRadixSortT(temp_storage.sort).SortBlockedToStriped( keys, values, current_bit, end_bit, Int2Type(), Int2Type()); // Store keys and values #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { int item_offset = ITEM * BLOCK_THREADS + threadIdx.x; if (item_offset < num_items) { d_keys_out[item_offset] = keys[ITEM]; if (!KEYS_ONLY) d_values_out[item_offset] = values[ITEM]; } } } /** * Segmented radix sorting pass (one block per segment) */ template < typename ChainedPolicyT, ///< Chained tuning policy bool ALT_DIGIT_BITS, ///< Whether or not to use the alternate (lower-bits) policy bool IS_DESCENDING, ///< Whether or not the sorted-order is high-to-low typename KeyT, ///< Key type typename ValueT, ///< Value type typename OffsetIteratorT, ///< Random-access input iterator type for reading segment offsets \iterator typename OffsetT> ///< Signed integer type for global offsets __launch_bounds__ (int((ALT_DIGIT_BITS) ? 
ChainedPolicyT::ActivePolicy::AltSegmentedPolicy::BLOCK_THREADS : ChainedPolicyT::ActivePolicy::SegmentedPolicy::BLOCK_THREADS)) __global__ void DeviceSegmentedRadixSortKernel( const KeyT *d_keys_in, ///< [in] Input keys buffer KeyT *d_keys_out, ///< [in] Output keys buffer const ValueT *d_values_in, ///< [in] Input values buffer ValueT *d_values_out, ///< [in] Output values buffer OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. int /*num_segments*/, ///< [in] The number of segments that comprise the sorting data int current_bit, ///< [in] Bit position of current radix digit int pass_bits) ///< [in] Number of bits of current radix digit { // // Constants // typedef typename If<(ALT_DIGIT_BITS), typename ChainedPolicyT::ActivePolicy::AltSegmentedPolicy, typename ChainedPolicyT::ActivePolicy::SegmentedPolicy>::Type SegmentedPolicyT; enum { BLOCK_THREADS = SegmentedPolicyT::BLOCK_THREADS, ITEMS_PER_THREAD = SegmentedPolicyT::ITEMS_PER_THREAD, RADIX_BITS = SegmentedPolicyT::RADIX_BITS, TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, RADIX_DIGITS = 1 << RADIX_BITS, KEYS_ONLY = Equals::VALUE, }; // Upsweep type typedef AgentRadixSortUpsweep< AgentRadixSortUpsweepPolicy, KeyT, OffsetT> BlockUpsweepT; // Digit-scan type typedef BlockScan DigitScanT; // Downsweep type typedef AgentRadixSortDownsweep BlockDownsweepT; enum { /// Number of bin-starting offsets tracked per thread BINS_TRACKED_PER_THREAD = BlockDownsweepT::BINS_TRACKED_PER_THREAD }; // // Process input tiles // // Shared memory storage __shared__ union { typename BlockUpsweepT::TempStorage upsweep; typename BlockDownsweepT::TempStorage downsweep; struct { volatile OffsetT reverse_counts_in[RADIX_DIGITS]; volatile OffsetT reverse_counts_out[RADIX_DIGITS]; typename DigitScanT::TempStorage scan; }; } temp_storage; OffsetT segment_begin = d_begin_offsets[blockIdx.x]; OffsetT segment_end = d_end_offsets[blockIdx.x]; OffsetT num_items = segment_end - segment_begin; // Check if empty segment if (num_items <= 0) return; // Upsweep BlockUpsweepT upsweep(temp_storage.upsweep, d_keys_in, current_bit, pass_bits); upsweep.ProcessRegion(segment_begin, segment_end); CTA_SYNC(); // The count of each digit value in this pass (valid in the first RADIX_DIGITS threads) OffsetT bin_count[BINS_TRACKED_PER_THREAD]; upsweep.ExtractCounts(bin_count); CTA_SYNC(); if (IS_DESCENDING) { // Reverse bin counts #pragma unroll for (int track = 0; track < BINS_TRACKED_PER_THREAD; ++track) { int bin_idx = (threadIdx.x * BINS_TRACKED_PER_THREAD) + track; if ((BLOCK_THREADS == RADIX_DIGITS) || (bin_idx < RADIX_DIGITS)) temp_storage.reverse_counts_in[bin_idx] = bin_count[track]; } CTA_SYNC(); #pragma unroll for (int track = 0; track < BINS_TRACKED_PER_THREAD; ++track) { int bin_idx = (threadIdx.x * BINS_TRACKED_PER_THREAD) + track; if ((BLOCK_THREADS == RADIX_DIGITS) || (bin_idx < RADIX_DIGITS)) bin_count[track] = temp_storage.reverse_counts_in[RADIX_DIGITS - bin_idx - 1]; } } // Scan OffsetT bin_offset[BINS_TRACKED_PER_THREAD]; // The global scatter base offset for each digit value in this pass (valid in the first RADIX_DIGITS 
threads) DigitScanT(temp_storage.scan).ExclusiveSum(bin_count, bin_offset); #pragma unroll for (int track = 0; track < BINS_TRACKED_PER_THREAD; ++track) { bin_offset[track] += segment_begin; } if (IS_DESCENDING) { // Reverse bin offsets #pragma unroll for (int track = 0; track < BINS_TRACKED_PER_THREAD; ++track) { int bin_idx = (threadIdx.x * BINS_TRACKED_PER_THREAD) + track; if ((BLOCK_THREADS == RADIX_DIGITS) || (bin_idx < RADIX_DIGITS)) temp_storage.reverse_counts_out[threadIdx.x] = bin_offset[track]; } CTA_SYNC(); #pragma unroll for (int track = 0; track < BINS_TRACKED_PER_THREAD; ++track) { int bin_idx = (threadIdx.x * BINS_TRACKED_PER_THREAD) + track; if ((BLOCK_THREADS == RADIX_DIGITS) || (bin_idx < RADIX_DIGITS)) bin_offset[track] = temp_storage.reverse_counts_out[RADIX_DIGITS - bin_idx - 1]; } } CTA_SYNC(); // Downsweep BlockDownsweepT downsweep(temp_storage.downsweep, bin_offset, num_items, d_keys_in, d_keys_out, d_values_in, d_values_out, current_bit, pass_bits); downsweep.ProcessRegion(segment_begin, segment_end); } /****************************************************************************** * Policy ******************************************************************************/ /** * Tuning policy for kernel specialization */ template < typename KeyT, ///< Key type typename ValueT, ///< Value type typename OffsetT> ///< Signed integer type for global offsets struct DeviceRadixSortPolicy { //------------------------------------------------------------------------------ // Constants //------------------------------------------------------------------------------ enum { // Whether this is a keys-only (or key-value) sort KEYS_ONLY = (Equals::VALUE), // Relative size of KeyT type to a 4-byte word SCALE_FACTOR_4B = (CUB_MAX(sizeof(KeyT), sizeof(ValueT)) + 3) / 4, }; //------------------------------------------------------------------------------ // Architecture-specific tuning policies //------------------------------------------------------------------------------ /// SM13 struct Policy130 : ChainedPolicy<130, Policy130, Policy130> { enum { PRIMARY_RADIX_BITS = 5, ALT_RADIX_BITS = PRIMARY_RADIX_BITS - 1, }; // Keys-only upsweep policies typedef AgentRadixSortUpsweepPolicy <128, CUB_MAX(1, 19 / SCALE_FACTOR_4B), LOAD_DEFAULT, PRIMARY_RADIX_BITS> UpsweepPolicyKeys; typedef AgentRadixSortUpsweepPolicy <128, CUB_MAX(1, 15 / SCALE_FACTOR_4B), LOAD_DEFAULT, ALT_RADIX_BITS> AltUpsweepPolicyKeys; // Key-value pairs upsweep policies typedef AgentRadixSortUpsweepPolicy <128, CUB_MAX(1, 19 / SCALE_FACTOR_4B), LOAD_DEFAULT, PRIMARY_RADIX_BITS> UpsweepPolicyPairs; typedef AgentRadixSortUpsweepPolicy <128, CUB_MAX(1, 15 / SCALE_FACTOR_4B), LOAD_DEFAULT, ALT_RADIX_BITS> AltUpsweepPolicyPairs; // Upsweep policies typedef typename If::Type UpsweepPolicy; typedef typename If::Type AltUpsweepPolicy; // Scan policy typedef AgentScanPolicy <256, 4, BLOCK_LOAD_VECTORIZE, LOAD_DEFAULT, BLOCK_STORE_VECTORIZE, BLOCK_SCAN_WARP_SCANS> ScanPolicy; // Keys-only downsweep policies typedef AgentRadixSortDownsweepPolicy <64, CUB_MAX(1, 19 / SCALE_FACTOR_4B), BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, PRIMARY_RADIX_BITS> DownsweepPolicyKeys; typedef AgentRadixSortDownsweepPolicy <128, CUB_MAX(1, 15 / SCALE_FACTOR_4B), BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, ALT_RADIX_BITS> AltDownsweepPolicyKeys; // Key-value pairs downsweep policies typedef AgentRadixSortDownsweepPolicy <64, CUB_MAX(1, 19 / SCALE_FACTOR_4B), 
BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, PRIMARY_RADIX_BITS> DownsweepPolicyPairs; typedef AgentRadixSortDownsweepPolicy <128, CUB_MAX(1, 15 / SCALE_FACTOR_4B), BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, ALT_RADIX_BITS> AltDownsweepPolicyPairs; // Downsweep policies typedef typename If::Type DownsweepPolicy; typedef typename If::Type AltDownsweepPolicy; // Single-tile policy typedef DownsweepPolicy SingleTilePolicy; // Segmented policies typedef DownsweepPolicy SegmentedPolicy; typedef AltDownsweepPolicy AltSegmentedPolicy; }; /// SM20 struct Policy200 : ChainedPolicy<200, Policy200, Policy130> { enum { PRIMARY_RADIX_BITS = 5, ALT_RADIX_BITS = PRIMARY_RADIX_BITS - 1, }; // Keys-only upsweep policies typedef AgentRadixSortUpsweepPolicy <64, CUB_MAX(1, 18 / SCALE_FACTOR_4B), LOAD_DEFAULT, PRIMARY_RADIX_BITS> UpsweepPolicyKeys; typedef AgentRadixSortUpsweepPolicy <64, CUB_MAX(1, 18 / SCALE_FACTOR_4B), LOAD_DEFAULT, ALT_RADIX_BITS> AltUpsweepPolicyKeys; // Key-value pairs upsweep policies typedef AgentRadixSortUpsweepPolicy <128, CUB_MAX(1, 13 / SCALE_FACTOR_4B), LOAD_DEFAULT, PRIMARY_RADIX_BITS> UpsweepPolicyPairs; typedef AgentRadixSortUpsweepPolicy <128, CUB_MAX(1, 13 / SCALE_FACTOR_4B), LOAD_DEFAULT, ALT_RADIX_BITS> AltUpsweepPolicyPairs; // Upsweep policies typedef typename If::Type UpsweepPolicy; typedef typename If::Type AltUpsweepPolicy; // Scan policy typedef AgentScanPolicy <512, 4, BLOCK_LOAD_VECTORIZE, LOAD_DEFAULT, BLOCK_STORE_VECTORIZE, BLOCK_SCAN_RAKING_MEMOIZE> ScanPolicy; // Keys-only downsweep policies typedef AgentRadixSortDownsweepPolicy <64, CUB_MAX(1, 18 / SCALE_FACTOR_4B), BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, PRIMARY_RADIX_BITS> DownsweepPolicyKeys; typedef AgentRadixSortDownsweepPolicy <64, CUB_MAX(1, 18 / SCALE_FACTOR_4B), BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, ALT_RADIX_BITS> AltDownsweepPolicyKeys; // Key-value pairs downsweep policies typedef AgentRadixSortDownsweepPolicy <128, CUB_MAX(1, 13 / SCALE_FACTOR_4B), BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, PRIMARY_RADIX_BITS> DownsweepPolicyPairs; typedef AgentRadixSortDownsweepPolicy <128, CUB_MAX(1, 13 / SCALE_FACTOR_4B), BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, ALT_RADIX_BITS> AltDownsweepPolicyPairs; // Downsweep policies typedef typename If::Type DownsweepPolicy; typedef typename If::Type AltDownsweepPolicy; // Single-tile policy typedef DownsweepPolicy SingleTilePolicy; // Segmented policies typedef DownsweepPolicy SegmentedPolicy; typedef AltDownsweepPolicy AltSegmentedPolicy; }; /// SM30 struct Policy300 : ChainedPolicy<300, Policy300, Policy200> { enum { PRIMARY_RADIX_BITS = 5, ALT_RADIX_BITS = PRIMARY_RADIX_BITS - 1, }; // Keys-only upsweep policies typedef AgentRadixSortUpsweepPolicy <256, CUB_MAX(1, 7 / SCALE_FACTOR_4B), LOAD_DEFAULT, PRIMARY_RADIX_BITS> UpsweepPolicyKeys; typedef AgentRadixSortUpsweepPolicy <256, CUB_MAX(1, 7 / SCALE_FACTOR_4B), LOAD_DEFAULT, ALT_RADIX_BITS> AltUpsweepPolicyKeys; // Key-value pairs upsweep policies typedef AgentRadixSortUpsweepPolicy <256, CUB_MAX(1, 5 / SCALE_FACTOR_4B), LOAD_DEFAULT, PRIMARY_RADIX_BITS> UpsweepPolicyPairs; typedef AgentRadixSortUpsweepPolicy <256, CUB_MAX(1, 5 / SCALE_FACTOR_4B), LOAD_DEFAULT, ALT_RADIX_BITS> AltUpsweepPolicyPairs; // Upsweep policies typedef typename If::Type UpsweepPolicy; typedef typename 
If::Type AltUpsweepPolicy; // Scan policy typedef AgentScanPolicy <1024, 4, BLOCK_LOAD_VECTORIZE, LOAD_DEFAULT, BLOCK_STORE_VECTORIZE, BLOCK_SCAN_WARP_SCANS> ScanPolicy; // Keys-only downsweep policies typedef AgentRadixSortDownsweepPolicy <128, CUB_MAX(1, 14 / SCALE_FACTOR_4B), BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, PRIMARY_RADIX_BITS> DownsweepPolicyKeys; typedef AgentRadixSortDownsweepPolicy <128, CUB_MAX(1, 14 / SCALE_FACTOR_4B), BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, ALT_RADIX_BITS> AltDownsweepPolicyKeys; // Key-value pairs downsweep policies typedef AgentRadixSortDownsweepPolicy <128, CUB_MAX(1, 10 / SCALE_FACTOR_4B), BLOCK_LOAD_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, PRIMARY_RADIX_BITS> DownsweepPolicyPairs; typedef AgentRadixSortDownsweepPolicy <128, CUB_MAX(1, 10 / SCALE_FACTOR_4B), BLOCK_LOAD_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, ALT_RADIX_BITS> AltDownsweepPolicyPairs; // Downsweep policies typedef typename If::Type DownsweepPolicy; typedef typename If::Type AltDownsweepPolicy; // Single-tile policy typedef DownsweepPolicy SingleTilePolicy; // Segmented policies typedef DownsweepPolicy SegmentedPolicy; typedef AltDownsweepPolicy AltSegmentedPolicy; }; /// SM35 struct Policy350 : ChainedPolicy<350, Policy350, Policy300> { enum { PRIMARY_RADIX_BITS = 6, // 1.72B 32b keys/s, 1.17B 32b pairs/s, 1.55B 32b segmented keys/s (K40m) }; // Scan policy typedef AgentScanPolicy <1024, 4, BLOCK_LOAD_VECTORIZE, LOAD_DEFAULT, BLOCK_STORE_VECTORIZE, BLOCK_SCAN_WARP_SCANS> ScanPolicy; // Keys-only downsweep policies typedef AgentRadixSortDownsweepPolicy <128, CUB_MAX(1, 9 / SCALE_FACTOR_4B), BLOCK_LOAD_WARP_TRANSPOSE, LOAD_LDG, RADIX_RANK_MATCH, BLOCK_SCAN_WARP_SCANS, PRIMARY_RADIX_BITS> DownsweepPolicyKeys; typedef AgentRadixSortDownsweepPolicy <64, CUB_MAX(1, 18 / SCALE_FACTOR_4B), BLOCK_LOAD_DIRECT, LOAD_LDG, RADIX_RANK_MEMOIZE, BLOCK_SCAN_WARP_SCANS, PRIMARY_RADIX_BITS - 1> AltDownsweepPolicyKeys; // Key-value pairs downsweep policies typedef DownsweepPolicyKeys DownsweepPolicyPairs; typedef AgentRadixSortDownsweepPolicy <128, CUB_MAX(1, 15 / SCALE_FACTOR_4B), BLOCK_LOAD_DIRECT, LOAD_LDG, RADIX_RANK_MEMOIZE, BLOCK_SCAN_WARP_SCANS, PRIMARY_RADIX_BITS - 1> AltDownsweepPolicyPairs; // Downsweep policies typedef typename If::Type DownsweepPolicy; typedef typename If::Type AltDownsweepPolicy; // Upsweep policies typedef DownsweepPolicy UpsweepPolicy; typedef AltDownsweepPolicy AltUpsweepPolicy; // Single-tile policy typedef DownsweepPolicy SingleTilePolicy; // Segmented policies typedef DownsweepPolicy SegmentedPolicy; typedef AltDownsweepPolicy AltSegmentedPolicy; }; /// SM50 struct Policy500 : ChainedPolicy<500, Policy500, Policy350> { enum { PRIMARY_RADIX_BITS = 7, // 3.5B 32b keys/s, 1.92B 32b pairs/s (TitanX) SINGLE_TILE_RADIX_BITS = 6, SEGMENTED_RADIX_BITS = 6, // 3.1B 32b segmented keys/s (TitanX) }; // ScanPolicy typedef AgentScanPolicy <512, 23, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, BLOCK_STORE_WARP_TRANSPOSE, BLOCK_SCAN_RAKING_MEMOIZE> ScanPolicy; // Downsweep policies typedef AgentRadixSortDownsweepPolicy <160, CUB_MAX(1, 39 / SCALE_FACTOR_4B), BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, PRIMARY_RADIX_BITS> DownsweepPolicy; typedef AgentRadixSortDownsweepPolicy <256, CUB_MAX(1, 16 / SCALE_FACTOR_4B), BLOCK_LOAD_DIRECT, LOAD_LDG, RADIX_RANK_MEMOIZE, BLOCK_SCAN_RAKING_MEMOIZE, PRIMARY_RADIX_BITS - 1> 
AltDownsweepPolicy; // Upsweep policies typedef DownsweepPolicy UpsweepPolicy; typedef AltDownsweepPolicy AltUpsweepPolicy; // Single-tile policy typedef AgentRadixSortDownsweepPolicy <256, CUB_MAX(1, 19 / SCALE_FACTOR_4B), BLOCK_LOAD_DIRECT, LOAD_LDG, RADIX_RANK_MEMOIZE, BLOCK_SCAN_WARP_SCANS, SINGLE_TILE_RADIX_BITS> SingleTilePolicy; // Segmented policies typedef AgentRadixSortDownsweepPolicy <192, CUB_MAX(1, 31 / SCALE_FACTOR_4B), BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_MEMOIZE, BLOCK_SCAN_WARP_SCANS, SEGMENTED_RADIX_BITS> SegmentedPolicy; typedef AgentRadixSortDownsweepPolicy <256, CUB_MAX(1, 11 / SCALE_FACTOR_4B), BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_MEMOIZE, BLOCK_SCAN_WARP_SCANS, SEGMENTED_RADIX_BITS - 1> AltSegmentedPolicy; }; /// SM60 (GP100) struct Policy600 : ChainedPolicy<600, Policy600, Policy500> { enum { PRIMARY_RADIX_BITS = 7, // 6.9B 32b keys/s (Quadro P100) SINGLE_TILE_RADIX_BITS = 6, SEGMENTED_RADIX_BITS = 6, // 5.9B 32b segmented keys/s (Quadro P100) }; // ScanPolicy typedef AgentScanPolicy <512, 23, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, BLOCK_STORE_WARP_TRANSPOSE, BLOCK_SCAN_RAKING_MEMOIZE> ScanPolicy; // Downsweep policies typedef AgentRadixSortDownsweepPolicy <256, CUB_MAX(1, 25 / SCALE_FACTOR_4B), BLOCK_LOAD_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_MATCH, BLOCK_SCAN_WARP_SCANS, PRIMARY_RADIX_BITS> DownsweepPolicy; typedef AgentRadixSortDownsweepPolicy <192, CUB_MAX(1, 39 / SCALE_FACTOR_4B), BLOCK_LOAD_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_MEMOIZE, BLOCK_SCAN_WARP_SCANS, PRIMARY_RADIX_BITS - 1> AltDownsweepPolicy; // Upsweep policies typedef DownsweepPolicy UpsweepPolicy; typedef AltDownsweepPolicy AltUpsweepPolicy; // Single-tile policy typedef AgentRadixSortDownsweepPolicy <256, CUB_MAX(1, 19 / SCALE_FACTOR_4B), BLOCK_LOAD_DIRECT, LOAD_LDG, RADIX_RANK_MEMOIZE, BLOCK_SCAN_WARP_SCANS, SINGLE_TILE_RADIX_BITS> SingleTilePolicy; // Segmented policies typedef AgentRadixSortDownsweepPolicy <192, CUB_MAX(1, 39 / SCALE_FACTOR_4B), BLOCK_LOAD_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_MEMOIZE, BLOCK_SCAN_WARP_SCANS, SEGMENTED_RADIX_BITS> SegmentedPolicy; typedef AgentRadixSortDownsweepPolicy <384, CUB_MAX(1, 11 / SCALE_FACTOR_4B), BLOCK_LOAD_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_MEMOIZE, BLOCK_SCAN_WARP_SCANS, SEGMENTED_RADIX_BITS - 1> AltSegmentedPolicy; }; /// SM61 (GP104) struct Policy610 : ChainedPolicy<610, Policy610, Policy600> { enum { PRIMARY_RADIX_BITS = 7, // 3.4B 32b keys/s, 1.83B 32b pairs/s (1080) SINGLE_TILE_RADIX_BITS = 6, SEGMENTED_RADIX_BITS = 6, // 3.3B 32b segmented keys/s (1080) }; // ScanPolicy typedef AgentScanPolicy <512, 23, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, BLOCK_STORE_WARP_TRANSPOSE, BLOCK_SCAN_RAKING_MEMOIZE> ScanPolicy; // Downsweep policies typedef AgentRadixSortDownsweepPolicy <384, CUB_MAX(1, 31 / SCALE_FACTOR_4B), BLOCK_LOAD_DIRECT, LOAD_DEFAULT, RADIX_RANK_MATCH, BLOCK_SCAN_RAKING_MEMOIZE, PRIMARY_RADIX_BITS> DownsweepPolicy; typedef AgentRadixSortDownsweepPolicy <256, CUB_MAX(1, 35 / SCALE_FACTOR_4B), BLOCK_LOAD_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_MEMOIZE, BLOCK_SCAN_RAKING_MEMOIZE, PRIMARY_RADIX_BITS - 1> AltDownsweepPolicy; // Upsweep policies typedef AgentRadixSortUpsweepPolicy <128, CUB_MAX(1, 16 / SCALE_FACTOR_4B), LOAD_LDG, PRIMARY_RADIX_BITS> UpsweepPolicy; typedef AgentRadixSortUpsweepPolicy <128, CUB_MAX(1, 16 / SCALE_FACTOR_4B), LOAD_LDG, PRIMARY_RADIX_BITS - 1> AltUpsweepPolicy; // Single-tile policy typedef AgentRadixSortDownsweepPolicy <256, CUB_MAX(1, 19 / SCALE_FACTOR_4B), BLOCK_LOAD_DIRECT, LOAD_LDG, 
RADIX_RANK_MEMOIZE, BLOCK_SCAN_WARP_SCANS, SINGLE_TILE_RADIX_BITS> SingleTilePolicy; // Segmented policies typedef AgentRadixSortDownsweepPolicy <192, CUB_MAX(1, 39 / SCALE_FACTOR_4B), BLOCK_LOAD_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_MEMOIZE, BLOCK_SCAN_WARP_SCANS, SEGMENTED_RADIX_BITS> SegmentedPolicy; typedef AgentRadixSortDownsweepPolicy <384, CUB_MAX(1, 11 / SCALE_FACTOR_4B), BLOCK_LOAD_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_MEMOIZE, BLOCK_SCAN_WARP_SCANS, SEGMENTED_RADIX_BITS - 1> AltSegmentedPolicy; }; /// SM62 (Tegra, less RF) struct Policy620 : ChainedPolicy<620, Policy620, Policy610> { enum { PRIMARY_RADIX_BITS = 5, ALT_RADIX_BITS = PRIMARY_RADIX_BITS - 1, }; // ScanPolicy typedef AgentScanPolicy <512, 23, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, BLOCK_STORE_WARP_TRANSPOSE, BLOCK_SCAN_RAKING_MEMOIZE> ScanPolicy; // Downsweep policies typedef AgentRadixSortDownsweepPolicy <256, CUB_MAX(1, 16 / SCALE_FACTOR_4B), BLOCK_LOAD_DIRECT, LOAD_DEFAULT, RADIX_RANK_MEMOIZE, BLOCK_SCAN_RAKING_MEMOIZE, PRIMARY_RADIX_BITS> DownsweepPolicy; typedef AgentRadixSortDownsweepPolicy <256, CUB_MAX(1, 16 / SCALE_FACTOR_4B), BLOCK_LOAD_DIRECT, LOAD_DEFAULT, RADIX_RANK_MEMOIZE, BLOCK_SCAN_RAKING_MEMOIZE, ALT_RADIX_BITS> AltDownsweepPolicy; // Upsweep policies typedef DownsweepPolicy UpsweepPolicy; typedef AltDownsweepPolicy AltUpsweepPolicy; // Single-tile policy typedef AgentRadixSortDownsweepPolicy <256, CUB_MAX(1, 19 / SCALE_FACTOR_4B), BLOCK_LOAD_DIRECT, LOAD_LDG, RADIX_RANK_MEMOIZE, BLOCK_SCAN_WARP_SCANS, PRIMARY_RADIX_BITS> SingleTilePolicy; // Segmented policies typedef DownsweepPolicy SegmentedPolicy; typedef AltDownsweepPolicy AltSegmentedPolicy; }; /// SM70 (GV100) struct Policy700 : ChainedPolicy<700, Policy700, Policy620> { enum { PRIMARY_RADIX_BITS = 6, // 7.62B 32b keys/s (GV100) SINGLE_TILE_RADIX_BITS = 6, SEGMENTED_RADIX_BITS = 6, // 8.7B 32b segmented keys/s (GV100) }; // ScanPolicy typedef AgentScanPolicy <512, 23, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, BLOCK_STORE_WARP_TRANSPOSE, BLOCK_SCAN_RAKING_MEMOIZE> ScanPolicy; // Downsweep policies typedef AgentRadixSortDownsweepPolicy <256, CUB_MAX(1, 47 / SCALE_FACTOR_4B), BLOCK_LOAD_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_MEMOIZE, BLOCK_SCAN_WARP_SCANS, PRIMARY_RADIX_BITS> DownsweepPolicy; typedef AgentRadixSortDownsweepPolicy <384, CUB_MAX(1, 29 / SCALE_FACTOR_4B), BLOCK_LOAD_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_MEMOIZE, BLOCK_SCAN_WARP_SCANS, PRIMARY_RADIX_BITS - 1> AltDownsweepPolicy; // Upsweep policies typedef AgentRadixSortDownsweepPolicy <128, CUB_MAX(1, 47 / SCALE_FACTOR_4B), BLOCK_LOAD_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_MATCH, BLOCK_SCAN_WARP_SCANS, PRIMARY_RADIX_BITS> UpsweepPolicy; typedef AgentRadixSortDownsweepPolicy <128, CUB_MAX(1, 29 / SCALE_FACTOR_4B), BLOCK_LOAD_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_MATCH, BLOCK_SCAN_WARP_SCANS, PRIMARY_RADIX_BITS - 1> AltUpsweepPolicy; // Single-tile policy typedef AgentRadixSortDownsweepPolicy <256, CUB_MAX(1, 19 / SCALE_FACTOR_4B), BLOCK_LOAD_DIRECT, LOAD_LDG, RADIX_RANK_MEMOIZE, BLOCK_SCAN_WARP_SCANS, SINGLE_TILE_RADIX_BITS> SingleTilePolicy; // Segmented policies typedef AgentRadixSortDownsweepPolicy <192, CUB_MAX(1, 39 / SCALE_FACTOR_4B), BLOCK_LOAD_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_MEMOIZE, BLOCK_SCAN_WARP_SCANS, SEGMENTED_RADIX_BITS> SegmentedPolicy; typedef AgentRadixSortDownsweepPolicy <384, CUB_MAX(1, 11 / SCALE_FACTOR_4B), BLOCK_LOAD_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_MEMOIZE, BLOCK_SCAN_WARP_SCANS, SEGMENTED_RADIX_BITS - 1> AltSegmentedPolicy; }; /// MaxPolicy typedef Policy700 
MaxPolicy; }; /****************************************************************************** * Single-problem dispatch ******************************************************************************/ /** * Utility class for dispatching the appropriately-tuned kernels for device-wide radix sort */ template < bool IS_DESCENDING, ///< Whether or not the sorted-order is high-to-low typename KeyT, ///< Key type typename ValueT, ///< Value type typename OffsetT> ///< Signed integer type for global offsets struct DispatchRadixSort : DeviceRadixSortPolicy { //------------------------------------------------------------------------------ // Constants //------------------------------------------------------------------------------ enum { // Whether this is a keys-only (or key-value) sort KEYS_ONLY = (Equals::VALUE), }; //------------------------------------------------------------------------------ // Problem state //------------------------------------------------------------------------------ void *d_temp_storage; ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes; ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation DoubleBuffer &d_keys; ///< [in,out] Double-buffer whose current buffer contains the unsorted input keys and, upon return, is updated to point to the sorted output keys DoubleBuffer &d_values; ///< [in,out] Double-buffer whose current buffer contains the unsorted input values and, upon return, is updated to point to the sorted output values OffsetT num_items; ///< [in] Number of items to sort int begin_bit; ///< [in] The beginning (least-significant) bit index needed for key comparison int end_bit; ///< [in] The past-the-end (most-significant) bit index needed for key comparison cudaStream_t stream; ///< [in] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous; ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. 
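    // Illustrative usage sketch (not part of the original header; assumes the public
    // cub::DeviceRadixSort front-end defined in device/device_radix_sort.cuh, with
    // hypothetical device buffers d_key_buf/d_key_alt_buf). The dispatcher whose
    // problem state is listed above is normally reached through that front-end using
    // CUB's two-phase temporary-storage convention:
    //
    //     cub::DoubleBuffer<KeyT> d_keys(d_key_buf, d_key_alt_buf);
    //     void   *d_temp_storage    = NULL;
    //     size_t  temp_storage_bytes = 0;
    //     cub::DeviceRadixSort::SortKeys(d_temp_storage, temp_storage_bytes, d_keys, num_items);  // size query only
    //     cudaMalloc(&d_temp_storage, temp_storage_bytes);
    //     cub::DeviceRadixSort::SortKeys(d_temp_storage, temp_storage_bytes, d_keys, num_items);  // actual sort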
int ptx_version; ///< [in] PTX version bool is_overwrite_okay; ///< [in] Whether is okay to overwrite source buffers //------------------------------------------------------------------------------ // Constructor //------------------------------------------------------------------------------ /// Constructor CUB_RUNTIME_FUNCTION __forceinline__ DispatchRadixSort( void* d_temp_storage, size_t &temp_storage_bytes, DoubleBuffer &d_keys, DoubleBuffer &d_values, OffsetT num_items, int begin_bit, int end_bit, bool is_overwrite_okay, cudaStream_t stream, bool debug_synchronous, int ptx_version) : d_temp_storage(d_temp_storage), temp_storage_bytes(temp_storage_bytes), d_keys(d_keys), d_values(d_values), num_items(num_items), begin_bit(begin_bit), end_bit(end_bit), stream(stream), debug_synchronous(debug_synchronous), ptx_version(ptx_version), is_overwrite_okay(is_overwrite_okay) {} //------------------------------------------------------------------------------ // Small-problem (single tile) invocation //------------------------------------------------------------------------------ /// Invoke a single block to sort in-core template < typename ActivePolicyT, ///< Umbrella policy active for the target device typename SingleTileKernelT> ///< Function type of cub::DeviceRadixSortSingleTileKernel CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t InvokeSingleTile( SingleTileKernelT single_tile_kernel) ///< [in] Kernel function pointer to parameterization of cub::DeviceRadixSortSingleTileKernel { #ifndef CUB_RUNTIME_ENABLED (void)single_tile_kernel; // Kernel launch not supported from this device return CubDebug(cudaErrorNotSupported ); #else cudaError error = cudaSuccess; do { // Return if the caller is simply requesting the size of the storage allocation if (d_temp_storage == NULL) { temp_storage_bytes = 1; break; } // Return if empty problem if (num_items == 0) break; // Log single_tile_kernel configuration if (debug_synchronous) _CubLog("Invoking single_tile_kernel<<<%d, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy, current bit %d, bit_grain %d\n", 1, ActivePolicyT::SingleTilePolicy::BLOCK_THREADS, (long long) stream, ActivePolicyT::SingleTilePolicy::ITEMS_PER_THREAD, 1, begin_bit, ActivePolicyT::SingleTilePolicy::RADIX_BITS); // Invoke upsweep_kernel with same grid size as downsweep_kernel single_tile_kernel<<<1, ActivePolicyT::SingleTilePolicy::BLOCK_THREADS, 0, stream>>>( d_keys.Current(), d_keys.Alternate(), d_values.Current(), d_values.Alternate(), num_items, begin_bit, end_bit); // Check for failure to launch if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; // Update selector d_keys.selector ^= 1; d_values.selector ^= 1; } while (0); return error; #endif // CUB_RUNTIME_ENABLED } //------------------------------------------------------------------------------ // Normal problem size invocation //------------------------------------------------------------------------------ /** * Invoke a three-kernel sorting pass at the current bit. 
*/ template CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t InvokePass( const KeyT *d_keys_in, KeyT *d_keys_out, const ValueT *d_values_in, ValueT *d_values_out, OffsetT *d_spine, int spine_length, int ¤t_bit, PassConfigT &pass_config) { cudaError error = cudaSuccess; do { int pass_bits = CUB_MIN(pass_config.radix_bits, (end_bit - current_bit)); // Log upsweep_kernel configuration if (debug_synchronous) _CubLog("Invoking upsweep_kernel<<<%d, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy, current bit %d, bit_grain %d\n", pass_config.even_share.grid_size, pass_config.upsweep_config.block_threads, (long long) stream, pass_config.upsweep_config.items_per_thread, pass_config.upsweep_config.sm_occupancy, current_bit, pass_bits); // Invoke upsweep_kernel with same grid size as downsweep_kernel pass_config.upsweep_kernel<<>>( d_keys_in, d_spine, num_items, current_bit, pass_bits, pass_config.even_share); // Check for failure to launch if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; // Log scan_kernel configuration if (debug_synchronous) _CubLog("Invoking scan_kernel<<<%d, %d, 0, %lld>>>(), %d items per thread\n", 1, pass_config.scan_config.block_threads, (long long) stream, pass_config.scan_config.items_per_thread); // Invoke scan_kernel pass_config.scan_kernel<<<1, pass_config.scan_config.block_threads, 0, stream>>>( d_spine, spine_length); // Check for failure to launch if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; // Log downsweep_kernel configuration if (debug_synchronous) _CubLog("Invoking downsweep_kernel<<<%d, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy\n", pass_config.even_share.grid_size, pass_config.downsweep_config.block_threads, (long long) stream, pass_config.downsweep_config.items_per_thread, pass_config.downsweep_config.sm_occupancy); // Invoke downsweep_kernel pass_config.downsweep_kernel<<>>( d_keys_in, d_keys_out, d_values_in, d_values_out, d_spine, num_items, current_bit, pass_bits, pass_config.even_share); // Check for failure to launch if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; // Update current bit current_bit += pass_bits; } while (0); return error; } /// Pass configuration structure template < typename UpsweepKernelT, typename ScanKernelT, typename DownsweepKernelT> struct PassConfig { UpsweepKernelT upsweep_kernel; KernelConfig upsweep_config; ScanKernelT scan_kernel; KernelConfig scan_config; DownsweepKernelT downsweep_kernel; KernelConfig downsweep_config; int radix_bits; int radix_digits; int max_downsweep_grid_size; GridEvenShare even_share; /// Initialize pass configuration template < typename UpsweepPolicyT, typename ScanPolicyT, typename DownsweepPolicyT> CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t InitPassConfig( UpsweepKernelT upsweep_kernel, ScanKernelT scan_kernel, DownsweepKernelT downsweep_kernel, int ptx_version, int sm_count, int num_items) { cudaError error = cudaSuccess; do { this->upsweep_kernel = upsweep_kernel; this->scan_kernel = scan_kernel; this->downsweep_kernel = downsweep_kernel; radix_bits = DownsweepPolicyT::RADIX_BITS; radix_digits = 1 << radix_bits; if (CubDebug(error = upsweep_config.Init(upsweep_kernel))) break; if 
(CubDebug(error = scan_config.Init(scan_kernel))) break; if (CubDebug(error = downsweep_config.Init(downsweep_kernel))) break; max_downsweep_grid_size = (downsweep_config.sm_occupancy * sm_count) * CUB_SUBSCRIPTION_FACTOR(ptx_version); even_share.DispatchInit( num_items, max_downsweep_grid_size, CUB_MAX(downsweep_config.tile_size, upsweep_config.tile_size)); } while (0); return error; } }; /// Invocation (run multiple digit passes) template < typename ActivePolicyT, ///< Umbrella policy active for the target device typename UpsweepKernelT, ///< Function type of cub::DeviceRadixSortUpsweepKernel typename ScanKernelT, ///< Function type of cub::SpineScanKernel typename DownsweepKernelT> ///< Function type of cub::DeviceRadixSortDownsweepKernel CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t InvokePasses( UpsweepKernelT upsweep_kernel, ///< [in] Kernel function pointer to parameterization of cub::DeviceRadixSortUpsweepKernel UpsweepKernelT alt_upsweep_kernel, ///< [in] Alternate kernel function pointer to parameterization of cub::DeviceRadixSortUpsweepKernel ScanKernelT scan_kernel, ///< [in] Kernel function pointer to parameterization of cub::SpineScanKernel DownsweepKernelT downsweep_kernel, ///< [in] Kernel function pointer to parameterization of cub::DeviceRadixSortDownsweepKernel DownsweepKernelT alt_downsweep_kernel) ///< [in] Alternate kernel function pointer to parameterization of cub::DeviceRadixSortDownsweepKernel { #ifndef CUB_RUNTIME_ENABLED (void)upsweep_kernel; (void)alt_upsweep_kernel; (void)scan_kernel; (void)downsweep_kernel; (void)alt_downsweep_kernel; // Kernel launch not supported from this device return CubDebug(cudaErrorNotSupported ); #else cudaError error = cudaSuccess; do { // Get device ordinal int device_ordinal; if (CubDebug(error = cudaGetDevice(&device_ordinal))) break; // Get SM count int sm_count; if (CubDebug(error = cudaDeviceGetAttribute (&sm_count, cudaDevAttrMultiProcessorCount, device_ordinal))) break; // Init regular and alternate-digit kernel configurations PassConfig pass_config, alt_pass_config; if ((error = pass_config.template InitPassConfig< typename ActivePolicyT::UpsweepPolicy, typename ActivePolicyT::ScanPolicy, typename ActivePolicyT::DownsweepPolicy>( upsweep_kernel, scan_kernel, downsweep_kernel, ptx_version, sm_count, num_items))) break; if ((error = alt_pass_config.template InitPassConfig< typename ActivePolicyT::AltUpsweepPolicy, typename ActivePolicyT::ScanPolicy, typename ActivePolicyT::AltDownsweepPolicy>( alt_upsweep_kernel, scan_kernel, alt_downsweep_kernel, ptx_version, sm_count, num_items))) break; // Get maximum spine length int max_grid_size = CUB_MAX(pass_config.max_downsweep_grid_size, alt_pass_config.max_downsweep_grid_size); int spine_length = (max_grid_size * pass_config.radix_digits) + pass_config.scan_config.tile_size; // Temporary storage allocation requirements void* allocations[3]; size_t allocation_sizes[3] = { spine_length * sizeof(OffsetT), // bytes needed for privatized block digit histograms (is_overwrite_okay) ? 0 : num_items * sizeof(KeyT), // bytes needed for 3rd keys buffer (is_overwrite_okay || (KEYS_ONLY)) ? 
0 : num_items * sizeof(ValueT), // bytes needed for 3rd values buffer }; // Alias the temporary allocations from the single storage blob (or compute the necessary size of the blob) if (CubDebug(error = AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes))) break; // Return if the caller is simply requesting the size of the storage allocation if (d_temp_storage == NULL) return cudaSuccess; // Pass planning. Run passes of the alternate digit-size configuration until we have an even multiple of our preferred digit size int num_bits = end_bit - begin_bit; int num_passes = (num_bits + pass_config.radix_bits - 1) / pass_config.radix_bits; bool is_num_passes_odd = num_passes & 1; int max_alt_passes = (num_passes * pass_config.radix_bits) - num_bits; int alt_end_bit = CUB_MIN(end_bit, begin_bit + (max_alt_passes * alt_pass_config.radix_bits)); // Alias the temporary storage allocations OffsetT *d_spine = static_cast(allocations[0]); DoubleBuffer d_keys_remaining_passes( (is_overwrite_okay || is_num_passes_odd) ? d_keys.Alternate() : static_cast(allocations[1]), (is_overwrite_okay) ? d_keys.Current() : (is_num_passes_odd) ? static_cast(allocations[1]) : d_keys.Alternate()); DoubleBuffer d_values_remaining_passes( (is_overwrite_okay || is_num_passes_odd) ? d_values.Alternate() : static_cast(allocations[2]), (is_overwrite_okay) ? d_values.Current() : (is_num_passes_odd) ? static_cast(allocations[2]) : d_values.Alternate()); // Run first pass, consuming from the input's current buffers int current_bit = begin_bit; if (CubDebug(error = InvokePass( d_keys.Current(), d_keys_remaining_passes.Current(), d_values.Current(), d_values_remaining_passes.Current(), d_spine, spine_length, current_bit, (current_bit < alt_end_bit) ? alt_pass_config : pass_config))) break; // Run remaining passes while (current_bit < end_bit) { if (CubDebug(error = InvokePass( d_keys_remaining_passes.d_buffers[d_keys_remaining_passes.selector], d_keys_remaining_passes.d_buffers[d_keys_remaining_passes.selector ^ 1], d_values_remaining_passes.d_buffers[d_keys_remaining_passes.selector], d_values_remaining_passes.d_buffers[d_keys_remaining_passes.selector ^ 1], d_spine, spine_length, current_bit, (current_bit < alt_end_bit) ? 
alt_pass_config : pass_config))) break;; // Invert selectors d_keys_remaining_passes.selector ^= 1; d_values_remaining_passes.selector ^= 1; } // Update selector if (!is_overwrite_okay) { num_passes = 1; // Sorted data always ends up in the other vector } d_keys.selector = (d_keys.selector + num_passes) & 1; d_values.selector = (d_values.selector + num_passes) & 1; } while (0); return error; #endif // CUB_RUNTIME_ENABLED } //------------------------------------------------------------------------------ // Chained policy invocation //------------------------------------------------------------------------------ /// Invocation template CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t Invoke() { typedef typename DispatchRadixSort::MaxPolicy MaxPolicyT; typedef typename ActivePolicyT::SingleTilePolicy SingleTilePolicyT; // Force kernel code-generation in all compiler passes if (num_items <= (SingleTilePolicyT::BLOCK_THREADS * SingleTilePolicyT::ITEMS_PER_THREAD)) { // Small, single tile size return InvokeSingleTile( DeviceRadixSortSingleTileKernel); } else { // Regular size return InvokePasses( DeviceRadixSortUpsweepKernel< MaxPolicyT, false, IS_DESCENDING, KeyT, OffsetT>, DeviceRadixSortUpsweepKernel< MaxPolicyT, true, IS_DESCENDING, KeyT, OffsetT>, RadixSortScanBinsKernel< MaxPolicyT, OffsetT>, DeviceRadixSortDownsweepKernel< MaxPolicyT, false, IS_DESCENDING, KeyT, ValueT, OffsetT>, DeviceRadixSortDownsweepKernel< MaxPolicyT, true, IS_DESCENDING, KeyT, ValueT, OffsetT>); } } //------------------------------------------------------------------------------ // Dispatch entrypoints //------------------------------------------------------------------------------ /** * Internal dispatch routine */ CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t Dispatch( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation DoubleBuffer &d_keys, ///< [in,out] Double-buffer whose current buffer contains the unsorted input keys and, upon return, is updated to point to the sorted output keys DoubleBuffer &d_values, ///< [in,out] Double-buffer whose current buffer contains the unsorted input values and, upon return, is updated to point to the sorted output values OffsetT num_items, ///< [in] Number of items to sort int begin_bit, ///< [in] The beginning (least-significant) bit index needed for key comparison int end_bit, ///< [in] The past-the-end (most-significant) bit index needed for key comparison bool is_overwrite_okay, ///< [in] Whether is okay to overwrite source buffers cudaStream_t stream, ///< [in] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous) ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. 
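    /*
     * Usage sketch (illustrative; not part of the original header). Callers do not
     * invoke Dispatch() directly: the public cub::DeviceRadixSort front-end forwards
     * to this dispatch layer. The buffers d_keys_in/d_keys_out, d_values_in/d_values_out
     * and num_items below are assumed to be device allocations owned by the caller.
     * The raw-pointer front-end typically corresponds to is_overwrite_okay == false,
     * which is why the pass planning above reserves the third key/value buffers.
     *
     * \code
     * #include <cub/cub.cuh>
     *
     * // First call with d_temp_storage == NULL only computes temp_storage_bytes.
     * void   *d_temp_storage     = NULL;
     * size_t  temp_storage_bytes = 0;
     * cub::DeviceRadixSort::SortPairs(d_temp_storage, temp_storage_bytes,
     *     d_keys_in, d_keys_out, d_values_in, d_values_out, num_items);
     *
     * // Allocate the temporary storage, then sort for real.
     * cudaMalloc(&d_temp_storage, temp_storage_bytes);
     * cub::DeviceRadixSort::SortPairs(d_temp_storage, temp_storage_bytes,
     *     d_keys_in, d_keys_out, d_values_in, d_values_out, num_items);
     * cudaFree(d_temp_storage);
     * \endcode
     */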
{ typedef typename DispatchRadixSort::MaxPolicy MaxPolicyT; cudaError_t error; do { // Get PTX version int ptx_version; if (CubDebug(error = PtxVersion(ptx_version))) break; // Create dispatch functor DispatchRadixSort dispatch( d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, begin_bit, end_bit, is_overwrite_okay, stream, debug_synchronous, ptx_version); // Dispatch to chained policy if (CubDebug(error = MaxPolicyT::Invoke(ptx_version, dispatch))) break; } while (0); return error; } }; /****************************************************************************** * Segmented dispatch ******************************************************************************/ /** * Utility class for dispatching the appropriately-tuned kernels for segmented device-wide radix sort */ template < bool IS_DESCENDING, ///< Whether or not the sorted-order is high-to-low typename KeyT, ///< Key type typename ValueT, ///< Value type typename OffsetIteratorT, ///< Random-access input iterator type for reading segment offsets \iterator typename OffsetT> ///< Signed integer type for global offsets struct DispatchSegmentedRadixSort : DeviceRadixSortPolicy { //------------------------------------------------------------------------------ // Constants //------------------------------------------------------------------------------ enum { // Whether this is a keys-only (or key-value) sort KEYS_ONLY = (Equals::VALUE), }; //------------------------------------------------------------------------------ // Parameter members //------------------------------------------------------------------------------ void *d_temp_storage; ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes; ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation DoubleBuffer &d_keys; ///< [in,out] Double-buffer whose current buffer contains the unsorted input keys and, upon return, is updated to point to the sorted output keys DoubleBuffer &d_values; ///< [in,out] Double-buffer whose current buffer contains the unsorted input values and, upon return, is updated to point to the sorted output values OffsetT num_items; ///< [in] Number of items to sort OffsetT num_segments; ///< [in] The number of segments that comprise the sorting data OffsetIteratorT d_begin_offsets; ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* OffsetIteratorT d_end_offsets; ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. int begin_bit; ///< [in] The beginning (least-significant) bit index needed for key comparison int end_bit; ///< [in] The past-the-end (most-significant) bit index needed for key comparison cudaStream_t stream; ///< [in] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous; ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. 
int ptx_version; ///< [in] PTX version bool is_overwrite_okay; ///< [in] Whether is okay to overwrite source buffers //------------------------------------------------------------------------------ // Constructors //------------------------------------------------------------------------------ /// Constructor CUB_RUNTIME_FUNCTION __forceinline__ DispatchSegmentedRadixSort( void* d_temp_storage, size_t &temp_storage_bytes, DoubleBuffer &d_keys, DoubleBuffer &d_values, OffsetT num_items, OffsetT num_segments, OffsetIteratorT d_begin_offsets, OffsetIteratorT d_end_offsets, int begin_bit, int end_bit, bool is_overwrite_okay, cudaStream_t stream, bool debug_synchronous, int ptx_version) : d_temp_storage(d_temp_storage), temp_storage_bytes(temp_storage_bytes), d_keys(d_keys), d_values(d_values), num_items(num_items), num_segments(num_segments), d_begin_offsets(d_begin_offsets), d_end_offsets(d_end_offsets), begin_bit(begin_bit), end_bit(end_bit), is_overwrite_okay(is_overwrite_okay), stream(stream), debug_synchronous(debug_synchronous), ptx_version(ptx_version) {} //------------------------------------------------------------------------------ // Multi-segment invocation //------------------------------------------------------------------------------ /// Invoke a three-kernel sorting pass at the current bit. template CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t InvokePass( const KeyT *d_keys_in, KeyT *d_keys_out, const ValueT *d_values_in, ValueT *d_values_out, int ¤t_bit, PassConfigT &pass_config) { cudaError error = cudaSuccess; do { int pass_bits = CUB_MIN(pass_config.radix_bits, (end_bit - current_bit)); // Log kernel configuration if (debug_synchronous) _CubLog("Invoking segmented_kernels<<<%d, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy, current bit %d, bit_grain %d\n", num_segments, pass_config.segmented_config.block_threads, (long long) stream, pass_config.segmented_config.items_per_thread, pass_config.segmented_config.sm_occupancy, current_bit, pass_bits); pass_config.segmented_kernel<<>>( d_keys_in, d_keys_out, d_values_in, d_values_out, d_begin_offsets, d_end_offsets, num_segments, current_bit, pass_bits); // Check for failure to launch if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; // Update current bit current_bit += pass_bits; } while (0); return error; } /// PassConfig data structure template struct PassConfig { SegmentedKernelT segmented_kernel; KernelConfig segmented_config; int radix_bits; int radix_digits; /// Initialize pass configuration template CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t InitPassConfig(SegmentedKernelT segmented_kernel) { this->segmented_kernel = segmented_kernel; this->radix_bits = SegmentedPolicyT::RADIX_BITS; this->radix_digits = 1 << radix_bits; return CubDebug(segmented_config.Init(segmented_kernel)); } }; /// Invocation (run multiple digit passes) template < typename ActivePolicyT, ///< Umbrella policy active for the target device typename SegmentedKernelT> ///< Function type of cub::DeviceSegmentedRadixSortKernel CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t InvokePasses( SegmentedKernelT segmented_kernel, ///< [in] Kernel function pointer to parameterization of cub::DeviceSegmentedRadixSortKernel SegmentedKernelT alt_segmented_kernel) ///< [in] Alternate kernel function pointer to parameterization of cub::DeviceSegmentedRadixSortKernel { #ifndef CUB_RUNTIME_ENABLED (void)segmented_kernel; 
(void)alt_segmented_kernel; // Kernel launch not supported from this device return CubDebug(cudaErrorNotSupported ); #else cudaError error = cudaSuccess; do { // Init regular and alternate kernel configurations PassConfig pass_config, alt_pass_config; if ((error = pass_config.template InitPassConfig(segmented_kernel))) break; if ((error = alt_pass_config.template InitPassConfig(alt_segmented_kernel))) break; // Temporary storage allocation requirements void* allocations[2]; size_t allocation_sizes[2] = { (is_overwrite_okay) ? 0 : num_items * sizeof(KeyT), // bytes needed for 3rd keys buffer (is_overwrite_okay || (KEYS_ONLY)) ? 0 : num_items * sizeof(ValueT), // bytes needed for 3rd values buffer }; // Alias the temporary allocations from the single storage blob (or compute the necessary size of the blob) if (CubDebug(error = AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes))) break; // Return if the caller is simply requesting the size of the storage allocation if (d_temp_storage == NULL) { if (temp_storage_bytes == 0) temp_storage_bytes = 1; return cudaSuccess; } // Pass planning. Run passes of the alternate digit-size configuration until we have an even multiple of our preferred digit size int radix_bits = ActivePolicyT::SegmentedPolicy::RADIX_BITS; int alt_radix_bits = ActivePolicyT::AltSegmentedPolicy::RADIX_BITS; int num_bits = end_bit - begin_bit; int num_passes = (num_bits + radix_bits - 1) / radix_bits; bool is_num_passes_odd = num_passes & 1; int max_alt_passes = (num_passes * radix_bits) - num_bits; int alt_end_bit = CUB_MIN(end_bit, begin_bit + (max_alt_passes * alt_radix_bits)); DoubleBuffer d_keys_remaining_passes( (is_overwrite_okay || is_num_passes_odd) ? d_keys.Alternate() : static_cast(allocations[0]), (is_overwrite_okay) ? d_keys.Current() : (is_num_passes_odd) ? static_cast(allocations[0]) : d_keys.Alternate()); DoubleBuffer d_values_remaining_passes( (is_overwrite_okay || is_num_passes_odd) ? d_values.Alternate() : static_cast(allocations[1]), (is_overwrite_okay) ? d_values.Current() : (is_num_passes_odd) ? static_cast(allocations[1]) : d_values.Alternate()); // Run first pass, consuming from the input's current buffers int current_bit = begin_bit; if (CubDebug(error = InvokePass( d_keys.Current(), d_keys_remaining_passes.Current(), d_values.Current(), d_values_remaining_passes.Current(), current_bit, (current_bit < alt_end_bit) ? alt_pass_config : pass_config))) break; // Run remaining passes while (current_bit < end_bit) { if (CubDebug(error = InvokePass( d_keys_remaining_passes.d_buffers[d_keys_remaining_passes.selector], d_keys_remaining_passes.d_buffers[d_keys_remaining_passes.selector ^ 1], d_values_remaining_passes.d_buffers[d_keys_remaining_passes.selector], d_values_remaining_passes.d_buffers[d_keys_remaining_passes.selector ^ 1], current_bit, (current_bit < alt_end_bit) ? 
alt_pass_config : pass_config))) break; // Invert selectors and update current bit d_keys_remaining_passes.selector ^= 1; d_values_remaining_passes.selector ^= 1; } // Update selector if (!is_overwrite_okay) { num_passes = 1; // Sorted data always ends up in the other vector } d_keys.selector = (d_keys.selector + num_passes) & 1; d_values.selector = (d_values.selector + num_passes) & 1; } while (0); return error; #endif // CUB_RUNTIME_ENABLED } //------------------------------------------------------------------------------ // Chained policy invocation //------------------------------------------------------------------------------ /// Invocation template CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t Invoke() { typedef typename DispatchSegmentedRadixSort::MaxPolicy MaxPolicyT; // Force kernel code-generation in all compiler passes return InvokePasses( DeviceSegmentedRadixSortKernel, DeviceSegmentedRadixSortKernel); } //------------------------------------------------------------------------------ // Dispatch entrypoints //------------------------------------------------------------------------------ /// Internal dispatch routine CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t Dispatch( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation DoubleBuffer &d_keys, ///< [in,out] Double-buffer whose current buffer contains the unsorted input keys and, upon return, is updated to point to the sorted output keys DoubleBuffer &d_values, ///< [in,out] Double-buffer whose current buffer contains the unsorted input values and, upon return, is updated to point to the sorted output values int num_items, ///< [in] Number of items to sort int num_segments, ///< [in] The number of segments that comprise the sorting data OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. int begin_bit, ///< [in] The beginning (least-significant) bit index needed for key comparison int end_bit, ///< [in] The past-the-end (most-significant) bit index needed for key comparison bool is_overwrite_okay, ///< [in] Whether is okay to overwrite source buffers cudaStream_t stream, ///< [in] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous) ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. 
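    /*
     * Usage sketch (illustrative; not part of the original header). The segmented
     * dispatcher is reached through the public cub::DeviceSegmentedRadixSort
     * front-end. The device arrays d_keys_in, d_keys_out and d_offsets
     * (num_segments + 1 entries) are assumed to exist already; the begin/end
     * offsets are the usual d_offsets / d_offsets + 1 pair.
     *
     * \code
     * #include <cub/cub.cuh>
     *
     * void   *d_temp_storage     = NULL;
     * size_t  temp_storage_bytes = 0;
     * cub::DeviceSegmentedRadixSort::SortKeys(d_temp_storage, temp_storage_bytes,
     *     d_keys_in, d_keys_out, num_items, num_segments,
     *     d_offsets, d_offsets + 1);
     *
     * cudaMalloc(&d_temp_storage, temp_storage_bytes);
     * cub::DeviceSegmentedRadixSort::SortKeys(d_temp_storage, temp_storage_bytes,
     *     d_keys_in, d_keys_out, num_items, num_segments,
     *     d_offsets, d_offsets + 1);
     * cudaFree(d_temp_storage);
     * \endcode
     */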
{ typedef typename DispatchSegmentedRadixSort::MaxPolicy MaxPolicyT; cudaError_t error; do { // Get PTX version int ptx_version; if (CubDebug(error = PtxVersion(ptx_version))) break; // Create dispatch functor DispatchSegmentedRadixSort dispatch( d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, num_segments, d_begin_offsets, d_end_offsets, begin_bit, end_bit, is_overwrite_okay, stream, debug_synchronous, ptx_version); // Dispatch to chained policy if (CubDebug(error = MaxPolicyT::Invoke(ptx_version, dispatch))) break; } while (0); return error; } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/device/dispatch/dispatch_reduce.cuh000066400000000000000000001232651411340063500247030ustar00rootroot00000000000000 /****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::DeviceReduce provides device-wide, parallel operations for computing a reduction across a sequence of data items residing within device-accessible memory. */ #pragma once #include #include #include "../../agent/agent_reduce.cuh" #include "../../iterator/arg_index_input_iterator.cuh" #include "../../thread/thread_operators.cuh" #include "../../grid/grid_even_share.cuh" #include "../../iterator/arg_index_input_iterator.cuh" #include "../../util_debug.cuh" #include "../../util_device.cuh" #include "../../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * Kernel entry points *****************************************************************************/ /** * Reduce region kernel entry point (multi-block). Computes privatized reductions, one per thread block. 
*/ template < typename ChainedPolicyT, ///< Chained tuning policy typename InputIteratorT, ///< Random-access input iterator type for reading input items \iterator typename OutputIteratorT, ///< Output iterator type for recording the reduced aggregate \iterator typename OffsetT, ///< Signed integer type for global offsets typename ReductionOpT> ///< Binary reduction functor type having member T operator()(const T &a, const T &b) __launch_bounds__ (int(ChainedPolicyT::ActivePolicy::ReducePolicy::BLOCK_THREADS)) __global__ void DeviceReduceKernel( InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output aggregate OffsetT num_items, ///< [in] Total number of input data items GridEvenShare even_share, ///< [in] Even-share descriptor for mapping an equal number of tiles onto each thread block ReductionOpT reduction_op) ///< [in] Binary reduction functor { // The output value type typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? typename std::iterator_traits::value_type, // ... then the input iterator's value type, typename std::iterator_traits::value_type>::Type OutputT; // ... else the output iterator's value type // Thread block type for reducing input tiles typedef AgentReduce< typename ChainedPolicyT::ActivePolicy::ReducePolicy, InputIteratorT, OutputIteratorT, OffsetT, ReductionOpT> AgentReduceT; // Shared memory storage __shared__ typename AgentReduceT::TempStorage temp_storage; // Consume input tiles OutputT block_aggregate = AgentReduceT(temp_storage, d_in, reduction_op).ConsumeTiles(even_share); // Output result if (threadIdx.x == 0) d_out[blockIdx.x] = block_aggregate; } /** * Reduce a single tile kernel entry point (single-block). Can be used to aggregate privatized thread block reductions from a previous multi-block reduction pass. 
*/ template < typename ChainedPolicyT, ///< Chained tuning policy typename InputIteratorT, ///< Random-access input iterator type for reading input items \iterator typename OutputIteratorT, ///< Output iterator type for recording the reduced aggregate \iterator typename OffsetT, ///< Signed integer type for global offsets typename ReductionOpT, ///< Binary reduction functor type having member T operator()(const T &a, const T &b) typename OuputT> ///< Data element type that is convertible to the \p value type of \p OutputIteratorT __launch_bounds__ (int(ChainedPolicyT::ActivePolicy::SingleTilePolicy::BLOCK_THREADS), 1) __global__ void DeviceReduceSingleTileKernel( InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output aggregate OffsetT num_items, ///< [in] Total number of input data items ReductionOpT reduction_op, ///< [in] Binary reduction functor OuputT init) ///< [in] The initial value of the reduction { // Thread block type for reducing input tiles typedef AgentReduce< typename ChainedPolicyT::ActivePolicy::SingleTilePolicy, InputIteratorT, OutputIteratorT, OffsetT, ReductionOpT> AgentReduceT; // Shared memory storage __shared__ typename AgentReduceT::TempStorage temp_storage; // Check if empty problem if (num_items == 0) { if (threadIdx.x == 0) *d_out = init; return; } // Consume input tiles OuputT block_aggregate = AgentReduceT(temp_storage, d_in, reduction_op).ConsumeRange( OffsetT(0), num_items); // Output result if (threadIdx.x == 0) *d_out = reduction_op(init, block_aggregate); } /// Normalize input iterator to segment offset template __device__ __forceinline__ void NormalizeReductionOutput( T &/*val*/, OffsetT /*base_offset*/, IteratorT /*itr*/) {} /// Normalize input iterator to segment offset (specialized for arg-index) template __device__ __forceinline__ void NormalizeReductionOutput( KeyValuePairT &val, OffsetT base_offset, ArgIndexInputIterator /*itr*/) { val.key -= base_offset; } /** * Segmented reduction (one block per segment) */ template < typename ChainedPolicyT, ///< Chained tuning policy typename InputIteratorT, ///< Random-access input iterator type for reading input items \iterator typename OutputIteratorT, ///< Output iterator type for recording the reduced aggregate \iterator typename OffsetIteratorT, ///< Random-access input iterator type for reading segment offsets \iterator typename OffsetT, ///< Signed integer type for global offsets typename ReductionOpT, ///< Binary reduction functor type having member T operator()(const T &a, const T &b) typename OutputT> ///< Data element type that is convertible to the \p value type of \p OutputIteratorT __launch_bounds__ (int(ChainedPolicyT::ActivePolicy::ReducePolicy::BLOCK_THREADS)) __global__ void DeviceSegmentedReduceKernel( InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output aggregate OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. 
int /*num_segments*/, ///< [in] The number of segments that comprise the sorting data ReductionOpT reduction_op, ///< [in] Binary reduction functor OutputT init) ///< [in] The initial value of the reduction { // Thread block type for reducing input tiles typedef AgentReduce< typename ChainedPolicyT::ActivePolicy::ReducePolicy, InputIteratorT, OutputIteratorT, OffsetT, ReductionOpT> AgentReduceT; // Shared memory storage __shared__ typename AgentReduceT::TempStorage temp_storage; OffsetT segment_begin = d_begin_offsets[blockIdx.x]; OffsetT segment_end = d_end_offsets[blockIdx.x]; // Check if empty problem if (segment_begin == segment_end) { if (threadIdx.x == 0) d_out[blockIdx.x] = init; return; } // Consume input tiles OutputT block_aggregate = AgentReduceT(temp_storage, d_in, reduction_op).ConsumeRange( segment_begin, segment_end); // Normalize as needed NormalizeReductionOutput(block_aggregate, segment_begin, d_in); if (threadIdx.x == 0) d_out[blockIdx.x] = reduction_op(init, block_aggregate);; } /****************************************************************************** * Policy ******************************************************************************/ template < typename OuputT, ///< Data type typename OffsetT, ///< Signed integer type for global offsets typename ReductionOpT> ///< Binary reduction functor type having member T operator()(const T &a, const T &b) struct DeviceReducePolicy { //------------------------------------------------------------------------------ // Architecture-specific tuning policies //------------------------------------------------------------------------------ /// SM13 struct Policy130 : ChainedPolicy<130, Policy130, Policy130> { // ReducePolicy typedef AgentReducePolicy< CUB_NOMINAL_CONFIG(128, 8, OuputT), ///< Threads per block, items per thread 2, ///< Number of items per vectorized load BLOCK_REDUCE_RAKING, ///< Cooperative block-wide reduction algorithm to use LOAD_DEFAULT> ///< Cache load modifier ReducePolicy; // SingleTilePolicy typedef ReducePolicy SingleTilePolicy; // SegmentedReducePolicy typedef ReducePolicy SegmentedReducePolicy; }; /// SM20 struct Policy200 : ChainedPolicy<200, Policy200, Policy130> { // ReducePolicy (GTX 580: 178.9 GB/s @ 48M 4B items, 158.1 GB/s @ 192M 1B items) typedef AgentReducePolicy< CUB_NOMINAL_CONFIG(128, 8, OuputT), ///< Threads per block, items per thread 4, ///< Number of items per vectorized load BLOCK_REDUCE_RAKING, ///< Cooperative block-wide reduction algorithm to use LOAD_DEFAULT> ///< Cache load modifier ReducePolicy; // SingleTilePolicy typedef ReducePolicy SingleTilePolicy; // SegmentedReducePolicy typedef ReducePolicy SegmentedReducePolicy; }; /// SM30 struct Policy300 : ChainedPolicy<300, Policy300, Policy200> { // ReducePolicy (GTX670: 154.0 @ 48M 4B items) typedef AgentReducePolicy< CUB_NOMINAL_CONFIG(256, 20, OuputT), ///< Threads per block, items per thread 2, ///< Number of items per vectorized load BLOCK_REDUCE_WARP_REDUCTIONS, ///< Cooperative block-wide reduction algorithm to use LOAD_DEFAULT> ///< Cache load modifier ReducePolicy; // SingleTilePolicy typedef ReducePolicy SingleTilePolicy; // SegmentedReducePolicy typedef ReducePolicy SegmentedReducePolicy; }; /// SM35 struct Policy350 : ChainedPolicy<350, Policy350, Policy300> { // ReducePolicy (GTX Titan: 255.1 GB/s @ 48M 4B items; 228.7 GB/s @ 192M 1B items) typedef AgentReducePolicy< CUB_NOMINAL_CONFIG(256, 20, OuputT), ///< Threads per block, items per thread 4, ///< Number of items per vectorized load BLOCK_REDUCE_WARP_REDUCTIONS, 
///< Cooperative block-wide reduction algorithm to use LOAD_LDG> ///< Cache load modifier ReducePolicy; // SingleTilePolicy typedef ReducePolicy SingleTilePolicy; // SegmentedReducePolicy typedef ReducePolicy SegmentedReducePolicy; }; /// SM60 struct Policy600 : ChainedPolicy<600, Policy600, Policy350> { // ReducePolicy (P100: 591 GB/s @ 64M 4B items; 583 GB/s @ 256M 1B items) typedef AgentReducePolicy< CUB_NOMINAL_CONFIG(256, 16, OuputT), ///< Threads per block, items per thread 4, ///< Number of items per vectorized load BLOCK_REDUCE_WARP_REDUCTIONS, ///< Cooperative block-wide reduction algorithm to use LOAD_LDG> ///< Cache load modifier ReducePolicy; // SingleTilePolicy typedef ReducePolicy SingleTilePolicy; // SegmentedReducePolicy typedef ReducePolicy SegmentedReducePolicy; }; /// MaxPolicy typedef Policy600 MaxPolicy; }; /****************************************************************************** * Single-problem dispatch ******************************************************************************/ /** * Utility class for dispatching the appropriately-tuned kernels for device-wide reduction */ template < typename InputIteratorT, ///< Random-access input iterator type for reading input items \iterator typename OutputIteratorT, ///< Output iterator type for recording the reduced aggregate \iterator typename OffsetT, ///< Signed integer type for global offsets typename ReductionOpT> ///< Binary reduction functor type having member T operator()(const T &a, const T &b) struct DispatchReduce : DeviceReducePolicy< typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? typename std::iterator_traits::value_type, // ... then the input iterator's value type, typename std::iterator_traits::value_type>::Type, // ... else the output iterator's value type OffsetT, ReductionOpT> { //------------------------------------------------------------------------------ // Constants //------------------------------------------------------------------------------ // Data type of output iterator typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? typename std::iterator_traits::value_type, // ... then the input iterator's value type, typename std::iterator_traits::value_type>::Type OutputT; // ... else the output iterator's value type //------------------------------------------------------------------------------ // Problem state //------------------------------------------------------------------------------ void *d_temp_storage; ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes; ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in; ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out; ///< [out] Pointer to the output aggregate OffsetT num_items; ///< [in] Total number of input items (i.e., length of \p d_in) ReductionOpT reduction_op; ///< [in] Binary reduction functor OutputT init; ///< [in] The initial value of the reduction cudaStream_t stream; ///< [in] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous; ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. 
int ptx_version; ///< [in] PTX version //------------------------------------------------------------------------------ // Constructor //------------------------------------------------------------------------------ /// Constructor CUB_RUNTIME_FUNCTION __forceinline__ DispatchReduce( void* d_temp_storage, size_t &temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, OffsetT num_items, ReductionOpT reduction_op, OutputT init, cudaStream_t stream, bool debug_synchronous, int ptx_version) : d_temp_storage(d_temp_storage), temp_storage_bytes(temp_storage_bytes), d_in(d_in), d_out(d_out), num_items(num_items), reduction_op(reduction_op), init(init), stream(stream), debug_synchronous(debug_synchronous), ptx_version(ptx_version) {} //------------------------------------------------------------------------------ // Small-problem (single tile) invocation //------------------------------------------------------------------------------ /// Invoke a single block block to reduce in-core template < typename ActivePolicyT, ///< Umbrella policy active for the target device typename SingleTileKernelT> ///< Function type of cub::DeviceReduceSingleTileKernel CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t InvokeSingleTile( SingleTileKernelT single_tile_kernel) ///< [in] Kernel function pointer to parameterization of cub::DeviceReduceSingleTileKernel { #ifndef CUB_RUNTIME_ENABLED (void)single_tile_kernel; // Kernel launch not supported from this device return CubDebug(cudaErrorNotSupported ); #else cudaError error = cudaSuccess; do { // Return if the caller is simply requesting the size of the storage allocation if (d_temp_storage == NULL) { temp_storage_bytes = 1; break; } // Log single_reduce_sweep_kernel configuration if (debug_synchronous) _CubLog("Invoking DeviceReduceSingleTileKernel<<<1, %d, 0, %lld>>>(), %d items per thread\n", ActivePolicyT::SingleTilePolicy::BLOCK_THREADS, (long long) stream, ActivePolicyT::SingleTilePolicy::ITEMS_PER_THREAD); // Invoke single_reduce_sweep_kernel single_tile_kernel<<<1, ActivePolicyT::SingleTilePolicy::BLOCK_THREADS, 0, stream>>>( d_in, d_out, num_items, reduction_op, init); // Check for failure to launch if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; } while (0); return error; #endif // CUB_RUNTIME_ENABLED } //------------------------------------------------------------------------------ // Normal problem size invocation (two-pass) //------------------------------------------------------------------------------ /// Invoke two-passes to reduce template < typename ActivePolicyT, ///< Umbrella policy active for the target device typename ReduceKernelT, ///< Function type of cub::DeviceReduceKernel typename SingleTileKernelT> ///< Function type of cub::DeviceReduceSingleTileKernel CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t InvokePasses( ReduceKernelT reduce_kernel, ///< [in] Kernel function pointer to parameterization of cub::DeviceReduceKernel SingleTileKernelT single_tile_kernel) ///< [in] Kernel function pointer to parameterization of cub::DeviceReduceSingleTileKernel { #ifndef CUB_RUNTIME_ENABLED (void) reduce_kernel; (void) single_tile_kernel; // Kernel launch not supported from this device return CubDebug(cudaErrorNotSupported ); #else cudaError error = cudaSuccess; do { // Get device ordinal int device_ordinal; if (CubDebug(error = cudaGetDevice(&device_ordinal))) break; // Get SM count int sm_count; if 
(CubDebug(error = cudaDeviceGetAttribute (&sm_count, cudaDevAttrMultiProcessorCount, device_ordinal))) break; // Init regular kernel configuration KernelConfig reduce_config; if (CubDebug(error = reduce_config.Init(reduce_kernel))) break; int reduce_device_occupancy = reduce_config.sm_occupancy * sm_count; // Even-share work distribution int max_blocks = reduce_device_occupancy * CUB_SUBSCRIPTION_FACTOR(ptx_version); GridEvenShare even_share; even_share.DispatchInit(num_items, max_blocks, reduce_config.tile_size); // Temporary storage allocation requirements void* allocations[1]; size_t allocation_sizes[1] = { max_blocks * sizeof(OutputT) // bytes needed for privatized block reductions }; // Alias the temporary allocations from the single storage blob (or compute the necessary size of the blob) if (CubDebug(error = AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes))) break; if (d_temp_storage == NULL) { // Return if the caller is simply requesting the size of the storage allocation return cudaSuccess; } // Alias the allocation for the privatized per-block reductions OutputT *d_block_reductions = (OutputT*) allocations[0]; // Get grid size for device_reduce_sweep_kernel int reduce_grid_size = even_share.grid_size; // Log device_reduce_sweep_kernel configuration if (debug_synchronous) _CubLog("Invoking DeviceReduceKernel<<<%d, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy\n", reduce_grid_size, ActivePolicyT::ReducePolicy::BLOCK_THREADS, (long long) stream, ActivePolicyT::ReducePolicy::ITEMS_PER_THREAD, reduce_config.sm_occupancy); // Invoke DeviceReduceKernel reduce_kernel<<>>( d_in, d_block_reductions, num_items, even_share, reduction_op); // Check for failure to launch if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; // Log single_reduce_sweep_kernel configuration if (debug_synchronous) _CubLog("Invoking DeviceReduceSingleTileKernel<<<1, %d, 0, %lld>>>(), %d items per thread\n", ActivePolicyT::SingleTilePolicy::BLOCK_THREADS, (long long) stream, ActivePolicyT::SingleTilePolicy::ITEMS_PER_THREAD); // Invoke DeviceReduceSingleTileKernel single_tile_kernel<<<1, ActivePolicyT::SingleTilePolicy::BLOCK_THREADS, 0, stream>>>( d_block_reductions, d_out, reduce_grid_size, reduction_op, init); // Check for failure to launch if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; } while (0); return error; #endif // CUB_RUNTIME_ENABLED } //------------------------------------------------------------------------------ // Chained policy invocation //------------------------------------------------------------------------------ /// Invocation template CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t Invoke() { typedef typename ActivePolicyT::SingleTilePolicy SingleTilePolicyT; typedef typename DispatchReduce::MaxPolicy MaxPolicyT; // Force kernel code-generation in all compiler passes if (num_items <= (SingleTilePolicyT::BLOCK_THREADS * SingleTilePolicyT::ITEMS_PER_THREAD)) { // Small, single tile size return InvokeSingleTile( DeviceReduceSingleTileKernel); } else { // Regular size return InvokePasses( DeviceReduceKernel, DeviceReduceSingleTileKernel); } } //------------------------------------------------------------------------------ // Dispatch entrypoints 
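    /*
     * Usage sketch (illustrative; not part of the original header). The two code
     * paths selected in Invoke() above -- a single-tile kernel for small inputs and
     * the two-pass reduction for everything else -- are both reached through the
     * public cub::DeviceReduce front-end. d_in, d_out and num_items are assumed to
     * be device allocations owned by the caller.
     *
     * \code
     * #include <cub/cub.cuh>
     *
     * // Sum num_items values in d_in into the single value d_out[0].
     * void   *d_temp_storage     = NULL;
     * size_t  temp_storage_bytes = 0;
     * cub::DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items);
     *
     * cudaMalloc(&d_temp_storage, temp_storage_bytes);
     * cub::DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items);
     * cudaFree(d_temp_storage);
     * \endcode
     */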
//------------------------------------------------------------------------------ /** * Internal dispatch routine for computing a device-wide reduction */ CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t Dispatch( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output aggregate OffsetT num_items, ///< [in] Total number of input items (i.e., length of \p d_in) ReductionOpT reduction_op, ///< [in] Binary reduction functor OutputT init, ///< [in] The initial value of the reduction cudaStream_t stream, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { typedef typename DispatchReduce::MaxPolicy MaxPolicyT; cudaError error = cudaSuccess; do { // Get PTX version int ptx_version; if (CubDebug(error = PtxVersion(ptx_version))) break; // Create dispatch functor DispatchReduce dispatch( d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, reduction_op, init, stream, debug_synchronous, ptx_version); // Dispatch to chained policy if (CubDebug(error = MaxPolicyT::Invoke(ptx_version, dispatch))) break; } while (0); return error; } }; /****************************************************************************** * Segmented dispatch ******************************************************************************/ /** * Utility class for dispatching the appropriately-tuned kernels for device-wide reduction */ template < typename InputIteratorT, ///< Random-access input iterator type for reading input items \iterator typename OutputIteratorT, ///< Output iterator type for recording the reduced aggregate \iterator typename OffsetIteratorT, ///< Random-access input iterator type for reading segment offsets \iterator typename OffsetT, ///< Signed integer type for global offsets typename ReductionOpT> ///< Binary reduction functor type having member T operator()(const T &a, const T &b) struct DispatchSegmentedReduce : DeviceReducePolicy< typename std::iterator_traits::value_type, OffsetT, ReductionOpT> { //------------------------------------------------------------------------------ // Constants //------------------------------------------------------------------------------ /// The output value type typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? typename std::iterator_traits::value_type, // ... then the input iterator's value type, typename std::iterator_traits::value_type>::Type OutputT; // ... else the output iterator's value type //------------------------------------------------------------------------------ // Problem state //------------------------------------------------------------------------------ void *d_temp_storage; ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. 
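    /*
     * Usage sketch (illustrative; not part of the original header). One thread block
     * is launched per segment (see DeviceSegmentedReduceKernel above), and the public
     * entry point is cub::DeviceSegmentedReduce. d_in, d_out and the offset array
     * d_offsets (num_segments + 1 entries) are assumed to exist on the device.
     *
     * \code
     * #include <cub/cub.cuh>
     *
     * void   *d_temp_storage     = NULL;
     * size_t  temp_storage_bytes = 0;
     * cub::DeviceSegmentedReduce::Sum(d_temp_storage, temp_storage_bytes,
     *     d_in, d_out, num_segments, d_offsets, d_offsets + 1);
     *
     * cudaMalloc(&d_temp_storage, temp_storage_bytes);
     * cub::DeviceSegmentedReduce::Sum(d_temp_storage, temp_storage_bytes,
     *     d_in, d_out, num_segments, d_offsets, d_offsets + 1);
     * cudaFree(d_temp_storage);
     * \endcode
     */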
size_t &temp_storage_bytes; ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in; ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out; ///< [out] Pointer to the output aggregate OffsetT num_segments; ///< [in] The number of segments that comprise the sorting data OffsetIteratorT d_begin_offsets; ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* OffsetIteratorT d_end_offsets; ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. ReductionOpT reduction_op; ///< [in] Binary reduction functor OutputT init; ///< [in] The initial value of the reduction cudaStream_t stream; ///< [in] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous; ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. int ptx_version; ///< [in] PTX version //------------------------------------------------------------------------------ // Constructor //------------------------------------------------------------------------------ /// Constructor CUB_RUNTIME_FUNCTION __forceinline__ DispatchSegmentedReduce( void* d_temp_storage, size_t &temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, OffsetT num_segments, OffsetIteratorT d_begin_offsets, OffsetIteratorT d_end_offsets, ReductionOpT reduction_op, OutputT init, cudaStream_t stream, bool debug_synchronous, int ptx_version) : d_temp_storage(d_temp_storage), temp_storage_bytes(temp_storage_bytes), d_in(d_in), d_out(d_out), num_segments(num_segments), d_begin_offsets(d_begin_offsets), d_end_offsets(d_end_offsets), reduction_op(reduction_op), init(init), stream(stream), debug_synchronous(debug_synchronous), ptx_version(ptx_version) {} //------------------------------------------------------------------------------ // Chained policy invocation //------------------------------------------------------------------------------ /// Invocation template < typename ActivePolicyT, ///< Umbrella policy active for the target device typename DeviceSegmentedReduceKernelT> ///< Function type of cub::DeviceSegmentedReduceKernel CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t InvokePasses( DeviceSegmentedReduceKernelT segmented_reduce_kernel) ///< [in] Kernel function pointer to parameterization of cub::DeviceSegmentedReduceKernel { #ifndef CUB_RUNTIME_ENABLED (void)segmented_reduce_kernel; // Kernel launch not supported from this device return CubDebug(cudaErrorNotSupported ); #else cudaError error = cudaSuccess; do { // Return if the caller is simply requesting the size of the storage allocation if (d_temp_storage == NULL) { temp_storage_bytes = 1; return cudaSuccess; } // Init kernel configuration KernelConfig segmented_reduce_config; if (CubDebug(error = segmented_reduce_config.Init(segmented_reduce_kernel))) break; // Log device_reduce_sweep_kernel configuration if (debug_synchronous) _CubLog("Invoking SegmentedDeviceReduceKernel<<<%d, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy\n", num_segments, ActivePolicyT::SegmentedReducePolicy::BLOCK_THREADS, (long long) stream, ActivePolicyT::SegmentedReducePolicy::ITEMS_PER_THREAD, 
segmented_reduce_config.sm_occupancy); // Invoke DeviceReduceKernel segmented_reduce_kernel<<>>( d_in, d_out, d_begin_offsets, d_end_offsets, num_segments, reduction_op, init); // Check for failure to launch if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; } while (0); return error; #endif // CUB_RUNTIME_ENABLED } /// Invocation template CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t Invoke() { typedef typename DispatchSegmentedReduce::MaxPolicy MaxPolicyT; // Force kernel code-generation in all compiler passes return InvokePasses( DeviceSegmentedReduceKernel); } //------------------------------------------------------------------------------ // Dispatch entrypoints //------------------------------------------------------------------------------ /** * Internal dispatch routine for computing a device-wide reduction */ CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t Dispatch( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output aggregate int num_segments, ///< [in] The number of segments that comprise the sorting data OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. ReductionOpT reduction_op, ///< [in] Binary reduction functor OutputT init, ///< [in] The initial value of the reduction cudaStream_t stream, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { typedef typename DispatchSegmentedReduce::MaxPolicy MaxPolicyT; if (num_segments <= 0) return cudaSuccess; cudaError error = cudaSuccess; do { // Get PTX version int ptx_version; if (CubDebug(error = PtxVersion(ptx_version))) break; // Create dispatch functor DispatchSegmentedReduce dispatch( d_temp_storage, temp_storage_bytes, d_in, d_out, num_segments, d_begin_offsets, d_end_offsets, reduction_op, init, stream, debug_synchronous, ptx_version); // Dispatch to chained policy if (CubDebug(error = MaxPolicyT::Invoke(ptx_version, dispatch))) break; } while (0); return error; } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/device/dispatch/dispatch_reduce_by_key.cuh000066400000000000000000000616061411340063500262450ustar00rootroot00000000000000 /****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::DeviceReduceByKey provides device-wide, parallel operations for reducing segments of values residing within device-accessible memory. */ #pragma once #include #include #include "dispatch_scan.cuh" #include "../../agent/agent_reduce_by_key.cuh" #include "../../thread/thread_operators.cuh" #include "../../grid/grid_queue.cuh" #include "../../util_device.cuh" #include "../../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * Kernel entry points *****************************************************************************/ /** * Multi-block reduce-by-key sweep kernel entry point */ template < typename AgentReduceByKeyPolicyT, ///< Parameterized AgentReduceByKeyPolicyT tuning policy type typename KeysInputIteratorT, ///< Random-access input iterator type for keys typename UniqueOutputIteratorT, ///< Random-access output iterator type for keys typename ValuesInputIteratorT, ///< Random-access input iterator type for values typename AggregatesOutputIteratorT, ///< Random-access output iterator type for values typename NumRunsOutputIteratorT, ///< Output iterator type for recording number of segments encountered typename ScanTileStateT, ///< Tile status interface type typename EqualityOpT, ///< KeyT equality operator type typename ReductionOpT, ///< ValueT reduction operator type typename OffsetT> ///< Signed integer type for global offsets __launch_bounds__ (int(AgentReduceByKeyPolicyT::BLOCK_THREADS)) __global__ void DeviceReduceByKeyKernel( KeysInputIteratorT d_keys_in, ///< Pointer to the input sequence of keys UniqueOutputIteratorT d_unique_out, ///< Pointer to the output sequence of unique keys (one key per run) ValuesInputIteratorT d_values_in, ///< Pointer to the input sequence of corresponding values AggregatesOutputIteratorT d_aggregates_out, ///< Pointer to the output sequence of value aggregates (one aggregate per run) 
NumRunsOutputIteratorT d_num_runs_out, ///< Pointer to total number of runs encountered (i.e., the length of d_unique_out) ScanTileStateT tile_state, ///< Tile status interface int start_tile, ///< The starting tile for the current grid EqualityOpT equality_op, ///< KeyT equality operator ReductionOpT reduction_op, ///< ValueT reduction operator OffsetT num_items) ///< Total number of items to select from { // Thread block type for reducing tiles of value segments typedef AgentReduceByKey< AgentReduceByKeyPolicyT, KeysInputIteratorT, UniqueOutputIteratorT, ValuesInputIteratorT, AggregatesOutputIteratorT, NumRunsOutputIteratorT, EqualityOpT, ReductionOpT, OffsetT> AgentReduceByKeyT; // Shared memory for AgentReduceByKey __shared__ typename AgentReduceByKeyT::TempStorage temp_storage; // Process tiles AgentReduceByKeyT(temp_storage, d_keys_in, d_unique_out, d_values_in, d_aggregates_out, d_num_runs_out, equality_op, reduction_op).ConsumeRange( num_items, tile_state, start_tile); } /****************************************************************************** * Dispatch ******************************************************************************/ /** * Utility class for dispatching the appropriately-tuned kernels for DeviceReduceByKey */ template < typename KeysInputIteratorT, ///< Random-access input iterator type for keys typename UniqueOutputIteratorT, ///< Random-access output iterator type for keys typename ValuesInputIteratorT, ///< Random-access input iterator type for values typename AggregatesOutputIteratorT, ///< Random-access output iterator type for values typename NumRunsOutputIteratorT, ///< Output iterator type for recording number of segments encountered typename EqualityOpT, ///< KeyT equality operator type typename ReductionOpT, ///< ValueT reduction operator type typename OffsetT> ///< Signed integer type for global offsets struct DispatchReduceByKey { //------------------------------------------------------------------------- // Types and constants //------------------------------------------------------------------------- // The input keys type typedef typename std::iterator_traits::value_type KeyInputT; // The output keys type typedef typename If<(Equals::value_type, void>::VALUE), // KeyOutputT = (if output iterator's value type is void) ? typename std::iterator_traits::value_type, // ... then the input iterator's value type, typename std::iterator_traits::value_type>::Type KeyOutputT; // ... else the output iterator's value type // The input values type typedef typename std::iterator_traits::value_type ValueInputT; // The output values type typedef typename If<(Equals::value_type, void>::VALUE), // ValueOutputT = (if output iterator's value type is void) ? typename std::iterator_traits::value_type, // ... then the input iterator's value type, typename std::iterator_traits::value_type>::Type ValueOutputT; // ... else the output iterator's value type enum { INIT_KERNEL_THREADS = 128, MAX_INPUT_BYTES = CUB_MAX(sizeof(KeyOutputT), sizeof(ValueOutputT)), COMBINED_INPUT_BYTES = sizeof(KeyOutputT) + sizeof(ValueOutputT), }; // Tile status descriptor interface type typedef ReduceByKeyScanTileState ScanTileStateT; //------------------------------------------------------------------------- // Tuning policies //------------------------------------------------------------------------- /// SM35 struct Policy350 { enum { NOMINAL_4B_ITEMS_PER_THREAD = 6, ITEMS_PER_THREAD = (MAX_INPUT_BYTES <= 8) ? 
6 : CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, ((NOMINAL_4B_ITEMS_PER_THREAD * 8) + COMBINED_INPUT_BYTES - 1) / COMBINED_INPUT_BYTES)), }; typedef AgentReduceByKeyPolicy< 128, ITEMS_PER_THREAD, BLOCK_LOAD_DIRECT, LOAD_LDG, BLOCK_SCAN_WARP_SCANS> ReduceByKeyPolicyT; }; /// SM30 struct Policy300 { enum { NOMINAL_4B_ITEMS_PER_THREAD = 6, ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, ((NOMINAL_4B_ITEMS_PER_THREAD * 8) + COMBINED_INPUT_BYTES - 1) / COMBINED_INPUT_BYTES)), }; typedef AgentReduceByKeyPolicy< 128, ITEMS_PER_THREAD, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, BLOCK_SCAN_WARP_SCANS> ReduceByKeyPolicyT; }; /// SM20 struct Policy200 { enum { NOMINAL_4B_ITEMS_PER_THREAD = 11, ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, ((NOMINAL_4B_ITEMS_PER_THREAD * 8) + COMBINED_INPUT_BYTES - 1) / COMBINED_INPUT_BYTES)), }; typedef AgentReduceByKeyPolicy< 128, ITEMS_PER_THREAD, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, BLOCK_SCAN_WARP_SCANS> ReduceByKeyPolicyT; }; /// SM13 struct Policy130 { enum { NOMINAL_4B_ITEMS_PER_THREAD = 7, ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, ((NOMINAL_4B_ITEMS_PER_THREAD * 8) + COMBINED_INPUT_BYTES - 1) / COMBINED_INPUT_BYTES)), }; typedef AgentReduceByKeyPolicy< 128, ITEMS_PER_THREAD, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, BLOCK_SCAN_WARP_SCANS> ReduceByKeyPolicyT; }; /// SM11 struct Policy110 { enum { NOMINAL_4B_ITEMS_PER_THREAD = 5, ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 8) / COMBINED_INPUT_BYTES)), }; typedef AgentReduceByKeyPolicy< 64, ITEMS_PER_THREAD, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, BLOCK_SCAN_RAKING> ReduceByKeyPolicyT; }; /****************************************************************************** * Tuning policies of current PTX compiler pass ******************************************************************************/ #if (CUB_PTX_ARCH >= 350) typedef Policy350 PtxPolicy; #elif (CUB_PTX_ARCH >= 300) typedef Policy300 PtxPolicy; #elif (CUB_PTX_ARCH >= 200) typedef Policy200 PtxPolicy; #elif (CUB_PTX_ARCH >= 130) typedef Policy130 PtxPolicy; #else typedef Policy110 PtxPolicy; #endif // "Opaque" policies (whose parameterizations aren't reflected in the type signature) struct PtxReduceByKeyPolicy : PtxPolicy::ReduceByKeyPolicyT {}; /****************************************************************************** * Utilities ******************************************************************************/ /** * Initialize kernel dispatch configurations with the policies corresponding to the PTX assembly we will use */ template CUB_RUNTIME_FUNCTION __forceinline__ static void InitConfigs( int ptx_version, KernelConfig &reduce_by_key_config) { #if (CUB_PTX_ARCH > 0) (void)ptx_version; // We're on the device, so initialize the kernel dispatch configurations with the current PTX policy reduce_by_key_config.template Init(); #else // We're on the host, so lookup and initialize the kernel dispatch configurations with the policies that match the device's PTX version if (ptx_version >= 350) { reduce_by_key_config.template Init(); } else if (ptx_version >= 300) { reduce_by_key_config.template Init(); } else if (ptx_version >= 200) { reduce_by_key_config.template Init(); } else if (ptx_version >= 130) { reduce_by_key_config.template Init(); } else { reduce_by_key_config.template Init(); } #endif } /** * Kernel kernel dispatch configuration. 
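 *
 * Sizing example (illustrative numbers, not from the original header): with 4-byte
 * keys and 4-byte values, the SM35 policy above uses 128 threads per block and
 * 6 items per thread, so
 *
 *   tile_items = 128 * 6 = 768
 *   num_tiles  = (num_items + tile_items - 1) / tile_items
 *              = (1000000 + 767) / 768 = 1303        (for num_items = 1,000,000)
 *
 * and num_tiles is what Dispatch() below uses to size the tile-status descriptors.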
*/ struct KernelConfig { int block_threads; int items_per_thread; int tile_items; template CUB_RUNTIME_FUNCTION __forceinline__ void Init() { block_threads = PolicyT::BLOCK_THREADS; items_per_thread = PolicyT::ITEMS_PER_THREAD; tile_items = block_threads * items_per_thread; } }; //--------------------------------------------------------------------- // Dispatch entrypoints //--------------------------------------------------------------------- /** * Internal dispatch routine for computing a device-wide reduce-by-key using the * specified kernel functions. */ template < typename ScanInitKernelT, ///< Function type of cub::DeviceScanInitKernel typename ReduceByKeyKernelT> ///< Function type of cub::DeviceReduceByKeyKernelT CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t Dispatch( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation KeysInputIteratorT d_keys_in, ///< [in] Pointer to the input sequence of keys UniqueOutputIteratorT d_unique_out, ///< [out] Pointer to the output sequence of unique keys (one key per run) ValuesInputIteratorT d_values_in, ///< [in] Pointer to the input sequence of corresponding values AggregatesOutputIteratorT d_aggregates_out, ///< [out] Pointer to the output sequence of value aggregates (one aggregate per run) NumRunsOutputIteratorT d_num_runs_out, ///< [out] Pointer to total number of runs encountered (i.e., the length of d_unique_out) EqualityOpT equality_op, ///< [in] KeyT equality operator ReductionOpT reduction_op, ///< [in] ValueT reduction operator OffsetT num_items, ///< [in] Total number of items to select from cudaStream_t stream, ///< [in] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous, ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. 
int /*ptx_version*/, ///< [in] PTX version of dispatch kernels ScanInitKernelT init_kernel, ///< [in] Kernel function pointer to parameterization of cub::DeviceScanInitKernel ReduceByKeyKernelT reduce_by_key_kernel, ///< [in] Kernel function pointer to parameterization of cub::DeviceReduceByKeyKernel KernelConfig reduce_by_key_config) ///< [in] Dispatch parameters that match the policy that \p reduce_by_key_kernel was compiled for { #ifndef CUB_RUNTIME_ENABLED (void)d_temp_storage; (void)temp_storage_bytes; (void)d_keys_in; (void)d_unique_out; (void)d_values_in; (void)d_aggregates_out; (void)d_num_runs_out; (void)equality_op; (void)reduction_op; (void)num_items; (void)stream; (void)debug_synchronous; (void)init_kernel; (void)reduce_by_key_kernel; (void)reduce_by_key_config; // Kernel launch not supported from this device return CubDebug(cudaErrorNotSupported); #else cudaError error = cudaSuccess; do { // Get device ordinal int device_ordinal; if (CubDebug(error = cudaGetDevice(&device_ordinal))) break; // Get SM count int sm_count; if (CubDebug(error = cudaDeviceGetAttribute (&sm_count, cudaDevAttrMultiProcessorCount, device_ordinal))) break; // Number of input tiles int tile_size = reduce_by_key_config.block_threads * reduce_by_key_config.items_per_thread; int num_tiles = (num_items + tile_size - 1) / tile_size; // Specify temporary storage allocation requirements size_t allocation_sizes[1]; if (CubDebug(error = ScanTileStateT::AllocationSize(num_tiles, allocation_sizes[0]))) break; // bytes needed for tile status descriptors // Compute allocation pointers into the single storage blob (or compute the necessary size of the blob) void* allocations[1]; if (CubDebug(error = AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes))) break; if (d_temp_storage == NULL) { // Return if the caller is simply requesting the size of the storage allocation break; } // Construct the tile status interface ScanTileStateT tile_state; if (CubDebug(error = tile_state.Init(num_tiles, allocations[0], allocation_sizes[0]))) break; // Log init_kernel configuration int init_grid_size = CUB_MAX(1, (num_tiles + INIT_KERNEL_THREADS - 1) / INIT_KERNEL_THREADS); if (debug_synchronous) _CubLog("Invoking init_kernel<<<%d, %d, 0, %lld>>>()\n", init_grid_size, INIT_KERNEL_THREADS, (long long) stream); // Invoke init_kernel to initialize tile descriptors init_kernel<<>>( tile_state, num_tiles, d_num_runs_out); // Check for failure to launch if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; // Return if empty problem if (num_items == 0) break; // Get SM occupancy for reduce_by_key_kernel int reduce_by_key_sm_occupancy; if (CubDebug(error = MaxSmOccupancy( reduce_by_key_sm_occupancy, // out reduce_by_key_kernel, reduce_by_key_config.block_threads))) break; // Get max x-dimension of grid int max_dim_x; if (CubDebug(error = cudaDeviceGetAttribute(&max_dim_x, cudaDevAttrMaxGridDimX, device_ordinal))) break;; // Run grids in epochs (in case number of tiles exceeds max x-dimension int scan_grid_size = CUB_MIN(num_tiles, max_dim_x); for (int start_tile = 0; start_tile < num_tiles; start_tile += scan_grid_size) { // Log reduce_by_key_kernel configuration if (debug_synchronous) _CubLog("Invoking %d reduce_by_key_kernel<<<%d, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy\n", start_tile, scan_grid_size, reduce_by_key_config.block_threads, (long long) stream, 
reduce_by_key_config.items_per_thread, reduce_by_key_sm_occupancy); // Invoke reduce_by_key_kernel reduce_by_key_kernel<<>>( d_keys_in, d_unique_out, d_values_in, d_aggregates_out, d_num_runs_out, tile_state, start_tile, equality_op, reduction_op, num_items); // Check for failure to launch if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; } } while (0); return error; #endif // CUB_RUNTIME_ENABLED } /** * Internal dispatch routine */ CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t Dispatch( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation KeysInputIteratorT d_keys_in, ///< [in] Pointer to the input sequence of keys UniqueOutputIteratorT d_unique_out, ///< [out] Pointer to the output sequence of unique keys (one key per run) ValuesInputIteratorT d_values_in, ///< [in] Pointer to the input sequence of corresponding values AggregatesOutputIteratorT d_aggregates_out, ///< [out] Pointer to the output sequence of value aggregates (one aggregate per run) NumRunsOutputIteratorT d_num_runs_out, ///< [out] Pointer to total number of runs encountered (i.e., the length of d_unique_out) EqualityOpT equality_op, ///< [in] KeyT equality operator ReductionOpT reduction_op, ///< [in] ValueT reduction operator OffsetT num_items, ///< [in] Total number of items to select from cudaStream_t stream, ///< [in] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous) ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { cudaError error = cudaSuccess; do { // Get PTX version int ptx_version; #if (CUB_PTX_ARCH == 0) if (CubDebug(error = PtxVersion(ptx_version))) break; #else ptx_version = CUB_PTX_ARCH; #endif // Get kernel kernel dispatch configurations KernelConfig reduce_by_key_config; InitConfigs(ptx_version, reduce_by_key_config); // Dispatch if (CubDebug(error = Dispatch( d_temp_storage, temp_storage_bytes, d_keys_in, d_unique_out, d_values_in, d_aggregates_out, d_num_runs_out, equality_op, reduction_op, num_items, stream, debug_synchronous, ptx_version, DeviceCompactInitKernel, DeviceReduceByKeyKernel, reduce_by_key_config))) break; } while (0); return error; } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/device/dispatch/dispatch_rle.cuh000066400000000000000000000560611411340063500242150ustar00rootroot00000000000000 /****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
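/******************************************************************************
 * [Editor's note] Illustrative usage sketch for the reduce-by-key dispatcher
 * above. This block is not part of the upstream CUB sources; it only shows
 * how the public wrapper (cub::DeviceReduce::ReduceByKey in stock CUB)
 * typically drives DispatchReduceByKey, in particular the two-phase
 * temp-storage protocol documented on Dispatch(): a first call with
 * d_temp_storage == NULL only sizes the allocation, the second call does the
 * work. All device pointers (d_keys_in, d_unique_out, ...) are hypothetical
 * allocations assumed to exist.
 *
 * \code
 * #include <cub/cub.cuh>
 *
 * // Inputs already resident on the device (assumed):
 * // keys   = {0, 0, 1, 1, 1, 2}
 * // values = {1, 2, 3, 4, 5, 6}
 * int num_items = 6;
 * int *d_keys_in, *d_unique_out, *d_values_in, *d_aggregates_out, *d_num_runs_out;
 *
 * // Phase 1: query the required temporary storage size (no work is done)
 * void   *d_temp_storage     = NULL;
 * size_t  temp_storage_bytes = 0;
 * cub::DeviceReduce::ReduceByKey(
 *     d_temp_storage, temp_storage_bytes,
 *     d_keys_in, d_unique_out, d_values_in, d_aggregates_out,
 *     d_num_runs_out, cub::Sum(), num_items);
 *
 * // Phase 2: allocate the blob and run the reduction
 * cudaMalloc(&d_temp_storage, temp_storage_bytes);
 * cub::DeviceReduce::ReduceByKey(
 *     d_temp_storage, temp_storage_bytes,
 *     d_keys_in, d_unique_out, d_values_in, d_aggregates_out,
 *     d_num_runs_out, cub::Sum(), num_items);
 *
 * // Expected results:
 * //   d_unique_out     -> {0, 1, 2}
 * //   d_aggregates_out -> {3, 12, 6}
 * //   *d_num_runs_out  -> 3
 * cudaFree(d_temp_storage);
 * \endcode
 ******************************************************************************/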
* * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::DeviceRle provides device-wide, parallel operations for run-length-encoding sequences of data items residing within device-accessible memory. */ #pragma once #include #include #include "dispatch_scan.cuh" #include "../../agent/agent_rle.cuh" #include "../../thread/thread_operators.cuh" #include "../../grid/grid_queue.cuh" #include "../../util_device.cuh" #include "../../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * Kernel entry points *****************************************************************************/ /** * Select kernel entry point (multi-block) * * Performs functor-based selection if SelectOp functor type != NullType * Otherwise performs flag-based selection if FlagIterator's value type != NullType * Otherwise performs discontinuity selection (keep unique) */ template < typename AgentRlePolicyT, ///< Parameterized AgentRlePolicyT tuning policy type typename InputIteratorT, ///< Random-access input iterator type for reading input items \iterator typename OffsetsOutputIteratorT, ///< Random-access output iterator type for writing run-offset values \iterator typename LengthsOutputIteratorT, ///< Random-access output iterator type for writing run-length values \iterator typename NumRunsOutputIteratorT, ///< Output iterator type for recording the number of runs encountered \iterator typename ScanTileStateT, ///< Tile status interface type typename EqualityOpT, ///< T equality operator type typename OffsetT> ///< Signed integer type for global offsets __launch_bounds__ (int(AgentRlePolicyT::BLOCK_THREADS)) __global__ void DeviceRleSweepKernel( InputIteratorT d_in, ///< [in] Pointer to input sequence of data items OffsetsOutputIteratorT d_offsets_out, ///< [out] Pointer to output sequence of run-offsets LengthsOutputIteratorT d_lengths_out, ///< [out] Pointer to output sequence of run-lengths NumRunsOutputIteratorT d_num_runs_out, ///< [out] Pointer to total number of runs (i.e., length of \p d_offsets_out) ScanTileStateT tile_status, ///< [in] Tile status interface EqualityOpT equality_op, ///< [in] Equality operator for input items OffsetT num_items, ///< [in] Total number of input items (i.e., length of \p d_in) int num_tiles) ///< [in] Total number of tiles for the entire problem { // Thread block type for selecting data from input tiles typedef AgentRle< AgentRlePolicyT, InputIteratorT, 
OffsetsOutputIteratorT, LengthsOutputIteratorT, EqualityOpT, OffsetT> AgentRleT; // Shared memory for AgentRle __shared__ typename AgentRleT::TempStorage temp_storage; // Process tiles AgentRleT(temp_storage, d_in, d_offsets_out, d_lengths_out, equality_op, num_items).ConsumeRange( num_tiles, tile_status, d_num_runs_out); } /****************************************************************************** * Dispatch ******************************************************************************/ /** * Utility class for dispatching the appropriately-tuned kernels for DeviceRle */ template < typename InputIteratorT, ///< Random-access input iterator type for reading input items \iterator typename OffsetsOutputIteratorT, ///< Random-access output iterator type for writing run-offset values \iterator typename LengthsOutputIteratorT, ///< Random-access output iterator type for writing run-length values \iterator typename NumRunsOutputIteratorT, ///< Output iterator type for recording the number of runs encountered \iterator typename EqualityOpT, ///< T equality operator type typename OffsetT> ///< Signed integer type for global offsets struct DeviceRleDispatch { /****************************************************************************** * Types and constants ******************************************************************************/ // The input value type typedef typename std::iterator_traits::value_type T; // The lengths output value type typedef typename If<(Equals::value_type, void>::VALUE), // LengthT = (if output iterator's value type is void) ? OffsetT, // ... then the OffsetT type, typename std::iterator_traits::value_type>::Type LengthT; // ... else the output iterator's value type enum { INIT_KERNEL_THREADS = 128, }; // Tile status descriptor interface type typedef ReduceByKeyScanTileState ScanTileStateT; /****************************************************************************** * Tuning policies ******************************************************************************/ /// SM35 struct Policy350 { enum { NOMINAL_4B_ITEMS_PER_THREAD = 15, ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(T)))), }; typedef AgentRlePolicy< 96, ITEMS_PER_THREAD, BLOCK_LOAD_DIRECT, LOAD_LDG, true, BLOCK_SCAN_WARP_SCANS> RleSweepPolicy; }; /// SM30 struct Policy300 { enum { NOMINAL_4B_ITEMS_PER_THREAD = 5, ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(T)))), }; typedef AgentRlePolicy< 256, ITEMS_PER_THREAD, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, true, BLOCK_SCAN_RAKING_MEMOIZE> RleSweepPolicy; }; /// SM20 struct Policy200 { enum { NOMINAL_4B_ITEMS_PER_THREAD = 15, ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(T)))), }; typedef AgentRlePolicy< 128, ITEMS_PER_THREAD, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, false, BLOCK_SCAN_WARP_SCANS> RleSweepPolicy; }; /// SM13 struct Policy130 { enum { NOMINAL_4B_ITEMS_PER_THREAD = 9, ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(T)))), }; typedef AgentRlePolicy< 64, ITEMS_PER_THREAD, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, true, BLOCK_SCAN_RAKING_MEMOIZE> RleSweepPolicy; }; /// SM10 struct Policy100 { enum { NOMINAL_4B_ITEMS_PER_THREAD = 9, ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(T)))), }; typedef AgentRlePolicy< 256, ITEMS_PER_THREAD, 
BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, true, BLOCK_SCAN_RAKING_MEMOIZE> RleSweepPolicy; }; /****************************************************************************** * Tuning policies of current PTX compiler pass ******************************************************************************/ #if (CUB_PTX_ARCH >= 350) typedef Policy350 PtxPolicy; #elif (CUB_PTX_ARCH >= 300) typedef Policy300 PtxPolicy; #elif (CUB_PTX_ARCH >= 200) typedef Policy200 PtxPolicy; #elif (CUB_PTX_ARCH >= 130) typedef Policy130 PtxPolicy; #else typedef Policy100 PtxPolicy; #endif // "Opaque" policies (whose parameterizations aren't reflected in the type signature) struct PtxRleSweepPolicy : PtxPolicy::RleSweepPolicy {}; /****************************************************************************** * Utilities ******************************************************************************/ /** * Initialize kernel dispatch configurations with the policies corresponding to the PTX assembly we will use */ template CUB_RUNTIME_FUNCTION __forceinline__ static void InitConfigs( int ptx_version, KernelConfig& device_rle_config) { #if (CUB_PTX_ARCH > 0) // We're on the device, so initialize the kernel dispatch configurations with the current PTX policy device_rle_config.template Init(); #else // We're on the host, so lookup and initialize the kernel dispatch configurations with the policies that match the device's PTX version if (ptx_version >= 350) { device_rle_config.template Init(); } else if (ptx_version >= 300) { device_rle_config.template Init(); } else if (ptx_version >= 200) { device_rle_config.template Init(); } else if (ptx_version >= 130) { device_rle_config.template Init(); } else { device_rle_config.template Init(); } #endif } /** * Kernel kernel dispatch configuration. Mirrors the constants within AgentRlePolicyT. */ struct KernelConfig { int block_threads; int items_per_thread; BlockLoadAlgorithm load_policy; bool store_warp_time_slicing; BlockScanAlgorithm scan_algorithm; template CUB_RUNTIME_FUNCTION __forceinline__ void Init() { block_threads = AgentRlePolicyT::BLOCK_THREADS; items_per_thread = AgentRlePolicyT::ITEMS_PER_THREAD; load_policy = AgentRlePolicyT::LOAD_ALGORITHM; store_warp_time_slicing = AgentRlePolicyT::STORE_WARP_TIME_SLICING; scan_algorithm = AgentRlePolicyT::SCAN_ALGORITHM; } CUB_RUNTIME_FUNCTION __forceinline__ void Print() { printf("%d, %d, %d, %d, %d", block_threads, items_per_thread, load_policy, store_warp_time_slicing, scan_algorithm); } }; /****************************************************************************** * Dispatch entrypoints ******************************************************************************/ /** * Internal dispatch routine for computing a device-wide run-length-encode using the * specified kernel functions. */ template < typename DeviceScanInitKernelPtr, ///< Function type of cub::DeviceScanInitKernel typename DeviceRleSweepKernelPtr> ///< Function type of cub::DeviceRleSweepKernelPtr CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t Dispatch( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. 
size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OffsetsOutputIteratorT d_offsets_out, ///< [out] Pointer to the output sequence of run-offsets LengthsOutputIteratorT d_lengths_out, ///< [out] Pointer to the output sequence of run-lengths NumRunsOutputIteratorT d_num_runs_out, ///< [out] Pointer to the total number of runs encountered (i.e., length of \p d_offsets_out) EqualityOpT equality_op, ///< [in] Equality operator for input items OffsetT num_items, ///< [in] Total number of input items (i.e., length of \p d_in) cudaStream_t stream, ///< [in] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous, ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. int ptx_version, ///< [in] PTX version of dispatch kernels DeviceScanInitKernelPtr device_scan_init_kernel, ///< [in] Kernel function pointer to parameterization of cub::DeviceScanInitKernel DeviceRleSweepKernelPtr device_rle_sweep_kernel, ///< [in] Kernel function pointer to parameterization of cub::DeviceRleSweepKernel KernelConfig device_rle_config) ///< [in] Dispatch parameters that match the policy that \p device_rle_sweep_kernel was compiled for { #ifndef CUB_RUNTIME_ENABLED // Kernel launch not supported from this device return CubDebug(cudaErrorNotSupported); #else cudaError error = cudaSuccess; do { // Get device ordinal int device_ordinal; if (CubDebug(error = cudaGetDevice(&device_ordinal))) break; // Get SM count int sm_count; if (CubDebug(error = cudaDeviceGetAttribute (&sm_count, cudaDevAttrMultiProcessorCount, device_ordinal))) break; // Number of input tiles int tile_size = device_rle_config.block_threads * device_rle_config.items_per_thread; int num_tiles = (num_items + tile_size - 1) / tile_size; // Specify temporary storage allocation requirements size_t allocation_sizes[1]; if (CubDebug(error = ScanTileStateT::AllocationSize(num_tiles, allocation_sizes[0]))) break; // bytes needed for tile status descriptors // Compute allocation pointers into the single storage blob (or compute the necessary size of the blob) void* allocations[1]; if (CubDebug(error = AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes))) break; if (d_temp_storage == NULL) { // Return if the caller is simply requesting the size of the storage allocation break; } // Construct the tile status interface ScanTileStateT tile_status; if (CubDebug(error = tile_status.Init(num_tiles, allocations[0], allocation_sizes[0]))) break; // Log device_scan_init_kernel configuration int init_grid_size = CUB_MAX(1, (num_tiles + INIT_KERNEL_THREADS - 1) / INIT_KERNEL_THREADS); if (debug_synchronous) _CubLog("Invoking device_scan_init_kernel<<<%d, %d, 0, %lld>>>()\n", init_grid_size, INIT_KERNEL_THREADS, (long long) stream); // Invoke device_scan_init_kernel to initialize tile descriptors and queue descriptors device_scan_init_kernel<<>>( tile_status, num_tiles, d_num_runs_out); // Check for failure to launch if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; // Return if empty problem if (num_items == 0) break; // Get SM occupancy for device_rle_sweep_kernel int device_rle_kernel_sm_occupancy; if (CubDebug(error = MaxSmOccupancy( 
device_rle_kernel_sm_occupancy, // out device_rle_sweep_kernel, device_rle_config.block_threads))) break; // Get max x-dimension of grid int max_dim_x; if (CubDebug(error = cudaDeviceGetAttribute(&max_dim_x, cudaDevAttrMaxGridDimX, device_ordinal))) break;; // Get grid size for scanning tiles dim3 scan_grid_size; scan_grid_size.z = 1; scan_grid_size.y = ((unsigned int) num_tiles + max_dim_x - 1) / max_dim_x; scan_grid_size.x = CUB_MIN(num_tiles, max_dim_x); // Log device_rle_sweep_kernel configuration if (debug_synchronous) _CubLog("Invoking device_rle_sweep_kernel<<<{%d,%d,%d}, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy\n", scan_grid_size.x, scan_grid_size.y, scan_grid_size.z, device_rle_config.block_threads, (long long) stream, device_rle_config.items_per_thread, device_rle_kernel_sm_occupancy); // Invoke device_rle_sweep_kernel device_rle_sweep_kernel<<>>( d_in, d_offsets_out, d_lengths_out, d_num_runs_out, tile_status, equality_op, num_items, num_tiles); // Check for failure to launch if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; } while (0); return error; #endif // CUB_RUNTIME_ENABLED } /** * Internal dispatch routine */ CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t Dispatch( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to input sequence of data items OffsetsOutputIteratorT d_offsets_out, ///< [out] Pointer to output sequence of run-offsets LengthsOutputIteratorT d_lengths_out, ///< [out] Pointer to output sequence of run-lengths NumRunsOutputIteratorT d_num_runs_out, ///< [out] Pointer to total number of runs (i.e., length of \p d_offsets_out) EqualityOpT equality_op, ///< [in] Equality operator for input items OffsetT num_items, ///< [in] Total number of input items (i.e., length of \p d_in) cudaStream_t stream, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { cudaError error = cudaSuccess; do { // Get PTX version int ptx_version; #if (CUB_PTX_ARCH == 0) if (CubDebug(error = PtxVersion(ptx_version))) break; #else ptx_version = CUB_PTX_ARCH; #endif // Get kernel kernel dispatch configurations KernelConfig device_rle_config; InitConfigs(ptx_version, device_rle_config); // Dispatch if (CubDebug(error = Dispatch( d_temp_storage, temp_storage_bytes, d_in, d_offsets_out, d_lengths_out, d_num_runs_out, equality_op, num_items, stream, debug_synchronous, ptx_version, DeviceCompactInitKernel, DeviceRleSweepKernel, device_rle_config))) break; } while (0); return error; } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/device/dispatch/dispatch_scan.cuh000066400000000000000000000543201411340063500243530ustar00rootroot00000000000000 /****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. 
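/******************************************************************************
 * [Editor's note] Illustrative usage sketch for the run-length-encoding
 * dispatcher (DeviceRleDispatch) defined above; not part of the upstream CUB
 * sources. In stock CUB this dispatcher backs
 * cub::DeviceRunLengthEncode::NonTrivialRuns, which reports the offset and
 * length of every run longer than one item, using the same two-phase
 * temp-storage protocol. The device pointers below are hypothetical
 * allocations assumed to exist.
 *
 * \code
 * #include <cub/cub.cuh>
 *
 * // Input already on the device (assumed): {1, 1, 2, 3, 3, 3, 4}
 * int num_items = 7;
 * int *d_in, *d_offsets_out, *d_lengths_out, *d_num_runs_out;
 *
 * // Phase 1: size the temporary storage
 * void   *d_temp_storage     = NULL;
 * size_t  temp_storage_bytes = 0;
 * cub::DeviceRunLengthEncode::NonTrivialRuns(
 *     d_temp_storage, temp_storage_bytes,
 *     d_in, d_offsets_out, d_lengths_out, d_num_runs_out, num_items);
 *
 * // Phase 2: allocate and encode
 * cudaMalloc(&d_temp_storage, temp_storage_bytes);
 * cub::DeviceRunLengthEncode::NonTrivialRuns(
 *     d_temp_storage, temp_storage_bytes,
 *     d_in, d_offsets_out, d_lengths_out, d_num_runs_out, num_items);
 *
 * // Expected results (only runs of length > 1 are reported):
 * //   d_offsets_out   -> {0, 3}
 * //   d_lengths_out   -> {2, 3}
 * //   *d_num_runs_out -> 2
 * cudaFree(d_temp_storage);
 * \endcode
 ******************************************************************************/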
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::DeviceScan provides device-wide, parallel operations for computing a prefix scan across a sequence of data items residing within device-accessible memory. */ #pragma once #include #include #include "../../agent/agent_scan.cuh" #include "../../thread/thread_operators.cuh" #include "../../grid/grid_queue.cuh" #include "../../util_arch.cuh" #include "../../util_debug.cuh" #include "../../util_device.cuh" #include "../../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * Kernel entry points *****************************************************************************/ /** * Initialization kernel for tile status initialization (multi-block) */ template < typename ScanTileStateT> ///< Tile status interface type __global__ void DeviceScanInitKernel( ScanTileStateT tile_state, ///< [in] Tile status interface int num_tiles) ///< [in] Number of tiles { // Initialize tile status tile_state.InitializeStatus(num_tiles); } /** * Initialization kernel for tile status initialization (multi-block) */ template < typename ScanTileStateT, ///< Tile status interface type typename NumSelectedIteratorT> ///< Output iterator type for recording the number of items selected __global__ void DeviceCompactInitKernel( ScanTileStateT tile_state, ///< [in] Tile status interface int num_tiles, ///< [in] Number of tiles NumSelectedIteratorT d_num_selected_out) ///< [out] Pointer to the total number of items selected (i.e., length of \p d_selected_out) { // Initialize tile status tile_state.InitializeStatus(num_tiles); // Initialize d_num_selected_out if ((blockIdx.x == 0) && (threadIdx.x == 0)) *d_num_selected_out = 0; } /** * Scan kernel entry point (multi-block) */ template < typename ScanPolicyT, ///< Parameterized ScanPolicyT tuning policy type typename InputIteratorT, ///< Random-access input iterator type for reading scan inputs \iterator typename OutputIteratorT, ///< 
Random-access output iterator type for writing scan outputs \iterator typename ScanTileStateT, ///< Tile status interface type typename ScanOpT, ///< Binary scan functor type having member T operator()(const T &a, const T &b) typename InitValueT, ///< Initial value to seed the exclusive scan (cub::NullType for inclusive scans) typename OffsetT> ///< Signed integer type for global offsets __launch_bounds__ (int(ScanPolicyT::BLOCK_THREADS)) __global__ void DeviceScanKernel( InputIteratorT d_in, ///< Input data OutputIteratorT d_out, ///< Output data ScanTileStateT tile_state, ///< Tile status interface int start_tile, ///< The starting tile for the current grid ScanOpT scan_op, ///< Binary scan functor InitValueT init_value, ///< Initial value to seed the exclusive scan OffsetT num_items) ///< Total number of scan items for the entire problem { // Thread block type for scanning input tiles typedef AgentScan< ScanPolicyT, InputIteratorT, OutputIteratorT, ScanOpT, InitValueT, OffsetT> AgentScanT; // Shared memory for AgentScan __shared__ typename AgentScanT::TempStorage temp_storage; // Process tiles AgentScanT(temp_storage, d_in, d_out, scan_op, init_value).ConsumeRange( num_items, tile_state, start_tile); } /****************************************************************************** * Dispatch ******************************************************************************/ /** * Utility class for dispatching the appropriately-tuned kernels for DeviceScan */ template < typename InputIteratorT, ///< Random-access input iterator type for reading scan inputs \iterator typename OutputIteratorT, ///< Random-access output iterator type for writing scan outputs \iterator typename ScanOpT, ///< Binary scan functor type having member T operator()(const T &a, const T &b) typename InitValueT, ///< The init_value element type for ScanOpT (cub::NullType for inclusive scans) typename OffsetT> ///< Signed integer type for global offsets struct DispatchScan { //--------------------------------------------------------------------- // Constants and Types //--------------------------------------------------------------------- enum { INIT_KERNEL_THREADS = 128 }; // The output value type typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? typename std::iterator_traits::value_type, // ... then the input iterator's value type, typename std::iterator_traits::value_type>::Type OutputT; // ... 
else the output iterator's value type // Tile status descriptor interface type typedef ScanTileState ScanTileStateT; //--------------------------------------------------------------------- // Tuning policies //--------------------------------------------------------------------- /// SM600 struct Policy600 { typedef AgentScanPolicy< CUB_NOMINAL_CONFIG(128, 15, OutputT), ///< Threads per block, items per thread BLOCK_LOAD_TRANSPOSE, LOAD_DEFAULT, BLOCK_STORE_TRANSPOSE, BLOCK_SCAN_WARP_SCANS> ScanPolicyT; }; /// SM520 struct Policy520 { // Titan X: 32.47B items/s @ 48M 32-bit T typedef AgentScanPolicy< CUB_NOMINAL_CONFIG(128, 12, OutputT), ///< Threads per block, items per thread BLOCK_LOAD_DIRECT, LOAD_LDG, BLOCK_STORE_WARP_TRANSPOSE, BLOCK_SCAN_WARP_SCANS> ScanPolicyT; }; /// SM35 struct Policy350 { // GTX Titan: 29.5B items/s (232.4 GB/s) @ 48M 32-bit T typedef AgentScanPolicy< CUB_NOMINAL_CONFIG(128, 12, OutputT), ///< Threads per block, items per thread BLOCK_LOAD_DIRECT, LOAD_LDG, BLOCK_STORE_WARP_TRANSPOSE_TIMESLICED, BLOCK_SCAN_RAKING> ScanPolicyT; }; /// SM30 struct Policy300 { typedef AgentScanPolicy< CUB_NOMINAL_CONFIG(256, 9, OutputT), ///< Threads per block, items per thread BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, BLOCK_STORE_WARP_TRANSPOSE, BLOCK_SCAN_WARP_SCANS> ScanPolicyT; }; /// SM20 struct Policy200 { // GTX 580: 20.3B items/s (162.3 GB/s) @ 48M 32-bit T typedef AgentScanPolicy< CUB_NOMINAL_CONFIG(128, 12, OutputT), ///< Threads per block, items per thread BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, BLOCK_STORE_WARP_TRANSPOSE, BLOCK_SCAN_WARP_SCANS> ScanPolicyT; }; /// SM13 struct Policy130 { typedef AgentScanPolicy< CUB_NOMINAL_CONFIG(96, 21, OutputT), ///< Threads per block, items per thread BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, BLOCK_STORE_WARP_TRANSPOSE, BLOCK_SCAN_RAKING_MEMOIZE> ScanPolicyT; }; /// SM10 struct Policy100 { typedef AgentScanPolicy< CUB_NOMINAL_CONFIG(64, 9, OutputT), ///< Threads per block, items per thread BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, BLOCK_STORE_WARP_TRANSPOSE, BLOCK_SCAN_WARP_SCANS> ScanPolicyT; }; //--------------------------------------------------------------------- // Tuning policies of current PTX compiler pass //--------------------------------------------------------------------- #if (CUB_PTX_ARCH >= 600) typedef Policy600 PtxPolicy; #elif (CUB_PTX_ARCH >= 520) typedef Policy520 PtxPolicy; #elif (CUB_PTX_ARCH >= 350) typedef Policy350 PtxPolicy; #elif (CUB_PTX_ARCH >= 300) typedef Policy300 PtxPolicy; #elif (CUB_PTX_ARCH >= 200) typedef Policy200 PtxPolicy; #elif (CUB_PTX_ARCH >= 130) typedef Policy130 PtxPolicy; #else typedef Policy100 PtxPolicy; #endif // "Opaque" policies (whose parameterizations aren't reflected in the type signature) struct PtxAgentScanPolicy : PtxPolicy::ScanPolicyT {}; //--------------------------------------------------------------------- // Utilities //--------------------------------------------------------------------- /** * Initialize kernel dispatch configurations with the policies corresponding to the PTX assembly we will use */ template CUB_RUNTIME_FUNCTION __forceinline__ static void InitConfigs( int ptx_version, KernelConfig &scan_kernel_config) { #if (CUB_PTX_ARCH > 0) (void)ptx_version; // We're on the device, so initialize the kernel dispatch configurations with the current PTX policy scan_kernel_config.template Init(); #else // We're on the host, so lookup and initialize the kernel dispatch configurations with the policies that match the device's PTX version if (ptx_version >= 600) { 
scan_kernel_config.template Init(); } else if (ptx_version >= 520) { scan_kernel_config.template Init(); } else if (ptx_version >= 350) { scan_kernel_config.template Init(); } else if (ptx_version >= 300) { scan_kernel_config.template Init(); } else if (ptx_version >= 200) { scan_kernel_config.template Init(); } else if (ptx_version >= 130) { scan_kernel_config.template Init(); } else { scan_kernel_config.template Init(); } #endif } /** * Kernel kernel dispatch configuration. */ struct KernelConfig { int block_threads; int items_per_thread; int tile_items; template CUB_RUNTIME_FUNCTION __forceinline__ void Init() { block_threads = PolicyT::BLOCK_THREADS; items_per_thread = PolicyT::ITEMS_PER_THREAD; tile_items = block_threads * items_per_thread; } }; //--------------------------------------------------------------------- // Dispatch entrypoints //--------------------------------------------------------------------- /** * Internal dispatch routine for computing a device-wide prefix scan using the * specified kernel functions. */ template < typename ScanInitKernelPtrT, ///< Function type of cub::DeviceScanInitKernel typename ScanSweepKernelPtrT> ///< Function type of cub::DeviceScanKernelPtrT CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t Dispatch( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output sequence of data items ScanOpT scan_op, ///< [in] Binary scan functor InitValueT init_value, ///< [in] Initial value to seed the exclusive scan OffsetT num_items, ///< [in] Total number of input items (i.e., the length of \p d_in) cudaStream_t stream, ///< [in] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous, ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. 
int /*ptx_version*/, ///< [in] PTX version of dispatch kernels ScanInitKernelPtrT init_kernel, ///< [in] Kernel function pointer to parameterization of cub::DeviceScanInitKernel ScanSweepKernelPtrT scan_kernel, ///< [in] Kernel function pointer to parameterization of cub::DeviceScanKernel KernelConfig scan_kernel_config) ///< [in] Dispatch parameters that match the policy that \p scan_kernel was compiled for { #ifndef CUB_RUNTIME_ENABLED (void)d_temp_storage; (void)temp_storage_bytes; (void)d_in; (void)d_out; (void)scan_op; (void)init_value; (void)num_items; (void)stream; (void)debug_synchronous; (void)init_kernel; (void)scan_kernel; (void)scan_kernel_config; // Kernel launch not supported from this device return CubDebug(cudaErrorNotSupported); #else cudaError error = cudaSuccess; do { // Get device ordinal int device_ordinal; if (CubDebug(error = cudaGetDevice(&device_ordinal))) break; // Get SM count int sm_count; if (CubDebug(error = cudaDeviceGetAttribute (&sm_count, cudaDevAttrMultiProcessorCount, device_ordinal))) break; // Number of input tiles int tile_size = scan_kernel_config.block_threads * scan_kernel_config.items_per_thread; int num_tiles = (num_items + tile_size - 1) / tile_size; // Specify temporary storage allocation requirements size_t allocation_sizes[1]; if (CubDebug(error = ScanTileStateT::AllocationSize(num_tiles, allocation_sizes[0]))) break; // bytes needed for tile status descriptors // Compute allocation pointers into the single storage blob (or compute the necessary size of the blob) void* allocations[1]; if (CubDebug(error = AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes))) break; if (d_temp_storage == NULL) { // Return if the caller is simply requesting the size of the storage allocation break; } // Return if empty problem if (num_items == 0) break; // Construct the tile status interface ScanTileStateT tile_state; if (CubDebug(error = tile_state.Init(num_tiles, allocations[0], allocation_sizes[0]))) break; // Log init_kernel configuration int init_grid_size = (num_tiles + INIT_KERNEL_THREADS - 1) / INIT_KERNEL_THREADS; if (debug_synchronous) _CubLog("Invoking init_kernel<<<%d, %d, 0, %lld>>>()\n", init_grid_size, INIT_KERNEL_THREADS, (long long) stream); // Invoke init_kernel to initialize tile descriptors init_kernel<<>>( tile_state, num_tiles); // Check for failure to launch if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; // Get SM occupancy for scan_kernel int scan_sm_occupancy; if (CubDebug(error = MaxSmOccupancy( scan_sm_occupancy, // out scan_kernel, scan_kernel_config.block_threads))) break; // Get max x-dimension of grid int max_dim_x; if (CubDebug(error = cudaDeviceGetAttribute(&max_dim_x, cudaDevAttrMaxGridDimX, device_ordinal))) break;; // Run grids in epochs (in case number of tiles exceeds max x-dimension int scan_grid_size = CUB_MIN(num_tiles, max_dim_x); for (int start_tile = 0; start_tile < num_tiles; start_tile += scan_grid_size) { // Log scan_kernel configuration if (debug_synchronous) _CubLog("Invoking %d scan_kernel<<<%d, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy\n", start_tile, scan_grid_size, scan_kernel_config.block_threads, (long long) stream, scan_kernel_config.items_per_thread, scan_sm_occupancy); // Invoke scan_kernel scan_kernel<<>>( d_in, d_out, tile_state, start_tile, scan_op, init_value, num_items); // Check for failure to launch if (CubDebug(error = 
cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; } } while (0); return error; #endif // CUB_RUNTIME_ENABLED } /** * Internal dispatch routine */ CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t Dispatch( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output sequence of data items ScanOpT scan_op, ///< [in] Binary scan functor InitValueT init_value, ///< [in] Initial value to seed the exclusive scan OffsetT num_items, ///< [in] Total number of input items (i.e., the length of \p d_in) cudaStream_t stream, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { cudaError error = cudaSuccess; do { // Get PTX version int ptx_version; if (CubDebug(error = PtxVersion(ptx_version))) break; // Get kernel kernel dispatch configurations KernelConfig scan_kernel_config; InitConfigs(ptx_version, scan_kernel_config); // Dispatch if (CubDebug(error = Dispatch( d_temp_storage, temp_storage_bytes, d_in, d_out, scan_op, init_value, num_items, stream, debug_synchronous, ptx_version, DeviceScanInitKernel, DeviceScanKernel, scan_kernel_config))) break; } while (0); return error; } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/device/dispatch/dispatch_select_if.cuh000066400000000000000000000576061411340063500253760ustar00rootroot00000000000000 /****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
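/******************************************************************************
 * [Editor's note] Illustrative usage sketch for the prefix-scan dispatcher
 * (DispatchScan) defined above; not part of the upstream CUB sources. It
 * shows the public wrapper cub::DeviceScan::ExclusiveSum from stock CUB,
 * which internally selects a tuned policy, initializes the tile-status
 * descriptors with DeviceScanInitKernel and then launches DeviceScanKernel,
 * exactly as the Dispatch() routine above spells out. The device pointers
 * are hypothetical allocations assumed to exist.
 *
 * \code
 * #include <cub/cub.cuh>
 *
 * // Input already on the device (assumed): {1, 2, 3, 4}
 * int num_items = 4;
 * int *d_in, *d_out;
 *
 * // Phase 1: size the temporary storage
 * void   *d_temp_storage     = NULL;
 * size_t  temp_storage_bytes = 0;
 * cub::DeviceScan::ExclusiveSum(
 *     d_temp_storage, temp_storage_bytes, d_in, d_out, num_items);
 *
 * // Phase 2: allocate and scan
 * cudaMalloc(&d_temp_storage, temp_storage_bytes);
 * cub::DeviceScan::ExclusiveSum(
 *     d_temp_storage, temp_storage_bytes, d_in, d_out, num_items);
 *
 * // Expected result: d_out -> {0, 1, 3, 6}
 * cudaFree(d_temp_storage);
 * \endcode
 ******************************************************************************/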
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::DeviceSelect provides device-wide, parallel operations for selecting items from sequences of data items residing within device-accessible memory. */ #pragma once #include #include #include "dispatch_scan.cuh" #include "../../agent/agent_select_if.cuh" #include "../../thread/thread_operators.cuh" #include "../../grid/grid_queue.cuh" #include "../../util_device.cuh" #include "../../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * Kernel entry points *****************************************************************************/ /** * Select kernel entry point (multi-block) * * Performs functor-based selection if SelectOpT functor type != NullType * Otherwise performs flag-based selection if FlagsInputIterator's value type != NullType * Otherwise performs discontinuity selection (keep unique) */ template < typename AgentSelectIfPolicyT, ///< Parameterized AgentSelectIfPolicyT tuning policy type typename InputIteratorT, ///< Random-access input iterator type for reading input items typename FlagsInputIteratorT, ///< Random-access input iterator type for reading selection flags (NullType* if a selection functor or discontinuity flagging is to be used for selection) typename SelectedOutputIteratorT, ///< Random-access output iterator type for writing selected items typename NumSelectedIteratorT, ///< Output iterator type for recording the number of items selected typename ScanTileStateT, ///< Tile status interface type typename SelectOpT, ///< Selection operator type (NullType if selection flags or discontinuity flagging is to be used for selection) typename EqualityOpT, ///< Equality operator type (NullType if selection functor or selection flags is to be used for selection) typename OffsetT, ///< Signed integer type for global offsets bool KEEP_REJECTS> ///< Whether or not we push rejected items to the back of the output __launch_bounds__ (int(AgentSelectIfPolicyT::BLOCK_THREADS)) __global__ void DeviceSelectSweepKernel( InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items FlagsInputIteratorT d_flags, ///< [in] Pointer to the input sequence of selection flags (if applicable) SelectedOutputIteratorT d_selected_out, ///< [out] Pointer to the output sequence of selected data items NumSelectedIteratorT d_num_selected_out, ///< [out] Pointer to the total number of items selected (i.e., length of \p d_selected_out) ScanTileStateT tile_status, ///< [in] Tile status interface SelectOpT select_op, ///< [in] Selection operator EqualityOpT equality_op, ///< [in] Equality operator OffsetT num_items, ///< [in] Total number of input items (i.e., length of \p d_in) int num_tiles) ///< [in] Total number of tiles for the entire problem { // Thread block type for selecting data from input tiles typedef AgentSelectIf< AgentSelectIfPolicyT, InputIteratorT, 
FlagsInputIteratorT, SelectedOutputIteratorT, SelectOpT, EqualityOpT, OffsetT, KEEP_REJECTS> AgentSelectIfT; // Shared memory for AgentSelectIf __shared__ typename AgentSelectIfT::TempStorage temp_storage; // Process tiles AgentSelectIfT(temp_storage, d_in, d_flags, d_selected_out, select_op, equality_op, num_items).ConsumeRange( num_tiles, tile_status, d_num_selected_out); } /****************************************************************************** * Dispatch ******************************************************************************/ /** * Utility class for dispatching the appropriately-tuned kernels for DeviceSelect */ template < typename InputIteratorT, ///< Random-access input iterator type for reading input items typename FlagsInputIteratorT, ///< Random-access input iterator type for reading selection flags (NullType* if a selection functor or discontinuity flagging is to be used for selection) typename SelectedOutputIteratorT, ///< Random-access output iterator type for writing selected items typename NumSelectedIteratorT, ///< Output iterator type for recording the number of items selected typename SelectOpT, ///< Selection operator type (NullType if selection flags or discontinuity flagging is to be used for selection) typename EqualityOpT, ///< Equality operator type (NullType if selection functor or selection flags is to be used for selection) typename OffsetT, ///< Signed integer type for global offsets bool KEEP_REJECTS> ///< Whether or not we push rejected items to the back of the output struct DispatchSelectIf { /****************************************************************************** * Types and constants ******************************************************************************/ // The output value type typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? typename std::iterator_traits::value_type, // ... then the input iterator's value type, typename std::iterator_traits::value_type>::Type OutputT; // ... else the output iterator's value type // The flag value type typedef typename std::iterator_traits::value_type FlagT; enum { INIT_KERNEL_THREADS = 128, }; // Tile status descriptor interface type typedef ScanTileState ScanTileStateT; /****************************************************************************** * Tuning policies ******************************************************************************/ /// SM35 struct Policy350 { enum { NOMINAL_4B_ITEMS_PER_THREAD = 10, ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(OutputT)))), }; typedef AgentSelectIfPolicy< 128, ITEMS_PER_THREAD, BLOCK_LOAD_DIRECT, LOAD_LDG, BLOCK_SCAN_WARP_SCANS> SelectIfPolicyT; }; /// SM30 struct Policy300 { enum { NOMINAL_4B_ITEMS_PER_THREAD = 7, ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(3, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(OutputT)))), }; typedef AgentSelectIfPolicy< 128, ITEMS_PER_THREAD, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, BLOCK_SCAN_WARP_SCANS> SelectIfPolicyT; }; /// SM20 struct Policy200 { enum { NOMINAL_4B_ITEMS_PER_THREAD = (KEEP_REJECTS) ? 
7 : 15, ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(OutputT)))), }; typedef AgentSelectIfPolicy< 128, ITEMS_PER_THREAD, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, BLOCK_SCAN_WARP_SCANS> SelectIfPolicyT; }; /// SM13 struct Policy130 { enum { NOMINAL_4B_ITEMS_PER_THREAD = 9, ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(OutputT)))), }; typedef AgentSelectIfPolicy< 64, ITEMS_PER_THREAD, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, BLOCK_SCAN_RAKING_MEMOIZE> SelectIfPolicyT; }; /// SM10 struct Policy100 { enum { NOMINAL_4B_ITEMS_PER_THREAD = 9, ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(OutputT)))), }; typedef AgentSelectIfPolicy< 64, ITEMS_PER_THREAD, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, BLOCK_SCAN_RAKING> SelectIfPolicyT; }; /****************************************************************************** * Tuning policies of current PTX compiler pass ******************************************************************************/ #if (CUB_PTX_ARCH >= 350) typedef Policy350 PtxPolicy; #elif (CUB_PTX_ARCH >= 300) typedef Policy300 PtxPolicy; #elif (CUB_PTX_ARCH >= 200) typedef Policy200 PtxPolicy; #elif (CUB_PTX_ARCH >= 130) typedef Policy130 PtxPolicy; #else typedef Policy100 PtxPolicy; #endif // "Opaque" policies (whose parameterizations aren't reflected in the type signature) struct PtxSelectIfPolicyT : PtxPolicy::SelectIfPolicyT {}; /****************************************************************************** * Utilities ******************************************************************************/ /** * Initialize kernel dispatch configurations with the policies corresponding to the PTX assembly we will use */ template CUB_RUNTIME_FUNCTION __forceinline__ static void InitConfigs( int ptx_version, KernelConfig &select_if_config) { #if (CUB_PTX_ARCH > 0) (void)ptx_version; // We're on the device, so initialize the kernel dispatch configurations with the current PTX policy select_if_config.template Init(); #else // We're on the host, so lookup and initialize the kernel dispatch configurations with the policies that match the device's PTX version if (ptx_version >= 350) { select_if_config.template Init(); } else if (ptx_version >= 300) { select_if_config.template Init(); } else if (ptx_version >= 200) { select_if_config.template Init(); } else if (ptx_version >= 130) { select_if_config.template Init(); } else { select_if_config.template Init(); } #endif } /** * Kernel kernel dispatch configuration. */ struct KernelConfig { int block_threads; int items_per_thread; int tile_items; template CUB_RUNTIME_FUNCTION __forceinline__ void Init() { block_threads = PolicyT::BLOCK_THREADS; items_per_thread = PolicyT::ITEMS_PER_THREAD; tile_items = block_threads * items_per_thread; } }; /****************************************************************************** * Dispatch entrypoints ******************************************************************************/ /** * Internal dispatch routine for computing a device-wide selection using the * specified kernel functions. */ template < typename ScanInitKernelPtrT, ///< Function type of cub::DeviceScanInitKernel typename SelectIfKernelPtrT> ///< Function type of cub::SelectIfKernelPtrT CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t Dispatch( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. 
When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items FlagsInputIteratorT d_flags, ///< [in] Pointer to the input sequence of selection flags (if applicable) SelectedOutputIteratorT d_selected_out, ///< [in] Pointer to the output sequence of selected data items NumSelectedIteratorT d_num_selected_out, ///< [in] Pointer to the total number of items selected (i.e., length of \p d_selected_out) SelectOpT select_op, ///< [in] Selection operator EqualityOpT equality_op, ///< [in] Equality operator OffsetT num_items, ///< [in] Total number of input items (i.e., length of \p d_in) cudaStream_t stream, ///< [in] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous, ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. int /*ptx_version*/, ///< [in] PTX version of dispatch kernels ScanInitKernelPtrT scan_init_kernel, ///< [in] Kernel function pointer to parameterization of cub::DeviceScanInitKernel SelectIfKernelPtrT select_if_kernel, ///< [in] Kernel function pointer to parameterization of cub::DeviceSelectSweepKernel KernelConfig select_if_config) ///< [in] Dispatch parameters that match the policy that \p select_if_kernel was compiled for { #ifndef CUB_RUNTIME_ENABLED (void)d_temp_storage; (void)temp_storage_bytes; (void)d_in; (void)d_flags; (void)d_selected_out; (void)d_num_selected_out; (void)select_op; (void)equality_op; (void)num_items; (void)stream; (void)debug_synchronous; (void)scan_init_kernel; (void)select_if_kernel; (void)select_if_config; // Kernel launch not supported from this device return CubDebug(cudaErrorNotSupported); #else cudaError error = cudaSuccess; do { // Get device ordinal int device_ordinal; if (CubDebug(error = cudaGetDevice(&device_ordinal))) break; // Get SM count int sm_count; if (CubDebug(error = cudaDeviceGetAttribute (&sm_count, cudaDevAttrMultiProcessorCount, device_ordinal))) break; // Number of input tiles int tile_size = select_if_config.block_threads * select_if_config.items_per_thread; int num_tiles = (num_items + tile_size - 1) / tile_size; // Specify temporary storage allocation requirements size_t allocation_sizes[1]; if (CubDebug(error = ScanTileStateT::AllocationSize(num_tiles, allocation_sizes[0]))) break; // bytes needed for tile status descriptors // Compute allocation pointers into the single storage blob (or compute the necessary size of the blob) void* allocations[1]; if (CubDebug(error = AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes))) break; if (d_temp_storage == NULL) { // Return if the caller is simply requesting the size of the storage allocation break; } // Construct the tile status interface ScanTileStateT tile_status; if (CubDebug(error = tile_status.Init(num_tiles, allocations[0], allocation_sizes[0]))) break; // Log scan_init_kernel configuration int init_grid_size = CUB_MAX(1, (num_tiles + INIT_KERNEL_THREADS - 1) / INIT_KERNEL_THREADS); if (debug_synchronous) _CubLog("Invoking scan_init_kernel<<<%d, %d, 0, %lld>>>()\n", init_grid_size, INIT_KERNEL_THREADS, (long long) stream); // Invoke scan_init_kernel to initialize tile descriptors scan_init_kernel<<>>( tile_status, num_tiles, d_num_selected_out); // Check for failure to 
launch if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; // Return if empty problem if (num_items == 0) break; // Get SM occupancy for select_if_kernel int range_select_sm_occupancy; if (CubDebug(error = MaxSmOccupancy( range_select_sm_occupancy, // out select_if_kernel, select_if_config.block_threads))) break; // Get max x-dimension of grid int max_dim_x; if (CubDebug(error = cudaDeviceGetAttribute(&max_dim_x, cudaDevAttrMaxGridDimX, device_ordinal))) break;; // Get grid size for scanning tiles dim3 scan_grid_size; scan_grid_size.z = 1; scan_grid_size.y = ((unsigned int) num_tiles + max_dim_x - 1) / max_dim_x; scan_grid_size.x = CUB_MIN(num_tiles, max_dim_x); // Log select_if_kernel configuration if (debug_synchronous) _CubLog("Invoking select_if_kernel<<<{%d,%d,%d}, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy\n", scan_grid_size.x, scan_grid_size.y, scan_grid_size.z, select_if_config.block_threads, (long long) stream, select_if_config.items_per_thread, range_select_sm_occupancy); // Invoke select_if_kernel select_if_kernel<<>>( d_in, d_flags, d_selected_out, d_num_selected_out, tile_status, select_op, equality_op, num_items, num_tiles); // Check for failure to launch if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; } while (0); return error; #endif // CUB_RUNTIME_ENABLED } /** * Internal dispatch routine */ CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t Dispatch( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items FlagsInputIteratorT d_flags, ///< [in] Pointer to the input sequence of selection flags (if applicable) SelectedOutputIteratorT d_selected_out, ///< [in] Pointer to the output sequence of selected data items NumSelectedIteratorT d_num_selected_out, ///< [in] Pointer to the total number of items selected (i.e., length of \p d_selected_out) SelectOpT select_op, ///< [in] Selection operator EqualityOpT equality_op, ///< [in] Equality operator OffsetT num_items, ///< [in] Total number of input items (i.e., length of \p d_in) cudaStream_t stream, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. 
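    /*
     * [Editorial note, illustrative only - not part of the upstream CUB sources.]
     * This dispatcher backs the public cub::DeviceSelect entry points. A minimal,
     * hedged host-side sketch of the two-phase temporary-storage pattern it expects
     * (the buffer names d_in, d_out, d_num_selected_out and the LessThan functor
     * are hypothetical):
     *
     *   struct LessThan {
     *       int v;
     *       __host__ __device__ bool operator()(const int &x) const { return x < v; }
     *   };
     *
     *   void  *d_temp_storage     = NULL;
     *   size_t temp_storage_bytes = 0;
     *   // First call: d_temp_storage == NULL, so only temp_storage_bytes is computed.
     *   cub::DeviceSelect::If(d_temp_storage, temp_storage_bytes,
     *                         d_in, d_out, d_num_selected_out, num_items, LessThan{7});
     *   cudaMalloc(&d_temp_storage, temp_storage_bytes);
     *   // Second call: performs the selection and writes the count to d_num_selected_out.
     *   cub::DeviceSelect::If(d_temp_storage, temp_storage_bytes,
     *                         d_in, d_out, d_num_selected_out, num_items, LessThan{7});
     */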
{ cudaError error = cudaSuccess; do { // Get PTX version int ptx_version; #if (CUB_PTX_ARCH == 0) if (CubDebug(error = PtxVersion(ptx_version))) break; #else ptx_version = CUB_PTX_ARCH; #endif // Get kernel kernel dispatch configurations KernelConfig select_if_config; InitConfigs(ptx_version, select_if_config); // Dispatch if (CubDebug(error = Dispatch( d_temp_storage, temp_storage_bytes, d_in, d_flags, d_selected_out, d_num_selected_out, select_op, equality_op, num_items, stream, debug_synchronous, ptx_version, DeviceCompactInitKernel, DeviceSelectSweepKernel, select_if_config))) break; } while (0); return error; } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/device/dispatch/dispatch_spmv_csrt.cuh000066400000000000000000000415571411340063500254570ustar00rootroot00000000000000 /****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::DeviceSpmv provides device-wide parallel operations for performing sparse-matrix * vector multiplication (SpMV). 
*/ #pragma once #include #include #include "dispatch_scan.cuh" #include "../../agent/agent_spmv_orig.cuh" #include "../../util_type.cuh" #include "../../util_debug.cuh" #include "../../util_device.cuh" #include "../../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * SpMV kernel entry points *****************************************************************************/ /** * Spmv agent entry point */ template < typename SpmvPolicyT, ///< Parameterized SpmvPolicy tuning policy type typename ValueT, ///< Matrix and vector value type typename OffsetT, ///< Signed integer type for sequence offsets bool HAS_ALPHA, ///< Whether the input parameter Alpha is 1 bool HAS_BETA> ///< Whether the input parameter Beta is 0 __launch_bounds__ (int(SpmvPolicyT::BLOCK_THREADS)) __global__ void DeviceSpmvKernel( SpmvParams spmv_params, ///< [in] SpMV input parameter bundle int merge_items_per_block, ///< [in] Number of merge tiles per block KeyValuePair* d_tile_carry_pairs) ///< [out] Pointer to the temporary array carry-out dot product row-ids, one per block { // Spmv agent type specialization typedef AgentSpmv< SpmvPolicyT, ValueT, OffsetT, HAS_ALPHA, HAS_BETA> AgentSpmvT; // Shared memory for AgentSpmv __shared__ typename AgentSpmvT::TempStorage temp_storage; AgentSpmvT(temp_storage, spmv_params).ConsumeTile( merge_items_per_block, d_tile_carry_pairs); } /****************************************************************************** * Dispatch ******************************************************************************/ /** * Utility class for dispatching the appropriately-tuned kernels for DeviceSpmv */ template < typename ValueT, ///< Matrix and vector value type typename OffsetT> ///< Signed integer type for global offsets struct DispatchSpmv { //--------------------------------------------------------------------- // Constants and Types //--------------------------------------------------------------------- enum { INIT_KERNEL_THREADS = 128 }; // SpmvParams bundle type typedef SpmvParams SpmvParamsT; // Tuple type for scanning {row id, accumulated value} typedef KeyValuePair KeyValuePairT; //--------------------------------------------------------------------- // Tuning policies //--------------------------------------------------------------------- /// SM11 struct Policy110 { typedef AgentSpmvPolicy< 128, 1, LOAD_DEFAULT, LOAD_DEFAULT, LOAD_DEFAULT, LOAD_DEFAULT, LOAD_DEFAULT, false, BLOCK_SCAN_WARP_SCANS> SpmvPolicyT; }; /// SM20 struct Policy200 { typedef AgentSpmvPolicy< 96, 18, LOAD_DEFAULT, LOAD_DEFAULT, LOAD_DEFAULT, LOAD_DEFAULT, LOAD_DEFAULT, false, BLOCK_SCAN_RAKING> SpmvPolicyT; }; /// SM30 struct Policy300 { typedef AgentSpmvPolicy< 96, 6, LOAD_DEFAULT, LOAD_DEFAULT, LOAD_DEFAULT, LOAD_DEFAULT, LOAD_DEFAULT, false, BLOCK_SCAN_WARP_SCANS> SpmvPolicyT; }; /// SM35 struct Policy350 { /* typedef AgentSpmvPolicy< (sizeof(ValueT) > 4) ? 96 : 128, (sizeof(ValueT) > 4) ? 4 : 7, LOAD_LDG, LOAD_CA, LOAD_LDG, LOAD_LDG, LOAD_LDG, (sizeof(ValueT) > 4) ? true : false, BLOCK_SCAN_WARP_SCANS> SpmvPolicyT; */ typedef AgentSpmvPolicy< 128, 5, LOAD_CA, LOAD_CA, LOAD_LDG, LOAD_LDG, LOAD_LDG, (sizeof(ValueT) > 4) ? true : false, BLOCK_SCAN_WARP_SCANS> SpmvPolicyT; }; /// SM37 struct Policy370 { typedef AgentSpmvPolicy< (sizeof(ValueT) > 4) ? 128 : 128, (sizeof(ValueT) > 4) ? 
9 : 14, LOAD_LDG, LOAD_CA, LOAD_LDG, LOAD_LDG, LOAD_LDG, false, BLOCK_SCAN_WARP_SCANS> SpmvPolicyT; }; /// SM50 struct Policy500 { typedef AgentSpmvPolicy< (sizeof(ValueT) > 4) ? 64 : 128, (sizeof(ValueT) > 4) ? 6 : 7, LOAD_LDG, LOAD_DEFAULT, (sizeof(ValueT) > 4) ? LOAD_LDG : LOAD_DEFAULT, (sizeof(ValueT) > 4) ? LOAD_LDG : LOAD_DEFAULT, LOAD_LDG, (sizeof(ValueT) > 4) ? true : false, (sizeof(ValueT) > 4) ? BLOCK_SCAN_WARP_SCANS : BLOCK_SCAN_RAKING_MEMOIZE> SpmvPolicyT; }; //--------------------------------------------------------------------- // Tuning policies of current PTX compiler pass //--------------------------------------------------------------------- #if (CUB_PTX_ARCH >= 500) typedef Policy500 PtxPolicy; #elif (CUB_PTX_ARCH >= 370) typedef Policy370 PtxPolicy; #elif (CUB_PTX_ARCH >= 350) typedef Policy350 PtxPolicy; #elif (CUB_PTX_ARCH >= 300) typedef Policy300 PtxPolicy; #elif (CUB_PTX_ARCH >= 200) typedef Policy200 PtxPolicy; #else typedef Policy110 PtxPolicy; #endif // "Opaque" policies (whose parameterizations aren't reflected in the type signature) struct PtxSpmvPolicyT : PtxPolicy::SpmvPolicyT {}; //--------------------------------------------------------------------- // Utilities //--------------------------------------------------------------------- /** * Initialize kernel dispatch configurations with the policies corresponding to the PTX assembly we will use */ template CUB_RUNTIME_FUNCTION __forceinline__ static void InitConfigs( int ptx_version, KernelConfig &spmv_config) { #if (CUB_PTX_ARCH > 0) // We're on the device, so initialize the kernel dispatch configurations with the current PTX policy spmv_config.template Init(); #else // We're on the host, so lookup and initialize the kernel dispatch configurations with the policies that match the device's PTX version if (ptx_version >= 500) { spmv_config.template Init(); } else if (ptx_version >= 370) { spmv_config.template Init(); } else if (ptx_version >= 350) { spmv_config.template Init(); } else if (ptx_version >= 300) { spmv_config.template Init(); } else if (ptx_version >= 200) { spmv_config.template Init(); } else { spmv_config.template Init(); } #endif } /** * Kernel kernel dispatch configuration. */ struct KernelConfig { int block_threads; int items_per_thread; int tile_items; template CUB_RUNTIME_FUNCTION __forceinline__ void Init() { block_threads = PolicyT::BLOCK_THREADS; items_per_thread = PolicyT::ITEMS_PER_THREAD; tile_items = block_threads * items_per_thread; } }; //--------------------------------------------------------------------- // Dispatch entrypoints //--------------------------------------------------------------------- /** * Internal dispatch routine for computing a device-wide reduction using the * specified kernel functions. * * If the input is larger than a single tile, this method uses two-passes of * kernel invocations. */ template < typename SpmvKernelT> ///< Function type of cub::AgentSpmvKernel CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t Dispatch( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation SpmvParamsT& spmv_params, ///< SpMV input parameter bundle cudaStream_t stream, ///< [in] CUDA stream to launch kernels within. Default is stream0. 
bool debug_synchronous, ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. SpmvKernelT spmv_kernel, ///< [in] Kernel function pointer to parameterization of AgentSpmvKernel KernelConfig spmv_config) ///< [in] Dispatch parameters that match the policy that \p spmv_kernel was compiled for { #ifndef CUB_RUNTIME_ENABLED // Kernel launch not supported from this device return CubDebug(cudaErrorNotSupported ); #else cudaError error = cudaSuccess; do { // Get device ordinal int device_ordinal; if (CubDebug(error = cudaGetDevice(&device_ordinal))) break; // Get SM count int sm_count; if (CubDebug(error = cudaDeviceGetAttribute (&sm_count, cudaDevAttrMultiProcessorCount, device_ordinal))) break; // Total number of spmv work items int num_merge_items = spmv_params.num_rows + spmv_params.num_nonzeros; // Get SM occupancy for kernels int spmv_sm_occupancy; if (CubDebug(error = MaxSmOccupancy( spmv_sm_occupancy, spmv_kernel, spmv_config.block_threads))) break; int spmv_device_occupancy = spmv_sm_occupancy * sm_count; // Grid dimensions int spmv_grid_size = CUB_MIN(((num_merge_items + spmv_config.block_threads - 1) / spmv_config.block_threads), spmv_device_occupancy); // Merge items per block int merge_items_per_block = (num_merge_items + spmv_grid_size - 1) / spmv_grid_size; // Get the temporary storage allocation requirements size_t allocation_sizes[1]; allocation_sizes[0] = spmv_grid_size * sizeof(KeyValuePairT); // bytes needed for block carry-out pairs // Alias the temporary allocations from the single storage blob (or compute the necessary size of the blob) void* allocations[1]; if (CubDebug(error = AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes))) break; if (d_temp_storage == NULL) { // Return if the caller is simply requesting the size of the storage allocation return cudaSuccess; } KeyValuePairT* d_tile_carry_pairs = (KeyValuePairT*) allocations[0]; // Agent carry-out pairs // Log spmv_kernel configuration if (debug_synchronous) _CubLog("Invoking spmv_kernel<<<%d, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy\n", spmv_grid_size, spmv_config.block_threads, (long long) stream, spmv_config.items_per_thread, spmv_sm_occupancy); // Invoke spmv_kernel spmv_kernel<<>>( spmv_params, merge_items_per_block, d_tile_carry_pairs); // Check for failure to launch if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; } while (0); return error; #endif // CUB_RUNTIME_ENABLED } /** * Internal dispatch routine for computing a device-wide reduction */ CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t Dispatch( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation SpmvParamsT& spmv_params, ///< SpMV input parameter bundle cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. 
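    /*
     * [Editorial note, illustrative only - not part of the upstream CUB sources.]
     * This dispatcher is normally reached through the public cub::DeviceSpmv::CsrMV
     * wrapper, which packs plain CSR arrays into the SpmvParams bundle. A hedged
     * host-side sketch (the d_* array names are hypothetical); the same
     * NULL-query / allocate / run pattern applies:
     *
     *   void  *d_temp_storage     = NULL;
     *   size_t temp_storage_bytes = 0;
     *   cub::DeviceSpmv::CsrMV(d_temp_storage, temp_storage_bytes,
     *                          d_values, d_row_offsets, d_column_indices,
     *                          d_vector_x, d_vector_y,
     *                          num_rows, num_cols, num_nonzeros);
     *   cudaMalloc(&d_temp_storage, temp_storage_bytes);
     *   cub::DeviceSpmv::CsrMV(d_temp_storage, temp_storage_bytes,
     *                          d_values, d_row_offsets, d_column_indices,
     *                          d_vector_x, d_vector_y,
     *                          num_rows, num_cols, num_nonzeros);
     */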
{ cudaError error = cudaSuccess; do { // Get PTX version int ptx_version; #if (CUB_PTX_ARCH == 0) if (CubDebug(error = PtxVersion(ptx_version))) break; #else ptx_version = CUB_PTX_ARCH; #endif // Get kernel kernel dispatch configurations KernelConfig spmv_config; InitConfigs(ptx_version, spmv_config); if (CubDebug(error = Dispatch( d_temp_storage, temp_storage_bytes, spmv_params, stream, debug_synchronous, DeviceSpmvKernel, spmv_config))) break; } while (0); return error; } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/device/dispatch/dispatch_spmv_orig.cuh000066400000000000000000001047011411340063500254330ustar00rootroot00000000000000 /****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::DeviceSpmv provides device-wide parallel operations for performing sparse-matrix * vector multiplication (SpMV). */ #pragma once #include #include #include "../../agent/single_pass_scan_operators.cuh" #include "../../agent/agent_segment_fixup.cuh" #include "../../agent/agent_spmv_orig.cuh" #include "../../util_type.cuh" #include "../../util_debug.cuh" #include "../../util_device.cuh" #include "../../thread/thread_search.cuh" #include "../../grid/grid_queue.cuh" #include "../../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * SpMV kernel entry points *****************************************************************************/ /** * Spmv search kernel. Identifies merge path starting coordinates for each tile. 
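 *
 * [Editorial note, illustrative only - not part of the upstream CUB sources.]
 * The kernels in this file consume a CSR matrix described by SpmvParams:
 * d_values (nonzero values), d_column_indices (their column indices) and
 * d_row_end_offsets (per-row end offsets; in practice the CSR row-offsets
 * array shifted by one element). A tiny hedged example for y = A*x:
 *
 *     A = [ 1 0 2 ]   d_values          = { 1, 2, 3, 4, 5 }
 *         [ 0 3 0 ]   d_column_indices  = { 0, 2, 1, 0, 2 }
 *         [ 4 0 5 ]   row offsets       = { 0, 2, 3, 5 }  (d_row_end_offsets -> { 2, 3, 5 })
 *
 * which yields y = { x[0] + 2*x[2], 3*x[1], 4*x[0] + 5*x[2] }.
 * Note that the kernel directly below only handles the degenerate num_cols == 1
 * case, in which every row holds at most one nonzero.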
*/ template < typename AgentSpmvPolicyT, ///< Parameterized SpmvPolicy tuning policy type typename ValueT, ///< Matrix and vector value type typename OffsetT> ///< Signed integer type for sequence offsets __global__ void DeviceSpmv1ColKernel( SpmvParams spmv_params) ///< [in] SpMV input parameter bundle { typedef CacheModifiedInputIterator< AgentSpmvPolicyT::VECTOR_VALUES_LOAD_MODIFIER, ValueT, OffsetT> VectorValueIteratorT; VectorValueIteratorT wrapped_vector_x(spmv_params.d_vector_x); int row_idx = (blockIdx.x * blockDim.x) + threadIdx.x; if (row_idx < spmv_params.num_rows) { OffsetT end_nonzero_idx = spmv_params.d_row_end_offsets[row_idx]; OffsetT nonzero_idx = spmv_params.d_row_end_offsets[row_idx - 1]; ValueT value = 0.0; if (end_nonzero_idx != nonzero_idx) { value = spmv_params.d_values[nonzero_idx] * wrapped_vector_x[spmv_params.d_column_indices[nonzero_idx]]; } spmv_params.d_vector_y[row_idx] = value; } } /** * Spmv search kernel. Identifies merge path starting coordinates for each tile. */ template < typename SpmvPolicyT, ///< Parameterized SpmvPolicy tuning policy type typename OffsetT, ///< Signed integer type for sequence offsets typename CoordinateT, ///< Merge path coordinate type typename SpmvParamsT> ///< SpmvParams type __global__ void DeviceSpmvSearchKernel( int num_merge_tiles, ///< [in] Number of SpMV merge tiles (spmv grid size) CoordinateT* d_tile_coordinates, ///< [out] Pointer to the temporary array of tile starting coordinates SpmvParamsT spmv_params) ///< [in] SpMV input parameter bundle { /// Constants enum { BLOCK_THREADS = SpmvPolicyT::BLOCK_THREADS, ITEMS_PER_THREAD = SpmvPolicyT::ITEMS_PER_THREAD, TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, }; typedef CacheModifiedInputIterator< SpmvPolicyT::ROW_OFFSETS_SEARCH_LOAD_MODIFIER, OffsetT, OffsetT> RowOffsetsSearchIteratorT; // Find the starting coordinate for all tiles (plus the end coordinate of the last one) int tile_idx = (blockIdx.x * blockDim.x) + threadIdx.x; if (tile_idx < num_merge_tiles + 1) { OffsetT diagonal = (tile_idx * TILE_ITEMS); CoordinateT tile_coordinate; CountingInputIterator nonzero_indices(0); // Search the merge path MergePathSearch( diagonal, RowOffsetsSearchIteratorT(spmv_params.d_row_end_offsets), nonzero_indices, spmv_params.num_rows, spmv_params.num_nonzeros, tile_coordinate); // Output starting offset d_tile_coordinates[tile_idx] = tile_coordinate; } } /** * Spmv agent entry point */ template < typename SpmvPolicyT, ///< Parameterized SpmvPolicy tuning policy type typename ScanTileStateT, ///< Tile status interface type typename ValueT, ///< Matrix and vector value type typename OffsetT, ///< Signed integer type for sequence offsets typename CoordinateT, ///< Merge path coordinate type bool HAS_ALPHA, ///< Whether the input parameter Alpha is 1 bool HAS_BETA> ///< Whether the input parameter Beta is 0 __launch_bounds__ (int(SpmvPolicyT::BLOCK_THREADS)) __global__ void DeviceSpmvKernel( SpmvParams spmv_params, ///< [in] SpMV input parameter bundle CoordinateT* d_tile_coordinates, ///< [in] Pointer to the temporary array of tile starting coordinates KeyValuePair* d_tile_carry_pairs, ///< [out] Pointer to the temporary array carry-out dot product row-ids, one per block int num_tiles, ///< [in] Number of merge tiles ScanTileStateT tile_state, ///< [in] Tile status interface for fixup reduce-by-key kernel int num_segment_fixup_tiles) ///< [in] Number of reduce-by-key tiles (fixup grid size) { // Spmv agent type specialization typedef AgentSpmv< SpmvPolicyT, ValueT, OffsetT, HAS_ALPHA, 
HAS_BETA> AgentSpmvT; // Shared memory for AgentSpmv __shared__ typename AgentSpmvT::TempStorage temp_storage; AgentSpmvT(temp_storage, spmv_params).ConsumeTile( d_tile_coordinates, d_tile_carry_pairs, num_tiles); // Initialize fixup tile status tile_state.InitializeStatus(num_segment_fixup_tiles); } /** * Multi-block reduce-by-key sweep kernel entry point */ template < typename AgentSegmentFixupPolicyT, ///< Parameterized AgentSegmentFixupPolicy tuning policy type typename PairsInputIteratorT, ///< Random-access input iterator type for keys typename AggregatesOutputIteratorT, ///< Random-access output iterator type for values typename OffsetT, ///< Signed integer type for global offsets typename ScanTileStateT> ///< Tile status interface type __launch_bounds__ (int(AgentSegmentFixupPolicyT::BLOCK_THREADS)) __global__ void DeviceSegmentFixupKernel( PairsInputIteratorT d_pairs_in, ///< [in] Pointer to the array carry-out dot product row-ids, one per spmv block AggregatesOutputIteratorT d_aggregates_out, ///< [in,out] Output value aggregates OffsetT num_items, ///< [in] Total number of items to select from int num_tiles, ///< [in] Total number of tiles for the entire problem ScanTileStateT tile_state) ///< [in] Tile status interface { // Thread block type for reducing tiles of value segments typedef AgentSegmentFixup< AgentSegmentFixupPolicyT, PairsInputIteratorT, AggregatesOutputIteratorT, cub::Equality, cub::Sum, OffsetT> AgentSegmentFixupT; // Shared memory for AgentSegmentFixup __shared__ typename AgentSegmentFixupT::TempStorage temp_storage; // Process tiles AgentSegmentFixupT(temp_storage, d_pairs_in, d_aggregates_out, cub::Equality(), cub::Sum()).ConsumeRange( num_items, num_tiles, tile_state); } /****************************************************************************** * Dispatch ******************************************************************************/ /** * Utility class for dispatching the appropriately-tuned kernels for DeviceSpmv */ template < typename ValueT, ///< Matrix and vector value type typename OffsetT> ///< Signed integer type for global offsets struct DispatchSpmv { //--------------------------------------------------------------------- // Constants and Types //--------------------------------------------------------------------- enum { INIT_KERNEL_THREADS = 128 }; // SpmvParams bundle type typedef SpmvParams SpmvParamsT; // 2D merge path coordinate type typedef typename CubVector::Type CoordinateT; // Tile status descriptor interface type typedef ReduceByKeyScanTileState ScanTileStateT; // Tuple type for scanning (pairs accumulated segment-value with segment-index) typedef KeyValuePair KeyValuePairT; //--------------------------------------------------------------------- // Tuning policies //--------------------------------------------------------------------- /// SM11 struct Policy110 { typedef AgentSpmvPolicy< 128, 1, LOAD_DEFAULT, LOAD_DEFAULT, LOAD_DEFAULT, LOAD_DEFAULT, LOAD_DEFAULT, false, BLOCK_SCAN_WARP_SCANS> SpmvPolicyT; typedef AgentSegmentFixupPolicy< 128, 4, BLOCK_LOAD_VECTORIZE, LOAD_DEFAULT, BLOCK_SCAN_WARP_SCANS> SegmentFixupPolicyT; }; /// SM20 struct Policy200 { typedef AgentSpmvPolicy< 96, 18, LOAD_DEFAULT, LOAD_DEFAULT, LOAD_DEFAULT, LOAD_DEFAULT, LOAD_DEFAULT, false, BLOCK_SCAN_RAKING> SpmvPolicyT; typedef AgentSegmentFixupPolicy< 128, 4, BLOCK_LOAD_VECTORIZE, LOAD_DEFAULT, BLOCK_SCAN_WARP_SCANS> SegmentFixupPolicyT; }; /// SM30 struct Policy300 { typedef AgentSpmvPolicy< 96, 6, LOAD_DEFAULT, LOAD_DEFAULT, LOAD_DEFAULT, LOAD_DEFAULT, 
LOAD_DEFAULT, false, BLOCK_SCAN_WARP_SCANS> SpmvPolicyT; typedef AgentSegmentFixupPolicy< 128, 4, BLOCK_LOAD_VECTORIZE, LOAD_DEFAULT, BLOCK_SCAN_WARP_SCANS> SegmentFixupPolicyT; }; /// SM35 struct Policy350 { typedef AgentSpmvPolicy< (sizeof(ValueT) > 4) ? 96 : 128, (sizeof(ValueT) > 4) ? 4 : 7, LOAD_LDG, LOAD_CA, LOAD_LDG, LOAD_LDG, LOAD_LDG, (sizeof(ValueT) > 4) ? true : false, BLOCK_SCAN_WARP_SCANS> SpmvPolicyT; typedef AgentSegmentFixupPolicy< 128, 3, BLOCK_LOAD_VECTORIZE, LOAD_LDG, BLOCK_SCAN_WARP_SCANS> SegmentFixupPolicyT; }; /// SM37 struct Policy370 { typedef AgentSpmvPolicy< (sizeof(ValueT) > 4) ? 128 : 128, (sizeof(ValueT) > 4) ? 9 : 14, LOAD_LDG, LOAD_CA, LOAD_LDG, LOAD_LDG, LOAD_LDG, false, BLOCK_SCAN_WARP_SCANS> SpmvPolicyT; typedef AgentSegmentFixupPolicy< 128, 3, BLOCK_LOAD_VECTORIZE, LOAD_LDG, BLOCK_SCAN_WARP_SCANS> SegmentFixupPolicyT; }; /// SM50 struct Policy500 { typedef AgentSpmvPolicy< (sizeof(ValueT) > 4) ? 64 : 128, (sizeof(ValueT) > 4) ? 6 : 7, LOAD_LDG, LOAD_DEFAULT, (sizeof(ValueT) > 4) ? LOAD_LDG : LOAD_DEFAULT, (sizeof(ValueT) > 4) ? LOAD_LDG : LOAD_DEFAULT, LOAD_LDG, (sizeof(ValueT) > 4) ? true : false, (sizeof(ValueT) > 4) ? BLOCK_SCAN_WARP_SCANS : BLOCK_SCAN_RAKING_MEMOIZE> SpmvPolicyT; typedef AgentSegmentFixupPolicy< 128, 3, BLOCK_LOAD_VECTORIZE, LOAD_LDG, BLOCK_SCAN_RAKING_MEMOIZE> SegmentFixupPolicyT; }; //--------------------------------------------------------------------- // Tuning policies of current PTX compiler pass //--------------------------------------------------------------------- #if (CUB_PTX_ARCH >= 500) typedef Policy500 PtxPolicy; #elif (CUB_PTX_ARCH >= 370) typedef Policy370 PtxPolicy; #elif (CUB_PTX_ARCH >= 350) typedef Policy350 PtxPolicy; #elif (CUB_PTX_ARCH >= 300) typedef Policy300 PtxPolicy; #elif (CUB_PTX_ARCH >= 200) typedef Policy200 PtxPolicy; #else typedef Policy110 PtxPolicy; #endif // "Opaque" policies (whose parameterizations aren't reflected in the type signature) struct PtxSpmvPolicyT : PtxPolicy::SpmvPolicyT {}; struct PtxSegmentFixupPolicy : PtxPolicy::SegmentFixupPolicyT {}; //--------------------------------------------------------------------- // Utilities //--------------------------------------------------------------------- /** * Initialize kernel dispatch configurations with the policies corresponding to the PTX assembly we will use */ template CUB_RUNTIME_FUNCTION __forceinline__ static void InitConfigs( int ptx_version, KernelConfig &spmv_config, KernelConfig &segment_fixup_config) { #if (CUB_PTX_ARCH > 0) // We're on the device, so initialize the kernel dispatch configurations with the current PTX policy spmv_config.template Init(); segment_fixup_config.template Init(); #else // We're on the host, so lookup and initialize the kernel dispatch configurations with the policies that match the device's PTX version if (ptx_version >= 500) { spmv_config.template Init(); segment_fixup_config.template Init(); } else if (ptx_version >= 370) { spmv_config.template Init(); segment_fixup_config.template Init(); } else if (ptx_version >= 350) { spmv_config.template Init(); segment_fixup_config.template Init(); } else if (ptx_version >= 300) { spmv_config.template Init(); segment_fixup_config.template Init(); } else if (ptx_version >= 200) { spmv_config.template Init(); segment_fixup_config.template Init(); } else { spmv_config.template Init(); segment_fixup_config.template Init(); } #endif } /** * Kernel kernel dispatch configuration. 
*/ struct KernelConfig { int block_threads; int items_per_thread; int tile_items; template CUB_RUNTIME_FUNCTION __forceinline__ void Init() { block_threads = PolicyT::BLOCK_THREADS; items_per_thread = PolicyT::ITEMS_PER_THREAD; tile_items = block_threads * items_per_thread; } }; //--------------------------------------------------------------------- // Dispatch entrypoints //--------------------------------------------------------------------- /** * Internal dispatch routine for computing a device-wide reduction using the * specified kernel functions. * * If the input is larger than a single tile, this method uses two-passes of * kernel invocations. */ template < typename Spmv1ColKernelT, ///< Function type of cub::DeviceSpmv1ColKernel typename SpmvSearchKernelT, ///< Function type of cub::AgentSpmvSearchKernel typename SpmvKernelT, ///< Function type of cub::AgentSpmvKernel typename SegmentFixupKernelT> ///< Function type of cub::DeviceSegmentFixupKernelT CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t Dispatch( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation SpmvParamsT& spmv_params, ///< SpMV input parameter bundle cudaStream_t stream, ///< [in] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous, ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. Spmv1ColKernelT spmv_1col_kernel, ///< [in] Kernel function pointer to parameterization of DeviceSpmv1ColKernel SpmvSearchKernelT spmv_search_kernel, ///< [in] Kernel function pointer to parameterization of AgentSpmvSearchKernel SpmvKernelT spmv_kernel, ///< [in] Kernel function pointer to parameterization of AgentSpmvKernel SegmentFixupKernelT segment_fixup_kernel, ///< [in] Kernel function pointer to parameterization of cub::DeviceSegmentFixupKernel KernelConfig spmv_config, ///< [in] Dispatch parameters that match the policy that \p spmv_kernel was compiled for KernelConfig segment_fixup_config) ///< [in] Dispatch parameters that match the policy that \p segment_fixup_kernel was compiled for { #ifndef CUB_RUNTIME_ENABLED // Kernel launch not supported from this device return CubDebug(cudaErrorNotSupported ); #else cudaError error = cudaSuccess; do { if (spmv_params.num_cols == 1) { if (d_temp_storage == NULL) { // Return if the caller is simply requesting the size of the storage allocation temp_storage_bytes = 1; break; } // Get search/init grid dims int degen_col_kernel_block_size = INIT_KERNEL_THREADS; int degen_col_kernel_grid_size = (spmv_params.num_rows + degen_col_kernel_block_size - 1) / degen_col_kernel_block_size; if (debug_synchronous) _CubLog("Invoking spmv_1col_kernel<<<%d, %d, 0, %lld>>>()\n", degen_col_kernel_grid_size, degen_col_kernel_block_size, (long long) stream); // Invoke spmv_search_kernel spmv_1col_kernel<<>>( spmv_params); // Check for failure to launch if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; break; } // Get device ordinal int device_ordinal; if (CubDebug(error = cudaGetDevice(&device_ordinal))) break; // Get SM count int sm_count; if (CubDebug(error = cudaDeviceGetAttribute 
(&sm_count, cudaDevAttrMultiProcessorCount, device_ordinal))) break; // Get max x-dimension of grid int max_dim_x; if (CubDebug(error = cudaDeviceGetAttribute(&max_dim_x, cudaDevAttrMaxGridDimX, device_ordinal))) break;; // Total number of spmv work items int num_merge_items = spmv_params.num_rows + spmv_params.num_nonzeros; // Tile sizes of kernels int merge_tile_size = spmv_config.block_threads * spmv_config.items_per_thread; int segment_fixup_tile_size = segment_fixup_config.block_threads * segment_fixup_config.items_per_thread; // Number of tiles for kernels unsigned int num_merge_tiles = (num_merge_items + merge_tile_size - 1) / merge_tile_size; unsigned int num_segment_fixup_tiles = (num_merge_tiles + segment_fixup_tile_size - 1) / segment_fixup_tile_size; // Get SM occupancy for kernels int spmv_sm_occupancy; if (CubDebug(error = MaxSmOccupancy( spmv_sm_occupancy, spmv_kernel, spmv_config.block_threads))) break; int segment_fixup_sm_occupancy; if (CubDebug(error = MaxSmOccupancy( segment_fixup_sm_occupancy, segment_fixup_kernel, segment_fixup_config.block_threads))) break; // Get grid dimensions dim3 spmv_grid_size( CUB_MIN(num_merge_tiles, max_dim_x), (num_merge_tiles + max_dim_x - 1) / max_dim_x, 1); dim3 segment_fixup_grid_size( CUB_MIN(num_segment_fixup_tiles, max_dim_x), (num_segment_fixup_tiles + max_dim_x - 1) / max_dim_x, 1); // Get the temporary storage allocation requirements size_t allocation_sizes[3]; if (CubDebug(error = ScanTileStateT::AllocationSize(num_segment_fixup_tiles, allocation_sizes[0]))) break; // bytes needed for reduce-by-key tile status descriptors allocation_sizes[1] = num_merge_tiles * sizeof(KeyValuePairT); // bytes needed for block carry-out pairs allocation_sizes[2] = (num_merge_tiles + 1) * sizeof(CoordinateT); // bytes needed for tile starting coordinates // Alias the temporary allocations from the single storage blob (or compute the necessary size of the blob) void* allocations[3]; if (CubDebug(error = AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes))) break; if (d_temp_storage == NULL) { // Return if the caller is simply requesting the size of the storage allocation break; } // Construct the tile status interface ScanTileStateT tile_state; if (CubDebug(error = tile_state.Init(num_segment_fixup_tiles, allocations[0], allocation_sizes[0]))) break; // Alias the other allocations KeyValuePairT* d_tile_carry_pairs = (KeyValuePairT*) allocations[1]; // Agent carry-out pairs CoordinateT* d_tile_coordinates = (CoordinateT*) allocations[2]; // Agent starting coordinates // Get search/init grid dims int search_block_size = INIT_KERNEL_THREADS; int search_grid_size = (num_merge_tiles + 1 + search_block_size - 1) / search_block_size; #if (CUB_PTX_ARCH == 0) // Init textures if (CubDebug(error = spmv_params.t_vector_x.BindTexture(spmv_params.d_vector_x))) break; #endif if (search_grid_size < sm_count) // if (num_merge_tiles < spmv_sm_occupancy * sm_count) { // Not enough spmv tiles to saturate the device: have spmv blocks search their own staring coords d_tile_coordinates = NULL; } else { // Use separate search kernel if we have enough spmv tiles to saturate the device // Log spmv_search_kernel configuration if (debug_synchronous) _CubLog("Invoking spmv_search_kernel<<<%d, %d, 0, %lld>>>()\n", search_grid_size, search_block_size, (long long) stream); // Invoke spmv_search_kernel spmv_search_kernel<<>>( num_merge_tiles, d_tile_coordinates, spmv_params); // Check for failure to launch if (CubDebug(error = 
cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; } // Log spmv_kernel configuration if (debug_synchronous) _CubLog("Invoking spmv_kernel<<<{%d,%d,%d}, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy\n", spmv_grid_size.x, spmv_grid_size.y, spmv_grid_size.z, spmv_config.block_threads, (long long) stream, spmv_config.items_per_thread, spmv_sm_occupancy); // Invoke spmv_kernel spmv_kernel<<>>( spmv_params, d_tile_coordinates, d_tile_carry_pairs, num_merge_tiles, tile_state, num_segment_fixup_tiles); // Check for failure to launch if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; // Run reduce-by-key fixup if necessary if (num_merge_tiles > 1) { // Log segment_fixup_kernel configuration if (debug_synchronous) _CubLog("Invoking segment_fixup_kernel<<<{%d,%d,%d}, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy\n", segment_fixup_grid_size.x, segment_fixup_grid_size.y, segment_fixup_grid_size.z, segment_fixup_config.block_threads, (long long) stream, segment_fixup_config.items_per_thread, segment_fixup_sm_occupancy); // Invoke segment_fixup_kernel segment_fixup_kernel<<>>( d_tile_carry_pairs, spmv_params.d_vector_y, num_merge_tiles, num_segment_fixup_tiles, tile_state); // Check for failure to launch if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; } #if (CUB_PTX_ARCH == 0) // Free textures if (CubDebug(error = spmv_params.t_vector_x.UnbindTexture())) break; #endif } while (0); return error; #endif // CUB_RUNTIME_ENABLED } /** * Internal dispatch routine for computing a device-wide reduction */ CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t Dispatch( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation SpmvParamsT& spmv_params, ///< SpMV input parameter bundle cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. 
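    /*
     * [Editorial note, illustrative only - not part of the upstream CUB sources.]
     * A condensed restatement of how the workhorse Dispatch above sizes its launch
     * (the arithmetic is already present in that routine; the 128 x 7 tile is just
     * an example policy):
     *
     *   int merge_tile_size      = spmv_config.block_threads * spmv_config.items_per_thread;  // e.g. 128 * 7 = 896
     *   int num_merge_items      = num_rows + num_nonzeros;                                   // merge-path length
     *   unsigned num_merge_tiles = (num_merge_items + merge_tile_size - 1) / merge_tile_size;
     *   dim3 spmv_grid(CUB_MIN(num_merge_tiles, max_dim_x),            // wrap into .y once .x would
     *                  (num_merge_tiles + max_dim_x - 1) / max_dim_x,  // exceed the device limit
     *                  1);
     *
     * Temporary storage is a single blob aliased into three parts: reduce-by-key
     * tile-status descriptors, one carry-out KeyValuePair per merge tile, and
     * num_merge_tiles + 1 tile starting coordinates.
     */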
{ cudaError error = cudaSuccess; do { // Get PTX version int ptx_version; #if (CUB_PTX_ARCH == 0) if (CubDebug(error = PtxVersion(ptx_version))) break; #else ptx_version = CUB_PTX_ARCH; #endif // Get kernel kernel dispatch configurations KernelConfig spmv_config, segment_fixup_config; InitConfigs(ptx_version, spmv_config, segment_fixup_config); if (CubDebug(error = Dispatch( d_temp_storage, temp_storage_bytes, spmv_params, stream, debug_synchronous, DeviceSpmv1ColKernel, DeviceSpmvSearchKernel, DeviceSpmvKernel, DeviceSegmentFixupKernel, spmv_config, segment_fixup_config))) break; /* // Dispatch if (spmv_params.beta == 0.0) { if (spmv_params.alpha == 1.0) { // Dispatch y = A*x if (CubDebug(error = Dispatch( d_temp_storage, temp_storage_bytes, spmv_params, stream, debug_synchronous, DeviceSpmv1ColKernel, DeviceSpmvSearchKernel, DeviceSpmvKernel, DeviceSegmentFixupKernel, spmv_config, segment_fixup_config))) break; } else { // Dispatch y = alpha*A*x if (CubDebug(error = Dispatch( d_temp_storage, temp_storage_bytes, spmv_params, stream, debug_synchronous, DeviceSpmvSearchKernel, DeviceSpmvKernel, DeviceSegmentFixupKernel, spmv_config, segment_fixup_config))) break; } } else { if (spmv_params.alpha == 1.0) { // Dispatch y = A*x + beta*y if (CubDebug(error = Dispatch( d_temp_storage, temp_storage_bytes, spmv_params, stream, debug_synchronous, DeviceSpmvSearchKernel, DeviceSpmvKernel, DeviceSegmentFixupKernel, spmv_config, segment_fixup_config))) break; } else { // Dispatch y = alpha*A*x + beta*y if (CubDebug(error = Dispatch( d_temp_storage, temp_storage_bytes, spmv_params, stream, debug_synchronous, DeviceSpmvSearchKernel, DeviceSpmvKernel, DeviceSegmentFixupKernel, spmv_config, segment_fixup_config))) break; } } */ } while (0); return error; } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/device/dispatch/dispatch_spmv_row_based.cuh000066400000000000000000001060521411340063500264410ustar00rootroot00000000000000 /****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::DeviceSpmv provides device-wide parallel operations for performing sparse-matrix * vector multiplication (SpMV). */ #pragma once #include #include #include "../../agent/single_pass_scan_operators.cuh" #include "../../agent/agent_segment_fixup.cuh" #include "../../agent/agent_spmv_row_based.cuh" #include "../../util_type.cuh" #include "../../util_debug.cuh" #include "../../util_device.cuh" #include "../../thread/thread_search.cuh" #include "../../grid/grid_queue.cuh" #include "../../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * SpMV kernel entry points *****************************************************************************/ /** * Spmv search kernel. Identifies merge path starting coordinates for each tile. */ template < typename AgentSpmvPolicyT, ///< Parameterized SpmvPolicy tuning policy type typename ValueT, ///< Matrix and vector value type typename OffsetT> ///< Signed integer type for sequence offsets __global__ void DeviceSpmv1ColKernel( SpmvParams spmv_params) ///< [in] SpMV input parameter bundle { typedef CacheModifiedInputIterator< AgentSpmvPolicyT::VECTOR_VALUES_LOAD_MODIFIER, ValueT, OffsetT> VectorValueIteratorT; VectorValueIteratorT wrapped_vector_x(spmv_params.d_vector_x); int row_idx = (blockIdx.x * blockDim.x) + threadIdx.x; if (row_idx < spmv_params.num_rows) { OffsetT end_nonzero_idx = spmv_params.d_row_end_offsets[row_idx]; OffsetT nonzero_idx = spmv_params.d_row_end_offsets[row_idx - 1]; ValueT value = 0.0; if (end_nonzero_idx != nonzero_idx) { value = spmv_params.d_values[nonzero_idx] * wrapped_vector_x[spmv_params.d_column_indices[nonzero_idx]]; } spmv_params.d_vector_y[row_idx] = value; } } /** * Spmv search kernel. Identifies merge path starting coordinates for each tile. 
*/ template < typename SpmvPolicyT, ///< Parameterized SpmvPolicy tuning policy type typename OffsetT, ///< Signed integer type for sequence offsets typename CoordinateT, ///< Merge path coordinate type typename SpmvParamsT> ///< SpmvParams type __global__ void DeviceSpmvSearchKernel( int num_spmv_tiles, ///< [in] Number of SpMV merge tiles (spmv grid size) CoordinateT* d_tile_coordinates, ///< [out] Pointer to the temporary array of tile starting coordinates SpmvParamsT spmv_params) ///< [in] SpMV input parameter bundle { /// Constants enum { BLOCK_THREADS = SpmvPolicyT::BLOCK_THREADS, ITEMS_PER_THREAD = SpmvPolicyT::ITEMS_PER_THREAD, TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, }; typedef CacheModifiedInputIterator< SpmvPolicyT::ROW_OFFSETS_SEARCH_LOAD_MODIFIER, OffsetT, OffsetT> RowOffsetsSearchIteratorT; // Find the starting coordinate for all tiles (plus the end coordinate of the last one) int tile_idx = (blockIdx.x * blockDim.x) + threadIdx.x; if (tile_idx < num_spmv_tiles + 1) { OffsetT diagonal = (tile_idx * TILE_ITEMS); CoordinateT tile_coordinate; CountingInputIterator nonzero_indices(0); // Search the merge path MergePathSearch( diagonal, RowOffsetsSearchIteratorT(spmv_params.d_row_end_offsets), nonzero_indices, spmv_params.num_rows, spmv_params.num_nonzeros, tile_coordinate); // Output starting offset d_tile_coordinates[tile_idx] = tile_coordinate; } } /** * Spmv agent entry point */ template < typename SpmvPolicyT, ///< Parameterized SpmvPolicy tuning policy type typename ScanTileStateT, ///< Tile status interface type typename ValueT, ///< Matrix and vector value type typename OffsetT, ///< Signed integer type for sequence offsets typename CoordinateT, ///< Merge path coordinate type bool HAS_ALPHA, ///< Whether the input parameter Alpha is 1 bool HAS_BETA> ///< Whether the input parameter Beta is 0 __launch_bounds__ (int(SpmvPolicyT::BLOCK_THREADS)) __global__ void DeviceSpmvKernel( SpmvParams spmv_params, ///< [in] SpMV input parameter bundle // CoordinateT* d_tile_coordinates, ///< [in] Pointer to the temporary array of tile starting coordinates // KeyValuePair* d_tile_carry_pairs, ///< [out] Pointer to the temporary array carry-out dot product row-ids, one per block // int num_tiles, ///< [in] Number of merge tiles // ScanTileStateT tile_state, ///< [in] Tile status interface for fixup reduce-by-key kernel // int num_fixup_tiles, ///< [in] Number of reduce-by-key tiles (fixup grid size) int rows_per_tile) ///< [in] Number of rows per tile { // Spmv agent type specialization typedef AgentSpmv< SpmvPolicyT, ValueT, OffsetT, HAS_ALPHA, HAS_BETA> AgentSpmvT; // Shared memory for AgentSpmv __shared__ typename AgentSpmvT::TempStorage temp_storage; AgentSpmvT(temp_storage, spmv_params).ConsumeTile( blockIdx.x, rows_per_tile); /* AgentSpmvT(temp_storage, spmv_params).ConsumeTile( d_tile_coordinates, d_tile_carry_pairs, num_tiles); // Initialize fixup tile status tile_state.InitializeStatus(num_fixup_tiles); */ } /** * Multi-block reduce-by-key sweep kernel entry point */ template < typename AgentSegmentFixupPolicyT, ///< Parameterized AgentSegmentFixupPolicy tuning policy type typename PairsInputIteratorT, ///< Random-access input iterator type for keys typename AggregatesOutputIteratorT, ///< Random-access output iterator type for values typename OffsetT, ///< Signed integer type for global offsets typename ScanTileStateT> ///< Tile status interface type __launch_bounds__ (int(AgentSegmentFixupPolicyT::BLOCK_THREADS)) __global__ void DeviceSegmentFixupKernel( 
PairsInputIteratorT d_pairs_in, ///< [in] Pointer to the array carry-out dot product row-ids, one per spmv block AggregatesOutputIteratorT d_aggregates_out, ///< [in,out] Output value aggregates OffsetT num_items, ///< [in] Total number of items to select from int num_tiles, ///< [in] Total number of tiles for the entire problem ScanTileStateT tile_state) ///< [in] Tile status interface { // Thread block type for reducing tiles of value segments typedef AgentSegmentFixup< AgentSegmentFixupPolicyT, PairsInputIteratorT, AggregatesOutputIteratorT, cub::Equality, cub::Sum, OffsetT> AgentSegmentFixupT; // Shared memory for AgentSegmentFixup __shared__ typename AgentSegmentFixupT::TempStorage temp_storage; // Process tiles AgentSegmentFixupT(temp_storage, d_pairs_in, d_aggregates_out, cub::Equality(), cub::Sum()).ConsumeRange( num_items, num_tiles, tile_state); } /****************************************************************************** * Dispatch ******************************************************************************/ /** * Utility class for dispatching the appropriately-tuned kernels for DeviceSpmv */ template < typename ValueT, ///< Matrix and vector value type typename OffsetT> ///< Signed integer type for global offsets struct DispatchSpmv { //--------------------------------------------------------------------- // Constants and Types //--------------------------------------------------------------------- enum { INIT_KERNEL_THREADS = 128 }; // SpmvParams bundle type typedef SpmvParams SpmvParamsT; // 2D merge path coordinate type typedef typename CubVector::Type CoordinateT; // Tile status descriptor interface type typedef ReduceByKeyScanTileState ScanTileStateT; // Tuple type for scanning (pairs accumulated segment-value with segment-index) typedef KeyValuePair KeyValuePairT; //--------------------------------------------------------------------- // Tuning policies //--------------------------------------------------------------------- /// SM11 struct Policy110 { typedef AgentSpmvPolicy< 128, 1, LOAD_DEFAULT, LOAD_DEFAULT, LOAD_DEFAULT, LOAD_DEFAULT, LOAD_DEFAULT, false, BLOCK_SCAN_WARP_SCANS> SpmvPolicyT; typedef AgentSegmentFixupPolicy< 128, 4, BLOCK_LOAD_VECTORIZE, LOAD_DEFAULT, BLOCK_SCAN_WARP_SCANS> SegmentFixupPolicyT; }; /// SM20 struct Policy200 { typedef AgentSpmvPolicy< 96, 18, LOAD_DEFAULT, LOAD_DEFAULT, LOAD_DEFAULT, LOAD_DEFAULT, LOAD_DEFAULT, false, BLOCK_SCAN_RAKING> SpmvPolicyT; typedef AgentSegmentFixupPolicy< 128, 4, BLOCK_LOAD_VECTORIZE, LOAD_DEFAULT, BLOCK_SCAN_WARP_SCANS> SegmentFixupPolicyT; }; /// SM30 struct Policy300 { typedef AgentSpmvPolicy< 96, 6, LOAD_DEFAULT, LOAD_DEFAULT, LOAD_DEFAULT, LOAD_DEFAULT, LOAD_DEFAULT, false, BLOCK_SCAN_WARP_SCANS> SpmvPolicyT; typedef AgentSegmentFixupPolicy< 128, 4, BLOCK_LOAD_VECTORIZE, LOAD_DEFAULT, BLOCK_SCAN_WARP_SCANS> SegmentFixupPolicyT; }; /// SM35 struct Policy350 { typedef AgentSpmvPolicy< (sizeof(ValueT) > 4) ? 64 : 128, (sizeof(ValueT) > 4) ? 7 : 7, LOAD_LDG, LOAD_LDG, LOAD_LDG, LOAD_LDG, LOAD_LDG, false, BLOCK_SCAN_WARP_SCANS> SpmvPolicyT; typedef AgentSegmentFixupPolicy< 128, 3, BLOCK_LOAD_VECTORIZE, LOAD_LDG, BLOCK_SCAN_WARP_SCANS> SegmentFixupPolicyT; }; /// SM37 struct Policy370 { typedef AgentSpmvPolicy< (sizeof(ValueT) > 4) ? 128 : 128, (sizeof(ValueT) > 4) ? 
7 : 7, LOAD_LDG, LOAD_CA, LOAD_LDG, LOAD_LDG, LOAD_LDG, false, BLOCK_SCAN_WARP_SCANS> SpmvPolicyT; typedef AgentSegmentFixupPolicy< 128, 3, BLOCK_LOAD_VECTORIZE, LOAD_LDG, BLOCK_SCAN_WARP_SCANS> SegmentFixupPolicyT; }; /// SM50 struct Policy500 { typedef AgentSpmvPolicy< (sizeof(ValueT) > 4) ? 64 : 64, 7, LOAD_DEFAULT, LOAD_CA, LOAD_DEFAULT, LOAD_DEFAULT, LOAD_LDG, false, BLOCK_SCAN_RAKING_MEMOIZE> SpmvPolicyT; typedef AgentSegmentFixupPolicy< 128, 3, BLOCK_LOAD_VECTORIZE, LOAD_LDG, BLOCK_SCAN_RAKING_MEMOIZE> SegmentFixupPolicyT; }; //--------------------------------------------------------------------- // Tuning policies of current PTX compiler pass //--------------------------------------------------------------------- #if (CUB_PTX_ARCH >= 500) typedef Policy500 PtxPolicy; #elif (CUB_PTX_ARCH >= 370) typedef Policy370 PtxPolicy; #elif (CUB_PTX_ARCH >= 350) typedef Policy350 PtxPolicy; #elif (CUB_PTX_ARCH >= 300) typedef Policy300 PtxPolicy; #elif (CUB_PTX_ARCH >= 200) typedef Policy200 PtxPolicy; #else typedef Policy110 PtxPolicy; #endif // "Opaque" policies (whose parameterizations aren't reflected in the type signature) struct PtxSpmvPolicyT : PtxPolicy::SpmvPolicyT {}; struct PtxSegmentFixupPolicy : PtxPolicy::SegmentFixupPolicyT {}; //--------------------------------------------------------------------- // Utilities //--------------------------------------------------------------------- /** * Initialize kernel dispatch configurations with the policies corresponding to the PTX assembly we will use */ template CUB_RUNTIME_FUNCTION __forceinline__ static void InitConfigs( int ptx_version, KernelConfig &spmv_config, KernelConfig &fixup_config) { #if (CUB_PTX_ARCH > 0) // We're on the device, so initialize the kernel dispatch configurations with the current PTX policy spmv_config.template Init(); fixup_config.template Init(); #else // We're on the host, so lookup and initialize the kernel dispatch configurations with the policies that match the device's PTX version if (ptx_version >= 500) { spmv_config.template Init(); fixup_config.template Init(); } else if (ptx_version >= 370) { spmv_config.template Init(); fixup_config.template Init(); } else if (ptx_version >= 350) { spmv_config.template Init(); fixup_config.template Init(); } else if (ptx_version >= 300) { spmv_config.template Init(); fixup_config.template Init(); } else if (ptx_version >= 200) { spmv_config.template Init(); fixup_config.template Init(); } else { spmv_config.template Init(); fixup_config.template Init(); } #endif } /** * Kernel kernel dispatch configuration. */ struct KernelConfig { int block_threads; int items_per_thread; int tile_items; template CUB_RUNTIME_FUNCTION __forceinline__ void Init() { block_threads = PolicyT::BLOCK_THREADS; items_per_thread = PolicyT::ITEMS_PER_THREAD; tile_items = block_threads * items_per_thread; } }; //--------------------------------------------------------------------- // Dispatch entrypoints //--------------------------------------------------------------------- /** * Internal dispatch routine for computing a device-wide reduction using the * specified kernel functions. * * If the input is larger than a single tile, this method uses two-passes of * kernel invocations. 
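 *
 * [Editorial note, illustrative only - not part of the upstream CUB sources.]
 * Unlike the merge-path variant, this row-based dispatch assigns whole rows to
 * each tile. Roughly restating the heuristic in the routine below: it starts
 * from rows_per_tile = block_threads, shrinks it when the expansion factor
 * num_nonzeros / num_rows exceeds items_per_thread or when too few blocks per
 * SM would otherwise receive work, and never lets it drop below 2. For example,
 * 1,000 rows with 64,000 nonzeros and a 128-thread, 7-items-per-thread policy
 * give an expansion factor of 64, so rows_per_tile becomes (128 * 7) / 64 = 14.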
*/ template < // typename Spmv1ColKernelT, ///< Function type of cub::DeviceSpmv1ColKernel // typename SpmvSearchKernelT, ///< Function type of cub::AgentSpmvSearchKernel typename SpmvKernelT> ///< Function type of cub::AgentSpmvKernel // typename SegmentFixupKernelT> ///< Function type of cub::DeviceSegmentFixupKernelT CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t Dispatch( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation SpmvParamsT& spmv_params, ///< SpMV input parameter bundle cudaStream_t stream, ///< [in] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous, ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. // Spmv1ColKernelT spmv_1col_kernel, ///< [in] Kernel function pointer to parameterization of DeviceSpmv1ColKernel // SpmvSearchKernelT spmv_search_kernel, ///< [in] Kernel function pointer to parameterization of AgentSpmvSearchKernel SpmvKernelT spmv_kernel, ///< [in] Kernel function pointer to parameterization of AgentSpmvKernel // SegmentFixupKernelT fixup_kernel, ///< [in] Kernel function pointer to parameterization of cub::DeviceSegmentFixupKernel KernelConfig spmv_config, ///< [in] Dispatch parameters that match the policy that \p spmv_kernel was compiled for KernelConfig fixup_config) ///< [in] Dispatch parameters that match the policy that \p fixup_kernel was compiled for { #ifndef CUB_RUNTIME_ENABLED // Kernel launch not supported from this device return CubDebug(cudaErrorNotSupported ); #else cudaError error = cudaSuccess; do { /* if (spmv_params.num_cols == 1) { if (d_temp_storage == NULL) { // Return if the caller is simply requesting the size of the storage allocation temp_storage_bytes = 1; return cudaSuccess; } // Get search/init grid dims int degen_col_kernel_block_size = INIT_KERNEL_THREADS; int degen_col_kernel_grid_size = (spmv_params.num_rows + degen_col_kernel_block_size - 1) / degen_col_kernel_block_size; if (debug_synchronous) _CubLog("Invoking spmv_1col_kernel<<<%d, %d, 0, %lld>>>()\n", degen_col_kernel_grid_size, degen_col_kernel_block_size, (long long) stream); // Invoke spmv_search_kernel spmv_1col_kernel<<>>( spmv_params); // Check for failure to launch if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; break; } */ // Get device ordinal int device_ordinal; if (CubDebug(error = cudaGetDevice(&device_ordinal))) break; // Get SM count int sm_count; if (CubDebug(error = cudaDeviceGetAttribute (&sm_count, cudaDevAttrMultiProcessorCount, device_ordinal))) break; // Get max x-dimension of grid int max_dim_x; if (CubDebug(error = cudaDeviceGetAttribute(&max_dim_x, cudaDevAttrMaxGridDimX, device_ordinal))) break;; // Get SM occupancy for kernels int spmv_sm_occupancy; if (CubDebug(error = MaxSmOccupancy( spmv_sm_occupancy, spmv_kernel, spmv_config.block_threads))) break; // Tile sizes of kernels int spmv_tile_size = spmv_config.block_threads * spmv_config.items_per_thread; int fixup_tile_size = fixup_config.block_threads * fixup_config.items_per_thread; unsigned int rows_per_tile = spmv_config.block_threads; if (spmv_params.num_rows < 
rows_per_tile * spmv_sm_occupancy * sm_count * 8) { // Decrease rows per tile if needed to accomodate high expansion factor unsigned int expansion_factor = (spmv_params.num_nonzeros) / spmv_params.num_rows; if ((expansion_factor > 0) && (expansion_factor > spmv_config.items_per_thread)) rows_per_tile = (spmv_tile_size) / expansion_factor; // Decrease rows per tile if needed to accomodate minimum parallelism unsigned int spmv_device_occupancy = sm_count * 2; // unsigned int spmv_device_occupancy = sm_count * ((spmv_sm_occupancy + 1) / 2); if (spmv_params.num_rows < spmv_device_occupancy * rows_per_tile) rows_per_tile = (spmv_params.num_rows) / spmv_device_occupancy; } rows_per_tile = CUB_MAX(rows_per_tile, 2); if (debug_synchronous) _CubLog("Rows per tile: %d\n", rows_per_tile); // Number of tiles for kernels unsigned int num_spmv_tiles = (spmv_params.num_rows + rows_per_tile - 1) / rows_per_tile; // unsigned int num_fixup_tiles = (num_spmv_tiles + fixup_tile_size - 1) / fixup_tile_size; // Get grid dimensions dim3 spmv_grid_size( CUB_MIN(num_spmv_tiles, max_dim_x), (num_spmv_tiles + max_dim_x - 1) / max_dim_x, 1); /* dim3 spmv_grid_size( CUB_MIN(num_spmv_tiles, max_dim_x), (num_spmv_tiles + max_dim_x - 1) / max_dim_x, 1); dim3 fixup_grid_size( CUB_MIN(num_fixup_tiles, max_dim_x), (num_fixup_tiles + max_dim_x - 1) / max_dim_x, 1); */ // Get the temporary storage allocation requirements size_t allocation_sizes[3]; // if (CubDebug(error = ScanTileStateT::AllocationSize(num_fixup_tiles, allocation_sizes[0]))) break; // bytes needed for reduce-by-key tile status descriptors allocation_sizes[0] = 0; allocation_sizes[1] = num_spmv_tiles * sizeof(KeyValuePairT); // bytes needed for block carry-out pairs allocation_sizes[2] = (num_spmv_tiles + 1) * sizeof(CoordinateT); // bytes needed for tile starting coordinates // Alias the temporary allocations from the single storage blob (or compute the necessary size of the blob) void* allocations[3]; if (CubDebug(error = AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes))) break; if (d_temp_storage == NULL) { // Return if the caller is simply requesting the size of the storage allocation return cudaSuccess; } // Construct the tile status interface /* ScanTileStateT tile_state; if (CubDebug(error = tile_state.Init(num_fixup_tiles, allocations[0], allocation_sizes[0]))) break; */ // Alias the other allocations KeyValuePairT* d_tile_carry_pairs = (KeyValuePairT*) allocations[1]; // Agent carry-out pairs CoordinateT* d_tile_coordinates = (CoordinateT*) allocations[2]; // Agent starting coordinates // Get search/init grid dims int search_block_size = INIT_KERNEL_THREADS; int search_grid_size = (num_spmv_tiles + 1 + search_block_size - 1) / search_block_size; #if (CUB_PTX_ARCH == 0) // Init textures // if (CubDebug(error = spmv_params.t_vector_x.BindTexture(spmv_params.d_vector_x))) break; #endif /* if (search_grid_size < sm_count) { // Not enough spmv tiles to saturate the device: have spmv blocks search their own staring coords d_tile_coordinates = NULL; } else { // Use separate search kernel if we have enough spmv tiles to saturate the device // Log spmv_search_kernel configuration if (debug_synchronous) _CubLog("Invoking spmv_search_kernel<<<%d, %d, 0, %lld>>>()\n", search_grid_size, search_block_size, (long long) stream); // Invoke spmv_search_kernel spmv_search_kernel<<>>( num_spmv_tiles, d_tile_coordinates, spmv_params); // Check for failure to launch if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if 
specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; } */ // Log spmv_kernel configuration if (debug_synchronous) _CubLog("Invoking spmv_kernel<<<{%d,%d,%d}, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy\n", spmv_grid_size.x, spmv_grid_size.y, spmv_grid_size.z, spmv_config.block_threads, (long long) stream, spmv_config.items_per_thread, spmv_sm_occupancy); // Invoke spmv_kernel spmv_kernel<<>>( spmv_params, // d_tile_coordinates, // d_tile_carry_pairs, // num_spmv_tiles, // tile_state, // num_fixup_tiles, rows_per_tile); // Check for failure to launch if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; /* // Run reduce-by-key fixup if necessary if (num_spmv_tiles > 1) { // Log fixup_kernel configuration if (debug_synchronous) _CubLog("Invoking fixup_kernel<<<{%d,%d,%d}, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy\n", fixup_grid_size.x, fixup_grid_size.y, fixup_grid_size.z, fixup_config.block_threads, (long long) stream, fixup_config.items_per_thread, fixup_sm_occupancy); // Invoke fixup_kernel fixup_kernel<<>>( d_tile_carry_pairs, spmv_params.d_vector_y, num_spmv_tiles, num_fixup_tiles, tile_state); // Check for failure to launch if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; } */ #if (CUB_PTX_ARCH == 0) // Free textures // if (CubDebug(error = spmv_params.t_vector_x.UnbindTexture())) break; #endif } while (0); return error; #endif // CUB_RUNTIME_ENABLED } /** * Internal dispatch routine for computing a device-wide reduction */ CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t Dispatch( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation SpmvParamsT& spmv_params, ///< SpMV input parameter bundle cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. 
{ cudaError error = cudaSuccess; do { // Get PTX version int ptx_version; #if (CUB_PTX_ARCH == 0) if (CubDebug(error = PtxVersion(ptx_version))) break; #else ptx_version = CUB_PTX_ARCH; #endif // Get kernel kernel dispatch configurations KernelConfig spmv_config, fixup_config; InitConfigs(ptx_version, spmv_config, fixup_config); if (CubDebug(error = Dispatch( d_temp_storage, temp_storage_bytes, spmv_params, stream, debug_synchronous, // DeviceSpmv1ColKernel, // DeviceSpmvSearchKernel, DeviceSpmvKernel, // DeviceSegmentFixupKernel, spmv_config, fixup_config))) break; /* // Dispatch if (spmv_params.beta == 0.0) { if (spmv_params.alpha == 1.0) { // Dispatch y = A*x if (CubDebug(error = Dispatch( d_temp_storage, temp_storage_bytes, spmv_params, stream, debug_synchronous, DeviceSpmv1ColKernel, DeviceSpmvSearchKernel, DeviceSpmvKernel, DeviceSegmentFixupKernel, spmv_config, fixup_config))) break; } else { // Dispatch y = alpha*A*x if (CubDebug(error = Dispatch( d_temp_storage, temp_storage_bytes, spmv_params, stream, debug_synchronous, DeviceSpmvSearchKernel, DeviceSpmvKernel, DeviceSegmentFixupKernel, spmv_config, fixup_config))) break; } } else { if (spmv_params.alpha == 1.0) { // Dispatch y = A*x + beta*y if (CubDebug(error = Dispatch( d_temp_storage, temp_storage_bytes, spmv_params, stream, debug_synchronous, DeviceSpmvSearchKernel, DeviceSpmvKernel, DeviceSegmentFixupKernel, spmv_config, fixup_config))) break; } else { // Dispatch y = alpha*A*x + beta*y if (CubDebug(error = Dispatch( d_temp_storage, temp_storage_bytes, spmv_params, stream, debug_synchronous, DeviceSpmvSearchKernel, DeviceSpmvKernel, DeviceSegmentFixupKernel, spmv_config, fixup_config))) break; } } */ } while (0); return error; } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/grid/000077500000000000000000000000001411340063500167325ustar00rootroot00000000000000relion-3.1.3/src/acc/cuda/cub/grid/grid_barrier.cuh000066400000000000000000000133451411340063500220740ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::GridBarrier implements a software global barrier among thread blocks within a CUDA grid */ #pragma once #include "../util_debug.cuh" #include "../util_namespace.cuh" #include "../thread/thread_load.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup GridModule * @{ */ /** * \brief GridBarrier implements a software global barrier among thread blocks within a CUDA grid */ class GridBarrier { protected : typedef unsigned int SyncFlag; // Counters in global device memory SyncFlag* d_sync; public: /** * Constructor */ GridBarrier() : d_sync(NULL) {} /** * Synchronize */ __device__ __forceinline__ void Sync() const { volatile SyncFlag *d_vol_sync = d_sync; // Threadfence and syncthreads to make sure global writes are visible before // thread-0 reports in with its sync counter __threadfence(); CTA_SYNC(); if (blockIdx.x == 0) { // Report in ourselves if (threadIdx.x == 0) { d_vol_sync[blockIdx.x] = 1; } CTA_SYNC(); // Wait for everyone else to report in for (int peer_block = threadIdx.x; peer_block < gridDim.x; peer_block += blockDim.x) { while (ThreadLoad(d_sync + peer_block) == 0) { __threadfence_block(); } } CTA_SYNC(); // Let everyone know it's safe to proceed for (int peer_block = threadIdx.x; peer_block < gridDim.x; peer_block += blockDim.x) { d_vol_sync[peer_block] = 0; } } else { if (threadIdx.x == 0) { // Report in d_vol_sync[blockIdx.x] = 1; // Wait for acknowledgment while (ThreadLoad(d_sync + blockIdx.x) == 1) { __threadfence_block(); } } CTA_SYNC(); } } }; /** * \brief GridBarrierLifetime extends GridBarrier to provide lifetime management of the temporary device storage needed for cooperation. * * Uses RAII for lifetime, i.e., device resources are reclaimed when * the destructor is called. 
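 *
 * \par Snippet
 * A minimal usage sketch (the kernel name \p MyKernel and the launch parameters
 * \p grid_size / \p block_size are placeholders; \p grid_size is assumed small
 * enough that all thread blocks can be resident on the device simultaneously,
 * as a software grid barrier requires):
 * \code
 * __global__ void MyKernel(cub::GridBarrier barrier)
 * {
 *     // ... phase 1 ...
 *     barrier.Sync();      // wait here for every thread block in the grid
 *     // ... phase 2 ...
 * }
 *
 * // Host side: lazily allocate and zero the sync counters, then launch
 * cub::GridBarrierLifetime barrier;
 * barrier.Setup(grid_size);
 * MyKernel<<<grid_size, block_size>>>(barrier);
 * \endcode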
*/ class GridBarrierLifetime : public GridBarrier { protected: // Number of bytes backed by d_sync size_t sync_bytes; public: /** * Constructor */ GridBarrierLifetime() : GridBarrier(), sync_bytes(0) {} /** * DeviceFrees and resets the progress counters */ cudaError_t HostReset() { cudaError_t retval = cudaSuccess; if (d_sync) { CubDebug(retval = cudaFree(d_sync)); d_sync = NULL; } sync_bytes = 0; return retval; } /** * Destructor */ virtual ~GridBarrierLifetime() { HostReset(); } /** * Sets up the progress counters for the next kernel launch (lazily * allocating and initializing them if necessary) */ cudaError_t Setup(int sweep_grid_size) { cudaError_t retval = cudaSuccess; do { size_t new_sync_bytes = sweep_grid_size * sizeof(SyncFlag); if (new_sync_bytes > sync_bytes) { if (d_sync) { if (CubDebug(retval = cudaFree(d_sync))) break; } sync_bytes = new_sync_bytes; // Allocate and initialize to zero if (CubDebug(retval = cudaMalloc((void**) &d_sync, sync_bytes))) break; if (CubDebug(retval = cudaMemset(d_sync, 0, new_sync_bytes))) break; } } while (0); return retval; } }; /** @} */ // end group GridModule } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/grid/grid_even_share.cuh000066400000000000000000000200061411340063500225550ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::GridEvenShare is a descriptor utility for distributing input among CUDA thread blocks in an "even-share" fashion. Each thread block gets roughly the same number of fixed-size work units (grains). 
*/ #pragma once #include "../util_namespace.cuh" #include "../util_macro.cuh" #include "grid_mapping.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup GridModule * @{ */ /** * \brief GridEvenShare is a descriptor utility for distributing input among * CUDA thread blocks in an "even-share" fashion. Each thread block gets roughly * the same number of input tiles. * * \par Overview * Each thread block is assigned a consecutive sequence of input tiles. To help * preserve alignment and eliminate the overhead of guarded loads for all but the * last thread block, to GridEvenShare assigns one of three different amounts of * work to a given thread block: "big", "normal", or "last". The "big" workloads * are one scheduling grain larger than "normal". The "last" work unit for the * last thread block may be partially-full if the input is not an even multiple of * the scheduling grain size. * * \par * Before invoking a child grid, a parent thread will typically construct an * instance of GridEvenShare. The instance can be passed to child thread blocks * which can initialize their per-thread block offsets using \p BlockInit(). */ template struct GridEvenShare { private: OffsetT total_tiles; int big_shares; OffsetT big_share_items; OffsetT normal_share_items; OffsetT normal_base_offset; public: /// Total number of input items OffsetT num_items; /// Grid size in thread blocks int grid_size; /// OffsetT into input marking the beginning of the owning thread block's segment of input tiles OffsetT block_offset; /// OffsetT into input of marking the end (one-past) of the owning thread block's segment of input tiles OffsetT block_end; /// Stride between input tiles OffsetT block_stride; /** * \brief Constructor. */ __host__ __device__ __forceinline__ GridEvenShare() : total_tiles(0), big_shares(0), big_share_items(0), normal_share_items(0), normal_base_offset(0), num_items(0), grid_size(0), block_offset(0), block_end(0), block_stride(0) {} /** * \brief Dispatch initializer. To be called prior prior to kernel launch. */ __host__ __device__ __forceinline__ void DispatchInit( OffsetT num_items, ///< Total number of input items int max_grid_size, ///< Maximum grid size allowable (actual grid size may be less if not warranted by the the number of input items) int tile_items) ///< Number of data items per input tile { this->block_offset = num_items; // Initialize past-the-end this->block_end = num_items; // Initialize past-the-end this->num_items = num_items; this->total_tiles = (num_items + tile_items - 1) / tile_items; this->grid_size = CUB_MIN(total_tiles, max_grid_size); OffsetT avg_tiles_per_block = total_tiles / grid_size; this->big_shares = total_tiles - (avg_tiles_per_block * grid_size); // leftover grains go to big blocks this->normal_share_items = avg_tiles_per_block * tile_items; this->normal_base_offset = big_shares * tile_items; this->big_share_items = normal_share_items + tile_items; } /** * \brief Initializes ranges for the specified thread block index. Specialized * for a "raking" access pattern in which each thread block is assigned a * consecutive sequence of input tiles. 
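 *
 * \par Snippet
 * A minimal sketch of the intended pattern (the kernel name \p ConsumeKernel and
 * the constants \p TILE_ITEMS, \p BLOCK_THREADS, \p num_items and \p max_grid_size
 * are placeholders): the host calls \p DispatchInit(), passes the descriptor to
 * the kernel by value, and each thread block initializes its own range before
 * consuming it.
 * \code
 * __global__ void ConsumeKernel(cub::GridEvenShare<int> even_share)
 * {
 *     even_share.BlockInit<TILE_ITEMS, cub::GRID_MAPPING_RAKE>();
 *     for (int tile_offset = even_share.block_offset;
 *          tile_offset < even_share.block_end;
 *          tile_offset += even_share.block_stride)
 *     {
 *         // ... consume TILE_ITEMS items starting at tile_offset ...
 *     }
 * }
 *
 * // Host side
 * cub::GridEvenShare<int> even_share;
 * even_share.DispatchInit(num_items, max_grid_size, TILE_ITEMS);
 * ConsumeKernel<<<even_share.grid_size, BLOCK_THREADS>>>(even_share);
 * \endcode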
*/ template __device__ __forceinline__ void BlockInit( int block_id, Int2Type /*strategy_tag*/) { block_stride = TILE_ITEMS; if (block_id < big_shares) { // This thread block gets a big share of grains (avg_tiles_per_block + 1) block_offset = (block_id * big_share_items); block_end = block_offset + big_share_items; } else if (block_id < total_tiles) { // This thread block gets a normal share of grains (avg_tiles_per_block) block_offset = normal_base_offset + (block_id * normal_share_items); block_end = CUB_MIN(num_items, block_offset + normal_share_items); } // Else default past-the-end } /** * \brief Block-initialization, specialized for a "raking" access * pattern in which each thread block is assigned a consecutive sequence * of input tiles. */ template __device__ __forceinline__ void BlockInit( int block_id, Int2Type /*strategy_tag*/) { block_stride = grid_size * TILE_ITEMS; block_offset = (block_id * TILE_ITEMS); block_end = num_items; } /** * \brief Block-initialization, specialized for "strip mining" access * pattern in which the input tiles assigned to each thread block are * separated by a stride equal to the the extent of the grid. */ template < int TILE_ITEMS, GridMappingStrategy STRATEGY> __device__ __forceinline__ void BlockInit() { BlockInit(blockIdx.x, Int2Type()); } /** * \brief Block-initialization, specialized for a "raking" access * pattern in which each thread block is assigned a consecutive sequence * of input tiles. */ template __device__ __forceinline__ void BlockInit( OffsetT block_offset, ///< [in] Threadblock begin offset (inclusive) OffsetT block_end) ///< [in] Threadblock end offset (exclusive) { this->block_offset = block_offset; this->block_end = block_end; this->block_stride = TILE_ITEMS; } }; /** @} */ // end group GridModule } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/grid/grid_mapping.cuh000066400000000000000000000113331411340063500220740ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::GridMappingStrategy enumerates alternative strategies for mapping constant-sized tiles of device-wide data onto a grid of CUDA thread blocks. */ #pragma once #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup GridModule * @{ */ /****************************************************************************** * Mapping policies *****************************************************************************/ /** * \brief cub::GridMappingStrategy enumerates alternative strategies for mapping constant-sized tiles of device-wide data onto a grid of CUDA thread blocks. */ enum GridMappingStrategy { /** * \brief An a "raking" access pattern in which each thread block is * assigned a consecutive sequence of input tiles * * \par Overview * The input is evenly partitioned into \p p segments, where \p p is * constant and corresponds loosely to the number of thread blocks that may * actively reside on the target device. Each segment is comprised of * consecutive tiles, where a tile is a small, constant-sized unit of input * to be processed to completion before the thread block terminates or * obtains more work. The kernel invokes \p p thread blocks, each * of which iteratively consumes a segment of n/p elements * in tile-size increments. */ GRID_MAPPING_RAKE, /** * \brief An a "strip mining" access pattern in which the input tiles assigned * to each thread block are separated by a stride equal to the the extent of * the grid. * * \par Overview * The input is evenly partitioned into \p p sets, where \p p is * constant and corresponds loosely to the number of thread blocks that may * actively reside on the target device. Each set is comprised of * data tiles separated by stride \p tiles, where a tile is a small, * constant-sized unit of input to be processed to completion before the * thread block terminates or obtains more work. The kernel invokes \p p * thread blocks, each of which iteratively consumes a segment of * n/p elements in tile-size increments. */ GRID_MAPPING_STRIP_MINE, /** * \brief A dynamic "queue-based" strategy for assigning input tiles to thread blocks. * * \par Overview * The input is treated as a queue to be dynamically consumed by a grid of * thread blocks. Work is atomically dequeued in tiles, where a tile is a * unit of input to be processed to completion before the thread block * terminates or obtains more work. The grid size \p p is constant, * loosely corresponding to the number of thread blocks that may actively * reside on the target device. 
*/ GRID_MAPPING_DYNAMIC, }; /** @} */ // end group GridModule } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/grid/grid_queue.cuh000066400000000000000000000164631411340063500215760ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::GridQueue is a descriptor utility for dynamic queue management. */ #pragma once #include "../util_namespace.cuh" #include "../util_debug.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup GridModule * @{ */ /** * \brief GridQueue is a descriptor utility for dynamic queue management. * * \par Overview * GridQueue descriptors provides abstractions for "filling" or * "draining" globally-shared vectors. * * \par * A "filling" GridQueue works by atomically-adding to a zero-initialized counter, * returning a unique offset for the calling thread to write its items. * The GridQueue maintains the total "fill-size". The fill counter must be reset * using GridQueue::ResetFill by the host or kernel instance prior to the kernel instance that * will be filling. * * \par * Similarly, a "draining" GridQueue works by works by atomically-incrementing a * zero-initialized counter, returning a unique offset for the calling thread to * read its items. Threads can safely drain until the array's logical fill-size is * exceeded. The drain counter must be reset using GridQueue::ResetDrain or * GridQueue::FillAndResetDrain by the host or kernel instance prior to the kernel instance that * will be filling. (For dynamic work distribution of existing data, the corresponding fill-size * is simply the number of elements in the array.) 
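 *
 * \par Snippet
 * A minimal host-side setup sketch (the kernel name \p DrainKernel and the
 * sizes \p num_items, \p grid_size, \p BLOCK_THREADS and \p TILE_ITEMS are
 * placeholders; the kernel is assumed to reserve tiles by calling \p Drain()
 * until the returned offset reaches \p num_items):
 * \code
 * // Allocate backing storage for the two counters and construct the descriptor
 * void *d_queue_storage;
 * cudaMalloc(&d_queue_storage, cub::GridQueue<int>::AllocationSize());
 * cub::GridQueue<int> queue(d_queue_storage);
 *
 * // Set the fill-size and reset the drain counter before the consuming kernel runs
 * queue.FillAndResetDrain(num_items);
 *
 * // Inside DrainKernel, each thread block repeatedly reserves a tile of work:
 * //     int tile_offset = queue.Drain(TILE_ITEMS);
 * //     if (tile_offset >= num_items) break;    // queue exhausted
 * DrainKernel<<<grid_size, BLOCK_THREADS>>>(queue, num_items);
 * \endcode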
* * \par * Iterative work management can be implemented simply with a pair of flip-flopping * work buffers, each with an associated set of fill and drain GridQueue descriptors. * * \tparam OffsetT Signed integer type for global offsets */ template class GridQueue { private: /// Counter indices enum { FILL = 0, DRAIN = 1, }; /// Pair of counters OffsetT *d_counters; public: /// Returns the device allocation size in bytes needed to construct a GridQueue instance __host__ __device__ __forceinline__ static size_t AllocationSize() { return sizeof(OffsetT) * 2; } /// Constructs an invalid GridQueue descriptor __host__ __device__ __forceinline__ GridQueue() : d_counters(NULL) {} /// Constructs a GridQueue descriptor around the device storage allocation __host__ __device__ __forceinline__ GridQueue( void *d_storage) ///< Device allocation to back the GridQueue. Must be at least as big as AllocationSize(). : d_counters((OffsetT*) d_storage) {} /// This operation sets the fill-size and resets the drain counter, preparing the GridQueue for draining in the next kernel instance. To be called by the host or by a kernel prior to that which will be draining. __host__ __device__ __forceinline__ cudaError_t FillAndResetDrain( OffsetT fill_size, cudaStream_t stream = 0) { #if (CUB_PTX_ARCH > 0) (void)stream; d_counters[FILL] = fill_size; d_counters[DRAIN] = 0; return cudaSuccess; #else OffsetT counters[2]; counters[FILL] = fill_size; counters[DRAIN] = 0; return CubDebug(cudaMemcpyAsync(d_counters, counters, sizeof(OffsetT) * 2, cudaMemcpyHostToDevice, stream)); #endif } /// This operation resets the drain so that it may advance to meet the existing fill-size. To be called by the host or by a kernel prior to that which will be draining. __host__ __device__ __forceinline__ cudaError_t ResetDrain(cudaStream_t stream = 0) { #if (CUB_PTX_ARCH > 0) (void)stream; d_counters[DRAIN] = 0; return cudaSuccess; #else return CubDebug(cudaMemsetAsync(d_counters + DRAIN, 0, sizeof(OffsetT), stream)); #endif } /// This operation resets the fill counter. To be called by the host or by a kernel prior to that which will be filling. __host__ __device__ __forceinline__ cudaError_t ResetFill(cudaStream_t stream = 0) { #if (CUB_PTX_ARCH > 0) (void)stream; d_counters[FILL] = 0; return cudaSuccess; #else return CubDebug(cudaMemsetAsync(d_counters + FILL, 0, sizeof(OffsetT), stream)); #endif } /// Returns the fill-size established by the parent or by the previous kernel. __host__ __device__ __forceinline__ cudaError_t FillSize( OffsetT &fill_size, cudaStream_t stream = 0) { #if (CUB_PTX_ARCH > 0) (void)stream; fill_size = d_counters[FILL]; return cudaSuccess; #else return CubDebug(cudaMemcpyAsync(&fill_size, d_counters + FILL, sizeof(OffsetT), cudaMemcpyDeviceToHost, stream)); #endif } /// Drain \p num_items from the queue. Returns offset from which to read items. To be called from CUDA kernel. __device__ __forceinline__ OffsetT Drain(OffsetT num_items) { return atomicAdd(d_counters + DRAIN, num_items); } /// Fill \p num_items into the queue. Returns offset from which to write items. To be called from CUDA kernel. 
__device__ __forceinline__ OffsetT Fill(OffsetT num_items) { return atomicAdd(d_counters + FILL, num_items); } }; #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document /** * Reset grid queue (call with 1 block of 1 thread) */ template __global__ void FillAndResetDrainKernel( GridQueue grid_queue, OffsetT num_items) { grid_queue.FillAndResetDrain(num_items); } #endif // DOXYGEN_SHOULD_SKIP_THIS /** @} */ // end group GridModule } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/host/000077500000000000000000000000001411340063500167625ustar00rootroot00000000000000relion-3.1.3/src/acc/cuda/cub/host/mutex.cuh000066400000000000000000000107451411340063500206340ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ******************************************************************************/ /** * \file * Simple portable mutex */ #pragma once #if (__cplusplus > 199711L) || (defined(_MSC_VER) && _MSC_VER >= 1800) #include #else #if defined(_WIN32) || defined(_WIN64) #include #define WIN32_LEAN_AND_MEAN #define NOMINMAX #include #undef WIN32_LEAN_AND_MEAN #undef NOMINMAX /** * Compiler read/write barrier */ #pragma intrinsic(_ReadWriteBarrier) #endif #endif #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * Simple portable mutex * - Wraps std::mutex when compiled with C++11 or newer (supported on all platforms) * - Uses GNU/Windows spinlock mechanisms for pre C++11 (supported on x86/x64 when compiled with cl.exe or g++) */ struct Mutex { #if (__cplusplus > 199711L) || (defined(_MSC_VER) && _MSC_VER >= 1800) std::mutex mtx; void Lock() { mtx.lock(); } void Unlock() { mtx.unlock(); } void TryLock() { mtx.try_lock(); } #else //__cplusplus > 199711L #if defined(_MSC_VER) // Microsoft VC++ typedef long Spinlock; #else // GNU g++ typedef int Spinlock; /** * Compiler read/write barrier */ __forceinline__ void _ReadWriteBarrier() { __sync_synchronize(); } /** * Atomic exchange */ __forceinline__ long _InterlockedExchange(volatile int * const Target, const int Value) { // NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier _ReadWriteBarrier(); return __sync_lock_test_and_set(Target, Value); } /** * Pause instruction to prevent excess processor bus usage */ __forceinline__ void YieldProcessor() { } #endif // defined(_MSC_VER) /// Lock member volatile Spinlock lock; /** * Constructor */ Mutex() : lock(0) {} /** * Return when the specified spinlock has been acquired */ __forceinline__ void Lock() { while (1) { if (!_InterlockedExchange(&lock, 1)) return; while (lock) YieldProcessor(); } } /** * Release the specified spinlock */ __forceinline__ void Unlock() { _ReadWriteBarrier(); lock = 0; } #endif // __cplusplus > 199711L }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/iterator/000077500000000000000000000000001411340063500176365ustar00rootroot00000000000000relion-3.1.3/src/acc/cuda/cub/iterator/arg_index_input_iterator.cuh000066400000000000000000000211151411340063500254270ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * Random-access iterator types */ #pragma once #include #include #include "../thread/thread_load.cuh" #include "../thread/thread_store.cuh" #include "../util_device.cuh" #include "../util_namespace.cuh" #include #if (THRUST_VERSION >= 100700) // This iterator is compatible with Thrust API 1.7 and newer #include #include #endif // THRUST_VERSION /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup UtilIterator * @{ */ /** * \brief A random-access input wrapper for pairing dereferenced values with their corresponding indices (forming \p KeyValuePair tuples). * * \par Overview * - ArgIndexInputIteratorTwraps a random access input iterator \p itr of type \p InputIteratorT. * Dereferencing an ArgIndexInputIteratorTat offset \p i produces a \p KeyValuePair value whose * \p key field is \p i and whose \p value field is itr[i]. * - Can be used with any data type. * - Can be constructed, manipulated, and exchanged within and between host and device * functions. Wrapped host memory can only be dereferenced on the host, and wrapped * device memory can only be dereferenced on the device. * - Compatible with Thrust API v1.7 or newer. 
* * \par Snippet * The code snippet below illustrates the use of \p ArgIndexInputIteratorTto * dereference an array of doubles * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize a device array * double *d_in; // e.g., [8.0, 6.0, 7.0, 5.0, 3.0, 0.0, 9.0] * * // Create an iterator wrapper * cub::ArgIndexInputIterator itr(d_in); * * // Within device code: * typedef typename cub::ArgIndexInputIterator::value_type Tuple; * Tuple item_offset_pair.key = *itr; * printf("%f @ %d\n", * item_offset_pair.value, * item_offset_pair.key); // 8.0 @ 0 * * itr = itr + 6; * item_offset_pair.key = *itr; * printf("%f @ %d\n", * item_offset_pair.value, * item_offset_pair.key); // 9.0 @ 6 * * \endcode * * \tparam InputIteratorT The value type of the wrapped input iterator * \tparam OffsetT The difference type of this iterator (Default: \p ptrdiff_t) * \tparam OutputValueT The paired value type of the tuple (Default: value type of input iterator) */ template < typename InputIteratorT, typename OffsetT = ptrdiff_t, typename OutputValueT = typename std::iterator_traits::value_type> class ArgIndexInputIterator { public: // Required iterator traits typedef ArgIndexInputIterator self_type; ///< My own type typedef OffsetT difference_type; ///< Type to express the result of subtracting one iterator from another typedef KeyValuePair value_type; ///< The type of the element the iterator can point to typedef value_type* pointer; ///< The type of a pointer to an element the iterator can point to typedef value_type reference; ///< The type of a reference to an element the iterator can point to #if (THRUST_VERSION >= 100700) // Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods typedef typename thrust::detail::iterator_facade_category< thrust::any_system_tag, thrust::random_access_traversal_tag, value_type, reference >::type iterator_category; ///< The iterator category #else typedef std::random_access_iterator_tag iterator_category; ///< The iterator category #endif // THRUST_VERSION private: InputIteratorT itr; difference_type offset; public: /// Constructor __host__ __device__ __forceinline__ ArgIndexInputIterator( InputIteratorT itr, ///< Input iterator to wrap difference_type offset = 0) ///< OffsetT (in items) from \p itr denoting the position of the iterator : itr(itr), offset(offset) {} /// Postfix increment __host__ __device__ __forceinline__ self_type operator++(int) { self_type retval = *this; offset++; return retval; } /// Prefix increment __host__ __device__ __forceinline__ self_type operator++() { offset++; return *this; } /// Indirection __host__ __device__ __forceinline__ reference operator*() const { value_type retval; retval.value = itr[offset]; retval.key = offset; return retval; } /// Addition template __host__ __device__ __forceinline__ self_type operator+(Distance n) const { self_type retval(itr, offset + n); return retval; } /// Addition assignment template __host__ __device__ __forceinline__ self_type& operator+=(Distance n) { offset += n; return *this; } /// Subtraction template __host__ __device__ __forceinline__ self_type operator-(Distance n) const { self_type retval(itr, offset - n); return retval; } /// Subtraction assignment template __host__ __device__ __forceinline__ self_type& operator-=(Distance n) { offset -= n; return *this; } /// Distance __host__ __device__ __forceinline__ difference_type operator-(self_type other) const { return offset - other.offset; } /// Array subscript template __host__ __device__ 
__forceinline__ reference operator[](Distance n) const { self_type offset = (*this) + n; return *offset; } /// Structure dereference __host__ __device__ __forceinline__ pointer operator->() { return &(*(*this)); } /// Equal to __host__ __device__ __forceinline__ bool operator==(const self_type& rhs) { return ((itr == rhs.itr) && (offset == rhs.offset)); } /// Not equal to __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs) { return ((itr != rhs.itr) || (offset != rhs.offset)); } /// Normalize __host__ __device__ __forceinline__ void normalize() { itr += offset; offset = 0; } /// ostream operator friend std::ostream& operator<<(std::ostream& os, const self_type& /*itr*/) { return os; } }; /** @} */ // end group UtilIterator } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/iterator/cache_modified_input_iterator.cuh000066400000000000000000000176501411340063500264030ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * Random-access iterator types */ #pragma once #include #include #include "../thread/thread_load.cuh" #include "../thread/thread_store.cuh" #include "../util_device.cuh" #include "../util_namespace.cuh" #if (THRUST_VERSION >= 100700) // This iterator is compatible with Thrust API 1.7 and newer #include #include #endif // THRUST_VERSION /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup UtilIterator * @{ */ /** * \brief A random-access input wrapper for dereferencing array values using a PTX cache load modifier. * * \par Overview * - CacheModifiedInputIteratorTis a random-access input iterator that wraps a native * device pointer of type ValueType*. \p ValueType references are * made by reading \p ValueType values through loads modified by \p MODIFIER. 
* - Can be used to load any data type from memory using PTX cache load modifiers (e.g., "LOAD_LDG", * "LOAD_CG", "LOAD_CA", "LOAD_CS", "LOAD_CV", etc.). * - Can be constructed, manipulated, and exchanged within and between host and device * functions, but can only be dereferenced within device functions. * - Compatible with Thrust API v1.7 or newer. * * \par Snippet * The code snippet below illustrates the use of \p CacheModifiedInputIteratorTto * dereference a device array of double using the "ldg" PTX load modifier * (i.e., load values through texture cache). * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize a device array * double *d_in; // e.g., [8.0, 6.0, 7.0, 5.0, 3.0, 0.0, 9.0] * * // Create an iterator wrapper * cub::CacheModifiedInputIterator itr(d_in); * * // Within device code: * printf("%f\n", itr[0]); // 8.0 * printf("%f\n", itr[1]); // 6.0 * printf("%f\n", itr[6]); // 9.0 * * \endcode * * \tparam CacheLoadModifier The cub::CacheLoadModifier to use when accessing data * \tparam ValueType The value type of this iterator * \tparam OffsetT The difference type of this iterator (Default: \p ptrdiff_t) */ template < CacheLoadModifier MODIFIER, typename ValueType, typename OffsetT = ptrdiff_t> class CacheModifiedInputIterator { public: // Required iterator traits typedef CacheModifiedInputIterator self_type; ///< My own type typedef OffsetT difference_type; ///< Type to express the result of subtracting one iterator from another typedef ValueType value_type; ///< The type of the element the iterator can point to typedef ValueType* pointer; ///< The type of a pointer to an element the iterator can point to typedef ValueType reference; ///< The type of a reference to an element the iterator can point to #if (THRUST_VERSION >= 100700) // Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods typedef typename thrust::detail::iterator_facade_category< thrust::device_system_tag, thrust::random_access_traversal_tag, value_type, reference >::type iterator_category; ///< The iterator category #else typedef std::random_access_iterator_tag iterator_category; ///< The iterator category #endif // THRUST_VERSION public: /// Wrapped native pointer ValueType* ptr; /// Constructor template __host__ __device__ __forceinline__ CacheModifiedInputIterator( QualifiedValueType* ptr) ///< Native pointer to wrap : ptr(const_cast::Type *>(ptr)) {} /// Postfix increment __host__ __device__ __forceinline__ self_type operator++(int) { self_type retval = *this; ptr++; return retval; } /// Prefix increment __host__ __device__ __forceinline__ self_type operator++() { ptr++; return *this; } /// Indirection __device__ __forceinline__ reference operator*() const { return ThreadLoad(ptr); } /// Addition template __host__ __device__ __forceinline__ self_type operator+(Distance n) const { self_type retval(ptr + n); return retval; } /// Addition assignment template __host__ __device__ __forceinline__ self_type& operator+=(Distance n) { ptr += n; return *this; } /// Subtraction template __host__ __device__ __forceinline__ self_type operator-(Distance n) const { self_type retval(ptr - n); return retval; } /// Subtraction assignment template __host__ __device__ __forceinline__ self_type& operator-=(Distance n) { ptr -= n; return *this; } /// Distance __host__ __device__ __forceinline__ difference_type operator-(self_type other) const { return ptr - other.ptr; } /// Array subscript template __device__ __forceinline__ reference operator[](Distance n) 
const { return ThreadLoad(ptr + n); } /// Structure dereference __device__ __forceinline__ pointer operator->() { return &ThreadLoad(ptr); } /// Equal to __host__ __device__ __forceinline__ bool operator==(const self_type& rhs) { return (ptr == rhs.ptr); } /// Not equal to __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs) { return (ptr != rhs.ptr); } /// ostream operator friend std::ostream& operator<<(std::ostream& os, const self_type& /*itr*/) { return os; } }; /** @} */ // end group UtilIterator } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/iterator/cache_modified_output_iterator.cuh000066400000000000000000000202021411340063500265670ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * Random-access iterator types */ #pragma once #include #include #include "../thread/thread_load.cuh" #include "../thread/thread_store.cuh" #include "../util_device.cuh" #include "../util_namespace.cuh" #if (THRUST_VERSION >= 100700) // This iterator is compatible with Thrust API 1.7 and newer #include #include #endif // THRUST_VERSION /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup UtilIterator * @{ */ /** * \brief A random-access output wrapper for storing array values using a PTX cache-modifier. * * \par Overview * - CacheModifiedOutputIterator is a random-access output iterator that wraps a native * device pointer of type ValueType*. \p ValueType references are * made by writing \p ValueType values through stores modified by \p MODIFIER. * - Can be used to store any data type to memory using PTX cache store modifiers (e.g., "STORE_WB", * "STORE_CG", "STORE_CS", "STORE_WT", etc.). 
* - Can be constructed, manipulated, and exchanged within and between host and device * functions, but can only be dereferenced within device functions. * - Compatible with Thrust API v1.7 or newer. * * \par Snippet * The code snippet below illustrates the use of \p CacheModifiedOutputIterator to * dereference a device array of doubles using the "wt" PTX load modifier * (i.e., write-through to system memory). * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize a device array * double *d_out; // e.g., [, , , , , , ] * * // Create an iterator wrapper * cub::CacheModifiedOutputIterator itr(d_out); * * // Within device code: * itr[0] = 8.0; * itr[1] = 66.0; * itr[55] = 24.0; * * \endcode * * \par Usage Considerations * - Can only be dereferenced within device code * * \tparam CacheStoreModifier The cub::CacheStoreModifier to use when accessing data * \tparam ValueType The value type of this iterator * \tparam OffsetT The difference type of this iterator (Default: \p ptrdiff_t) */ template < CacheStoreModifier MODIFIER, typename ValueType, typename OffsetT = ptrdiff_t> class CacheModifiedOutputIterator { private: // Proxy object struct Reference { ValueType* ptr; /// Constructor __host__ __device__ __forceinline__ Reference(ValueType* ptr) : ptr(ptr) {} /// Assignment __device__ __forceinline__ ValueType operator =(ValueType val) { ThreadStore(ptr, val); return val; } }; public: // Required iterator traits typedef CacheModifiedOutputIterator self_type; ///< My own type typedef OffsetT difference_type; ///< Type to express the result of subtracting one iterator from another typedef void value_type; ///< The type of the element the iterator can point to typedef void pointer; ///< The type of a pointer to an element the iterator can point to typedef Reference reference; ///< The type of a reference to an element the iterator can point to #if (THRUST_VERSION >= 100700) // Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods typedef typename thrust::detail::iterator_facade_category< thrust::device_system_tag, thrust::random_access_traversal_tag, value_type, reference >::type iterator_category; ///< The iterator category #else typedef std::random_access_iterator_tag iterator_category; ///< The iterator category #endif // THRUST_VERSION private: ValueType* ptr; public: /// Constructor template __host__ __device__ __forceinline__ CacheModifiedOutputIterator( QualifiedValueType* ptr) ///< Native pointer to wrap : ptr(const_cast::Type *>(ptr)) {} /// Postfix increment __host__ __device__ __forceinline__ self_type operator++(int) { self_type retval = *this; ptr++; return retval; } /// Prefix increment __host__ __device__ __forceinline__ self_type operator++() { ptr++; return *this; } /// Indirection __host__ __device__ __forceinline__ reference operator*() const { return Reference(ptr); } /// Addition template __host__ __device__ __forceinline__ self_type operator+(Distance n) const { self_type retval(ptr + n); return retval; } /// Addition assignment template __host__ __device__ __forceinline__ self_type& operator+=(Distance n) { ptr += n; return *this; } /// Subtraction template __host__ __device__ __forceinline__ self_type operator-(Distance n) const { self_type retval(ptr - n); return retval; } /// Subtraction assignment template __host__ __device__ __forceinline__ self_type& operator-=(Distance n) { ptr -= n; return *this; } /// Distance __host__ __device__ __forceinline__ difference_type operator-(self_type other) const { 
return ptr - other.ptr; } /// Array subscript template __host__ __device__ __forceinline__ reference operator[](Distance n) const { return Reference(ptr + n); } /// Equal to __host__ __device__ __forceinline__ bool operator==(const self_type& rhs) { return (ptr == rhs.ptr); } /// Not equal to __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs) { return (ptr != rhs.ptr); } /// ostream operator friend std::ostream& operator<<(std::ostream& os, const self_type& itr) { return os; } }; /** @} */ // end group UtilIterator } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/iterator/constant_input_iterator.cuh000066400000000000000000000167421411340063500253320ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * Random-access iterator types */ #pragma once #include #include #include "../thread/thread_load.cuh" #include "../thread/thread_store.cuh" #include "../util_namespace.cuh" #if (THRUST_VERSION >= 100700) // This iterator is compatible with Thrust API 1.7 and newer #include #include #endif // THRUST_VERSION /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup UtilIterator * @{ */ /** * \brief A random-access input generator for dereferencing a sequence of homogeneous values * * \par Overview * - Read references to a ConstantInputIteratorTiterator always return the supplied constant * of type \p ValueType. * - Can be used with any data type. * - Can be constructed, manipulated, dereferenced, and exchanged within and between host and device * functions. * - Compatible with Thrust API v1.7 or newer. * * \par Snippet * The code snippet below illustrates the use of \p ConstantInputIteratorTto * dereference a sequence of homogeneous doubles. 
* \par * \code * #include // or equivalently * * cub::ConstantInputIterator itr(5.0); * * printf("%f\n", itr[0]); // 5.0 * printf("%f\n", itr[1]); // 5.0 * printf("%f\n", itr[2]); // 5.0 * printf("%f\n", itr[50]); // 5.0 * * \endcode * * \tparam ValueType The value type of this iterator * \tparam OffsetT The difference type of this iterator (Default: \p ptrdiff_t) */ template < typename ValueType, typename OffsetT = ptrdiff_t> class ConstantInputIterator { public: // Required iterator traits typedef ConstantInputIterator self_type; ///< My own type typedef OffsetT difference_type; ///< Type to express the result of subtracting one iterator from another typedef ValueType value_type; ///< The type of the element the iterator can point to typedef ValueType* pointer; ///< The type of a pointer to an element the iterator can point to typedef ValueType reference; ///< The type of a reference to an element the iterator can point to #if (THRUST_VERSION >= 100700) // Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods typedef typename thrust::detail::iterator_facade_category< thrust::any_system_tag, thrust::random_access_traversal_tag, value_type, reference >::type iterator_category; ///< The iterator category #else typedef std::random_access_iterator_tag iterator_category; ///< The iterator category #endif // THRUST_VERSION private: ValueType val; OffsetT offset; #ifdef _WIN32 OffsetT pad[CUB_MAX(1, (16 / sizeof(OffsetT) - 1))]; // Workaround for win32 parameter-passing bug (ulonglong2 argmin DeviceReduce) #endif public: /// Constructor __host__ __device__ __forceinline__ ConstantInputIterator( ValueType val, ///< Starting value for the iterator instance to report OffsetT offset = 0) ///< Base offset : val(val), offset(offset) {} /// Postfix increment __host__ __device__ __forceinline__ self_type operator++(int) { self_type retval = *this; offset++; return retval; } /// Prefix increment __host__ __device__ __forceinline__ self_type operator++() { offset++; return *this; } /// Indirection __host__ __device__ __forceinline__ reference operator*() const { return val; } /// Addition template __host__ __device__ __forceinline__ self_type operator+(Distance n) const { self_type retval(val, offset + n); return retval; } /// Addition assignment template __host__ __device__ __forceinline__ self_type& operator+=(Distance n) { offset += n; return *this; } /// Subtraction template __host__ __device__ __forceinline__ self_type operator-(Distance n) const { self_type retval(val, offset - n); return retval; } /// Subtraction assignment template __host__ __device__ __forceinline__ self_type& operator-=(Distance n) { offset -= n; return *this; } /// Distance __host__ __device__ __forceinline__ difference_type operator-(self_type other) const { return offset - other.offset; } /// Array subscript template __host__ __device__ __forceinline__ reference operator[](Distance /*n*/) const { return val; } /// Structure dereference __host__ __device__ __forceinline__ pointer operator->() { return &val; } /// Equal to __host__ __device__ __forceinline__ bool operator==(const self_type& rhs) { return (offset == rhs.offset) && ((val == rhs.val)); } /// Not equal to __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs) { return (offset != rhs.offset) || (val!= rhs.val); } /// ostream operator friend std::ostream& operator<<(std::ostream& os, const self_type& itr) { os << "[" << itr.val << "," << itr.offset << "]"; return os; } }; /** @} */ // end group UtilIterator 
} // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/iterator/counting_input_iterator.cuh000066400000000000000000000163111411340063500253170ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * Random-access iterator types */ #pragma once #include #include #include "../thread/thread_load.cuh" #include "../thread/thread_store.cuh" #include "../util_device.cuh" #include "../util_namespace.cuh" #if (THRUST_VERSION >= 100700) // This iterator is compatible with Thrust API 1.7 and newer #include #include #endif // THRUST_VERSION /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup UtilIterator * @{ */ /** * \brief A random-access input generator for dereferencing a sequence of incrementing integer values. * * \par Overview * - After initializing a CountingInputIteratorTto a certain integer \p base, read references * at \p offset will return the value \p base + \p offset. * - Can be constructed, manipulated, dereferenced, and exchanged within and between host and device * functions. * - Compatible with Thrust API v1.7 or newer. * * \par Snippet * The code snippet below illustrates the use of \p CountingInputIteratorTto * dereference a sequence of incrementing integers. 
* \par * \code * #include // or equivalently * * cub::CountingInputIterator itr(5); * * printf("%d\n", itr[0]); // 5 * printf("%d\n", itr[1]); // 6 * printf("%d\n", itr[2]); // 7 * printf("%d\n", itr[50]); // 55 * * \endcode * * \tparam ValueType The value type of this iterator * \tparam OffsetT The difference type of this iterator (Default: \p ptrdiff_t) */ template < typename ValueType, typename OffsetT = ptrdiff_t> class CountingInputIterator { public: // Required iterator traits typedef CountingInputIterator self_type; ///< My own type typedef OffsetT difference_type; ///< Type to express the result of subtracting one iterator from another typedef ValueType value_type; ///< The type of the element the iterator can point to typedef ValueType* pointer; ///< The type of a pointer to an element the iterator can point to typedef ValueType reference; ///< The type of a reference to an element the iterator can point to #if (THRUST_VERSION >= 100700) // Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods typedef typename thrust::detail::iterator_facade_category< thrust::any_system_tag, thrust::random_access_traversal_tag, value_type, reference >::type iterator_category; ///< The iterator category #else typedef std::random_access_iterator_tag iterator_category; ///< The iterator category #endif // THRUST_VERSION private: ValueType val; public: /// Constructor __host__ __device__ __forceinline__ CountingInputIterator( const ValueType &val) ///< Starting value for the iterator instance to report : val(val) {} /// Postfix increment __host__ __device__ __forceinline__ self_type operator++(int) { self_type retval = *this; val++; return retval; } /// Prefix increment __host__ __device__ __forceinline__ self_type operator++() { val++; return *this; } /// Indirection __host__ __device__ __forceinline__ reference operator*() const { return val; } /// Addition template __host__ __device__ __forceinline__ self_type operator+(Distance n) const { self_type retval(val + (ValueType) n); return retval; } /// Addition assignment template __host__ __device__ __forceinline__ self_type& operator+=(Distance n) { val += (ValueType) n; return *this; } /// Subtraction template __host__ __device__ __forceinline__ self_type operator-(Distance n) const { self_type retval(val - (ValueType) n); return retval; } /// Subtraction assignment template __host__ __device__ __forceinline__ self_type& operator-=(Distance n) { val -= n; return *this; } /// Distance __host__ __device__ __forceinline__ difference_type operator-(self_type other) const { return (difference_type) (val - other.val); } /// Array subscript template __host__ __device__ __forceinline__ reference operator[](Distance n) const { return val + (ValueType) n; } /// Structure dereference __host__ __device__ __forceinline__ pointer operator->() { return &val; } /// Equal to __host__ __device__ __forceinline__ bool operator==(const self_type& rhs) { return (val == rhs.val); } /// Not equal to __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs) { return (val != rhs.val); } /// ostream operator friend std::ostream& operator<<(std::ostream& os, const self_type& itr) { os << "[" << itr.val << "]"; return os; } }; /** @} */ // end group UtilIterator } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) 
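The usage snippets embedded in the two generator-iterator headers above lost their template arguments and include targets during extraction (e.g. "cub::CountingInputIterator itr(5);"). Below is a minimal host-side sketch with the angle-bracketed types restored; it is not part of CUB itself, and the include paths are assumptions that depend on how this vendored copy is consumed.

// Hedged sketch: exercises ConstantInputIterator and CountingInputIterator on the host.
// Compile with nvcc; the include paths are assumed, adjust to the vendored layout.
#include <cstdio>
#include "src/acc/cuda/cub/iterator/constant_input_iterator.cuh"
#include "src/acc/cuda/cub/iterator/counting_input_iterator.cuh"

int main()
{
    cub::CountingInputIterator<int>    count_itr(5);   // reports 5 + offset
    cub::ConstantInputIterator<double> const_itr(2.5); // reports 2.5 at every offset

    // Both generators may be dereferenced in host as well as device code.
    std::printf("%d %d\n", count_itr[0], count_itr[10]); // 5 15
    std::printf("%f %f\n", const_itr[0], const_itr[10]); // 2.500000 2.500000
    return 0;
}

Either generator can also be passed directly as the input of a device-wide CUB primitive, which avoids materialising an index sequence or constant-filled array in device memory.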
relion-3.1.3/src/acc/cuda/cub/iterator/discard_output_iterator.cuh000066400000000000000000000152411411340063500253040ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ******************************************************************************/ /** * \file * Random-access iterator types */ #pragma once #include #include #include "../util_namespace.cuh" #include "../util_macro.cuh" #if (THRUST_VERSION >= 100700) // This iterator is compatible with Thrust API 1.7 and newer #include #include #endif // THRUST_VERSION /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup UtilIterator * @{ */ /** * \brief A discard iterator */ template class DiscardOutputIterator { public: // Required iterator traits typedef DiscardOutputIterator self_type; ///< My own type typedef OffsetT difference_type; ///< Type to express the result of subtracting one iterator from another typedef void value_type; ///< The type of the element the iterator can point to typedef void pointer; ///< The type of a pointer to an element the iterator can point to typedef void reference; ///< The type of a reference to an element the iterator can point to #if (THRUST_VERSION >= 100700) // Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods typedef typename thrust::detail::iterator_facade_category< thrust::any_system_tag, thrust::random_access_traversal_tag, value_type, reference >::type iterator_category; ///< The iterator category #else typedef std::random_access_iterator_tag iterator_category; ///< The iterator category #endif // THRUST_VERSION private: OffsetT offset; #if defined(_WIN32) || !defined(_WIN64) // Workaround for win32 parameter-passing bug (ulonglong2 argmin DeviceReduce) OffsetT pad[CUB_MAX(1, (16 / sizeof(OffsetT) - 1))]; #endif public: /// Constructor __host__ __device__ __forceinline__ DiscardOutputIterator( OffsetT offset = 0) ///< Base offset : offset(offset) {} /// Postfix increment __host__ __device__ __forceinline__ self_type operator++(int) { self_type retval = *this; offset++; return retval; } /// Prefix increment __host__ __device__ __forceinline__ self_type operator++() { offset++; return *this; } /// Indirection __host__ __device__ __forceinline__ self_type& operator*() { // return self reference, which can be assigned to anything return *this; } /// Addition template __host__ __device__ __forceinline__ self_type operator+(Distance n) const { self_type retval(offset + n); return retval; } /// Addition assignment template __host__ __device__ __forceinline__ self_type& operator+=(Distance n) { offset += n; return *this; } /// Subtraction template __host__ __device__ __forceinline__ self_type operator-(Distance n) const { self_type retval(offset - n); return retval; } /// Subtraction assignment template __host__ __device__ __forceinline__ self_type& operator-=(Distance n) { offset -= n; return *this; } /// Distance __host__ __device__ __forceinline__ difference_type operator-(self_type other) const { return offset - other.offset; } /// Array subscript template __host__ __device__ __forceinline__ self_type& operator[](Distance n) { // return self reference, which can be assigned to anything return *this; } /// Structure dereference __host__ __device__ __forceinline__ pointer operator->() { return; } /// Assignment to self (no-op) __host__ __device__ __forceinline__ void operator=(self_type const& other) { offset = other.offset; } /// Assignment to anything else (no-op) template __host__ __device__ __forceinline__ void operator=(T const&) {} /// Cast to void* operator __host__ __device__ __forceinline__ operator void*() const { return NULL; } /// Equal to __host__ __device__ 
__forceinline__ bool operator==(const self_type& rhs) { return (offset == rhs.offset); } /// Not equal to __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs) { return (offset != rhs.offset); } /// ostream operator friend std::ostream& operator<<(std::ostream& os, const self_type& itr) { os << "[" << itr.offset << "]"; return os; } }; /** @} */ // end group UtilIterator } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/iterator/tex_obj_input_iterator.cuh000066400000000000000000000244621411340063500251310ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * Random-access iterator types */ #pragma once #include #include #include "../thread/thread_load.cuh" #include "../thread/thread_store.cuh" #include "../util_device.cuh" #include "../util_debug.cuh" #include "../util_namespace.cuh" #if (THRUST_VERSION >= 100700) // This iterator is compatible with Thrust API 1.7 and newer #include #include #endif // THRUST_VERSION /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup UtilIterator * @{ */ /** * \brief A random-access input wrapper for dereferencing array values through texture cache. Uses newer Kepler-style texture objects. * * \par Overview * - TexObjInputIteratorTwraps a native device pointer of type ValueType*. References * to elements are to be loaded through texture cache. * - Can be used to load any data type from memory through texture cache. * - Can be manipulated and exchanged within and between host and device * functions, can only be constructed within host functions, and can only be * dereferenced within device functions. 
* - With regard to nested/dynamic parallelism, TexObjInputIteratorTiterators may only be * created by the host thread, but can be used by any descendant kernel. * - Compatible with Thrust API v1.7 or newer. * * \par Snippet * The code snippet below illustrates the use of \p TexRefInputIteratorTto * dereference a device array of doubles through texture cache. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize a device array * int num_items; // e.g., 7 * double *d_in; // e.g., [8.0, 6.0, 7.0, 5.0, 3.0, 0.0, 9.0] * * // Create an iterator wrapper * cub::TexObjInputIterator itr; * itr.BindTexture(d_in, sizeof(double) * num_items); * ... * * // Within device code: * printf("%f\n", itr[0]); // 8.0 * printf("%f\n", itr[1]); // 6.0 * printf("%f\n", itr[6]); // 9.0 * * ... * itr.UnbindTexture(); * * \endcode * * \tparam T The value type of this iterator * \tparam OffsetT The difference type of this iterator (Default: \p ptrdiff_t) */ template < typename T, typename OffsetT = ptrdiff_t> class TexObjInputIterator { public: // Required iterator traits typedef TexObjInputIterator self_type; ///< My own type typedef OffsetT difference_type; ///< Type to express the result of subtracting one iterator from another typedef T value_type; ///< The type of the element the iterator can point to typedef T* pointer; ///< The type of a pointer to an element the iterator can point to typedef T reference; ///< The type of a reference to an element the iterator can point to #if (THRUST_VERSION >= 100700) // Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods typedef typename thrust::detail::iterator_facade_category< thrust::device_system_tag, thrust::random_access_traversal_tag, value_type, reference >::type iterator_category; ///< The iterator category #else typedef std::random_access_iterator_tag iterator_category; ///< The iterator category #endif // THRUST_VERSION private: // Largest texture word we can use in device typedef typename UnitWord::TextureWord TextureWord; // Number of texture words per T enum { TEXTURE_MULTIPLE = sizeof(T) / sizeof(TextureWord) }; private: T* ptr; difference_type tex_offset; cudaTextureObject_t tex_obj; public: /// Constructor __host__ __device__ __forceinline__ TexObjInputIterator() : ptr(NULL), tex_offset(0), tex_obj(0) {} /// Use this iterator to bind \p ptr with a texture reference template cudaError_t BindTexture( QualifiedT *ptr, ///< Native pointer to wrap that is aligned to cudaDeviceProp::textureAlignment size_t bytes = size_t(-1), ///< Number of bytes in the range size_t tex_offset = 0) ///< OffsetT (in items) from \p ptr denoting the position of the iterator { this->ptr = const_cast::Type *>(ptr); this->tex_offset = tex_offset; cudaChannelFormatDesc channel_desc = cudaCreateChannelDesc(); cudaResourceDesc res_desc; cudaTextureDesc tex_desc; memset(&res_desc, 0, sizeof(cudaResourceDesc)); memset(&tex_desc, 0, sizeof(cudaTextureDesc)); res_desc.resType = cudaResourceTypeLinear; res_desc.res.linear.devPtr = this->ptr; res_desc.res.linear.desc = channel_desc; res_desc.res.linear.sizeInBytes = bytes; tex_desc.readMode = cudaReadModeElementType; return cudaCreateTextureObject(&tex_obj, &res_desc, &tex_desc, NULL); } /// Unbind this iterator from its texture reference cudaError_t UnbindTexture() { return cudaDestroyTextureObject(tex_obj); } /// Postfix increment __host__ __device__ __forceinline__ self_type operator++(int) { self_type retval = *this; tex_offset++; return retval; } /// Prefix increment 
__host__ __device__ __forceinline__ self_type operator++() { tex_offset++; return *this; } /// Indirection __host__ __device__ __forceinline__ reference operator*() const { #if (CUB_PTX_ARCH == 0) // Simply dereference the pointer on the host return ptr[tex_offset]; #else // Move array of uninitialized words, then alias and assign to return value TextureWord words[TEXTURE_MULTIPLE]; #pragma unroll for (int i = 0; i < TEXTURE_MULTIPLE; ++i) { words[i] = tex1Dfetch( tex_obj, (tex_offset * TEXTURE_MULTIPLE) + i); } // Load from words return *reinterpret_cast(words); #endif } /// Addition template __host__ __device__ __forceinline__ self_type operator+(Distance n) const { self_type retval; retval.ptr = ptr; retval.tex_obj = tex_obj; retval.tex_offset = tex_offset + n; return retval; } /// Addition assignment template __host__ __device__ __forceinline__ self_type& operator+=(Distance n) { tex_offset += n; return *this; } /// Subtraction template __host__ __device__ __forceinline__ self_type operator-(Distance n) const { self_type retval; retval.ptr = ptr; retval.tex_obj = tex_obj; retval.tex_offset = tex_offset - n; return retval; } /// Subtraction assignment template __host__ __device__ __forceinline__ self_type& operator-=(Distance n) { tex_offset -= n; return *this; } /// Distance __host__ __device__ __forceinline__ difference_type operator-(self_type other) const { return tex_offset - other.tex_offset; } /// Array subscript template __host__ __device__ __forceinline__ reference operator[](Distance n) const { self_type offset = (*this) + n; return *offset; } /// Structure dereference __host__ __device__ __forceinline__ pointer operator->() { return &(*(*this)); } /// Equal to __host__ __device__ __forceinline__ bool operator==(const self_type& rhs) { return ((ptr == rhs.ptr) && (tex_offset == rhs.tex_offset) && (tex_obj == rhs.tex_obj)); } /// Not equal to __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs) { return ((ptr != rhs.ptr) || (tex_offset != rhs.tex_offset) || (tex_obj != rhs.tex_obj)); } /// ostream operator friend std::ostream& operator<<(std::ostream& os, const self_type& itr) { return os; } }; /** @} */ // end group UtilIterator } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/iterator/tex_ref_input_iterator.cuh000066400000000000000000000302161411340063500251250ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * Random-access iterator types */ #pragma once #include #include #include "../thread/thread_load.cuh" #include "../thread/thread_store.cuh" #include "../util_device.cuh" #include "../util_debug.cuh" #include "../util_namespace.cuh" #if (CUDA_VERSION >= 5050) || defined(DOXYGEN_ACTIVE) // This iterator is compatible with CUDA 5.5 and newer #if (THRUST_VERSION >= 100700) // This iterator is compatible with Thrust API 1.7 and newer #include #include #endif // THRUST_VERSION /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * Static file-scope Tesla/Fermi-style texture references *****************************************************************************/ #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document // Anonymous namespace namespace { /// Global texture reference specialized by type template struct IteratorTexRef { /// And by unique ID template struct TexId { // Largest texture word we can use in device typedef typename UnitWord::DeviceWord DeviceWord; typedef typename UnitWord::TextureWord TextureWord; // Number of texture words per T enum { DEVICE_MULTIPLE = sizeof(T) / sizeof(DeviceWord), TEXTURE_MULTIPLE = sizeof(T) / sizeof(TextureWord) }; // Texture reference type typedef texture TexRef; // Texture reference static TexRef ref; /// Bind texture static cudaError_t BindTexture(void *d_in, size_t &offset) { if (d_in) { cudaChannelFormatDesc tex_desc = cudaCreateChannelDesc(); ref.channelDesc = tex_desc; return (CubDebug(cudaBindTexture(&offset, ref, d_in))); } return cudaSuccess; } /// Unbind texture static cudaError_t UnbindTexture() { return CubDebug(cudaUnbindTexture(ref)); } /// Fetch element template static __device__ __forceinline__ T Fetch(Distance tex_offset) { DeviceWord temp[DEVICE_MULTIPLE]; TextureWord *words = reinterpret_cast(temp); #pragma unroll for (int i = 0; i < TEXTURE_MULTIPLE; ++i) { words[i] = tex1Dfetch(ref, (tex_offset * TEXTURE_MULTIPLE) + i); } return reinterpret_cast(temp); } }; }; // Texture reference definitions template template typename IteratorTexRef::template TexId::TexRef IteratorTexRef::template TexId::ref = 0; } // Anonymous namespace #endif // DOXYGEN_SHOULD_SKIP_THIS /** * \addtogroup UtilIterator * @{ */ /** * \brief A random-access input wrapper for dereferencing array values through texture cache. Uses older Tesla/Fermi-style texture references. * * \par Overview * - TexRefInputIteratorTwraps a native device pointer of type ValueType*. References * to elements are to be loaded through texture cache. * - Can be used to load any data type from memory through texture cache. 
* - Can be manipulated and exchanged within and between host and device * functions, can only be constructed within host functions, and can only be * dereferenced within device functions. * - The \p UNIQUE_ID template parameter is used to statically name the underlying texture * reference. Only one TexRefInputIteratorTinstance can be bound at any given time for a * specific combination of (1) data type \p T, (2) \p UNIQUE_ID, (3) host * thread, and (4) compilation .o unit. * - With regard to nested/dynamic parallelism, TexRefInputIteratorTiterators may only be * created by the host thread and used by a top-level kernel (i.e. the one which is launched * from the host). * - Compatible with Thrust API v1.7 or newer. * - Compatible with CUDA toolkit v5.5 or newer. * * \par Snippet * The code snippet below illustrates the use of \p TexRefInputIteratorTto * dereference a device array of doubles through texture cache. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize a device array * int num_items; // e.g., 7 * double *d_in; // e.g., [8.0, 6.0, 7.0, 5.0, 3.0, 0.0, 9.0] * * // Create an iterator wrapper * cub::TexRefInputIterator itr; * itr.BindTexture(d_in, sizeof(double) * num_items); * ... * * // Within device code: * printf("%f\n", itr[0]); // 8.0 * printf("%f\n", itr[1]); // 6.0 * printf("%f\n", itr[6]); // 9.0 * * ... * itr.UnbindTexture(); * * \endcode * * \tparam T The value type of this iterator * \tparam UNIQUE_ID A globally-unique identifier (within the compilation unit) to name the underlying texture reference * \tparam OffsetT The difference type of this iterator (Default: \p ptrdiff_t) */ template < typename T, int UNIQUE_ID, typename OffsetT = ptrdiff_t> class TexRefInputIterator { public: // Required iterator traits typedef TexRefInputIterator self_type; ///< My own type typedef OffsetT difference_type; ///< Type to express the result of subtracting one iterator from another typedef T value_type; ///< The type of the element the iterator can point to typedef T* pointer; ///< The type of a pointer to an element the iterator can point to typedef T reference; ///< The type of a reference to an element the iterator can point to #if (THRUST_VERSION >= 100700) // Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods typedef typename thrust::detail::iterator_facade_category< thrust::device_system_tag, thrust::random_access_traversal_tag, value_type, reference >::type iterator_category; ///< The iterator category #else typedef std::random_access_iterator_tag iterator_category; ///< The iterator category #endif // THRUST_VERSION private: T* ptr; difference_type tex_offset; // Texture reference wrapper (old Tesla/Fermi-style textures) typedef typename IteratorTexRef::template TexId TexId; public: /* /// Constructor __host__ __device__ __forceinline__ TexRefInputIterator() : ptr(NULL), tex_offset(0) {} */ /// Use this iterator to bind \p ptr with a texture reference template cudaError_t BindTexture( QualifiedT *ptr, ///< Native pointer to wrap that is aligned to cudaDeviceProp::textureAlignment size_t bytes = size_t(-1), ///< Number of bytes in the range size_t tex_offset = 0) ///< OffsetT (in items) from \p ptr denoting the position of the iterator { this->ptr = const_cast::Type *>(ptr); size_t offset; cudaError_t retval = TexId::BindTexture(this->ptr + tex_offset, offset); this->tex_offset = (difference_type) (offset / sizeof(QualifiedT)); return retval; } /// Unbind this iterator from its texture reference 
cudaError_t UnbindTexture() { return TexId::UnbindTexture(); } /// Postfix increment __host__ __device__ __forceinline__ self_type operator++(int) { self_type retval = *this; tex_offset++; return retval; } /// Prefix increment __host__ __device__ __forceinline__ self_type operator++() { tex_offset++; return *this; } /// Indirection __host__ __device__ __forceinline__ reference operator*() const { #if (CUB_PTX_ARCH == 0) // Simply dereference the pointer on the host return ptr[tex_offset]; #else // Use the texture reference return TexId::Fetch(tex_offset); #endif } /// Addition template __host__ __device__ __forceinline__ self_type operator+(Distance n) const { self_type retval; retval.ptr = ptr; retval.tex_offset = tex_offset + n; return retval; } /// Addition assignment template __host__ __device__ __forceinline__ self_type& operator+=(Distance n) { tex_offset += n; return *this; } /// Subtraction template __host__ __device__ __forceinline__ self_type operator-(Distance n) const { self_type retval; retval.ptr = ptr; retval.tex_offset = tex_offset - n; return retval; } /// Subtraction assignment template __host__ __device__ __forceinline__ self_type& operator-=(Distance n) { tex_offset -= n; return *this; } /// Distance __host__ __device__ __forceinline__ difference_type operator-(self_type other) const { return tex_offset - other.tex_offset; } /// Array subscript template __host__ __device__ __forceinline__ reference operator[](Distance n) const { self_type offset = (*this) + n; return *offset; } /// Structure dereference __host__ __device__ __forceinline__ pointer operator->() { return &(*(*this)); } /// Equal to __host__ __device__ __forceinline__ bool operator==(const self_type& rhs) { return ((ptr == rhs.ptr) && (tex_offset == rhs.tex_offset)); } /// Not equal to __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs) { return ((ptr != rhs.ptr) || (tex_offset != rhs.tex_offset)); } /// ostream operator friend std::ostream& operator<<(std::ostream& os, const self_type& itr) { return os; } }; /** @} */ // end group UtilIterator } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) #endif // CUDA_VERSION relion-3.1.3/src/acc/cuda/cub/iterator/transform_input_iterator.cuh000066400000000000000000000206431411340063500255070ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * Random-access iterator types */ #pragma once #include #include #include "../thread/thread_load.cuh" #include "../thread/thread_store.cuh" #include "../util_device.cuh" #include "../util_namespace.cuh" #if (THRUST_VERSION >= 100700) // This iterator is compatible with Thrust API 1.7 and newer #include #include #endif // THRUST_VERSION /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup UtilIterator * @{ */ /** * \brief A random-access input wrapper for transforming dereferenced values. * * \par Overview * - TransformInputIteratorTwraps a unary conversion functor of type \p * ConversionOp and a random-access input iterator of type InputIteratorT, * using the former to produce references of type \p ValueType from the latter. * - Can be used with any data type. * - Can be constructed, manipulated, and exchanged within and between host and device * functions. Wrapped host memory can only be dereferenced on the host, and wrapped * device memory can only be dereferenced on the device. * - Compatible with Thrust API v1.7 or newer. * * \par Snippet * The code snippet below illustrates the use of \p TransformInputIteratorTto * dereference an array of integers, tripling the values and converting them to doubles. * \par * \code * #include // or equivalently * * // Functor for tripling integer values and converting to doubles * struct TripleDoubler * { * __host__ __device__ __forceinline__ * double operator()(const int &a) const { * return double(a * 3); * } * }; * * // Declare, allocate, and initialize a device array * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * TripleDoubler conversion_op; * * // Create an iterator wrapper * cub::TransformInputIterator itr(d_in, conversion_op); * * // Within device code: * printf("%f\n", itr[0]); // 24.0 * printf("%f\n", itr[1]); // 18.0 * printf("%f\n", itr[6]); // 27.0 * * \endcode * * \tparam ValueType The value type of this iterator * \tparam ConversionOp Unary functor type for mapping objects of type \p InputType to type \p ValueType. Must have member ValueType operator()(const InputType &datum). 
* \tparam InputIteratorT The type of the wrapped input iterator * \tparam OffsetT The difference type of this iterator (Default: \p ptrdiff_t) * */ template < typename ValueType, typename ConversionOp, typename InputIteratorT, typename OffsetT = ptrdiff_t> class TransformInputIterator { public: // Required iterator traits typedef TransformInputIterator self_type; ///< My own type typedef OffsetT difference_type; ///< Type to express the result of subtracting one iterator from another typedef ValueType value_type; ///< The type of the element the iterator can point to typedef ValueType* pointer; ///< The type of a pointer to an element the iterator can point to typedef ValueType reference; ///< The type of a reference to an element the iterator can point to #if (THRUST_VERSION >= 100700) // Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods typedef typename thrust::detail::iterator_facade_category< thrust::any_system_tag, thrust::random_access_traversal_tag, value_type, reference >::type iterator_category; ///< The iterator category #else typedef std::random_access_iterator_tag iterator_category; ///< The iterator category #endif // THRUST_VERSION private: ConversionOp conversion_op; InputIteratorT input_itr; public: /// Constructor __host__ __device__ __forceinline__ TransformInputIterator( InputIteratorT input_itr, ///< Input iterator to wrap ConversionOp conversion_op) ///< Conversion functor to wrap : conversion_op(conversion_op), input_itr(input_itr) {} /// Postfix increment __host__ __device__ __forceinline__ self_type operator++(int) { self_type retval = *this; input_itr++; return retval; } /// Prefix increment __host__ __device__ __forceinline__ self_type operator++() { input_itr++; return *this; } /// Indirection __host__ __device__ __forceinline__ reference operator*() const { return conversion_op(*input_itr); } /// Addition template __host__ __device__ __forceinline__ self_type operator+(Distance n) const { self_type retval(input_itr + n, conversion_op); return retval; } /// Addition assignment template __host__ __device__ __forceinline__ self_type& operator+=(Distance n) { input_itr += n; return *this; } /// Subtraction template __host__ __device__ __forceinline__ self_type operator-(Distance n) const { self_type retval(input_itr - n, conversion_op); return retval; } /// Subtraction assignment template __host__ __device__ __forceinline__ self_type& operator-=(Distance n) { input_itr -= n; return *this; } /// Distance __host__ __device__ __forceinline__ difference_type operator-(self_type other) const { return input_itr - other.input_itr; } /// Array subscript template __host__ __device__ __forceinline__ reference operator[](Distance n) const { return conversion_op(input_itr[n]); } /// Structure dereference __host__ __device__ __forceinline__ pointer operator->() { return &conversion_op(*input_itr); } /// Equal to __host__ __device__ __forceinline__ bool operator==(const self_type& rhs) { return (input_itr == rhs.input_itr); } /// Not equal to __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs) { return (input_itr != rhs.input_itr); } /// ostream operator friend std::ostream& operator<<(std::ostream& os, const self_type& itr) { return os; } }; /** @} */ // end group UtilIterator } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) 
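The snippet in the TransformInputIterator header above was likewise stripped of its template arguments during extraction. The following is a hedged, self-contained host-side sketch with the types restored; the functor name TripleToDouble and the include path are illustrative assumptions, not names used elsewhere in CUB or RELION.

// Hedged sketch: TransformInputIterator wrapping a plain host array (host memory
// wrapped this way may only be dereferenced on the host). Compile with nvcc.
#include <cstdio>
#include "src/acc/cuda/cub/iterator/transform_input_iterator.cuh"

// Unary functor mapping int -> double, tripling the value (assumed name).
struct TripleToDouble
{
    __host__ __device__ __forceinline__
    double operator()(const int &a) const { return double(a * 3); }
};

int main()
{
    int h_in[7] = {8, 6, 7, 5, 3, 0, 9};
    TripleToDouble op;

    // ValueType = double, ConversionOp = TripleToDouble, InputIteratorT = int*
    cub::TransformInputIterator<double, TripleToDouble, int*> itr(h_in, op);

    std::printf("%f %f %f\n", itr[0], itr[1], itr[6]); // 24.0 18.0 27.0
    return 0;
}

The same wrapper composed around a device pointer can be handed to any device-wide CUB primitive, so the conversion is fused into each load instead of requiring a separate transformed copy of the input.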
relion-3.1.3/src/acc/cuda/cub/thread/000077500000000000000000000000001411340063500172545ustar00rootroot00000000000000relion-3.1.3/src/acc/cuda/cub/thread/thread_load.cuh000066400000000000000000000446651411340063500222420ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * Thread utilities for reading memory using PTX cache modifiers. */ #pragma once #include #include #include "../util_ptx.cuh" #include "../util_type.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup UtilIo * @{ */ //----------------------------------------------------------------------------- // Tags and constants //----------------------------------------------------------------------------- /** * \brief Enumeration of cache modifiers for memory load operations. */ enum CacheLoadModifier { LOAD_DEFAULT, ///< Default (no modifier) LOAD_CA, ///< Cache at all levels LOAD_CG, ///< Cache at global level LOAD_CS, ///< Cache streaming (likely to be accessed once) LOAD_CV, ///< Cache as volatile (including cached system lines) LOAD_LDG, ///< Cache as texture LOAD_VOLATILE, ///< Volatile (any memory space) }; /** * \name Thread I/O (cache modified) * @{ */ /** * \brief Thread utility for reading memory using cub::CacheLoadModifier cache modifiers. Can be used to load any data type. 
* * \par Example * \code * #include // or equivalently * * // 32-bit load using cache-global modifier: * int *d_in; * int val = cub::ThreadLoad(d_in + threadIdx.x); * * // 16-bit load using default modifier * short *d_in; * short val = cub::ThreadLoad(d_in + threadIdx.x); * * // 256-bit load using cache-volatile modifier * double4 *d_in; * double4 val = cub::ThreadLoad(d_in + threadIdx.x); * * // 96-bit load using cache-streaming modifier * struct TestFoo { bool a; short b; }; * TestFoo *d_struct; * TestFoo val = cub::ThreadLoad(d_in + threadIdx.x); * \endcode * * \tparam MODIFIER [inferred] CacheLoadModifier enumeration * \tparam InputIteratorT [inferred] Input iterator type \iterator */ template < CacheLoadModifier MODIFIER, typename InputIteratorT> __device__ __forceinline__ typename std::iterator_traits::value_type ThreadLoad(InputIteratorT itr); //@} end member group #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document /// Helper structure for templated load iteration (inductive case) template struct IterateThreadLoad { template static __device__ __forceinline__ void Load(T const *ptr, T *vals) { vals[COUNT] = ThreadLoad(ptr + COUNT); IterateThreadLoad::template Load(ptr, vals); } template static __device__ __forceinline__ void Dereference(InputIteratorT itr, T *vals) { vals[COUNT] = itr[COUNT]; IterateThreadLoad::Dereference(itr, vals); } }; /// Helper structure for templated load iteration (termination case) template struct IterateThreadLoad { template static __device__ __forceinline__ void Load(T const * /*ptr*/, T * /*vals*/) {} template static __device__ __forceinline__ void Dereference(InputIteratorT /*itr*/, T * /*vals*/) {} }; /** * Define a uint4 (16B) ThreadLoad specialization for the given Cache load modifier */ #define _CUB_LOAD_16(cub_modifier, ptx_modifier) \ template<> \ __device__ __forceinline__ uint4 ThreadLoad(uint4 const *ptr) \ { \ uint4 retval; \ asm volatile ("ld."#ptx_modifier".v4.u32 {%0, %1, %2, %3}, [%4];" : \ "=r"(retval.x), \ "=r"(retval.y), \ "=r"(retval.z), \ "=r"(retval.w) : \ _CUB_ASM_PTR_(ptr)); \ return retval; \ } \ template<> \ __device__ __forceinline__ ulonglong2 ThreadLoad(ulonglong2 const *ptr) \ { \ ulonglong2 retval; \ asm volatile ("ld."#ptx_modifier".v2.u64 {%0, %1}, [%2];" : \ "=l"(retval.x), \ "=l"(retval.y) : \ _CUB_ASM_PTR_(ptr)); \ return retval; \ } /** * Define a uint2 (8B) ThreadLoad specialization for the given Cache load modifier */ #define _CUB_LOAD_8(cub_modifier, ptx_modifier) \ template<> \ __device__ __forceinline__ ushort4 ThreadLoad(ushort4 const *ptr) \ { \ ushort4 retval; \ asm volatile ("ld."#ptx_modifier".v4.u16 {%0, %1, %2, %3}, [%4];" : \ "=h"(retval.x), \ "=h"(retval.y), \ "=h"(retval.z), \ "=h"(retval.w) : \ _CUB_ASM_PTR_(ptr)); \ return retval; \ } \ template<> \ __device__ __forceinline__ uint2 ThreadLoad(uint2 const *ptr) \ { \ uint2 retval; \ asm volatile ("ld."#ptx_modifier".v2.u32 {%0, %1}, [%2];" : \ "=r"(retval.x), \ "=r"(retval.y) : \ _CUB_ASM_PTR_(ptr)); \ return retval; \ } \ template<> \ __device__ __forceinline__ unsigned long long ThreadLoad(unsigned long long const *ptr) \ { \ unsigned long long retval; \ asm volatile ("ld."#ptx_modifier".u64 %0, [%1];" : \ "=l"(retval) : \ _CUB_ASM_PTR_(ptr)); \ return retval; \ } /** * Define a uint (4B) ThreadLoad specialization for the given Cache load modifier */ #define _CUB_LOAD_4(cub_modifier, ptx_modifier) \ template<> \ __device__ __forceinline__ unsigned int ThreadLoad(unsigned int const *ptr) \ { \ unsigned int retval; \ asm volatile 
("ld."#ptx_modifier".u32 %0, [%1];" : \ "=r"(retval) : \ _CUB_ASM_PTR_(ptr)); \ return retval; \ } /** * Define a unsigned short (2B) ThreadLoad specialization for the given Cache load modifier */ #define _CUB_LOAD_2(cub_modifier, ptx_modifier) \ template<> \ __device__ __forceinline__ unsigned short ThreadLoad(unsigned short const *ptr) \ { \ unsigned short retval; \ asm volatile ("ld."#ptx_modifier".u16 %0, [%1];" : \ "=h"(retval) : \ _CUB_ASM_PTR_(ptr)); \ return retval; \ } /** * Define an unsigned char (1B) ThreadLoad specialization for the given Cache load modifier */ #define _CUB_LOAD_1(cub_modifier, ptx_modifier) \ template<> \ __device__ __forceinline__ unsigned char ThreadLoad(unsigned char const *ptr) \ { \ unsigned short retval; \ asm volatile ( \ "{" \ " .reg .u8 datum;" \ " ld."#ptx_modifier".u8 datum, [%1];" \ " cvt.u16.u8 %0, datum;" \ "}" : \ "=h"(retval) : \ _CUB_ASM_PTR_(ptr)); \ return (unsigned char) retval; \ } /** * Define powers-of-two ThreadLoad specializations for the given Cache load modifier */ #define _CUB_LOAD_ALL(cub_modifier, ptx_modifier) \ _CUB_LOAD_16(cub_modifier, ptx_modifier) \ _CUB_LOAD_8(cub_modifier, ptx_modifier) \ _CUB_LOAD_4(cub_modifier, ptx_modifier) \ _CUB_LOAD_2(cub_modifier, ptx_modifier) \ _CUB_LOAD_1(cub_modifier, ptx_modifier) \ /** * Define powers-of-two ThreadLoad specializations for the various Cache load modifiers */ #if CUB_PTX_ARCH >= 200 _CUB_LOAD_ALL(LOAD_CA, ca) _CUB_LOAD_ALL(LOAD_CG, cg) _CUB_LOAD_ALL(LOAD_CS, cs) _CUB_LOAD_ALL(LOAD_CV, cv) #else _CUB_LOAD_ALL(LOAD_CA, global) // Use volatile to ensure coherent reads when this PTX is JIT'd to run on newer architectures with L1 _CUB_LOAD_ALL(LOAD_CG, volatile.global) _CUB_LOAD_ALL(LOAD_CS, global) _CUB_LOAD_ALL(LOAD_CV, volatile.global) #endif #if CUB_PTX_ARCH >= 350 _CUB_LOAD_ALL(LOAD_LDG, global.nc) #else _CUB_LOAD_ALL(LOAD_LDG, global) #endif // Macro cleanup #undef _CUB_LOAD_ALL #undef _CUB_LOAD_1 #undef _CUB_LOAD_2 #undef _CUB_LOAD_4 #undef _CUB_LOAD_8 #undef _CUB_LOAD_16 /** * ThreadLoad definition for LOAD_DEFAULT modifier on iterator types */ template __device__ __forceinline__ typename std::iterator_traits::value_type ThreadLoad( InputIteratorT itr, Int2Type /*modifier*/, Int2Type /*is_pointer*/) { return *itr; } /** * ThreadLoad definition for LOAD_DEFAULT modifier on pointer types */ template __device__ __forceinline__ T ThreadLoad( T *ptr, Int2Type /*modifier*/, Int2Type /*is_pointer*/) { return *ptr; } /** * ThreadLoad definition for LOAD_VOLATILE modifier on primitive pointer types */ template __device__ __forceinline__ T ThreadLoadVolatilePointer( T *ptr, Int2Type /*is_primitive*/) { T retval = *reinterpret_cast(ptr); return retval; } /** * ThreadLoad definition for LOAD_VOLATILE modifier on non-primitive pointer types */ template __device__ __forceinline__ T ThreadLoadVolatilePointer( T *ptr, Int2Type /*is_primitive*/) { typedef typename UnitWord::VolatileWord VolatileWord; // Word type for memcopying const int VOLATILE_MULTIPLE = sizeof(T) / sizeof(VolatileWord); /* VolatileWord words[VOLATILE_MULTIPLE]; IterateThreadLoad<0, VOLATILE_MULTIPLE>::Dereference( reinterpret_cast(ptr), words); return *reinterpret_cast(words); */ T retval; VolatileWord *words = reinterpret_cast(&retval); IterateThreadLoad<0, VOLATILE_MULTIPLE>::Dereference( reinterpret_cast(ptr), words); return retval; } /** * ThreadLoad definition for LOAD_VOLATILE modifier on pointer types */ template __device__ __forceinline__ T ThreadLoad( T *ptr, Int2Type /*modifier*/, Int2Type /*is_pointer*/) { // 
Apply tags for partial-specialization return ThreadLoadVolatilePointer(ptr, Int2Type::PRIMITIVE>()); } /** * ThreadLoad definition for generic modifiers on pointer types */ template __device__ __forceinline__ T ThreadLoad( T const *ptr, Int2Type /*modifier*/, Int2Type /*is_pointer*/) { typedef typename UnitWord::DeviceWord DeviceWord; const int DEVICE_MULTIPLE = sizeof(T) / sizeof(DeviceWord); DeviceWord words[DEVICE_MULTIPLE]; IterateThreadLoad<0, DEVICE_MULTIPLE>::template Load( reinterpret_cast(const_cast(ptr)), words); return *reinterpret_cast(words); } /** * ThreadLoad definition for generic modifiers */ template < CacheLoadModifier MODIFIER, typename InputIteratorT> __device__ __forceinline__ typename std::iterator_traits::value_type ThreadLoad(InputIteratorT itr) { // Apply tags for partial-specialization return ThreadLoad( itr, Int2Type(), Int2Type::VALUE>()); } #endif // DOXYGEN_SHOULD_SKIP_THIS /** @} */ // end group UtilIo } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/thread/thread_operators.cuh000066400000000000000000000220141411340063500233210ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ******************************************************************************/ /** * \file * Simple binary operator functor types */ /****************************************************************************** * Simple functor operators ******************************************************************************/ #pragma once #include "../util_macro.cuh" #include "../util_type.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup UtilModule * @{ */ /** * \brief Default equality functor */ struct Equality { /// Boolean equality operator, returns (a == b) template __host__ __device__ __forceinline__ bool operator()(const T &a, const T &b) const { return a == b; } }; /** * \brief Default inequality functor */ struct Inequality { /// Boolean inequality operator, returns (a != b) template __host__ __device__ __forceinline__ bool operator()(const T &a, const T &b) const { return a != b; } }; /** * \brief Inequality functor (wraps equality functor) */ template struct InequalityWrapper { /// Wrapped equality operator EqualityOp op; /// Constructor __host__ __device__ __forceinline__ InequalityWrapper(EqualityOp op) : op(op) {} /// Boolean inequality operator, returns (a != b) template __host__ __device__ __forceinline__ bool operator()(const T &a, const T &b) { return !op(a, b); } }; /** * \brief Default sum functor */ struct Sum { /// Boolean sum operator, returns a + b template __host__ __device__ __forceinline__ T operator()(const T &a, const T &b) const { return a + b; } }; /** * \brief Default max functor */ struct Max { /// Boolean max operator, returns (a > b) ? a : b template __host__ __device__ __forceinline__ T operator()(const T &a, const T &b) const { return CUB_MAX(a, b); } }; /** * \brief Arg max functor (keeps the value and offset of the first occurrence of the larger item) */ struct ArgMax { /// Boolean max operator, preferring the item having the smaller offset in case of ties template __host__ __device__ __forceinline__ KeyValuePair operator()( const KeyValuePair &a, const KeyValuePair &b) const { // Mooch BUG (device reduce argmax gk110 3.2 million random fp32) // return ((b.value > a.value) || ((a.value == b.value) && (b.key < a.key))) ? b : a; if ((b.value > a.value) || ((a.value == b.value) && (b.key < a.key))) return b; return a; } }; /** * \brief Default min functor */ struct Min { /// Boolean min operator, returns (a < b) ? a : b template __host__ __device__ __forceinline__ T operator()(const T &a, const T &b) const { return CUB_MIN(a, b); } }; /** * \brief Arg min functor (keeps the value and offset of the first occurrence of the smallest item) */ struct ArgMin { /// Boolean min operator, preferring the item having the smaller offset in case of ties template __host__ __device__ __forceinline__ KeyValuePair operator()( const KeyValuePair &a, const KeyValuePair &b) const { // Mooch BUG (device reduce argmax gk110 3.2 million random fp32) // return ((b.value < a.value) || ((a.value == b.value) && (b.key < a.key))) ? 
b : a; if ((b.value < a.value) || ((a.value == b.value) && (b.key < a.key))) return b; return a; } }; /** * \brief Default cast functor */ template struct CastOp { /// Cast operator, returns (B) a template __host__ __device__ __forceinline__ B operator()(const A &a) const { return (B) a; } }; /** * \brief Binary operator wrapper for switching non-commutative scan arguments */ template class SwizzleScanOp { private: /// Wrapped scan operator ScanOp scan_op; public: /// Constructor __host__ __device__ __forceinline__ SwizzleScanOp(ScanOp scan_op) : scan_op(scan_op) {} /// Switch the scan arguments template __host__ __device__ __forceinline__ T operator()(const T &a, const T &b) { T _a(a); T _b(b); return scan_op(_b, _a); } }; /** * \brief Reduce-by-segment functor. * * Given two cub::KeyValuePair inputs \p a and \p b and a * binary associative combining operator \p f(const T &x, const T &y), * an instance of this functor returns a cub::KeyValuePair whose \p key * field is a.key + b.key, and whose \p value field * is either b.value if b.key is non-zero, or f(a.value, b.value) otherwise. * * ReduceBySegmentOp is an associative, non-commutative binary combining operator * for input sequences of cub::KeyValuePair pairings. Such * sequences are typically used to represent a segmented set of values to be reduced * and a corresponding set of {0,1}-valued integer "head flags" demarcating the * first value of each segment. * */ template ///< Binary reduction operator to apply to values struct ReduceBySegmentOp { /// Wrapped reduction operator ReductionOpT op; /// Constructor __host__ __device__ __forceinline__ ReduceBySegmentOp() {} /// Constructor __host__ __device__ __forceinline__ ReduceBySegmentOp(ReductionOpT op) : op(op) {} /// Scan operator template ///< KeyValuePair pairing of T (value) and OffsetT (head flag) __host__ __device__ __forceinline__ KeyValuePairT operator()( const KeyValuePairT &first, ///< First partial reduction const KeyValuePairT &second) ///< Second partial reduction { KeyValuePairT retval; retval.key = first.key + second.key; retval.value = (second.key) ? second.value : // The second partial reduction spans a segment reset, so it's value aggregate becomes the running aggregate op(first.value, second.value); // The second partial reduction does not span a reset, so accumulate both into the running aggregate return retval; } }; template ///< Binary reduction operator to apply to values struct ReduceByKeyOp { /// Wrapped reduction operator ReductionOpT op; /// Constructor __host__ __device__ __forceinline__ ReduceByKeyOp() {} /// Constructor __host__ __device__ __forceinline__ ReduceByKeyOp(ReductionOpT op) : op(op) {} /// Scan operator template __host__ __device__ __forceinline__ KeyValuePairT operator()( const KeyValuePairT &first, ///< First partial reduction const KeyValuePairT &second) ///< Second partial reduction { KeyValuePairT retval = second; if (first.key == second.key) retval.value = op(first.value, retval.value); return retval; } }; /** @} */ // end group UtilModule } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/thread/thread_reduce.cuh000066400000000000000000000136251411340063500225620ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * Thread utilities for sequential reduction over statically-sized array types */ #pragma once #include "../thread/thread_operators.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /// Internal namespace (to prevent ADL mishaps between static functions when mixing different CUB installations) namespace internal { /** * Sequential reduction over statically-sized array types */ template < int LENGTH, typename T, typename ReductionOp> __device__ __forceinline__ T ThreadReduce( T* input, ///< [in] Input array ReductionOp reduction_op, ///< [in] Binary reduction operator T prefix, ///< [in] Prefix to seed reduction with Int2Type /*length*/) { T retval = prefix; #pragma unroll for (int i = 0; i < LENGTH; ++i) retval = reduction_op(retval, input[i]); return retval; } /** * \brief Perform a sequential reduction over \p LENGTH elements of the \p input array, seeded with the specified \p prefix. The aggregate is returned. * * \tparam LENGTH LengthT of input array * \tparam T [inferred] The data type to be reduced. * \tparam ScanOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b) */ template < int LENGTH, typename T, typename ReductionOp> __device__ __forceinline__ T ThreadReduce( T* input, ///< [in] Input array ReductionOp reduction_op, ///< [in] Binary reduction operator T prefix) ///< [in] Prefix to seed reduction with { return ThreadReduce(input, reduction_op, prefix, Int2Type()); } /** * \brief Perform a sequential reduction over \p LENGTH elements of the \p input array. The aggregate is returned. * * \tparam LENGTH LengthT of input array * \tparam T [inferred] The data type to be reduced. 
* \tparam ScanOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b) */ template < int LENGTH, typename T, typename ReductionOp> __device__ __forceinline__ T ThreadReduce( T* input, ///< [in] Input array ReductionOp reduction_op) ///< [in] Binary reduction operator { T prefix = input[0]; return ThreadReduce(input + 1, reduction_op, prefix); } /** * \brief Perform a sequential reduction over the statically-sized \p input array, seeded with the specified \p prefix. The aggregate is returned. * * \tparam LENGTH [inferred] LengthT of \p input array * \tparam T [inferred] The data type to be reduced. * \tparam ScanOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b) */ template < int LENGTH, typename T, typename ReductionOp> __device__ __forceinline__ T ThreadReduce( T (&input)[LENGTH], ///< [in] Input array ReductionOp reduction_op, ///< [in] Binary reduction operator T prefix) ///< [in] Prefix to seed reduction with { return ThreadReduce(input, reduction_op, prefix, Int2Type()); } /** * \brief Serial reduction with the specified operator * * \tparam LENGTH [inferred] LengthT of \p input array * \tparam T [inferred] The data type to be reduced. * \tparam ScanOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b) */ template < int LENGTH, typename T, typename ReductionOp> __device__ __forceinline__ T ThreadReduce( T (&input)[LENGTH], ///< [in] Input array ReductionOp reduction_op) ///< [in] Binary reduction operator { return ThreadReduce((T*) input, reduction_op); } } // internal namespace } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/thread/thread_scan.cuh000066400000000000000000000245061411340063500222370ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ******************************************************************************/ /** * \file * Thread utilities for sequential prefix scan over statically-sized array types */ #pragma once #include "../thread/thread_operators.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /// Internal namespace (to prevent ADL mishaps between static functions when mixing different CUB installations) namespace internal { /** * \addtogroup UtilModule * @{ */ /** * \name Sequential prefix scan over statically-sized array types * @{ */ template < int LENGTH, typename T, typename ScanOp> __device__ __forceinline__ T ThreadScanExclusive( T inclusive, T exclusive, T *input, ///< [in] Input array T *output, ///< [out] Output array (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan operator Int2Type /*length*/) { #pragma unroll for (int i = 0; i < LENGTH; ++i) { inclusive = scan_op(exclusive, input[i]); output[i] = exclusive; exclusive = inclusive; } return inclusive; } /** * \brief Perform a sequential exclusive prefix scan over \p LENGTH elements of the \p input array, seeded with the specified \p prefix. The aggregate is returned. * * \tparam LENGTH LengthT of \p input and \p output arrays * \tparam T [inferred] The data type to be scanned. * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) */ template < int LENGTH, typename T, typename ScanOp> __device__ __forceinline__ T ThreadScanExclusive( T *input, ///< [in] Input array T *output, ///< [out] Output array (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan operator T prefix, ///< [in] Prefix to seed scan with bool apply_prefix = true) ///< [in] Whether or not the calling thread should apply its prefix. If not, the first output element is undefined. (Handy for preventing thread-0 from applying a prefix.) { T inclusive = input[0]; if (apply_prefix) { inclusive = scan_op(prefix, inclusive); } output[0] = prefix; T exclusive = inclusive; return ThreadScanExclusive(inclusive, exclusive, input + 1, output + 1, scan_op, Int2Type()); } /** * \brief Perform a sequential exclusive prefix scan over the statically-sized \p input array, seeded with the specified \p prefix. The aggregate is returned. * * \tparam LENGTH [inferred] LengthT of \p input and \p output arrays * \tparam T [inferred] The data type to be scanned. * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) */ template < int LENGTH, typename T, typename ScanOp> __device__ __forceinline__ T ThreadScanExclusive( T (&input)[LENGTH], ///< [in] Input array T (&output)[LENGTH], ///< [out] Output array (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan operator T prefix, ///< [in] Prefix to seed scan with bool apply_prefix = true) ///< [in] Whether or not the calling thread should apply its prefix. (Handy for preventing thread-0 from applying a prefix.) 
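    // (Illustrative note, not part of the original CUB sources.) Worked example for
    // this overload, assuming LENGTH = 3, input = {1, 2, 3}, prefix = 10 and
    // scan_op = cub::Sum(): output becomes {10, 11, 13} and the returned
    // inclusive aggregate is 16.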
{ return ThreadScanExclusive((T*) input, (T*) output, scan_op, prefix, apply_prefix); } template < int LENGTH, typename T, typename ScanOp> __device__ __forceinline__ T ThreadScanInclusive( T inclusive, T *input, ///< [in] Input array T *output, ///< [out] Output array (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan operator Int2Type /*length*/) { #pragma unroll for (int i = 0; i < LENGTH; ++i) { inclusive = scan_op(inclusive, input[i]); output[i] = inclusive; } return inclusive; } /** * \brief Perform a sequential inclusive prefix scan over \p LENGTH elements of the \p input array. The aggregate is returned. * * \tparam LENGTH LengthT of \p input and \p output arrays * \tparam T [inferred] The data type to be scanned. * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) */ template < int LENGTH, typename T, typename ScanOp> __device__ __forceinline__ T ThreadScanInclusive( T *input, ///< [in] Input array T *output, ///< [out] Output array (may be aliased to \p input) ScanOp scan_op) ///< [in] Binary scan operator { T inclusive = input[0]; output[0] = inclusive; // Continue scan return ThreadScanInclusive(inclusive, input + 1, output + 1, scan_op, Int2Type()); } /** * \brief Perform a sequential inclusive prefix scan over the statically-sized \p input array. The aggregate is returned. * * \tparam LENGTH [inferred] LengthT of \p input and \p output arrays * \tparam T [inferred] The data type to be scanned. * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) */ template < int LENGTH, typename T, typename ScanOp> __device__ __forceinline__ T ThreadScanInclusive( T (&input)[LENGTH], ///< [in] Input array T (&output)[LENGTH], ///< [out] Output array (may be aliased to \p input) ScanOp scan_op) ///< [in] Binary scan operator { return ThreadScanInclusive((T*) input, (T*) output, scan_op); } /** * \brief Perform a sequential inclusive prefix scan over \p LENGTH elements of the \p input array, seeded with the specified \p prefix. The aggregate is returned. * * \tparam LENGTH LengthT of \p input and \p output arrays * \tparam T [inferred] The data type to be scanned. * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) */ template < int LENGTH, typename T, typename ScanOp> __device__ __forceinline__ T ThreadScanInclusive( T *input, ///< [in] Input array T *output, ///< [out] Output array (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan operator T prefix, ///< [in] Prefix to seed scan with bool apply_prefix = true) ///< [in] Whether or not the calling thread should apply its prefix. (Handy for preventing thread-0 from applying a prefix.) { T inclusive = input[0]; if (apply_prefix) { inclusive = scan_op(prefix, inclusive); } output[0] = inclusive; // Continue scan return ThreadScanInclusive(inclusive, input + 1, output + 1, scan_op, Int2Type()); } /** * \brief Perform a sequential inclusive prefix scan over the statically-sized \p input array, seeded with the specified \p prefix. The aggregate is returned. * * \tparam LENGTH [inferred] LengthT of \p input and \p output arrays * \tparam T [inferred] The data type to be scanned. 
* \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) */ template < int LENGTH, typename T, typename ScanOp> __device__ __forceinline__ T ThreadScanInclusive( T (&input)[LENGTH], ///< [in] Input array T (&output)[LENGTH], ///< [out] Output array (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan operator T prefix, ///< [in] Prefix to seed scan with bool apply_prefix = true) ///< [in] Whether or not the calling thread should apply its prefix. (Handy for preventing thread-0 from applying a prefix.) { return ThreadScanInclusive((T*) input, (T*) output, scan_op, prefix, apply_prefix); } //@} end member group /** @} */ // end group UtilModule } // internal namespace } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/thread/thread_search.cuh000066400000000000000000000112751411340063500225570ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ******************************************************************************/ /** * \file * Thread utilities for sequential search */ #pragma once #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * Computes the begin offsets into A and B for the specific diagonal */ template < typename AIteratorT, typename BIteratorT, typename OffsetT, typename CoordinateT> __host__ __device__ __forceinline__ void MergePathSearch( OffsetT diagonal, AIteratorT a, BIteratorT b, OffsetT a_len, OffsetT b_len, CoordinateT& path_coordinate) { /// The value type of the input iterator typedef typename std::iterator_traits::value_type T; OffsetT split_min = CUB_MAX(diagonal - b_len, 0); OffsetT split_max = CUB_MIN(diagonal, a_len); while (split_min < split_max) { OffsetT split_pivot = (split_min + split_max) >> 1; if (a[split_pivot] <= b[diagonal - split_pivot - 1]) { // Move candidate split range up A, down B split_min = split_pivot + 1; } else { // Move candidate split range up B, down A split_max = split_pivot; } } path_coordinate.x = CUB_MIN(split_min, a_len); path_coordinate.y = diagonal - split_min; } /** * \brief Returns the offset of the first value within \p input which does not compare less than \p val */ template < typename InputIteratorT, typename OffsetT, typename T> __device__ __forceinline__ OffsetT LowerBound( InputIteratorT input, ///< [in] Input sequence OffsetT num_items, ///< [in] Input sequence length T val) ///< [in] Search key { OffsetT retval = 0; while (num_items > 0) { OffsetT half = num_items >> 1; if (input[retval + half] < val) { retval = retval + (half + 1); num_items = num_items - (half + 1); } else { num_items = half; } } return retval; } /** * \brief Returns the offset of the first value within \p input which compares greater than \p val */ template < typename InputIteratorT, typename OffsetT, typename T> __device__ __forceinline__ OffsetT UpperBound( InputIteratorT input, ///< [in] Input sequence OffsetT num_items, ///< [in] Input sequence length T val) ///< [in] Search key { OffsetT retval = 0; while (num_items > 0) { OffsetT half = num_items >> 1; if (val < input[retval + half]) { num_items = half; } else { retval = retval + (half + 1); num_items = num_items - (half + 1); } } return retval; } } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/thread/thread_store.cuh000066400000000000000000000430141411340063500224420ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * Thread utilities for writing memory using PTX cache modifiers. */ #pragma once #include #include "../util_ptx.cuh" #include "../util_type.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup UtilIo * @{ */ //----------------------------------------------------------------------------- // Tags and constants //----------------------------------------------------------------------------- /** * \brief Enumeration of cache modifiers for memory store operations. */ enum CacheStoreModifier { STORE_DEFAULT, ///< Default (no modifier) STORE_WB, ///< Cache write-back all coherent levels STORE_CG, ///< Cache at global level STORE_CS, ///< Cache streaming (likely to be accessed once) STORE_WT, ///< Cache write-through (to system memory) STORE_VOLATILE, ///< Volatile shared (any memory space) }; /** * \name Thread I/O (cache modified) * @{ */ /** * \brief Thread utility for writing memory using cub::CacheStoreModifier cache modifiers. Can be used to store any data type. 
* * \par Example * \code * #include // or equivalently * * // 32-bit store using cache-global modifier: * int *d_out; * int val; * cub::ThreadStore(d_out + threadIdx.x, val); * * // 16-bit store using default modifier * short *d_out; * short val; * cub::ThreadStore(d_out + threadIdx.x, val); * * // 256-bit store using write-through modifier * double4 *d_out; * double4 val; * cub::ThreadStore(d_out + threadIdx.x, val); * * // 96-bit store using cache-streaming cache modifier * struct TestFoo { bool a; short b; }; * TestFoo *d_struct; * TestFoo val; * cub::ThreadStore(d_out + threadIdx.x, val); * \endcode * * \tparam MODIFIER [inferred] CacheStoreModifier enumeration * \tparam InputIteratorT [inferred] Output iterator type \iterator * \tparam T [inferred] Data type of output value */ template < CacheStoreModifier MODIFIER, typename OutputIteratorT, typename T> __device__ __forceinline__ void ThreadStore(OutputIteratorT itr, T val); //@} end member group #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document /// Helper structure for templated store iteration (inductive case) template struct IterateThreadStore { template static __device__ __forceinline__ void Store(T *ptr, T *vals) { ThreadStore(ptr + COUNT, vals[COUNT]); IterateThreadStore::template Store(ptr, vals); } template static __device__ __forceinline__ void Dereference(OutputIteratorT ptr, T *vals) { ptr[COUNT] = vals[COUNT]; IterateThreadStore::Dereference(ptr, vals); } }; /// Helper structure for templated store iteration (termination case) template struct IterateThreadStore { template static __device__ __forceinline__ void Store(T * /*ptr*/, T * /*vals*/) {} template static __device__ __forceinline__ void Dereference(OutputIteratorT /*ptr*/, T * /*vals*/) {} }; /** * Define a uint4 (16B) ThreadStore specialization for the given Cache load modifier */ #define _CUB_STORE_16(cub_modifier, ptx_modifier) \ template<> \ __device__ __forceinline__ void ThreadStore(uint4* ptr, uint4 val) \ { \ asm volatile ("st."#ptx_modifier".v4.u32 [%0], {%1, %2, %3, %4};" : : \ _CUB_ASM_PTR_(ptr), \ "r"(val.x), \ "r"(val.y), \ "r"(val.z), \ "r"(val.w)); \ } \ template<> \ __device__ __forceinline__ void ThreadStore(ulonglong2* ptr, ulonglong2 val) \ { \ asm volatile ("st."#ptx_modifier".v2.u64 [%0], {%1, %2};" : : \ _CUB_ASM_PTR_(ptr), \ "l"(val.x), \ "l"(val.y)); \ } /** * Define a uint2 (8B) ThreadStore specialization for the given Cache load modifier */ #define _CUB_STORE_8(cub_modifier, ptx_modifier) \ template<> \ __device__ __forceinline__ void ThreadStore(ushort4* ptr, ushort4 val) \ { \ asm volatile ("st."#ptx_modifier".v4.u16 [%0], {%1, %2, %3, %4};" : : \ _CUB_ASM_PTR_(ptr), \ "h"(val.x), \ "h"(val.y), \ "h"(val.z), \ "h"(val.w)); \ } \ template<> \ __device__ __forceinline__ void ThreadStore(uint2* ptr, uint2 val) \ { \ asm volatile ("st."#ptx_modifier".v2.u32 [%0], {%1, %2};" : : \ _CUB_ASM_PTR_(ptr), \ "r"(val.x), \ "r"(val.y)); \ } \ template<> \ __device__ __forceinline__ void ThreadStore(unsigned long long* ptr, unsigned long long val) \ { \ asm volatile ("st."#ptx_modifier".u64 [%0], %1;" : : \ _CUB_ASM_PTR_(ptr), \ "l"(val)); \ } /** * Define a unsigned int (4B) ThreadStore specialization for the given Cache load modifier */ #define _CUB_STORE_4(cub_modifier, ptx_modifier) \ template<> \ __device__ __forceinline__ void ThreadStore(unsigned int* ptr, unsigned int val) \ { \ asm volatile ("st."#ptx_modifier".u32 [%0], %1;" : : \ _CUB_ASM_PTR_(ptr), \ "r"(val)); \ } /** * Define a unsigned short (2B) ThreadStore specialization for the given 
Cache load modifier */ #define _CUB_STORE_2(cub_modifier, ptx_modifier) \ template<> \ __device__ __forceinline__ void ThreadStore(unsigned short* ptr, unsigned short val) \ { \ asm volatile ("st."#ptx_modifier".u16 [%0], %1;" : : \ _CUB_ASM_PTR_(ptr), \ "h"(val)); \ } /** * Define a unsigned char (1B) ThreadStore specialization for the given Cache load modifier */ #define _CUB_STORE_1(cub_modifier, ptx_modifier) \ template<> \ __device__ __forceinline__ void ThreadStore(unsigned char* ptr, unsigned char val) \ { \ asm volatile ( \ "{" \ " .reg .u8 datum;" \ " cvt.u8.u16 datum, %1;" \ " st."#ptx_modifier".u8 [%0], datum;" \ "}" : : \ _CUB_ASM_PTR_(ptr), \ "h"((unsigned short) val)); \ } /** * Define powers-of-two ThreadStore specializations for the given Cache load modifier */ #define _CUB_STORE_ALL(cub_modifier, ptx_modifier) \ _CUB_STORE_16(cub_modifier, ptx_modifier) \ _CUB_STORE_8(cub_modifier, ptx_modifier) \ _CUB_STORE_4(cub_modifier, ptx_modifier) \ _CUB_STORE_2(cub_modifier, ptx_modifier) \ _CUB_STORE_1(cub_modifier, ptx_modifier) \ /** * Define ThreadStore specializations for the various Cache load modifiers */ #if CUB_PTX_ARCH >= 200 _CUB_STORE_ALL(STORE_WB, wb) _CUB_STORE_ALL(STORE_CG, cg) _CUB_STORE_ALL(STORE_CS, cs) _CUB_STORE_ALL(STORE_WT, wt) #else _CUB_STORE_ALL(STORE_WB, global) _CUB_STORE_ALL(STORE_CG, global) _CUB_STORE_ALL(STORE_CS, global) _CUB_STORE_ALL(STORE_WT, volatile.global) #endif // Macro cleanup #undef _CUB_STORE_ALL #undef _CUB_STORE_1 #undef _CUB_STORE_2 #undef _CUB_STORE_4 #undef _CUB_STORE_8 #undef _CUB_STORE_16 /** * ThreadStore definition for STORE_DEFAULT modifier on iterator types */ template __device__ __forceinline__ void ThreadStore( OutputIteratorT itr, T val, Int2Type /*modifier*/, Int2Type /*is_pointer*/) { *itr = val; } /** * ThreadStore definition for STORE_DEFAULT modifier on pointer types */ template __device__ __forceinline__ void ThreadStore( T *ptr, T val, Int2Type /*modifier*/, Int2Type /*is_pointer*/) { *ptr = val; } /** * ThreadStore definition for STORE_VOLATILE modifier on primitive pointer types */ template __device__ __forceinline__ void ThreadStoreVolatilePtr( T *ptr, T val, Int2Type /*is_primitive*/) { *reinterpret_cast(ptr) = val; } /** * ThreadStore definition for STORE_VOLATILE modifier on non-primitive pointer types */ template __device__ __forceinline__ void ThreadStoreVolatilePtr( T *ptr, T val, Int2Type /*is_primitive*/) { // Create a temporary using shuffle-words, then store using volatile-words typedef typename UnitWord::VolatileWord VolatileWord; typedef typename UnitWord::ShuffleWord ShuffleWord; const int VOLATILE_MULTIPLE = sizeof(T) / sizeof(VolatileWord); const int SHUFFLE_MULTIPLE = sizeof(T) / sizeof(ShuffleWord); VolatileWord words[VOLATILE_MULTIPLE]; #pragma unroll for (int i = 0; i < SHUFFLE_MULTIPLE; ++i) reinterpret_cast(words)[i] = reinterpret_cast(&val)[i]; IterateThreadStore<0, VOLATILE_MULTIPLE>::template Dereference( reinterpret_cast(ptr), words); } /** * ThreadStore definition for STORE_VOLATILE modifier on pointer types */ template __device__ __forceinline__ void ThreadStore( T *ptr, T val, Int2Type /*modifier*/, Int2Type /*is_pointer*/) { ThreadStoreVolatilePtr(ptr, val, Int2Type::PRIMITIVE>()); } /** * ThreadStore definition for generic modifiers on pointer types */ template __device__ __forceinline__ void ThreadStore( T *ptr, T val, Int2Type /*modifier*/, Int2Type /*is_pointer*/) { // Create a temporary using shuffle-words, then store using device-words typedef typename UnitWord::DeviceWord 
DeviceWord; typedef typename UnitWord::ShuffleWord ShuffleWord; const int DEVICE_MULTIPLE = sizeof(T) / sizeof(DeviceWord); const int SHUFFLE_MULTIPLE = sizeof(T) / sizeof(ShuffleWord); DeviceWord words[DEVICE_MULTIPLE]; #pragma unroll for (int i = 0; i < SHUFFLE_MULTIPLE; ++i) reinterpret_cast(words)[i] = reinterpret_cast(&val)[i]; IterateThreadStore<0, DEVICE_MULTIPLE>::template Store( reinterpret_cast(ptr), words); } /** * ThreadStore definition for generic modifiers */ template __device__ __forceinline__ void ThreadStore(OutputIteratorT itr, T val) { ThreadStore( itr, val, Int2Type(), Int2Type::VALUE>()); } #endif // DOXYGEN_SHOULD_SKIP_THIS /** @} */ // end group UtilIo } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/util_allocator.cuh000066400000000000000000000700541411340063500215310ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /****************************************************************************** * Simple caching allocator for device memory allocations. The allocator is * thread-safe and capable of managing device allocations on multiple devices. ******************************************************************************/ #pragma once #include "util_namespace.cuh" #include "util_debug.cuh" #include #include #include "host/mutex.cuh" #include /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup UtilMgmt * @{ */ /****************************************************************************** * CachingDeviceAllocator (host use) ******************************************************************************/ /** * \brief A simple caching allocator for device memory allocations. 
* * \par Overview * The allocator is thread-safe and stream-safe and is capable of managing cached * device allocations on multiple devices. It behaves as follows: * * \par * - Allocations from the allocator are associated with an \p active_stream. Once freed, * the allocation becomes available immediately for reuse within the \p active_stream * with which it was associated with during allocation, and it becomes available for * reuse within other streams when all prior work submitted to \p active_stream has completed. * - Allocations are categorized and cached by bin size. A new allocation request of * a given size will only consider cached allocations within the corresponding bin. * - Bin limits progress geometrically in accordance with the growth factor * \p bin_growth provided during construction. Unused device allocations within * a larger bin cache are not reused for allocation requests that categorize to * smaller bin sizes. * - Allocation requests below (\p bin_growth ^ \p min_bin) are rounded up to * (\p bin_growth ^ \p min_bin). * - Allocations above (\p bin_growth ^ \p max_bin) are not rounded up to the nearest * bin and are simply freed when they are deallocated instead of being returned * to a bin-cache. * - %If the total storage of cached allocations on a given device will exceed * \p max_cached_bytes, allocations for that device are simply freed when they are * deallocated instead of being returned to their bin-cache. * * \par * For example, the default-constructed CachingDeviceAllocator is configured with: * - \p bin_growth = 8 * - \p min_bin = 3 * - \p max_bin = 7 * - \p max_cached_bytes = 6MB - 1B * * \par * which delineates five bin-sizes: 512B, 4KB, 32KB, 256KB, and 2MB * and sets a maximum of 6,291,455 cached bytes per device * */ struct CachingDeviceAllocator { //--------------------------------------------------------------------- // Constants //--------------------------------------------------------------------- /// Out-of-bounds bin static const unsigned int INVALID_BIN = (unsigned int) -1; /// Invalid size static const size_t INVALID_SIZE = (size_t) -1; #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document /// Invalid device ordinal static const int INVALID_DEVICE_ORDINAL = -1; //--------------------------------------------------------------------- // Type definitions and helper types //--------------------------------------------------------------------- /** * Descriptor for device memory allocations */ struct BlockDescriptor { void* d_ptr; // Device pointer size_t bytes; // Size of allocation in bytes unsigned int bin; // Bin enumeration int device; // device ordinal cudaStream_t associated_stream; // Associated associated_stream cudaEvent_t ready_event; // Signal when associated stream has run to the point at which this block was freed // Constructor (suitable for searching maps for a specific block, given its pointer and device) BlockDescriptor(void *d_ptr, int device) : d_ptr(d_ptr), bytes(0), bin(INVALID_BIN), device(device), associated_stream(0), ready_event(0) {} // Constructor (suitable for searching maps for a range of suitable blocks, given a device) BlockDescriptor(int device) : d_ptr(NULL), bytes(0), bin(INVALID_BIN), device(device), associated_stream(0), ready_event(0) {} // Comparison functor for comparing device pointers static bool PtrCompare(const BlockDescriptor &a, const BlockDescriptor &b) { if (a.device == b.device) return (a.d_ptr < b.d_ptr); else return (a.device < b.device); } // Comparison functor for comparing allocation sizes static bool 
SizeCompare(const BlockDescriptor &a, const BlockDescriptor &b) { if (a.device == b.device) return (a.bytes < b.bytes); else return (a.device < b.device); } }; /// BlockDescriptor comparator function interface typedef bool (*Compare)(const BlockDescriptor &, const BlockDescriptor &); class TotalBytes { public: size_t free; size_t live; TotalBytes() { free = live = 0; } }; /// Set type for cached blocks (ordered by size) typedef std::multiset CachedBlocks; /// Set type for live blocks (ordered by ptr) typedef std::multiset BusyBlocks; /// Map type of device ordinals to the number of cached bytes cached by each device typedef std::map GpuCachedBytes; //--------------------------------------------------------------------- // Utility functions //--------------------------------------------------------------------- /** * Integer pow function for unsigned base and exponent */ static unsigned int IntPow( unsigned int base, unsigned int exp) { unsigned int retval = 1; while (exp > 0) { if (exp & 1) { retval = retval * base; // multiply the result by the current base } base = base * base; // square the base exp = exp >> 1; // divide the exponent in half } return retval; } /** * Round up to the nearest power-of */ void NearestPowerOf( unsigned int &power, size_t &rounded_bytes, unsigned int base, size_t value) { power = 0; rounded_bytes = 1; if (value * base < value) { // Overflow power = sizeof(size_t) * 8; rounded_bytes = size_t(0) - 1; return; } while (rounded_bytes < value) { rounded_bytes *= base; power++; } } //--------------------------------------------------------------------- // Fields //--------------------------------------------------------------------- cub::Mutex mutex; /// Mutex for thread-safety unsigned int bin_growth; /// Geometric growth factor for bin-sizes unsigned int min_bin; /// Minimum bin enumeration unsigned int max_bin; /// Maximum bin enumeration size_t min_bin_bytes; /// Minimum bin size size_t max_bin_bytes; /// Maximum bin size size_t max_cached_bytes; /// Maximum aggregate cached bytes per device const bool skip_cleanup; /// Whether or not to skip a call to FreeAllCached() when destructor is called. (The CUDA runtime may have already shut down for statically declared allocators) bool debug; /// Whether or not to print (de)allocation events to stdout GpuCachedBytes cached_bytes; /// Map of device ordinal to aggregate cached bytes on that device CachedBlocks cached_blocks; /// Set of cached device allocations available for reuse BusyBlocks live_blocks; /// Set of live device allocations currently in use #endif // DOXYGEN_SHOULD_SKIP_THIS //--------------------------------------------------------------------- // Methods //--------------------------------------------------------------------- /** * \brief Constructor. 
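     *
     * \par
     * (Illustrative sketch, not part of the original CUB sources.) For example, a
     * geometric growth factor of 4 with bins spanning 4^6 bytes (4KB) through
     * 4^10 bytes (1MB) would be configured as:
     * \code
     * cub::CachingDeviceAllocator allocator(4, 6, 10);
     * \endcode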
*/ CachingDeviceAllocator( unsigned int bin_growth, ///< Geometric growth factor for bin-sizes unsigned int min_bin = 1, ///< Minimum bin (default is bin_growth ^ 1) unsigned int max_bin = INVALID_BIN, ///< Maximum bin (default is no max bin) size_t max_cached_bytes = INVALID_SIZE, ///< Maximum aggregate cached bytes per device (default is no limit) bool skip_cleanup = false, ///< Whether or not to skip a call to \p FreeAllCached() when the destructor is called (default is to deallocate) bool debug = false) ///< Whether or not to print (de)allocation events to stdout (default is no stderr output) : bin_growth(bin_growth), min_bin(min_bin), max_bin(max_bin), min_bin_bytes(IntPow(bin_growth, min_bin)), max_bin_bytes(IntPow(bin_growth, max_bin)), max_cached_bytes(max_cached_bytes), skip_cleanup(skip_cleanup), debug(debug), cached_blocks(BlockDescriptor::SizeCompare), live_blocks(BlockDescriptor::PtrCompare) {} /** * \brief Default constructor. * * Configured with: * \par * - \p bin_growth = 8 * - \p min_bin = 3 * - \p max_bin = 7 * - \p max_cached_bytes = (\p bin_growth ^ \p max_bin) * 3) - 1 = 6,291,455 bytes * * which delineates five bin-sizes: 512B, 4KB, 32KB, 256KB, and 2MB and * sets a maximum of 6,291,455 cached bytes per device */ CachingDeviceAllocator( bool skip_cleanup = false, bool debug = false) : bin_growth(8), min_bin(3), max_bin(7), min_bin_bytes(IntPow(bin_growth, min_bin)), max_bin_bytes(IntPow(bin_growth, max_bin)), max_cached_bytes((max_bin_bytes * 3) - 1), skip_cleanup(skip_cleanup), debug(debug), cached_blocks(BlockDescriptor::SizeCompare), live_blocks(BlockDescriptor::PtrCompare) {} /** * \brief Sets the limit on the number bytes this allocator is allowed to cache per device. * * Changing the ceiling of cached bytes does not cause any allocations (in-use or * cached-in-reserve) to be freed. See \p FreeAllCached(). */ cudaError_t SetMaxCachedBytes( size_t max_cached_bytes) { // Lock mutex.Lock(); if (debug) _CubLog("Changing max_cached_bytes (%lld -> %lld)\n", (long long) this->max_cached_bytes, (long long) max_cached_bytes); this->max_cached_bytes = max_cached_bytes; // Unlock mutex.Unlock(); return cudaSuccess; } /** * \brief Provides a suitable allocation of device memory for the given size on the specified device. * * Once freed, the allocation becomes available immediately for reuse within the \p active_stream * with which it was associated with during allocation, and it becomes available for reuse within other * streams when all prior work submitted to \p active_stream has completed. */ cudaError_t DeviceAllocate( int device, ///< [in] Device on which to place the allocation void **d_ptr, ///< [out] Reference to pointer to the allocation size_t bytes, ///< [in] Minimum number of bytes for the allocation cudaStream_t active_stream = 0) ///< [in] The stream to be associated with this allocation { *d_ptr = NULL; int entrypoint_device = INVALID_DEVICE_ORDINAL; cudaError_t error = cudaSuccess; if (device == INVALID_DEVICE_ORDINAL) { if (CubDebug(error = cudaGetDevice(&entrypoint_device))) return error; device = entrypoint_device; } // Create a block descriptor for the requested allocation bool found = false; BlockDescriptor search_key(device); search_key.associated_stream = active_stream; NearestPowerOf(search_key.bin, search_key.bytes, bin_growth, bytes); if (search_key.bin > max_bin) { // Bin is greater than our maximum bin: allocate the request // exactly and give out-of-bounds bin. It will not be cached // for reuse when returned. 
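            // (Illustrative note, not part of the original CUB sources.) With the
            // default configuration (bin_growth = 8, max_bin = 7) this branch is taken
            // for requests larger than 8^7 bytes (2MB): such blocks are cudaMalloc'd
            // at their exact size and cudaFree'd on DeviceFree() instead of being
            // returned to the bin cache.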
search_key.bin = INVALID_BIN; search_key.bytes = bytes; } else { // Search for a suitable cached allocation: lock mutex.Lock(); if (search_key.bin < min_bin) { // Bin is less than minimum bin: round up search_key.bin = min_bin; search_key.bytes = min_bin_bytes; } // Iterate through the range of cached blocks on the same device in the same bin CachedBlocks::iterator block_itr = cached_blocks.lower_bound(search_key); while ((block_itr != cached_blocks.end()) && (block_itr->device == device) && (block_itr->bin == search_key.bin)) { // To prevent races with reusing blocks returned by the host but still // in use by the device, only consider cached blocks that are // either (from the active stream) or (from an idle stream) if ((active_stream == block_itr->associated_stream) || (cudaEventQuery(block_itr->ready_event) != cudaErrorNotReady)) { // Reuse existing cache block. Insert into live blocks. found = true; search_key = *block_itr; search_key.associated_stream = active_stream; live_blocks.insert(search_key); // Remove from free blocks cached_bytes[device].free -= search_key.bytes; cached_bytes[device].live += search_key.bytes; if (debug) _CubLog("\tDevice %d reused cached block at %p (%lld bytes) for stream %lld (previously associated with stream %lld).\n", device, search_key.d_ptr, (long long) search_key.bytes, (long long) search_key.associated_stream, (long long) block_itr->associated_stream); cached_blocks.erase(block_itr); break; } block_itr++; } // Done searching: unlock mutex.Unlock(); } // Allocate the block if necessary if (!found) { // Set runtime's current device to specified device (entrypoint may not be set) if (device != entrypoint_device) { if (CubDebug(error = cudaGetDevice(&entrypoint_device))) return error; if (CubDebug(error = cudaSetDevice(device))) return error; } // Attempt to allocate if (CubDebug(error = cudaMalloc(&search_key.d_ptr, search_key.bytes)) == cudaErrorMemoryAllocation) { // The allocation attempt failed: free all cached blocks on device and retry if (debug) _CubLog("\tDevice %d failed to allocate %lld bytes for stream %lld, retrying after freeing cached allocations", device, (long long) search_key.bytes, (long long) search_key.associated_stream); error = cudaSuccess; // Reset the error we will return cudaGetLastError(); // Reset CUDART's error // Lock mutex.Lock(); // Iterate the range of free blocks on the same device BlockDescriptor free_key(device); CachedBlocks::iterator block_itr = cached_blocks.lower_bound(free_key); while ((block_itr != cached_blocks.end()) && (block_itr->device == device)) { // No need to worry about synchronization with the device: cudaFree is // blocking and will synchronize across all kernels executing // on the current device // Free device memory and destroy stream event. 
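                    // (Illustrative note, not part of the original CUB sources.) This
                    // loop is the out-of-memory fallback: every idle cached block on
                    // this device is released, after which the allocation is attempted
                    // one more time below.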
if (CubDebug(error = cudaFree(block_itr->d_ptr))) break; if (CubDebug(error = cudaEventDestroy(block_itr->ready_event))) break; // Reduce balance and erase entry cached_bytes[device].free -= block_itr->bytes; if (debug) _CubLog("\tDevice %d freed %lld bytes.\n\t\t %lld available blocks cached (%lld bytes), %lld live blocks (%lld bytes) outstanding.\n", device, (long long) block_itr->bytes, (long long) cached_blocks.size(), (long long) cached_bytes[device].free, (long long) live_blocks.size(), (long long) cached_bytes[device].live); cached_blocks.erase(block_itr); block_itr++; } // Unlock mutex.Unlock(); // Return under error if (error) return error; // Try to allocate again if (CubDebug(error = cudaMalloc(&search_key.d_ptr, search_key.bytes))) return error; } // Create ready event if (CubDebug(error = cudaEventCreateWithFlags(&search_key.ready_event, cudaEventDisableTiming))) return error; // Insert into live blocks mutex.Lock(); live_blocks.insert(search_key); cached_bytes[device].live += search_key.bytes; mutex.Unlock(); if (debug) _CubLog("\tDevice %d allocated new device block at %p (%lld bytes associated with stream %lld).\n", device, search_key.d_ptr, (long long) search_key.bytes, (long long) search_key.associated_stream); // Attempt to revert back to previous device if necessary if ((entrypoint_device != INVALID_DEVICE_ORDINAL) && (entrypoint_device != device)) { if (CubDebug(error = cudaSetDevice(entrypoint_device))) return error; } } // Copy device pointer to output parameter *d_ptr = search_key.d_ptr; if (debug) _CubLog("\t\t%lld available blocks cached (%lld bytes), %lld live blocks outstanding(%lld bytes).\n", (long long) cached_blocks.size(), (long long) cached_bytes[device].free, (long long) live_blocks.size(), (long long) cached_bytes[device].live); return error; } /** * \brief Provides a suitable allocation of device memory for the given size on the current device. * * Once freed, the allocation becomes available immediately for reuse within the \p active_stream * with which it was associated with during allocation, and it becomes available for reuse within other * streams when all prior work submitted to \p active_stream has completed. */ cudaError_t DeviceAllocate( void **d_ptr, ///< [out] Reference to pointer to the allocation size_t bytes, ///< [in] Minimum number of bytes for the allocation cudaStream_t active_stream = 0) ///< [in] The stream to be associated with this allocation { return DeviceAllocate(INVALID_DEVICE_ORDINAL, d_ptr, bytes, active_stream); } /** * \brief Frees a live allocation of device memory on the specified device, returning it to the allocator. * * Once freed, the allocation becomes available immediately for reuse within the \p active_stream * with which it was associated with during allocation, and it becomes available for reuse within other * streams when all prior work submitted to \p active_stream has completed. 
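     *
     * \par
     * (Illustrative sketch, not part of the original CUB sources.) Typical usage via
     * the current-device overloads declared in this class, assuming an existing
     * \p cudaStream_t named \p stream:
     * \code
     * cub::CachingDeviceAllocator g_allocator;
     * void *d_temp = NULL;
     * CubDebugExit(g_allocator.DeviceAllocate(&d_temp, 1024 * sizeof(int), stream));
     * // ... launch kernels that use d_temp on 'stream' ...
     * CubDebugExit(g_allocator.DeviceFree(d_temp));
     * \endcode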
*/ cudaError_t DeviceFree( int device, void* d_ptr) { int entrypoint_device = INVALID_DEVICE_ORDINAL; cudaError_t error = cudaSuccess; if (device == INVALID_DEVICE_ORDINAL) { if (CubDebug(error = cudaGetDevice(&entrypoint_device))) return error; device = entrypoint_device; } // Lock mutex.Lock(); // Find corresponding block descriptor bool recached = false; BlockDescriptor search_key(d_ptr, device); BusyBlocks::iterator block_itr = live_blocks.find(search_key); if (block_itr != live_blocks.end()) { // Remove from live blocks search_key = *block_itr; live_blocks.erase(block_itr); cached_bytes[device].live -= search_key.bytes; // Keep the returned allocation if bin is valid and we won't exceed the max cached threshold if ((search_key.bin != INVALID_BIN) && (cached_bytes[device].free + search_key.bytes <= max_cached_bytes)) { // Insert returned allocation into free blocks recached = true; cached_blocks.insert(search_key); cached_bytes[device].free += search_key.bytes; if (debug) _CubLog("\tDevice %d returned %lld bytes from associated stream %lld.\n\t\t %lld available blocks cached (%lld bytes), %lld live blocks outstanding. (%lld bytes)\n", device, (long long) search_key.bytes, (long long) search_key.associated_stream, (long long) cached_blocks.size(), (long long) cached_bytes[device].free, (long long) live_blocks.size(), (long long) cached_bytes[device].live); } } // Unlock mutex.Unlock(); // First set to specified device (entrypoint may not be set) if (device != entrypoint_device) { if (CubDebug(error = cudaGetDevice(&entrypoint_device))) return error; if (CubDebug(error = cudaSetDevice(device))) return error; } if (recached) { // Insert the ready event in the associated stream (must have current device set properly) if (CubDebug(error = cudaEventRecord(search_key.ready_event, search_key.associated_stream))) return error; } else { // Free the allocation from the runtime and cleanup the event. if (CubDebug(error = cudaFree(d_ptr))) return error; if (CubDebug(error = cudaEventDestroy(search_key.ready_event))) return error; if (debug) _CubLog("\tDevice %d freed %lld bytes from associated stream %lld.\n\t\t %lld available blocks cached (%lld bytes), %lld live blocks (%lld bytes) outstanding.\n", device, (long long) search_key.bytes, (long long) search_key.associated_stream, (long long) cached_blocks.size(), (long long) cached_bytes[device].free, (long long) live_blocks.size(), (long long) cached_bytes[device].live); } // Reset device if ((entrypoint_device != INVALID_DEVICE_ORDINAL) && (entrypoint_device != device)) { if (CubDebug(error = cudaSetDevice(entrypoint_device))) return error; } return error; } /** * \brief Frees a live allocation of device memory on the current device, returning it to the allocator. * * Once freed, the allocation becomes available immediately for reuse within the \p active_stream * with which it was associated with during allocation, and it becomes available for reuse within other * streams when all prior work submitted to \p active_stream has completed. 
*/ cudaError_t DeviceFree( void* d_ptr) { return DeviceFree(INVALID_DEVICE_ORDINAL, d_ptr); } /** * \brief Frees all cached device allocations on all devices */ cudaError_t FreeAllCached() { cudaError_t error = cudaSuccess; int entrypoint_device = INVALID_DEVICE_ORDINAL; int current_device = INVALID_DEVICE_ORDINAL; mutex.Lock(); while (!cached_blocks.empty()) { // Get first block CachedBlocks::iterator begin = cached_blocks.begin(); // Get entry-point device ordinal if necessary if (entrypoint_device == INVALID_DEVICE_ORDINAL) { if (CubDebug(error = cudaGetDevice(&entrypoint_device))) break; } // Set current device ordinal if necessary if (begin->device != current_device) { if (CubDebug(error = cudaSetDevice(begin->device))) break; current_device = begin->device; } // Free device memory if (CubDebug(error = cudaFree(begin->d_ptr))) break; if (CubDebug(error = cudaEventDestroy(begin->ready_event))) break; // Reduce balance and erase entry cached_bytes[current_device].free -= begin->bytes; if (debug) _CubLog("\tDevice %d freed %lld bytes.\n\t\t %lld available blocks cached (%lld bytes), %lld live blocks (%lld bytes) outstanding.\n", current_device, (long long) begin->bytes, (long long) cached_blocks.size(), (long long) cached_bytes[current_device].free, (long long) live_blocks.size(), (long long) cached_bytes[current_device].live); cached_blocks.erase(begin); } mutex.Unlock(); // Attempt to revert back to entry-point device if necessary if (entrypoint_device != INVALID_DEVICE_ORDINAL) { if (CubDebug(error = cudaSetDevice(entrypoint_device))) return error; } return error; } /** * \brief Destructor */ virtual ~CachingDeviceAllocator() { if (!skip_cleanup) FreeAllCached(); } }; /** @} */ // end group UtilMgmt } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/util_arch.cuh000066400000000000000000000151601411340063500204630ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * Static architectural properties by SM version. */ #pragma once #include "util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document #if (__CUDACC_VER_MAJOR__ >= 9) && !defined(CUB_USE_COOPERATIVE_GROUPS) #define CUB_USE_COOPERATIVE_GROUPS #endif /// CUB_PTX_ARCH reflects the PTX version targeted by the active compiler pass (or zero during the host pass). #ifndef CUB_PTX_ARCH #ifndef __CUDA_ARCH__ #define CUB_PTX_ARCH 0 #else #define CUB_PTX_ARCH __CUDA_ARCH__ #endif #endif /// Whether or not the source targeted by the active compiler pass is allowed to invoke device kernels or methods from the CUDA runtime API. #ifndef CUB_RUNTIME_FUNCTION #if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__>= 350 && defined(__CUDACC_RDC__)) #define CUB_RUNTIME_ENABLED #define CUB_RUNTIME_FUNCTION __host__ __device__ #else #define CUB_RUNTIME_FUNCTION __host__ #endif #endif /// Number of threads per warp #ifndef CUB_LOG_WARP_THREADS #define CUB_LOG_WARP_THREADS(arch) \ (5) #define CUB_WARP_THREADS(arch) \ (1 << CUB_LOG_WARP_THREADS(arch)) #define CUB_PTX_WARP_THREADS CUB_WARP_THREADS(CUB_PTX_ARCH) #define CUB_PTX_LOG_WARP_THREADS CUB_LOG_WARP_THREADS(CUB_PTX_ARCH) #endif /// Number of smem banks #ifndef CUB_LOG_SMEM_BANKS #define CUB_LOG_SMEM_BANKS(arch) \ ((arch >= 200) ? \ (5) : \ (4)) #define CUB_SMEM_BANKS(arch) \ (1 << CUB_LOG_SMEM_BANKS(arch)) #define CUB_PTX_LOG_SMEM_BANKS CUB_LOG_SMEM_BANKS(CUB_PTX_ARCH) #define CUB_PTX_SMEM_BANKS CUB_SMEM_BANKS(CUB_PTX_ARCH) #endif /// Oversubscription factor #ifndef CUB_SUBSCRIPTION_FACTOR #define CUB_SUBSCRIPTION_FACTOR(arch) \ ((arch >= 300) ? \ (5) : \ ((arch >= 200) ? \ (3) : \ (10))) #define CUB_PTX_SUBSCRIPTION_FACTOR CUB_SUBSCRIPTION_FACTOR(CUB_PTX_ARCH) #endif /// Prefer padding overhead vs X-way conflicts greater than this threshold #ifndef CUB_PREFER_CONFLICT_OVER_PADDING #define CUB_PREFER_CONFLICT_OVER_PADDING(arch) \ ((arch >= 300) ? \ (1) : \ (4)) #define CUB_PTX_PREFER_CONFLICT_OVER_PADDING CUB_PREFER_CONFLICT_OVER_PADDING(CUB_PTX_ARCH) #endif /// Scale down the number of warps to keep same amount of "tile" storage as the nominal configuration for 4B data. Minimum of two warps. #ifndef CUB_BLOCK_THREADS #define CUB_BLOCK_THREADS(NOMINAL_4B_BLOCK_THREADS, T, PTX_ARCH) \ (CUB_MIN( \ NOMINAL_4B_BLOCK_THREADS * 2, \ CUB_WARP_THREADS(PTX_ARCH) * CUB_MAX( \ (NOMINAL_4B_BLOCK_THREADS / CUB_WARP_THREADS(PTX_ARCH)) * 3 / 4, \ (NOMINAL_4B_BLOCK_THREADS / CUB_WARP_THREADS(PTX_ARCH)) * 4 / sizeof(T)))) #endif /// Scale up/down number of items per thread to keep the same amount of "tile" storage as the nominal configuration for 4B data. 
Minimum 1 item per thread #ifndef CUB_ITEMS_PER_THREAD #define CUB_ITEMS_PER_THREAD(NOMINAL_4B_ITEMS_PER_THREAD, NOMINAL_4B_BLOCK_THREADS, T, PTX_ARCH) \ (CUB_MIN( \ NOMINAL_4B_ITEMS_PER_THREAD * 2, \ CUB_MAX( \ 1, \ (NOMINAL_4B_ITEMS_PER_THREAD * NOMINAL_4B_BLOCK_THREADS * 4 / sizeof(T)) / CUB_BLOCK_THREADS(NOMINAL_4B_BLOCK_THREADS, T, PTX_ARCH)))) #endif /// Define both nominal threads-per-block and items-per-thread #ifndef CUB_NOMINAL_CONFIG #define CUB_NOMINAL_CONFIG(NOMINAL_4B_BLOCK_THREADS, NOMINAL_4B_ITEMS_PER_THREAD, T) \ CUB_BLOCK_THREADS(NOMINAL_4B_BLOCK_THREADS, T, 200), \ CUB_ITEMS_PER_THREAD(NOMINAL_4B_ITEMS_PER_THREAD, NOMINAL_4B_BLOCK_THREADS, T, 200) #endif #endif // Do not document } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/util_debug.cuh000066400000000000000000000117051411340063500206350ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * Error and event logging routines. * * The following macros definitions are supported: * - \p CUB_LOG. Simple event messages are printed to \p stdout. */ #pragma once #include #include "util_namespace.cuh" #include "util_arch.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup UtilMgmt * @{ */ /// CUB error reporting macro (prints error messages to stderr) #if (defined(DEBUG) || defined(_DEBUG)) && !defined(CUB_STDERR) #define CUB_STDERR #endif /** * \brief %If \p CUB_STDERR is defined and \p error is not \p cudaSuccess, the corresponding error message is printed to \p stderr (or \p stdout in device code) along with the supplied source context. * * \return The CUDA error. 
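 *
 * \par Snippet
 * A minimal sketch of typical usage, assuming \p CUB_STDERR is defined; the
 * helper function name, the include path, and the buffer handling below are
 * illustrative assumptions, not part of the CUB distribution proper:
 * \par
 * \code
 * #include <cub/util_debug.cuh>   // or the equivalent project-relative path
 *
 * cudaError_t AllocateScratch(void **d_scratch, size_t bytes)
 * {
 *     cudaError_t error = cudaSuccess;
 *     // CubDebug() evaluates the expression, prints file/line context to
 *     // stderr on failure, and passes the error code through unchanged.
 *     if (CubDebug(error = cudaMalloc(d_scratch, bytes))) return error;
 *     return cudaSuccess;
 * }
 * \endcode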
*/ __host__ __device__ __forceinline__ cudaError_t Debug( cudaError_t error, const char* filename, int line) { (void)filename; (void)line; #ifdef CUB_STDERR if (error) { #if (CUB_PTX_ARCH == 0) fprintf(stderr, "CUDA error %d [%s, %d]: %s\n", error, filename, line, cudaGetErrorString(error)); fflush(stderr); #elif (CUB_PTX_ARCH >= 200) printf("CUDA error %d [block (%d,%d,%d) thread (%d,%d,%d), %s, %d]\n", error, blockIdx.z, blockIdx.y, blockIdx.x, threadIdx.z, threadIdx.y, threadIdx.x, filename, line); #endif } #endif return error; } /** * \brief Debug macro */ #ifndef CubDebug #define CubDebug(e) cub::Debug((cudaError_t) (e), __FILE__, __LINE__) #endif /** * \brief Debug macro with exit */ #ifndef CubDebugExit #define CubDebugExit(e) if (cub::Debug((cudaError_t) (e), __FILE__, __LINE__)) { exit(1); } #endif /** * \brief Log macro for printf statements. */ #if !defined(_CubLog) #if !(defined(__clang__) && defined(__CUDA__)) #if (CUB_PTX_ARCH == 0) #define _CubLog(format, ...) printf(format,__VA_ARGS__); #elif (CUB_PTX_ARCH >= 200) #define _CubLog(format, ...) printf("[block (%d,%d,%d), thread (%d,%d,%d)]: " format, blockIdx.z, blockIdx.y, blockIdx.x, threadIdx.z, threadIdx.y, threadIdx.x, __VA_ARGS__); #endif #else // XXX shameless hack for clang around variadic printf... // Compilies w/o supplying -std=c++11 but shows warning, // so we sielence them :) #pragma clang diagnostic ignored "-Wc++11-extensions" #pragma clang diagnostic ignored "-Wunnamed-type-template-args" template inline __host__ __device__ void va_printf(char const* format, Args const&... args) { #ifdef __CUDA_ARCH__ printf(format, blockIdx.z, blockIdx.y, blockIdx.x, threadIdx.z, threadIdx.y, threadIdx.x, args...); #else printf(format, args...); #endif } #ifndef __CUDA_ARCH__ #define _CubLog(format, ...) va_printf(format,__VA_ARGS__); #else #define _CubLog(format, ...) va_printf("[block (%d,%d,%d), thread (%d,%d,%d)]: " format, __VA_ARGS__); #endif #endif #endif /** @} */ // end group UtilMgmt } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/util_device.cuh000066400000000000000000000246421411340063500210120ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * Properties of a given CUDA device and the corresponding PTX bundle */ #pragma once #include "util_type.cuh" #include "util_arch.cuh" #include "util_debug.cuh" #include "util_namespace.cuh" #include "util_macro.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup UtilMgmt * @{ */ #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document /** * Alias temporaries to externally-allocated device storage (or simply return the amount of storage needed). */ template __host__ __device__ __forceinline__ cudaError_t AliasTemporaries( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Size in bytes of \t d_temp_storage allocation void* (&allocations)[ALLOCATIONS], ///< [in,out] Pointers to device allocations needed size_t (&allocation_sizes)[ALLOCATIONS]) ///< [in] Sizes in bytes of device allocations needed { const int ALIGN_BYTES = 256; const int ALIGN_MASK = ~(ALIGN_BYTES - 1); // Compute exclusive prefix sum over allocation requests size_t allocation_offsets[ALLOCATIONS]; size_t bytes_needed = 0; for (int i = 0; i < ALLOCATIONS; ++i) { size_t allocation_bytes = (allocation_sizes[i] + ALIGN_BYTES - 1) & ALIGN_MASK; allocation_offsets[i] = bytes_needed; bytes_needed += allocation_bytes; } bytes_needed += ALIGN_BYTES - 1; // Check if the caller is simply requesting the size of the storage allocation if (!d_temp_storage) { temp_storage_bytes = bytes_needed; return cudaSuccess; } // Check if enough storage provided if (temp_storage_bytes < bytes_needed) { return CubDebug(cudaErrorInvalidValue); } // Alias d_temp_storage = (void *) ((size_t(d_temp_storage) + ALIGN_BYTES - 1) & ALIGN_MASK); for (int i = 0; i < ALLOCATIONS; ++i) { allocations[i] = static_cast(d_temp_storage) + allocation_offsets[i]; } return cudaSuccess; } /** * Empty kernel for querying PTX manifest metadata (e.g., version) for the current device */ template __global__ void EmptyKernel(void) { } #endif // DOXYGEN_SHOULD_SKIP_THIS /** * \brief Retrieves the PTX version that will be used on the current device (major * 100 + minor * 10) */ CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t PtxVersion(int &ptx_version) { struct Dummy { /// Type definition of the EmptyKernel kernel entry point typedef void (*EmptyKernelPtr)(); /// Force EmptyKernel to be generated if this class is used CUB_RUNTIME_FUNCTION __forceinline__ EmptyKernelPtr Empty() { return EmptyKernel; } }; #ifndef CUB_RUNTIME_ENABLED (void)ptx_version; // CUDA API calls not supported from this device return cudaErrorInvalidConfiguration; #elif (CUB_PTX_ARCH > 0) ptx_version = CUB_PTX_ARCH; return cudaSuccess; #else cudaError_t error = cudaSuccess; do { cudaFuncAttributes empty_kernel_attrs; if (CubDebug(error = cudaFuncGetAttributes(&empty_kernel_attrs, EmptyKernel))) break; 
ptx_version = empty_kernel_attrs.ptxVersion * 10; } while (0); return error; #endif } /** * \brief Retrieves the SM version (major * 100 + minor * 10) */ CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t SmVersion(int &sm_version, int device_ordinal) { #ifndef CUB_RUNTIME_ENABLED (void)sm_version; (void)device_ordinal; // CUDA API calls not supported from this device return cudaErrorInvalidConfiguration; #else cudaError_t error = cudaSuccess; do { // Fill in SM version int major, minor; if (CubDebug(error = cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, device_ordinal))) break; if (CubDebug(error = cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, device_ordinal))) break; sm_version = major * 100 + minor * 10; } while (0); return error; #endif } #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document /** * Synchronize the stream if specified */ CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t SyncStream(cudaStream_t stream) { #if (CUB_PTX_ARCH == 0) return cudaStreamSynchronize(stream); #else (void)stream; // Device can't yet sync on a specific stream return cudaDeviceSynchronize(); #endif } /** * \brief Computes maximum SM occupancy in thread blocks for executing the given kernel function pointer \p kernel_ptr on the current device with \p block_threads per thread block. * * \par Snippet * The code snippet below illustrates the use of the MaxSmOccupancy function. * \par * \code * #include // or equivalently * * template * __global__ void ExampleKernel() * { * // Allocate shared memory for BlockScan * __shared__ volatile T buffer[4096]; * * ... * } * * ... * * // Determine SM occupancy for ExampleKernel specialized for unsigned char * int max_sm_occupancy; * MaxSmOccupancy(max_sm_occupancy, ExampleKernel, 64); * * // max_sm_occupancy <-- 4 on SM10 * // max_sm_occupancy <-- 8 on SM20 * // max_sm_occupancy <-- 12 on SM35 * * \endcode * */ template CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t MaxSmOccupancy( int &max_sm_occupancy, ///< [out] maximum number of thread blocks that can reside on a single SM KernelPtr kernel_ptr, ///< [in] Kernel pointer for which to compute SM occupancy int block_threads, ///< [in] Number of threads per thread block int dynamic_smem_bytes = 0) { #ifndef CUB_RUNTIME_ENABLED (void)dynamic_smem_bytes; (void)block_threads; (void)kernel_ptr; (void)max_sm_occupancy; // CUDA API calls not supported from this device return CubDebug(cudaErrorInvalidConfiguration); #else return cudaOccupancyMaxActiveBlocksPerMultiprocessor ( &max_sm_occupancy, kernel_ptr, block_threads, dynamic_smem_bytes); #endif // CUB_RUNTIME_ENABLED } /****************************************************************************** * Policy management ******************************************************************************/ /** * Kernel dispatch configuration */ struct KernelConfig { int block_threads; int items_per_thread; int tile_size; int sm_occupancy; CUB_RUNTIME_FUNCTION __forceinline__ KernelConfig() : block_threads(0), items_per_thread(0), tile_size(0), sm_occupancy(0) {} template CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t Init(KernelPtrT kernel_ptr) { block_threads = AgentPolicyT::BLOCK_THREADS; items_per_thread = AgentPolicyT::ITEMS_PER_THREAD; tile_size = block_threads * items_per_thread; cudaError_t retval = MaxSmOccupancy(sm_occupancy, kernel_ptr, block_threads); return retval; } }; /// Helper for dispatching into a policy chain template struct ChainedPolicy { /// The policy for the active compiler pass typedef typename If<(CUB_PTX_ARCH < 
PTX_VERSION), typename PrevPolicyT::ActivePolicy, PolicyT>::Type ActivePolicy; /// Specializes and dispatches op in accordance to the first policy in the chain of adequate PTX version template CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t Invoke(int ptx_version, FunctorT &op) { if (ptx_version < PTX_VERSION) { return PrevPolicyT::Invoke(ptx_version, op); } return op.template Invoke(); } }; /// Helper for dispatching into a policy chain (end-of-chain specialization) template struct ChainedPolicy { /// The policy for the active compiler pass typedef PolicyT ActivePolicy; /// Specializes and dispatches op in accordance to the first policy in the chain of adequate PTX version template CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t Invoke(int /*ptx_version*/, FunctorT &op) { return op.template Invoke(); } }; #endif // Do not document /** @} */ // end group UtilMgmt } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/util_macro.cuh000066400000000000000000000071761411340063500206570ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /****************************************************************************** * Common C/C++ macro utilities ******************************************************************************/ #pragma once #include "util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup UtilModule * @{ */ #ifndef CUB_ALIGN #if defined(_WIN32) || defined(_WIN64) /// Align struct #define CUB_ALIGN(bytes) __declspec(align(32)) #else /// Align struct #define CUB_ALIGN(bytes) __attribute__((aligned(bytes))) #endif #endif #ifndef CUB_MAX /// Select maximum(a, b) #define CUB_MAX(a, b) (((b) > (a)) ? 
(b) : (a)) #endif #ifndef CUB_MIN /// Select minimum(a, b) #define CUB_MIN(a, b) (((b) < (a)) ? (b) : (a)) #endif #ifndef CUB_QUOTIENT_FLOOR /// Quotient of x/y rounded down to nearest integer #define CUB_QUOTIENT_FLOOR(x, y) ((x) / (y)) #endif #ifndef CUB_QUOTIENT_CEILING /// Quotient of x/y rounded up to nearest integer #define CUB_QUOTIENT_CEILING(x, y) (((x) + (y) - 1) / (y)) #endif #ifndef CUB_ROUND_UP_NEAREST /// x rounded up to the nearest multiple of y #define CUB_ROUND_UP_NEAREST(x, y) ((((x) + (y) - 1) / (y)) * y) #endif #ifndef CUB_ROUND_DOWN_NEAREST /// x rounded down to the nearest multiple of y #define CUB_ROUND_DOWN_NEAREST(x, y) (((x) / (y)) * y) #endif #ifndef CUB_STATIC_ASSERT #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document #define CUB_CAT_(a, b) a ## b #define CUB_CAT(a, b) CUB_CAT_(a, b) #endif // DOXYGEN_SHOULD_SKIP_THIS /// Static assert #define CUB_STATIC_ASSERT(cond, msg) typedef int CUB_CAT(cub_static_assert, __LINE__)[(cond) ? 1 : -1] #endif /** @} */ // end group UtilModule } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/util_namespace.cuh000066400000000000000000000040641411340063500215030ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * Place-holder for prefixing the cub namespace */ #pragma once // For example: //#define CUB_NS_PREFIX namespace thrust{ namespace detail { //#define CUB_NS_POSTFIX } } #ifndef CUB_NS_PREFIX #define CUB_NS_PREFIX #endif #ifndef CUB_NS_POSTFIX #define CUB_NS_POSTFIX #endif relion-3.1.3/src/acc/cuda/cub/util_ptx.cuh000066400000000000000000000511061411340063500203610ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. 
All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * PTX intrinsics */ #pragma once #include "util_type.cuh" #include "util_arch.cuh" #include "util_namespace.cuh" #include "util_debug.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup UtilPtx * @{ */ /****************************************************************************** * PTX helper macros ******************************************************************************/ #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document /** * Register modifier for pointer-types (for inlining PTX assembly) */ #if defined(_WIN64) || defined(__LP64__) #define __CUB_LP64__ 1 // 64-bit register modifier for inlined asm #define _CUB_ASM_PTR_ "l" #define _CUB_ASM_PTR_SIZE_ "u64" #else #define __CUB_LP64__ 0 // 32-bit register modifier for inlined asm #define _CUB_ASM_PTR_ "r" #define _CUB_ASM_PTR_SIZE_ "u32" #endif #endif // DOXYGEN_SHOULD_SKIP_THIS /****************************************************************************** * Inlined PTX intrinsics ******************************************************************************/ /** * \brief Shift-right then add. Returns (\p x >> \p shift) + \p addend. */ __device__ __forceinline__ unsigned int SHR_ADD( unsigned int x, unsigned int shift, unsigned int addend) { unsigned int ret; #if CUB_PTX_ARCH >= 200 asm ("vshr.u32.u32.u32.clamp.add %0, %1, %2, %3;" : "=r"(ret) : "r"(x), "r"(shift), "r"(addend)); #else ret = (x >> shift) + addend; #endif return ret; } /** * \brief Shift-left then add. Returns (\p x << \p shift) + \p addend. */ __device__ __forceinline__ unsigned int SHL_ADD( unsigned int x, unsigned int shift, unsigned int addend) { unsigned int ret; #if CUB_PTX_ARCH >= 200 asm ("vshl.u32.u32.u32.clamp.add %0, %1, %2, %3;" : "=r"(ret) : "r"(x), "r"(shift), "r"(addend)); #else ret = (x << shift) + addend; #endif return ret; } #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document /** * Bitfield-extract. 
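 *
 * A minimal sketch of the public three-argument BFE() wrapper declared further
 * below, assuming a 32-bit input word (the function name and values shown are
 * illustrative only):
 * \code
 * __device__ void ExampleBitfield()
 * {
 *     unsigned int packed = 0x00000AB0;
 *     unsigned int field  = BFE(packed, 4, 8);   // extracts bits 4..11 -> 0xAB
 * }
 * \endcode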
*/ template __device__ __forceinline__ unsigned int BFE( UnsignedBits source, unsigned int bit_start, unsigned int num_bits, Int2Type /*byte_len*/) { unsigned int bits; #if CUB_PTX_ARCH >= 200 asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(bits) : "r"((unsigned int) source), "r"(bit_start), "r"(num_bits)); #else const unsigned int MASK = (1 << num_bits) - 1; bits = (source >> bit_start) & MASK; #endif return bits; } /** * Bitfield-extract for 64-bit types. */ template __device__ __forceinline__ unsigned int BFE( UnsignedBits source, unsigned int bit_start, unsigned int num_bits, Int2Type<8> /*byte_len*/) { const unsigned long long MASK = (1ull << num_bits) - 1; return (source >> bit_start) & MASK; } #endif // DOXYGEN_SHOULD_SKIP_THIS /** * \brief Bitfield-extract. Extracts \p num_bits from \p source starting at bit-offset \p bit_start. The input \p source may be an 8b, 16b, 32b, or 64b unsigned integer type. */ template __device__ __forceinline__ unsigned int BFE( UnsignedBits source, unsigned int bit_start, unsigned int num_bits) { return BFE(source, bit_start, num_bits, Int2Type()); } /** * \brief Bitfield insert. Inserts the \p num_bits least significant bits of \p y into \p x at bit-offset \p bit_start. */ __device__ __forceinline__ void BFI( unsigned int &ret, unsigned int x, unsigned int y, unsigned int bit_start, unsigned int num_bits) { #if CUB_PTX_ARCH >= 200 asm ("bfi.b32 %0, %1, %2, %3, %4;" : "=r"(ret) : "r"(y), "r"(x), "r"(bit_start), "r"(num_bits)); #else x <<= bit_start; unsigned int MASK_X = ((1 << num_bits) - 1) << bit_start; unsigned int MASK_Y = ~MASK_X; ret = (y & MASK_Y) | (x & MASK_X); #endif } /** * \brief Three-operand add. Returns \p x + \p y + \p z. */ __device__ __forceinline__ unsigned int IADD3(unsigned int x, unsigned int y, unsigned int z) { #if CUB_PTX_ARCH >= 200 asm ("vadd.u32.u32.u32.add %0, %1, %2, %3;" : "=r"(x) : "r"(x), "r"(y), "r"(z)); #else x = x + y + z; #endif return x; } /** * \brief Byte-permute. Pick four arbitrary bytes from two 32-bit registers, and reassemble them into a 32-bit destination register. For SM2.0 or later. * * \par * The bytes in the two source registers \p a and \p b are numbered from 0 to 7: * {\p b, \p a} = {{b7, b6, b5, b4}, {b3, b2, b1, b0}}. For each of the four bytes * {b3, b2, b1, b0} selected in the return value, a 4-bit selector is defined within * the four lower "nibbles" of \p index: {\p index } = {n7, n6, n5, n4, n3, n2, n1, n0} * * \par Snippet * The code snippet below illustrates byte-permute. * \par * \code * #include * * __global__ void ExampleKernel(...) * { * int a = 0x03020100; * int b = 0x07060504; * int index = 0x00007531; * * int selected = PRMT(a, b, index); // 0x07050301 * * \endcode * */ __device__ __forceinline__ int PRMT(unsigned int a, unsigned int b, unsigned int index) { int ret; asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(ret) : "r"(a), "r"(b), "r"(index)); return ret; } #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document /** * Sync-threads barrier. 
*/ __device__ __forceinline__ void BAR(int count) { asm volatile("bar.sync 1, %0;" : : "r"(count)); } /** * CTA barrier */ __device__ __forceinline__ void CTA_SYNC() { __syncthreads(); } /** * CTA barrier with predicate */ __device__ __forceinline__ int CTA_SYNC_AND(int p) { return __syncthreads_and(p); } /** * Warp barrier */ __device__ __forceinline__ void WARP_SYNC(unsigned int member_mask) { #ifdef CUB_USE_COOPERATIVE_GROUPS __syncwarp(member_mask); #endif } /** * Warp any */ __device__ __forceinline__ int WARP_ANY(int predicate, unsigned int member_mask) { #ifdef CUB_USE_COOPERATIVE_GROUPS return __any_sync(member_mask, predicate); #else return ::__any(predicate); #endif } /** * Warp any */ __device__ __forceinline__ int WARP_ALL(int predicate, unsigned int member_mask) { #ifdef CUB_USE_COOPERATIVE_GROUPS return __all_sync(member_mask, predicate); #else return ::__all(predicate); #endif } /** * Warp ballot */ __device__ __forceinline__ int WARP_BALLOT(int predicate, unsigned int member_mask) { #ifdef CUB_USE_COOPERATIVE_GROUPS return __ballot_sync(member_mask, predicate); #else return __ballot(predicate); #endif } /** * Warp synchronous shfl_up */ __device__ __forceinline__ unsigned int SHFL_UP_SYNC(unsigned int word, int src_offset, int first_lane, unsigned int member_mask) { #ifdef CUB_USE_COOPERATIVE_GROUPS asm volatile("shfl.sync.up.b32 %0, %1, %2, %3, %4;" : "=r"(word) : "r"(word), "r"(src_offset), "r"(first_lane), "r"(member_mask)); #else asm volatile("shfl.up.b32 %0, %1, %2, %3;" : "=r"(word) : "r"(word), "r"(src_offset), "r"(first_lane)); #endif return word; } /** * Warp synchronous shfl_down */ __device__ __forceinline__ unsigned int SHFL_DOWN_SYNC(unsigned int word, int src_offset, int last_lane, unsigned int member_mask) { #ifdef CUB_USE_COOPERATIVE_GROUPS asm volatile("shfl.sync.down.b32 %0, %1, %2, %3, %4;" : "=r"(word) : "r"(word), "r"(src_offset), "r"(last_lane), "r"(member_mask)); #else asm volatile("shfl.down.b32 %0, %1, %2, %3;" : "=r"(word) : "r"(word), "r"(src_offset), "r"(last_lane)); #endif return word; } /** * Warp synchronous shfl_idx */ __device__ __forceinline__ unsigned int SHFL_IDX_SYNC(unsigned int word, int src_lane, int last_lane, unsigned int member_mask) { #ifdef CUB_USE_COOPERATIVE_GROUPS asm volatile("shfl.sync.idx.b32 %0, %1, %2, %3, %4;" : "=r"(word) : "r"(word), "r"(src_lane), "r"(last_lane), "r"(member_mask)); #else asm volatile("shfl.idx.b32 %0, %1, %2, %3;" : "=r"(word) : "r"(word), "r"(src_lane), "r"(last_lane)); #endif return word; } /** * Floating point multiply. (Mantissa LSB rounds towards zero.) */ __device__ __forceinline__ float FMUL_RZ(float a, float b) { float d; asm ("mul.rz.f32 %0, %1, %2;" : "=f"(d) : "f"(a), "f"(b)); return d; } /** * Floating point multiply-add. (Mantissa LSB rounds towards zero.) */ __device__ __forceinline__ float FFMA_RZ(float a, float b, float c) { float d; asm ("fma.rz.f32 %0, %1, %2, %3;" : "=f"(d) : "f"(a), "f"(b), "f"(c)); return d; } #endif // DOXYGEN_SHOULD_SKIP_THIS /** * \brief Terminates the calling thread */ __device__ __forceinline__ void ThreadExit() { asm volatile("exit;"); } /** * \brief Abort execution and generate an interrupt to the host CPU */ __device__ __forceinline__ void ThreadTrap() { asm volatile("trap;"); } /** * \brief Returns the row-major linear thread identifier for a multidimensional thread block */ __device__ __forceinline__ int RowMajorTid(int block_dim_x, int block_dim_y, int block_dim_z) { return ((block_dim_z == 1) ? 
0 : (threadIdx.z * block_dim_x * block_dim_y)) + ((block_dim_y == 1) ? 0 : (threadIdx.y * block_dim_x)) + threadIdx.x; } /** * \brief Returns the warp lane ID of the calling thread */ __device__ __forceinline__ unsigned int LaneId() { unsigned int ret; asm ("mov.u32 %0, %%laneid;" : "=r"(ret) ); return ret; } /** * \brief Returns the warp ID of the calling thread. Warp ID is guaranteed to be unique among warps, but may not correspond to a zero-based ranking within the thread block. */ __device__ __forceinline__ unsigned int WarpId() { unsigned int ret; asm ("mov.u32 %0, %%warpid;" : "=r"(ret) ); return ret; } /** * \brief Returns the warp lane mask of all lanes less than the calling thread */ __device__ __forceinline__ unsigned int LaneMaskLt() { unsigned int ret; asm ("mov.u32 %0, %%lanemask_lt;" : "=r"(ret) ); return ret; } /** * \brief Returns the warp lane mask of all lanes less than or equal to the calling thread */ __device__ __forceinline__ unsigned int LaneMaskLe() { unsigned int ret; asm ("mov.u32 %0, %%lanemask_le;" : "=r"(ret) ); return ret; } /** * \brief Returns the warp lane mask of all lanes greater than the calling thread */ __device__ __forceinline__ unsigned int LaneMaskGt() { unsigned int ret; asm ("mov.u32 %0, %%lanemask_gt;" : "=r"(ret) ); return ret; } /** * \brief Returns the warp lane mask of all lanes greater than or equal to the calling thread */ __device__ __forceinline__ unsigned int LaneMaskGe() { unsigned int ret; asm ("mov.u32 %0, %%lanemask_ge;" : "=r"(ret) ); return ret; } /** @} */ // end group UtilPtx /** * \brief Shuffle-up for any data type. Each warp-lanei obtains the value \p input contributed by warp-lanei-src_offset. For thread lanes \e i < src_offset, the thread's own \p input is returned to the thread. ![](shfl_up_logo.png) * \ingroup WarpModule * * \par * - Available only for SM3.0 or newer * * \par Snippet * The code snippet below illustrates each thread obtaining a \p double value from the * predecessor of its predecessor. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Obtain one input item per thread * double thread_data = ... * * // Obtain item from two ranks below * double peer_data = ShuffleUp(thread_data, 2, 0, 0xffffffff); * * \endcode * \par * Suppose the set of input \p thread_data across the first warp of threads is {1.0, 2.0, 3.0, 4.0, 5.0, ..., 32.0}. * The corresponding output \p peer_data will be {1.0, 2.0, 1.0, 2.0, 3.0, ..., 30.0}. * */ template __device__ __forceinline__ T ShuffleUp( T input, ///< [in] The value to broadcast int src_offset, ///< [in] The relative down-offset of the peer to read from int first_lane, ///< [in] Index of first lane in segment (typically 0) unsigned int member_mask) ///< [in] 32-bit mask of participating warp lanes { typedef typename UnitWord::ShuffleWord ShuffleWord; const int WORDS = (sizeof(T) + sizeof(ShuffleWord) - 1) / sizeof(ShuffleWord); T output; ShuffleWord *output_alias = reinterpret_cast(&output); ShuffleWord *input_alias = reinterpret_cast(&input); unsigned int shuffle_word; shuffle_word = SHFL_UP_SYNC((unsigned int)input_alias[0], src_offset, first_lane, member_mask); output_alias[0] = shuffle_word; #pragma unroll for (int WORD = 1; WORD < WORDS; ++WORD) { shuffle_word = SHFL_UP_SYNC((unsigned int)input_alias[WORD], src_offset, first_lane, member_mask); output_alias[WORD] = shuffle_word; } return output; } /** * \brief Shuffle-down for any data type. Each warp-lanei obtains the value \p input contributed by warp-lanei+src_offset. 
For thread lanes \e i >= WARP_THREADS, the thread's own \p input is returned to the thread. ![](shfl_down_logo.png) * \ingroup WarpModule * * \par * - Available only for SM3.0 or newer * * \par Snippet * The code snippet below illustrates each thread obtaining a \p double value from the * successor of its successor. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Obtain one input item per thread * double thread_data = ... * * // Obtain item from two ranks below * double peer_data = ShuffleDown(thread_data, 2, 31, 0xffffffff); * * \endcode * \par * Suppose the set of input \p thread_data across the first warp of threads is {1.0, 2.0, 3.0, 4.0, 5.0, ..., 32.0}. * The corresponding output \p peer_data will be {3.0, 4.0, 5.0, 6.0, 7.0, ..., 32.0}. * */ template __device__ __forceinline__ T ShuffleDown( T input, ///< [in] The value to broadcast int src_offset, ///< [in] The relative up-offset of the peer to read from int last_lane, ///< [in] Index of first lane in segment (typically 31) unsigned int member_mask) ///< [in] 32-bit mask of participating warp lanes { typedef typename UnitWord::ShuffleWord ShuffleWord; const int WORDS = (sizeof(T) + sizeof(ShuffleWord) - 1) / sizeof(ShuffleWord); T output; ShuffleWord *output_alias = reinterpret_cast(&output); ShuffleWord *input_alias = reinterpret_cast(&input); unsigned int shuffle_word; shuffle_word = SHFL_DOWN_SYNC((unsigned int)input_alias[0], src_offset, last_lane, member_mask); output_alias[0] = shuffle_word; #pragma unroll for (int WORD = 1; WORD < WORDS; ++WORD) { shuffle_word = SHFL_DOWN_SYNC((unsigned int)input_alias[WORD], src_offset, last_lane, member_mask); output_alias[WORD] = shuffle_word; } return output; } /** * \brief Shuffle-broadcast for any data type. Each warp-lanei obtains the value \p input * contributed by warp-lanesrc_lane. For \p src_lane < 0 or \p src_lane >= WARP_THREADS, * then the thread's own \p input is returned to the thread. ![](shfl_broadcast_logo.png) * * \ingroup WarpModule * * \par * - Available only for SM3.0 or newer * * \par Snippet * The code snippet below illustrates each thread obtaining a \p double value from warp-lane0. * * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Obtain one input item per thread * double thread_data = ... * * // Obtain item from thread 0 * double peer_data = ShuffleIndex(thread_data, 0, 32, 0xffffffff); * * \endcode * \par * Suppose the set of input \p thread_data across the first warp of threads is {1.0, 2.0, 3.0, 4.0, 5.0, ..., 32.0}. * The corresponding output \p peer_data will be {1.0, 1.0, 1.0, 1.0, 1.0, ..., 1.0}. 
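 *
 * \par Snippet
 * A minimal sketch combining the shuffle primitives documented above into a
 * warp-wide sum, assuming a full 32-thread warp with all lanes active (the
 * function name is illustrative only, not part of the CUB API):
 * \par
 * \code
 * __device__ float WarpSumBroadcast(float thread_data)
 * {
 *     // Tree reduction: at each step, lane i adds the value held by lane i + offset.
 *     for (int offset = 16; offset > 0; offset >>= 1)
 *         thread_data += ShuffleDown(thread_data, offset, 31, 0xffffffff);
 *
 *     // Lane 0 now holds the sum of all 32 inputs; broadcast it to every lane.
 *     return ShuffleIndex(thread_data, 0, 32, 0xffffffff);
 * }
 * \endcode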
* */ template __device__ __forceinline__ T ShuffleIndex( T input, ///< [in] The value to broadcast int src_lane, ///< [in] Which warp lane is to do the broadcasting int logical_warp_threads, ///< [in] Number of threads per logical warp unsigned int member_mask) ///< [in] 32-bit mask of participating warp lanes { typedef typename UnitWord::ShuffleWord ShuffleWord; const int WORDS = (sizeof(T) + sizeof(ShuffleWord) - 1) / sizeof(ShuffleWord); T output; ShuffleWord *output_alias = reinterpret_cast(&output); ShuffleWord *input_alias = reinterpret_cast(&input); unsigned int shuffle_word; shuffle_word = SHFL_IDX_SYNC((unsigned int)input_alias[0], src_lane, logical_warp_threads - 1, member_mask); output_alias[0] = shuffle_word; #pragma unroll for (int WORD = 1; WORD < WORDS; ++WORD) { shuffle_word = SHFL_IDX_SYNC((unsigned int)input_alias[WORD], src_lane, logical_warp_threads - 1, member_mask); output_alias[WORD] = shuffle_word; } return output; } /** * Compute a 32b mask of threads having the same least-significant * LABEL_BITS of \p label as the calling thread. */ template inline __device__ unsigned int MatchAny(unsigned int label) { unsigned int retval; // Extract masks of common threads for each bit #pragma unroll for (int BIT = 0; BIT < LABEL_BITS; ++BIT) { unsigned int mask; unsigned int current_bit = 1 << BIT; asm ("{\n" " .reg .pred p;\n" " and.b32 %0, %1, %2;" " setp.eq.u32 p, %0, %2;\n" #ifdef CUB_USE_COOPERATIVE_GROUPS " vote.ballot.sync.b32 %0, p, 0xffffffff;\n" #else " vote.ballot.b32 %0, p;\n" #endif " @!p not.b32 %0, %0;\n" "}\n" : "=r"(mask) : "r"(label), "r"(current_bit)); // Remove peers who differ retval = (BIT == 0) ? mask : retval & mask; } return retval; // // VOLTA match // unsigned int retval; // asm ("{\n" // " match.any.sync.b32 %0, %1, 0xffffffff;\n" // "}\n" : "=r"(retval) : "r"(label)); // return retval; } } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/util_type.cuh000066400000000000000000001150321411340063500205260ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * Common type manipulation (metaprogramming) utilities */ #pragma once #include #include #include #include "util_macro.cuh" #include "util_arch.cuh" #include "util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup UtilModule * @{ */ /****************************************************************************** * Type equality ******************************************************************************/ /** * \brief Type selection (IF ? ThenType : ElseType) */ template struct If { /// Conditional type result typedef ThenType Type; // true }; #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document template struct If { typedef ElseType Type; // false }; #endif // DOXYGEN_SHOULD_SKIP_THIS /****************************************************************************** * Conditional types ******************************************************************************/ /** * \brief Type equality test */ template struct Equals { enum { VALUE = 0, NEGATE = 1 }; }; #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document template struct Equals { enum { VALUE = 1, NEGATE = 0 }; }; #endif // DOXYGEN_SHOULD_SKIP_THIS /****************************************************************************** * Static math ******************************************************************************/ /** * \brief Statically determine log2(N), rounded up. * * For example: * Log2<8>::VALUE // 3 * Log2<3>::VALUE // 2 */ template struct Log2 { /// Static logarithm value enum { VALUE = Log2> 1), COUNT + 1>::VALUE }; // Inductive case }; #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document template struct Log2 { enum {VALUE = (1 << (COUNT - 1) < N) ? // Base case COUNT : COUNT - 1 }; }; #endif // DOXYGEN_SHOULD_SKIP_THIS /** * \brief Statically determine if N is a power-of-two */ template struct PowerOfTwo { enum { VALUE = ((N & (N - 1)) == 0) }; }; /****************************************************************************** * Pointer vs. iterator detection ******************************************************************************/ /** * \brief Pointer vs. 
iterator */ template struct IsPointer { enum { VALUE = 0 }; }; #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document template struct IsPointer { enum { VALUE = 1 }; }; #endif // DOXYGEN_SHOULD_SKIP_THIS /****************************************************************************** * Qualifier detection ******************************************************************************/ /** * \brief Volatile modifier test */ template struct IsVolatile { enum { VALUE = 0 }; }; #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document template struct IsVolatile { enum { VALUE = 1 }; }; #endif // DOXYGEN_SHOULD_SKIP_THIS /****************************************************************************** * Qualifier removal ******************************************************************************/ /** * \brief Removes \p const and \p volatile qualifiers from type \p Tp. * * For example: * typename RemoveQualifiers::Type // int; */ template struct RemoveQualifiers { /// Type without \p const and \p volatile qualifiers typedef Up Type; }; #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document template struct RemoveQualifiers { typedef Up Type; }; template struct RemoveQualifiers { typedef Up Type; }; template struct RemoveQualifiers { typedef Up Type; }; /****************************************************************************** * Marker types ******************************************************************************/ /** * \brief A simple "NULL" marker type */ struct NullType { #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document template __host__ __device__ __forceinline__ NullType& operator =(const T&) { return *this; } __host__ __device__ __forceinline__ bool operator ==(const NullType&) { return true; } __host__ __device__ __forceinline__ bool operator !=(const NullType&) { return false; } #endif // DOXYGEN_SHOULD_SKIP_THIS }; /** * \brief Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static call dispatch based on constant integral values) */ template struct Int2Type { enum {VALUE = A}; }; #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document /****************************************************************************** * Size and alignment ******************************************************************************/ /// Structure alignment template struct AlignBytes { struct Pad { T val; char byte; }; enum { /// The "true CUDA" alignment of T in bytes ALIGN_BYTES = sizeof(Pad) - sizeof(T) }; /// The "truly aligned" type typedef T Type; }; // Specializations where host C++ compilers (e.g., 32-bit Windows) may disagree // with device C++ compilers (EDG) on types passed as template parameters through // kernel functions #define __CUB_ALIGN_BYTES(t, b) \ template <> struct AlignBytes \ { enum { ALIGN_BYTES = b }; typedef __align__(b) t Type; }; __CUB_ALIGN_BYTES(short4, 8) __CUB_ALIGN_BYTES(ushort4, 8) __CUB_ALIGN_BYTES(int2, 8) __CUB_ALIGN_BYTES(uint2, 8) __CUB_ALIGN_BYTES(long long, 8) __CUB_ALIGN_BYTES(unsigned long long, 8) __CUB_ALIGN_BYTES(float2, 8) __CUB_ALIGN_BYTES(double, 8) #ifdef _WIN32 __CUB_ALIGN_BYTES(long2, 8) __CUB_ALIGN_BYTES(ulong2, 8) #else __CUB_ALIGN_BYTES(long2, 16) __CUB_ALIGN_BYTES(ulong2, 16) #endif __CUB_ALIGN_BYTES(int4, 16) __CUB_ALIGN_BYTES(uint4, 16) __CUB_ALIGN_BYTES(float4, 16) __CUB_ALIGN_BYTES(long4, 16) __CUB_ALIGN_BYTES(ulong4, 16) __CUB_ALIGN_BYTES(longlong2, 16) __CUB_ALIGN_BYTES(ulonglong2, 16) __CUB_ALIGN_BYTES(double2, 16) __CUB_ALIGN_BYTES(longlong4, 16) __CUB_ALIGN_BYTES(ulonglong4, 16) 
__CUB_ALIGN_BYTES(double4, 16) template struct AlignBytes : AlignBytes {}; template struct AlignBytes : AlignBytes {}; template struct AlignBytes : AlignBytes {}; /// Unit-words of data movement template struct UnitWord { enum { ALIGN_BYTES = AlignBytes::ALIGN_BYTES }; template struct IsMultiple { enum { UNIT_ALIGN_BYTES = AlignBytes::ALIGN_BYTES, IS_MULTIPLE = (sizeof(T) % sizeof(Unit) == 0) && (ALIGN_BYTES % UNIT_ALIGN_BYTES == 0) }; }; /// Biggest shuffle word that T is a whole multiple of and is not larger than the alignment of T typedef typename If::IS_MULTIPLE, unsigned int, typename If::IS_MULTIPLE, unsigned short, unsigned char>::Type>::Type ShuffleWord; /// Biggest volatile word that T is a whole multiple of and is not larger than the alignment of T typedef typename If::IS_MULTIPLE, unsigned long long, ShuffleWord>::Type VolatileWord; /// Biggest memory-access word that T is a whole multiple of and is not larger than the alignment of T typedef typename If::IS_MULTIPLE, ulonglong2, VolatileWord>::Type DeviceWord; /// Biggest texture reference word that T is a whole multiple of and is not larger than the alignment of T typedef typename If::IS_MULTIPLE, uint4, typename If::IS_MULTIPLE, uint2, ShuffleWord>::Type>::Type TextureWord; }; // float2 specialization workaround (for SM10-SM13) template <> struct UnitWord { typedef int ShuffleWord; #if (CUB_PTX_ARCH > 0) && (CUB_PTX_ARCH <= 130) typedef float VolatileWord; typedef uint2 DeviceWord; #else typedef unsigned long long VolatileWord; typedef unsigned long long DeviceWord; #endif typedef float2 TextureWord; }; // float4 specialization workaround (for SM10-SM13) template <> struct UnitWord { typedef int ShuffleWord; #if (CUB_PTX_ARCH > 0) && (CUB_PTX_ARCH <= 130) typedef float VolatileWord; typedef uint4 DeviceWord; #else typedef unsigned long long VolatileWord; typedef ulonglong2 DeviceWord; #endif typedef float4 TextureWord; }; // char2 specialization workaround (for SM10-SM13) template <> struct UnitWord { typedef unsigned short ShuffleWord; #if (CUB_PTX_ARCH > 0) && (CUB_PTX_ARCH <= 130) typedef unsigned short VolatileWord; typedef short DeviceWord; #else typedef unsigned short VolatileWord; typedef unsigned short DeviceWord; #endif typedef unsigned short TextureWord; }; template struct UnitWord : UnitWord {}; template struct UnitWord : UnitWord {}; template struct UnitWord : UnitWord {}; #endif // DOXYGEN_SHOULD_SKIP_THIS /****************************************************************************** * Vector type inference utilities. ******************************************************************************/ /** * \brief Exposes a member typedef \p Type that names the corresponding CUDA vector type if one exists. Otherwise \p Type refers to the CubVector structure itself, which will wrap the corresponding \p x, \p y, etc. vector fields. 
*/ template struct CubVector; #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document enum { /// The maximum number of elements in CUDA vector types MAX_VEC_ELEMENTS = 4, }; /** * Generic vector-1 type */ template struct CubVector { T x; typedef T BaseType; typedef CubVector Type; }; /** * Generic vector-2 type */ template struct CubVector { T x; T y; typedef T BaseType; typedef CubVector Type; }; /** * Generic vector-3 type */ template struct CubVector { T x; T y; T z; typedef T BaseType; typedef CubVector Type; }; /** * Generic vector-4 type */ template struct CubVector { T x; T y; T z; T w; typedef T BaseType; typedef CubVector Type; }; /** * Macro for expanding partially-specialized built-in vector types */ #define CUB_DEFINE_VECTOR_TYPE(base_type,short_type) \ \ template<> struct CubVector : short_type##1 \ { \ typedef base_type BaseType; \ typedef short_type##1 Type; \ __host__ __device__ __forceinline__ CubVector operator+(const CubVector &other) const { \ CubVector retval; \ retval.x = x + other.x; \ return retval; \ } \ __host__ __device__ __forceinline__ CubVector operator-(const CubVector &other) const { \ CubVector retval; \ retval.x = x - other.x; \ return retval; \ } \ }; \ \ template<> struct CubVector : short_type##2 \ { \ typedef base_type BaseType; \ typedef short_type##2 Type; \ __host__ __device__ __forceinline__ CubVector operator+(const CubVector &other) const { \ CubVector retval; \ retval.x = x + other.x; \ retval.y = y + other.y; \ return retval; \ } \ __host__ __device__ __forceinline__ CubVector operator-(const CubVector &other) const { \ CubVector retval; \ retval.x = x - other.x; \ retval.y = y - other.y; \ return retval; \ } \ }; \ \ template<> struct CubVector : short_type##3 \ { \ typedef base_type BaseType; \ typedef short_type##3 Type; \ __host__ __device__ __forceinline__ CubVector operator+(const CubVector &other) const { \ CubVector retval; \ retval.x = x + other.x; \ retval.y = y + other.y; \ retval.z = z + other.z; \ return retval; \ } \ __host__ __device__ __forceinline__ CubVector operator-(const CubVector &other) const { \ CubVector retval; \ retval.x = x - other.x; \ retval.y = y - other.y; \ retval.z = z - other.z; \ return retval; \ } \ }; \ \ template<> struct CubVector : short_type##4 \ { \ typedef base_type BaseType; \ typedef short_type##4 Type; \ __host__ __device__ __forceinline__ CubVector operator+(const CubVector &other) const { \ CubVector retval; \ retval.x = x + other.x; \ retval.y = y + other.y; \ retval.z = z + other.z; \ retval.w = w + other.w; \ return retval; \ } \ __host__ __device__ __forceinline__ CubVector operator-(const CubVector &other) const { \ CubVector retval; \ retval.x = x - other.x; \ retval.y = y - other.y; \ retval.z = z - other.z; \ retval.w = w - other.w; \ return retval; \ } \ }; // Expand CUDA vector types for built-in primitives CUB_DEFINE_VECTOR_TYPE(char, char) CUB_DEFINE_VECTOR_TYPE(signed char, char) CUB_DEFINE_VECTOR_TYPE(short, short) CUB_DEFINE_VECTOR_TYPE(int, int) CUB_DEFINE_VECTOR_TYPE(long, long) CUB_DEFINE_VECTOR_TYPE(long long, longlong) CUB_DEFINE_VECTOR_TYPE(unsigned char, uchar) CUB_DEFINE_VECTOR_TYPE(unsigned short, ushort) CUB_DEFINE_VECTOR_TYPE(unsigned int, uint) CUB_DEFINE_VECTOR_TYPE(unsigned long, ulong) CUB_DEFINE_VECTOR_TYPE(unsigned long long, ulonglong) CUB_DEFINE_VECTOR_TYPE(float, float) CUB_DEFINE_VECTOR_TYPE(double, double) CUB_DEFINE_VECTOR_TYPE(bool, uchar) // Undefine macros #undef CUB_DEFINE_VECTOR_TYPE #endif // DOXYGEN_SHOULD_SKIP_THIS 
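/*
 * A minimal sketch of the component-wise arithmetic generated for the built-in
 * CubVector specializations above (device-side usage; the function name and
 * values shown are illustrative only):
 *
 * \code
 * __device__ void ExampleVectorAdd()
 * {
 *     CubVector<int, 4> a, b;
 *     a.x = 1; a.y = 2; a.z = 3; a.w = 4;
 *     b.x = 4; b.y = 3; b.z = 2; b.w = 1;
 *     CubVector<int, 4> c = a + b;   // component-wise: c.x, c.y, c.z, c.w == 5
 * }
 * \endcode
 */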
/****************************************************************************** * Wrapper types ******************************************************************************/ /** * \brief A storage-backing wrapper that allows types with non-trivial constructors to be aliased in unions */ template struct Uninitialized { /// Biggest memory-access word that T is a whole multiple of and is not larger than the alignment of T typedef typename UnitWord::DeviceWord DeviceWord; enum { WORDS = sizeof(T) / sizeof(DeviceWord) }; /// Backing storage DeviceWord storage[WORDS]; /// Alias __host__ __device__ __forceinline__ T& Alias() { return reinterpret_cast(*this); } }; /** * \brief A key identifier paired with a corresponding value */ template < typename _Key, typename _Value #if defined(_WIN32) && !defined(_WIN64) , bool KeyIsLT = (AlignBytes<_Key>::ALIGN_BYTES < AlignBytes<_Value>::ALIGN_BYTES) , bool ValIsLT = (AlignBytes<_Value>::ALIGN_BYTES < AlignBytes<_Key>::ALIGN_BYTES) #endif // #if defined(_WIN32) && !defined(_WIN64) > struct KeyValuePair { typedef _Key Key; ///< Key data type typedef _Value Value; ///< Value data type Key key; ///< Item key Value value; ///< Item value /// Constructor __host__ __device__ __forceinline__ KeyValuePair() {} /// Constructor __host__ __device__ __forceinline__ KeyValuePair(Key const& key, Value const& value) : key(key), value(value) {} /// Inequality operator __host__ __device__ __forceinline__ bool operator !=(const KeyValuePair &b) { return (value != b.value) || (key != b.key); } }; #if defined(_WIN32) && !defined(_WIN64) /** * Win32 won't do 16B alignment. This can present two problems for * should-be-16B-aligned (but actually 8B aligned) built-in and intrinsics members: * 1) If a smaller-aligned item were to be listed first, the host compiler places the * should-be-16B item at too early an offset (and disagrees with device compiler) * 2) Or, if a smaller-aligned item lists second, the host compiler gets the size * of the struct wrong (and disagrees with device compiler) * * So we put the larger-should-be-aligned item first, and explicitly pad the * end of the struct */ /// Smaller key specialization template struct KeyValuePair { typedef K Key; typedef V Value; typedef char Pad[AlignBytes::ALIGN_BYTES - AlignBytes::ALIGN_BYTES]; Value value; // Value has larger would-be alignment and goes first Key key; Pad pad; /// Constructor __host__ __device__ __forceinline__ KeyValuePair() {} /// Constructor __host__ __device__ __forceinline__ KeyValuePair(Key const& key, Value const& value) : key(key), value(value) {} /// Inequality operator __host__ __device__ __forceinline__ bool operator !=(const KeyValuePair &b) { return (value != b.value) || (key != b.key); } }; /// Smaller value specialization template struct KeyValuePair { typedef K Key; typedef V Value; typedef char Pad[AlignBytes::ALIGN_BYTES - AlignBytes::ALIGN_BYTES]; Key key; // Key has larger would-be alignment and goes first Value value; Pad pad; /// Constructor __host__ __device__ __forceinline__ KeyValuePair() {} /// Constructor __host__ __device__ __forceinline__ KeyValuePair(Key const& key, Value const& value) : key(key), value(value) {} /// Inequality operator __host__ __device__ __forceinline__ bool operator !=(const KeyValuePair &b) { return (value != b.value) || (key != b.key); } }; #endif // #if defined(_WIN32) && !defined(_WIN64) #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document /** * \brief A wrapper for passing simple static arrays as kernel parameters */ template struct ArrayWrapper { /// 
Statically-sized array of type \p T T array[COUNT]; /// Constructor __host__ __device__ __forceinline__ ArrayWrapper() {} }; #endif // DOXYGEN_SHOULD_SKIP_THIS /** * \brief Double-buffer storage wrapper for multi-pass stream transformations that require more than one storage array for streaming intermediate results back and forth. * * Many multi-pass computations require a pair of "ping-pong" storage * buffers (e.g., one for reading from and the other for writing to, and then * vice-versa for the subsequent pass). This structure wraps a set of device * buffers and a "selector" member to track which is "current". */ template struct DoubleBuffer { /// Pair of device buffer pointers T *d_buffers[2]; /// Selector into \p d_buffers (i.e., the active/valid buffer) int selector; /// \brief Constructor __host__ __device__ __forceinline__ DoubleBuffer() { selector = 0; d_buffers[0] = NULL; d_buffers[1] = NULL; } /// \brief Constructor __host__ __device__ __forceinline__ DoubleBuffer( T *d_current, ///< The currently valid buffer T *d_alternate) ///< Alternate storage buffer of the same size as \p d_current { selector = 0; d_buffers[0] = d_current; d_buffers[1] = d_alternate; } /// \brief Return pointer to the currently valid buffer __host__ __device__ __forceinline__ T* Current() { return d_buffers[selector]; } /// \brief Return pointer to the currently invalid buffer __host__ __device__ __forceinline__ T* Alternate() { return d_buffers[selector ^ 1]; } }; /****************************************************************************** * Typedef-detection ******************************************************************************/ /** * \brief Defines a structure \p detector_name that is templated on type \p T. The \p detector_name struct exposes a constant member \p VALUE indicating whether or not parameter \p T exposes a nested type \p nested_type_name */ #define CUB_DEFINE_DETECT_NESTED_TYPE(detector_name, nested_type_name) \ template \ struct detector_name \ { \ template \ static char& test(typename C::nested_type_name*); \ template \ static int& test(...); \ enum \ { \ VALUE = sizeof(test(0)) < sizeof(int) \ }; \ }; /****************************************************************************** * Simple enable-if (similar to Boost) ******************************************************************************/ /** * \brief Simple enable-if (similar to Boost) */ template struct EnableIf { /// Enable-if type for SFINAE dummy variables typedef T Type; }; template struct EnableIf {}; /****************************************************************************** * Typedef-detection ******************************************************************************/ /** * \brief Determine whether or not BinaryOp's functor is of the form bool operator()(const T& a, const T&b) or bool operator()(const T& a, const T&b, unsigned int idx) */ template struct BinaryOpHasIdxParam { private: /* template struct SFINAE1 {}; template struct SFINAE2 {}; template struct SFINAE3 {}; template struct SFINAE4 {}; */ template struct SFINAE5 {}; template struct SFINAE6 {}; template struct SFINAE7 {}; template struct SFINAE8 {}; /* template static char Test(SFINAE1 *); template static char Test(SFINAE2 *); template static char Test(SFINAE3 *); template static char Test(SFINAE4 *); */ template static char Test(SFINAE5 *); template static char Test(SFINAE6 *); template static char Test(SFINAE7 *); template static char Test(SFINAE8 *); template static int Test(...); public: /// Whether the functor BinaryOp has a third 
unsigned int index param static const bool HAS_PARAM = sizeof(Test(NULL)) == sizeof(char); }; /****************************************************************************** * Simple type traits utilities. * * For example: * Traits::CATEGORY // SIGNED_INTEGER * Traits::NULL_TYPE // true * Traits::CATEGORY // NOT_A_NUMBER * Traits::PRIMITIVE; // false * ******************************************************************************/ /** * \brief Basic type traits categories */ enum Category { NOT_A_NUMBER, SIGNED_INTEGER, UNSIGNED_INTEGER, FLOATING_POINT }; /** * \brief Basic type traits */ template struct BaseTraits { /// Category static const Category CATEGORY = _CATEGORY; enum { PRIMITIVE = _PRIMITIVE, NULL_TYPE = _NULL_TYPE, }; }; /** * Basic type traits (unsigned primitive specialization) */ template struct BaseTraits { typedef _UnsignedBits UnsignedBits; static const Category CATEGORY = UNSIGNED_INTEGER; static const UnsignedBits LOWEST_KEY = UnsignedBits(0); static const UnsignedBits MAX_KEY = UnsignedBits(-1); enum { PRIMITIVE = true, NULL_TYPE = false, }; static __device__ __forceinline__ UnsignedBits TwiddleIn(UnsignedBits key) { return key; } static __device__ __forceinline__ UnsignedBits TwiddleOut(UnsignedBits key) { return key; } static __host__ __device__ __forceinline__ T Max() { UnsignedBits retval = MAX_KEY; return reinterpret_cast(retval); } static __host__ __device__ __forceinline__ T Lowest() { UnsignedBits retval = LOWEST_KEY; return reinterpret_cast(retval); } }; /** * Basic type traits (signed primitive specialization) */ template struct BaseTraits { typedef _UnsignedBits UnsignedBits; static const Category CATEGORY = SIGNED_INTEGER; static const UnsignedBits HIGH_BIT = UnsignedBits(1) << ((sizeof(UnsignedBits) * 8) - 1); static const UnsignedBits LOWEST_KEY = HIGH_BIT; static const UnsignedBits MAX_KEY = UnsignedBits(-1) ^ HIGH_BIT; enum { PRIMITIVE = true, NULL_TYPE = false, }; static __device__ __forceinline__ UnsignedBits TwiddleIn(UnsignedBits key) { return key ^ HIGH_BIT; }; static __device__ __forceinline__ UnsignedBits TwiddleOut(UnsignedBits key) { return key ^ HIGH_BIT; }; static __host__ __device__ __forceinline__ T Max() { UnsignedBits retval = MAX_KEY; return reinterpret_cast(retval); } static __host__ __device__ __forceinline__ T Lowest() { UnsignedBits retval = LOWEST_KEY; return reinterpret_cast(retval); } }; template struct FpLimits; template <> struct FpLimits { static __host__ __device__ __forceinline__ float Max() { return FLT_MAX; } static __host__ __device__ __forceinline__ float Lowest() { return FLT_MAX * float(-1); } }; template <> struct FpLimits { static __host__ __device__ __forceinline__ double Max() { return DBL_MAX; } static __host__ __device__ __forceinline__ double Lowest() { return DBL_MAX * double(-1); } }; /** * Basic type traits (fp primitive specialization) */ template struct BaseTraits { typedef _UnsignedBits UnsignedBits; static const Category CATEGORY = FLOATING_POINT; static const UnsignedBits HIGH_BIT = UnsignedBits(1) << ((sizeof(UnsignedBits) * 8) - 1); static const UnsignedBits LOWEST_KEY = UnsignedBits(-1); static const UnsignedBits MAX_KEY = UnsignedBits(-1) ^ HIGH_BIT; enum { PRIMITIVE = true, NULL_TYPE = false, }; static __device__ __forceinline__ UnsignedBits TwiddleIn(UnsignedBits key) { UnsignedBits mask = (key & HIGH_BIT) ? UnsignedBits(-1) : HIGH_BIT; return key ^ mask; }; static __device__ __forceinline__ UnsignedBits TwiddleOut(UnsignedBits key) { UnsignedBits mask = (key & HIGH_BIT) ? 
HIGH_BIT : UnsignedBits(-1); return key ^ mask; }; static __host__ __device__ __forceinline__ T Max() { return FpLimits::Max(); } static __host__ __device__ __forceinline__ T Lowest() { return FpLimits::Lowest(); } }; /** * \brief Numeric type traits */ template struct NumericTraits : BaseTraits {}; template <> struct NumericTraits : BaseTraits {}; template <> struct NumericTraits : BaseTraits<(std::numeric_limits::is_signed) ? SIGNED_INTEGER : UNSIGNED_INTEGER, true, false, unsigned char, char> {}; template <> struct NumericTraits : BaseTraits {}; template <> struct NumericTraits : BaseTraits {}; template <> struct NumericTraits : BaseTraits {}; template <> struct NumericTraits : BaseTraits {}; template <> struct NumericTraits : BaseTraits {}; template <> struct NumericTraits : BaseTraits {}; template <> struct NumericTraits : BaseTraits {}; template <> struct NumericTraits : BaseTraits {}; template <> struct NumericTraits : BaseTraits {}; template <> struct NumericTraits : BaseTraits {}; template <> struct NumericTraits : BaseTraits {}; template <> struct NumericTraits : BaseTraits {}; template <> struct NumericTraits : BaseTraits::VolatileWord, bool> {}; /** * \brief Type traits */ template struct Traits : NumericTraits::Type> {}; #endif // DOXYGEN_SHOULD_SKIP_THIS /** @} */ // end group UtilModule } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/warp/000077500000000000000000000000001411340063500167565ustar00rootroot00000000000000relion-3.1.3/src/acc/cuda/cub/warp/specializations/000077500000000000000000000000001411340063500221575ustar00rootroot00000000000000relion-3.1.3/src/acc/cuda/cub/warp/specializations/warp_reduce_shfl.cuh000066400000000000000000000515621411340063500262050ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ******************************************************************************/ /** * \file * cub::WarpReduceShfl provides SHFL-based variants of parallel reduction of items partitioned across a CUDA thread warp. */ #pragma once #include "../../thread/thread_operators.cuh" #include "../../util_ptx.cuh" #include "../../util_type.cuh" #include "../../util_macro.cuh" #include "../../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief WarpReduceShfl provides SHFL-based variants of parallel reduction of items partitioned across a CUDA thread warp. * * LOGICAL_WARP_THREADS must be a power-of-two */ template < typename T, ///< Data type being reduced int LOGICAL_WARP_THREADS, ///< Number of threads per logical warp int PTX_ARCH> ///< The PTX compute capability for which to to specialize this collective struct WarpReduceShfl { //--------------------------------------------------------------------- // Constants and type definitions //--------------------------------------------------------------------- enum { /// Whether the logical warp size and the PTX warp size coincide IS_ARCH_WARP = (LOGICAL_WARP_THREADS == CUB_WARP_THREADS(PTX_ARCH)), /// The number of warp reduction steps STEPS = Log2::VALUE, /// Number of logical warps in a PTX warp LOGICAL_WARPS = CUB_WARP_THREADS(PTX_ARCH) / LOGICAL_WARP_THREADS, }; template struct IsInteger { enum { ///Whether the data type is a small (32b or less) integer for which we can use a single SFHL instruction per exchange IS_SMALL_UNSIGNED = (Traits::CATEGORY == UNSIGNED_INTEGER) && (sizeof(S) <= sizeof(unsigned int)) }; }; // Creates a mask where the last thread in each logical warp is set template struct LastLaneMask { enum { BASE_MASK = 1 << (LOGICAL_WARP_THREADS - 1), MASK = (LastLaneMask::MASK << LOGICAL_WARP_THREADS) | BASE_MASK, }; }; // Creates a mask where the last thread in each logical warp is set template struct LastLaneMask { enum { MASK = 1 << (LOGICAL_WARP_THREADS - 1), }; }; /// Shared memory storage layout type typedef NullType TempStorage; //--------------------------------------------------------------------- // Thread fields //--------------------------------------------------------------------- int lane_id; int member_mask; //--------------------------------------------------------------------- // Construction //--------------------------------------------------------------------- /// Constructor __device__ __forceinline__ WarpReduceShfl( TempStorage &/*temp_storage*/) : lane_id(LaneId()), member_mask(IS_ARCH_WARP ? 0xffffffff : (0xffffffff >> (32 - LOGICAL_WARP_THREADS)) << (LaneId() / LOGICAL_WARP_THREADS)) {} //--------------------------------------------------------------------- // Reduction steps //--------------------------------------------------------------------- /// Reduction (specialized for summation across uint32 types) __device__ __forceinline__ unsigned int ReduceStep( unsigned int input, ///< [in] Calling thread's input item. 
cub::Sum /*reduction_op*/, ///< [in] Binary reduction operator int last_lane, ///< [in] Index of last lane in segment int offset) ///< [in] Up-offset to pull from { unsigned int output; // Use predicate set from SHFL to guard against invalid peers #ifdef CUB_USE_COOPERATIVE_GROUPS asm volatile( "{" " .reg .u32 r0;" " .reg .pred p;" " shfl.sync.down.b32 r0|p, %1, %2, %3, %5;" " @p add.u32 r0, r0, %4;" " mov.u32 %0, r0;" "}" : "=r"(output) : "r"(input), "r"(offset), "r"(last_lane), "r"(input), "r"(member_mask)); #else asm volatile( "{" " .reg .u32 r0;" " .reg .pred p;" " shfl.down.b32 r0|p, %1, %2, %3;" " @p add.u32 r0, r0, %4;" " mov.u32 %0, r0;" "}" : "=r"(output) : "r"(input), "r"(offset), "r"(last_lane), "r"(input)); #endif return output; } /// Reduction (specialized for summation across fp32 types) __device__ __forceinline__ float ReduceStep( float input, ///< [in] Calling thread's input item. cub::Sum /*reduction_op*/, ///< [in] Binary reduction operator int last_lane, ///< [in] Index of last lane in segment int offset) ///< [in] Up-offset to pull from { float output; // Use predicate set from SHFL to guard against invalid peers #ifdef CUB_USE_COOPERATIVE_GROUPS asm volatile( "{" " .reg .f32 r0;" " .reg .pred p;" " shfl.sync.down.b32 r0|p, %1, %2, %3, %5;" " @p add.f32 r0, r0, %4;" " mov.f32 %0, r0;" "}" : "=f"(output) : "f"(input), "r"(offset), "r"(last_lane), "f"(input), "r"(member_mask)); #else asm volatile( "{" " .reg .f32 r0;" " .reg .pred p;" " shfl.down.b32 r0|p, %1, %2, %3;" " @p add.f32 r0, r0, %4;" " mov.f32 %0, r0;" "}" : "=f"(output) : "f"(input), "r"(offset), "r"(last_lane), "f"(input)); #endif return output; } /// Reduction (specialized for summation across unsigned long long types) __device__ __forceinline__ unsigned long long ReduceStep( unsigned long long input, ///< [in] Calling thread's input item. cub::Sum /*reduction_op*/, ///< [in] Binary reduction operator int last_lane, ///< [in] Index of last lane in segment int offset) ///< [in] Up-offset to pull from { unsigned long long output; #ifdef CUB_USE_COOPERATIVE_GROUPS asm volatile( "{" " .reg .u32 lo;" " .reg .u32 hi;" " .reg .pred p;" " mov.b64 {lo, hi}, %1;" " shfl.sync.down.b32 lo|p, lo, %2, %3, %4;" " shfl.sync.down.b32 hi|p, hi, %2, %3, %4;" " mov.b64 %0, {lo, hi};" " @p add.u64 %0, %0, %1;" "}" : "=l"(output) : "l"(input), "r"(offset), "r"(last_lane), "r"(member_mask)); #else asm volatile( "{" " .reg .u32 lo;" " .reg .u32 hi;" " .reg .pred p;" " mov.b64 {lo, hi}, %1;" " shfl.down.b32 lo|p, lo, %2, %3;" " shfl.down.b32 hi|p, hi, %2, %3;" " mov.b64 %0, {lo, hi};" " @p add.u64 %0, %0, %1;" "}" : "=l"(output) : "l"(input), "r"(offset), "r"(last_lane)); #endif return output; } /// Reduction (specialized for summation across long long types) __device__ __forceinline__ long long ReduceStep( long long input, ///< [in] Calling thread's input item. 
cub::Sum /*reduction_op*/, ///< [in] Binary reduction operator int last_lane, ///< [in] Index of last lane in segment int offset) ///< [in] Up-offset to pull from { long long output; // Use predicate set from SHFL to guard against invalid peers #ifdef CUB_USE_COOPERATIVE_GROUPS asm volatile( "{" " .reg .u32 lo;" " .reg .u32 hi;" " .reg .pred p;" " mov.b64 {lo, hi}, %1;" " shfl.sync.down.b32 lo|p, lo, %2, %3, %4;" " shfl.sync.down.b32 hi|p, hi, %2, %3, %4;" " mov.b64 %0, {lo, hi};" " @p add.s64 %0, %0, %1;" "}" : "=l"(output) : "l"(input), "r"(offset), "r"(last_lane), "r"(member_mask)); #else asm volatile( "{" " .reg .u32 lo;" " .reg .u32 hi;" " .reg .pred p;" " mov.b64 {lo, hi}, %1;" " shfl.down.b32 lo|p, lo, %2, %3;" " shfl.down.b32 hi|p, hi, %2, %3;" " mov.b64 %0, {lo, hi};" " @p add.s64 %0, %0, %1;" "}" : "=l"(output) : "l"(input), "r"(offset), "r"(last_lane)); #endif return output; } /// Reduction (specialized for summation across double types) __device__ __forceinline__ double ReduceStep( double input, ///< [in] Calling thread's input item. cub::Sum /*reduction_op*/, ///< [in] Binary reduction operator int last_lane, ///< [in] Index of last lane in segment int offset) ///< [in] Up-offset to pull from { double output; // Use predicate set from SHFL to guard against invalid peers #ifdef CUB_USE_COOPERATIVE_GROUPS asm volatile( "{" " .reg .u32 lo;" " .reg .u32 hi;" " .reg .pred p;" " .reg .f64 r0;" " mov.b64 %0, %1;" " mov.b64 {lo, hi}, %1;" " shfl.sync.down.b32 lo|p, lo, %2, %3, %4;" " shfl.sync.down.b32 hi|p, hi, %2, %3, %4;" " mov.b64 r0, {lo, hi};" " @p add.f64 %0, %0, r0;" "}" : "=d"(output) : "d"(input), "r"(offset), "r"(last_lane), "r"(member_mask)); #else asm volatile( "{" " .reg .u32 lo;" " .reg .u32 hi;" " .reg .pred p;" " .reg .f64 r0;" " mov.b64 %0, %1;" " mov.b64 {lo, hi}, %1;" " shfl.down.b32 lo|p, lo, %2, %3;" " shfl.down.b32 hi|p, hi, %2, %3;" " mov.b64 r0, {lo, hi};" " @p add.f64 %0, %0, r0;" "}" : "=d"(output) : "d"(input), "r"(offset), "r"(last_lane)); #endif return output; } /// Reduction (specialized for swizzled ReduceByKeyOp across KeyValuePair types) template __device__ __forceinline__ KeyValuePair ReduceStep( KeyValuePair input, ///< [in] Calling thread's input item. SwizzleScanOp > /*reduction_op*/, ///< [in] Binary reduction operator int last_lane, ///< [in] Index of last lane in segment int offset) ///< [in] Up-offset to pull from { KeyValuePair output; KeyT other_key = ShuffleDown(input.key, offset, last_lane, member_mask); output.key = input.key; output.value = ReduceStep( input.value, cub::Sum(), last_lane, offset, Int2Type::IS_SMALL_UNSIGNED>()); if (input.key != other_key) output.value = input.value; return output; } /// Reduction (specialized for swizzled ReduceBySegmentOp across KeyValuePair types) template __device__ __forceinline__ KeyValuePair ReduceStep( KeyValuePair input, ///< [in] Calling thread's input item. SwizzleScanOp > /*reduction_op*/, ///< [in] Binary reduction operator int last_lane, ///< [in] Index of last lane in segment int offset) ///< [in] Up-offset to pull from { KeyValuePair output; output.value = ReduceStep(input.value, cub::Sum(), last_lane, offset, Int2Type::IS_SMALL_UNSIGNED>()); output.key = ReduceStep(input.key, cub::Sum(), last_lane, offset, Int2Type::IS_SMALL_UNSIGNED>()); if (input.key > 0) output.value = input.value; return output; } /// Reduction step (generic) template __device__ __forceinline__ _T ReduceStep( _T input, ///< [in] Calling thread's input item. 
ReductionOp reduction_op, ///< [in] Binary reduction operator int last_lane, ///< [in] Index of last lane in segment int offset) ///< [in] Up-offset to pull from { _T output = input; _T temp = ShuffleDown(output, offset, last_lane, member_mask); // Perform reduction op if valid if (offset + lane_id <= last_lane) output = reduction_op(input, temp); return output; } /// Reduction step (specialized for small unsigned integers size 32b or less) template __device__ __forceinline__ _T ReduceStep( _T input, ///< [in] Calling thread's input item. ReductionOp reduction_op, ///< [in] Binary reduction operator int last_lane, ///< [in] Index of last lane in segment int offset, ///< [in] Up-offset to pull from Int2Type /*is_small_unsigned*/) ///< [in] Marker type indicating whether T is a small unsigned integer { return ReduceStep(input, reduction_op, last_lane, offset); } /// Reduction step (specialized for types other than small unsigned integers size 32b or less) template __device__ __forceinline__ _T ReduceStep( _T input, ///< [in] Calling thread's input item. ReductionOp reduction_op, ///< [in] Binary reduction operator int last_lane, ///< [in] Index of last lane in segment int offset, ///< [in] Up-offset to pull from Int2Type /*is_small_unsigned*/) ///< [in] Marker type indicating whether T is a small unsigned integer { return ReduceStep(input, reduction_op, last_lane, offset); } //--------------------------------------------------------------------- // Templated inclusive scan iteration //--------------------------------------------------------------------- template __device__ __forceinline__ void ReduceStep( T& input, ///< [in] Calling thread's input item. ReductionOp reduction_op, ///< [in] Binary reduction operator int last_lane, ///< [in] Index of last lane in segment Int2Type /*step*/) { input = ReduceStep(input, reduction_op, last_lane, 1 << STEP, Int2Type::IS_SMALL_UNSIGNED>()); ReduceStep(input, reduction_op, last_lane, Int2Type()); } template __device__ __forceinline__ void ReduceStep( T& /*input*/, ///< [in] Calling thread's input item. ReductionOp /*reduction_op*/, ///< [in] Binary reduction operator int /*last_lane*/, ///< [in] Index of last lane in segment Int2Type /*step*/) {} //--------------------------------------------------------------------- // Reduction operations //--------------------------------------------------------------------- /// Reduction template < bool ALL_LANES_VALID, ///< Whether all lanes in each warp are contributing a valid fold of items int FOLDED_ITEMS_PER_LANE, ///< Number of items folded into each lane typename ReductionOp> __device__ __forceinline__ T Reduce( T input, ///< [in] Calling thread's input int folded_items_per_warp, ///< [in] Total number of valid items folded into each logical warp ReductionOp reduction_op) ///< [in] Binary reduction operator { // Get the last thread in the logical warp int first_warp_thread = 0; int last_warp_thread = LOGICAL_WARP_THREADS - 1; if (!IS_ARCH_WARP) { first_warp_thread = lane_id & (~(LOGICAL_WARP_THREADS - 1)); last_warp_thread |= lane_id; } // Common case is FOLDED_ITEMS_PER_LANE = 1 (or a multiple of 32) int lanes_with_valid_data = (folded_items_per_warp - 1) / FOLDED_ITEMS_PER_LANE; // Get the last valid lane int last_lane = (ALL_LANES_VALID) ? 
last_warp_thread : CUB_MIN(last_warp_thread, first_warp_thread + lanes_with_valid_data); T output = input; // // Iterate reduction steps // #pragma unroll // for (int STEP = 0; STEP < STEPS; STEP++) // { // output = ReduceStep(output, reduction_op, last_lane, 1 << STEP, Int2Type::IS_SMALL_UNSIGNED>()); // } // Template-iterate reduction steps ReduceStep(output, reduction_op, last_lane, Int2Type<0>()); return output; } /// Segmented reduction template < bool HEAD_SEGMENTED, ///< Whether flags indicate a segment-head or a segment-tail typename FlagT, typename ReductionOp> __device__ __forceinline__ T SegmentedReduce( T input, ///< [in] Calling thread's input FlagT flag, ///< [in] Whether or not the current lane is a segment head/tail ReductionOp reduction_op) ///< [in] Binary reduction operator { // Get the start flags for each thread in the warp. int warp_flags = WARP_BALLOT(flag, member_mask); if (HEAD_SEGMENTED) warp_flags >>= 1; // Mask in the last lanes of each logical warp warp_flags |= LastLaneMask<1, LOGICAL_WARPS>::MASK; // Mask out the bits below the current thread warp_flags &= LaneMaskGe(); // Find the next set flag int last_lane = __clz(__brev(warp_flags)); T output = input; // // Iterate reduction steps // #pragma unroll // for (int STEP = 0; STEP < STEPS; STEP++) // { // output = ReduceStep(output, reduction_op, last_lane, 1 << STEP, Int2Type::IS_SMALL_UNSIGNED>()); // } // Template-iterate reduction steps ReduceStep(output, reduction_op, last_lane, Int2Type<0>()); return output; } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/warp/specializations/warp_reduce_smem.cuh000066400000000000000000000345351411340063500262130ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ******************************************************************************/ /** * \file * cub::WarpReduceSmem provides smem-based variants of parallel reduction of items partitioned across a CUDA thread warp. */ #pragma once #include "../../thread/thread_operators.cuh" #include "../../thread/thread_load.cuh" #include "../../thread/thread_store.cuh" #include "../../util_type.cuh" #include "../../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief WarpReduceSmem provides smem-based variants of parallel reduction of items partitioned across a CUDA thread warp. */ template < typename T, ///< Data type being reduced int LOGICAL_WARP_THREADS, ///< Number of threads per logical warp int PTX_ARCH> ///< The PTX compute capability for which to to specialize this collective struct WarpReduceSmem { /****************************************************************************** * Constants and type definitions ******************************************************************************/ enum { /// Whether the logical warp size and the PTX warp size coincide IS_ARCH_WARP = (LOGICAL_WARP_THREADS == CUB_WARP_THREADS(PTX_ARCH)), /// Whether the logical warp size is a power-of-two IS_POW_OF_TWO = PowerOfTwo::VALUE, /// The number of warp scan steps STEPS = Log2::VALUE, /// The number of threads in half a warp HALF_WARP_THREADS = 1 << (STEPS - 1), /// The number of shared memory elements per warp WARP_SMEM_ELEMENTS = LOGICAL_WARP_THREADS + HALF_WARP_THREADS, /// FlagT status (when not using ballot) UNSET = 0x0, // Is initially unset SET = 0x1, // Is initially set SEEN = 0x2, // Has seen another head flag from a successor peer }; /// Shared memory flag type typedef unsigned char SmemFlag; /// Shared memory storage layout type (1.5 warps-worth of elements for each warp) struct _TempStorage { T reduce[WARP_SMEM_ELEMENTS]; SmemFlag flags[WARP_SMEM_ELEMENTS]; }; // Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; /****************************************************************************** * Thread fields ******************************************************************************/ _TempStorage &temp_storage; unsigned int lane_id; unsigned int member_mask; /****************************************************************************** * Construction ******************************************************************************/ /// Constructor __device__ __forceinline__ WarpReduceSmem( TempStorage &temp_storage) : temp_storage(temp_storage.Alias()), lane_id(IS_ARCH_WARP ? LaneId() : LaneId() % LOGICAL_WARP_THREADS), member_mask(!IS_POW_OF_TWO ? 
(0xffffffff >> (32 - LOGICAL_WARP_THREADS)) : // non-power-of-two subwarps cannot be tiled (0xffffffff >> (32 - LOGICAL_WARP_THREADS)) << (LaneId() / LOGICAL_WARP_THREADS)) {} /****************************************************************************** * Utility methods ******************************************************************************/ //--------------------------------------------------------------------- // Regular reduction //--------------------------------------------------------------------- /** * Reduction step */ template < bool ALL_LANES_VALID, ///< Whether all lanes in each warp are contributing a valid fold of items int FOLDED_ITEMS_PER_LANE, ///< Number of items folded into each lane typename ReductionOp, int STEP> __device__ __forceinline__ T ReduceStep( T input, ///< [in] Calling thread's input int folded_items_per_warp, ///< [in] Total number of valid items folded into each logical warp ReductionOp reduction_op, ///< [in] Reduction operator Int2Type /*step*/) { const int OFFSET = 1 << STEP; // Share input through buffer ThreadStore(&temp_storage.reduce[lane_id], input); WARP_SYNC(member_mask); // Update input if peer_addend is in range if ((ALL_LANES_VALID && IS_POW_OF_TWO) || ((lane_id + OFFSET) * FOLDED_ITEMS_PER_LANE < folded_items_per_warp)) { T peer_addend = ThreadLoad(&temp_storage.reduce[lane_id + OFFSET]); input = reduction_op(input, peer_addend); } WARP_SYNC(member_mask); return ReduceStep(input, folded_items_per_warp, reduction_op, Int2Type()); } /** * Reduction step (terminate) */ template < bool ALL_LANES_VALID, ///< Whether all lanes in each warp are contributing a valid fold of items int FOLDED_ITEMS_PER_LANE, ///< Number of items folded into each lane typename ReductionOp> __device__ __forceinline__ T ReduceStep( T input, ///< [in] Calling thread's input int /*folded_items_per_warp*/, ///< [in] Total number of valid items folded into each logical warp ReductionOp /*reduction_op*/, ///< [in] Reduction operator Int2Type /*step*/) { return input; } //--------------------------------------------------------------------- // Segmented reduction //--------------------------------------------------------------------- /** * Ballot-based segmented reduce */ template < bool HEAD_SEGMENTED, ///< Whether flags indicate a segment-head or a segment-tail typename FlagT, typename ReductionOp> __device__ __forceinline__ T SegmentedReduce( T input, ///< [in] Calling thread's input FlagT flag, ///< [in] Whether or not the current lane is a segment head/tail ReductionOp reduction_op, ///< [in] Reduction operator Int2Type /*has_ballot*/) ///< [in] Marker type for whether the target arch has ballot functionality { // Get the start flags for each thread in the warp. int warp_flags = WARP_BALLOT(flag, member_mask); if (!HEAD_SEGMENTED) warp_flags <<= 1; // Keep bits above the current thread. 
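// ------------------------------------------------------------------------
// [Editorial aside, not part of the CUB sources] The ballot-based segmented
// reduction in this specialization is normally reached through the public
// cub::WarpReduce interface rather than called directly. A minimal
// head-segmented sum might look like the sketch below, assuming
// <cub/cub.cuh> is included and the kernel is launched with a single
// 32-thread block; the kernel and buffer names are illustrative only.
//
//   __global__ void ExampleSegmentedSumKernel(int *d_in, int *d_flags, int *d_out)
//   {
//       typedef cub::WarpReduce<int> WarpReduce;            // 32-thread logical warp
//       __shared__ typename WarpReduce::TempStorage temp_storage;
//       int thread_data = d_in[threadIdx.x];                // one item per lane
//       int head_flag   = d_flags[threadIdx.x];             // 1 at first lane of each segment
//       // Each segment's sum is returned to that segment's first lane
//       int segment_sum = WarpReduce(temp_storage).HeadSegmentedSum(thread_data, head_flag);
//       if (head_flag) d_out[threadIdx.x] = segment_sum;
//   }
// ------------------------------------------------------------------------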
warp_flags &= LaneMaskGt(); // Accommodate packing of multiple logical warps in a single physical warp if (!IS_ARCH_WARP) { warp_flags >>= (LaneId() / LOGICAL_WARP_THREADS) * LOGICAL_WARP_THREADS; } // Find next flag int next_flag = __clz(__brev(warp_flags)); // Clip the next segment at the warp boundary if necessary if (LOGICAL_WARP_THREADS != 32) next_flag = CUB_MIN(next_flag, LOGICAL_WARP_THREADS); #pragma unroll for (int STEP = 0; STEP < STEPS; STEP++) { const int OFFSET = 1 << STEP; // Share input into buffer ThreadStore(&temp_storage.reduce[lane_id], input); WARP_SYNC(member_mask); // Update input if peer_addend is in range if (OFFSET + lane_id < next_flag) { T peer_addend = ThreadLoad(&temp_storage.reduce[lane_id + OFFSET]); input = reduction_op(input, peer_addend); } WARP_SYNC(member_mask); } return input; } /** * Smem-based segmented reduce */ template < bool HEAD_SEGMENTED, ///< Whether flags indicate a segment-head or a segment-tail typename FlagT, typename ReductionOp> __device__ __forceinline__ T SegmentedReduce( T input, ///< [in] Calling thread's input FlagT flag, ///< [in] Whether or not the current lane is a segment head/tail ReductionOp reduction_op, ///< [in] Reduction operator Int2Type /*has_ballot*/) ///< [in] Marker type for whether the target arch has ballot functionality { enum { UNSET = 0x0, // Is initially unset SET = 0x1, // Is initially set SEEN = 0x2, // Has seen another head flag from a successor peer }; // Alias flags onto shared data storage volatile SmemFlag *flag_storage = temp_storage.flags; SmemFlag flag_status = (flag) ? SET : UNSET; for (int STEP = 0; STEP < STEPS; STEP++) { const int OFFSET = 1 << STEP; // Share input through buffer ThreadStore(&temp_storage.reduce[lane_id], input); WARP_SYNC(member_mask); // Get peer from buffer T peer_addend = ThreadLoad(&temp_storage.reduce[lane_id + OFFSET]); WARP_SYNC(member_mask); // Share flag through buffer flag_storage[lane_id] = flag_status; // Get peer flag from buffer SmemFlag peer_flag_status = flag_storage[lane_id + OFFSET]; // Update input if peer was in range if (lane_id < LOGICAL_WARP_THREADS - OFFSET) { if (HEAD_SEGMENTED) { // Head-segmented if ((flag_status & SEEN) == 0) { // Has not seen a more distant head flag if (peer_flag_status & SET) { // Has now seen a head flag flag_status |= SEEN; } else { // Peer is not a head flag: grab its count input = reduction_op(input, peer_addend); } // Update seen status to include that of peer flag_status |= (peer_flag_status & SEEN); } } else { // Tail-segmented. 
Simply propagate flag status if (!flag_status) { input = reduction_op(input, peer_addend); flag_status |= peer_flag_status; } } } } return input; } /****************************************************************************** * Interface ******************************************************************************/ /** * Reduction */ template < bool ALL_LANES_VALID, ///< Whether all lanes in each warp are contributing a valid fold of items int FOLDED_ITEMS_PER_LANE, ///< Number of items folded into each lane typename ReductionOp> __device__ __forceinline__ T Reduce( T input, ///< [in] Calling thread's input int folded_items_per_warp, ///< [in] Total number of valid items folded into each logical warp ReductionOp reduction_op) ///< [in] Reduction operator { return ReduceStep(input, folded_items_per_warp, reduction_op, Int2Type<0>()); } /** * Segmented reduction */ template < bool HEAD_SEGMENTED, ///< Whether flags indicate a segment-head or a segment-tail typename FlagT, typename ReductionOp> __device__ __forceinline__ T SegmentedReduce( T input, ///< [in] Calling thread's input FlagT flag, ///< [in] Whether or not the current lane is a segment head/tail ReductionOp reduction_op) ///< [in] Reduction operator { return SegmentedReduce(input, flag, reduction_op, Int2Type<(PTX_ARCH >= 200)>()); } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/warp/specializations/warp_scan_shfl.cuh000066400000000000000000000634331411340063500256620ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::WarpScanShfl provides SHFL-based variants of parallel prefix scan of items partitioned across a CUDA thread warp. 
*/ #pragma once #include "../../thread/thread_operators.cuh" #include "../../util_type.cuh" #include "../../util_ptx.cuh" #include "../../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief WarpScanShfl provides SHFL-based variants of parallel prefix scan of items partitioned across a CUDA thread warp. */ template < typename T, ///< Data type being scanned int LOGICAL_WARP_THREADS, ///< Number of threads per logical warp int PTX_ARCH> ///< The PTX compute capability for which to to specialize this collective struct WarpScanShfl { //--------------------------------------------------------------------- // Constants and type definitions //--------------------------------------------------------------------- enum { /// Whether the logical warp size and the PTX warp size coincide IS_ARCH_WARP = (LOGICAL_WARP_THREADS == CUB_WARP_THREADS(PTX_ARCH)), /// The number of warp scan steps STEPS = Log2::VALUE, /// The 5-bit SHFL mask for logically splitting warps into sub-segments starts 8-bits up SHFL_C = ((0xFFFFFFFFU << STEPS) & 31) << 8, }; template struct IntegerTraits { enum { ///Whether the data type is a small (32b or less) integer for which we can use a single SFHL instruction per exchange IS_SMALL_UNSIGNED = (Traits::CATEGORY == UNSIGNED_INTEGER) && (sizeof(S) <= sizeof(unsigned int)) }; }; /// Shared memory storage layout type struct TempStorage {}; //--------------------------------------------------------------------- // Thread fields //--------------------------------------------------------------------- unsigned int lane_id; unsigned int member_mask; //--------------------------------------------------------------------- // Construction //--------------------------------------------------------------------- /// Constructor __device__ __forceinline__ WarpScanShfl( TempStorage &/*temp_storage*/) : lane_id(IS_ARCH_WARP ? LaneId() : LaneId() % LOGICAL_WARP_THREADS), member_mask(IS_ARCH_WARP ? 0xffffffff : (0xffffffff >> (32 - LOGICAL_WARP_THREADS)) << (LaneId() / LOGICAL_WARP_THREADS)) {} //--------------------------------------------------------------------- // Inclusive scan steps //--------------------------------------------------------------------- /// Inclusive prefix scan step (specialized for summation across int32 types) __device__ __forceinline__ int InclusiveScanStep( int input, ///< [in] Calling thread's input item. cub::Sum /*scan_op*/, ///< [in] Binary scan operator int first_lane, ///< [in] Index of first lane in segment int offset) ///< [in] Up-offset to pull from { int output; int shfl_c = first_lane | SHFL_C; // Shuffle control (mask and first-lane) // Use predicate set from SHFL to guard against invalid peers #ifdef CUB_USE_COOPERATIVE_GROUPS asm volatile( "{" " .reg .s32 r0;" " .reg .pred p;" " shfl.sync.up.b32 r0|p, %1, %2, %3, %5;" " @p add.s32 r0, r0, %4;" " mov.s32 %0, r0;" "}" : "=r"(output) : "r"(input), "r"(offset), "r"(shfl_c), "r"(input), "r"(member_mask)); #else asm volatile( "{" " .reg .s32 r0;" " .reg .pred p;" " shfl.up.b32 r0|p, %1, %2, %3;" " @p add.s32 r0, r0, %4;" " mov.s32 %0, r0;" "}" : "=r"(output) : "r"(input), "r"(offset), "r"(shfl_c), "r"(input)); #endif return output; } /// Inclusive prefix scan step (specialized for summation across uint32 types) __device__ __forceinline__ unsigned int InclusiveScanStep( unsigned int input, ///< [in] Calling thread's input item. 
cub::Sum /*scan_op*/, ///< [in] Binary scan operator int first_lane, ///< [in] Index of first lane in segment int offset) ///< [in] Up-offset to pull from { unsigned int output; int shfl_c = first_lane | SHFL_C; // Shuffle control (mask and first-lane) // Use predicate set from SHFL to guard against invalid peers #ifdef CUB_USE_COOPERATIVE_GROUPS asm volatile( "{" " .reg .u32 r0;" " .reg .pred p;" " shfl.sync.up.b32 r0|p, %1, %2, %3, %5;" " @p add.u32 r0, r0, %4;" " mov.u32 %0, r0;" "}" : "=r"(output) : "r"(input), "r"(offset), "r"(shfl_c), "r"(input), "r"(member_mask)); #else asm volatile( "{" " .reg .u32 r0;" " .reg .pred p;" " shfl.up.b32 r0|p, %1, %2, %3;" " @p add.u32 r0, r0, %4;" " mov.u32 %0, r0;" "}" : "=r"(output) : "r"(input), "r"(offset), "r"(shfl_c), "r"(input)); #endif return output; } /// Inclusive prefix scan step (specialized for summation across fp32 types) __device__ __forceinline__ float InclusiveScanStep( float input, ///< [in] Calling thread's input item. cub::Sum /*scan_op*/, ///< [in] Binary scan operator int first_lane, ///< [in] Index of first lane in segment int offset) ///< [in] Up-offset to pull from { float output; int shfl_c = first_lane | SHFL_C; // Shuffle control (mask and first-lane) // Use predicate set from SHFL to guard against invalid peers #ifdef CUB_USE_COOPERATIVE_GROUPS asm volatile( "{" " .reg .f32 r0;" " .reg .pred p;" " shfl.sync.up.b32 r0|p, %1, %2, %3, %5;" " @p add.f32 r0, r0, %4;" " mov.f32 %0, r0;" "}" : "=f"(output) : "f"(input), "r"(offset), "r"(shfl_c), "f"(input), "r"(member_mask)); #else asm volatile( "{" " .reg .f32 r0;" " .reg .pred p;" " shfl.up.b32 r0|p, %1, %2, %3;" " @p add.f32 r0, r0, %4;" " mov.f32 %0, r0;" "}" : "=f"(output) : "f"(input), "r"(offset), "r"(shfl_c), "f"(input)); #endif return output; } /// Inclusive prefix scan step (specialized for summation across unsigned long long types) __device__ __forceinline__ unsigned long long InclusiveScanStep( unsigned long long input, ///< [in] Calling thread's input item. cub::Sum /*scan_op*/, ///< [in] Binary scan operator int first_lane, ///< [in] Index of first lane in segment int offset) ///< [in] Up-offset to pull from { unsigned long long output; int shfl_c = first_lane | SHFL_C; // Shuffle control (mask and first-lane) // Use predicate set from SHFL to guard against invalid peers #ifdef CUB_USE_COOPERATIVE_GROUPS asm volatile( "{" " .reg .u64 r0;" " .reg .u32 lo;" " .reg .u32 hi;" " .reg .pred p;" " mov.b64 {lo, hi}, %1;" " shfl.sync.up.b32 lo|p, lo, %2, %3, %5;" " shfl.sync.up.b32 hi|p, hi, %2, %3, %5;" " mov.b64 r0, {lo, hi};" " @p add.u64 r0, r0, %4;" " mov.u64 %0, r0;" "}" : "=l"(output) : "l"(input), "r"(offset), "r"(shfl_c), "l"(input), "r"(member_mask)); #else asm volatile( "{" " .reg .u64 r0;" " .reg .u32 lo;" " .reg .u32 hi;" " .reg .pred p;" " mov.b64 {lo, hi}, %1;" " shfl.up.b32 lo|p, lo, %2, %3;" " shfl.up.b32 hi|p, hi, %2, %3;" " mov.b64 r0, {lo, hi};" " @p add.u64 r0, r0, %4;" " mov.u64 %0, r0;" "}" : "=l"(output) : "l"(input), "r"(offset), "r"(shfl_c), "l"(input)); #endif return output; } /// Inclusive prefix scan step (specialized for summation across long long types) __device__ __forceinline__ long long InclusiveScanStep( long long input, ///< [in] Calling thread's input item. 
cub::Sum /*scan_op*/, ///< [in] Binary scan operator int first_lane, ///< [in] Index of first lane in segment int offset) ///< [in] Up-offset to pull from { long long output; int shfl_c = first_lane | SHFL_C; // Shuffle control (mask and first-lane) // Use predicate set from SHFL to guard against invalid peers #ifdef CUB_USE_COOPERATIVE_GROUPS asm volatile( "{" " .reg .s64 r0;" " .reg .u32 lo;" " .reg .u32 hi;" " .reg .pred p;" " mov.b64 {lo, hi}, %1;" " shfl.sync.up.b32 lo|p, lo, %2, %3, %5;" " shfl.sync.up.b32 hi|p, hi, %2, %3, %5;" " mov.b64 r0, {lo, hi};" " @p add.s64 r0, r0, %4;" " mov.s64 %0, r0;" "}" : "=l"(output) : "l"(input), "r"(offset), "r"(shfl_c), "l"(input), "r"(member_mask)); #else asm volatile( "{" " .reg .s64 r0;" " .reg .u32 lo;" " .reg .u32 hi;" " .reg .pred p;" " mov.b64 {lo, hi}, %1;" " shfl.up.b32 lo|p, lo, %2, %3;" " shfl.up.b32 hi|p, hi, %2, %3;" " mov.b64 r0, {lo, hi};" " @p add.s64 r0, r0, %4;" " mov.s64 %0, r0;" "}" : "=l"(output) : "l"(input), "r"(offset), "r"(shfl_c), "l"(input)); #endif return output; } /// Inclusive prefix scan step (specialized for summation across fp64 types) __device__ __forceinline__ double InclusiveScanStep( double input, ///< [in] Calling thread's input item. cub::Sum /*scan_op*/, ///< [in] Binary scan operator int first_lane, ///< [in] Index of first lane in segment int offset) ///< [in] Up-offset to pull from { double output; int shfl_c = first_lane | SHFL_C; // Shuffle control (mask and first-lane) // Use predicate set from SHFL to guard against invalid peers #ifdef CUB_USE_COOPERATIVE_GROUPS asm volatile( "{" " .reg .u32 lo;" " .reg .u32 hi;" " .reg .pred p;" " .reg .f64 r0;" " mov.b64 %0, %1;" " mov.b64 {lo, hi}, %1;" " shfl.sync.up.b32 lo|p, lo, %2, %3, %4;" " shfl.sync.up.b32 hi|p, hi, %2, %3, %4;" " mov.b64 r0, {lo, hi};" " @p add.f64 %0, %0, r0;" "}" : "=d"(output) : "d"(input), "r"(offset), "r"(shfl_c), "r"(member_mask)); #else asm volatile( "{" " .reg .u32 lo;" " .reg .u32 hi;" " .reg .pred p;" " .reg .f64 r0;" " mov.b64 %0, %1;" " mov.b64 {lo, hi}, %1;" " shfl.up.b32 lo|p, lo, %2, %3;" " shfl.up.b32 hi|p, hi, %2, %3;" " mov.b64 r0, {lo, hi};" " @p add.f64 %0, %0, r0;" "}" : "=d"(output) : "d"(input), "r"(offset), "r"(shfl_c)); #endif return output; } /* /// Inclusive prefix scan (specialized for ReduceBySegmentOp across KeyValuePair types) template __device__ __forceinline__ KeyValuePairInclusiveScanStep( KeyValuePair input, ///< [in] Calling thread's input item. ReduceBySegmentOp scan_op, ///< [in] Binary scan operator int first_lane, ///< [in] Index of first lane in segment int offset) ///< [in] Up-offset to pull from { KeyValuePair output; output.value = InclusiveScanStep(input.value, cub::Sum(), first_lane, offset, Int2Type::IS_SMALL_UNSIGNED>()); output.key = InclusiveScanStep(input.key, cub::Sum(), first_lane, offset, Int2Type::IS_SMALL_UNSIGNED>()); if (input.key > 0) output.value = input.value; return output; } */ /// Inclusive prefix scan step (generic) template __device__ __forceinline__ _T InclusiveScanStep( _T input, ///< [in] Calling thread's input item. 
ScanOpT scan_op, ///< [in] Binary scan operator int first_lane, ///< [in] Index of first lane in segment int offset) ///< [in] Up-offset to pull from { _T temp = ShuffleUp(input, offset, first_lane, member_mask); // Perform scan op if from a valid peer _T output = scan_op(temp, input); if (static_cast(lane_id) < first_lane + offset) output = input; return output; } /// Inclusive prefix scan step (specialized for small integers size 32b or less) template __device__ __forceinline__ _T InclusiveScanStep( _T input, ///< [in] Calling thread's input item. ScanOpT scan_op, ///< [in] Binary scan operator int first_lane, ///< [in] Index of first lane in segment int offset, ///< [in] Up-offset to pull from Int2Type /*is_small_unsigned*/) ///< [in] Marker type indicating whether T is a small integer { return InclusiveScanStep(input, scan_op, first_lane, offset); } /// Inclusive prefix scan step (specialized for types other than small integers size 32b or less) template __device__ __forceinline__ _T InclusiveScanStep( _T input, ///< [in] Calling thread's input item. ScanOpT scan_op, ///< [in] Binary scan operator int first_lane, ///< [in] Index of first lane in segment int offset, ///< [in] Up-offset to pull from Int2Type /*is_small_unsigned*/) ///< [in] Marker type indicating whether T is a small integer { return InclusiveScanStep(input, scan_op, first_lane, offset); } //--------------------------------------------------------------------- // Templated inclusive scan iteration //--------------------------------------------------------------------- template __device__ __forceinline__ void InclusiveScanStep( _T& input, ///< [in] Calling thread's input item. ScanOp scan_op, ///< [in] Binary scan operator int first_lane, ///< [in] Index of first lane in segment Int2Type /*step*/) ///< [in] Marker type indicating scan step { input = InclusiveScanStep(input, scan_op, first_lane, 1 << STEP, Int2Type::IS_SMALL_UNSIGNED>()); InclusiveScanStep(input, scan_op, first_lane, Int2Type()); } template __device__ __forceinline__ void InclusiveScanStep( _T& /*input*/, ///< [in] Calling thread's input item. ScanOp /*scan_op*/, ///< [in] Binary scan operator int /*first_lane*/, ///< [in] Index of first lane in segment Int2Type /*step*/) ///< [in] Marker type indicating scan step {} /****************************************************************************** * Interface ******************************************************************************/ //--------------------------------------------------------------------- // Broadcast //--------------------------------------------------------------------- /// Broadcast __device__ __forceinline__ T Broadcast( T input, ///< [in] The value to broadcast int src_lane) ///< [in] Which warp lane is to do the broadcasting { return ShuffleIndex(input, src_lane, LOGICAL_WARP_THREADS, member_mask); } //--------------------------------------------------------------------- // Inclusive operations //--------------------------------------------------------------------- /// Inclusive scan template __device__ __forceinline__ void InclusiveScan( _T input, ///< [in] Calling thread's input item. _T &inclusive_output, ///< [out] Calling thread's output item. May be aliased with \p input. 
ScanOpT scan_op) ///< [in] Binary scan operator { inclusive_output = input; // Iterate scan steps int segment_first_lane = 0; // Iterate scan steps // InclusiveScanStep(inclusive_output, scan_op, segment_first_lane, Int2Type<0>()); // Iterate scan steps #pragma unroll for (int STEP = 0; STEP < STEPS; STEP++) { inclusive_output = InclusiveScanStep( inclusive_output, scan_op, segment_first_lane, (1 << STEP), Int2Type::IS_SMALL_UNSIGNED>()); } } /// Inclusive scan, specialized for reduce-value-by-key template __device__ __forceinline__ void InclusiveScan( KeyValuePair input, ///< [in] Calling thread's input item. KeyValuePair &inclusive_output, ///< [out] Calling thread's output item. May be aliased with \p input. ReduceByKeyOp scan_op) ///< [in] Binary scan operator { inclusive_output = input; KeyT pred_key = ShuffleUp(inclusive_output.key, 1, 0, member_mask); unsigned int ballot = WARP_BALLOT((pred_key != inclusive_output.key), member_mask); // Mask away all lanes greater than ours ballot = ballot & LaneMaskLe(); // Find index of first set bit int segment_first_lane = CUB_MAX(0, 31 - __clz(ballot)); // Iterate scan steps // InclusiveScanStep(inclusive_output.value, scan_op.op, segment_first_lane, Int2Type<0>()); // Iterate scan steps #pragma unroll for (int STEP = 0; STEP < STEPS; STEP++) { inclusive_output.value = InclusiveScanStep( inclusive_output.value, scan_op.op, segment_first_lane, (1 << STEP), Int2Type::IS_SMALL_UNSIGNED>()); } } /// Inclusive scan with aggregate template __device__ __forceinline__ void InclusiveScan( T input, ///< [in] Calling thread's input item. T &inclusive_output, ///< [out] Calling thread's output item. May be aliased with \p input. ScanOpT scan_op, ///< [in] Binary scan operator T &warp_aggregate) ///< [out] Warp-wide aggregate reduction of input items. 
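// ------------------------------------------------------------------------
// [Editorial aside, not part of the CUB sources] User code reaches these
// SHFL-based scan steps through the public cub::WarpScan class. A minimal
// inclusive warp-wide prefix sum that also retrieves the warp aggregate
// might look like the sketch below, assuming <cub/cub.cuh> is included and
// a single 32-thread block; the kernel and buffer names are illustrative.
//
//   __global__ void ExamplePrefixSumKernel(int *d_in, int *d_out, int *d_total)
//   {
//       typedef cub::WarpScan<int> WarpScan;                // 32-thread logical warp
//       __shared__ typename WarpScan::TempStorage temp_storage;
//       int thread_data = d_in[threadIdx.x];                // one item per lane
//       int warp_aggregate;
//       // In-place inclusive prefix sum; every lane also receives the warp total
//       WarpScan(temp_storage).InclusiveSum(thread_data, thread_data, warp_aggregate);
//       d_out[threadIdx.x] = thread_data;
//       if (threadIdx.x == 0) *d_total = warp_aggregate;
//   }
// ------------------------------------------------------------------------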
{ InclusiveScan(input, inclusive_output, scan_op); // Grab aggregate from last warp lane warp_aggregate = ShuffleIndex(inclusive_output, LOGICAL_WARP_THREADS - 1, LOGICAL_WARP_THREADS, member_mask); } //--------------------------------------------------------------------- // Get exclusive from inclusive //--------------------------------------------------------------------- /// Update inclusive and exclusive using input and inclusive template __device__ __forceinline__ void Update( T /*input*/, ///< [in] T &inclusive, ///< [in, out] T &exclusive, ///< [out] ScanOpT /*scan_op*/, ///< [in] IsIntegerT /*is_integer*/) ///< [in] { // initial value unknown exclusive = ShuffleUp(inclusive, 1, 0, member_mask); } /// Update inclusive and exclusive using input and inclusive (specialized for summation of integer types) __device__ __forceinline__ void Update( T input, T &inclusive, T &exclusive, cub::Sum /*scan_op*/, Int2Type /*is_integer*/) { // initial value presumed 0 exclusive = inclusive - input; } /// Update inclusive and exclusive using initial value using input, inclusive, and initial value template __device__ __forceinline__ void Update ( T /*input*/, T &inclusive, T &exclusive, ScanOpT scan_op, T initial_value, IsIntegerT /*is_integer*/) { inclusive = scan_op(initial_value, inclusive); exclusive = ShuffleUp(inclusive, 1, 0, member_mask); if (lane_id == 0) exclusive = initial_value; } /// Update inclusive and exclusive using initial value using input and inclusive (specialized for summation of integer types) __device__ __forceinline__ void Update ( T input, T &inclusive, T &exclusive, cub::Sum scan_op, T initial_value, Int2Type /*is_integer*/) { inclusive = scan_op(initial_value, inclusive); exclusive = inclusive - input; } /// Update inclusive, exclusive, and warp aggregate using input and inclusive template __device__ __forceinline__ void Update ( T input, T &inclusive, T &exclusive, T &warp_aggregate, ScanOpT scan_op, IsIntegerT is_integer) { warp_aggregate = ShuffleIndex(inclusive, LOGICAL_WARP_THREADS - 1, LOGICAL_WARP_THREADS, member_mask); Update(input, inclusive, exclusive, scan_op, is_integer); } /// Update inclusive, exclusive, and warp aggregate using input, inclusive, and initial value template __device__ __forceinline__ void Update ( T input, T &inclusive, T &exclusive, T &warp_aggregate, ScanOpT scan_op, T initial_value, IsIntegerT is_integer) { warp_aggregate = ShuffleIndex(inclusive, LOGICAL_WARP_THREADS - 1, LOGICAL_WARP_THREADS, member_mask); Update(input, inclusive, exclusive, scan_op, initial_value, is_integer); } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/warp/specializations/warp_scan_smem.cuh000066400000000000000000000374271411340063500256730ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::WarpScanSmem provides smem-based variants of parallel prefix scan of items partitioned across a CUDA thread warp. */ #pragma once #include "../../thread/thread_operators.cuh" #include "../../thread/thread_load.cuh" #include "../../thread/thread_store.cuh" #include "../../util_type.cuh" #include "../../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief WarpScanSmem provides smem-based variants of parallel prefix scan of items partitioned across a CUDA thread warp. */ template < typename T, ///< Data type being scanned int LOGICAL_WARP_THREADS, ///< Number of threads per logical warp int PTX_ARCH> ///< The PTX compute capability for which to to specialize this collective struct WarpScanSmem { /****************************************************************************** * Constants and type definitions ******************************************************************************/ enum { /// Whether the logical warp size and the PTX warp size coincide IS_ARCH_WARP = (LOGICAL_WARP_THREADS == CUB_WARP_THREADS(PTX_ARCH)), /// Whether the logical warp size is a power-of-two IS_POW_OF_TWO = PowerOfTwo::VALUE, /// The number of warp scan steps STEPS = Log2::VALUE, /// The number of threads in half a warp HALF_WARP_THREADS = 1 << (STEPS - 1), /// The number of shared memory elements per warp WARP_SMEM_ELEMENTS = LOGICAL_WARP_THREADS + HALF_WARP_THREADS, }; /// Storage cell type (workaround for SM1x compiler bugs with custom-ops like Max() on signed chars) typedef typename If<((Equals::VALUE || Equals::VALUE) && (PTX_ARCH < 200)), int, T>::Type CellT; /// Shared memory storage layout type (1.5 warps-worth of elements for each warp) typedef CellT _TempStorage[WARP_SMEM_ELEMENTS]; // Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; /****************************************************************************** * Thread fields ******************************************************************************/ _TempStorage &temp_storage; unsigned int lane_id; unsigned int member_mask; /****************************************************************************** * Construction ******************************************************************************/ /// Constructor __device__ __forceinline__ WarpScanSmem( TempStorage &temp_storage) : temp_storage(temp_storage.Alias()), lane_id(IS_ARCH_WARP ? 
LaneId() : LaneId() % LOGICAL_WARP_THREADS), member_mask(!IS_POW_OF_TWO ? (0xffffffff >> (32 - LOGICAL_WARP_THREADS)) : // non-power-of-two subwarps cannot be tiled (0xffffffff >> (32 - LOGICAL_WARP_THREADS)) << (LaneId() / LOGICAL_WARP_THREADS)) {} /****************************************************************************** * Utility methods ******************************************************************************/ /// Basic inclusive scan iteration (template unrolled, inductive-case specialization) template < bool HAS_IDENTITY, int STEP, typename ScanOp> __device__ __forceinline__ void ScanStep( T &partial, ScanOp scan_op, Int2Type /*step*/) { const int OFFSET = 1 << STEP; // Share partial into buffer ThreadStore(&temp_storage[HALF_WARP_THREADS + lane_id], (CellT) partial); WARP_SYNC(member_mask); // Update partial if addend is in range if (HAS_IDENTITY || (lane_id >= OFFSET)) { T addend = (T) ThreadLoad(&temp_storage[HALF_WARP_THREADS + lane_id - OFFSET]); partial = scan_op(addend, partial); } WARP_SYNC(member_mask); ScanStep(partial, scan_op, Int2Type()); } /// Basic inclusive scan iteration(template unrolled, base-case specialization) template < bool HAS_IDENTITY, typename ScanOp> __device__ __forceinline__ void ScanStep( T &/*partial*/, ScanOp /*scan_op*/, Int2Type /*step*/) {} /// Inclusive prefix scan (specialized for summation across primitive types) __device__ __forceinline__ void InclusiveScan( T input, ///< [in] Calling thread's input item. T &output, ///< [out] Calling thread's output item. May be aliased with \p input. Sum scan_op, ///< [in] Binary scan operator Int2Type /*is_primitive*/) ///< [in] Marker type indicating whether T is primitive type { T identity = 0; ThreadStore(&temp_storage[lane_id], (CellT) identity); WARP_SYNC(member_mask); // Iterate scan steps output = input; ScanStep(output, scan_op, Int2Type<0>()); } /// Inclusive prefix scan template __device__ __forceinline__ void InclusiveScan( T input, ///< [in] Calling thread's input item. T &output, ///< [out] Calling thread's output item. May be aliased with \p input. ScanOp scan_op, ///< [in] Binary scan operator Int2Type /*is_primitive*/) ///< [in] Marker type indicating whether T is primitive type { // Iterate scan steps output = input; ScanStep(output, scan_op, Int2Type<0>()); } /****************************************************************************** * Interface ******************************************************************************/ //--------------------------------------------------------------------- // Broadcast //--------------------------------------------------------------------- /// Broadcast __device__ __forceinline__ T Broadcast( T input, ///< [in] The value to broadcast unsigned int src_lane) ///< [in] Which warp lane is to do the broadcasting { if (lane_id == src_lane) { ThreadStore(temp_storage, (CellT) input); } WARP_SYNC(member_mask); return (T)ThreadLoad(temp_storage); } //--------------------------------------------------------------------- // Inclusive operations //--------------------------------------------------------------------- /// Inclusive scan template __device__ __forceinline__ void InclusiveScan( T input, ///< [in] Calling thread's input item. T &inclusive_output, ///< [out] Calling thread's output item. May be aliased with \p input. 
ScanOp scan_op) ///< [in] Binary scan operator { InclusiveScan(input, inclusive_output, scan_op, Int2Type::PRIMITIVE>()); } /// Inclusive scan with aggregate template __device__ __forceinline__ void InclusiveScan( T input, ///< [in] Calling thread's input item. T &inclusive_output, ///< [out] Calling thread's output item. May be aliased with \p input. ScanOp scan_op, ///< [in] Binary scan operator T &warp_aggregate) ///< [out] Warp-wide aggregate reduction of input items. { InclusiveScan(input, inclusive_output, scan_op); // Retrieve aggregate ThreadStore(&temp_storage[HALF_WARP_THREADS + lane_id], (CellT) inclusive_output); WARP_SYNC(member_mask); warp_aggregate = (T) ThreadLoad(&temp_storage[WARP_SMEM_ELEMENTS - 1]); WARP_SYNC(member_mask); } //--------------------------------------------------------------------- // Get exclusive from inclusive //--------------------------------------------------------------------- /// Update inclusive and exclusive using input and inclusive template __device__ __forceinline__ void Update( T /*input*/, ///< [in] T &inclusive, ///< [in, out] T &exclusive, ///< [out] ScanOpT /*scan_op*/, ///< [in] IsIntegerT /*is_integer*/) ///< [in] { // initial value unknown ThreadStore(&temp_storage[HALF_WARP_THREADS + lane_id], (CellT) inclusive); WARP_SYNC(member_mask); exclusive = (T) ThreadLoad(&temp_storage[HALF_WARP_THREADS + lane_id - 1]); } /// Update inclusive and exclusive using input and inclusive (specialized for summation of integer types) __device__ __forceinline__ void Update( T input, T &inclusive, T &exclusive, cub::Sum /*scan_op*/, Int2Type /*is_integer*/) { // initial value presumed 0 exclusive = inclusive - input; } /// Update inclusive and exclusive using initial value using input, inclusive, and initial value template __device__ __forceinline__ void Update ( T /*input*/, T &inclusive, T &exclusive, ScanOpT scan_op, T initial_value, IsIntegerT /*is_integer*/) { inclusive = scan_op(initial_value, inclusive); ThreadStore(&temp_storage[HALF_WARP_THREADS + lane_id], (CellT) inclusive); WARP_SYNC(member_mask); exclusive = (T) ThreadLoad(&temp_storage[HALF_WARP_THREADS + lane_id - 1]); if (lane_id == 0) exclusive = initial_value; } /// Update inclusive and exclusive using initial value using input and inclusive (specialized for summation of integer types) __device__ __forceinline__ void Update ( T input, T &inclusive, T &exclusive, cub::Sum scan_op, T initial_value, Int2Type /*is_integer*/) { inclusive = scan_op(initial_value, inclusive); exclusive = inclusive - input; } /// Update inclusive, exclusive, and warp aggregate using input and inclusive template __device__ __forceinline__ void Update ( T /*input*/, T &inclusive, T &exclusive, T &warp_aggregate, ScanOpT /*scan_op*/, IsIntegerT /*is_integer*/) { // Initial value presumed to be unknown or identity (either way our padding is correct) ThreadStore(&temp_storage[HALF_WARP_THREADS + lane_id], (CellT) inclusive); WARP_SYNC(member_mask); exclusive = (T) ThreadLoad(&temp_storage[HALF_WARP_THREADS + lane_id - 1]); warp_aggregate = (T) ThreadLoad(&temp_storage[WARP_SMEM_ELEMENTS - 1]); } /// Update inclusive, exclusive, and warp aggregate using input and inclusive (specialized for summation of integer types) __device__ __forceinline__ void Update ( T input, T &inclusive, T &exclusive, T &warp_aggregate, cub::Sum /*scan_o*/, Int2Type /*is_integer*/) { // Initial value presumed to be unknown or identity (either way our padding is correct) ThreadStore(&temp_storage[HALF_WARP_THREADS + lane_id], (CellT) 
inclusive); WARP_SYNC(member_mask); warp_aggregate = (T) ThreadLoad(&temp_storage[WARP_SMEM_ELEMENTS - 1]); exclusive = inclusive - input; } /// Update inclusive, exclusive, and warp aggregate using input, inclusive, and initial value template __device__ __forceinline__ void Update ( T /*input*/, T &inclusive, T &exclusive, T &warp_aggregate, ScanOpT scan_op, T initial_value, IsIntegerT /*is_integer*/) { // Broadcast warp aggregate ThreadStore(&temp_storage[HALF_WARP_THREADS + lane_id], (CellT) inclusive); WARP_SYNC(member_mask); warp_aggregate = (T) ThreadLoad(&temp_storage[WARP_SMEM_ELEMENTS - 1]); WARP_SYNC(member_mask); // Update inclusive with initial value inclusive = scan_op(initial_value, inclusive); // Get exclusive from exclusive ThreadStore(&temp_storage[HALF_WARP_THREADS + lane_id - 1], (CellT) inclusive); WARP_SYNC(member_mask); exclusive = (T) ThreadLoad(&temp_storage[HALF_WARP_THREADS + lane_id - 2]); if (lane_id == 0) exclusive = initial_value; } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/warp/warp_reduce.cuh000066400000000000000000000607261411340063500217720ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * The cub::WarpReduce class provides [collective](index.html#sec0) methods for computing a parallel reduction of items partitioned across a CUDA thread warp. 
*/ #pragma once #include "specializations/warp_reduce_shfl.cuh" #include "specializations/warp_reduce_smem.cuh" #include "../thread/thread_operators.cuh" #include "../util_arch.cuh" #include "../util_type.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup WarpModule * @{ */ /** * \brief The WarpReduce class provides [collective](index.html#sec0) methods for computing a parallel reduction of items partitioned across a CUDA thread warp. ![](warp_reduce_logo.png) * * \tparam T The reduction input/output element type * \tparam LOGICAL_WARP_THREADS [optional] The number of threads per "logical" warp (may be less than the number of hardware warp threads). Default is the warp size of the targeted CUDA compute-capability (e.g., 32 threads for SM20). * \tparam PTX_ARCH [optional] \ptxversion * * \par Overview * - A reduction (or fold) * uses a binary combining operator to compute a single aggregate from a list of input elements. * - Supports "logical" warps smaller than the physical warp size (e.g., logical warps of 8 threads) * - The number of entrant threads must be an multiple of \p LOGICAL_WARP_THREADS * * \par Performance Considerations * - Uses special instructions when applicable (e.g., warp \p SHFL instructions) * - Uses synchronization-free communication between warp lanes when applicable * - Incurs zero bank conflicts for most types * - Computation is slightly more efficient (i.e., having lower instruction overhead) for: * - Summation (vs. generic reduction) * - The architecture's warp size is a whole multiple of \p LOGICAL_WARP_THREADS * * \par Simple Examples * \warpcollective{WarpReduce} * \par * The code snippet below illustrates four concurrent warp sum reductions within a block of * 128 threads (one per each of the 32-thread warps). * \par * \code * #include * * __global__ void ExampleKernel(...) * { * // Specialize WarpReduce for type int * typedef cub::WarpReduce WarpReduce; * * // Allocate WarpReduce shared memory for 4 warps * __shared__ typename WarpReduce::TempStorage temp_storage[4]; * * // Obtain one input item per thread * int thread_data = ... * * // Return the warp-wide sums to each lane0 (threads 0, 32, 64, and 96) * int warp_id = threadIdx.x / 32; * int aggregate = WarpReduce(temp_storage[warp_id]).Sum(thread_data); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is {0, 1, 2, 3, ..., 127}. * The corresponding output \p aggregate in threads 0, 32, 64, and 96 will \p 496, \p 1520, * \p 2544, and \p 3568, respectively (and is undefined in other threads). * * \par * The code snippet below illustrates a single warp sum reduction within a block of * 128 threads. * \par * \code * #include * * __global__ void ExampleKernel(...) * { * // Specialize WarpReduce for type int * typedef cub::WarpReduce WarpReduce; * * // Allocate WarpReduce shared memory for one warp * __shared__ typename WarpReduce::TempStorage temp_storage; * ... * * // Only the first warp performs a reduction * if (threadIdx.x < 32) * { * // Obtain one input item per thread * int thread_data = ... * * // Return the warp-wide sum to lane0 * int aggregate = WarpReduce(temp_storage).Sum(thread_data); * * \endcode * \par * Suppose the set of input \p thread_data across the warp of threads is {0, 1, 2, 3, ..., 31}. * The corresponding output \p aggregate in thread0 will be \p 496 (and is undefined in other threads). 
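 *
 * \par
 * As an additional illustrative sketch (not part of the original CUB documentation), the per-warp
 * aggregates that are returned to each lane0 can be combined into a block-wide sum by a second,
 * single-warp reduction; the kernel name and the fixed 128-thread block size are assumptions of
 * this sketch, not requirements of the class.
 * \par
 * \code
 * #include <cub/cub.cuh>
 *
 * __global__ void BlockSumViaWarpReduceSketch(const int *d_in, int *d_block_sums)
 * {
 *     typedef cub::WarpReduce<int> WarpReduce;
 *     __shared__ typename WarpReduce::TempStorage temp_storage[4];   // 128-thread block = 4 warps
 *     __shared__ int warp_sums[4];
 *
 *     int warp_id  = threadIdx.x / 32;
 *     int lane_id  = threadIdx.x % 32;
 *     int my_value = d_in[blockIdx.x * blockDim.x + threadIdx.x];
 *
 *     // First pass: each warp reduces its own 32 items (result valid in lane0 only)
 *     int warp_sum = WarpReduce(temp_storage[warp_id]).Sum(my_value);
 *     if (lane_id == 0)
 *         warp_sums[warp_id] = warp_sum;
 *     __syncthreads();
 *
 *     // Second pass: the first warp reduces the four per-warp partial sums
 *     if (warp_id == 0)
 *     {
 *         int partial   = (lane_id < 4) ? warp_sums[lane_id] : 0;   // 0 is a defensive placeholder
 *         int block_sum = WarpReduce(temp_storage[0]).Sum(partial, 4);
 *         if (lane_id == 0)
 *             d_block_sums[blockIdx.x] = block_sum;
 *     }
 * }
 * \endcode
 * \par
 * This two-pass pattern is essentially what cub::BlockReduce automates; it is spelled out here only
 * to make the "aggregate valid in lane0" semantics concrete.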
* */ template < typename T, int LOGICAL_WARP_THREADS = CUB_PTX_WARP_THREADS, int PTX_ARCH = CUB_PTX_ARCH> class WarpReduce { private: /****************************************************************************** * Constants and type definitions ******************************************************************************/ enum { /// Whether the logical warp size and the PTX warp size coincide IS_ARCH_WARP = (LOGICAL_WARP_THREADS == CUB_WARP_THREADS(PTX_ARCH)), /// Whether the logical warp size is a power-of-two IS_POW_OF_TWO = PowerOfTwo::VALUE, }; public: #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document /// Internal specialization. Use SHFL-based reduction if (architecture is >= SM30) and (LOGICAL_WARP_THREADS is a power-of-two) typedef typename If<(PTX_ARCH >= 300) && (IS_POW_OF_TWO), WarpReduceShfl, WarpReduceSmem >::Type InternalWarpReduce; #endif // DOXYGEN_SHOULD_SKIP_THIS private: /// Shared memory storage layout type for WarpReduce typedef typename InternalWarpReduce::TempStorage _TempStorage; /****************************************************************************** * Thread fields ******************************************************************************/ /// Shared storage reference _TempStorage &temp_storage; /****************************************************************************** * Utility methods ******************************************************************************/ public: /// \smemstorage{WarpReduce} struct TempStorage : Uninitialized<_TempStorage> {}; /******************************************************************//** * \name Collective constructors *********************************************************************/ //@{ /** * \brief Collective constructor using the specified memory allocation as temporary storage. Logical warp and lane identifiers are constructed from threadIdx.x. */ __device__ __forceinline__ WarpReduce( TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage : temp_storage(temp_storage.Alias()) {} //@} end member group /******************************************************************//** * \name Summation reductions *********************************************************************/ //@{ /** * \brief Computes a warp-wide sum in the calling warp. The output is valid in warp lane0. * * \smemreuse * * \par Snippet * The code snippet below illustrates four concurrent warp sum reductions within a block of * 128 threads (one per each of the 32-thread warps). * \par * \code * #include * * __global__ void ExampleKernel(...) * { * // Specialize WarpReduce for type int * typedef cub::WarpReduce WarpReduce; * * // Allocate WarpReduce shared memory for 4 warps * __shared__ typename WarpReduce::TempStorage temp_storage[4]; * * // Obtain one input item per thread * int thread_data = ... * * // Return the warp-wide sums to each lane0 * int warp_id = threadIdx.x / 32; * int aggregate = WarpReduce(temp_storage[warp_id]).Sum(thread_data); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is {0, 1, 2, 3, ..., 127}. * The corresponding output \p aggregate in threads 0, 32, 64, and 96 will \p 496, \p 1520, * \p 2544, and \p 3568, respectively (and is undefined in other threads). * */ __device__ __forceinline__ T Sum( T input) ///< [in] Calling thread's input { return InternalWarpReduce(temp_storage).template Reduce(input, LOGICAL_WARP_THREADS, cub::Sum()); } /** * \brief Computes a partially-full warp-wide sum in the calling warp. 
The output is valid in warp lane0. * * All threads across the calling warp must agree on the same value for \p valid_items. Otherwise the result is undefined. * * \smemreuse * * \par Snippet * The code snippet below illustrates a sum reduction within a single, partially-full * block of 32 threads (one warp). * \par * \code * #include * * __global__ void ExampleKernel(int *d_data, int valid_items) * { * // Specialize WarpReduce for type int * typedef cub::WarpReduce WarpReduce; * * // Allocate WarpReduce shared memory for one warp * __shared__ typename WarpReduce::TempStorage temp_storage; * * // Obtain one input item per thread if in range * int thread_data; * if (threadIdx.x < valid_items) * thread_data = d_data[threadIdx.x]; * * // Return the warp-wide sums to each lane0 * int aggregate = WarpReduce(temp_storage).Sum( * thread_data, valid_items); * * \endcode * \par * Suppose the input \p d_data is {0, 1, 2, 3, 4, ... and \p valid_items * is \p 4. The corresponding output \p aggregate in thread0 is \p 6 (and is * undefined in other threads). * */ __device__ __forceinline__ T Sum( T input, ///< [in] Calling thread's input int valid_items) ///< [in] Total number of valid items in the calling thread's logical warp (may be less than \p LOGICAL_WARP_THREADS) { // Determine if we don't need bounds checking return InternalWarpReduce(temp_storage).template Reduce(input, valid_items, cub::Sum()); } /** * \brief Computes a segmented sum in the calling warp where segments are defined by head-flags. The sum of each segment is returned to the first lane in that segment (which always includes lane0). * * \smemreuse * * \par Snippet * The code snippet below illustrates a head-segmented warp sum * reduction within a block of 32 threads (one warp). * \par * \code * #include * * __global__ void ExampleKernel(...) * { * // Specialize WarpReduce for type int * typedef cub::WarpReduce WarpReduce; * * // Allocate WarpReduce shared memory for one warp * __shared__ typename WarpReduce::TempStorage temp_storage; * * // Obtain one input item and flag per thread * int thread_data = ... * int head_flag = ... * * // Return the warp-wide sums to each lane0 * int aggregate = WarpReduce(temp_storage).HeadSegmentedSum( * thread_data, head_flag); * * \endcode * \par * Suppose the set of input \p thread_data and \p head_flag across the block of threads * is {0, 1, 2, 3, ..., 31 and is {1, 0, 0, 0, 1, 0, 0, 0, ..., 1, 0, 0, 0, * respectively. The corresponding output \p aggregate in threads 0, 4, 8, etc. will be * \p 6, \p 22, \p 38, etc. (and is undefined in other threads). * * \tparam ReductionOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b) * */ template < typename FlagT> __device__ __forceinline__ T HeadSegmentedSum( T input, ///< [in] Calling thread's input FlagT head_flag) ///< [in] Head flag denoting whether or not \p input is the start of a new segment { return HeadSegmentedReduce(input, head_flag, cub::Sum()); } /** * \brief Computes a segmented sum in the calling warp where segments are defined by tail-flags. The sum of each segment is returned to the first lane in that segment (which always includes lane0). * * \smemreuse * * \par Snippet * The code snippet below illustrates a tail-segmented warp sum * reduction within a block of 32 threads (one warp). * \par * \code * #include * * __global__ void ExampleKernel(...) 
* { * // Specialize WarpReduce for type int * typedef cub::WarpReduce WarpReduce; * * // Allocate WarpReduce shared memory for one warp * __shared__ typename WarpReduce::TempStorage temp_storage; * * // Obtain one input item and flag per thread * int thread_data = ... * int tail_flag = ... * * // Return the warp-wide sums to each lane0 * int aggregate = WarpReduce(temp_storage).TailSegmentedSum( * thread_data, tail_flag); * * \endcode * \par * Suppose the set of input \p thread_data and \p tail_flag across the block of threads * is {0, 1, 2, 3, ..., 31 and is {0, 0, 0, 1, 0, 0, 0, 1, ..., 0, 0, 0, 1, * respectively. The corresponding output \p aggregate in threads 0, 4, 8, etc. will be * \p 6, \p 22, \p 38, etc. (and is undefined in other threads). * * \tparam ReductionOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b) */ template < typename FlagT> __device__ __forceinline__ T TailSegmentedSum( T input, ///< [in] Calling thread's input FlagT tail_flag) ///< [in] Head flag denoting whether or not \p input is the start of a new segment { return TailSegmentedReduce(input, tail_flag, cub::Sum()); } //@} end member group /******************************************************************//** * \name Generic reductions *********************************************************************/ //@{ /** * \brief Computes a warp-wide reduction in the calling warp using the specified binary reduction functor. The output is valid in warp lane0. * * Supports non-commutative reduction operators * * \smemreuse * * \par Snippet * The code snippet below illustrates four concurrent warp max reductions within a block of * 128 threads (one per each of the 32-thread warps). * \par * \code * #include * * __global__ void ExampleKernel(...) * { * // Specialize WarpReduce for type int * typedef cub::WarpReduce WarpReduce; * * // Allocate WarpReduce shared memory for 4 warps * __shared__ typename WarpReduce::TempStorage temp_storage[4]; * * // Obtain one input item per thread * int thread_data = ... * * // Return the warp-wide reductions to each lane0 * int warp_id = threadIdx.x / 32; * int aggregate = WarpReduce(temp_storage[warp_id]).Reduce( * thread_data, cub::Max()); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is {0, 1, 2, 3, ..., 127}. * The corresponding output \p aggregate in threads 0, 32, 64, and 96 will \p 31, \p 63, * \p 95, and \p 127, respectively (and is undefined in other threads). * * \tparam ReductionOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b) */ template __device__ __forceinline__ T Reduce( T input, ///< [in] Calling thread's input ReductionOp reduction_op) ///< [in] Binary reduction operator { return InternalWarpReduce(temp_storage).template Reduce(input, LOGICAL_WARP_THREADS, reduction_op); } /** * \brief Computes a partially-full warp-wide reduction in the calling warp using the specified binary reduction functor. The output is valid in warp lane0. * * All threads across the calling warp must agree on the same value for \p valid_items. Otherwise the result is undefined. * * Supports non-commutative reduction operators * * \smemreuse * * \par Snippet * The code snippet below illustrates a max reduction within a single, partially-full * block of 32 threads (one warp). 
* \par * \code * #include * * __global__ void ExampleKernel(int *d_data, int valid_items) * { * // Specialize WarpReduce for type int * typedef cub::WarpReduce WarpReduce; * * // Allocate WarpReduce shared memory for one warp * __shared__ typename WarpReduce::TempStorage temp_storage; * * // Obtain one input item per thread if in range * int thread_data; * if (threadIdx.x < valid_items) * thread_data = d_data[threadIdx.x]; * * // Return the warp-wide reductions to each lane0 * int aggregate = WarpReduce(temp_storage).Reduce( * thread_data, cub::Max(), valid_items); * * \endcode * \par * Suppose the input \p d_data is {0, 1, 2, 3, 4, ... and \p valid_items * is \p 4. The corresponding output \p aggregate in thread0 is \p 3 (and is * undefined in other threads). * * \tparam ReductionOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b) */ template __device__ __forceinline__ T Reduce( T input, ///< [in] Calling thread's input ReductionOp reduction_op, ///< [in] Binary reduction operator int valid_items) ///< [in] Total number of valid items in the calling thread's logical warp (may be less than \p LOGICAL_WARP_THREADS) { return InternalWarpReduce(temp_storage).template Reduce(input, valid_items, reduction_op); } /** * \brief Computes a segmented reduction in the calling warp where segments are defined by head-flags. The reduction of each segment is returned to the first lane in that segment (which always includes lane0). * * Supports non-commutative reduction operators * * \smemreuse * * \par Snippet * The code snippet below illustrates a head-segmented warp max * reduction within a block of 32 threads (one warp). * \par * \code * #include * * __global__ void ExampleKernel(...) * { * // Specialize WarpReduce for type int * typedef cub::WarpReduce WarpReduce; * * // Allocate WarpReduce shared memory for one warp * __shared__ typename WarpReduce::TempStorage temp_storage; * * // Obtain one input item and flag per thread * int thread_data = ... * int head_flag = ... * * // Return the warp-wide reductions to each lane0 * int aggregate = WarpReduce(temp_storage).HeadSegmentedReduce( * thread_data, head_flag, cub::Max()); * * \endcode * \par * Suppose the set of input \p thread_data and \p head_flag across the block of threads * is {0, 1, 2, 3, ..., 31 and is {1, 0, 0, 0, 1, 0, 0, 0, ..., 1, 0, 0, 0, * respectively. The corresponding output \p aggregate in threads 0, 4, 8, etc. will be * \p 3, \p 7, \p 11, etc. (and is undefined in other threads). * * \tparam ReductionOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b) */ template < typename ReductionOp, typename FlagT> __device__ __forceinline__ T HeadSegmentedReduce( T input, ///< [in] Calling thread's input FlagT head_flag, ///< [in] Head flag denoting whether or not \p input is the start of a new segment ReductionOp reduction_op) ///< [in] Reduction operator { return InternalWarpReduce(temp_storage).template SegmentedReduce(input, head_flag, reduction_op); } /** * \brief Computes a segmented reduction in the calling warp where segments are defined by tail-flags. The reduction of each segment is returned to the first lane in that segment (which always includes lane0). * * Supports non-commutative reduction operators * * \smemreuse * * \par Snippet * The code snippet below illustrates a tail-segmented warp max * reduction within a block of 32 threads (one warp). * \par * \code * #include * * __global__ void ExampleKernel(...) 
* { * // Specialize WarpReduce for type int * typedef cub::WarpReduce WarpReduce; * * // Allocate WarpReduce shared memory for one warp * __shared__ typename WarpReduce::TempStorage temp_storage; * * // Obtain one input item and flag per thread * int thread_data = ... * int tail_flag = ... * * // Return the warp-wide reductions to each lane0 * int aggregate = WarpReduce(temp_storage).TailSegmentedReduce( * thread_data, tail_flag, cub::Max()); * * \endcode * \par * Suppose the set of input \p thread_data and \p tail_flag across the block of threads * is {0, 1, 2, 3, ..., 31 and is {0, 0, 0, 1, 0, 0, 0, 1, ..., 0, 0, 0, 1, * respectively. The corresponding output \p aggregate in threads 0, 4, 8, etc. will be * \p 3, \p 7, \p 11, etc. (and is undefined in other threads). * * \tparam ReductionOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b) */ template < typename ReductionOp, typename FlagT> __device__ __forceinline__ T TailSegmentedReduce( T input, ///< [in] Calling thread's input FlagT tail_flag, ///< [in] Tail flag denoting whether or not \p input is the end of the current segment ReductionOp reduction_op) ///< [in] Reduction operator { return InternalWarpReduce(temp_storage).template SegmentedReduce(input, tail_flag, reduction_op); } //@} end member group }; /** @} */ // end group WarpModule } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cub/warp/warp_scan.cuh000066400000000000000000001140031411340063500214330ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * The cub::WarpScan class provides [collective](index.html#sec0) methods for computing a parallel prefix scan of items partitioned across a CUDA thread warp. 
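 *
 * \par
 * As an illustrative sketch (not part of the original CUB documentation), an exclusive prefix sum of
 * 0/1 "keep" flags yields each lane's write offset for warp-level stream compaction; the kernel name
 * and the device buffers below are hypothetical.
 * \par
 * \code
 * #include <cub/cub.cuh>
 *
 * __global__ void WarpCompactSketch(const int *d_in, const int *d_keep, int *d_out)
 * {
 *     typedef cub::WarpScan<int> WarpScan;
 *     __shared__ typename WarpScan::TempStorage temp_storage;
 *
 *     if (threadIdx.x < 32)   // only the first warp participates in this sketch
 *     {
 *         int keep = d_keep[threadIdx.x];   // 1 to keep this lane's item, 0 to drop it
 *         int offset;
 *         WarpScan(temp_storage).ExclusiveSum(keep, offset);
 *
 *         if (keep)                         // offset = number of kept items in lower lanes
 *             d_out[offset] = d_in[threadIdx.x];
 *     }
 * }
 * \endcode
 * \par
 * This offset computation is the building block behind warp- and block-level select/partition
 * operations; it is included here only to make the semantics of ExclusiveSum concrete.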
*/ #pragma once #include "specializations/warp_scan_shfl.cuh" #include "specializations/warp_scan_smem.cuh" #include "../thread/thread_operators.cuh" #include "../util_arch.cuh" #include "../util_type.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup WarpModule * @{ */ /** * \brief The WarpScan class provides [collective](index.html#sec0) methods for computing a parallel prefix scan of items partitioned across a CUDA thread warp. ![](warp_scan_logo.png) * * \tparam T The scan input/output element type * \tparam LOGICAL_WARP_THREADS [optional] The number of threads per "logical" warp (may be less than the number of hardware warp threads). Default is the warp size associated with the CUDA Compute Capability targeted by the compiler (e.g., 32 threads for SM20). * \tparam PTX_ARCH [optional] \ptxversion * * \par Overview * - Given a list of input elements and a binary reduction operator, a [prefix scan](http://en.wikipedia.org/wiki/Prefix_sum) * produces an output list where each element is computed to be the reduction * of the elements occurring earlier in the input list. Prefix sum * connotes a prefix scan with the addition operator. The term \em inclusive indicates * that the ith output reduction incorporates the ith input. * The term \em exclusive indicates the ith input is not incorporated into * the ith output reduction. * - Supports non-commutative scan operators * - Supports "logical" warps smaller than the physical warp size (e.g., a logical warp of 8 threads) * - The number of entrant threads must be an multiple of \p LOGICAL_WARP_THREADS * * \par Performance Considerations * - Uses special instructions when applicable (e.g., warp \p SHFL) * - Uses synchronization-free communication between warp lanes when applicable * - Incurs zero bank conflicts for most types * - Computation is slightly more efficient (i.e., having lower instruction overhead) for: * - Summation (vs. generic scan) * - The architecture's warp size is a whole multiple of \p LOGICAL_WARP_THREADS * * \par Simple Examples * \warpcollective{WarpScan} * \par * The code snippet below illustrates four concurrent warp prefix sums within a block of * 128 threads (one per each of the 32-thread warps). * \par * \code * #include * * __global__ void ExampleKernel(...) * { * // Specialize WarpScan for type int * typedef cub::WarpScan WarpScan; * * // Allocate WarpScan shared memory for 4 warps * __shared__ typename WarpScan::TempStorage temp_storage[4]; * * // Obtain one input item per thread * int thread_data = ... * * // Compute warp-wide prefix sums * int warp_id = threadIdx.x / 32; * WarpScan(temp_storage[warp_id]).ExclusiveSum(thread_data, thread_data); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is {1, 1, 1, 1, ...}. * The corresponding output \p thread_data in each of the four warps of threads will be * 0, 1, 2, 3, ..., 31}. * * \par * The code snippet below illustrates a single warp prefix sum within a block of * 128 threads. * \par * \code * #include * * __global__ void ExampleKernel(...) * { * // Specialize WarpScan for type int * typedef cub::WarpScan WarpScan; * * // Allocate WarpScan shared memory for one warp * __shared__ typename WarpScan::TempStorage temp_storage; * ... * * // Only the first warp performs a prefix sum * if (threadIdx.x < 32) * { * // Obtain one input item per thread * int thread_data = ... 
* * // Compute warp-wide prefix sums * WarpScan(temp_storage).ExclusiveSum(thread_data, thread_data); * * \endcode * \par * Suppose the set of input \p thread_data across the warp of threads is {1, 1, 1, 1, ...}. * The corresponding output \p thread_data will be {0, 1, 2, 3, ..., 31}. * */ template < typename T, int LOGICAL_WARP_THREADS = CUB_PTX_WARP_THREADS, int PTX_ARCH = CUB_PTX_ARCH> class WarpScan { private: /****************************************************************************** * Constants and type definitions ******************************************************************************/ enum { /// Whether the logical warp size and the PTX warp size coincide IS_ARCH_WARP = (LOGICAL_WARP_THREADS == CUB_WARP_THREADS(PTX_ARCH)), /// Whether the logical warp size is a power-of-two IS_POW_OF_TWO = ((LOGICAL_WARP_THREADS & (LOGICAL_WARP_THREADS - 1)) == 0), /// Whether the data type is an integer (which has fully-associative addition) IS_INTEGER = ((Traits::CATEGORY == SIGNED_INTEGER) || (Traits::CATEGORY == UNSIGNED_INTEGER)) }; /// Internal specialization. Use SHFL-based scan if (architecture is >= SM30) and (LOGICAL_WARP_THREADS is a power-of-two) typedef typename If<(PTX_ARCH >= 300) && (IS_POW_OF_TWO), WarpScanShfl, WarpScanSmem >::Type InternalWarpScan; /// Shared memory storage layout type for WarpScan typedef typename InternalWarpScan::TempStorage _TempStorage; /****************************************************************************** * Thread fields ******************************************************************************/ /// Shared storage reference _TempStorage &temp_storage; unsigned int lane_id; /****************************************************************************** * Public types ******************************************************************************/ public: /// \smemstorage{WarpScan} struct TempStorage : Uninitialized<_TempStorage> {}; /******************************************************************//** * \name Collective constructors *********************************************************************/ //@{ /** * \brief Collective constructor using the specified memory allocation as temporary storage. Logical warp and lane identifiers are constructed from threadIdx.x. */ __device__ __forceinline__ WarpScan( TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage : temp_storage(temp_storage.Alias()), lane_id(IS_ARCH_WARP ? LaneId() : LaneId() % LOGICAL_WARP_THREADS) {} //@} end member group /******************************************************************//** * \name Inclusive prefix sums *********************************************************************/ //@{ /** * \brief Computes an inclusive prefix sum across the calling warp. * * \par * - \smemreuse * * \par Snippet * The code snippet below illustrates four concurrent warp-wide inclusive prefix sums within a block of * 128 threads (one per each of the 32-thread warps). * \par * \code * #include * * __global__ void ExampleKernel(...) * { * // Specialize WarpScan for type int * typedef cub::WarpScan WarpScan; * * // Allocate WarpScan shared memory for 4 warps * __shared__ typename WarpScan::TempStorage temp_storage[4]; * * // Obtain one input item per thread * int thread_data = ... 
* * // Compute inclusive warp-wide prefix sums * int warp_id = threadIdx.x / 32; * WarpScan(temp_storage[warp_id]).InclusiveSum(thread_data, thread_data); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is {1, 1, 1, 1, ...}. * The corresponding output \p thread_data in each of the four warps of threads will be * 1, 2, 3, ..., 32}. */ __device__ __forceinline__ void InclusiveSum( T input, ///< [in] Calling thread's input item. T &inclusive_output) ///< [out] Calling thread's output item. May be aliased with \p input. { InclusiveScan(input, inclusive_output, cub::Sum()); } /** * \brief Computes an inclusive prefix sum across the calling warp. Also provides every thread with the warp-wide \p warp_aggregate of all inputs. * * \par * - \smemreuse * * \par Snippet * The code snippet below illustrates four concurrent warp-wide inclusive prefix sums within a block of * 128 threads (one per each of the 32-thread warps). * \par * \code * #include * * __global__ void ExampleKernel(...) * { * // Specialize WarpScan for type int * typedef cub::WarpScan WarpScan; * * // Allocate WarpScan shared memory for 4 warps * __shared__ typename WarpScan::TempStorage temp_storage[4]; * * // Obtain one input item per thread * int thread_data = ... * * // Compute inclusive warp-wide prefix sums * int warp_aggregate; * int warp_id = threadIdx.x / 32; * WarpScan(temp_storage[warp_id]).InclusiveSum(thread_data, thread_data, warp_aggregate); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is {1, 1, 1, 1, ...}. * The corresponding output \p thread_data in each of the four warps of threads will be * 1, 2, 3, ..., 32}. Furthermore, \p warp_aggregate for all threads in all warps will be \p 32. */ __device__ __forceinline__ void InclusiveSum( T input, ///< [in] Calling thread's input item. T &inclusive_output, ///< [out] Calling thread's output item. May be aliased with \p input. T &warp_aggregate) ///< [out] Warp-wide aggregate reduction of input items. { InclusiveScan(input, inclusive_output, cub::Sum(), warp_aggregate); } //@} end member group /******************************************************************//** * \name Exclusive prefix sums *********************************************************************/ //@{ /** * \brief Computes an exclusive prefix sum across the calling warp. The value of 0 is applied as the initial value, and is assigned to \p exclusive_output in thread0. * * \par * - \identityzero * - \smemreuse * * \par Snippet * The code snippet below illustrates four concurrent warp-wide exclusive prefix sums within a block of * 128 threads (one per each of the 32-thread warps). * \par * \code * #include * * __global__ void ExampleKernel(...) * { * // Specialize WarpScan for type int * typedef cub::WarpScan WarpScan; * * // Allocate WarpScan shared memory for 4 warps * __shared__ typename WarpScan::TempStorage temp_storage[4]; * * // Obtain one input item per thread * int thread_data = ... * * // Compute exclusive warp-wide prefix sums * int warp_id = threadIdx.x / 32; * WarpScan(temp_storage[warp_id]).ExclusiveSum(thread_data, thread_data); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is {1, 1, 1, 1, ...}. * The corresponding output \p thread_data in each of the four warps of threads will be * 0, 1, 2, ..., 31}. * */ __device__ __forceinline__ void ExclusiveSum( T input, ///< [in] Calling thread's input item. T &exclusive_output) ///< [out] Calling thread's output item. 
May be aliased with \p input. { T initial_value = 0; ExclusiveScan(input, exclusive_output, initial_value, cub::Sum()); } /** * \brief Computes an exclusive prefix sum across the calling warp. The value of 0 is applied as the initial value, and is assigned to \p exclusive_output in thread0. Also provides every thread with the warp-wide \p warp_aggregate of all inputs. * * \par * - \identityzero * - \smemreuse * * \par Snippet * The code snippet below illustrates four concurrent warp-wide exclusive prefix sums within a block of * 128 threads (one per each of the 32-thread warps). * \par * \code * #include * * __global__ void ExampleKernel(...) * { * // Specialize WarpScan for type int * typedef cub::WarpScan WarpScan; * * // Allocate WarpScan shared memory for 4 warps * __shared__ typename WarpScan::TempStorage temp_storage[4]; * * // Obtain one input item per thread * int thread_data = ... * * // Compute exclusive warp-wide prefix sums * int warp_aggregate; * int warp_id = threadIdx.x / 32; * WarpScan(temp_storage[warp_id]).ExclusiveSum(thread_data, thread_data, warp_aggregate); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is {1, 1, 1, 1, ...}. * The corresponding output \p thread_data in each of the four warps of threads will be * 0, 1, 2, ..., 31}. Furthermore, \p warp_aggregate for all threads in all warps will be \p 32. */ __device__ __forceinline__ void ExclusiveSum( T input, ///< [in] Calling thread's input item. T &exclusive_output, ///< [out] Calling thread's output item. May be aliased with \p input. T &warp_aggregate) ///< [out] Warp-wide aggregate reduction of input items. { T initial_value = 0; ExclusiveScan(input, exclusive_output, initial_value, cub::Sum(), warp_aggregate); } //@} end member group /******************************************************************//** * \name Inclusive prefix scans *********************************************************************/ //@{ /** * \brief Computes an inclusive prefix scan using the specified binary scan functor across the calling warp. * * \par * - \smemreuse * * \par Snippet * The code snippet below illustrates four concurrent warp-wide inclusive prefix max scans within a block of * 128 threads (one per each of the 32-thread warps). * \par * \code * #include * * __global__ void ExampleKernel(...) * { * // Specialize WarpScan for type int * typedef cub::WarpScan WarpScan; * * // Allocate WarpScan shared memory for 4 warps * __shared__ typename WarpScan::TempStorage temp_storage[4]; * * // Obtain one input item per thread * int thread_data = ... * * // Compute inclusive warp-wide prefix max scans * int warp_id = threadIdx.x / 32; * WarpScan(temp_storage[warp_id]).InclusiveScan(thread_data, thread_data, cub::Max()); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is {0, -1, 2, -3, ..., 126, -127}. * The corresponding output \p thread_data in the first warp would be * 0, 0, 2, 2, ..., 30, 30, the output for the second warp would be 32, 32, 34, 34, ..., 62, 62, etc. * * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) */ template __device__ __forceinline__ void InclusiveScan( T input, ///< [in] Calling thread's input item. T &inclusive_output, ///< [out] Calling thread's output item. May be aliased with \p input. 
ScanOp scan_op) ///< [in] Binary scan operator { InternalWarpScan(temp_storage).InclusiveScan(input, inclusive_output, scan_op); } /** * \brief Computes an inclusive prefix scan using the specified binary scan functor across the calling warp. Also provides every thread with the warp-wide \p warp_aggregate of all inputs. * * \par * - \smemreuse * * \par Snippet * The code snippet below illustrates four concurrent warp-wide inclusive prefix max scans within a block of * 128 threads (one per each of the 32-thread warps). * \par * \code * #include * * __global__ void ExampleKernel(...) * { * // Specialize WarpScan for type int * typedef cub::WarpScan WarpScan; * * // Allocate WarpScan shared memory for 4 warps * __shared__ typename WarpScan::TempStorage temp_storage[4]; * * // Obtain one input item per thread * int thread_data = ... * * // Compute inclusive warp-wide prefix max scans * int warp_aggregate; * int warp_id = threadIdx.x / 32; * WarpScan(temp_storage[warp_id]).InclusiveScan( * thread_data, thread_data, cub::Max(), warp_aggregate); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is {0, -1, 2, -3, ..., 126, -127}. * The corresponding output \p thread_data in the first warp would be * 0, 0, 2, 2, ..., 30, 30, the output for the second warp would be 32, 32, 34, 34, ..., 62, 62, etc. * Furthermore, \p warp_aggregate would be assigned \p 30 for threads in the first warp, \p 62 for threads * in the second warp, etc. * * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) */ template __device__ __forceinline__ void InclusiveScan( T input, ///< [in] Calling thread's input item. T &inclusive_output, ///< [out] Calling thread's output item. May be aliased with \p input. ScanOp scan_op, ///< [in] Binary scan operator T &warp_aggregate) ///< [out] Warp-wide aggregate reduction of input items. { InternalWarpScan(temp_storage).InclusiveScan(input, inclusive_output, scan_op, warp_aggregate); } //@} end member group /******************************************************************//** * \name Exclusive prefix scans *********************************************************************/ //@{ /** * \brief Computes an exclusive prefix scan using the specified binary scan functor across the calling warp. Because no initial value is supplied, the \p output computed for warp-lane0 is undefined. * * \par * - \smemreuse * * \par Snippet * The code snippet below illustrates four concurrent warp-wide exclusive prefix max scans within a block of * 128 threads (one per each of the 32-thread warps). * \par * \code * #include * * __global__ void ExampleKernel(...) * { * // Specialize WarpScan for type int * typedef cub::WarpScan WarpScan; * * // Allocate WarpScan shared memory for 4 warps * __shared__ typename WarpScan::TempStorage temp_storage[4]; * * // Obtain one input item per thread * int thread_data = ... * * // Compute exclusive warp-wide prefix max scans * int warp_id = threadIdx.x / 32; * WarpScan(temp_storage[warp_id]).ExclusiveScan(thread_data, thread_data, cub::Max()); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is {0, -1, 2, -3, ..., 126, -127}. * The corresponding output \p thread_data in the first warp would be * ?, 0, 0, 2, ..., 28, 30, the output for the second warp would be ?, 32, 32, 34, ..., 60, 62, etc. * (The output \p thread_data in warp lane0 is undefined.) 
* * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) */ template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input item. T &exclusive_output, ///< [out] Calling thread's output item. May be aliased with \p input. ScanOp scan_op) ///< [in] Binary scan operator { InternalWarpScan internal(temp_storage); T inclusive_output; internal.InclusiveScan(input, inclusive_output, scan_op); internal.Update( input, inclusive_output, exclusive_output, scan_op, Int2Type()); } /** * \brief Computes an exclusive prefix scan using the specified binary scan functor across the calling warp. * * \par * - \smemreuse * * \par Snippet * The code snippet below illustrates four concurrent warp-wide exclusive prefix max scans within a block of * 128 threads (one per each of the 32-thread warps). * \par * \code * #include * * __global__ void ExampleKernel(...) * { * // Specialize WarpScan for type int * typedef cub::WarpScan WarpScan; * * // Allocate WarpScan shared memory for 4 warps * __shared__ typename WarpScan::TempStorage temp_storage[4]; * * // Obtain one input item per thread * int thread_data = ... * * // Compute exclusive warp-wide prefix max scans * int warp_id = threadIdx.x / 32; * WarpScan(temp_storage[warp_id]).ExclusiveScan(thread_data, thread_data, INT_MIN, cub::Max()); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is {0, -1, 2, -3, ..., 126, -127}. * The corresponding output \p thread_data in the first warp would be * INT_MIN, 0, 0, 2, ..., 28, 30, the output for the second warp would be 30, 32, 32, 34, ..., 60, 62, etc. * * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) */ template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input item. T &exclusive_output, ///< [out] Calling thread's output item. May be aliased with \p input. T initial_value, ///< [in] Initial value to seed the exclusive scan ScanOp scan_op) ///< [in] Binary scan operator { InternalWarpScan internal(temp_storage); T inclusive_output; internal.InclusiveScan(input, inclusive_output, scan_op); internal.Update( input, inclusive_output, exclusive_output, scan_op, initial_value, Int2Type()); } /** * \brief Computes an exclusive prefix scan using the specified binary scan functor across the calling warp. Because no initial value is supplied, the \p output computed for warp-lane0 is undefined. Also provides every thread with the warp-wide \p warp_aggregate of all inputs. * * \par * - \smemreuse * * \par Snippet * The code snippet below illustrates four concurrent warp-wide exclusive prefix max scans within a block of * 128 threads (one per each of the 32-thread warps). * \par * \code * #include * * __global__ void ExampleKernel(...) * { * // Specialize WarpScan for type int * typedef cub::WarpScan WarpScan; * * // Allocate WarpScan shared memory for 4 warps * __shared__ typename WarpScan::TempStorage temp_storage[4]; * * // Obtain one input item per thread * int thread_data = ... * * // Compute exclusive warp-wide prefix max scans * int warp_aggregate; * int warp_id = threadIdx.x / 32; * WarpScan(temp_storage[warp_id]).ExclusiveScan(thread_data, thread_data, cub::Max(), warp_aggregate); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is {0, -1, 2, -3, ..., 126, -127}. 
* The corresponding output \p thread_data in the first warp would be * ?, 0, 0, 2, ..., 28, 30, the output for the second warp would be ?, 32, 32, 34, ..., 60, 62, etc. * (The output \p thread_data in warp lane0 is undefined.) Furthermore, \p warp_aggregate would be assigned \p 30 for threads in the first warp, \p 62 for threads * in the second warp, etc. * * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) */ template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input item. T &exclusive_output, ///< [out] Calling thread's output item. May be aliased with \p input. ScanOp scan_op, ///< [in] Binary scan operator T &warp_aggregate) ///< [out] Warp-wide aggregate reduction of input items. { InternalWarpScan internal(temp_storage); T inclusive_output; internal.InclusiveScan(input, inclusive_output, scan_op); internal.Update( input, inclusive_output, exclusive_output, warp_aggregate, scan_op, Int2Type()); } /** * \brief Computes an exclusive prefix scan using the specified binary scan functor across the calling warp. Also provides every thread with the warp-wide \p warp_aggregate of all inputs. * * \par * - \smemreuse * * \par Snippet * The code snippet below illustrates four concurrent warp-wide exclusive prefix max scans within a block of * 128 threads (one per each of the 32-thread warps). * \par * \code * #include * * __global__ void ExampleKernel(...) * { * // Specialize WarpScan for type int * typedef cub::WarpScan WarpScan; * * // Allocate WarpScan shared memory for 4 warps * __shared__ typename WarpScan::TempStorage temp_storage[4]; * * // Obtain one input item per thread * int thread_data = ... * * // Compute exclusive warp-wide prefix max scans * int warp_aggregate; * int warp_id = threadIdx.x / 32; * WarpScan(temp_storage[warp_id]).ExclusiveScan(thread_data, thread_data, INT_MIN, cub::Max(), warp_aggregate); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is {0, -1, 2, -3, ..., 126, -127}. * The corresponding output \p thread_data in the first warp would be * INT_MIN, 0, 0, 2, ..., 28, 30, the output for the second warp would be 30, 32, 32, 34, ..., 60, 62, etc. * Furthermore, \p warp_aggregate would be assigned \p 30 for threads in the first warp, \p 62 for threads * in the second warp, etc. * * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) */ template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input item. T &exclusive_output, ///< [out] Calling thread's output item. May be aliased with \p input. T initial_value, ///< [in] Initial value to seed the exclusive scan ScanOp scan_op, ///< [in] Binary scan operator T &warp_aggregate) ///< [out] Warp-wide aggregate reduction of input items. { InternalWarpScan internal(temp_storage); T inclusive_output; internal.InclusiveScan(input, inclusive_output, scan_op); internal.Update( input, inclusive_output, exclusive_output, warp_aggregate, scan_op, initial_value, Int2Type()); } //@} end member group /******************************************************************//** * \name Combination (inclusive & exclusive) prefix scans *********************************************************************/ //@{ /** * \brief Computes both inclusive and exclusive prefix scans using the specified binary scan functor across the calling warp. 
Because no initial value is supplied, the \p exclusive_output computed for warp-lane0 is undefined. * * \par * - \smemreuse * * \par Snippet * The code snippet below illustrates four concurrent warp-wide exclusive prefix max scans within a block of * 128 threads (one per each of the 32-thread warps). * \par * \code * #include * * __global__ void ExampleKernel(...) * { * // Specialize WarpScan for type int * typedef cub::WarpScan WarpScan; * * // Allocate WarpScan shared memory for 4 warps * __shared__ typename WarpScan::TempStorage temp_storage[4]; * * // Obtain one input item per thread * int thread_data = ... * * // Compute exclusive warp-wide prefix max scans * int inclusive_partial, exclusive_partial; * WarpScan(temp_storage[warp_id]).Scan(thread_data, inclusive_partial, exclusive_partial, cub::Max()); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is {0, -1, 2, -3, ..., 126, -127}. * The corresponding output \p inclusive_partial in the first warp would be * 0, 0, 2, 2, ..., 30, 30, the output for the second warp would be 32, 32, 34, 34, ..., 62, 62, etc. * The corresponding output \p exclusive_partial in the first warp would be * ?, 0, 0, 2, ..., 28, 30, the output for the second warp would be ?, 32, 32, 34, ..., 60, 62, etc. * (The output \p thread_data in warp lane0 is undefined.) * * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) */ template __device__ __forceinline__ void Scan( T input, ///< [in] Calling thread's input item. T &inclusive_output, ///< [out] Calling thread's inclusive-scan output item. T &exclusive_output, ///< [out] Calling thread's exclusive-scan output item. ScanOp scan_op) ///< [in] Binary scan operator { InternalWarpScan internal(temp_storage); internal.InclusiveScan(input, inclusive_output, scan_op); internal.Update( input, inclusive_output, exclusive_output, scan_op, Int2Type()); } /** * \brief Computes both inclusive and exclusive prefix scans using the specified binary scan functor across the calling warp. * * \par * - \smemreuse * * \par Snippet * The code snippet below illustrates four concurrent warp-wide prefix max scans within a block of * 128 threads (one per each of the 32-thread warps). * \par * \code * #include * * __global__ void ExampleKernel(...) * { * // Specialize WarpScan for type int * typedef cub::WarpScan WarpScan; * * // Allocate WarpScan shared memory for 4 warps * __shared__ typename WarpScan::TempStorage temp_storage[4]; * * // Obtain one input item per thread * int thread_data = ... * * // Compute inclusive warp-wide prefix max scans * int warp_id = threadIdx.x / 32; * int inclusive_partial, exclusive_partial; * WarpScan(temp_storage[warp_id]).Scan(thread_data, inclusive_partial, exclusive_partial, INT_MIN, cub::Max()); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is {0, -1, 2, -3, ..., 126, -127}. * The corresponding output \p inclusive_partial in the first warp would be * 0, 0, 2, 2, ..., 30, 30, the output for the second warp would be 32, 32, 34, 34, ..., 62, 62, etc. * The corresponding output \p exclusive_partial in the first warp would be * INT_MIN, 0, 0, 2, ..., 28, 30, the output for the second warp would be 30, 32, 32, 34, ..., 60, 62, etc. * * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) */ template __device__ __forceinline__ void Scan( T input, ///< [in] Calling thread's input item. 
T &inclusive_output, ///< [out] Calling thread's inclusive-scan output item. T &exclusive_output, ///< [out] Calling thread's exclusive-scan output item. T initial_value, ///< [in] Initial value to seed the exclusive scan ScanOp scan_op) ///< [in] Binary scan operator { InternalWarpScan internal(temp_storage); internal.InclusiveScan(input, inclusive_output, scan_op); internal.Update( input, inclusive_output, exclusive_output, scan_op, initial_value, Int2Type()); } //@} end member group /******************************************************************//** * \name Data exchange *********************************************************************/ //@{ /** * \brief Broadcast the value \p input from warp-lanesrc_lane to all lanes in the warp * * \par * - \smemreuse * * \par Snippet * The code snippet below illustrates the warp-wide broadcasts of values from * lanes0 in each of four warps to all other threads in those warps. * \par * \code * #include * * __global__ void ExampleKernel(...) * { * // Specialize WarpScan for type int * typedef cub::WarpScan WarpScan; * * // Allocate WarpScan shared memory for 4 warps * __shared__ typename WarpScan::TempStorage temp_storage[4]; * * // Obtain one input item per thread * int thread_data = ... * * // Broadcast from lane0 in each warp to all other threads in the warp * int warp_id = threadIdx.x / 32; * thread_data = WarpScan(temp_storage[warp_id]).Broadcast(thread_data, 0); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is {0, 1, 2, 3, ..., 127}. * The corresponding output \p thread_data will be * {0, 0, ..., 0} in warp0, * {32, 32, ..., 32} in warp1, * {64, 64, ..., 64} in warp2, etc. */ __device__ __forceinline__ T Broadcast( T input, ///< [in] The value to broadcast unsigned int src_lane) ///< [in] Which warp lane is to do the broadcasting { return InternalWarpScan(temp_storage).Broadcast(input, src_lane); } //@} end member group }; /** @} */ // end group WarpModule } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/acc/cuda/cuda_autopicker.cu000066400000000000000000001110151411340063500207260ustar00rootroot00000000000000#undef ALTCPU #include #include #include #include #include #include #include #include #include #include "src/ml_optimiser.h" #include "src/acc/acc_ptr.h" #include "src/acc/acc_projector.h" #include "src/acc/acc_backprojector.h" #include "src/acc/acc_projector_plan.h" #include "src/acc/cuda/cuda_kernels/helper.cuh" #include "src/acc/cuda/cuda_mem_utils.h" #include "src/acc/cuda/cuda_settings.h" #include "src/acc/cuda/cuda_benchmark_utils.h" #include "src/acc/cuda/cuda_fft.h" #include "src/macros.h" #include "src/error.h" #ifdef CUDA_FORCESTL #include "src/acc/cuda/cuda_utils_stl.cuh" #else #include "src/acc/cuda/cuda_utils_cub.cuh" #endif #include "src/acc/utilities.h" #include "src/acc/acc_helper_functions.h" #include "src/acc/cuda/cuda_autopicker.h" AutoPickerCuda::AutoPickerCuda(AutoPicker *basePicker, int dev_id, const char * timing_fnm) : node(NULL), basePckr(basePicker), allocator(new CudaCustomAllocator(0, 1)), micTransformer(0, allocator), cudaTransformer1(0, allocator), #ifdef TIMING_FILES timer(timing_fnm), #endif cudaTransformer2(0, allocator) { projectors.resize(basePckr->Mrefs.size()); have_warned_batching=false; /*====================================================== DEVICE SETTINGS ======================================================*/ device_id = dev_id; int devCount; HANDLE_ERROR(cudaGetDeviceCount(&devCount)); if(dev_id >= devCount) { 
//std::cerr << " using device_id=" << dev_id << " (device no. " << dev_id+1 << ") which is higher than the available number of devices=" << devCount << std::endl; CRITICAL(ERR_GPUID); } else HANDLE_ERROR(cudaSetDevice(dev_id)); }; AutoPickerCuda::AutoPickerCuda(AutoPickerMpi *basePicker, int dev_id, const char * timing_fnm) : basePckr(basePicker), allocator(new CudaCustomAllocator(0, 1)), micTransformer(0, allocator), cudaTransformer1(0, allocator), #ifdef TIMING_FILES timer(timing_fnm), #endif cudaTransformer2(0, allocator) { node = basePicker->getNode(); basePicker->verb = (node->isLeader()) ? 1 : 0; projectors.resize(basePckr->Mrefs.size()); have_warned_batching=false; /*====================================================== DEVICE SETTINGS ======================================================*/ device_id = dev_id; int devCount; HANDLE_ERROR(cudaGetDeviceCount(&devCount)); if(dev_id >= devCount) { //std::cerr << " using device_id=" << dev_id << " (device no. " << dev_id+1 << ") which is higher than the available number of devices=" << devCount << std::endl; CRITICAL(ERR_GPUID); } else HANDLE_ERROR(cudaSetDevice(dev_id)); }; void AutoPickerCuda::run() { long int my_first_micrograph, my_last_micrograph, my_nr_micrographs; if(node!=NULL) { // Each node does part of the work divide_equally(basePckr->fn_micrographs.size(), node->size, node->rank, my_first_micrograph, my_last_micrograph); } else { my_first_micrograph = 0; my_last_micrograph = basePckr->fn_micrographs.size() - 1; } my_nr_micrographs = my_last_micrograph - my_first_micrograph + 1; int barstep; if (basePckr->verb > 0) { std::cout << " Autopicking ..." << std::endl; init_progress_bar(my_nr_micrographs); barstep = XMIPP_MAX(1, my_nr_micrographs / 60); } if (!basePckr->do_read_fom_maps) { CTIC(timer,"setupProjectors"); for (int iref = 0; iref < (basePckr->Mrefs.size()); iref++) { projectors[iref].setMdlDim( basePckr->PPref[iref].data.xdim, basePckr->PPref[iref].data.ydim, basePckr->PPref[iref].data.zdim, basePckr->PPref[iref].data.yinit, basePckr->PPref[iref].data.zinit, basePckr->PPref[iref].r_max, basePckr->PPref[iref].padding_factor); projectors[iref].initMdl(&(basePckr->PPref[iref].data.data[0])); } CTOC(timer,"setupProjectors"); } FileName fn_olddir=""; for (long int imic = my_first_micrograph; imic <= my_last_micrograph; imic++) { if (basePckr->verb > 0 && imic % barstep == 0) progress_bar(imic); // Check new-style outputdirectory exists and make it if not! 
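		// (the mkdir is only issued when the output directory differs from that of the
		//  previous micrograph, so at most one system() call per directory change)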
FileName fn_dir = basePckr->getOutputRootName(basePckr->fn_micrographs[imic]); fn_dir = fn_dir.beforeLastOf("/"); if (fn_dir != fn_olddir) { // Make a Particles directory int res = system(("mkdir -p " + fn_dir).c_str()); fn_olddir = fn_dir; } #ifdef TIMING basePckr->timer.tic(basePckr->TIMING_A5); #endif autoPickOneMicrograph(basePckr->fn_micrographs[imic], imic); } #ifdef TIMING basePckr->timer.toc(basePckr->TIMING_A5); #endif if (basePckr->verb > 0) progress_bar(my_nr_micrographs); cudaDeviceReset(); } void AutoPickerCuda::calculateStddevAndMeanUnderMask(AccPtr< ACCCOMPLEX > &d_Fmic, AccPtr< ACCCOMPLEX > &d_Fmic2, AccPtr< ACCCOMPLEX > &d_Fmsk, int nr_nonzero_pixels_mask, AccPtr< XFLOAT > &d_Mstddev, AccPtr< XFLOAT > &d_Mmean, size_t x, size_t y, size_t mic_size, size_t workSize) { cudaTransformer2.setSize(workSize,workSize,1); deviceInitValue(d_Mstddev, (XFLOAT)0.); RFLOAT normfft = (RFLOAT)(mic_size * mic_size) / (RFLOAT)nr_nonzero_pixels_mask; AccPtr< ACCCOMPLEX > d_Fcov = d_Fmic.make< ACCCOMPLEX >(); d_Fcov.deviceAlloc(d_Fmic.getSize()); CTIC(timer,"PRE-multi_0"); int Bsize( (int) ceilf(( float)d_Fmic.getSize()/(float)BLOCK_SIZE)); cuda_kernel_convol_B<<>>( ~d_Fmic, ~d_Fmsk, ~d_Fcov, d_Fmic.getSize()); LAUNCH_HANDLE_ERROR(cudaGetLastError()); CTOC(timer,"PRE-multi_0"); CTIC(timer,"PRE-window_0"); windowFourierTransform2( d_Fcov, cudaTransformer2.fouriers, x, y, 1, workSize/2+1, workSize, 1); CTOC(timer,"PRE-window_0"); CTIC(timer,"PRE-Transform_0"); cudaTransformer2.backward(); CTOC(timer,"PRE-Transform_0"); Bsize = ( (int) ceilf(( float)cudaTransformer2.reals.getSize()/(float)BLOCK_SIZE)); cuda_kernel_multi<<>>( ~cudaTransformer2.reals, ~cudaTransformer2.reals, (XFLOAT) normfft, cudaTransformer2.reals.getSize()); LAUNCH_HANDLE_ERROR(cudaGetLastError()); CTIC(timer,"PRE-multi_1"); cuda_kernel_multi<<>>( ~cudaTransformer2.reals, ~cudaTransformer2.reals, ~d_Mstddev, (XFLOAT) -1, cudaTransformer2.reals.getSize()); LAUNCH_HANDLE_ERROR(cudaGetLastError()); CTOC(timer,"PRE-multi_1"); CTIC(timer,"PRE-CenterFFT_0"); runCenterFFT(cudaTransformer2.reals, (int)cudaTransformer2.xSize, (int)cudaTransformer2.ySize, false, 1); CTOC(timer,"PRE-CenterFFT_0"); cudaTransformer2.reals.cpOnAcc(d_Mmean); //TODO remove the need for this CTIC(timer,"PRE-multi_2"); Bsize = ( (int) ceilf(( float)d_Fmsk.getSize()/(float)BLOCK_SIZE)); cuda_kernel_convol_A<<>>( ~d_Fmsk, ~d_Fmic2, ~d_Fcov, d_Fmsk.getSize()); LAUNCH_HANDLE_ERROR(cudaGetLastError()); CTOC(timer,"PRE-multi_2"); CTIC(timer,"PRE-window_1"); windowFourierTransform2( d_Fcov, cudaTransformer2.fouriers, x, y, 1, workSize/2+1, workSize, 1); CTOC(timer,"PRE-window_1"); CTIC(timer,"PRE-Transform_1"); cudaTransformer2.backward(); CTOC(timer,"PRE-Transform_1"); CTIC(timer,"PRE-multi_3"); Bsize = ( (int) ceilf(( float)d_Mstddev.getSize()/(float)BLOCK_SIZE)); cuda_kernel_finalizeMstddev<<>>( ~d_Mstddev, ~cudaTransformer2.reals, normfft, d_Mstddev.getSize()); LAUNCH_HANDLE_ERROR(cudaGetLastError()); CTOC(timer,"PRE-multi_3"); CTIC(timer,"PRE-CenterFFT_1"); runCenterFFT(d_Mstddev, (int)workSize, (int)workSize, false, 1); CTOC(timer,"PRE-CenterFFT_1"); } void AutoPickerCuda::autoPickOneMicrograph(FileName &fn_mic, long int imic) { Image Imic; MultidimArray Faux, Faux2, Fmic; MultidimArray Maux, Mstddev, Mmean, Mstddev2, Mavg, Mccf_best, Mpsi_best, Fctf, Mccf_best_combined, Mpsi_best_combined; MultidimArray Mclass_best_combined; AccPtr d_Mccf_best(basePckr->workSize*basePckr->workSize, allocator); AccPtr d_Mpsi_best(basePckr->workSize*basePckr->workSize, allocator); 
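	// Per-pixel best figure-of-merit and the psi that produced it, kept on the device at the
	// downsized workSize x workSize resolution and filled by cuda_kernel_probRatio further down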
d_Mccf_best.deviceAlloc(); d_Mpsi_best.deviceAlloc(); // Always use the same random seed init_random_generator(basePckr->random_seed + imic); RFLOAT sum_ref_under_circ_mask, sum_ref2_under_circ_mask; int my_skip_side = basePckr->autopick_skip_side + basePckr->particle_size/2; CTF ctf; int Npsi = 360 / basePckr->psi_sampling; int min_distance_pix = ROUND(basePckr->min_particle_distance / basePckr->angpix); XFLOAT scale = (XFLOAT)basePckr->workSize / (XFLOAT)basePckr->micrograph_size; // Read in the micrograph #ifdef TIMING basePckr->timer.tic(basePckr->TIMING_A6); #endif CTIC(timer,"readMicrograph"); Imic.read(fn_mic); CTOC(timer,"readMicrograph"); CTIC(timer,"setXmippOrigin_0"); Imic().setXmippOrigin(); CTOC(timer,"setXmippOrigin_0"); #ifdef TIMING basePckr->timer.toc(basePckr->TIMING_A6); #endif // Let's just check the square size again.... RFLOAT my_size, my_xsize, my_ysize; my_xsize = XSIZE(Imic()); my_ysize = YSIZE(Imic()); my_size = (my_xsize != my_ysize) ? XMIPP_MAX(my_xsize, my_ysize) : my_xsize; if (basePckr->extra_padding > 0) my_size += 2 * basePckr->extra_padding; if (my_size != basePckr->micrograph_size || my_xsize != basePckr->micrograph_xsize || my_ysize != basePckr->micrograph_ysize) { Imic().printShape(); std::cerr << " micrograph_size= " << basePckr->micrograph_size << " micrograph_xsize= " << basePckr->micrograph_xsize << " micrograph_ysize= " << basePckr->micrograph_ysize << std::endl; REPORT_ERROR("AutoPicker::autoPickOneMicrograph ERROR: No differently sized micrographs are allowed in one run, sorry you will have to run separately for each size..."); } if(!basePckr->do_read_fom_maps) { CTIC(timer,"setSize_micTr"); micTransformer.setSize(basePckr->micrograph_size, basePckr->micrograph_size, 1,1); CTOC(timer,"setSize_micTr"); CTIC(timer,"setSize_cudaTr"); cudaTransformer1.setSize(basePckr->workSize,basePckr->workSize, 1, Npsi, FFTW_BACKWARD); CTOC(timer,"setSize_cudaTr"); } HANDLE_ERROR(cudaDeviceSynchronize()); if(cudaTransformer1.batchSize.size()>1 && !have_warned_batching) { have_warned_batching = true; std::cerr << std::endl << "*-----------------------------WARNING------------------------------------------------*"<< std::endl; std::cerr << "With the current settings the GPU memory is imposing a soft limit on your performace," << std::endl; std::cerr << "since one or more micrographs has to use (at least " << cudaTransformer1.batchSize.size() << ") batches of orientations to "<< std::endl; std::cerr << "achieve the total requested " << Npsi << " orientations. Consider using" << std::endl; std::cerr << "\t higher --ang" << std::endl; std::cerr << "\t harder --shrink" << std::endl; std::cerr << "\t higher --lowpass with --shrink 0" << std::endl; std::cerr << "*------------------------------------------------------------------------------------*"<< std::endl; } // Set mean to zero and stddev to 1 to prevent numerical problems with one-sweep stddev calculations.... 
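	// The loop below first clips outliers more than outlier_removal_zscore sigmas away from
	// the mean and then standardises the micrograph in place: X -> (X - avg0) / stddev0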
RFLOAT avg0, stddev0, minval0, maxval0; #ifdef TIMING basePckr->timer.tic(basePckr->TIMING_A7); #endif CTIC(timer,"computeStats"); Imic().computeStats(avg0, stddev0, minval0, maxval0); CTOC(timer,"computeStats"); #ifdef TIMING basePckr->timer.toc(basePckr->TIMING_A7); #endif CTIC(timer,"middlePassFilter"); FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Imic()) { // Remove pixel values that are too far away from the mean if ( ABS(DIRECT_MULTIDIM_ELEM(Imic(), n) - avg0) / stddev0 > basePckr->outlier_removal_zscore) DIRECT_MULTIDIM_ELEM(Imic(), n) = avg0; DIRECT_MULTIDIM_ELEM(Imic(), n) = (DIRECT_MULTIDIM_ELEM(Imic(), n) - avg0) / stddev0; } CTOC(timer,"middlePassFilter"); if (basePckr->micrograph_xsize != basePckr->micrograph_size || basePckr->micrograph_ysize != basePckr->micrograph_size) { CTIC(timer,"rewindow"); // Window non-square micrographs to be a square with the largest side rewindow(Imic, basePckr->micrograph_size); CTOC(timer,"rewindow"); CTIC(timer,"gaussNoiseOutside"); // Fill region outside the original window with white Gaussian noise to prevent all-zeros in Mstddev FOR_ALL_ELEMENTS_IN_ARRAY2D(Imic()) { if (i < FIRST_XMIPP_INDEX(basePckr->micrograph_ysize) || i > LAST_XMIPP_INDEX(basePckr->micrograph_ysize) || j < FIRST_XMIPP_INDEX(basePckr->micrograph_xsize) || j > LAST_XMIPP_INDEX(basePckr->micrograph_xsize) ) A2D_ELEM(Imic(), i, j) = rnd_gaus(0.,1.); } CTOC(timer,"gaussNoiseOutside"); } #ifdef TIMING basePckr->timer.tic(basePckr->TIMING_A8); #endif CTIC(timer,"CTFread"); // Read in the CTF information if needed if (basePckr->do_ctf) { // Search for this micrograph in the metadata table FOR_ALL_OBJECTS_IN_METADATA_TABLE(basePckr->MDmic) { FileName fn_tmp; basePckr->MDmic.getValue(EMDL_MICROGRAPH_NAME, fn_tmp); if (fn_tmp==fn_mic) { ctf.readByGroup(basePckr->MDmic, &basePckr->obsModel); Fctf.resize(basePckr->workSize,basePckr->workSize/2+1); ctf.getFftwImage(Fctf, basePckr->micrograph_size, basePckr->micrograph_size, basePckr->angpix, false, false, basePckr->intact_ctf_first_peak, true); break; } } } CTOC(timer,"CTFread"); #ifdef TIMING basePckr->timer.toc(basePckr->TIMING_A8); #endif #ifdef TIMING basePckr->timer.tic(basePckr->TIMING_A9); #endif CTIC(timer,"mccfResize"); Mccf_best.resize(basePckr->workSize,basePckr->workSize); CTOC(timer,"mccfResize"); CTIC(timer,"mpsiResize"); Mpsi_best.resize(basePckr->workSize,basePckr->workSize); CTOC(timer,"mpsiResize"); #ifdef TIMING basePckr->timer.toc(basePckr->TIMING_A9); #endif AccPtr< ACCCOMPLEX > d_Fmic(allocator); AccPtr d_Mmean(allocator); AccPtr d_Mstddev(allocator); #ifdef TIMING basePckr->timer.tic(basePckr->TIMING_B1); #endif RFLOAT normfft = (RFLOAT)(basePckr->micrograph_size*basePckr->micrograph_size) / (RFLOAT)basePckr->nr_pixels_circular_mask;; if (basePckr->do_read_fom_maps) { CTIC(timer,"readFromFomMaps_0"); FileName fn_tmp=basePckr->getOutputRootName(fn_mic)+"_"+basePckr->fn_out+"_stddevNoise.spi"; Image It; It.read(fn_tmp); if (basePckr->autopick_helical_segments) Mstddev2 = It(); else Mstddev = It(); fn_tmp=basePckr->getOutputRootName(fn_mic)+"_"+basePckr->fn_out+"_avgNoise.spi"; It.read(fn_tmp); if (basePckr->autopick_helical_segments) Mavg = It(); else Mmean = It(); CTOC(timer,"readFromFomMaps_0"); } else { /* * Squared difference FOM: * Sum ( (X-mu)/sig - A )^2 = * = Sum((X-mu)/sig)^2 - 2 Sum (A*(X-mu)/sig) + Sum(A)^2 * = (1/sig^2)*Sum(X^2) - (2*mu/sig^2)*Sum(X) + (mu^2/sig^2)*Sum(1) - (2/sig)*Sum(AX) + (2*mu/sig)*Sum(A) + Sum(A^2) * * However, the squared difference with an "empty" ie all-zero reference is: * Sum ( 
(X-mu)/sig)^2 * * The ratio of the probabilities thereby becomes: * P(ref) = 1/sqrt(2pi) * exp (( (X-mu)/sig - A )^2 / -2 ) // assuming sigma = 1! * P(zero) = 1/sqrt(2pi) * exp (( (X-mu)/sig )^2 / -2 ) * * P(ref)/P(zero) = exp(( (X-mu)/sig - A )^2 / -2) / exp ( ( (X-mu)/sig )^2 / -2) * = exp( (- (2/sig)*Sum(AX) + (2*mu/sig)*Sum(A) + Sum(A^2)) / - 2 ) * * Therefore, I do not need to calculate (X-mu)/sig beforehand!!! * */ CTIC(timer,"Imic_insert"); for(int i = 0; i< Imic().nzyxdim ; i++) micTransformer.reals[i] = (XFLOAT) Imic().data[i]; micTransformer.reals.cpToDevice(); CTOC(timer,"Imic_insert"); CTIC(timer,"runCenterFFT_0"); runCenterFFT(micTransformer.reals, micTransformer.xSize, micTransformer.ySize, true, 1); CTOC(timer,"runCenterFFT_0"); CTIC(timer,"FourierTransform_0"); micTransformer.forward(); int FMultiBsize = ( (int) ceilf(( float)micTransformer.fouriers.getSize()*2/(float)BLOCK_SIZE)); CudaKernels::cuda_kernel_multi<<>>( (XFLOAT*)~micTransformer.fouriers, (XFLOAT)1/((XFLOAT)(micTransformer.reals.getSize())), micTransformer.fouriers.getSize()*2); LAUNCH_HANDLE_ERROR(cudaGetLastError()); CTOC(timer,"FourierTransform_0"); if (basePckr->highpass > 0.) { CTIC(timer,"highpass"); micTransformer.fouriers.streamSync(); lowPassFilterMapGPU( micTransformer.fouriers, (size_t)1, micTransformer.yFSize, micTransformer.xFSize, XSIZE(Imic()), basePckr->lowpass, basePckr->highpass, basePckr->angpix, 2, true); //false = lowpass, true=highpass micTransformer.fouriers.streamSync(); micTransformer.backward(); micTransformer.reals.streamSync(); CTOC(timer,"highpass"); } CTIC(timer,"F_cp"); AccPtr< ACCCOMPLEX > Ftmp(allocator); Ftmp.setSize(micTransformer.fouriers.getSize()); Ftmp.deviceAlloc(); micTransformer.fouriers.cpOnAcc(Ftmp); CTOC(timer,"F_cp"); // Also calculate the FFT of the squared micrograph CTIC(timer,"SquareImic"); cuda_kernel_square<<>>( ~micTransformer.reals, micTransformer.reals.getSize()); LAUNCH_HANDLE_ERROR(cudaGetLastError()); CTOC(timer,"SquareImic"); CTIC(timer,"FourierTransform_1"); micTransformer.forward(); CudaKernels::cuda_kernel_multi<<>>( (XFLOAT*)~micTransformer.fouriers, (XFLOAT)1/((XFLOAT)(micTransformer.reals.getSize())), micTransformer.fouriers.getSize()*2); LAUNCH_HANDLE_ERROR(cudaGetLastError()); CTOC(timer,"FourierTransform_1"); // The following calculate mu and sig under the solvent area at every position in the micrograph CTIC(timer,"calculateStddevAndMeanUnderMask"); d_Mstddev.deviceAlloc(basePckr->workSize*basePckr->workSize); d_Mmean.deviceAlloc(basePckr->workSize*basePckr->workSize); if (basePckr->autopick_helical_segments) { AccPtr< ACCCOMPLEX > d_Fmsk2(basePckr->Favgmsk.nzyxdim, allocator); AccPtr d_Mavg(allocator); AccPtr d_Mstddev2(allocator); d_Fmsk2.deviceAlloc(); d_Mavg.deviceAlloc(basePckr->workSize*basePckr->workSize); d_Mstddev2.deviceAlloc(basePckr->workSize*basePckr->workSize); //TODO Do this only once further up in scope for(int i = 0; i< d_Fmsk2.getSize() ; i++) { d_Fmsk2[i].x = basePckr->Favgmsk.data[i].real; d_Fmsk2[i].y = basePckr->Favgmsk.data[i].imag; } d_Fmsk2.cpToDevice(); d_Fmsk2.streamSync(); calculateStddevAndMeanUnderMask(Ftmp, micTransformer.fouriers, d_Fmsk2, basePckr->nr_pixels_avg_mask, d_Mstddev2, d_Mavg, micTransformer.xFSize, micTransformer.yFSize, basePckr->micrograph_size, basePckr->workSize); d_Mstddev2.hostAlloc(); d_Mstddev2.cpToHost(); d_Mstddev2.streamSync(); Mstddev2.resizeNoCp(1, basePckr->workSize, basePckr->workSize); for(int i = 0; i < d_Mstddev2.getSize() ; i ++) Mstddev2.data[i] = d_Mstddev2[i]; 
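			// Likewise bring the local average map under the averaging mask back to the host;
			// helical picking uses Mavg together with Mstddev2 below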
d_Mavg.hostAlloc(); d_Mavg.cpToHost(); d_Mavg.streamSync(); Mavg.resizeNoCp(1, basePckr->workSize, basePckr->workSize); for(int i = 0; i < d_Mavg.getSize() ; i ++) Mavg.data[i] = d_Mavg[i]; } //TODO Do this only once further up in scope AccPtr< ACCCOMPLEX > d_Fmsk(basePckr->Finvmsk.nzyxdim, allocator); d_Fmsk.deviceAlloc(); for(int i = 0; i< d_Fmsk.getSize() ; i++) { d_Fmsk[i].x = basePckr->Finvmsk.data[i].real; d_Fmsk[i].y = basePckr->Finvmsk.data[i].imag; } d_Fmsk.cpToDevice(); d_Fmsk.streamSync(); calculateStddevAndMeanUnderMask(Ftmp, micTransformer.fouriers, d_Fmsk, basePckr->nr_pixels_circular_invmask, d_Mstddev, d_Mmean, micTransformer.xFSize, micTransformer.yFSize, basePckr->micrograph_size, basePckr->workSize); //TODO remove this d_Mstddev.hostAlloc(); d_Mstddev.cpToHost(); d_Mstddev.streamSync(); Mstddev.resizeNoCp(1, basePckr->workSize, basePckr->workSize); //TODO put this in a kernel for(int i = 0; i < d_Mstddev.getSize() ; i ++) { Mstddev.data[i] = d_Mstddev[i]; if (d_Mstddev[i] > (XFLOAT)1E-10) d_Mstddev[i] = 1 / d_Mstddev[i]; else d_Mstddev[i] = 1; } d_Mstddev.cpToDevice(); d_Mstddev.streamSync(); d_Mmean.hostAlloc(); d_Mmean.cpToHost(); d_Mmean.streamSync(); Mmean.resizeNoCp(1, basePckr->workSize, basePckr->workSize); for(int i = 0; i < d_Mmean.getSize() ; i ++) Mmean.data[i] = d_Mmean[i]; CTOC(timer,"calculateStddevAndMeanUnderMask"); // From now on use downsized Fmic, as the cross-correlation with the references can be done at lower resolution CTIC(timer,"windowFourierTransform_0"); d_Fmic.setSize((basePckr->workSize/2+1)*(basePckr->workSize)); d_Fmic.deviceAlloc(); windowFourierTransform2( Ftmp, d_Fmic, basePckr->micrograph_size/2+1, basePckr->micrograph_size, 1, //Input dimensions basePckr->workSize/2+1, basePckr->workSize, 1 //Output dimensions ); CTOC(timer,"windowFourierTransform_0"); if (basePckr->do_write_fom_maps) { CTIC(timer,"writeToFomMaps"); // TMP output FileName fn_tmp=basePckr->getOutputRootName(fn_mic)+"_"+basePckr->fn_out+"_stddevNoise.spi"; Image It; It() = (basePckr->autopick_helical_segments) ? Mstddev2 : Mstddev; It.write(fn_tmp); fn_tmp=basePckr->getOutputRootName(fn_mic)+"_"+basePckr->fn_out+"_avgNoise.spi"; It() = (basePckr->autopick_helical_segments) ? 
Mavg : Mmean; It.write(fn_tmp); CTOC(timer,"writeToFomMaps"); } }// end if do_read_fom_maps // Now start looking for the peaks of all references // Clear the output vector with all peaks CTIC(timer,"initPeaks"); std::vector peaks; peaks.clear(); CTOC(timer,"initPeaks"); #ifdef TIMING basePckr->timer.toc(basePckr->TIMING_B1); #endif if (basePckr->autopick_helical_segments) { if (basePckr->do_read_fom_maps) { FileName fn_tmp; Image It_float; Image It_int; fn_tmp = basePckr->getOutputRootName(fn_mic)+"_"+basePckr->fn_out+"_combinedCCF.spi"; It_float.read(fn_tmp); Mccf_best_combined = It_float(); if (basePckr->do_amyloid) { fn_tmp = basePckr->getOutputRootName(fn_mic)+"_"+basePckr->fn_out+"_combinedPSI.spi"; It_float.read(fn_tmp); Mpsi_best_combined = It_float(); } else { fn_tmp = basePckr->getOutputRootName(fn_mic)+"_"+basePckr->fn_out+"_combinedCLASS.spi"; It_int.read(fn_tmp); Mclass_best_combined = It_int(); } } else { Mccf_best_combined.clear(); Mccf_best_combined.resize(basePckr->workSize, basePckr->workSize); Mccf_best_combined.initConstant(-99.e99); Mclass_best_combined.clear(); Mclass_best_combined.resize(basePckr->workSize, basePckr->workSize); Mclass_best_combined.initConstant(-1); Mpsi_best_combined.clear(); Mpsi_best_combined.resize(basePckr->workSize, basePckr->workSize); Mpsi_best_combined.initConstant(-99.e99); } } AccPtr< XFLOAT > d_ctf(Fctf.nzyxdim, allocator); d_ctf.deviceAlloc(); if(basePckr->do_ctf) { for(int i = 0; i< d_ctf.getSize() ; i++) d_ctf[i]=Fctf.data[i]; d_ctf.cpToDevice(); } for (int iref = 0; iref < basePckr->Mrefs.size(); iref++) { CTIC(timer,"OneReference"); RFLOAT expected_Pratio; // the expectedFOM for this (ctf-corrected) reference if (basePckr->do_read_fom_maps) { #ifdef TIMING basePckr->timer.tic(basePckr->TIMING_B2); #endif if (!basePckr->autopick_helical_segments) { CTIC(timer,"readFromFomMaps"); FileName fn_tmp; Image It; fn_tmp.compose(basePckr->getOutputRootName(fn_mic)+"_"+basePckr->fn_out+"_ref", iref,"_bestCCF.spi"); It.read(fn_tmp); Mccf_best = It(); It.MDMainHeader.getValue(EMDL_IMAGE_STATS_MAX, expected_Pratio); // Retrieve expected_Pratio from the header of the image fn_tmp.compose(basePckr->getOutputRootName(fn_mic)+"_"+basePckr->fn_out+"_ref", iref,"_bestPSI.spi"); It.read(fn_tmp); Mpsi_best = It(); CTOC(timer,"readFromFomMaps"); } #ifdef TIMING basePckr->timer.toc(basePckr->TIMING_B2); #endif } //end else if do_read_fom_maps else { #ifdef TIMING basePckr->timer.tic(basePckr->TIMING_B3); #endif CTIC(timer,"mccfInit"); deviceInitValue(d_Mccf_best, (XFLOAT)-LARGE_NUMBER); CTOC(timer,"mccfInit"); AccProjectorKernel projKernel = AccProjectorKernel::makeKernel( projectors[iref], (int)basePckr->workSize/2+1, (int)basePckr->workSize, 1, // Zdim, always 1 in autopicker. 
(int)basePckr->workSize/2+1 -1 ); int FauxStride = (basePckr->workSize/2+1)*basePckr->workSize; #ifdef TIMING basePckr->timer.tic(basePckr->TIMING_B4); #endif CTIC(timer,"SingleProjection"); dim3 blocks((int)ceilf((float)FauxStride/(float)BLOCK_SIZE),1); if(basePckr->do_ctf) { cuda_kernel_rotateAndCtf<<>>( ~cudaTransformer1.fouriers, ~d_ctf, 0, projKernel, 0 ); } else { cuda_kernel_rotateOnly<<>>( ~cudaTransformer1.fouriers, 0, projKernel, 0 ); } LAUNCH_HANDLE_ERROR(cudaGetLastError()); CTOC(timer,"SingleProjection"); #ifdef TIMING basePckr->timer.toc(basePckr->TIMING_B4); #endif /* * FIRST PSI WAS USED FOR PREP CALCS - THIS IS NOW A DEDICATED SECTION * ------------------------------------------------------------------- */ CTIC(timer,"PREP_CALCS"); #ifdef TIMING basePckr->timer.tic(basePckr->TIMING_B5); #endif // Sjors 20April2016: The calculation for sum_ref_under_circ_mask, etc below needs to be done on original micrograph_size! CTIC(timer,"windowFourierTransform_FP"); windowFourierTransform2(cudaTransformer1.fouriers, micTransformer.fouriers, basePckr->workSize/2+1, basePckr->workSize, 1, //Input dimensions basePckr->micrograph_size/2+1, basePckr->micrograph_size, 1 //Output dimensions ); CTOC(timer,"windowFourierTransform_FP"); CTIC(timer,"inverseFourierTransform_FP"); micTransformer.backward(); CTOC(timer,"inverseFourierTransform_FP"); CTIC(timer,"runCenterFFT_FP"); runCenterFFT(micTransformer.reals, (int)micTransformer.xSize, (int)micTransformer.ySize, false, 1); CTOC(timer,"runCenterFFT_FP"); micTransformer.reals.cpToHost(); Maux.resizeNoCp(1,basePckr->micrograph_size, basePckr->micrograph_size); micTransformer.reals.streamSync(); for (int i = 0; i < micTransformer.reals.getSize() ; i ++) Maux.data[i] = micTransformer.reals[i]; CTIC(timer,"setXmippOrigin_FP_0"); Maux.setXmippOrigin(); CTOC(timer,"setXmippOrigin_FP_0"); // TODO: check whether I need CenterFFT(Maux, false) // Sjors 20apr2016: checked, somehow not needed. sum_ref_under_circ_mask = 0.; sum_ref2_under_circ_mask = 0.; RFLOAT suma2 = 0.; RFLOAT sumn = 1.; MultidimArray Mctfref(basePckr->particle_size, basePckr->particle_size); CTIC(timer,"setXmippOrigin_FP_1"); Mctfref.setXmippOrigin(); CTOC(timer,"setXmippOrigin_FP_1"); CTIC(timer,"suma_FP"); FOR_ALL_ELEMENTS_IN_ARRAY2D(Mctfref) // only loop over smaller Mctfref, but take values from large Maux! { if (i*i + j*j < basePckr->particle_radius2) { suma2 += A2D_ELEM(Maux, i, j) * A2D_ELEM(Maux, i, j); suma2 += 2. * A2D_ELEM(Maux, i, j) * rnd_gaus(0., 1.); sum_ref_under_circ_mask += A2D_ELEM(Maux, i, j); sum_ref2_under_circ_mask += A2D_ELEM(Maux, i, j) * A2D_ELEM(Maux, i, j); sumn += 1.; } } sum_ref_under_circ_mask /= sumn; sum_ref2_under_circ_mask /= sumn; expected_Pratio = exp(suma2 / (2. 
* sumn)); CTOC(timer,"suma_FP"); CTOC(timer,"PREP_CALCS"); // for all batches CTIC(timer,"AllPsi"); int startPsi(0); for (int psiIter = 0; psiIter < cudaTransformer1.batchIters; psiIter++) // psi-batches for possible memory-limits { CTIC(timer,"Projection"); dim3 blocks((int)ceilf((float)FauxStride/(float)BLOCK_SIZE),cudaTransformer1.batchSize[psiIter]); if(basePckr->do_ctf) { cuda_kernel_rotateAndCtf<<>>( ~cudaTransformer1.fouriers, ~d_ctf, DEG2RAD(basePckr->psi_sampling), projKernel, startPsi ); } else { cuda_kernel_rotateOnly<<>>( ~cudaTransformer1.fouriers, DEG2RAD(basePckr->psi_sampling), projKernel, startPsi ); } LAUNCH_HANDLE_ERROR(cudaGetLastError()); CTOC(timer,"Projection"); // Now multiply template and micrograph to calculate the cross-correlation CTIC(timer,"convol"); dim3 blocks2( (int) ceilf(( float)FauxStride/(float)BLOCK_SIZE),cudaTransformer1.batchSize[psiIter]); cuda_kernel_batch_convol_A<<>>( ~cudaTransformer1.fouriers, ~d_Fmic, FauxStride); LAUNCH_HANDLE_ERROR(cudaGetLastError()); CTOC(timer,"convol"); CTIC(timer,"CudaInverseFourierTransform_1"); cudaTransformer1.backward(); HANDLE_ERROR(cudaDeviceSynchronize()); CTOC(timer,"CudaInverseFourierTransform_1"); CTIC(timer,"runCenterFFT_1"); runCenterFFT(cudaTransformer1.reals, (int)cudaTransformer1.xSize, (int)cudaTransformer1.ySize, false, cudaTransformer1.batchSize[psiIter]); CTOC(timer,"runCenterFFT_1"); // Calculate ratio of prabilities P(ref)/P(zero) // Keep track of the best values and their corresponding iref and psi // ------------------------------------------------------------------ // So now we already had precalculated: Mdiff2 = 1/sig*Sum(X^2) - 2/sig*Sum(X) + mu^2/sig*Sum(1) // Still to do (per reference): - 2/sig*Sum(AX) + 2*mu/sig*Sum(A) + Sum(A^2) CTIC(timer,"probRatio"); HANDLE_ERROR(cudaDeviceSynchronize()); dim3 PR_blocks(ceilf((float)(cudaTransformer1.reals.getSize()/cudaTransformer1.batchSize[psiIter])/(float)PROBRATIO_BLOCK_SIZE)); cuda_kernel_probRatio<<>>( ~d_Mccf_best, ~d_Mpsi_best, ~cudaTransformer1.reals, ~d_Mmean, ~d_Mstddev, cudaTransformer1.reals.getSize()/cudaTransformer1.batchSize[0], (XFLOAT) -2*normfft, (XFLOAT) 2*sum_ref_under_circ_mask, (XFLOAT) sum_ref2_under_circ_mask, (XFLOAT) expected_Pratio, cudaTransformer1.batchSize[psiIter], startPsi, Npsi ); LAUNCH_HANDLE_ERROR(cudaGetLastError()); startPsi += cudaTransformer1.batchSize[psiIter]; CTOC(timer,"probRatio"); } // end for psi-batches CTOC(timer,"AllPsi"); #ifdef TIMING basePckr->timer.toc(basePckr->TIMING_B6); #endif #ifdef TIMING basePckr->timer.tic(basePckr->TIMING_B7); #endif CTIC(timer,"output"); d_Mccf_best.cpToHost(); d_Mpsi_best.cpToHost(); d_Mccf_best.streamSync(); for (int i = 0; i < Mccf_best.nzyxdim; i ++) { Mccf_best.data[i] = d_Mccf_best[i]; Mpsi_best.data[i] = d_Mpsi_best[i]; } CTOC(timer,"output"); if (basePckr->do_write_fom_maps && !basePckr->autopick_helical_segments) { CTIC(timer,"writeFomMaps"); // TMP output FileName fn_tmp; Image It; It() = Mccf_best; // Store expected_Pratio in the header of the image.. 
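			// (a later run with do_read_fom_maps retrieves this value from the same header entry,
			//  EMDL_IMAGE_STATS_MAX, instead of recomputing the reference projections)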
It.MDMainHeader.setValue(EMDL_IMAGE_STATS_MAX, expected_Pratio); // Store expected_Pratio in the header of the image fn_tmp.compose(basePckr->getOutputRootName(fn_mic)+"_"+basePckr->fn_out+"_ref", iref,"_bestCCF.spi"); It.write(fn_tmp); It() = Mpsi_best; fn_tmp.compose(basePckr->getOutputRootName(fn_mic)+"_"+basePckr->fn_out+"_ref", iref,"_bestPSI.spi"); It.write(fn_tmp); CTOC(timer,"writeFomMaps"); } // end if do_write_fom_maps #ifdef TIMING basePckr->timer.toc(basePckr->TIMING_B7); #endif #ifdef TIMING basePckr->timer.toc(basePckr->TIMING_B3); #endif } // end if do_read_fom_maps //TODO FIX HELICAL SEGMENTS SUPPORT if (basePckr->autopick_helical_segments) { if (!basePckr->do_read_fom_maps) { // Combine Mccf_best and Mpsi_best from all refs FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Mccf_best) { RFLOAT new_ccf = DIRECT_MULTIDIM_ELEM(Mccf_best, n); RFLOAT old_ccf = DIRECT_MULTIDIM_ELEM(Mccf_best_combined, n); if (new_ccf > old_ccf) { DIRECT_MULTIDIM_ELEM(Mccf_best_combined, n) = new_ccf; if (basePckr->do_amyloid) DIRECT_MULTIDIM_ELEM(Mpsi_best_combined, n) = DIRECT_MULTIDIM_ELEM(Mpsi_best, n); else DIRECT_MULTIDIM_ELEM(Mclass_best_combined, n) = iref; } } } } else { #ifdef TIMING basePckr->timer.tic(basePckr->TIMING_B8); #endif // Now that we have Mccf_best and Mpsi_best, get the peaks std::vector my_ref_peaks; CTIC(timer,"setXmippOriginX3"); Mstddev.setXmippOrigin(); Mmean.setXmippOrigin(); Mccf_best.setXmippOrigin(); Mpsi_best.setXmippOrigin(); CTOC(timer,"setXmippOriginX3"); CTIC(timer,"peakSearch"); basePckr->peakSearch(Mccf_best, Mpsi_best, Mstddev, Mmean, iref, my_skip_side, my_ref_peaks, scale); CTOC(timer,"peakSearch"); CTIC(timer,"peakPrune"); basePckr->prunePeakClusters(my_ref_peaks, min_distance_pix, scale); CTOC(timer,"peakPrune"); CTIC(timer,"peakInsert"); // append the peaks of this reference to all the other peaks peaks.insert(peaks.end(), my_ref_peaks.begin(), my_ref_peaks.end()); CTOC(timer,"peakInsert"); CTOC(timer,"OneReference"); #ifdef TIMING basePckr->timer.toc(basePckr->TIMING_B8); #endif } } // end for iref if (basePckr->autopick_helical_segments) { if (basePckr->do_write_fom_maps) { FileName fn_tmp; Image It_float; Image It_int; It_float() = Mccf_best_combined; fn_tmp = basePckr->getOutputRootName(fn_mic) + "_" + basePckr->fn_out + "_combinedCCF.spi"; It_float.write(fn_tmp); if (basePckr->do_amyloid) { It_float() = Mpsi_best_combined; fn_tmp = basePckr->getOutputRootName(fn_mic) + "_" + basePckr->fn_out + "_combinedPSI.spi"; It_float.write(fn_tmp); } else { It_int() = Mclass_best_combined; fn_tmp = basePckr->getOutputRootName(fn_mic) + + "_" + basePckr->fn_out + "_combinedCLASS.spi"; It_int.write(fn_tmp); } } // end if do_write_fom_maps RFLOAT thres = basePckr->min_fraction_expected_Pratio; int peak_r_min = 1; std::vector ccf_peak_list; std::vector > tube_coord_list, tube_track_list; std::vector tube_len_list; MultidimArray Mccfplot; Mccf_best_combined.setXmippOrigin(); Mpsi_best_combined.setXmippOrigin(); Mstddev2.setXmippOrigin(); Mavg.setXmippOrigin(); Mclass_best_combined.setXmippOrigin(); if (basePckr->do_amyloid) { basePckr->pickAmyloids(Mccf_best_combined, Mpsi_best_combined, Mstddev2, Mavg, thres, basePckr->amyloid_max_psidiff, fn_mic, basePckr->fn_out, (basePckr->helical_tube_diameter / basePckr->angpix), basePckr->autopick_skip_side, scale); } else { basePckr->pickCCFPeaks(Mccf_best_combined, Mstddev2, Mavg, Mclass_best_combined, thres, peak_r_min, (basePckr->particle_diameter / basePckr->angpix), ccf_peak_list, Mccfplot, my_skip_side, scale); 
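			// Chain the surviving CCF peaks into helical tubes and write out their coordinates;
			// both steps use the same down-scaled coordinate system ('scale') as the peak search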
basePckr->extractHelicalTubes(ccf_peak_list, tube_coord_list, tube_len_list, tube_track_list, (basePckr->particle_diameter / basePckr->angpix), basePckr->helical_tube_curvature_factor_max, (basePckr->min_particle_distance / basePckr->angpix), (basePckr->helical_tube_diameter / basePckr->angpix), scale); basePckr->exportHelicalTubes(Mccf_best_combined, Mccfplot, Mclass_best_combined, tube_coord_list, tube_track_list, tube_len_list, fn_mic, basePckr->fn_out, (basePckr->particle_diameter / basePckr->angpix), (basePckr->helical_tube_length_min / basePckr->angpix), my_skip_side, scale); } if ((basePckr->do_write_fom_maps || basePckr->do_read_fom_maps) && !basePckr->do_amyloid) { FileName fn_tmp; Image It; It() = Mccfplot; fn_tmp = basePckr->getOutputRootName(fn_mic) + "_" + basePckr->fn_out + "_combinedPLOT.spi"; It.write(fn_tmp); } } else { #ifdef TIMING basePckr->timer.tic(basePckr->TIMING_B9); #endif //Now that we have done all references, prune the list again... CTIC(timer,"finalPeakPrune"); basePckr->prunePeakClusters(peaks, min_distance_pix, scale); CTOC(timer,"finalPeakPrune"); // And remove all too close neighbours basePckr->removeTooCloselyNeighbouringPeaks(peaks, min_distance_pix, scale); // Write out a STAR file with the coordinates MetaDataTable MDout; for (int ipeak =0; ipeak < peaks.size(); ipeak++) { MDout.addObject(); MDout.setValue(EMDL_IMAGE_COORD_X, (RFLOAT)(peaks[ipeak].x)/scale); MDout.setValue(EMDL_IMAGE_COORD_Y, (RFLOAT)(peaks[ipeak].y)/scale); MDout.setValue(EMDL_PARTICLE_CLASS, peaks[ipeak].ref + 1); // start counting at 1 MDout.setValue(EMDL_PARTICLE_AUTOPICK_FOM, peaks[ipeak].fom); MDout.setValue(EMDL_ORIENT_PSI, peaks[ipeak].psi); } FileName fn_tmp = basePckr->getOutputRootName(fn_mic) + "_" + basePckr->fn_out + ".star"; MDout.write(fn_tmp); #ifdef TIMING basePckr->timer.toc(basePckr->TIMING_B9); #endif } } relion-3.1.3/src/acc/cuda/cuda_autopicker.h000066400000000000000000000057141411340063500205560ustar00rootroot00000000000000#ifndef CUDA_AUTOPICKER_H_ #define CUDA_AUTOPICKER_H_ #include "src/mpi.h" #include "src/autopicker.h" #include "src/autopicker_mpi.h" #include "src/projector.h" #include "src/complex.h" #include "src/image.h" #include "src/acc/cuda/cuda_mem_utils.h" #include "src/acc/acc_projector.h" #include "src/acc/cuda/cuda_settings.h" #include "src/acc/cuda/cuda_fft.h" #include "src/acc/cuda/cuda_benchmark_utils.h" #include #ifdef ACC_DOUBLE_PRECISION #define XFLOAT double #else #define XFLOAT float #endif class AutoPickerCuda { private: MpiNode *node; public: AutoPicker *basePckr; CudaCustomAllocator *allocator; CudaFFT micTransformer; CudaFFT cudaTransformer1; CudaFFT cudaTransformer2; std::vector< AccProjector > projectors; //Class streams ( for concurrent scheduling of class-specific kernels) std::vector< cudaStream_t > classStreams; int device_id; bool have_warned_batching; //MlDeviceBundle *devBundle; #ifdef TIMING_FILES relion_timer timer; #endif AutoPickerCuda(AutoPicker *basePicker, int dev_id, const char * timing_fnm); AutoPickerCuda(AutoPickerMpi *basePicker, int dev_id, const char * timing_fnm); void setupProjectors(); void run(); void autoPickOneMicrograph(FileName &fn_mic, long int imic); void calculateStddevAndMeanUnderMask(AccPtr< ACCCOMPLEX > &d_Fmic, AccPtr< ACCCOMPLEX > &d_Fmic2, AccPtr< ACCCOMPLEX > &d_Fmsk, int nr_nonzero_pixels_mask, AccPtr< XFLOAT > &d_Mstddev, AccPtr< XFLOAT > &d_Mmean, size_t x, size_t y, size_t mic_size, size_t workSize); ~AutoPickerCuda() { for (int i = 0; i < classStreams.size(); i++) 
HANDLE_ERROR(cudaStreamDestroy(classStreams[i])); } //private: // // Uses Roseman2003 formulae to calculate stddev under the mask through FFTs // // The FFTs of the micrograph (Fmic), micrograph-squared (Fmic2) and the mask (Fmsk) need to be provided at downsize_mic // // The putput (Mstddev) will be at (binned) micrograph_size // void calculateStddevAndMeanUnderMask(const MultidimArray &Fmic, const MultidimArray &Fmic2, // MultidimArray &Fmsk, int nr_nonzero_pixels_mask, MultidimArray &Mstddev, MultidimArray &Mmean); // // // Peak search for all pixels above a given threshold in the map // void peakSearch(const MultidimArray &Mccf, const MultidimArray &Mpsi, const MultidimArray &Mstddev, int iref, int skip_side, std::vector &peaks); // // // Now prune the coordinates: within min_particle_distance: all peaks are the same cluster // // From each cluster, take the single peaks with the highest ccf // // If then, there is another peaks at a distance of at least min_particle_distance: take that one as well, and so forth... // void prunePeakClusters(std::vector &peaks, int min_distance); // // // // Only keep those peaks that are at the given distance apart from each other // void removeTooCloselyNeighbouringPeaks(std::vector &peaks, int min_distance); }; #endif /* CUDA_AUTOPICKER_H_ */ relion-3.1.3/src/acc/cuda/cuda_backprojector.cu000066400000000000000000000003151411340063500214100ustar00rootroot00000000000000#include #include #include "src/acc/cuda/cuda_settings.h" #include "src/acc/acc_backprojector.h" #include "src/acc/acc_projector.h" #include "src/acc/acc_backprojector_impl.h" relion-3.1.3/src/acc/cuda/cuda_benchmark_utils.cu000066400000000000000000000064121411340063500217360ustar00rootroot00000000000000 #include "src/acc/cuda/cuda_benchmark_utils.h" //Non-concurrent benchmarking tools (only for Linux) #include #include #include #include #include "src/macros.h" #include "src/error.h" int relion_timer::cuda_benchmark_find_id(std::string id, std::vector v) { for (unsigned i = 0; i < v.size(); i++) if (v[i] == id) return i; return -1; } void relion_timer::cuda_cpu_tic(std::string id) { if (cuda_benchmark_find_id(id, cuda_cpu_benchmark_identifiers) == -1) { cuda_cpu_benchmark_identifiers.push_back(id); cuda_cpu_benchmark_start_times.push_back(clock()); } else { printf("DEBUG_ERROR: Provided identifier '%s' already exists in call to cuda_cpu_tic.\n", id.c_str()); CRITICAL(ERRCTIC); } } void relion_timer::cuda_cpu_toc(std::string id) { int idx = cuda_benchmark_find_id(id, cuda_cpu_benchmark_identifiers); if (idx == -1) { printf("DEBUG_ERROR: Provided identifier '%s' not found in call to cuda_cpu_toc.\n", id.c_str()); //exit( EXIT_FAILURE ); } else { clock_t t = clock() - cuda_cpu_benchmark_start_times[idx]; cuda_cpu_benchmark_identifiers.erase(cuda_cpu_benchmark_identifiers.begin()+idx); cuda_cpu_benchmark_start_times.erase(cuda_cpu_benchmark_start_times.begin()+idx); fprintf(cuda_cpu_benchmark_fPtr,"%06.2f ms ......", (float)t / CLOCKS_PER_SEC * 1000.); for (int i = 1; i < cuda_cpu_benchmark_identifiers.size(); i++) fprintf(cuda_cpu_benchmark_fPtr,"......"); fprintf(cuda_cpu_benchmark_fPtr," %s\n", id.c_str()); // printf(,"%s \t %.2f ms\n", id.c_str(), (float)t / CLOCKS_PER_SEC * 1000.); } } void relion_timer::cuda_gpu_tic(std::string id) { if (cuda_benchmark_find_id(id, cuda_gpu_benchmark_identifiers) == -1) { cudaEvent_t start, stop; cudaEventCreate(&start); cudaEventCreate(&stop); cudaEventRecord(start, 0); cuda_gpu_benchmark_identifiers.push_back(id); 
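		// identifiers, start events and stop events are parallel vectors: entry i of each belongs to the same timer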
cuda_gpu_benchmark_start_times.push_back(start); cuda_gpu_benchmark_stop_times.push_back(stop); } else { printf("DEBUG_ERROR: Provided identifier '%s' already exists in call to cuda_gpu_tic.\n", id.c_str()); CRITICAL(ERRGTIC); } } void relion_timer::cuda_gpu_toc(std::string id) { int idx = cuda_benchmark_find_id(id, cuda_gpu_benchmark_identifiers); if (idx == -1) { printf("DEBUG_ERROR: Provided identifier '%s' not found in call to cuda_gpu_tac.\n", id.c_str()); CRITICAL(ERRGTOC); } else { cudaEventRecord(cuda_gpu_benchmark_stop_times[idx], 0); cudaEventSynchronize(cuda_gpu_benchmark_stop_times[idx]); } } void relion_timer::cuda_gpu_printtictoc() { if (cuda_gpu_benchmark_identifiers.size() == 0) { printf("DEBUG_ERROR: There were no identifiers found in the list, on call to cuda_gpu_toc.\n"); CRITICAL(ERRTPC); } else { float time; for (int idx = 0; idx < cuda_gpu_benchmark_identifiers.size(); idx ++) { cudaEventElapsedTime(&time, cuda_gpu_benchmark_start_times[idx], cuda_gpu_benchmark_stop_times[idx]); cudaEventDestroy(cuda_gpu_benchmark_start_times[idx]); cudaEventDestroy(cuda_gpu_benchmark_stop_times[idx]); fprintf(cuda_gpu_benchmark_fPtr,"%.2f ms \t %s\n", time, cuda_gpu_benchmark_identifiers[idx].c_str()); } cuda_gpu_benchmark_identifiers.clear(); cuda_gpu_benchmark_start_times.clear(); cuda_gpu_benchmark_stop_times.clear(); } } relion-3.1.3/src/acc/cuda/cuda_benchmark_utils.h000066400000000000000000000035721411340063500215620ustar00rootroot00000000000000 #ifndef CUDA_BENCHMARK_UTILS_H_ #define CUDA_BENCHMARK_UTILS_H_ //Non-concurrent benchmarking tools (only for Linux) #include #include #include #include #include #include #include #ifdef TIMING_FILES #define CTIC(timer,timing) (timer.cuda_cpu_tic(timing)) #define CTOC(timer,timing) (timer.cuda_cpu_toc(timing)) #define GTIC(timer,timing) (timer.cuda_gpu_tic(timing)) #define GTOC(timer,timing) (timer.cuda_gpu_toc(timing)) #define GATHERGPUTIMINGS(timer) (timer.cuda_gpu_printtictoc()) #elif defined CUDA_PROFILING #include #define CTIC(timer,timing) (nvtxRangePush(timing)) #define CTOC(timer,timing) (nvtxRangePop()) #define GTIC(timer,timing) #define GTOC(timer,timing) #define GATHERGPUTIMINGS(timer) #else #define CTIC(timer,timing) #define CTOC(timer,timing) #define GTIC(timer,timing) #define GTOC(timer,timing) #define GATHERGPUTIMINGS(timer) #endif class relion_timer { public: std::vector cuda_cpu_benchmark_identifiers; std::vector cuda_cpu_benchmark_start_times; FILE *cuda_cpu_benchmark_fPtr; std::vector cuda_gpu_benchmark_identifiers; std::vector cuda_gpu_benchmark_start_times; std::vector cuda_gpu_benchmark_stop_times; FILE *cuda_gpu_benchmark_fPtr; relion_timer(std::string fnm) { std::stringstream fnm_cpu, fnm_gpu; fnm_cpu << "output/" << fnm << "_cpu.dat"; cuda_cpu_benchmark_fPtr = fopen(fnm_cpu.str().c_str(),"a"); fnm_gpu << "output/" << fnm << "_gpu.dat"; cuda_gpu_benchmark_fPtr = fopen(fnm_gpu.str().c_str(),"a"); } int cuda_benchmark_find_id(std::string id, std::vector v); void cuda_cpu_tic(std::string id); void cuda_cpu_toc(std::string id); void cuda_gpu_tic(std::string id); void cuda_gpu_toc(std::string id); void cuda_gpu_printtictoc(); }; #endif /* CUDA_BENCHMARK_UTILS_H_ */ relion-3.1.3/src/acc/cuda/cuda_fft.h000066400000000000000000000211741411340063500171650ustar00rootroot00000000000000#ifndef CUDA_FFT_H_ #define CUDA_FFT_H_ #include "src/acc/cuda/cuda_settings.h" #include "src/acc/cuda/cuda_mem_utils.h" #include #include #ifdef DEBUG_CUDA #define HANDLE_CUFFT_ERROR( err ) (CufftHandleError( err, __FILE__, __LINE__ )) #else 
#define HANDLE_CUFFT_ERROR( err ) (err) //Do nothing #endif static void CufftHandleError( cufftResult err, const char *file, int line ) { if (err != CUFFT_SUCCESS) { fprintf(stderr, "Cufft error in file '%s' in line %i : %s.\n", __FILE__, __LINE__, "error" ); #ifdef DEBUG_CUDA raise(SIGSEGV); #else CRITICAL(ERRGPUKERN); #endif } } class CudaFFT { bool planSet; public: #ifdef ACC_DOUBLE_PRECISION AccPtr reals; AccPtr fouriers; #else AccPtr reals; AccPtr fouriers; #endif cufftHandle cufftPlanForward, cufftPlanBackward; int direction; int dimension, idist, odist, istride, ostride; int inembed[3]; int onembed[3]; size_t xSize,ySize,zSize,xFSize,yFSize,zFSize; std::vector< int > batchSize; CudaCustomAllocator *CFallocator; int batchSpace, batchIters, reqN; CudaFFT(cudaStream_t stream, CudaCustomAllocator *allocator, int transformDimension = 2): reals(stream, allocator), fouriers(stream, allocator), cufftPlanForward(0), cufftPlanBackward(0), direction(0), dimension((int)transformDimension), idist(0), odist(0), istride(1), ostride(1), planSet(false), xSize(0), ySize(0), zSize(0), xFSize(0), yFSize(0), zFSize(0), batchSize(1,1), reqN(1), CFallocator(allocator) {}; void setAllocator(CudaCustomAllocator *allocator) { reals.setAllocator(allocator); fouriers.setAllocator(allocator); CFallocator = allocator; } size_t estimate(int batch) { size_t needed(0); size_t biggness; #ifdef ACC_DOUBLE_PRECISION if(direction<=0) { HANDLE_CUFFT_ERROR( cufftEstimateMany(dimension, inembed, inembed, istride, idist, onembed, ostride, odist, CUFFT_D2Z, batch, &biggness)); needed += biggness; } if(direction>=0) { HANDLE_CUFFT_ERROR( cufftEstimateMany(dimension, inembed, onembed, ostride, odist, inembed, istride, idist, CUFFT_Z2D, batch, &biggness)); needed += biggness; } #else if(direction<=0) { HANDLE_CUFFT_ERROR( cufftEstimateMany(dimension, inembed, inembed, istride, idist, onembed, ostride, odist, CUFFT_R2C, batch, &biggness)); needed += biggness; } if(direction>=0) { HANDLE_CUFFT_ERROR( cufftEstimateMany(dimension, inembed, onembed, ostride, odist, inembed, istride, idist, CUFFT_C2R, batch, &biggness)); needed += biggness; } #endif size_t res = needed + (size_t)odist*(size_t)batch*sizeof(XFLOAT)*(size_t)2 + (size_t)idist*(size_t)batch*sizeof(XFLOAT); return res; } void setSize(size_t x, size_t y, size_t z, int batch = 1, int setDirection = 0) { /* Optional direction input restricts transformer to * forwards or backwards tranformation only, * which reduces memory requirements, especially * for large batches of simulatanous transforms. 
* * FFTW_FORWARDS === -1 * FFTW_BACKWARDS === +1 * * The default direction is 0 === forwards AND backwards */ int checkDim; if(z>1) checkDim=3; else if(y>1) checkDim=2; else checkDim=1; if(checkDim != dimension) CRITICAL(ERRCUFFTDIM); if( !( (setDirection==-1)||(setDirection==0)||(setDirection==1) ) ) { std::cerr << "*ERROR : Setting a cuda transformer direction to non-defined value" << std::endl; CRITICAL(ERRCUFFTDIR); } direction = setDirection; if (x == xSize && y == ySize && z == zSize && batch == reqN && planSet) return; clear(); batchSize.resize(1); batchSize[0] = batch; reqN = batch; xSize = x; ySize = y; zSize = z; xFSize = x/2 + 1; yFSize = y; zFSize = z; idist = zSize*ySize*xSize; odist = zSize*ySize*(xSize/2+1); istride = 1; ostride = 1; if(dimension==3) { inembed[0] = zSize; inembed[1] = ySize; inembed[2] = xSize; onembed[0] = zFSize; onembed[1] = yFSize; onembed[2] = xFSize; } else if(dimension==2) { inembed[0] = ySize; inembed[1] = xSize; onembed[0] = yFSize; onembed[1] = xFSize; } else { inembed[0] = xSize; onembed[0] = xFSize; } size_t needed, avail, total; needed = estimate(batchSize[0]); DEBUG_HANDLE_ERROR(cudaMemGetInfo( &avail, &total )); // std::cout << std::endl << "needed = "; // printf("%15zu\n", needed); // std::cout << "avail = "; // printf("%15zu\n", avail); // Check if there is enough memory // // --- TO HOLD TEMPORARY DATA DURING TRANSFORMS --- // // If there isn't, find how many there ARE space for and loop through them in batches. if(needed>avail) { batchIters = 2; batchSpace = CEIL((double) batch / (double)batchIters); needed = estimate(batchSpace); while(needed>avail && batchSpace>1) { batchIters++; batchSpace = CEIL((double) batch / (double)batchIters); needed = estimate(batchSpace); } if(batchIters>1) { batchIters = (int)((float)batchIters*1.1 + 1); batchSpace = CEIL((double) batch / (double)batchIters); needed = estimate(batchSpace); } batchSize.assign(batchIters,batchSpace); // specify batchIters of batches, each with batchSpace orientations batchSize[batchIters-1] = batchSpace - (batchSpace*batchIters - batch); // set last to care for remainder. 
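			// e.g. a request for 360 transforms that only fits on the device in chunks of ~100 becomes
			// several equally sized batches plus a (possibly smaller) final batch holding the remainder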
if(needed>avail) CRITICAL(ERRFFTMEMLIM); // std::cerr << std::endl << "NOTE: Having to use " << batchIters << " batches of orientations "; // std::cerr << "to achieve the total requested " << batch << " orientations" << std::endl; // std::cerr << "( this could affect performance, consider using " << std::endl; // std::cerr << "\t higher --ang" << std::endl; // std::cerr << "\t harder --shrink" << std::endl; // std::cerr << "\t higher --lopass with --shrink 0" << std::endl; } else { batchIters = 1; batchSpace = batch; } reals.setSize(idist*batchSize[0]); reals.deviceAlloc(); reals.hostAlloc(); fouriers.setSize(odist*batchSize[0]); fouriers.deviceAlloc(); fouriers.hostAlloc(); // DEBUG_HANDLE_ERROR(cudaMemGetInfo( &avail, &total )); // needed = estimate(batchSize[0], fudge); // std::cout << "after alloc: " << std::endl << std::endl << "needed = "; // printf("%15li\n", needed); // std::cout << "avail = "; // printf("%15li\n", avail); #ifdef ACC_DOUBLE_PRECISION if(direction<=0) { HANDLE_CUFFT_ERROR( cufftPlanMany(&cufftPlanForward, dimension, inembed, inembed, istride, idist, onembed, ostride, odist, CUFFT_D2Z, batchSize[0])); HANDLE_CUFFT_ERROR( cufftSetStream(cufftPlanForward, fouriers.getStream())); } if(direction>=0) { HANDLE_CUFFT_ERROR( cufftPlanMany(&cufftPlanBackward, dimension, inembed, onembed, ostride, odist, inembed, istride, idist, CUFFT_Z2D, batchSize[0])); HANDLE_CUFFT_ERROR( cufftSetStream(cufftPlanBackward, reals.getStream())); } planSet = true; } void forward() { HANDLE_CUFFT_ERROR( cufftExecD2Z(cufftPlanForward, ~reals, ~fouriers) ); } void backward() { HANDLE_CUFFT_ERROR( cufftExecZ2D(cufftPlanBackward, ~fouriers, ~reals) ); } void backward(AccPtr &dst) { HANDLE_CUFFT_ERROR( cufftExecZ2D(cufftPlanBackward, ~fouriers, ~dst) ); } #else if(direction<=0) { HANDLE_CUFFT_ERROR( cufftPlanMany(&cufftPlanForward, dimension, inembed, inembed, istride, idist, onembed, ostride, odist, CUFFT_R2C, batchSize[0])); HANDLE_CUFFT_ERROR( cufftSetStream(cufftPlanForward, fouriers.getStream())); } if(direction>=0) { HANDLE_CUFFT_ERROR( cufftPlanMany(&cufftPlanBackward, dimension, inembed, onembed, ostride, odist, inembed, istride, idist, CUFFT_C2R, batchSize[0])); HANDLE_CUFFT_ERROR( cufftSetStream(cufftPlanBackward, reals.getStream())); } planSet = true; } void forward() { if(direction==1) { std::cout << "trying to execute a forward plan for a cudaFFT transformer which is backwards-only" << std::endl; CRITICAL(ERRCUFFTDIRF); } HANDLE_CUFFT_ERROR( cufftExecR2C(cufftPlanForward, ~reals, ~fouriers) ); } void backward() { if(direction==-1) { std::cout << "trying to execute a backwards plan for a cudaFFT transformer which is forwards-only" << std::endl; CRITICAL(ERRCUFFTDIRR); } HANDLE_CUFFT_ERROR( cufftExecC2R(cufftPlanBackward, ~fouriers, ~reals) ); } #endif void clear() { if(planSet) { reals.freeIfSet(); fouriers.freeIfSet(); if(direction<=0) HANDLE_CUFFT_ERROR(cufftDestroy(cufftPlanForward)); if(direction>=0) HANDLE_CUFFT_ERROR(cufftDestroy(cufftPlanBackward)); planSet = false; } } ~CudaFFT() {clear();} }; #endif relion-3.1.3/src/acc/cuda/cuda_helper_functions.cu000066400000000000000000000014471411340063500221360ustar00rootroot00000000000000#undef ALTCPU #include #include "src/ml_optimiser.h" #include "src/acc/acc_ptr.h" #include "src/acc/acc_projector.h" #include "src/acc/acc_projector_plan.h" #include "src/acc/acc_backprojector.h" #include "src/acc/cuda/cuda_settings.h" #include "src/acc/cuda/cuda_fft.h" #include "src/acc/cuda/cuda_kernels/cuda_device_utils.cuh" #ifdef CUDA_FORCESTL #include 
"src/acc/cuda/cuda_utils_stl.cuh" #else #include "src/acc/cuda/cuda_utils_cub.cuh" #endif #include "src/acc/utilities.h" #include "src/acc/acc_helper_functions.h" #include "src/acc/cuda/cuda_kernels/BP.cuh" #include "src/macros.h" #include "src/error.h" #include "src/acc/acc_ml_optimiser.h" #include "src/acc/cuda/cuda_ml_optimiser.h" #include "src/acc/acc_helper_functions.h" #include "src/acc/acc_helper_functions_impl.h" relion-3.1.3/src/acc/cuda/cuda_kernels/000077500000000000000000000000001411340063500176735ustar00rootroot00000000000000relion-3.1.3/src/acc/cuda/cuda_kernels/BP.cuh000066400000000000000000000434311411340063500207020ustar00rootroot00000000000000#ifndef CUDA_BP_KERNELS_CUH_ #define CUDA_BP_KERNELS_CUH_ #include #include #include #include #include "src/acc/acc_projector.h" #include "src/acc/acc_backprojector.h" #include "src/acc/cuda/cuda_settings.h" #include "src/acc/cuda/cuda_kernels/cuda_device_utils.cuh" /* * BP KERNELS */ template < bool CTF_PREMULTIPLIED > __global__ void cuda_kernel_backproject2D( XFLOAT *g_img_real, XFLOAT *g_img_imag, XFLOAT *g_trans_x, XFLOAT *g_trans_y, XFLOAT* g_weights, XFLOAT* g_Minvsigma2s, XFLOAT* g_ctfs, unsigned long translation_num, XFLOAT significant_weight, XFLOAT weight_norm, XFLOAT *g_eulers, XFLOAT *g_model_real, XFLOAT *g_model_imag, XFLOAT *g_model_weight, int max_r, int max_r2, XFLOAT padding_factor, unsigned img_x, unsigned img_y, unsigned img_xy, unsigned mdl_x, int mdl_inity) { unsigned tid = threadIdx.x; unsigned img = blockIdx.x; int img_y_half = img_y / 2; int max_r2_out = max_r2 * padding_factor * padding_factor; __shared__ XFLOAT s_eulers[4]; XFLOAT minvsigma2, ctf, img_real, img_imag, Fweight, real, imag, weight; if (tid == 0) s_eulers[0] = g_eulers[img*9+0] * padding_factor; else if (tid == 1) s_eulers[1] = g_eulers[img*9+1] * padding_factor; else if (tid == 2) s_eulers[2] = g_eulers[img*9+3] * padding_factor; else if (tid == 3) s_eulers[3] = g_eulers[img*9+4] * padding_factor; __syncthreads(); int pixel_pass_num(ceilf((float)img_xy/(float)BP_2D_BLOCK_SIZE)); for (unsigned pass = 0; pass < pixel_pass_num; pass++) { unsigned pixel = (pass * BP_2D_BLOCK_SIZE) + tid; if (pixel >= img_xy) continue; int x = pixel % img_x; int y = (int)floorf( (float)pixel / (float)img_x); if (y > img_y_half) { y -= img_y; } //WAVG minvsigma2 = __ldg(&g_Minvsigma2s[pixel]); ctf = __ldg(&g_ctfs[pixel]); img_real = __ldg(&g_img_real[pixel]); img_imag = __ldg(&g_img_imag[pixel]); Fweight = (XFLOAT) 0.0; real = (XFLOAT) 0.0; imag = (XFLOAT) 0.0; XFLOAT temp_real, temp_imag; for (unsigned long itrans = 0; itrans < translation_num; itrans++) { weight = g_weights[img * translation_num + itrans]; if (weight >= significant_weight) { if(CTF_PREMULTIPLIED) { weight = (weight / weight_norm) * minvsigma2; Fweight += weight * ctf * ctf; } else { weight = (weight / weight_norm) * ctf * minvsigma2; Fweight += weight * ctf; } translatePixel(x, y, g_trans_x[itrans], g_trans_y[itrans], img_real, img_imag, temp_real, temp_imag); real += temp_real * weight; imag += temp_imag * weight; } } if (Fweight > (XFLOAT) 0.0) { // Get logical coordinates in the 3D map XFLOAT xp = (s_eulers[0] * x + s_eulers[1] * y ); XFLOAT yp = (s_eulers[2] * x + s_eulers[3] * y ); // Only consider pixels that are projected inside the allowed circle in output coordinates. // --JZ, Nov. 
26th 2018 if ( ( xp * xp + yp * yp ) > max_r2_out ) continue; // Only asymmetric half is stored if (xp < 0) { // Get complex conjugated hermitian symmetry pair xp = -xp; yp = -yp; imag = -imag; } int x0 = floorf(xp); XFLOAT fx = xp - x0; int x1 = x0 + 1; int y0 = floorf(yp); XFLOAT fy = yp - y0; y0 -= mdl_inity; int y1 = y0 + 1; XFLOAT mfx = (XFLOAT) 1.0 - fx; XFLOAT mfy = (XFLOAT) 1.0 - fy; XFLOAT dd00 = mfy * mfx; XFLOAT dd01 = mfy * fx; XFLOAT dd10 = fy * mfx; XFLOAT dd11 = fy * fx; cuda_atomic_add(&g_model_real [y0 * mdl_x + x0], dd00 * real); cuda_atomic_add(&g_model_imag [y0 * mdl_x + x0], dd00 * imag); cuda_atomic_add(&g_model_weight[y0 * mdl_x + x0], dd00 * Fweight); cuda_atomic_add(&g_model_real [y0 * mdl_x + x1], dd01 * real); cuda_atomic_add(&g_model_imag [y0 * mdl_x + x1], dd01 * imag); cuda_atomic_add(&g_model_weight[y0 * mdl_x + x1], dd01 * Fweight); cuda_atomic_add(&g_model_real [y1 * mdl_x + x0], dd10 * real); cuda_atomic_add(&g_model_imag [y1 * mdl_x + x0], dd10 * imag); cuda_atomic_add(&g_model_weight[y1 * mdl_x + x0], dd10 * Fweight); cuda_atomic_add(&g_model_real [y1 * mdl_x + x1], dd11 * real); cuda_atomic_add(&g_model_imag [y1 * mdl_x + x1], dd11 * imag); cuda_atomic_add(&g_model_weight[y1 * mdl_x + x1], dd11 * Fweight); } } } template < bool DATA3D, bool CTF_PREMULTIPLIED > __global__ void cuda_kernel_backproject3D( XFLOAT *g_img_real, XFLOAT *g_img_imag, XFLOAT *g_trans_x, XFLOAT *g_trans_y, XFLOAT *g_trans_z, XFLOAT* g_weights, XFLOAT* g_Minvsigma2s, XFLOAT* g_ctfs, unsigned long translation_num, XFLOAT significant_weight, XFLOAT weight_norm, XFLOAT *g_eulers, XFLOAT *g_model_real, XFLOAT *g_model_imag, XFLOAT *g_model_weight, int max_r, int max_r2, XFLOAT padding_factor, unsigned img_x, unsigned img_y, unsigned img_z, unsigned img_xyz, unsigned mdl_x, unsigned mdl_y, int mdl_inity, int mdl_initz) { unsigned tid = threadIdx.x; unsigned img = blockIdx.x; int img_y_half = img_y / 2; int img_z_half = img_z / 2; int max_r2_vol = max_r2 * padding_factor * padding_factor; __shared__ XFLOAT s_eulers[9]; XFLOAT minvsigma2, ctf, img_real, img_imag, Fweight, real, imag, weight; if (tid < 9) s_eulers[tid] = g_eulers[img*9+tid]; __syncthreads(); int pixel_pass_num(0); if(DATA3D) pixel_pass_num = (ceilf((float)img_xyz/(float)BP_DATA3D_BLOCK_SIZE)); else pixel_pass_num = (ceilf((float)img_xyz/(float)BP_REF3D_BLOCK_SIZE)); for (unsigned pass = 0; pass < pixel_pass_num; pass++) { unsigned pixel(0); if(DATA3D) pixel = (pass * BP_DATA3D_BLOCK_SIZE) + tid; else pixel = (pass * BP_REF3D_BLOCK_SIZE) + tid; if (pixel >= img_xyz) continue; int x,y,z,xy; if(DATA3D) { z = floorfracf(pixel, img_x*img_y); xy = pixel % (img_x*img_y); x = xy % img_x; y = floorfracf( xy, img_x); if (z > img_z_half) { z = z - img_z; if(x==0) continue; } } else { x = pixel % img_x; y = floorfracf( pixel , img_x); } if (y > img_y_half) { y = y - img_y; } //WAVG minvsigma2 = __ldg(&g_Minvsigma2s[pixel]); ctf = __ldg(&g_ctfs[pixel]); img_real = __ldg(&g_img_real[pixel]); img_imag = __ldg(&g_img_imag[pixel]); Fweight = (XFLOAT) 0.0; real = (XFLOAT) 0.0; imag = (XFLOAT) 0.0; XFLOAT temp_real, temp_imag; for (unsigned long itrans = 0; itrans < translation_num; itrans++) { weight = g_weights[img * translation_num + itrans]; if (weight >= significant_weight) { if(CTF_PREMULTIPLIED) { weight = (weight / weight_norm) * minvsigma2; Fweight += weight * ctf * ctf; } else { weight = (weight / weight_norm) * ctf * minvsigma2; Fweight += weight * ctf; } if(DATA3D) translatePixel(x, y, z, g_trans_x[itrans], g_trans_y[itrans], 
g_trans_z[itrans], img_real, img_imag, temp_real, temp_imag); else translatePixel(x, y, g_trans_x[itrans], g_trans_y[itrans], img_real, img_imag, temp_real, temp_imag); real += temp_real * weight; imag += temp_imag * weight; } } //BP if (Fweight > (XFLOAT) 0.0) { // Get logical coordinates in the 3D map XFLOAT xp,yp,zp; if(DATA3D) { xp = (s_eulers[0] * x + s_eulers[1] * y + s_eulers[2] * z) * padding_factor; yp = (s_eulers[3] * x + s_eulers[4] * y + s_eulers[5] * z) * padding_factor; zp = (s_eulers[6] * x + s_eulers[7] * y + s_eulers[8] * z) * padding_factor; } else { xp = (s_eulers[0] * x + s_eulers[1] * y ) * padding_factor; yp = (s_eulers[3] * x + s_eulers[4] * y ) * padding_factor; zp = (s_eulers[6] * x + s_eulers[7] * y ) * padding_factor; } // Only consider pixels that are projected inside the sphere in output coordinates. // --JZ, Oct. 18. 2018 if ( ( xp * xp + yp * yp + zp * zp ) > max_r2_vol) continue; // Only asymmetric half is stored if (xp < (XFLOAT) 0.0) { // Get complex conjugated hermitian symmetry pair xp = -xp; yp = -yp; zp = -zp; imag = -imag; } int x0 = floorf(xp); XFLOAT fx = xp - x0; int x1 = x0 + 1; int y0 = floorf(yp); XFLOAT fy = yp - y0; y0 -= mdl_inity; int y1 = y0 + 1; int z0 = floorf(zp); XFLOAT fz = zp - z0; z0 -= mdl_initz; int z1 = z0 + 1; XFLOAT mfx = (XFLOAT)1.0 - fx; XFLOAT mfy = (XFLOAT)1.0 - fy; XFLOAT mfz = (XFLOAT)1.0 - fz; XFLOAT dd000 = mfz * mfy * mfx; cuda_atomic_add(&g_model_real [z0 * mdl_x * mdl_y + y0 * mdl_x + x0], dd000 * real); cuda_atomic_add(&g_model_imag [z0 * mdl_x * mdl_y + y0 * mdl_x + x0], dd000 * imag); cuda_atomic_add(&g_model_weight[z0 * mdl_x * mdl_y + y0 * mdl_x + x0], dd000 * Fweight); XFLOAT dd001 = mfz * mfy * fx; cuda_atomic_add(&g_model_real [z0 * mdl_x * mdl_y + y0 * mdl_x + x1], dd001 * real); cuda_atomic_add(&g_model_imag [z0 * mdl_x * mdl_y + y0 * mdl_x + x1], dd001 * imag); cuda_atomic_add(&g_model_weight[z0 * mdl_x * mdl_y + y0 * mdl_x + x1], dd001 * Fweight); XFLOAT dd010 = mfz * fy * mfx; cuda_atomic_add(&g_model_real [z0 * mdl_x * mdl_y + y1 * mdl_x + x0], dd010 * real); cuda_atomic_add(&g_model_imag [z0 * mdl_x * mdl_y + y1 * mdl_x + x0], dd010 * imag); cuda_atomic_add(&g_model_weight[z0 * mdl_x * mdl_y + y1 * mdl_x + x0], dd010 * Fweight); XFLOAT dd011 = mfz * fy * fx; cuda_atomic_add(&g_model_real [z0 * mdl_x * mdl_y + y1 * mdl_x + x1], dd011 * real); cuda_atomic_add(&g_model_imag [z0 * mdl_x * mdl_y + y1 * mdl_x + x1], dd011 * imag); cuda_atomic_add(&g_model_weight[z0 * mdl_x * mdl_y + y1 * mdl_x + x1], dd011 * Fweight); XFLOAT dd100 = fz * mfy * mfx; cuda_atomic_add(&g_model_real [z1 * mdl_x * mdl_y + y0 * mdl_x + x0], dd100 * real); cuda_atomic_add(&g_model_imag [z1 * mdl_x * mdl_y + y0 * mdl_x + x0], dd100 * imag); cuda_atomic_add(&g_model_weight[z1 * mdl_x * mdl_y + y0 * mdl_x + x0], dd100 * Fweight); XFLOAT dd101 = fz * mfy * fx; cuda_atomic_add(&g_model_real [z1 * mdl_x * mdl_y + y0 * mdl_x + x1], dd101 * real); cuda_atomic_add(&g_model_imag [z1 * mdl_x * mdl_y + y0 * mdl_x + x1], dd101 * imag); cuda_atomic_add(&g_model_weight[z1 * mdl_x * mdl_y + y0 * mdl_x + x1], dd101 * Fweight); XFLOAT dd110 = fz * fy * mfx; cuda_atomic_add(&g_model_real [z1 * mdl_x * mdl_y + y1 * mdl_x + x0], dd110 * real); cuda_atomic_add(&g_model_imag [z1 * mdl_x * mdl_y + y1 * mdl_x + x0], dd110 * imag); cuda_atomic_add(&g_model_weight[z1 * mdl_x * mdl_y + y1 * mdl_x + x0], dd110 * Fweight); XFLOAT dd111 = fz * fy * fx; cuda_atomic_add(&g_model_real [z1 * mdl_x * mdl_y + y1 * mdl_x + x1], dd111 * real); 
cuda_atomic_add(&g_model_imag [z1 * mdl_x * mdl_y + y1 * mdl_x + x1], dd111 * imag); cuda_atomic_add(&g_model_weight[z1 * mdl_x * mdl_y + y1 * mdl_x + x1], dd111 * Fweight); } } } template < bool DATA3D, bool CTF_PREMULTIPLIED > __global__ void cuda_kernel_backprojectSGD( AccProjectorKernel projector, XFLOAT *g_img_real, XFLOAT *g_img_imag, XFLOAT *g_trans_x, XFLOAT *g_trans_y, XFLOAT *g_trans_z, XFLOAT* g_weights, XFLOAT* g_Minvsigma2s, XFLOAT* g_ctfs, unsigned long translation_num, XFLOAT significant_weight, XFLOAT weight_norm, XFLOAT *g_eulers, XFLOAT *g_model_real, XFLOAT *g_model_imag, XFLOAT *g_model_weight, int max_r, int max_r2, XFLOAT padding_factor, unsigned img_x, unsigned img_y, unsigned img_z, unsigned img_xyz, unsigned mdl_x, unsigned mdl_y, int mdl_inity, int mdl_initz) { unsigned tid = threadIdx.x; unsigned img = blockIdx.x; int img_y_half = img_y / 2; int img_z_half = img_z / 2; int max_r2_vol = max_r2 * padding_factor * padding_factor; __shared__ XFLOAT s_eulers[9]; XFLOAT minvsigma2, ctf, img_real, img_imag, Fweight, real, imag, weight; if (tid < 9) s_eulers[tid] = g_eulers[img*9+tid]; __syncthreads(); int pixel_pass_num(0); if(DATA3D) pixel_pass_num = (ceilf((float)img_xyz/(float)BP_DATA3D_BLOCK_SIZE)); else pixel_pass_num = (ceilf((float)img_xyz/(float)BP_REF3D_BLOCK_SIZE)); for (unsigned pass = 0; pass < pixel_pass_num; pass++) { unsigned pixel(0); if(DATA3D) pixel = (pass * BP_DATA3D_BLOCK_SIZE) + tid; else pixel = (pass * BP_REF3D_BLOCK_SIZE) + tid; if (pixel >= img_xyz) continue; int x,y,z,xy; if(DATA3D) { z = floorfracf(pixel, img_x*img_y); xy = pixel % (img_x*img_y); x = xy % img_x; y = floorfracf( xy, img_x); if (z > img_z_half) { z = z - img_z; if(x==0) continue; } } else { x = pixel % img_x; y = floorfracf( pixel , img_x); } if (y > img_y_half) { y = y - img_y; } XFLOAT ref_real = (XFLOAT) 0.0; XFLOAT ref_imag = (XFLOAT) 0.0; if(DATA3D) projector.project3Dmodel( x,y,z, s_eulers[0], s_eulers[1], s_eulers[2], s_eulers[3], s_eulers[4], s_eulers[5], s_eulers[6], s_eulers[7], s_eulers[8], ref_real, ref_imag); else projector.project3Dmodel( x,y, s_eulers[0], s_eulers[1], s_eulers[3], s_eulers[4], s_eulers[6], s_eulers[7], ref_real, ref_imag); //WAVG minvsigma2 = __ldg(&g_Minvsigma2s[pixel]); ctf = __ldg(&g_ctfs[pixel]); img_real = __ldg(&g_img_real[pixel]); img_imag = __ldg(&g_img_imag[pixel]); Fweight = (XFLOAT) 0.0; real = (XFLOAT) 0.0; imag = (XFLOAT) 0.0; ref_real *= ctf; ref_imag *= ctf; XFLOAT temp_real, temp_imag; for (unsigned long itrans = 0; itrans < translation_num; itrans++) { weight = g_weights[img * translation_num + itrans]; if (weight >= significant_weight) { if(CTF_PREMULTIPLIED) { weight = (weight / weight_norm) * minvsigma2; Fweight += weight * ctf * ctf; } else { weight = (weight / weight_norm) * ctf * minvsigma2; Fweight += weight * ctf; } if(DATA3D) translatePixel(x, y, z, g_trans_x[itrans], g_trans_y[itrans], g_trans_z[itrans], img_real, img_imag, temp_real, temp_imag); else translatePixel(x, y, g_trans_x[itrans], g_trans_y[itrans], img_real, img_imag, temp_real, temp_imag); real += (temp_real-ref_real) * weight; imag += (temp_imag-ref_imag) * weight; } } //BP if (Fweight > (XFLOAT) 0.0) { // Get logical coordinates in the 3D map XFLOAT xp,yp,zp; if(DATA3D) { xp = (s_eulers[0] * x + s_eulers[1] * y + s_eulers[2] * z) * padding_factor; yp = (s_eulers[3] * x + s_eulers[4] * y + s_eulers[5] * z) * padding_factor; zp = (s_eulers[6] * x + s_eulers[7] * y + s_eulers[8] * z) * padding_factor; } else { xp = (s_eulers[0] * x + s_eulers[1] * y ) * 
padding_factor; yp = (s_eulers[3] * x + s_eulers[4] * y ) * padding_factor; zp = (s_eulers[6] * x + s_eulers[7] * y ) * padding_factor; } // Only consider pixels that are projected inside the sphere in output coordinates. // --JZ, Nov. 26th 2018 if ( ( xp * xp + yp * yp + zp * zp ) > max_r2_vol) continue; // Only asymmetric half is stored if (xp < (XFLOAT) 0.0) { // Get complex conjugated hermitian symmetry pair xp = -xp; yp = -yp; zp = -zp; imag = -imag; } int x0 = floorf(xp); XFLOAT fx = xp - x0; int x1 = x0 + 1; int y0 = floorf(yp); XFLOAT fy = yp - y0; y0 -= mdl_inity; int y1 = y0 + 1; int z0 = floorf(zp); XFLOAT fz = zp - z0; z0 -= mdl_initz; int z1 = z0 + 1; XFLOAT mfx = (XFLOAT)1.0 - fx; XFLOAT mfy = (XFLOAT)1.0 - fy; XFLOAT mfz = (XFLOAT)1.0 - fz; XFLOAT dd000 = mfz * mfy * mfx; cuda_atomic_add(&g_model_real [z0 * mdl_x * mdl_y + y0 * mdl_x + x0], dd000 * real); cuda_atomic_add(&g_model_imag [z0 * mdl_x * mdl_y + y0 * mdl_x + x0], dd000 * imag); cuda_atomic_add(&g_model_weight[z0 * mdl_x * mdl_y + y0 * mdl_x + x0], dd000 * Fweight); XFLOAT dd001 = mfz * mfy * fx; cuda_atomic_add(&g_model_real [z0 * mdl_x * mdl_y + y0 * mdl_x + x1], dd001 * real); cuda_atomic_add(&g_model_imag [z0 * mdl_x * mdl_y + y0 * mdl_x + x1], dd001 * imag); cuda_atomic_add(&g_model_weight[z0 * mdl_x * mdl_y + y0 * mdl_x + x1], dd001 * Fweight); XFLOAT dd010 = mfz * fy * mfx; cuda_atomic_add(&g_model_real [z0 * mdl_x * mdl_y + y1 * mdl_x + x0], dd010 * real); cuda_atomic_add(&g_model_imag [z0 * mdl_x * mdl_y + y1 * mdl_x + x0], dd010 * imag); cuda_atomic_add(&g_model_weight[z0 * mdl_x * mdl_y + y1 * mdl_x + x0], dd010 * Fweight); XFLOAT dd011 = mfz * fy * fx; cuda_atomic_add(&g_model_real [z0 * mdl_x * mdl_y + y1 * mdl_x + x1], dd011 * real); cuda_atomic_add(&g_model_imag [z0 * mdl_x * mdl_y + y1 * mdl_x + x1], dd011 * imag); cuda_atomic_add(&g_model_weight[z0 * mdl_x * mdl_y + y1 * mdl_x + x1], dd011 * Fweight); XFLOAT dd100 = fz * mfy * mfx; cuda_atomic_add(&g_model_real [z1 * mdl_x * mdl_y + y0 * mdl_x + x0], dd100 * real); cuda_atomic_add(&g_model_imag [z1 * mdl_x * mdl_y + y0 * mdl_x + x0], dd100 * imag); cuda_atomic_add(&g_model_weight[z1 * mdl_x * mdl_y + y0 * mdl_x + x0], dd100 * Fweight); XFLOAT dd101 = fz * mfy * fx; cuda_atomic_add(&g_model_real [z1 * mdl_x * mdl_y + y0 * mdl_x + x1], dd101 * real); cuda_atomic_add(&g_model_imag [z1 * mdl_x * mdl_y + y0 * mdl_x + x1], dd101 * imag); cuda_atomic_add(&g_model_weight[z1 * mdl_x * mdl_y + y0 * mdl_x + x1], dd101 * Fweight); XFLOAT dd110 = fz * fy * mfx; cuda_atomic_add(&g_model_real [z1 * mdl_x * mdl_y + y1 * mdl_x + x0], dd110 * real); cuda_atomic_add(&g_model_imag [z1 * mdl_x * mdl_y + y1 * mdl_x + x0], dd110 * imag); cuda_atomic_add(&g_model_weight[z1 * mdl_x * mdl_y + y1 * mdl_x + x0], dd110 * Fweight); XFLOAT dd111 = fz * fy * fx; cuda_atomic_add(&g_model_real [z1 * mdl_x * mdl_y + y1 * mdl_x + x1], dd111 * real); cuda_atomic_add(&g_model_imag [z1 * mdl_x * mdl_y + y1 * mdl_x + x1], dd111 * imag); cuda_atomic_add(&g_model_weight[z1 * mdl_x * mdl_y + y1 * mdl_x + x1], dd111 * Fweight); } } } #endif /* CUDA_PB_KERNELS_CUH_ */ relion-3.1.3/src/acc/cuda/cuda_kernels/cuda_device_utils.cuh000066400000000000000000000160451411340063500240550ustar00rootroot00000000000000#ifndef CUDA_DEVICE_UTILS_CUH_ #define CUDA_DEVICE_UTILS_CUH_ #include #include "src/acc/cuda/cuda_settings.h" #ifdef ACC_DOUBLE_PRECISION __device__ inline double cuda_atomic_add(double* address, double val) { unsigned long long int* address_as_ull = (unsigned long long int*)address; 
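	// In double-precision builds there is no native atomicAdd(double*, double) on GPUs with
	// compute capability below 6.0, so the addition is emulated below: the current 64-bit bit
	// pattern is read, the sum is formed, and atomicCAS() retries until no other thread has
	// modified the value in between.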
unsigned long long int old = *address_as_ull, assumed; do { assumed = old; old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val + __longlong_as_double(assumed))); } while (assumed != old); // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN) return __longlong_as_double(old); } #else __device__ inline void cuda_atomic_add(float* address, float value) { atomicAdd(address,value); } #endif /* * For the following functions always use fast, low-precision intrinsics */ template< typename T1, typename T2 > static inline __device__ int floorfracf(T1 a, T2 b) { // return __float2int_rd(__fdividef( (float)a, (float)b ) ); return (int)(a/b); } template< typename T1, typename T2 > static inline __device__ int ceilfracf(T1 a, T2 b) { // return __float2int_ru(__fdividef( (float)a, (float)b ) ); return (int)(a/b + 1); } static inline __device__ XFLOAT no_tex2D(XFLOAT* mdl, XFLOAT xp, XFLOAT yp, int mdlX, int mdlInitY) { int x0 = floorf(xp); XFLOAT fx = xp - x0; int x1 = x0 + 1; int y0 = floorf(yp); XFLOAT fy = yp - y0; y0 -= mdlInitY; int y1 = y0 + 1; //----------------------------- XFLOAT d00 = mdl[y0*mdlX+x0]; XFLOAT d01 = mdl[y0*mdlX+x1]; XFLOAT d10 = mdl[y1*mdlX+x0]; XFLOAT d11 = mdl[y1*mdlX+x1]; //----------------------------- XFLOAT dx0 = d00 + (d01 - d00)*fx; XFLOAT dx1 = d10 + (d11 - d10)*fx; //----------------------------- return dx0 + (dx1 - dx0)*fy; } static inline __device__ XFLOAT no_tex3D(XFLOAT* mdl, XFLOAT xp, XFLOAT yp, XFLOAT zp, int mdlX, int mdlXY, int mdlInitY, int mdlInitZ) { int x0 = floorf(xp); XFLOAT fx = xp - x0; int x1 = x0 + 1; int y0 = floorf(yp); XFLOAT fy = yp - y0; y0 -= mdlInitY; int y1 = y0 + 1; int z0 = floorf(zp); XFLOAT fz = zp - z0; z0 -= mdlInitZ; int z1 = z0 + 1; XFLOAT d000 = mdl[z0*mdlXY+y0*mdlX+x0]; XFLOAT d001 = mdl[z0*mdlXY+y0*mdlX+x1]; XFLOAT d010 = mdl[z0*mdlXY+y1*mdlX+x0]; XFLOAT d011 = mdl[z0*mdlXY+y1*mdlX+x1]; XFLOAT d100 = mdl[z1*mdlXY+y0*mdlX+x0]; XFLOAT d101 = mdl[z1*mdlXY+y0*mdlX+x1]; XFLOAT d110 = mdl[z1*mdlXY+y1*mdlX+x0]; XFLOAT d111 = mdl[z1*mdlXY+y1*mdlX+x1]; //----------------------------- XFLOAT dx00 = d000 + (d001 - d000)*fx; XFLOAT dx01 = d100 + (d101 - d100)*fx; XFLOAT dx10 = d010 + (d011 - d010)*fx; XFLOAT dx11 = d110 + (d111 - d110)*fx; //----------------------------- XFLOAT dxy0 = dx00 + (dx10 - dx00)*fy; XFLOAT dxy1 = dx01 + (dx11 - dx01)*fy; //----------------------------- return dxy0 + (dxy1 - dxy0)*fz; } __device__ __forceinline__ void translatePixel( int x, int y, XFLOAT tx, XFLOAT ty, XFLOAT &real, XFLOAT &imag, XFLOAT &tReal, XFLOAT &tImag) { XFLOAT s, c; #ifdef ACC_DOUBLE_PRECISION sincos( x * tx + y * ty , &s, &c ); #else sincosf( x * tx + y * ty , &s, &c ); #endif tReal = c * real - s * imag; tImag = c * imag + s * real; } __device__ __forceinline__ void translatePixel( int x, int y, int z, XFLOAT tx, XFLOAT ty, XFLOAT tz, XFLOAT &real, XFLOAT &imag, XFLOAT &tReal, XFLOAT &tImag) { XFLOAT s, c; #ifdef ACC_DOUBLE_PRECISION sincos( x * tx + y * ty + z * tz, &s, &c ); #else sincosf( x * tx + y * ty + z * tz, &s, &c ); #endif tReal = c * real - s * imag; tImag = c * imag + s * real; } inline __device__ float2 operator*(float2 a, float b) { return make_float2(a.x * b, a.y * b); } inline __device__ double2 operator*(double2 a, double b) { return make_double2(a.x * b, a.y * b); } template< typename T> __global__ void cuda_kernel_init_complex_value( T *data, XFLOAT value, size_t size, int block_size) { size_t idx = blockIdx.x * block_size + threadIdx.x; if (idx < size) { data[idx].x = value; 
data[idx].y = value; } } template< typename T> __global__ void cuda_kernel_init_value( T *data, T value, size_t size, int block_size) { size_t idx = blockIdx.x * block_size + threadIdx.x; if (idx < size) data[idx] = value; } template< typename T> __global__ void cuda_kernel_array_over_threshold( T *data, bool *passed, T threshold, size_t size, int block_size) { size_t idx = blockIdx.x * block_size + threadIdx.x; if (idx < size) { if (data[idx] >= threshold) passed[idx] = true; else passed[idx] = false; } } template< typename T> __global__ void cuda_kernel_find_threshold_idx_in_cumulative( T *data, T threshold, size_t size_m1, //data size minus 1 size_t *idx, int block_size) { size_t i = blockIdx.x * block_size + threadIdx.x; if (i < size_m1 && data[i] <= threshold && threshold < data[i+1]) idx[0] = i+1; } template __global__ void cuda_kernel_window_fourier_transform( XFLOAT *g_in_real, XFLOAT *g_in_imag, XFLOAT *g_out_real, XFLOAT *g_out_imag, unsigned iX, unsigned iY, unsigned iZ, unsigned iYX, //Input dimensions unsigned oX, unsigned oY, unsigned oZ, unsigned oYX, //Output dimensions unsigned max_idx, int block_size, unsigned max_r2 = 0 ) { unsigned n = threadIdx.x + block_size * blockIdx.x; long int image_offset = oX*oY*oZ*blockIdx.y; if (n >= max_idx) return; int k, i, kp, ip, jp; if (check_max_r2) { k = n / (iX * iY); i = (n % (iX * iY)) / iX; kp = k < iX ? k : k - iZ; ip = i < iX ? i : i - iY; jp = n % iX; if (kp*kp + ip*ip + jp*jp > max_r2) return; } else { k = n / (oX * oY); i = (n % (oX * oY)) / oX; kp = k < oX ? k : k - oZ; ip = i < oX ? i : i - oY; jp = n % oX; } g_out_real[(kp < 0 ? kp + oZ : kp) * oYX + (ip < 0 ? ip + oY : ip)*oX + jp + image_offset] = g_in_real[(kp < 0 ? kp + iZ : kp)*iYX + (ip < 0 ? ip + iY : ip)*iX + jp + image_offset]; g_out_imag[(kp < 0 ? kp + oZ : kp) * oYX + (ip < 0 ? ip + oY : ip)*oX + jp + image_offset] = g_in_imag[(kp < 0 ? kp + iZ : kp)*iYX + (ip < 0 ? ip + iY : ip)*iX + jp + image_offset]; } template __global__ void cuda_kernel_window_fourier_transform( ACCCOMPLEX *g_in, ACCCOMPLEX *g_out, size_t iX, size_t iY, size_t iZ, size_t iYX, //Input dimensions size_t oX, size_t oY, size_t oZ, size_t oYX, //Output dimensions size_t max_idx, int block_size, size_t max_r2 = 0 ) { size_t n = threadIdx.x + block_size * blockIdx.x; size_t oOFF = oX*oY*oZ*blockIdx.y; size_t iOFF = iX*iY*iZ*blockIdx.y; if (n >= max_idx) return; long int k, i, kp, ip, jp; if (check_max_r2) { k = n / (iX * iY); i = (n % (iX * iY)) / iX; kp = k < iX ? k : k - iZ; ip = i < iX ? i : i - iY; jp = n % iX; if (kp*kp + ip*ip + jp*jp > max_r2) return; } else { k = n / (oX * oY); i = (n % (oX * oY)) / oX; kp = k < oX ? k : k - oZ; ip = i < oX ? i : i - oY; jp = n % oX; } long int in_idx = (kp < 0 ? kp + iZ : kp) * iYX + (ip < 0 ? ip + iY : ip)*iX + jp; long int out_idx = (kp < 0 ? kp + oZ : kp) * oYX + (ip < 0 ? 
ip + oY : ip)*oX + jp; g_out[out_idx + oOFF] = g_in[in_idx + iOFF]; } #endif relion-3.1.3/src/acc/cuda/cuda_kernels/diff2.cuh000066400000000000000000000423461411340063500213770ustar00rootroot00000000000000#ifndef CUDA_DIFF2_KERNELS_CUH_ #define CUDA_DIFF2_KERNELS_CUH_ #include #include #include #include #include "src/acc/acc_projector.h" #include "src/acc/acc_projectorkernel_impl.h" #include "src/acc/cuda/cuda_settings.h" #include "src/acc/cuda/cuda_kernels/cuda_device_utils.cuh" /* * DIFFERNECE-BASED KERNELS */ /* * Assuming block_sz % prefetch_fraction == 0 and prefetch_fraction < block_sz * Assuming block_sz % eulers_per_block == 0 * Assuming eulers_per_block * 3 < block_sz */ template __global__ void cuda_kernel_diff2_coarse( XFLOAT *g_eulers, XFLOAT *trans_x, XFLOAT *trans_y, XFLOAT *trans_z, XFLOAT *g_real, XFLOAT *g_imag, AccProjectorKernel projector, XFLOAT *g_corr, XFLOAT *g_diff2s, int translation_num, int image_size ) { int tid = threadIdx.x; //Prefetch euler matrices __shared__ XFLOAT s_eulers[eulers_per_block * 9]; int max_block_pass_euler( ceilfracf(eulers_per_block*9, block_sz) * block_sz); for (int i = tid; i < max_block_pass_euler; i += block_sz) if (i < eulers_per_block * 9) s_eulers[i] = g_eulers[blockIdx.x * eulers_per_block * 9 + i]; //Setup variables __shared__ XFLOAT s_ref_real[block_sz/prefetch_fraction * eulers_per_block]; __shared__ XFLOAT s_ref_imag[block_sz/prefetch_fraction * eulers_per_block]; __shared__ XFLOAT s_real[block_sz]; __shared__ XFLOAT s_imag[block_sz]; __shared__ XFLOAT s_corr[block_sz]; XFLOAT diff2s[eulers_per_block] = {0.f}; XFLOAT tx = trans_x[tid%translation_num]; XFLOAT ty = trans_y[tid%translation_num]; XFLOAT tz = trans_z[tid%translation_num]; //Step through data int max_block_pass_pixel( ceilfracf(image_size,block_sz) * block_sz ); for (int init_pixel = 0; init_pixel < max_block_pass_pixel; init_pixel += block_sz/prefetch_fraction) { __syncthreads(); //Prefetch block-fraction-wise if(init_pixel + tid/prefetch_fraction < image_size) { int x,y,z,xy; if(DATA3D) { z = floorfracf(init_pixel + tid/prefetch_fraction, projector.imgX*projector.imgY); xy = (init_pixel + tid/prefetch_fraction) % (projector.imgX*projector.imgY); x = xy % projector.imgX; y = floorfracf( xy, projector.imgX); if (z > projector.maxR) z -= projector.imgZ; } else { x = ( init_pixel + tid/prefetch_fraction) % projector.imgX; y = floorfracf( init_pixel + tid/prefetch_fraction , projector.imgX); } if (y > projector.maxR) y -= projector.imgY; // #pragma unroll for (int i = tid%prefetch_fraction; i < eulers_per_block; i += prefetch_fraction) { if(DATA3D) // if DATA3D, then REF3D as well. 
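				// Reference slice values for this pixel are computed once per orientation in the block and
				// cached in shared memory (s_ref_real / s_ref_imag). The three projector overloads below
				// differ only in how much of the 3x3 Euler matrix they need: the full matrix for volumetric
				// (DATA3D) input, the first two columns when projecting a 3D reference onto a 2D image, and
				// a 2x2 in-plane rotation for a purely 2D reference.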
projector.project3Dmodel( x,y,z, s_eulers[i*9 ], s_eulers[i*9+1], s_eulers[i*9+2], s_eulers[i*9+3], s_eulers[i*9+4], s_eulers[i*9+5], s_eulers[i*9+6], s_eulers[i*9+7], s_eulers[i*9+8], s_ref_real[eulers_per_block * (tid/prefetch_fraction) + i], s_ref_imag[eulers_per_block * (tid/prefetch_fraction) + i]); else if(REF3D) projector.project3Dmodel( x,y, s_eulers[i*9 ], s_eulers[i*9+1], s_eulers[i*9+3], s_eulers[i*9+4], s_eulers[i*9+6], s_eulers[i*9+7], s_ref_real[eulers_per_block * (tid/prefetch_fraction) + i], s_ref_imag[eulers_per_block * (tid/prefetch_fraction) + i]); else projector.project2Dmodel( x,y, s_eulers[i*9 ], s_eulers[i*9+1], s_eulers[i*9+3], s_eulers[i*9+4], s_ref_real[eulers_per_block * (tid/prefetch_fraction) + i], s_ref_imag[eulers_per_block * (tid/prefetch_fraction) + i]); } } //Prefetch block-wise if (init_pixel % block_sz == 0 && init_pixel + tid < image_size) { s_real[tid] = g_real[init_pixel + tid]; s_imag[tid] = g_imag[init_pixel + tid]; s_corr[tid] = g_corr[init_pixel + tid] / 2; } __syncthreads(); if (tid/translation_num < block_sz/translation_num) // NOTE int division A/B==C/B !=> A==C for (int i = tid / translation_num; i < block_sz/prefetch_fraction; i += block_sz/translation_num) { if((init_pixel + i) >= image_size) break; int x,y,z,xy; if(DATA3D) { z = floorfracf( init_pixel + i , projector.imgX*projector.imgY); //TODO optimize index extraction. xy = ( init_pixel + i ) % (projector.imgX*projector.imgY); x = xy % projector.imgX; y = floorfracf( xy, projector.imgX); if (z > projector.maxR) z -= projector.imgZ; } else { x = ( init_pixel + i ) % projector.imgX; y = floorfracf( init_pixel + i , projector.imgX); } if (y > projector.maxR) y -= projector.imgY; XFLOAT real, imag; if(DATA3D) translatePixel(x, y, z, tx, ty, tz, s_real[i + init_pixel % block_sz], s_imag[i + init_pixel % block_sz], real, imag); else translatePixel(x, y, tx, ty, s_real[i + init_pixel % block_sz], s_imag[i + init_pixel % block_sz], real, imag); #pragma unroll for (int j = 0; j < eulers_per_block; j ++) { XFLOAT diff_real = s_ref_real[eulers_per_block * i + j] - real; XFLOAT diff_imag = s_ref_imag[eulers_per_block * i + j] - imag; diff2s[j] += (diff_real * diff_real + diff_imag * diff_imag) * s_corr[i + init_pixel % block_sz]; } } } //Set global #pragma unroll for (int i = 0; i < eulers_per_block; i ++) cuda_atomic_add(&g_diff2s[(blockIdx.x * eulers_per_block + i) * translation_num + tid % translation_num], diff2s[i]); } template __global__ void cuda_kernel_diff2_fine( XFLOAT *g_eulers, XFLOAT *g_imgs_real, XFLOAT *g_imgs_imag, XFLOAT *trans_x, XFLOAT *trans_y, XFLOAT *trans_z, AccProjectorKernel projector, XFLOAT *g_corr_img, XFLOAT *g_diff2s, unsigned image_size, XFLOAT sum_init, unsigned long orientation_num, unsigned long translation_num, unsigned long todo_blocks, unsigned long *d_rot_idx, unsigned long *d_trans_idx, unsigned long *d_job_idx, unsigned long *d_job_num ) { unsigned long bid = blockIdx.x; unsigned long tid = threadIdx.x; // // Specialize BlockReduce for a 1D block of 128 threads on type XFLOAT // typedef cub::BlockReduce BlockReduce; // // Allocate shared memory for BlockReduce // __shared__ typename BlockReduce::TempStorage temp_storage; unsigned long pixel; XFLOAT ref_real, ref_imag, shifted_real, shifted_imag, diff_real, diff_imag; __shared__ XFLOAT s[block_sz*chunk_sz]; //We MAY have to do up to chunk_sz translations in each block __shared__ XFLOAT s_outs[chunk_sz]; // inside the padded 2D orientation gri // if( bid < todo_blocks ) // we only need to make { unsigned 
trans_num = (unsigned)d_job_num[bid]; //how many transes we have for this rot for (int itrans=0; itrans projector.maxR) { if (z >= projector.imgZ - projector.maxR) z = z - projector.imgZ; else x = projector.maxR; } } else { x = pixel % projector.imgX; y = floorfracf( pixel , projector.imgX); } if (y > projector.maxR) { if (y >= projector.imgY - projector.maxR) y = y - projector.imgY; else x = projector.maxR; } if(DATA3D) projector.project3Dmodel( x,y,z, __ldg(&g_eulers[ix*9 ]), __ldg(&g_eulers[ix*9+1]), __ldg(&g_eulers[ix*9+2]), __ldg(&g_eulers[ix*9+3]), __ldg(&g_eulers[ix*9+4]), __ldg(&g_eulers[ix*9+5]), __ldg(&g_eulers[ix*9+6]), __ldg(&g_eulers[ix*9+7]), __ldg(&g_eulers[ix*9+8]), ref_real, ref_imag); else if(REF3D) projector.project3Dmodel( x,y, __ldg(&g_eulers[ix*9 ]), __ldg(&g_eulers[ix*9+1]), __ldg(&g_eulers[ix*9+3]), __ldg(&g_eulers[ix*9+4]), __ldg(&g_eulers[ix*9+6]), __ldg(&g_eulers[ix*9+7]), ref_real, ref_imag); else projector.project2Dmodel( x,y, __ldg(&g_eulers[ix*9 ]), __ldg(&g_eulers[ix*9+1]), __ldg(&g_eulers[ix*9+3]), __ldg(&g_eulers[ix*9+4]), ref_real, ref_imag); for (int itrans=0; itrans0; j/=2) { if(tid __global__ void cuda_kernel_diff2_CC_coarse( XFLOAT *g_eulers, XFLOAT *g_imgs_real, XFLOAT *g_imgs_imag, XFLOAT *g_trans_x, XFLOAT *g_trans_y, XFLOAT *g_trans_z, AccProjectorKernel projector, XFLOAT *g_corr_img, XFLOAT *g_diff2s, unsigned translation_num, int image_size, XFLOAT exp_local_sqrtXi2 ) { int iorient = blockIdx.x; int itrans = blockIdx.y; int tid = threadIdx.x; __shared__ XFLOAT s_weight[block_sz]; s_weight[tid] = (XFLOAT)0.0; __shared__ XFLOAT s_norm[block_sz]; s_norm[tid] = (XFLOAT)0.0; XFLOAT real, imag, ref_real, ref_imag; XFLOAT e0,e1,e2,e3,e4,e5,e6,e7,e8; e0 = __ldg(&g_eulers[iorient*9 ]); e1 = __ldg(&g_eulers[iorient*9+1]); e2 = __ldg(&g_eulers[iorient*9+2]); e3 = __ldg(&g_eulers[iorient*9+3]); e4 = __ldg(&g_eulers[iorient*9+4]); e5 = __ldg(&g_eulers[iorient*9+5]); e6 = __ldg(&g_eulers[iorient*9+6]); e7 = __ldg(&g_eulers[iorient*9+7]); e8 = __ldg(&g_eulers[iorient*9+8]); __syncthreads(); unsigned pixel_pass_num( ceilfracf(image_size,block_sz) ); for (unsigned pass = 0; pass < pixel_pass_num; pass++) { unsigned pixel = (pass * block_sz) + tid; if(pixel < image_size) { int x,y,z,xy; if(DATA3D) { z = floorfracf(pixel, projector.imgX*projector.imgY); xy = pixel % (projector.imgX*projector.imgY); x = xy % projector.imgX; y = floorfracf( xy, projector.imgX); if (z > projector.maxR) { if (z >= projector.imgZ - projector.maxR) z = z - projector.imgZ; else x = projector.maxR; } } else { x = pixel % projector.imgX; y = floorfracf( pixel , projector.imgX); } if (y > projector.maxR) { if (y >= projector.imgY - projector.maxR) y = y - projector.imgY; else x = projector.maxR; } if(DATA3D) projector.project3Dmodel( x,y,z, e0,e1,e2,e3,e4,e5,e6,e7,e8, ref_real, ref_imag); else if(REF3D) projector.project3Dmodel( x,y, e0,e1,e3,e4,e6,e7, ref_real, ref_imag); else projector.project2Dmodel( x,y, e0,e1,e3,e4, ref_real, ref_imag); if(DATA3D) translatePixel(x, y, z, g_trans_x[itrans], g_trans_y[itrans], g_trans_z[itrans], g_imgs_real[pixel], g_imgs_imag[pixel], real, imag); else translatePixel(x, y, g_trans_x[itrans], g_trans_y[itrans], g_imgs_real[pixel], g_imgs_imag[pixel], real, imag); s_weight[tid] += (ref_real * real + ref_imag * imag) * __ldg(&g_corr_img[pixel]); s_norm[tid] += (ref_real * ref_real + ref_imag * ref_imag ) * __ldg(&g_corr_img[pixel]); } __syncthreads(); } for(int j=(block_sz/2); j>0; j/=2) { if(tid __global__ void cuda_kernel_diff2_CC_fine( XFLOAT *g_eulers, 
XFLOAT *g_imgs_real, XFLOAT *g_imgs_imag, XFLOAT *g_trans_x, XFLOAT *g_trans_y, XFLOAT *g_trans_z, AccProjectorKernel projector, XFLOAT *g_corr_img, XFLOAT *g_diff2s, unsigned image_size, XFLOAT sum_init, XFLOAT exp_local_sqrtXi2, unsigned long orientation_num, unsigned long translation_num, unsigned long todo_blocks, unsigned long *d_rot_idx, unsigned long *d_trans_idx, unsigned long *d_job_idx, unsigned long *d_job_num ) { int bid = blockIdx.y * gridDim.x + blockIdx.x; int tid = threadIdx.x; // // Specialize BlockReduce for a 1D block of 128 threads on type XFLOAT // typedef cub::BlockReduce BlockReduce; // // Allocate shared memory for BlockReduce // __shared__ typename BlockReduce::TempStorage temp_storage; int pixel; XFLOAT ref_real, ref_imag, shifted_real, shifted_imag; __shared__ XFLOAT s[block_sz*chunk_sz]; //We MAY have to do up to chunk_sz translations in each block __shared__ XFLOAT s_cc[block_sz*chunk_sz]; __shared__ XFLOAT s_outs[chunk_sz]; if( bid < todo_blocks ) // we only need to make { unsigned trans_num = d_job_num[bid]; //how many transes we have for this rot for (int itrans=0; itrans projector.maxR) { if (z >= projector.imgZ - projector.maxR) z = z - projector.imgZ; else x = projector.maxR; } } else { x = pixel % projector.imgX; y = floorfracf( pixel , projector.imgX); } if (y > projector.maxR) { if (y >= projector.imgY - projector.maxR) y = y - projector.imgY; else x = projector.maxR; } if(DATA3D) projector.project3Dmodel( x,y,z, __ldg(&g_eulers[ix*9 ]), __ldg(&g_eulers[ix*9+1]), __ldg(&g_eulers[ix*9+2]), __ldg(&g_eulers[ix*9+3]), __ldg(&g_eulers[ix*9+4]), __ldg(&g_eulers[ix*9+5]), __ldg(&g_eulers[ix*9+6]), __ldg(&g_eulers[ix*9+7]), __ldg(&g_eulers[ix*9+8]), ref_real, ref_imag); else if(REF3D) projector.project3Dmodel( x,y, __ldg(&g_eulers[ix*9 ]), __ldg(&g_eulers[ix*9+1]), __ldg(&g_eulers[ix*9+3]), __ldg(&g_eulers[ix*9+4]), __ldg(&g_eulers[ix*9+6]), __ldg(&g_eulers[ix*9+7]), ref_real, ref_imag); else projector.project2Dmodel( x,y, __ldg(&g_eulers[ix*9 ]), __ldg(&g_eulers[ix*9+1]), __ldg(&g_eulers[ix*9+3]), __ldg(&g_eulers[ix*9+4]), ref_real, ref_imag); for (int itrans=0; itrans0; j/=2) { if(tid #include /// Needed explicit template instantiations template __global__ void cuda_kernel_make_eulers_2D(XFLOAT *, XFLOAT *, unsigned); template __global__ void cuda_kernel_make_eulers_2D(XFLOAT *, XFLOAT *, unsigned); template __global__ void cuda_kernel_make_eulers_3D(XFLOAT *, XFLOAT *, XFLOAT *, XFLOAT *, unsigned, XFLOAT *, XFLOAT *); template __global__ void cuda_kernel_make_eulers_3D(XFLOAT *, XFLOAT *, XFLOAT *, XFLOAT *, unsigned, XFLOAT *, XFLOAT *); template __global__ void cuda_kernel_make_eulers_3D(XFLOAT *, XFLOAT *, XFLOAT *, XFLOAT *, unsigned, XFLOAT *, XFLOAT *); template __global__ void cuda_kernel_make_eulers_3D(XFLOAT *, XFLOAT *, XFLOAT *, XFLOAT *, unsigned, XFLOAT *, XFLOAT *); template __global__ void cuda_kernel_make_eulers_3D(XFLOAT *, XFLOAT *, XFLOAT *, XFLOAT *, unsigned, XFLOAT *, XFLOAT *); template __global__ void cuda_kernel_make_eulers_3D(XFLOAT *, XFLOAT *, XFLOAT *, XFLOAT *, unsigned, XFLOAT *, XFLOAT *); template __global__ void cuda_kernel_make_eulers_3D(XFLOAT *, XFLOAT *, XFLOAT *, XFLOAT *, unsigned, XFLOAT *, XFLOAT *); template __global__ void cuda_kernel_make_eulers_3D(XFLOAT *, XFLOAT *, XFLOAT *, XFLOAT *, unsigned, XFLOAT *, XFLOAT *); /* * This draft of a kernel assumes input that has jobs which have a single orientation and sequential translations within each job. 
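 * In practice each job is described by one entry of d_job_idx (where the job starts in the
 * weight array) and d_job_num (how many consecutive translations it covers), while d_rot_id
 * gives the single orientation shared by all translations of that job.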
* */ __global__ void cuda_kernel_exponentiate_weights_fine( XFLOAT *g_pdf_orientation, bool *g_pdf_orientation_zeros, XFLOAT *g_pdf_offset, bool *g_pdf_offset_zeros, XFLOAT *g_weights, XFLOAT min_diff2, int oversamples_orient, int oversamples_trans, unsigned long *d_rot_id, unsigned long *d_trans_idx, unsigned long *d_job_idx, unsigned long *d_job_num, long int job_num) { // blockid int bid = blockIdx.x; //threadid int tid = threadIdx.x; long int jobid = bid*SUMW_BLOCK_SIZE+tid; if (jobid=xdim) y -= (xdim-1)*2; //assuming square input images (particles) int ires = rintf(sqrtf(x*x + y*y)); #if defined(ACC_DOUBLE_PRECISION) XFLOAT scale = 0.; if(ires=xdim) z -= (xdim-1)*2; //assuming square input images (particles) if(y>=xdim) y -= (xdim-1)*2; //assuming square input images (particles) int ires = rintf(sqrtf(x*x + y*y + z*z)); #if defined(ACC_DOUBLE_PRECISION) XFLOAT scale = 0.; if(ires radius_p) { partial_sum[tid] += (XFLOAT)1.0; partial_sum_bg[tid] += img_pixels[tid]; } else { #if defined(ACC_DOUBLE_PRECISION) raisedcos = 0.5 + 0.5 * cospi( (radius_p - r) / cosine_width ); #else raisedcos = 0.5f + 0.5f * cospif((radius_p - r) / cosine_width ); #endif partial_sum[tid] += raisedcos; partial_sum_bg[tid] += raisedcos * img_pixels[tid]; } } } } __syncthreads(); for(int j=(SOFTMASK_BLOCK_SIZE/2); j>0; j/=2) { if(tid radius_p) img_pixels[tid]=sum_bg_total; else { #if defined(ACC_DOUBLE_PRECISION) raisedcos = 0.5 + 0.5 * cospi( (radius_p - r) / cosine_width ); #else raisedcos = 0.5f + 0.5f * cospif((radius_p - r) / cosine_width ); #endif img_pixels[tid]= img_pixels[tid]*(1-raisedcos) + sum_bg_total*raisedcos; } vol[texel]=img_pixels[tid]; } } } __global__ void cuda_kernel_softMaskBackgroundValue( XFLOAT *vol, long int vol_size, long int xdim, long int ydim, long int zdim, long int xinit, long int yinit, long int zinit, XFLOAT radius, XFLOAT radius_p, XFLOAT cosine_width, XFLOAT *g_sum, XFLOAT *g_sum_bg) { int tid = threadIdx.x; int bid = blockIdx.x; // vol.setXmippOrigin(); // sets xinit=xdim , also for y z XFLOAT r, raisedcos; int x,y,z; __shared__ XFLOAT img_pixels[SOFTMASK_BLOCK_SIZE]; __shared__ XFLOAT partial_sum[SOFTMASK_BLOCK_SIZE]; __shared__ XFLOAT partial_sum_bg[SOFTMASK_BLOCK_SIZE]; long int texel_pass_num = ceilfracf(vol_size,SOFTMASK_BLOCK_SIZE*gridDim.x); int texel = bid*SOFTMASK_BLOCK_SIZE*texel_pass_num + tid; partial_sum[tid]=(XFLOAT)0.0; partial_sum_bg[tid]=(XFLOAT)0.0; for (int pass = 0; pass < texel_pass_num; pass++, texel+=SOFTMASK_BLOCK_SIZE) // loop the available warps enough to complete all translations for this orientation { if(texel radius_p) { partial_sum[tid] += (XFLOAT)1.0; partial_sum_bg[tid] += img_pixels[tid]; } else { #if defined(ACC_DOUBLE_PRECISION) raisedcos = 0.5 + 0.5 * cospi( (radius_p - r) / cosine_width ); #else raisedcos = 0.5f + 0.5f * cospif((radius_p - r) / cosine_width ); #endif partial_sum[tid] += raisedcos; partial_sum_bg[tid] += raisedcos * img_pixels[tid]; } } } cuda_atomic_add(&g_sum[tid] , partial_sum[tid]); cuda_atomic_add(&g_sum_bg[tid], partial_sum_bg[tid]); } __global__ void cuda_kernel_cosineFilter( XFLOAT *vol, long int vol_size, long int xdim, long int ydim, long int zdim, long int xinit, long int yinit, long int zinit, bool do_noise, XFLOAT *noise, XFLOAT radius, XFLOAT radius_p, XFLOAT cosine_width, XFLOAT bg_value) { int tid = threadIdx.x; int bid = blockIdx.x; // vol.setXmippOrigin(); // sets xinit=xdim , also for y z XFLOAT r, raisedcos, defVal; int x,y,z; __shared__ XFLOAT img_pixels[SOFTMASK_BLOCK_SIZE]; long int texel_pass_num = 
ceilfracf(vol_size,SOFTMASK_BLOCK_SIZE*gridDim.x); int texel = bid*SOFTMASK_BLOCK_SIZE*texel_pass_num + tid; defVal = bg_value; for (int pass = 0; pass < texel_pass_num; pass++, texel+=SOFTMASK_BLOCK_SIZE) // loop the available warps enough to complete all translations for this orientation { if(texel radius_p) img_pixels[tid]=defVal; else { #if defined(ACC_DOUBLE_PRECISION) raisedcos = 0.5 + 0.5 * cospi( (radius_p - r) / cosine_width ); #else raisedcos = 0.5f + 0.5f * cospif((radius_p - r) / cosine_width ); #endif img_pixels[tid]= img_pixels[tid]*(1-raisedcos) + defVal*raisedcos; } vol[texel]=img_pixels[tid]; } } } __global__ void cuda_kernel_probRatio( XFLOAT *d_Mccf, XFLOAT *d_Mpsi, XFLOAT *d_Maux, XFLOAT *d_Mmean, XFLOAT *d_Mstddev, int image_size, XFLOAT normfft, XFLOAT sum_ref_under_circ_mask, XFLOAT sum_ref2_under_circ_mask, XFLOAT expected_Pratio, int NpsiThisBatch, int startPsi, int totalPsis) { /* PLAN TO: * * 1) Pre-filter * d_Mstddev[i] = 1 / (2*d_Mstddev[i]) ( if d_Mstddev[pixel] > 1E-10 ) * d_Mstddev[i] = 1 ( else ) * * 2) Set * sum_ref2_under_circ_mask /= 2. * * 3) Total expression becomes * diff2 = ( exp(k) - 1.f ) / (expected_Pratio - 1.f) * where * k = (normfft * d_Maux[pixel] + d_Mmean[pixel] * sum_ref_under_circ_mask)*d_Mstddev[i] + sum_ref2_under_circ_mask * */ int pixel = threadIdx.x + blockIdx.x*(int)PROBRATIO_BLOCK_SIZE; if(pixel (XFLOAT)1E-10) diff2 *= d_Mstddev[pixel]; diff2 += sum_ref2_under_circ_mask; #if defined(ACC_DOUBLE_PRECISION) diff2 = exp(-diff2 / 2.); // exponentiate to reflect the Gaussian error model. sigma=1 after normalization, 0.4=1/sqrt(2pi) #else diff2 = expf(-diff2 / 2.f); #endif // Store fraction of (1 - probability-ratio) wrt (1 - expected Pratio) diff2 = (diff2 - (XFLOAT)1.0) / (expected_Pratio - (XFLOAT)1.0); if (diff2 > Kccf) { Kccf = diff2; Kpsi = (startPsi + psi)*(360/totalPsis); } } d_Mccf[pixel] = Kccf; if (Kpsi >= 0.) 
d_Mpsi[pixel] = Kpsi; } } __global__ void cuda_kernel_rotateOnly( ACCCOMPLEX *d_Faux, XFLOAT psi, AccProjectorKernel projector, int startPsi ) { int proj = blockIdx.y; int image_size=projector.imgX*projector.imgY; int pixel = threadIdx.x + blockIdx.x*blockDim.x; if(pixel projector.maxR) { if (y >= projector.imgY - projector.maxR) y = y - projector.imgY; else x = projector.maxR; } XFLOAT sa, ca; sincos((proj+startPsi)*psi, &sa, &ca); ACCCOMPLEX val; projector.project2Dmodel( x,y, ca, -sa, sa, ca, val.x,val.y); long int out_pixel = proj*image_size + pixel; d_Faux[out_pixel].x =val.x; d_Faux[out_pixel].y =val.y; } } __global__ void cuda_kernel_rotateAndCtf( ACCCOMPLEX *d_Faux, XFLOAT *d_ctf, XFLOAT psi, AccProjectorKernel projector, int startPsi ) { int proj = blockIdx.y; int image_size=projector.imgX*projector.imgY; int pixel = threadIdx.x + blockIdx.x*blockDim.x; if(pixel projector.maxR) { if (y >= projector.imgY - projector.maxR) y = y - projector.imgY; else x = projector.maxR; } XFLOAT sa, ca; sincos((proj+startPsi)*psi, &sa, &ca); ACCCOMPLEX val; projector.project2Dmodel( x,y, ca, -sa, sa, ca, val.x,val.y); long int out_pixel = proj*image_size + pixel; d_Faux[out_pixel].x =val.x*d_ctf[pixel]; d_Faux[out_pixel].y =val.y*d_ctf[pixel]; } } __global__ void cuda_kernel_convol_A( ACCCOMPLEX *d_A, ACCCOMPLEX *d_B, int image_size) { int pixel = threadIdx.x + blockIdx.x*blockDim.x; if(pixel 0) Mstddev[pixel] = sqrt(temp); else Mstddev[pixel] = 0; } } __global__ void cuda_kernel_square( XFLOAT *A, int image_size) { int pixel = threadIdx.x + blockIdx.x*blockDim.x; if(pixel __global__ void cuda_kernel_make_eulers_2D( XFLOAT *alphas, XFLOAT *eulers, unsigned orientation_num) { unsigned oid = blockIdx.x * blockDim.x + threadIdx.x; //Orientation id if (oid >= orientation_num) return; XFLOAT ca, sa; XFLOAT a = alphas[oid] * (XFLOAT)PI / (XFLOAT)180.0; #ifdef ACC_DOUBLE_PRECISION sincos(a, &sa, &ca); #else sincosf(a, &sa, &ca); #endif if(!invert) { eulers[9 * oid + 0] = ca;//00 eulers[9 * oid + 1] = sa;//01 eulers[9 * oid + 2] = 0 ;//02 eulers[9 * oid + 3] =-sa;//10 eulers[9 * oid + 4] = ca;//11 eulers[9 * oid + 5] = 0 ;//12 eulers[9 * oid + 6] = 0 ;//20 eulers[9 * oid + 7] = 0 ;//21 eulers[9 * oid + 8] = 1 ;//22 } else { eulers[9 * oid + 0] = ca;//00 eulers[9 * oid + 1] =-sa;//10 eulers[9 * oid + 2] = 0 ;//20 eulers[9 * oid + 3] = sa;//01 eulers[9 * oid + 4] = ca;//11 eulers[9 * oid + 5] = 0 ;//21 eulers[9 * oid + 6] = 0 ;//02 eulers[9 * oid + 7] = 0 ;//12 eulers[9 * oid + 8] = 1 ;//22 } } template __global__ void cuda_kernel_make_eulers_3D( XFLOAT *alphas, XFLOAT *betas, XFLOAT *gammas, XFLOAT *eulers, unsigned orientation_num, XFLOAT *L, XFLOAT *R) { XFLOAT a(0.f),b(0.f),g(0.f), A[9],B[9]; XFLOAT ca, sa, cb, sb, cg, sg, cc, cs, sc, ss; unsigned oid = blockIdx.x * blockDim.x + threadIdx.x; //Orientation id if (oid >= orientation_num) return; for (int i = 0; i < 9; i ++) B[i] = (XFLOAT) 0.f; a = alphas[oid] * (XFLOAT)PI / (XFLOAT)180.0; b = betas[oid] * (XFLOAT)PI / (XFLOAT)180.0; g = gammas[oid] * (XFLOAT)PI / (XFLOAT)180.0; #ifdef ACC_DOUBLE_PRECISION sincos(a, &sa, &ca); sincos(b, &sb, &cb); sincos(g, &sg, &cg); #else sincosf(a, &sa, &ca); sincosf(b, &sb, &cb); sincosf(g, &sg, &cg); #endif cc = cb * ca; cs = cb * sa; sc = sb * ca; ss = sb * sa; A[0] = ( cg * cc - sg * sa);//00 A[1] = ( cg * cs + sg * ca);//01 A[2] = (-cg * sb ) ;//02 A[3] = (-sg * cc - cg * sa);//10 A[4] = (-sg * cs + cg * ca);//11 A[5] = ( sg * sb ) ;//12 A[6] = ( sc ) ;//20 A[7] = ( ss ) ;//21 A[8] = ( cb ) ;//22 if (doR) { for 
(int i = 0; i < 9; i++) B[i] = 0.f; for (int i = 0; i < 3; i++) for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) B[i * 3 + j] += A[i * 3 + k] * R[k * 3 + j]; } else for (int i = 0; i < 9; i++) B[i] = A[i]; if (doL) { if (doR) for (int i = 0; i < 9; i++) A[i] = B[i]; for (int i = 0; i < 9; i++) B[i] = 0.f; for (int i = 0; i < 3; i++) for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) B[i * 3 + j] += L[i * 3 + k] * A[k * 3 + j]; } if(invert) { if (doL) // this could have anisotropy, so inverse neq transpose!!! { XFLOAT det; det = B[0] * (B[4] * B[8] - B[7] * B[5]) - B[1] * (B[3] * B[8] - B[6] * B[5]) + B[2] * (B[3] * B[7] - B[6] * B[4]); eulers[9 * oid + 0] = (B[4] * B[8] - B[7] * B[5]) / det; eulers[9 * oid + 1] = (B[7] * B[2] - B[1] * B[8]) / det; eulers[9 * oid + 2] = (B[1] * B[5] - B[4] * B[2]) / det; eulers[9 * oid + 3] = (B[5] * B[6] - B[8] * B[3]) / det; eulers[9 * oid + 4] = (B[8] * B[0] - B[2] * B[6]) / det; eulers[9 * oid + 5] = (B[2] * B[3] - B[5] * B[0]) / det; eulers[9 * oid + 6] = (B[3] * B[7] - B[6] * B[4]) / det; eulers[9 * oid + 7] = (B[6] * B[1] - B[0] * B[7]) / det; eulers[9 * oid + 8] = (B[0] * B[4] - B[3] * B[1]) / det; } else { eulers[9 * oid + 0] = B[0];//00 eulers[9 * oid + 1] = B[3];//01 eulers[9 * oid + 2] = B[6];//02 eulers[9 * oid + 3] = B[1];//10 eulers[9 * oid + 4] = B[4];//11 eulers[9 * oid + 5] = B[7];//12 eulers[9 * oid + 6] = B[2];//20 eulers[9 * oid + 7] = B[5];//21 eulers[9 * oid + 8] = B[8];//22 } } else { eulers[9 * oid + 0] = B[0];//00 eulers[9 * oid + 1] = B[1];//10 eulers[9 * oid + 2] = B[2];//20 eulers[9 * oid + 3] = B[3];//01 eulers[9 * oid + 4] = B[4];//11 eulers[9 * oid + 5] = B[5];//21 eulers[9 * oid + 6] = B[6];//02 eulers[9 * oid + 7] = B[7];//12 eulers[9 * oid + 8] = B[8];//22 } } __global__ void cuda_kernel_allweights_to_mweights( unsigned long * d_iorient, XFLOAT * d_allweights, XFLOAT * d_mweights, unsigned long orientation_num, unsigned long translation_num, int block_size ) { size_t idx = blockIdx.x * block_size + threadIdx.x; if (idx < orientation_num*translation_num) d_mweights[d_iorient[idx/translation_num] * translation_num + idx%translation_num] = d_allweights[idx/translation_num * translation_num + idx%translation_num]; // TODO - isn't this just d_allweights[idx + idx%translation_num]? Really? } __global__ void cuda_kernel_initOrientations(RFLOAT *pdfs, XFLOAT *pdf_orientation, bool *pdf_orientation_zeros, size_t sz) { int idx = blockIdx.x*blockDim.x + threadIdx.x; if(idx < sz){ pdf_orientation_zeros[idx] = (pdfs[idx] == 0); if (pdfs[idx] == 0) pdf_orientation[idx] = 0.f; else pdf_orientation[idx] = log(pdfs[idx]); } } __global__ void cuda_kernel_griddingCorrect(RFLOAT *vol, int interpolator, RFLOAT rrval, RFLOAT r_min_nn, size_t iX, size_t iY, size_t iZ) { int idx = blockIdx.x*blockDim.x + threadIdx.x; int idy = blockIdx.y*blockDim.y + threadIdx.y; int idz = blockIdx.z*blockDim.z + threadIdx.z; if(idx 0.) { RFLOAT rval = r / rrval; RFLOAT sinc = sin(PI * rval) / ( PI * rval); if (interpolator==NEAREST_NEIGHBOUR && r_min_nn == 0.) 
vol[idz*iX*iY + idy*iX + idx] /= sinc; else if (interpolator==TRILINEAR || (interpolator==NEAREST_NEIGHBOUR && r_min_nn > 0) ) vol[idz*iX*iY + idy*iX + idx] /= sinc * sinc; } } } __global__ void cuda_kernel_updatePowerSpectrum(RFLOAT *dcounter, RFLOAT *dpower_spectrum, int sz) { int idx = blockIdx.x*blockDim.x + threadIdx.x; if(idx #include #include #include #include #include #include "src/acc/cuda/cuda_settings.h" #include "src/acc/cuda/cuda_kernels/cuda_device_utils.cuh" #include "src/acc/acc_projector.h" #include "src/acc/acc_projectorkernel_impl.h" template __global__ void cuda_kernel_weights_exponent_coarse( T *g_pdf_orientation, bool *g_pdf_orientation_zeros, T *g_pdf_offset, bool *g_pdf_offset_zeros, T *g_weights, T g_min_diff2, int nr_coarse_orient, int nr_coarse_trans, int max_idx) { int bid = blockIdx.x; int tid = threadIdx.x; int idx = bid*SUMW_BLOCK_SIZE+tid; if(idx < max_idx) { int itrans = idx % nr_coarse_trans; int iorient = (idx - itrans) / nr_coarse_trans; T diff2 = g_weights[idx]; if( diff2 < g_min_diff2 || g_pdf_orientation_zeros[iorient] || g_pdf_offset_zeros[itrans]) g_weights[idx] = -99e99; //large negative number else g_weights[idx] = g_pdf_orientation[iorient] + g_pdf_offset[itrans] + g_min_diff2 - diff2; } } template __global__ void cuda_kernel_exponentiate( T *g_array, T add, size_t size) { int idx = threadIdx.x + blockIdx.x*BLOCK_SIZE; if(idx < size) { T a = g_array[idx] + add; #ifdef ACC_DOUBLE_PRECISION if (a < -700.) g_array[idx] = 0.f; else g_array[idx] = exp(a); #else if (a < -88.f) g_array[idx] = 0.; else g_array[idx] = expf(a); #endif } } template __global__ void cuda_kernel_collect2jobs( XFLOAT *g_oo_otrans_x, // otrans-size -> make const XFLOAT *g_oo_otrans_y, // otrans-size -> make const XFLOAT *g_oo_otrans_z, // otrans-size -> make const XFLOAT *g_myp_oo_otrans_x2y2z2, // otrans-size -> make const XFLOAT *g_i_weights, XFLOAT op_significant_weight, // TODO Put in const XFLOAT op_sum_weight, // TODO Put in const int coarse_trans, int oversamples_trans, int oversamples_orient, int oversamples, bool do_ignore_pdf_direction, XFLOAT *g_o_weights, XFLOAT *g_thr_wsum_prior_offsetx_class, XFLOAT *g_thr_wsum_prior_offsety_class, XFLOAT *g_thr_wsum_prior_offsetz_class, XFLOAT *g_thr_wsum_sigma2_offset, unsigned long *d_rot_idx, unsigned long *d_trans_idx, unsigned long *d_job_idx, unsigned long *d_job_num ) { // blockid int bid = blockIdx.x; //threadid int tid = threadIdx.x; extern __shared__ XFLOAT buffer[]; XFLOAT * s_o_weights = &buffer[ 0]; XFLOAT * s_thr_wsum_sigma2_offset = &buffer[ SUMW_BLOCK_SIZE]; XFLOAT * s_thr_wsum_prior_offsetx_class = &buffer[2*SUMW_BLOCK_SIZE]; XFLOAT * s_thr_wsum_prior_offsety_class = &buffer[3*SUMW_BLOCK_SIZE]; XFLOAT * s_thr_wsum_prior_offsetz_class(0); if(DATA3D) s_thr_wsum_prior_offsetz_class = &buffer[4*SUMW_BLOCK_SIZE]; s_o_weights[tid] = (XFLOAT)0.0; s_thr_wsum_sigma2_offset[tid] = (XFLOAT)0.0; s_thr_wsum_prior_offsetx_class[tid] = (XFLOAT)0.0; s_thr_wsum_prior_offsety_class[tid] = (XFLOAT)0.0; if(DATA3D) s_thr_wsum_prior_offsety_class[tid] = (XFLOAT)0.0; long int pos = d_job_idx[bid]; int job_size = d_job_num[bid]; pos += tid; // pos is updated to be thread-resolved int pass_num = ceilfracf(job_size,SUMW_BLOCK_SIZE); __syncthreads(); for (int pass = 0; pass < pass_num; pass++, pos+=SUMW_BLOCK_SIZE) // loop the available warps enough to complete all translations for this orientation { if ((pass*SUMW_BLOCK_SIZE+tid)= op_significant_weight ) //TODO Might be slow (divergent threads) weight /= op_sum_weight; else weight = 
(XFLOAT)0.0; s_o_weights[tid] += weight; s_thr_wsum_sigma2_offset[tid] += weight * g_myp_oo_otrans_x2y2z2[iy]; s_thr_wsum_prior_offsetx_class[tid] += weight * g_oo_otrans_x[iy]; s_thr_wsum_prior_offsety_class[tid] += weight * g_oo_otrans_y[iy]; if(DATA3D) s_thr_wsum_prior_offsetz_class[tid] += weight * g_oo_otrans_z[iy]; } } __syncthreads(); // Reduction of all treanslations this orientation for(int j=(SUMW_BLOCK_SIZE/2); j>0; j/=2) { if(tid __global__ void cuda_kernel_translate2D( T * g_image_in, T * g_image_out, int image_size, int xdim, int ydim, int dx, int dy) { int tid = threadIdx.x; int bid = blockIdx.x; int x,y,xp,yp; int pixel=tid + bid*BLOCK_SIZE; int new_pixel; if(pixel=0 && xp>=0 && yp=0 && new_pixel __global__ void cuda_kernel_translate3D( T * g_image_in, T * g_image_out, int image_size, int xdim, int ydim, int zdim, int dx, int dy, int dz) { int tid = threadIdx.x; int bid = blockIdx.x; int x,y,z,xp,yp,zp,xy; int voxel=tid + bid*BLOCK_SIZE; int new_voxel; int xydim = xdim*ydim; if(voxel=0 && yp>=0 && xp>=0 && zp=0 && new_voxel __global__ void cuda_kernel_multi( T *A, T *OUT, T S, int image_size) { int pixel = threadIdx.x + blockIdx.x*blockDim.x; if(pixel __global__ void cuda_kernel_multi( T *A, T S, int image_size) { int pixel = threadIdx.x + blockIdx.x*blockDim.x; if(pixel __global__ void cuda_kernel_multi( T *A, T *B, T *OUT, T S, int image_size) { int pixel = threadIdx.x + blockIdx.x*blockDim.x; if(pixel __global__ void cuda_kernel_cast( T1 *IN, T2 *OUT, int size) { int pixel = threadIdx.x + blockIdx.x*BLOCK_SIZE; if(pixel __global__ void cuda_kernel_frequencyPass( ACCCOMPLEX *A, long int ori_size, size_t Xdim, size_t Ydim, size_t Zdim, XFLOAT edge_low, XFLOAT edge_width, XFLOAT edge_high, XFLOAT angpix, int image_size) { int texel = threadIdx.x + blockIdx.x*BLOCK_SIZE; int z = texel / (Xdim*Ydim); int xy = (texel - z*Xdim*Ydim); int y = xy / Xdim; int xp = xy - y*Xdim; int zp = ( z lows are dead { A[texel].x = 0.; A[texel].y = 0.; } else if (res < edge_high) //highpass => medium lows are almost dead { XFLOAT mul = 0.5 - 0.5 * cos( PI * (res-edge_low)/edge_width); A[texel].x *= mul; A[texel].y *= mul; } } else //lowpass { if (res > edge_high) //lowpass => highs are dead { A[texel].x = 0.; A[texel].y = 0.; } else if (res > edge_low) //lowpass => medium highs are almost dead { XFLOAT mul = 0.5 + 0.5 * cos( PI * (res-edge_low)/edge_width); A[texel].x *= mul; A[texel].y *= mul; } } } } template __global__ void cuda_kernel_powerClass( ACCCOMPLEX * g_image, XFLOAT * g_spectrum, int image_size, int spectrum_size, int xdim, int ydim, int zdim, int res_limit, XFLOAT * g_highres_Xi2) { int tid = threadIdx.x; int bid = blockIdx.x; XFLOAT normFaux; __shared__ XFLOAT s_highres_Xi2[POWERCLASS_BLOCK_SIZE]; s_highres_Xi2[tid] = (XFLOAT)0.; int x,y,xy,d; int xydim = xdim*ydim; int voxel=tid + bid*POWERCLASS_BLOCK_SIZE; bool coords_in_range(true); if(voxel0.f) && (ires=res_limit) s_highres_Xi2[tid] = normFaux; } } // Reduce the higres_Xi2-values for all threads. 
(I tried a straight atomic-write: for 128 threads it was ~3x slower) __syncthreads(); for(int j=(POWERCLASS_BLOCK_SIZE/2); j>0.f; j/=2) { if(tid __global__ void cuda_kernel_make_eulers_2D( XFLOAT *alphas, XFLOAT *eulers, unsigned orientation_num); template __global__ void cuda_kernel_make_eulers_3D( XFLOAT *alphas, XFLOAT *betas, XFLOAT *gammas, XFLOAT *eulers, unsigned orientation_num, XFLOAT *L, XFLOAT *R); #define INIT_VALUE_BLOCK_SIZE 512 template< typename T> __global__ void cuda_kernel_init_complex_value( T *data, XFLOAT value, size_t size) { size_t idx = blockIdx.x * INIT_VALUE_BLOCK_SIZE + threadIdx.x; if (idx < size) { data[idx].x = value; data[idx].y = value; } } template< typename T> __global__ void cuda_kernel_init_value( T *data, T value, size_t size) { size_t idx = blockIdx.x * INIT_VALUE_BLOCK_SIZE + threadIdx.x; if (idx < size) data[idx] = value; } #define WEIGHT_MAP_BLOCK_SIZE 512 __global__ void cuda_kernel_allweights_to_mweights( unsigned long * d_iorient, XFLOAT * d_allweights, XFLOAT * d_mweights, unsigned long orientation_num, unsigned long translation_num, int block_size ); #define OVER_THRESHOLD_BLOCK_SIZE 512 template< typename T> __global__ void cuda_kernel_array_over_threshold( T *data, bool *passed, T threshold, size_t size) { size_t idx = blockIdx.x * OVER_THRESHOLD_BLOCK_SIZE + threadIdx.x; if (idx < size) { if (data[idx] >= threshold) passed[idx] = true; else passed[idx] = false; } } #define FIND_IN_CUMULATIVE_BLOCK_SIZE 512 template< typename T> __global__ void cuda_kernel_find_threshold_idx_in_cumulative( T *data, T threshold, size_t size_m1, //data size minus 1 size_t *idx) { size_t i = blockIdx.x * FIND_IN_CUMULATIVE_BLOCK_SIZE + threadIdx.x; if (i < size_m1 && data[i] <= threshold && threshold < data[i+1]) idx[0] = i+1; } #define WINDOW_FT_BLOCK_SIZE 128 template __global__ void cuda_kernel_window_fourier_transform( XFLOAT *g_in_real, XFLOAT *g_in_imag, XFLOAT *g_out_real, XFLOAT *g_out_imag, unsigned iX, unsigned iY, unsigned iZ, unsigned iYX, //Input dimensions unsigned oX, unsigned oY, unsigned oZ, unsigned oYX, //Output dimensions unsigned max_idx, unsigned max_r2 = 0 ) { unsigned n = threadIdx.x + WINDOW_FT_BLOCK_SIZE * blockIdx.x; long int image_offset = oX*oY*oZ*blockIdx.y; if (n >= max_idx) return; int k, i, kp, ip, jp; if (check_max_r2) { k = n / (iX * iY); i = (n % (iX * iY)) / iX; kp = k < iX ? k : k - iZ; ip = i < iX ? i : i - iY; jp = n % iX; if (kp*kp + ip*ip + jp*jp > max_r2) return; } else { k = n / (oX * oY); i = (n % (oX * oY)) / oX; kp = k < oX ? k : k - oZ; ip = i < oX ? i : i - oY; jp = n % oX; } g_out_real[(kp < 0 ? kp + oZ : kp) * oYX + (ip < 0 ? ip + oY : ip)*oX + jp + image_offset] = g_in_real[(kp < 0 ? kp + iZ : kp)*iYX + (ip < 0 ? ip + iY : ip)*iX + jp + image_offset]; g_out_imag[(kp < 0 ? kp + oZ : kp) * oYX + (ip < 0 ? ip + oY : ip)*oX + jp + image_offset] = g_in_imag[(kp < 0 ? kp + iZ : kp)*iYX + (ip < 0 ? ip + iY : ip)*iX + jp + image_offset]; } #define WINDOW_FT_BLOCK_SIZE 128 template __global__ void cuda_kernel_window_fourier_transform( ACCCOMPLEX *g_in, ACCCOMPLEX *g_out, size_t iX, size_t iY, size_t iZ, size_t iYX, //Input dimensions size_t oX, size_t oY, size_t oZ, size_t oYX, //Output dimensions size_t max_idx, size_t max_r2 = 0 ) { size_t n = threadIdx.x + WINDOW_FT_BLOCK_SIZE * blockIdx.x; size_t oOFF = oX*oY*oZ*blockIdx.y; size_t iOFF = iX*iY*iZ*blockIdx.y; if (n >= max_idx) return; long int k, i, kp, ip, jp; if (check_max_r2) { k = n / (iX * iY); i = (n % (iX * iY)) / iX; kp = k < iX ? k : k - iZ; ip = i < iX ? 
i : i - iY; jp = n % iX; if (kp*kp + ip*ip + jp*jp > max_r2) return; } else { k = n / (oX * oY); i = (n % (oX * oY)) / oX; kp = k < oX ? k : k - oZ; ip = i < oX ? i : i - oY; jp = n % oX; } long int in_idx = (kp < 0 ? kp + iZ : kp) * iYX + (ip < 0 ? ip + iY : ip)*iX + jp; long int out_idx = (kp < 0 ? kp + oZ : kp) * oYX + (ip < 0 ? ip + oY : ip)*oX + jp; g_out[out_idx + oOFF] = g_in[in_idx + iOFF]; } #define NEAREST_NEIGHBOUR 0 #define TRILINEAR 1 __global__ void cuda_kernel_griddingCorrect(RFLOAT *vol, int interpolator, RFLOAT rrval, RFLOAT r_min_nn, size_t iX, size_t iY, size_t iZ); template __global__ void cuda_kernel_window_transform( T *d_in, T *d_out, int iszX, int iszY, int iszZ, //Input dimensions int oftX, int oftY, int oftZ, int oszX, int oszY, int oszZ //Output dimensions ) { int idx = blockIdx.x*blockDim.x + threadIdx.x; int idy = blockIdx.y*blockDim.y + threadIdx.y; int idz = blockIdx.z*blockDim.z + threadIdx.z; if(idx < oszX && idy < oszY && idz =oftX) && (idx=oftY) && (idy=oftZ) && (idz __global__ void cuda_kernel_centerFFT_2D(T *img_in, int image_size, int xdim, int ydim, int xshift, int yshift) { int pixel = threadIdx.x + blockIdx.x*blockDim.x; long int image_offset = image_size*blockIdx.y; if(pixel<(image_size/2)) { int y = floorf((XFLOAT)pixel/(XFLOAT)xdim); int x = pixel % xdim; // also = pixel - y*xdim, but this depends on y having been calculated, i.e. serial evaluation int xp = (x + xshift + xdim)%xdim; int yp = (y + yshift + ydim)%ydim; int n_pixel = yp*xdim + xp; T buffer = img_in[image_offset + n_pixel]; img_in[image_offset + n_pixel] = img_in[image_offset + pixel]; img_in[image_offset + pixel] = buffer; } } template __global__ void cuda_kernel_centerFFT_2D(double*, int, int, int, int, int); template __global__ void cuda_kernel_centerFFT_2D(float*, int, int, int, int, int); template __global__ void cuda_kernel_centerFFT_3D(T *img_in, int image_size, int xdim, int ydim, int zdim, int xshift, int yshift, int zshift) { int pixel = threadIdx.x + blockIdx.x*blockDim.x; long int image_offset = image_size*blockIdx.y; int xydim = xdim*ydim; if(pixel<(image_size/2)) { int z = floorf((XFLOAT)pixel/(XFLOAT)(xydim)); int xy = pixel % xydim; int y = floorf((XFLOAT)xy/(XFLOAT)xdim); int x = xy % xdim; int xp = (x + xshift + xdim)%xdim; int yp = (y + yshift + ydim)%ydim; int zp = (z + zshift + zdim)%zdim; int n_pixel = zp*xydim + yp*xdim + xp; T buffer = img_in[image_offset + n_pixel]; img_in[image_offset + n_pixel] = img_in[image_offset + pixel]; img_in[image_offset + pixel] = buffer; } } template __global__ void cuda_kernel_centerFFT_3D(double*, int, int, int, int, int, int, int); template __global__ void cuda_kernel_centerFFT_3D(float*, int, int, int, int, int, int, int); template __global__ void cuda_kernel_centerFFTbySign(T *img_in, int xdim, int ydim, int zdim) { int x = threadIdx.x + blockIdx.x*blockDim.x; int y = threadIdx.y + blockIdx.y*blockDim.y; int z = threadIdx.z + blockIdx.z*blockDim.z; int pixel = z*xdim*ydim + y*xdim + x; if(x(double2*, int, int, int); template __global__ void cuda_kernel_centerFFTbySign(float2*, int, int, int); #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 #else __device__ double atomicAdd(double* address, double val) { unsigned long long int* address_as_ull = (unsigned long long int*)address; unsigned long long int old = *address_as_ull, assumed; do { assumed = old; old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val + __longlong_as_double(assumed))); } while (assumed != old); return __longlong_as_double(old); } #endif 
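/*
 * Illustrative reference only (a minimal host-side sketch, not used anywhere in RELION; the
 * function name and its flattened inputs are hypothetical): the cuda_kernel_calcPowerSpectrum
 * template below essentially performs a radially binned accumulation of |F|^2, i.e. for every
 * Fourier element inside the requested resolution range it adds the squared amplitude to the
 * shell given by its radius and counts how many elements contributed, so that an average per
 * shell can be formed afterwards from the two accumulators.
 */
static inline void powerSpectrumReferenceSketch(
		const double *freal, const double *fimag, // Fourier amplitudes (hypothetical flattened input)
		const int *shell,                         // precomputed resolution-shell index per element
		long n_elements,
		double *power,                            // one accumulator per shell
		double *counter)                          // number of contributions per shell
{
	for (long n = 0; n < n_elements; n++)
	{
		double a2 = freal[n] * freal[n] + fimag[n] * fimag[n]; // |F|^2 of this element
		power[shell[n]]   += a2;
		counter[shell[n]] += 1.0;
	}
}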
template __global__ void cuda_kernel_calcPowerSpectrum(T *dFaux, int padoridim, T *ddata, int data_sz, RFLOAT *dpower_spectrum, RFLOAT *dcounter, int max_r2, int min_r2, RFLOAT normfft, RFLOAT padding_factor, RFLOAT weight, RFLOAT *dfourier_mask, int fx, int fy, int fz, bool do_fourier_mask) { int idx = blockIdx.x*blockDim.x + threadIdx.x; int idy = blockIdx.y*blockDim.y + threadIdx.y; int idz = blockIdx.z*blockDim.z + threadIdx.z; int XSIZE = padoridim/2+1; int dx, dxy; dx = (data_sz/2+1); dxy = (blockDim.z != 1)? data_sz*dx:0; if(idx #include #include #include #include "src/acc/acc_projector.h" #include "src/acc/acc_projectorkernel_impl.h" #include "src/acc/cuda/cuda_settings.h" #include "src/acc/cuda/cuda_kernels/cuda_device_utils.cuh" template __global__ void cuda_kernel_wavg( XFLOAT *g_eulers, AccProjectorKernel projector, unsigned image_size, unsigned long orientation_num, XFLOAT *g_img_real, XFLOAT *g_img_imag, XFLOAT *g_trans_x, XFLOAT *g_trans_y, XFLOAT *g_trans_z, XFLOAT* g_weights, XFLOAT* g_ctfs, XFLOAT *g_wdiff2s_parts, XFLOAT *g_wdiff2s_AA, XFLOAT *g_wdiff2s_XA, unsigned long translation_num, XFLOAT weight_norm, XFLOAT significant_weight, XFLOAT part_scale) { XFLOAT ref_real, ref_imag, img_real, img_imag, trans_real, trans_imag; int bid = blockIdx.x; //block ID int tid = threadIdx.x; extern __shared__ XFLOAT buffer[]; unsigned pass_num(ceilfracf(image_size,block_sz)),pixel; XFLOAT * s_wdiff2s_parts = &buffer[0]; XFLOAT * s_sumXA = &buffer[block_sz]; XFLOAT * s_sumA2 = &buffer[2*block_sz]; XFLOAT * s_eulers = &buffer[3*block_sz]; if (tid < 9) s_eulers[tid] = g_eulers[bid*9+tid]; __syncthreads(); for (unsigned pass = 0; pass < pass_num; pass++) // finish a reference proj in each block { s_wdiff2s_parts[tid] = 0.0f; s_sumXA[tid] = 0.0f; s_sumA2[tid] = 0.0f; pixel = pass * block_sz + tid; if(pixel projector.maxR) { if (z >= projector.imgZ - projector.maxR) z = z - projector.imgZ; else x = projector.maxR; } } else { x = pixel % projector.imgX; y = floorfracf( pixel , projector.imgX); } if (y > projector.maxR) { if (y >= projector.imgY - projector.maxR) y = y - projector.imgY; else x = projector.maxR; } if(DATA3D) projector.project3Dmodel( x,y,z, s_eulers[0], s_eulers[1], s_eulers[2], s_eulers[3], s_eulers[4], s_eulers[5], s_eulers[6], s_eulers[7], s_eulers[8], ref_real, ref_imag); else if(REF3D) projector.project3Dmodel( x,y, s_eulers[0], s_eulers[1], s_eulers[3], s_eulers[4], s_eulers[6], s_eulers[7], ref_real, ref_imag); else projector.project2Dmodel( x,y, s_eulers[0], s_eulers[1], s_eulers[3], s_eulers[4], ref_real, ref_imag); if (REFCTF) { if(CTFPREMULTIPLIED) { ref_real *= __ldg(&g_ctfs[pixel]) * __ldg(&g_ctfs[pixel]); ref_imag *= __ldg(&g_ctfs[pixel]) * __ldg(&g_ctfs[pixel]); } else { ref_real *= __ldg(&g_ctfs[pixel]); ref_imag *= __ldg(&g_ctfs[pixel]); } } else { ref_real *= part_scale; ref_imag *= part_scale; } img_real = __ldg(&g_img_real[pixel]); img_imag = __ldg(&g_img_imag[pixel]); for (unsigned long itrans = 0; itrans < translation_num; itrans++) { XFLOAT weight = __ldg(&g_weights[bid * translation_num + itrans]); if (weight >= significant_weight) { weight /= weight_norm; if(DATA3D) translatePixel(x, y, z, g_trans_x[itrans], g_trans_y[itrans], g_trans_z[itrans], img_real, img_imag, trans_real, trans_imag); else translatePixel(x, y, g_trans_x[itrans], g_trans_y[itrans], img_real, img_imag, trans_real, trans_imag); XFLOAT diff_real = ref_real - trans_real; XFLOAT diff_imag = ref_imag - trans_imag; s_wdiff2s_parts[tid] += weight * (diff_real*diff_real + 
diff_imag*diff_imag); s_sumXA[tid] += weight * ( ref_real * trans_real + ref_imag * trans_imag); s_sumA2[tid] += weight * ( ref_real*ref_real + ref_imag*ref_imag ); } } cuda_atomic_add(&g_wdiff2s_XA[pixel], s_sumXA[tid]); cuda_atomic_add(&g_wdiff2s_AA[pixel], s_sumA2[tid]); cuda_atomic_add(&g_wdiff2s_parts[pixel], s_wdiff2s_parts[tid]); } } } #endif /* CUDA_WAVG_KERNEL_CUH_ */ relion-3.1.3/src/acc/cuda/cuda_mem_utils.h000066400000000000000000000042711411340063500204030ustar00rootroot00000000000000#ifndef CUDA_DEVICE_MEM_UTILS_H_ #define CUDA_DEVICE_MEM_UTILS_H_ #ifdef CUDA #include #include #include "src/acc/cuda/cuda_settings.h" #include "src/acc/cuda/custom_allocator.cuh" #endif #include #include #include #include #include #include #include #include #include "src/parallel.h" #include "src/complex.h" // Forward definition template class AccPtr; /** * Print cuda device memory info */ static void cudaPrintMemInfo() { size_t free; size_t total; DEBUG_HANDLE_ERROR(cudaMemGetInfo( &free, &total )); float free_hr(free/(1024.*1024.)); float total_hr(total/(1024.*1024.)); printf( "free %.2fMiB, total %.2fMiB, used %.2fMiB\n", free_hr, total_hr, total_hr - free_hr); } template< typename T> static inline void cudaCpyHostToDevice( T *h_ptr, T *d_ptr, size_t size) { DEBUG_HANDLE_ERROR(cudaMemcpy( d_ptr, h_ptr, size * sizeof(T), cudaMemcpyHostToDevice)); }; template< typename T> static inline void cudaCpyHostToDevice( T *h_ptr, T *d_ptr, size_t size, cudaStream_t &stream) { DEBUG_HANDLE_ERROR(cudaMemcpyAsync( d_ptr, h_ptr, size * sizeof(T), cudaMemcpyHostToDevice, stream)); }; template< typename T> static inline void cudaCpyDeviceToHost( T *d_ptr, T *h_ptr, size_t size) { DEBUG_HANDLE_ERROR(cudaMemcpy( h_ptr, d_ptr, size * sizeof(T), cudaMemcpyDeviceToHost)); }; template< typename T> static inline void cudaCpyDeviceToHost( T *d_ptr, T *h_ptr, size_t size, cudaStream_t &stream) { DEBUG_HANDLE_ERROR(cudaMemcpyAsync( h_ptr, d_ptr, size * sizeof(T), cudaMemcpyDeviceToHost, stream)); }; template< typename T> static inline void cudaCpyDeviceToDevice( T *src, T *des, size_t size, cudaStream_t stream) { DEBUG_HANDLE_ERROR(cudaMemcpyAsync( des, src, size * sizeof(T), cudaMemcpyDeviceToDevice, stream)); }; template< typename T> static inline void cudaMemInit( T *ptr, T value, size_t size) { DEBUG_HANDLE_ERROR(cudaMemset( ptr, value, size * sizeof(T))); }; template< typename T> static inline void cudaMemInit( T *ptr, T value, size_t size, cudaStream_t &stream) { DEBUG_HANDLE_ERROR(cudaMemsetAsync( ptr, value, size * sizeof(T), stream)); }; #endif relion-3.1.3/src/acc/cuda/cuda_ml_optimiser.cu000066400000000000000000000221641411340063500212710ustar00rootroot00000000000000#undef ALTCPU #include #include #include #include #include #include #include #include "src/ml_optimiser.h" #include #include #include #include "src/acc/acc_ptr.h" #include "src/acc/acc_projector.h" #include "src/acc/acc_backprojector.h" #include "src/acc/acc_projector_plan.h" #include "src/acc/cuda/cuda_benchmark_utils.h" #include "src/acc/cuda/cuda_kernels/helper.cuh" #include "src/acc/cuda/cuda_kernels/diff2.cuh" #include "src/acc/cuda/cuda_kernels/wavg.cuh" #include "src/acc/cuda/cuda_mem_utils.h" #include "src/acc/cuda/cuda_fft.h" #include "src/acc/data_types.h" #include "src/complex.h" #include "src/helix.h" #include "src/error.h" #include #include "src/parallel.h" #include #include #ifdef CUDA_FORCESTL #include "src/acc/cuda/cuda_utils_stl.cuh" #else #include "src/acc/cuda/cuda_utils_cub.cuh" #endif #include "src/acc/utilities.h" #include 
"src/acc/utilities_impl.h" #include "src/acc/acc_ml_optimiser.h" #include "src/acc/cuda/cuda_ml_optimiser.h" #include "src/acc/acc_helper_functions.h" #include "src/acc/acc_ml_optimiser_impl.h" // ------------------------------- Some explicit template instantiations template __global__ void CudaKernels::cuda_kernel_translate2D(XFLOAT *, XFLOAT*, int, int, int, int, int); template __global__ void CudaKernels::cuda_kernel_translate3D(XFLOAT *, XFLOAT *, int, int, int, int, int, int, int); template __global__ void cuda_kernel_multi( XFLOAT *, XFLOAT *, XFLOAT, int); template __global__ void CudaKernels::cuda_kernel_multi( XFLOAT *, XFLOAT, int); template __global__ void cuda_kernel_multi( XFLOAT *, XFLOAT *, XFLOAT *, XFLOAT, int); // ---------------------------------------------------------------------- // High-level CUDA objects size_t MlDeviceBundle::checkFixedSizedObjects(int shares) { int devCount; size_t BoxLimit; HANDLE_ERROR(cudaGetDeviceCount(&devCount)); if(device_id >= devCount) CRITICAL(ERR_GPUID); HANDLE_ERROR(cudaSetDevice(device_id)); size_t free(0), total(0); DEBUG_HANDLE_ERROR(cudaMemGetInfo( &free, &total )); float margin(1.05); BoxLimit = pow(free/(margin*2.5*sizeof(XFLOAT)*((float)shares)),(1/3.0)) / ((float) baseMLO->mymodel.padding_factor); //size_t BytesNeeded = ((float)shares)*margin*2.5*sizeof(XFLOAT)*pow((baseMLO->mymodel.ori_size*baseMLO->mymodel.padding_factor),3); return(BoxLimit); } void MlDeviceBundle::setupFixedSizedObjects() { int devCount; HANDLE_ERROR(cudaGetDeviceCount(&devCount)); if(device_id >= devCount) { //std::cerr << " using device_id=" << device_id << " (device no. " << device_id+1 << ") which is higher than the available number of devices=" << devCount << std::endl; CRITICAL(ERR_GPUID); } else HANDLE_ERROR(cudaSetDevice(device_id)); //Can we pre-generate projector plan and corresponding euler matrices for all particles if (baseMLO->do_skip_align || baseMLO->do_skip_rotate || baseMLO->do_auto_refine || baseMLO->mymodel.orientational_prior_mode != NOPRIOR) generateProjectionPlanOnTheFly = true; else generateProjectionPlanOnTheFly = false; unsigned nr_proj = baseMLO->mymodel.PPref.size(); unsigned nr_bproj = baseMLO->wsum_model.BPref.size(); projectors.resize(nr_proj); backprojectors.resize(nr_bproj); /*====================================================== PROJECTOR AND BACKPROJECTOR ======================================================*/ for (int imodel = 0; imodel < nr_proj; imodel++) { projectors[imodel].setMdlDim( baseMLO->mymodel.PPref[imodel].data.xdim, baseMLO->mymodel.PPref[imodel].data.ydim, baseMLO->mymodel.PPref[imodel].data.zdim, baseMLO->mymodel.PPref[imodel].data.yinit, baseMLO->mymodel.PPref[imodel].data.zinit, baseMLO->mymodel.PPref[imodel].r_max, baseMLO->mymodel.PPref[imodel].padding_factor); projectors[imodel].initMdl(baseMLO->mymodel.PPref[imodel].data.data); } for (int imodel = 0; imodel < nr_bproj; imodel++) { backprojectors[imodel].setMdlDim( baseMLO->wsum_model.BPref[imodel].data.xdim, baseMLO->wsum_model.BPref[imodel].data.ydim, baseMLO->wsum_model.BPref[imodel].data.zdim, baseMLO->wsum_model.BPref[imodel].data.yinit, baseMLO->wsum_model.BPref[imodel].data.zinit, baseMLO->wsum_model.BPref[imodel].r_max, baseMLO->wsum_model.BPref[imodel].padding_factor); backprojectors[imodel].initMdl(); } /*====================================================== CUSTOM ALLOCATOR ======================================================*/ int memAlignmentSize; cudaDeviceGetAttribute ( &memAlignmentSize, cudaDevAttrTextureAlignment, device_id ); 
allocator = new CudaCustomAllocator(0, memAlignmentSize); } void MlDeviceBundle::setupTunableSizedObjects(size_t allocationSize) { unsigned nr_models = baseMLO->mymodel.nr_classes; int devCount; HANDLE_ERROR(cudaGetDeviceCount(&devCount)); if(device_id >= devCount) { //std::cerr << " using device_id=" << device_id << " (device no. " << device_id+1 << ") which is higher than the available number of devices=" << devCount << std::endl; CRITICAL(ERR_GPUID); } else HANDLE_ERROR(cudaSetDevice(device_id)); /*====================================================== CUSTOM ALLOCATOR ======================================================*/ #ifdef DEBUG_CUDA printf("DEBUG: Total GPU allocation size set to %zu MB on device id %d.\n", allocationSize / (1000*1000), device_id); #endif #ifndef CUDA_NO_CUSTOM_ALLOCATION allocator->resize(allocationSize); #endif /*====================================================== PROJECTION PLAN ======================================================*/ coarseProjectionPlans.resize(nr_models, allocator); for (int iclass = 0; iclass < nr_models; iclass++) { //If doing predefined projector plan at all and is this class significant if (!generateProjectionPlanOnTheFly && baseMLO->mymodel.pdf_class[iclass] > 0.) { std::vector exp_pointer_dir_nonzeroprior; std::vector exp_pointer_psi_nonzeroprior; std::vector exp_directions_prior; std::vector exp_psi_prior; long unsigned itrans_max = baseMLO->sampling.NrTranslationalSamplings() - 1; long unsigned nr_idir = baseMLO->sampling.NrDirections(0, &exp_pointer_dir_nonzeroprior); long unsigned nr_ipsi = baseMLO->sampling.NrPsiSamplings(0, &exp_pointer_psi_nonzeroprior ); coarseProjectionPlans[iclass].setup( baseMLO->sampling, exp_directions_prior, exp_psi_prior, exp_pointer_dir_nonzeroprior, exp_pointer_psi_nonzeroprior, NULL, //Mcoarse_significant baseMLO->mymodel.pdf_class, baseMLO->mymodel.pdf_direction, nr_idir, nr_ipsi, 0, //idir_min nr_idir - 1, //idir_max 0, //ipsi_min nr_ipsi - 1, //ipsi_max 0, //itrans_min itrans_max, 0, //current_oversampling 1, //nr_oversampled_rot iclass, true, //coarse !IS_NOT_INV, baseMLO->do_skip_align, baseMLO->do_skip_rotate, baseMLO->mymodel.orientational_prior_mode ); } } }; void MlOptimiserCuda::resetData() { int devCount; HANDLE_ERROR(cudaGetDeviceCount(&devCount)); if(device_id >= devCount) { //std::cerr << " using device_id=" << device_id << " (device no. " << device_id+1 << ") which is higher than the available number of devices=" << devCount << std::endl; CRITICAL(ERR_GPUID); } else HANDLE_ERROR(cudaSetDevice(device_id)); unsigned nr_classes = baseMLO->mymodel.nr_classes; classStreams.resize(nr_classes, 0); for (int i = 0; i < nr_classes; i++) HANDLE_ERROR(cudaStreamCreate(&classStreams[i])); //HANDLE_ERROR(cudaStreamCreateWithFlags(&classStreams[i],cudaStreamNonBlocking)); transformer1.clear(); transformer2.clear(); }; void MlOptimiserCuda::doThreadExpectationSomeParticles(int thread_id) { #ifdef TIMING // Only time one thread if (thread_id == 0) baseMLO->timer.tic(baseMLO->TIMING_ESP_THR); #endif // CTOC(cudaMLO->timer,"interParticle"); int devCount; HANDLE_ERROR(cudaGetDeviceCount(&devCount)); if(device_id >= devCount) { //std::cerr << " using device_id=" << device_id << " (device no. 
" << device_id+1 << ") which is higher than the available number of devices=" << devCount << std::endl; CRITICAL(ERR_GPUID); } else DEBUG_HANDLE_ERROR(cudaSetDevice(device_id)); //std::cerr << " calling on device " << device_id << std::endl; //put mweight allocation here size_t first_ipart = 0, last_ipart = 0; while (baseMLO->exp_ipart_ThreadTaskDistributor->getTasks(first_ipart, last_ipart)) { CTIC(timer,"oneTask"); for (long unsigned ipart = first_ipart; ipart <= last_ipart; ipart++) { #ifdef TIMING // Only time one thread if (thread_id == 0) baseMLO->timer.tic(baseMLO->TIMING_ESP_DIFF2_A); #endif AccPtrFactory ptrFactory(allocator, cudaStreamPerThread); accDoExpectationOneParticle(this, baseMLO->exp_my_first_part_id + ipart, thread_id, ptrFactory); } CTOC(timer,"oneTask"); } // CTIC(cudaMLO->timer,"interParticle"); // exit(0); #ifdef TIMING // Only time one thread if (thread_id == 0) baseMLO->timer.toc(baseMLO->TIMING_ESP_THR); #endif } relion-3.1.3/src/acc/cuda/cuda_ml_optimiser.h000066400000000000000000000063361411340063500211140ustar00rootroot00000000000000#ifndef CUDA_ML_OPTIMISER_H_ #define CUDA_ML_OPTIMISER_H_ #include "src/mpi.h" #include "src/ml_optimiser.h" #include "src/acc/cuda/cuda_mem_utils.h" #include "src/acc/acc_projector_plan.h" #include "src/acc/acc_projector.h" #include "src/acc/acc_backprojector.h" #include "src/acc/cuda/cuda_fft.h" #include "src/acc/cuda/cuda_benchmark_utils.h" #include //#include #include "src/acc/acc_ml_optimiser.h" #include "src/acc/acc_ptr.h" class MlDeviceBundle { public: //The CUDA accelerated projector set std::vector< AccProjector > projectors; //The CUDA accelerated back-projector set std::vector< AccBackprojector > backprojectors; //Used for precalculations of projection setup CudaCustomAllocator *allocator; //Used for precalculations of projection setup bool generateProjectionPlanOnTheFly; std::vector< AccProjectorPlan > coarseProjectionPlans; MlOptimiser *baseMLO; int device_id; int rank_shared_count; bool haveWarnedRefinementMem; MlDeviceBundle(MlOptimiser *baseMLOptimiser): baseMLO(baseMLOptimiser), generateProjectionPlanOnTheFly(false), rank_shared_count(1), device_id(-1), haveWarnedRefinementMem(false), allocator(NULL) {}; void setDevice(int did) { device_id = did; } size_t checkFixedSizedObjects(int shares); void setupFixedSizedObjects(); void setupTunableSizedObjects(size_t allocationSize); void syncAllBackprojects() { DEBUG_HANDLE_ERROR(cudaDeviceSynchronize()); } ~MlDeviceBundle() { projectors.clear(); backprojectors.clear(); coarseProjectionPlans.clear(); //Delete this lastly delete allocator; HANDLE_ERROR(cudaSetDevice(device_id)); HANDLE_ERROR(cudaDeviceReset()); } }; class MlOptimiserCuda { public: // transformer as holder for reuse of fftw_plans FourierTransformer transformer; //Class streams ( for concurrent scheduling of class-specific kernels) std::vector< cudaStream_t > classStreams; cudaError_t errorStatus; CudaFFT transformer1; CudaFFT transformer2; MlOptimiser *baseMLO; bool refIs3D; bool dataIs3D; int device_id; MlDeviceBundle *bundle; //Used for precalculations of projection setup CudaCustomAllocator *allocator; //Used for precalculations of projection setup bool generateProjectionPlanOnTheFly; #ifdef TIMING_FILES relion_timer timer; #endif MlOptimiserCuda(MlOptimiser *baseMLOptimiser, MlDeviceBundle* bundle, const char * timing_fnm) : baseMLO(baseMLOptimiser), transformer1(cudaStreamPerThread, bundle->allocator, baseMLOptimiser->mymodel.data_dim), transformer2(cudaStreamPerThread, bundle->allocator, 
baseMLOptimiser->mymodel.data_dim), refIs3D(baseMLO->mymodel.ref_dim == 3), dataIs3D(baseMLO->mymodel.data_dim == 3), bundle(bundle), device_id(bundle->device_id), #ifdef TIMING_FILES timer(timing_fnm), #endif errorStatus((cudaError_t)0), allocator(bundle->allocator), generateProjectionPlanOnTheFly(bundle->generateProjectionPlanOnTheFly) {}; void resetData(); void doThreadExpectationSomeParticles(int thread_id); ~MlOptimiserCuda() { for (int i = 0; i < classStreams.size(); i++) if (classStreams[i] != NULL) HANDLE_ERROR(cudaStreamDestroy(classStreams[i])); } CudaCustomAllocator *getAllocator() { return (bundle->allocator); }; }; #endif relion-3.1.3/src/acc/cuda/cuda_projector.cu000066400000000000000000000001411411340063500205640ustar00rootroot00000000000000#include "src/acc/acc_projector.h" #include #include "src/acc/acc_projector_impl.h" relion-3.1.3/src/acc/cuda/cuda_projector_plan.cu000066400000000000000000000005161411340063500216040ustar00rootroot00000000000000#include "src/acc/acc_projector_plan.h" #include "src/time.h" #include #ifdef CUDA //#include #ifdef CUDA_FORCESTL #include "src/acc/cuda/cuda_utils_stl.cuh" #else #include "src/acc/cuda/cuda_utils_cub.cuh" #endif #endif #include "src/acc/utilities.h" #include "src/acc/acc_projector_plan_impl.h" relion-3.1.3/src/acc/cuda/cuda_settings.h000066400000000000000000000101051411340063500202360ustar00rootroot00000000000000#ifndef CUDA_SETTINGS_H_ #define CUDA_SETTINGS_H_ #include #include #include #include #include #include #include #include #include "src/macros.h" #include "src/error.h" #include // Required compute capability #define CUDA_CC_MAJOR 3 #define CUDA_CC_MINOR 5 #define LAUNCH_CHECK #define CUDA_BENCHMARK_OLD true // Error handling ---------------------- #ifdef LAUNCH_CHECK #define LAUNCH_HANDLE_ERROR( err ) (LaunchHandleError( err, __FILE__, __LINE__ )) #define LAUNCH_PRIVATE_ERROR(func, status) { \ (status) = (func); \ LAUNCH_HANDLE_ERROR(status); \ } #else #define LAUNCH_HANDLE_ERROR( err ) (err) //Do nothing #define LAUNCH_PRIVATE_ERROR( err ) (err) //Do nothing #endif #ifdef DEBUG_CUDA #define DEBUG_HANDLE_ERROR( err ) (HandleError( err, __FILE__, __LINE__ )) #define DEBUG_PRIVATE_ERROR(func, status) { \ (status) = (func); \ DEBUG_HANDLE_ERROR(status); \ } #else #define DEBUG_HANDLE_ERROR( err ) (err) //Do nothing #define DEBUG_PRIVATE_ERROR( err ) (err) //Do nothing #endif #define HANDLE_ERROR( err ) (HandleError( err, __FILE__, __LINE__ )) #define PRIVATE_ERROR(func, status) { \ (status) = (func); \ HANDLE_ERROR(status); \ } static void HandleError( cudaError_t err, const char *file, int line ) { if (err != cudaSuccess) { fprintf(stderr, "ERROR: %s in %s at line %d (error-code %d)\n", cudaGetErrorString( err ), file, line, err ); fflush(stdout); #ifdef DEBUG_CUDA raise(SIGSEGV); #else CRITICAL(ERRGPUKERN); #endif } } #ifdef LAUNCH_CHECK static void LaunchHandleError( cudaError_t err, const char *file, int line ) { if (err != cudaSuccess) { printf( "KERNEL_ERROR: %s in %s at line %d (error-code %d)\n", cudaGetErrorString( err ), file, line, err ); fflush(stdout); CRITICAL(ERRGPUKERN); } } #endif // GENERAL ----------------------------- #define MAX_RESOL_SHARED_MEM 32 #define BLOCK_SIZE 128 // ------------------------------------- // COARSE DIFF ------------------------- #define D2C_BLOCK_SIZE_2D 512 #define D2C_EULERS_PER_BLOCK_2D 4 #define D2C_BLOCK_SIZE_REF3D 128 #define D2C_EULERS_PER_BLOCK_REF3D 16 #define D2C_BLOCK_SIZE_DATA3D 64 #define D2C_EULERS_PER_BLOCK_DATA3D 32 // ------------------------------------- // FINE 
DIFF --------------------------- #define D2F_BLOCK_SIZE_2D 256 #define D2F_CHUNK_2D 7 #define D2F_BLOCK_SIZE_REF3D 256 #define D2F_CHUNK_REF3D 7 #define D2F_BLOCK_SIZE_DATA3D 512 #define D2F_CHUNK_DATA3D 4 // ------------------------------------- // WAVG -------------------------------- #define WAVG_BLOCK_SIZE_DATA3D 512 #define WAVG_BLOCK_SIZE 256 // ------------------------------------- // MISC -------------------------------- #define SUMW_BLOCK_SIZE 32 #define SOFTMASK_BLOCK_SIZE 128 #define CFTT_BLOCK_SIZE 128 #define PROBRATIO_BLOCK_SIZE 128 #define POWERCLASS_BLOCK_SIZE 128 #define PROJDIFF_CHUNK_SIZE 14 // ------------------------------------- // RANDOMIZATION ----------------------- #define RND_BLOCK_NUM 64 #define RND_BLOCK_SIZE 32 // ------------------------------------- #define BACKPROJECTION4_BLOCK_SIZE 64 #define BACKPROJECTION4_GROUP_SIZE 16 #define BACKPROJECTION4_PREFETCH_COUNT 3 #define BP_2D_BLOCK_SIZE 128 #define BP_REF3D_BLOCK_SIZE 128 #define BP_DATA3D_BLOCK_SIZE 640 #define REF_GROUP_SIZE 3 // -- Number of references to be treated per block -- // This applies to wavg and reduces global memory // accesses roughly proportionally, but scales shared // memory usage by allocating // ( 6*REF_GROUP_SIZE + 4 ) * BLOCK_SIZE XFLOATS. // DEPRECATED #define NR_CLASS_MUTEXES 5 //The approximate minimum amount of memory each process occupies on a device (in MBs) #define GPU_THREAD_MEMORY_OVERHEAD_MB 200 #endif /* CUDA_SETTINGS_H_ */ relion-3.1.3/src/acc/cuda/cuda_utils_cub.cuh000066400000000000000000000255541411340063500207350ustar00rootroot00000000000000#ifndef CUDA_UTILS_CUB_CUH_ #define CUDA_UTILS_CUB_CUH_ #include #include "src/acc/cuda/cuda_settings.h" #include "src/acc/cuda/cuda_mem_utils.h" #include #include #include // Because thrust uses CUB, thrust defines CubLog and CUB tries to redefine it, // resulting in warnings. This avoids those warnings. 
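// In addition to the CubLog workaround below, this header provides thin wrappers around CUB device-wide primitives (ArgMax/ArgMin, Max/Min, Sum, radix sort, select and inclusive scan) operating on AccPtr arrays. Each wrapper follows the usual CUB two-phase pattern: a first call with a NULL temporary-storage pointer only queries the required scratch size, which is then taken from the CudaCustomAllocator before the same primitive is called again to do the actual work on the pointer's stream.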
#if(defined(CubLog) && defined(__CUDA_ARCH__) && (__CUDA_ARCH__<= 520)) // Intentionally force a warning for new arch #undef CubLog #endif #define CUB_NS_QUALIFIER ::cub // for compatibility with CUDA 11.5 #include "src/acc/cuda/cub/device/device_radix_sort.cuh" #include "src/acc/cuda/cub/device/device_reduce.cuh" #include "src/acc/cuda/cub/device/device_scan.cuh" #include "src/acc/cuda/cub/device/device_select.cuh" namespace CudaKernels { template static std::pair getArgMaxOnDevice(AccPtr &ptr) { #ifdef DEBUG_CUDA if (ptr.getSize() == 0) printf("DEBUG_WARNING: getArgMaxOnDevice called with pointer of zero size.\n"); if (ptr.getDevicePtr() == NULL) printf("DEBUG_WARNING: getArgMaxOnDevice called with null device pointer.\n"); if (ptr.getAllocator() == NULL) printf("DEBUG_WARNING: getArgMaxOnDevice called with null allocator.\n"); #endif AccPtr > max_pair(1, ptr.getStream(), ptr.getAllocator()); max_pair.deviceAlloc(); size_t temp_storage_size = 0; DEBUG_HANDLE_ERROR(cub::DeviceReduce::ArgMax( NULL, temp_storage_size, ~ptr, ~max_pair, ptr.getSize())); if(temp_storage_size==0) temp_storage_size=1; CudaCustomAllocator::Alloc* alloc = ptr.getAllocator()->alloc(temp_storage_size); DEBUG_HANDLE_ERROR(cub::DeviceReduce::ArgMax( alloc->getPtr(), temp_storage_size, ~ptr, ~max_pair, ptr.getSize(), ptr.getStream())); max_pair.cpToHost(); ptr.streamSync(); ptr.getAllocator()->free(alloc); std::pair pair; pair.first = max_pair[0].key; pair.second = max_pair[0].value; return pair; } template static std::pair getArgMinOnDevice(AccPtr &ptr) { #ifdef DEBUG_CUDA if (ptr.getSize() == 0) printf("DEBUG_WARNING: getArgMinOnDevice called with pointer of zero size.\n"); if (ptr.getDevicePtr() == NULL) printf("DEBUG_WARNING: getArgMinOnDevice called with null device pointer.\n"); if (ptr.getAllocator() == NULL) printf("DEBUG_WARNING: getArgMinOnDevice called with null allocator.\n"); #endif AccPtr > min_pair(1, ptr.getStream(), ptr.getAllocator()); min_pair.deviceAlloc(); size_t temp_storage_size = 0; DEBUG_HANDLE_ERROR(cub::DeviceReduce::ArgMin( NULL, temp_storage_size, ~ptr, ~min_pair, ptr.getSize())); if(temp_storage_size==0) temp_storage_size=1; CudaCustomAllocator::Alloc* alloc = ptr.getAllocator()->alloc(temp_storage_size); DEBUG_HANDLE_ERROR(cub::DeviceReduce::ArgMin( alloc->getPtr(), temp_storage_size, ~ptr, ~min_pair, ptr.getSize(), ptr.getStream())); min_pair.cpToHost(); ptr.streamSync(); ptr.getAllocator()->free(alloc); std::pair pair; pair.first = min_pair[0].key; pair.second = min_pair[0].value; return pair; } template static T getMaxOnDevice(AccPtr &ptr) { #ifdef DEBUG_CUDA if (ptr.getSize() == 0) printf("DEBUG_ERROR: getMaxOnDevice called with pointer of zero size.\n"); if (ptr.getDevicePtr() == NULL) printf("DEBUG_ERROR: getMaxOnDevice called with null device pointer.\n"); if (ptr.getAllocator() == NULL) printf("DEBUG_ERROR: getMaxOnDevice called with null allocator.\n"); #endif AccPtr max_val(1, ptr.getStream(), ptr.getAllocator()); max_val.deviceAlloc(); size_t temp_storage_size = 0; DEBUG_HANDLE_ERROR(cub::DeviceReduce::Max( NULL, temp_storage_size, ~ptr, ~max_val, ptr.getSize())); if(temp_storage_size==0) temp_storage_size=1; CudaCustomAllocator::Alloc* alloc = ptr.getAllocator()->alloc(temp_storage_size); DEBUG_HANDLE_ERROR(cub::DeviceReduce::Max( alloc->getPtr(), temp_storage_size, ~ptr, ~max_val, ptr.getSize(), ptr.getStream())); max_val.cpToHost(); ptr.streamSync(); ptr.getAllocator()->free(alloc); return max_val[0]; } template static T getMinOnDevice(AccPtr &ptr) { #ifdef DEBUG_CUDA if
(ptr.getSize() == 0) printf("DEBUG_ERROR: getMinOnDevice called with pointer of zero size.\n"); if (ptr.getDevicePtr() == NULL) printf("DEBUG_ERROR: getMinOnDevice called with null device pointer.\n"); if (ptr.getAllocator() == NULL) printf("DEBUG_ERROR: getMinOnDevice called with null allocator.\n"); #endif AccPtr min_val(1, ptr.getStream(), ptr.getAllocator()); min_val.deviceAlloc(); size_t temp_storage_size = 0; DEBUG_HANDLE_ERROR(cub::DeviceReduce::Min( NULL, temp_storage_size, ~ptr, ~min_val, ptr.getSize())); if(temp_storage_size==0) temp_storage_size=1; CudaCustomAllocator::Alloc* alloc = ptr.getAllocator()->alloc(temp_storage_size); DEBUG_HANDLE_ERROR(cub::DeviceReduce::Min( alloc->getPtr(), temp_storage_size, ~ptr, ~min_val, ptr.getSize(), ptr.getStream())); min_val.cpToHost(); ptr.streamSync(); ptr.getAllocator()->free(alloc); return min_val[0]; } template static T getSumOnDevice(AccPtr &ptr) { #ifdef DEBUG_CUDA if (ptr.getSize() == 0) printf("DEBUG_ERROR: getSumOnDevice called with pointer of zero size.\n"); if (ptr.getDevicePtr() == NULL) printf("DEBUG_ERROR: getSumOnDevice called with null device pointer.\n"); if (ptr.getAllocator() == NULL) printf("DEBUG_ERROR: getSumOnDevice called with null allocator.\n"); #endif AccPtr val(1, ptr.getStream(), ptr.getAllocator()); val.deviceAlloc(); size_t temp_storage_size = 0; DEBUG_HANDLE_ERROR(cub::DeviceReduce::Sum( NULL, temp_storage_size, ~ptr, ~val, ptr.getSize())); if(temp_storage_size==0) temp_storage_size=1; CudaCustomAllocator::Alloc* alloc = ptr.getAllocator()->alloc(temp_storage_size); DEBUG_HANDLE_ERROR(cub::DeviceReduce::Sum( alloc->getPtr(), temp_storage_size, ~ptr, ~val, ptr.getSize(), ptr.getStream())); val.cpToHost(); ptr.streamSync(); ptr.getAllocator()->free(alloc); return val[0]; } template static void sortOnDevice(AccPtr &in, AccPtr &out) { #ifdef DEBUG_CUDA if (in.getSize() == 0 || out.getSize() == 0) printf("DEBUG_ERROR: sortOnDevice called with pointer of zero size.\n"); if (in.getDevicePtr() == NULL || out.getDevicePtr() == NULL) printf("DEBUG_ERROR: sortOnDevice called with null device pointer.\n"); if (in.getAllocator() == NULL) printf("DEBUG_ERROR: sortOnDevice called with null allocator.\n"); #endif size_t temp_storage_size = 0; cudaStream_t stream = in.getStream(); DEBUG_HANDLE_ERROR(cub::DeviceRadixSort::SortKeys( NULL, temp_storage_size, ~in, ~out, in.getSize())); if(temp_storage_size==0) temp_storage_size=1; CudaCustomAllocator::Alloc* alloc = in.getAllocator()->alloc(temp_storage_size); DEBUG_HANDLE_ERROR(cub::DeviceRadixSort::SortKeys( alloc->getPtr(), temp_storage_size, ~in, ~out, in.getSize(), 0, sizeof(T) * 8, stream)); alloc->markReadyEvent(stream); alloc->doFreeWhenReady(); } template static void sortDescendingOnDevice(AccPtr &in, AccPtr &out) { #ifdef DEBUG_CUDA if (in.getSize() == 0 || out.getSize() == 0) printf("DEBUG_ERROR: sortDescendingOnDevice called with pointer of zero size.\n"); if (in.getDevicePtr() == NULL || out.getDevicePtr() == NULL) printf("DEBUG_ERROR: sortDescendingOnDevice called with null device pointer.\n"); if (in.getAllocator() == NULL) printf("DEBUG_ERROR: sortDescendingOnDevice called with null allocator.\n"); #endif size_t temp_storage_size = 0; cudaStream_t stream = in.getStream(); DEBUG_HANDLE_ERROR(cub::DeviceRadixSort::SortKeysDescending( NULL, temp_storage_size, ~in, ~out, in.getSize())); if(temp_storage_size==0) temp_storage_size=1; CudaCustomAllocator::Alloc* alloc = in.getAllocator()->alloc(temp_storage_size); 
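// Second phase of the CUB call: with the scratch buffer attached, run the actual descending radix sort over all key bits (0 .. sizeof(T)*8) on the input pointer's stream; the scratch allocation is then released asynchronously once its ready-event has fired.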
DEBUG_HANDLE_ERROR(cub::DeviceRadixSort::SortKeysDescending( alloc->getPtr(), temp_storage_size, ~in, ~out, in.getSize(), 0, sizeof(T) * 8, stream)); alloc->markReadyEvent(stream); alloc->doFreeWhenReady(); } class AllocatorThrustWrapper { public: // just allocate bytes typedef char value_type; std::vector allocs; CudaCustomAllocator *allocator; AllocatorThrustWrapper(CudaCustomAllocator *allocator): allocator(allocator) {} ~AllocatorThrustWrapper() { for (int i = 0; i < allocs.size(); i ++) allocator->free(allocs[i]); } char* allocate(std::ptrdiff_t num_bytes) { CudaCustomAllocator::Alloc* alloc = allocator->alloc(num_bytes); allocs.push_back(alloc); return (char*) alloc->getPtr(); } void deallocate(char* ptr, size_t n) { //TODO fix this (works fine without it though) /Dari } }; template struct MoreThanCubOpt { T compare; MoreThanCubOpt(T compare) : compare(compare) {} __device__ __forceinline__ bool operator()(const T &a) const { return (a > compare); } }; template static int filterOnDevice(AccPtr &in, AccPtr &out, SelectOp select_op) { #ifdef DEBUG_CUDA if (in.getSize() == 0 || out.getSize() == 0) printf("DEBUG_ERROR: filterOnDevice called with pointer of zero size.\n"); if (in.getDevicePtr() == NULL || out.getDevicePtr() == NULL) printf("DEBUG_ERROR: filterOnDevice called with null device pointer.\n"); if (in.getAllocator() == NULL) printf("DEBUG_ERROR: filterOnDevice called with null allocator.\n"); #endif size_t temp_storage_size = 0; cudaStream_t stream = in.getStream(); AccPtr num_selected_out(1, stream, in.getAllocator()); num_selected_out.deviceAlloc(); DEBUG_HANDLE_ERROR(cub::DeviceSelect::If(NULL, temp_storage_size, ~in, ~out, ~num_selected_out, in.getSize(), select_op, stream)); if(temp_storage_size==0) temp_storage_size=1; CudaCustomAllocator::Alloc* alloc = in.getAllocator()->alloc(temp_storage_size); DEBUG_HANDLE_ERROR(cub::DeviceSelect::If(alloc->getPtr(), temp_storage_size, ~in, ~out, ~num_selected_out, in.getSize(), select_op, stream)); num_selected_out.cpToHost(); DEBUG_HANDLE_ERROR(cudaStreamSynchronize(stream)); in.getAllocator()->free(alloc); return num_selected_out[0]; } template static void scanOnDevice(AccPtr &in, AccPtr &out) { #ifdef DEBUG_CUDA if (in.getSize() == 0 || out.getSize() == 0) printf("DEBUG_ERROR: scanOnDevice called with pointer of zero size.\n"); if (in.getDevicePtr() == NULL || out.getDevicePtr() == NULL) printf("DEBUG_ERROR: scanOnDevice called with null device pointer.\n"); if (in.getAllocator() == NULL) printf("DEBUG_ERROR: scanOnDevice called with null allocator.\n"); #endif size_t temp_storage_size = 0; cudaStream_t stream = in.getStream(); DEBUG_HANDLE_ERROR(cub::DeviceScan::InclusiveSum( NULL, temp_storage_size, ~in, ~out, in.getSize())); if(temp_storage_size==0) temp_storage_size=1; CudaCustomAllocator::Alloc* alloc = in.getAllocator()->alloc(temp_storage_size); DEBUG_HANDLE_ERROR(cub::DeviceScan::InclusiveSum( alloc->getPtr(), temp_storage_size, ~in, ~out, in.getSize(), stream)); alloc->markReadyEvent(stream); alloc->doFreeWhenReady(); } } // namespace CudaKernels #endif relion-3.1.3/src/acc/cuda/custom_allocator.cuh000066400000000000000000000313311411340063500213100ustar00rootroot00000000000000#ifndef CUDA_CUSTOM_ALLOCATOR_CUH_ #define CUDA_CUSTOM_ALLOCATOR_CUH_ // This is where custom allocator should be. Commented out for now, to avoid double declaration. 
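// Illustrative usage sketch (not from the original sources; `alloc`, `stream` and the kernel launch are placeholders, the member functions are the ones defined below):
//
//   CudaCustomAllocator::Alloc *a = alloc->alloc(nBytes);        // sub-allocate device memory
//   someKernel<<<grid, block, 0, stream>>>((XFLOAT*) a->getPtr());
//   a->markReadyEvent(stream);   // record an event after the last use on this stream
//   a->doFreeWhenReady();        // the region is recycled once that event has completed
//
// A region can also be returned synchronously with alloc->free(a).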
#ifdef CUDA #include "src/acc/cuda/cuda_settings.h" #include #endif #include #include #include #include #include #include #include #include #include "src/macros.h" #include "src/error.h" #include "src/parallel.h" #ifdef CUSTOM_ALLOCATOR_MEMGUARD #include #include #endif #ifdef DUMP_CUSTOM_ALLOCATOR_ACTIVITY #define CUSTOM_ALLOCATOR_REGION_NAME( name ) (fprintf(stderr, "\n%s", name)) #else #define CUSTOM_ALLOCATOR_REGION_NAME( name ) //Do nothing #endif class CudaCustomAllocator { typedef unsigned char BYTE; const static unsigned GUARD_SIZE = 4; const static BYTE GUARD_VALUE = 145; const static int ALLOC_RETRY = 500; public: class Alloc { friend class CudaCustomAllocator; private: Alloc *prev, *next; BYTE *ptr; size_t size; bool free; cudaEvent_t readyEvent; //Event record used for auto free bool freeWhenReady; #ifdef CUSTOM_ALLOCATOR_MEMGUARD BYTE *guardPtr; void *backtrace[20]; size_t backtraceSize; #endif Alloc(): prev(NULL), next(NULL), ptr(NULL), size(0), free(0), readyEvent(0), freeWhenReady(false) {} ~Alloc() { prev = NULL; next = NULL; ptr = NULL; if (readyEvent != 0) DEBUG_HANDLE_ERROR(cudaEventDestroy(readyEvent)); } public: inline BYTE *getPtr() { return ptr; } inline size_t getSize() { return size; } inline bool isFree() { return free; } inline cudaEvent_t getReadyEvent() { return readyEvent; } inline void markReadyEvent(cudaStream_t stream = 0) { //TODO add a debug warning if event already set DEBUG_HANDLE_ERROR(cudaEventCreate(&readyEvent)); DEBUG_HANDLE_ERROR(cudaEventRecord(readyEvent, stream)); } inline void doFreeWhenReady() { freeWhenReady = true; } }; private: Alloc *first; size_t totalSize; size_t alignmentSize; bool cache; pthread_mutex_t mutex; //Look for the first suited space Alloc *_getFirstSuitedFree(size_t size) { Alloc *a = first; //If not the last and too small or not free go to next allocation region while (a != NULL && ( a->size <= size || ! a->free ) ) a = a->next; return a; } //Free allocs with recorded ready events bool _syncReadyEvents() { bool somethingReady(false); Alloc *a = first; while (a != NULL) { if (! a->free && a->freeWhenReady && a->readyEvent != 0) { DEBUG_HANDLE_ERROR(cudaEventSynchronize(a->readyEvent)); somethingReady = true; } a = a->next; } return somethingReady; } //Free allocs with recorded ready events bool _freeReadyAllocs() { bool somethingFreed(false); Alloc *next = first; Alloc *curr; while (next != NULL) { curr = next; next = curr->next; if (! 
curr->free && curr->freeWhenReady && curr->readyEvent != 0) { cudaError_t e = cudaEventQuery(curr->readyEvent); if (e == cudaSuccess) { _free(curr); next = first; //List modified, restart somethingFreed = true; } else if (e != cudaErrorNotReady) { _printState(); HandleError( e, __FILE__, __LINE__ ); } } } return somethingFreed; } size_t _getTotalFreeSpace() { if (cache) { size_t total = 0; Alloc *a = first; while (a != NULL) { if (a->free) total += a->size; a = a->next; } return total; } else { size_t free, total; DEBUG_HANDLE_ERROR(cudaMemGetInfo( &free, &total )); return free; } } size_t _getTotalUsedSpace() { size_t total = 0; Alloc *a = first; while (a != NULL) { if (!a->free) total += a->size; a = a->next; } return total; } size_t _getNumberOfAllocs() { size_t total = 0; Alloc *a = first; while (a != NULL) { if (!a->free) total ++; a = a->next; } return total; } size_t _getLargestContinuousFreeSpace() { if (cache) { size_t largest = 0; Alloc *a = first; while (a != NULL) { if (a->free && a->size > largest) largest = a->size; a = a->next; } return largest; } else return _getTotalFreeSpace(); } void _printState() { size_t total = 0; Alloc *a = first; while (a != NULL) { total += a->size; if (a->free) printf("[%luB] ", (unsigned long) a->size); else if (a->freeWhenReady) printf("<%luB> ", (unsigned long) a->size); else printf("(%luB) ", (unsigned long) a->size); a = a->next; } printf("= %luB\n", (unsigned long) total); fflush(stdout); } void _free(Alloc* a) { // printf("free: %u ", a->size); // _printState(); #ifdef CUSTOM_ALLOCATOR_MEMGUARD size_t guardCount = a->size - (a->guardPtr - a->ptr); BYTE *guards = new BYTE[guardCount]; cudaStream_t stream = 0; CudaShortcuts::cpyDeviceToHost( a->guardPtr, guards, guardCount, stream); DEBUG_HANDLE_ERROR(cudaStreamSynchronize(stream)); for (int i = 0; i < guardCount; i ++) if (guards[i] != GUARD_VALUE) { fprintf (stderr, "ERROR: CORRUPTED BYTE GUARDS DETECTED\n"); char ** messages = backtrace_symbols(a->backtrace, a->backtraceSize); // skip first stack frame (points here) for (int i = 1; i < a->backtraceSize && messages != NULL; ++i) { char *mangled_name = 0, *offset_begin = 0, *offset_end = 0; // find parantheses and +address offset surrounding mangled name for (char *p = messages[i]; *p; ++p) { if (*p == '(') { mangled_name = p; } else if (*p == '+') { offset_begin = p; } else if (*p == ')') { offset_end = p; break; } } // if the line could be processed, attempt to demangle the symbol if (mangled_name && offset_begin && offset_end && mangled_name < offset_begin) { *mangled_name++ = '\0'; *offset_begin++ = '\0'; *offset_end++ = '\0'; int status; char * real_name = abi::__cxa_demangle(mangled_name, 0, 0, &status); // if demangling is successful, output the demangled function name if (status == 0) { std::cerr << "[bt]: (" << i << ") " << messages[i] << " : " << real_name << "+" << offset_begin << offset_end << std::endl; } // otherwise, output the mangled function name else { std::cerr << "[bt]: (" << i << ") " << messages[i] << " : " << mangled_name << "+" << offset_begin << offset_end << std::endl; } // free(real_name); } // otherwise, print the whole line else { std::cerr << "[bt]: (" << i << ") " << messages[i] << std::endl; } } std::cerr << std::endl; // free(messages); exit(EXIT_FAILURE); } delete[] guards; #endif a->free = true; if (cache) { //Previous neighbor is free, concatenate if ( a->prev != NULL && a->prev->free) { //Resize and set pointer a->size += a->prev->size; a->ptr = a->prev->ptr; //Fetch secondary neighbor Alloc *ppL = 
a->prev->prev; //Remove primary neighbor if (ppL == NULL) //If the previous is first in chain first = a; else ppL->next = a; delete a->prev; //Attach secondary neighbor a->prev = ppL; } //Next neighbor is free, concatenate if ( a->next != NULL && a->next->free) { //Resize and set pointer a->size += a->next->size; //Fetch secondary neighbor Alloc *nnL = a->next->next; //Remove primary neighbor if (nnL != NULL) nnL->prev = a; delete a->next; //Attach secondary neighbor a->next = nnL; } } else { DEBUG_HANDLE_ERROR(cudaFree( a->ptr )); a->ptr = NULL; if ( a->prev != NULL) a->prev->next = a->next; else first = a->next; //This is the first link if ( a->next != NULL) a->next->prev = a->prev; delete a; } }; void _setup() { first = new Alloc(); first->prev = NULL; first->next = NULL; first->size = totalSize; first->free = true; if (totalSize > 0) { HANDLE_ERROR(cudaMalloc( (void**) &(first->ptr), totalSize)); cache = true; } else cache = false; } void _clear() { if (first->ptr != NULL) DEBUG_HANDLE_ERROR(cudaFree( first->ptr )); first->ptr = NULL; Alloc *a = first, *nL; while (a != NULL) { nL = a->next; delete a; a = nL; } } public: CudaCustomAllocator(size_t size, size_t alignmentSize): totalSize(size), alignmentSize(alignmentSize), first(0), cache(true) { _setup(); int mutex_error = pthread_mutex_init(&mutex, NULL); if (mutex_error != 0) { printf("ERROR: Mutex could not be created for alloactor. CODE: %d.\n", mutex_error); fflush(stdout); CRITICAL(ERR_CAMUX); } } void resize(size_t size) { Lock ml(&mutex); _clear(); totalSize = size; _setup(); } Alloc* alloc(size_t requestedSize) { Lock ml(&mutex); _freeReadyAllocs(); // printf("alloc: %u ", size); // _printState(); size_t size = requestedSize; #ifdef CUSTOM_ALLOCATOR_MEMGUARD //Ad byte-guards size += alignmentSize * GUARD_SIZE; //Ad an integer multiple of alignment size as byte guard size #endif #ifdef DUMP_CUSTOM_ALLOCATOR_ACTIVITY fprintf(stderr, " %.4f", 100.*(float)size/(float)totalSize); #endif Alloc *newAlloc(NULL); if (cache) { size = alignmentSize*ceilf( (float)size / (float)alignmentSize) ; //To prevent miss-aligned memory Alloc *curAlloc = _getFirstSuitedFree(size); //If out of memory if (curAlloc == NULL) { #ifdef DEBUG_CUDA size_t spaceDiff = _getTotalFreeSpace(); #endif //Try to recover before throwing error for (int i = 0; i <= ALLOC_RETRY; i ++) { if (_syncReadyEvents() && _freeReadyAllocs()) { curAlloc = _getFirstSuitedFree(size); //Is there space now? if (curAlloc != NULL) break; //Success } else usleep(10000); // 10 ms, Order of magnitude of largest kernels } #ifdef DEBUG_CUDA spaceDiff = _getTotalFreeSpace() - spaceDiff; printf("DEBUG_INFO: Out of memory handled by waiting for unfinished tasks, which freed %lu B.\n", spaceDiff); #endif //Did we manage to recover? 
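// If the retry loop above still found no suitable region, report the requested size together with the current free-space statistics and abort through CRITICAL(ERRCUDACAOOM).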
if (curAlloc == NULL) { printf("ERROR: CudaCustomAllocator out of memory\n [requestedSpace: %lu B]\n [largestContinuousFreeSpace: %lu B]\n [totalFreeSpace: %lu B]\n", (unsigned long) size, (unsigned long) _getLargestContinuousFreeSpace(), (unsigned long) _getTotalFreeSpace()); _printState(); fflush(stdout); CRITICAL(ERRCUDACAOOM); } } if (curAlloc->size == size) { curAlloc->free = false; newAlloc = curAlloc; } else //Or curAlloc->size is smaller than size { //Setup new pointer newAlloc = new Alloc(); newAlloc->next = curAlloc; newAlloc->ptr = curAlloc->ptr; newAlloc->size = size; newAlloc->free = false; //Modify old pointer curAlloc->ptr = &(curAlloc->ptr[size]); curAlloc->size -= size; //Insert new allocation region into chain if(curAlloc->prev == NULL) //If the first allocation region first = newAlloc; else curAlloc->prev->next = newAlloc; newAlloc->prev = curAlloc->prev; newAlloc->next = curAlloc; curAlloc->prev = newAlloc; } } else { newAlloc = new Alloc(); newAlloc->size = size; newAlloc->free = false; DEBUG_HANDLE_ERROR(cudaMalloc( (void**) &(newAlloc->ptr), size)); //Just add to start by replacing first newAlloc->next = first; first->prev = newAlloc; first = newAlloc; } #ifdef CUSTOM_ALLOCATOR_MEMGUARD newAlloc->backtraceSize = backtrace(newAlloc->backtrace, 20); newAlloc->guardPtr = newAlloc->ptr + requestedSize; cudaStream_t stream = 0; CudaShortcuts::memInit( newAlloc->guardPtr, GUARD_VALUE, size - requestedSize, stream); //TODO switch to specialized stream DEBUG_HANDLE_ERROR(cudaStreamSynchronize(stream)); #endif return newAlloc; }; ~CudaCustomAllocator() { { Lock ml(&mutex); _clear(); } pthread_mutex_destroy(&mutex); } //Thread-safe wrapper functions void free(Alloc* a) { Lock ml(&mutex); _free(a); } void syncReadyEvents() { Lock ml(&mutex); _syncReadyEvents(); } void freeReadyAllocs() { Lock ml(&mutex); _freeReadyAllocs(); } size_t getTotalFreeSpace() { Lock ml(&mutex); size_t size = _getTotalFreeSpace(); return size; } size_t getTotalUsedSpace() { Lock ml(&mutex); size_t size = _getTotalUsedSpace(); return size; } size_t getNumberOfAllocs() { Lock ml(&mutex); size_t size = _getNumberOfAllocs(); return size; } size_t getLargestContinuousFreeSpace() { Lock ml(&mutex); size_t size = _getLargestContinuousFreeSpace(); return size; } void printState() { Lock ml(&mutex); _printState(); } }; // #endif relion-3.1.3/src/acc/cuda/shortcuts.cuh000066400000000000000000000034341411340063500177770ustar00rootroot00000000000000#ifndef CUDA_SHORTCUTS_CUH_ #define CUDA_SHORTCUTS_CUH_ namespace CudaShortcuts { /** * Print cuda device memory info */ static void printMemInfo() { size_t free; size_t total; DEBUG_HANDLE_ERROR(cudaMemGetInfo( &free, &total )); float free_hr(free/(1024.*1024.)); float total_hr(total/(1024.*1024.)); printf( "free %.2fMiB, total %.2fMiB, used %.2fMiB\n", free_hr, total_hr, total_hr - free_hr); } template< typename T> static inline void cpyHostToDevice( T *h_ptr, T *d_ptr, size_t size) { DEBUG_HANDLE_ERROR(cudaMemcpy( d_ptr, h_ptr, size * sizeof(T), cudaMemcpyHostToDevice)); }; template< typename T> static inline void cpyHostToDevice( T *h_ptr, T *d_ptr, size_t size, cudaStream_t stream) { DEBUG_HANDLE_ERROR(cudaMemcpyAsync( d_ptr, h_ptr, size * sizeof(T), cudaMemcpyHostToDevice, stream)); }; template< typename T> static inline void cpyDeviceToHost( T *d_ptr, T *h_ptr, size_t size) { DEBUG_HANDLE_ERROR(cudaMemcpy( h_ptr, d_ptr, size * sizeof(T), cudaMemcpyDeviceToHost)); }; template< typename T> static inline void cpyDeviceToHost( T *d_ptr, T *h_ptr, size_t size, 
cudaStream_t &stream) { DEBUG_HANDLE_ERROR(cudaMemcpyAsync( h_ptr, d_ptr, size * sizeof(T), cudaMemcpyDeviceToHost, stream)); }; template< typename T> static inline void cpyDeviceToDevice( T *src, T *des, size_t size, cudaStream_t &stream) { DEBUG_HANDLE_ERROR(cudaMemcpyAsync( des, src, size * sizeof(T), cudaMemcpyDeviceToDevice, stream)); }; template< typename T> static inline void memInit( T *ptr, T value, size_t size) { DEBUG_HANDLE_ERROR(cudaMemset( ptr, value, size * sizeof(T))); }; template< typename T> static inline void memInit( T *ptr, T value, size_t size, cudaStream_t &stream) { DEBUG_HANDLE_ERROR(cudaMemsetAsync( ptr, value, size * sizeof(T), stream)); }; } #endif //CUDA_SHORTCUTS_CUH_ relion-3.1.3/src/acc/data_types.h000066400000000000000000000062721411340063500166350ustar00rootroot00000000000000#ifndef ACC_DATA_TYPES_H_ #define ACC_DATA_TYPES_H_ #include "src/acc/acc_ptr.h" #include "src/multidim_array.h" namespace AccDataTypes { template class Image : public AccPtr { private: int x,y,z; bool fourier; //Is this a Fourier space data array public: /*====================================================== CONSTRUCTORS ======================================================*/ Image(AccPtrFactory &f): AccPtr(f.make()), x(0), y(0), z(0), fourier(false) {} Image(int xdim, AccPtrFactory &f): AccPtr(f.make(xdim)), x(xdim), y(1), z(1), fourier(false) {} Image(int xdim, int ydim, AccPtrFactory &f): AccPtr(f.make(xdim*ydim)), x(xdim), y(ydim), z(1), fourier(false) {} Image(int xdim, int ydim, int zdim, AccPtrFactory &f): AccPtr(f.make(xdim*ydim*zdim)), x(xdim), y(ydim), z(zdim), fourier(false) {} template Image(MultidimArray img, AccPtrFactory &f): AccPtr(f.make(img.nzyxdim)), x(img.xdim), y(img.ydim), z(img.zdim), fourier(false) {} Image(int box_dim, bool is_fourier, bool is3D, AccPtrFactory &f) { setSize(box_dim, is_fourier, is3D); AccPtr(f.make(x*y*z)); } /*====================================================== METHODS ======================================================*/ int getx() { return x; } int gety() { return y; } int getz() { return z; } int getxy() { return x*y; } int getxyz() { return AccPtr::getSize(); } bool is3D() { return z > 1; } void setSize(int box_dim, bool is_fourier, bool is3D) { fourier = is_fourier; if (is_fourier) { x = box_dim/2+1; y = box_dim; if (is3D) z = box_dim; else z = 1; } AccPtr::setSize(x*y*z); } void setSize(int xdim) { x = xdim; y = 1; z = 1; AccPtr::setSize(x); } void setSize(int xdim, int ydim) { x = xdim; y = ydim; z = 1; AccPtr::setSize(x*y); } void setSize(int xdim, int ydim, int zdim) { x = xdim; y = ydim; z = zdim; AccPtr::setSize(x*y*z); } template void setSize(MultidimArray img) { x = img.xdim; y = img.xdim; z = img.xdim; AccPtr::setSize(x*y*z); } template void setHost(MultidimArray &img) { if (img.xdim != x || img.ydim != y || img.zdim != z) { if (img.nzyxdim > AccPtr::getSize()) { AccPtr::freeIfSet(); setSize(img); AccPtr::hostAlloc(); } else setSize(img); } if (AccPtr::getHostPtr() == NULL) AccPtr::hostAlloc(); T *ptr = AccPtr::getHostPtr(); if (sizeof(T) == sizeof(T1)) memcpy(ptr, img.data, sizeof(T)*img.nzyxdim); else for (unsigned long i = 0; i < img.nzyxdim; i++) ptr[i] = (T) img.data[i]; } template void getHost(MultidimArray &img) { if(img.nzyxdim!=AccPtr::getSize()) { if(img.nzyxdim==0) img.resize(z,y,x); else CRITICAL("Trying to fill host-array with data from an array with different size!") } T *ptr = AccPtr::getHostPtr(); if (sizeof(T) == sizeof(T1)) memcpy(img.data, ptr, sizeof(T)*img.nzyxdim); else for (unsigned long i = 0; i 
< img.nzyxdim; i++) img.data[i] = (T1) ptr[i]; } }; } #endif relion-3.1.3/src/acc/settings.h000066400000000000000000000010171411340063500163300ustar00rootroot00000000000000#ifndef ACC_SETTINGS_H_ #define ACC_SETTINGS_H_ #include "src/macros.h" #ifdef ACC_DOUBLE_PRECISION #define XFLOAT double #ifndef CUDA typedef struct{ XFLOAT x; XFLOAT y;} double2; #endif #define ACCCOMPLEX double2 #else #define XFLOAT float #ifndef CUDA typedef struct{ XFLOAT x; XFLOAT y;} float2; #endif #define ACCCOMPLEX float2 #endif #ifdef ALTCPU #ifndef CUDA typedef float cudaStream_t; typedef double CudaCustomAllocator; #define cudaStreamPerThread 0 #endif #endif #endif /* ACC_SETTINGS_H_ */ relion-3.1.3/src/acc/utilities.h000066400000000000000000000535171411340063500165170ustar00rootroot00000000000000#ifndef ACC_UTILITIES_H_ #define ACC_UTILITIES_H_ #include "src/acc/acc_ptr.h" #include "src/acc/data_types.h" #include "src/error.h" #ifdef CUDA #include "src/acc/cuda/cuda_kernels/helper.cuh" #include "src/acc/cuda/cuda_kernels/wavg.cuh" #include "src/acc/cuda/cuda_kernels/diff2.cuh" #include "src/acc/cuda/cuda_fft.h" #else #include "src/acc/cpu/cpu_kernels/helper.h" #include "src/acc/cpu/cpu_kernels/wavg.h" #include "src/acc/cpu/cpu_kernels/diff2.h" #endif void dump_array(char *name, bool *ptr, size_t size); void dump_array(char *name, int *ptr, size_t size); void dump_array(char *name, size_t *ptr, size_t size); void dump_array(char *name, float *ptr, size_t size); void dump_complex_array(char *name, ACCCOMPLEX *ptr, size_t size); void dump_complex_array(char *name, Complex *ptr, size_t size); void dump_double_array(char *name, float *ptr, float *ptr2, size_t size); void dump_triple_array(char *name, float *ptr, float *ptr2, float *ptr3, size_t size); void dump_array(char *name, double *ptr, size_t size); void dump_double_array(char *name, double *ptr, double *ptr2, size_t size); void dump_triple_array(char *name, double *ptr, double *ptr2, double *ptr3, size_t size); namespace AccUtilities { template static void multiply(int block_size, AccDataTypes::Image &ptr, T value) { #ifdef CUDA int BSZ = ( (int) ceilf(( float)ptr.getSize() /(float)block_size)); CudaKernels::cuda_kernel_multi<<>>( ptr(), value, ptr.getSize()); #else CpuKernels::cpu_kernel_multi( ptr(), value, ptr.getSize()); #endif } template static void multiply(int MultiBsize, int block_size, cudaStream_t stream, T *array, T value, size_t size) { #ifdef CUDA CudaKernels::cuda_kernel_multi<<>>( array, value, size); #else CpuKernels::cpu_kernel_multi( array, value, size); #endif } template static void translate(int block_size, AccDataTypes::Image &in, AccDataTypes::Image &out, int dx, int dy, int dz=0) { if(in.getAccPtr()==out.getAccPtr()) CRITICAL(ERRUNSAFEOBJECTREUSE); #ifdef CUDA int BSZ = ( (int) ceilf(( float)in.getxyz() /(float)block_size)); if (in.is3D()) { CudaKernels::cuda_kernel_translate3D<<>>( in(), out(), in.getxyz(), in.getx(), in.gety(), in.getz(), dx, dy, dz); } else { CudaKernels::cuda_kernel_translate2D<<>>( in(), out(), in.getxyz(), in.getx(), in.gety(), dx, dy); } #else if (in.is3D()) { CpuKernels::cpu_translate3D( in(), out(), in.getxyz(), in.getx(), in.gety(), in.getz(), dx, dy, dz); } else { CpuKernels::cpu_translate2D( in(), out(), in.getxyz(), in.getx(), in.gety(), dx, dy); } #endif } template static T getSumOnDevice(AccPtr &ptr) { #ifdef CUDA return CudaKernels::getSumOnDevice(ptr); #else #ifdef DEBUG_CUDA if (ptr.getSize() == 0) printf("DEBUG_ERROR: getSumOnDevice called with pointer of zero size.\n"); if (ptr.getHostPtr() == 
NULL) printf("DEBUG_ERROR: getSumOnDevice called with null device pointer.\n"); #endif size_t size = ptr.getSize(); T sum = 0; for (size_t i=0; i static T getMinOnDevice(AccPtr &ptr) { #ifdef CUDA return CudaKernels::getMinOnDevice(ptr); #else #ifdef DEBUG_CUDA if (ptr.getSize() == 0) printf("DEBUG_ERROR: getMinOnDevice called with pointer of zero size.\n"); if (ptr.getHostPtr() == NULL) printf("DEBUG_ERROR: getMinOnDevice called with null device pointer.\n"); #endif return CpuKernels::getMin(ptr(), ptr.getSize()); #endif } template static T getMaxOnDevice(AccPtr &ptr) { #ifdef CUDA return CudaKernels::getMaxOnDevice(ptr); #else #ifdef DEBUG_CUDA if (ptr.getSize() == 0) printf("DEBUG_ERROR: getMaxOnDevice called with pointer of zero size.\n"); if (ptr.getHostPtr() == NULL) printf("DEBUG_ERROR: getMaxOnDevice called with null device pointer.\n"); #endif return CpuKernels::getMax(ptr(), ptr.getSize()); #endif } template static std::pair getArgMinOnDevice(AccPtr &ptr) { #ifdef CUDA return CudaKernels::getArgMinOnDevice(ptr); #else #ifdef DEBUG_CUDA if (ptr.getSize() == 0) printf("DEBUG_ERROR: getArgMinOnDevice called with pointer of zero size.\n"); if (ptr.getHostPtr() == NULL) printf("DEBUG_ERROR: getArgMinOnDevice called with null device pointer.\n"); #endif return CpuKernels::getArgMin(ptr(), ptr.getSize()); #endif } template static std::pair getArgMaxOnDevice(AccPtr &ptr) { #ifdef CUDA return CudaKernels::getArgMaxOnDevice(ptr); #else #ifdef DEBUG_CUDA if (ptr.getSize() == 0) printf("DEBUG_ERROR: getArgMaxOnDevice called with pointer of zero size.\n"); if (ptr.getHostPtr() == NULL) printf("DEBUG_ERROR: getArgMaxOnDevice called with null device pointer.\n"); #endif return CpuKernels::getArgMax(ptr(), ptr.getSize()); #endif } template static int filterGreaterZeroOnDevice(AccPtr &in, AccPtr &out) { #ifdef CUDA CudaKernels::MoreThanCubOpt moreThanOpt(0.); return CudaKernels::filterOnDevice(in, out, moreThanOpt); #else size_t arr_size = in.getSize(); size_t filt_size = 0; size_t outindex = 0; // Find how many entries the output array will have for(size_t i=0; i (T)0.0) filt_size++; } #ifdef DEBUG_CUDA if (filt_size==0) ACC_PTR_DEBUG_FATAL("filterGreaterZeroOnDevice - No filtered values greater than 0.\n"); #endif out.resizeHost(filt_size); // Now populate output array for(size_t i=0; i (T)0.0) { out[outindex] = in[i]; outindex++; } return filt_size; #endif } template static void sortOnDevice(AccPtr &in, AccPtr &out) { #ifdef CUDA CudaKernels::sortOnDevice(in, out); #else //TODO - convert ACCPTR to store data as vector so we don't need to make //an extra copies here. 
For now, nasty hack size_t arr_size = in.getSize(); std::vector sortVector(in(), in() + in.getSize()); sort(sortVector.begin(), sortVector.end()); for (size_t i=0; i < arr_size; i++) out[i] = sortVector[i]; #endif } template static void scanOnDevice(AccPtr &in, AccPtr &out) { #ifdef CUDA CudaKernels::scanOnDevice(in, out); #else T sum = 0.0; size_t arr_size = in.getSize(); for(size_t i=0; i &img_in, AccPtr &img_out, XFLOAT normcorr, RFLOAT xOff, RFLOAT yOff, RFLOAT zOff, bool DATA3D); static void softMaskBackgroundValue( int inblock_dim, int inblock_size, XFLOAT *vol, Image &img, XFLOAT radius, XFLOAT radius_p, XFLOAT cosine_width, XFLOAT *g_sum, XFLOAT *g_sum_bg); static void cosineFilter( int inblock_dim, int inblock_size, XFLOAT *vol, long int vol_size, long int xdim, long int ydim, long int zdim, long int xinit, long int yinit, long int zinit, bool do_Mnoise, XFLOAT radius, XFLOAT radius_p, XFLOAT cosine_width, XFLOAT sum_bg_total); template void powerClass(int in_gridSize, int in_blocksize, ACCCOMPLEX *g_image, XFLOAT *g_spectrum, size_t image_size, size_t spectrum_size, int xdim, int ydim, int zdim, int res_limit, XFLOAT *g_highres_Xi2) { #ifdef CUDA dim3 grid_size(in_gridSize); cuda_kernel_powerClass<<>>(g_image, g_spectrum, image_size, spectrum_size, xdim, ydim, zdim, res_limit, g_highres_Xi2); #else CpuKernels::powerClass(in_gridSize, g_image, g_spectrum, image_size, spectrum_size, xdim, ydim, zdim, res_limit, g_highres_Xi2); #endif } template void acc_make_eulers_2D(int grid_size, int block_size, cudaStream_t stream, XFLOAT *alphas, XFLOAT *eulers, unsigned long orientation_num) { #ifdef CUDA cuda_kernel_make_eulers_2D<<>>( alphas, eulers, orientation_num); #else CpuKernels::cpu_kernel_make_eulers_2D(grid_size, block_size, alphas, eulers, orientation_num); #endif } template void acc_make_eulers_3D(int grid_size, int block_size, cudaStream_t stream, XFLOAT *alphas, XFLOAT *betas, XFLOAT *gammas, XFLOAT *eulers, unsigned long orientation_num, XFLOAT *L, XFLOAT *R) { #ifdef CUDA cuda_kernel_make_eulers_3D<<>>( alphas, betas, gammas, eulers, orientation_num, L, R); #else CpuKernels::cpu_kernel_make_eulers_3D(grid_size, block_size, alphas, betas, gammas, eulers, orientation_num, L, R); #endif } #ifdef CUDA #define INIT_VALUE_BLOCK_SIZE 512 #endif template< typename T> void InitComplexValue(AccPtr &data, XFLOAT value) { #ifdef CUDA int grid_size = ceil((float)(data.getSize())/(float)INIT_VALUE_BLOCK_SIZE); cuda_kernel_init_complex_value<<< grid_size, INIT_VALUE_BLOCK_SIZE, 0, data.getStream() >>>( ~data, value, data.getSize(), INIT_VALUE_BLOCK_SIZE); #else size_t Size = data.getSize(); for(size_t i=0; i void InitValue(AccPtr &data, T value) { #ifdef CUDA int grid_size = ceil((float)data.getSize()/(float)INIT_VALUE_BLOCK_SIZE); cuda_kernel_init_value<<< grid_size, INIT_VALUE_BLOCK_SIZE, 0, data.getStream() >>>( ~data, value, data.getSize(), INIT_VALUE_BLOCK_SIZE); LAUNCH_HANDLE_ERROR(cudaGetLastError()); #else size_t Size = data.getSize(); for (size_t i=0; i < Size; i++) data[i] = value; #endif } template< typename T> void InitValue(AccPtr &data, T value, size_t Size) { #ifdef CUDA int grid_size = ceil((float)Size/(float)INIT_VALUE_BLOCK_SIZE); cuda_kernel_init_value<<< grid_size, INIT_VALUE_BLOCK_SIZE, 0, data.getStream() >>>( ~data, value, Size, INIT_VALUE_BLOCK_SIZE); #else for (size_t i=0; i < Size; i++) data[i] = value; #endif } void initOrientations(AccPtr &pdfs, AccPtr &pdf_orientation, AccPtr &pdf_orientation_zeros); void centerFFT_2D(int grid_size, int batch_size, int 
block_size, cudaStream_t stream, XFLOAT *img_in, size_t image_size, int xdim, int ydim, int xshift, int yshift); void centerFFT_2D(int grid_size, int batch_size, int block_size, XFLOAT *img_in, size_t image_size, int xdim, int ydim, int xshift, int yshift); void centerFFT_3D(int grid_size, int batch_size, int block_size, cudaStream_t stream, XFLOAT *img_in, size_t image_size, int xdim, int ydim, int zdim, int xshift, int yshift, int zshift); template void frequencyPass(int grid_size, int block_size, cudaStream_t stream, ACCCOMPLEX *A, long int ori_size, size_t Xdim, size_t Ydim, size_t Zdim, XFLOAT edge_low, XFLOAT edge_width, XFLOAT edge_high, XFLOAT angpix, size_t image_size) { #ifdef CUDA dim3 blocks(grid_size); cuda_kernel_frequencyPass<<>>( A, ori_size, Xdim, Ydim, Zdim, edge_low, edge_width, edge_high, angpix, image_size); #else CpuKernels::kernel_frequencyPass(grid_size, block_size, A, ori_size, Xdim, Ydim, Zdim, edge_low, edge_width, edge_high, angpix, image_size); #endif } template void kernel_wavg( XFLOAT *g_eulers, AccProjectorKernel &projector, unsigned long image_size, unsigned long orientation_num, XFLOAT *g_img_real, XFLOAT *g_img_imag, XFLOAT *g_trans_x, XFLOAT *g_trans_y, XFLOAT *g_trans_z, XFLOAT* g_weights, XFLOAT* g_ctfs, XFLOAT *g_wdiff2s_parts, XFLOAT *g_wdiff2s_AA, XFLOAT *g_wdiff2s_XA, unsigned long translation_num, XFLOAT weight_norm, XFLOAT significant_weight, XFLOAT part_scale, cudaStream_t stream) { #ifdef CUDA //We only want as many blocks as there are chunks of orientations to be treated //within the same block (this is done to reduce memory loads in the kernel). dim3 block_dim = orientation_num;//ceil((float)orientation_num/(float)REF_GROUP_SIZE); cuda_kernel_wavg<<>>( g_eulers, projector, image_size, orientation_num, g_img_real, g_img_imag, g_trans_x, g_trans_y, g_trans_z, g_weights, g_ctfs, g_wdiff2s_parts, g_wdiff2s_AA, g_wdiff2s_XA, translation_num, weight_norm, significant_weight, part_scale); #else if (DATA3D) { CpuKernels::wavg_3D( g_eulers, projector, image_size, orientation_num, g_img_real, g_img_imag, g_trans_x, g_trans_y, g_trans_z, g_weights, g_ctfs, g_wdiff2s_parts, g_wdiff2s_AA, g_wdiff2s_XA, translation_num, weight_norm, significant_weight, part_scale); } else { CpuKernels::wavg_ref3D( g_eulers, projector, image_size, orientation_num, g_img_real, g_img_imag, g_trans_x, g_trans_y, g_trans_z, g_weights, g_ctfs, g_wdiff2s_parts, g_wdiff2s_AA, g_wdiff2s_XA, translation_num, weight_norm, significant_weight, part_scale); } #endif } template void diff2_coarse( unsigned long grid_size, int block_size, XFLOAT *g_eulers, XFLOAT *trans_x, XFLOAT *trans_y, XFLOAT *trans_z, XFLOAT *g_real, XFLOAT *g_imag, AccProjectorKernel projector, XFLOAT *g_corr, XFLOAT *g_diff2s, unsigned long translation_num, unsigned long image_size, cudaStream_t stream ) { #ifdef CUDA cuda_kernel_diff2_coarse <<>>( g_eulers, trans_x, trans_y, trans_z, g_real, g_imag, projector, g_corr, g_diff2s, translation_num, image_size); #else #if 1 CpuKernels::diff2_coarse( grid_size, g_eulers, trans_x, trans_y, trans_z, g_real, g_imag, projector, g_corr, g_diff2s, translation_num, image_size ); #else if (DATA3D) CpuKernels::diff2_coarse_3D( grid_size, g_eulers, trans_x, trans_y, trans_z, g_real, g_imag, projector, g_corr, g_diff2s, translation_num, image_size); else CpuKernels::diff2_coarse_2D( grid_size, g_eulers, trans_x, trans_y, trans_z, g_real, g_imag, projector, g_corr, g_diff2s, translation_num, image_size); #endif #endif } template void diff2_CC_coarse( unsigned long grid_size, int 
block_size, XFLOAT *g_eulers, XFLOAT *g_imgs_real, XFLOAT *g_imgs_imag, XFLOAT *g_trans_x, XFLOAT *g_trans_y, XFLOAT *g_trans_z, AccProjectorKernel projector, XFLOAT *g_corr_img, XFLOAT *g_diff2s, unsigned long translation_num, unsigned long image_size, XFLOAT exp_local_sqrtXi2, cudaStream_t stream ) { #ifdef CUDA dim3 CCblocks(grid_size,translation_num); cuda_kernel_diff2_CC_coarse <<>>( g_eulers, g_imgs_real, g_imgs_imag, g_trans_x, g_trans_y, g_trans_z, projector, g_corr_img, g_diff2s, translation_num, image_size, exp_local_sqrtXi2); #else if (DATA3D) CpuKernels::diff2_CC_coarse_3D( grid_size, g_eulers, g_imgs_real, g_imgs_imag, g_trans_x, g_trans_y, g_trans_z, projector, g_corr_img, g_diff2s, translation_num, image_size, exp_local_sqrtXi2); else CpuKernels::diff2_CC_coarse_2D( grid_size, g_eulers, g_imgs_real, g_imgs_imag, g_trans_x, g_trans_y, projector, g_corr_img, g_diff2s, translation_num, image_size, exp_local_sqrtXi2); #endif } template void diff2_fine( unsigned long grid_size, int block_size, XFLOAT *g_eulers, XFLOAT *g_imgs_real, XFLOAT *g_imgs_imag, XFLOAT *trans_x, XFLOAT *trans_y, XFLOAT *trans_z, AccProjectorKernel projector, XFLOAT *g_corr_img, XFLOAT *g_diff2s, unsigned long image_size, XFLOAT sum_init, unsigned long orientation_num, unsigned long translation_num, unsigned long todo_blocks, unsigned long *d_rot_idx, unsigned long *d_trans_idx, unsigned long *d_job_idx, unsigned long *d_job_num, cudaStream_t stream ) { #ifdef CUDA dim3 block_dim = grid_size; cuda_kernel_diff2_fine <<>>( g_eulers, g_imgs_real, g_imgs_imag, trans_x, trans_y, trans_z, projector, g_corr_img, // in these non-CC kernels this is effectively an adjusted MinvSigma2 g_diff2s, image_size, sum_init, orientation_num, translation_num, todo_blocks, //significant_num, d_rot_idx, d_trans_idx, d_job_idx, d_job_num); #else // TODO - make use of orientation_num, translation_num,todo_blocks on // CPU side if CUDA starts to use if (DATA3D) CpuKernels::diff2_fine_3D( grid_size, g_eulers, g_imgs_real, g_imgs_imag, trans_x, trans_y, trans_z, projector, g_corr_img, // in these non-CC kernels this is effectively an adjusted MinvSigma2 g_diff2s, image_size, sum_init, orientation_num, translation_num, todo_blocks, //significant_num, d_rot_idx, d_trans_idx, d_job_idx, d_job_num); else CpuKernels::diff2_fine_2D( grid_size, g_eulers, g_imgs_real, g_imgs_imag, trans_x, trans_y, trans_z, projector, g_corr_img, // in these non-CC kernels this is effectively an adjusted MinvSigma2 g_diff2s, image_size, sum_init, orientation_num, translation_num, todo_blocks, //significant_num, d_rot_idx, d_trans_idx, d_job_idx, d_job_num); #endif } template void diff2_CC_fine( unsigned long grid_size, int block_size, XFLOAT *g_eulers, XFLOAT *g_imgs_real, XFLOAT *g_imgs_imag, XFLOAT *g_trans_x, XFLOAT *g_trans_y, XFLOAT *g_trans_z, AccProjectorKernel &projector, XFLOAT *g_corr_img, XFLOAT *g_diff2s, unsigned long image_size, XFLOAT sum_init, XFLOAT exp_local_sqrtXi2, unsigned long orientation_num, unsigned long translation_num, unsigned long todo_blocks, unsigned long *d_rot_idx, unsigned long *d_trans_idx, unsigned long *d_job_idx, unsigned long *d_job_num, cudaStream_t stream ) { #ifdef CUDA dim3 block_dim = grid_size; cuda_kernel_diff2_CC_fine <<>>( g_eulers, g_imgs_real, g_imgs_imag, g_trans_x, g_trans_y, g_trans_z, projector, g_corr_img, g_diff2s, image_size, sum_init, exp_local_sqrtXi2, orientation_num, translation_num, todo_blocks, d_rot_idx, d_trans_idx, d_job_idx, d_job_num); #else // TODO - Make use of orientation_num, 
translation_num, todo_blocks on // CPU side if CUDA starts to use if (DATA3D) CpuKernels::diff2_CC_fine_3D( grid_size, g_eulers, g_imgs_real, g_imgs_imag, g_trans_x, g_trans_y, g_trans_z, projector, g_corr_img, g_diff2s, image_size, sum_init, exp_local_sqrtXi2, orientation_num, translation_num, todo_blocks, d_rot_idx, d_trans_idx, d_job_idx, d_job_num); else CpuKernels::diff2_CC_fine_2D( grid_size, g_eulers, g_imgs_real, g_imgs_imag, g_trans_x, g_trans_y, projector, g_corr_img, g_diff2s, image_size, sum_init, exp_local_sqrtXi2, orientation_num, translation_num, todo_blocks, d_rot_idx, d_trans_idx, d_job_idx, d_job_num); #endif } template void kernel_weights_exponent_coarse( unsigned long num_classes, AccPtr &g_pdf_orientation, AccPtr &g_pdf_orientation_zeros, AccPtr &g_pdf_offset, AccPtr &g_pdf_offset_zeros, AccPtr &g_Mweight, T g_min_diff2, unsigned long nr_coarse_orient, unsigned long nr_coarse_trans) { long int block_num = ceilf( ((double)nr_coarse_orient*nr_coarse_trans*num_classes) / (double)SUMW_BLOCK_SIZE ); #ifdef CUDA cuda_kernel_weights_exponent_coarse <<>>( ~g_pdf_orientation, ~g_pdf_orientation_zeros, ~g_pdf_offset, ~g_pdf_offset_zeros, ~g_Mweight, g_min_diff2, nr_coarse_orient, nr_coarse_trans, nr_coarse_orient*nr_coarse_trans*num_classes); #else CpuKernels::weights_exponent_coarse( ~g_pdf_orientation, ~g_pdf_orientation_zeros, ~g_pdf_offset, ~g_pdf_offset_zeros, ~g_Mweight, g_min_diff2, nr_coarse_orient, nr_coarse_trans, ((size_t)nr_coarse_orient)*((size_t)nr_coarse_trans)*((size_t)num_classes)); #endif } template void kernel_exponentiate( AccPtr &array, T add) { int blockDim = (int) ceilf( (double)array.getSize() / (double)BLOCK_SIZE ); #ifdef CUDA cuda_kernel_exponentiate <<< blockDim,BLOCK_SIZE,0,array.getStream()>>> (~array, add, array.getSize()); #else CpuKernels::exponentiate (~array, add, array.getSize()); #endif } void kernel_exponentiate_weights_fine( int grid_size, int block_size, XFLOAT *g_pdf_orientation, XFLOAT *g_pdf_offset, XFLOAT *g_weights, unsigned long oversamples_orient, unsigned long oversamples_trans, unsigned long *d_rot_id, unsigned long *d_trans_idx, unsigned long *d_job_idx, unsigned long *d_job_num, long int job_num, cudaStream_t stream); }; // namespace AccUtilities #endif //ACC_UTILITIES_H_ relion-3.1.3/src/acc/utilities_impl.h000066400000000000000000000444621411340063500175370ustar00rootroot00000000000000#ifndef ACC_UTILITIES_IMPL_H_ #define ACC_UTILITIES_IMPL_H_ #include "src/acc/acc_ptr.h" #include "src/acc/data_types.h" #include "src/acc/acc_helper_functions.h" #ifdef CUDA #include "src/acc/cuda/cuda_kernels/helper.cuh" #include "src/acc/cuda/cuda_kernels/wavg.cuh" #include "src/acc/cuda/cuda_kernels/diff2.cuh" #include "src/acc/cuda/cuda_fft.h" #else #include "src/acc/cpu/cpu_kernels/helper.h" #include "src/acc/cpu/cpu_kernels/wavg.h" #include "src/acc/cpu/cpu_kernels/diff2.h" #endif void dump_array(char *name, bool *ptr, size_t size) { int count = 0; FILE *fp = fopen(name, "w"); fprintf(fp, "Array size: %ld\n", size); for (size_t i=0; i < size; i++) { fprintf(fp, "%d, ", ptr[i]); count++; if (count > 10) { fprintf(fp, "\n"); count = 0; } } fprintf(fp, "\n"); fflush(fp); fclose(fp); } void dump_array(char *name, int *ptr, size_t size) { int count = 0; FILE *fp = fopen(name, "w"); fprintf(fp, "Array size: %ld\n", size); for (size_t i=0; i < size; i++) { fprintf(fp, "%d, ", ptr[i]); count++; if (count > 10) { fprintf(fp, "\n"); count = 0; } } fprintf(fp, "\n"); fflush(fp); fclose(fp); } void dump_array(char *name, size_t *ptr, size_t size) { 
int count = 0; FILE *fp = fopen(name, "w"); fprintf(fp, "Array size: %ld\n", size); for (size_t i=0; i < size; i++) { fprintf(fp, "%zu, ", ptr[i]); count++; if (count > 10) { fprintf(fp, "\n"); count = 0; } } fprintf(fp, "\n"); fflush(fp); fclose(fp); } void dump_array(char *name, float *ptr, size_t size) { int count = 0; FILE *fp = fopen(name, "w"); fprintf(fp, "Array size: %ld\n", size); for (size_t i=0; i < size; i++) { fprintf(fp, "%f, ", ptr[i]); count++; if (count > 10) { fprintf(fp, "\n"); count = 0; } } fprintf(fp, "\n"); fflush(fp); fclose(fp); } void dump_complex_array(char *name, ACCCOMPLEX *ptr, size_t size) { int count = 0; FILE *fp = fopen(name, "w"); fprintf(fp, "Array size: %ld\n", size); for (size_t i=0; i < size; i++) { fprintf(fp, "%f,%f, ", ptr[i].x, ptr[i].y); count++; if (count > 10) { fprintf(fp, "\n"); count = 0; } } fprintf(fp, "\n"); fflush(fp); fclose(fp); } void dump_complex_array(char *name, Complex *ptr, size_t size) { int count = 0; FILE *fp = fopen(name, "w"); fprintf(fp, "Array size: %ld\n", size); for (size_t i=0; i < size; i++) { fprintf(fp, "%f,%f, ", ptr[i].real, ptr[i].imag); count++; if (count > 10) { fprintf(fp, "\n"); count = 0; } } fprintf(fp, "\n"); fflush(fp); fclose(fp); } void dump_double_array(char *name, float *ptr, float *ptr2, size_t size) { int count = 0; FILE *fp = fopen(name, "w"); fprintf(fp, "Array size: %ld\n", size); for (size_t i=0; i < size; i++) { fprintf(fp, "%f,%f, ", ptr[i], ptr2[i]); count++; if (count > 10) { fprintf(fp, "\n"); count = 0; } } fprintf(fp, "\n"); fflush(fp); fclose(fp); } void dump_triple_array(char *name, float *ptr, float *ptr2, float *ptr3, size_t size) { int count = 0; FILE *fp = fopen(name, "w"); fprintf(fp, "Array size: %ld\n", size); for (size_t i=0; i < size; i++) { fprintf(fp, "%f,%f,%f, ", ptr[i], ptr2[i], ptr3[i]); count++; if (count > 10) { fprintf(fp, "\n"); count = 0; } } fprintf(fp, "\n"); fflush(fp); fclose(fp); } void dump_array(char *name, double *ptr, size_t size) { int count = 0; FILE *fp = fopen(name, "w"); fprintf(fp, "Array size: %ld\n", size); for (size_t i=0; i < size; i++) { fprintf(fp, "%f, ", ptr[i]); count++; if (count > 10) { fprintf(fp, "\n"); count = 0; } } fprintf(fp, "\n"); fflush(fp); fclose(fp); } void dump_double_array(char *name, double *ptr, double *ptr2, size_t size) { int count = 0; FILE *fp = fopen(name, "w"); fprintf(fp, "Array size: %ld\n", size); for (size_t i=0; i < size; i++) { fprintf(fp, "%f,%f, ", ptr[i], ptr2[i]); count++; if (count > 10) { fprintf(fp, "\n"); count = 0; } } fprintf(fp, "\n"); fflush(fp); fclose(fp); } void dump_triple_array(char *name, double *ptr, double *ptr2, double *ptr3, size_t size) { int count = 0; FILE *fp = fopen(name, "w"); fprintf(fp, "Array size: %ld\n", size); for (size_t i=0; i < size; i++) { fprintf(fp, "%f,%f,%f, ", ptr[i], ptr2[i], ptr3[i]); count++; if (count > 10) { fprintf(fp, "\n"); count = 0; } } fprintf(fp, "\n"); fflush(fp); fclose(fp); } namespace AccUtilities { template void makeNoiseImage(XFLOAT sigmaFudgeFactor, MultidimArray &sigmaNoiseSpectra, long int seed, MlClass *accMLO, AccPtr &RandomImage, bool is3D) { // Different MPI-distributed subsets may otherwise have different instances of the random noise below, // because work is on an on-demand basis and therefore variable with the timing of distinct nodes... 
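	// (Illustrative aside, not in the original source: if the noise were drawn from a
	//  single shared, on-demand RNG stream, a run with 2 MPI ranks might give rank A the
	//  draws for particles {1,3} and rank B those for {2,4}, while a run with 4 ranks
	//  would split them differently, so the "same" particle would receive different
	//  noise from run to run.)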
// Have the seed based on the part_id, so that each particle has a different instant of the noise init_random_generator(seed); // Make a holder for the spectral profile and copy to the GPU // AccDataTypes::Image NoiseSpectra(sigmaNoiseSpectra, ptrFactory); AccPtr NoiseSpectra = RandomImage.make(sigmaNoiseSpectra.nzyxdim); NoiseSpectra.allAlloc(); FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(sigmaNoiseSpectra) NoiseSpectra[n] = (XFLOAT)sqrt(sigmaFudgeFactor*sigmaNoiseSpectra.data[n]); #ifdef CUDA // Set up states to seeda and run randomization on the GPU // AccDataTypes::Image RandomStates(RND_BLOCK_NUM*RND_BLOCK_SIZE,ptrFactory); AccPtr RandomStates = RandomImage.make(RND_BLOCK_NUM*RND_BLOCK_SIZE); RandomStates.deviceAlloc(); NoiseSpectra.cpToDevice(); NoiseSpectra.streamSync(); LAUNCH_PRIVATE_ERROR(cudaGetLastError(),accMLO->errorStatus); // Initialize randomization by particle ID, like on the CPU-side cuda_kernel_initRND<<>>( seed, ~RandomStates); LAUNCH_PRIVATE_ERROR(cudaGetLastError(),accMLO->errorStatus); // Create noise image with the correct spectral profile if(is3D) { cuda_kernel_RNDnormalDitributionComplexWithPowerModulation3D<<>>( ~accMLO->transformer1.fouriers, ~RandomStates, accMLO->transformer1.xFSize, accMLO->transformer1.yFSize, ~NoiseSpectra); } else { cuda_kernel_RNDnormalDitributionComplexWithPowerModulation2D<<>>( ~accMLO->transformer1.fouriers, ~RandomStates, accMLO->transformer1.xFSize, ~NoiseSpectra); } LAUNCH_PRIVATE_ERROR(cudaGetLastError(),accMLO->errorStatus); // Transform to real-space, to get something which look like // the particle image without actual signal (a particle) accMLO->transformer1.backward(); // Copy the randomized image to A separate device-array, so that the // transformer can be used to set up the actual particle image accMLO->transformer1.reals.cpOnDevice(~RandomImage); //cudaMLO->transformer1.reals.streamSync(); #else // Create noise image with the correct spectral profile if(is3D) CpuKernels::RNDnormalDitributionComplexWithPowerModulation3D(accMLO->transformer1.fouriers(), accMLO->transformer1.xFSize, accMLO->transformer1.yFSize, ~NoiseSpectra); else CpuKernels::RNDnormalDitributionComplexWithPowerModulation2D(accMLO->transformer1.fouriers(), accMLO->transformer1.xFSize, ~NoiseSpectra); // Transform to real-space, to get something which look like // the particle image without actual signal (a particle) accMLO->transformer1.backward(); // Copy the randomized image to A separate device-array, so that the // transformer can be used to set up the actual particle image for(size_t i=0; itransformer1.reals[i]; #endif } static void TranslateAndNormCorrect(MultidimArray &img_in, AccPtr &img_out, XFLOAT normcorr, RFLOAT xOff, RFLOAT yOff, RFLOAT zOff, bool DATA3D) { //Temporary array because translate is out-of-place AccPtr temp = img_out.make(img_in.nzyxdim); temp.allAlloc(); for (unsigned long i = 0; i < img_in.nzyxdim; i++) temp[i] = (XFLOAT) img_in.data[i]; temp.cpToDevice(); temp.streamSync(); // Apply the norm_correction term if (normcorr!=1) { #ifdef CUDA int BSZ = ( (int) ceilf(( float)temp.getSize() /(float)BLOCK_SIZE)); CudaKernels::cuda_kernel_multi<<>>(temp(),normcorr,temp.getSize()); #else CpuKernels::cpu_kernel_multi(temp(),normcorr, temp.getSize()); #endif } //LAUNCH_PRIVATE_ERROR(cudaGetLastError(),accMLO->errorStatus); if(temp.getAccPtr()==img_out.getAccPtr()) CRITICAL(ERRUNSAFEOBJECTREUSE); #ifdef CUDA int BSZ = ( (int) ceilf(( float)temp.getSize() /(float)BLOCK_SIZE)); if (DATA3D) 
CudaKernels::cuda_kernel_translate3D<<>>(temp(),img_out(),img_in.zyxdim,img_in.xdim,img_in.ydim,img_in.zdim,xOff,yOff,zOff); else CudaKernels::cuda_kernel_translate2D<<>>(temp(),img_out(),img_in.zyxdim,img_in.xdim,img_in.ydim,xOff,yOff); //LAUNCH_PRIVATE_ERROR(cudaGetLastError(),accMLO->errorStatus); #else if (DATA3D) CpuKernels::cpu_translate3D(temp(),img_out(),img_in.zyxdim,img_in.xdim,img_in.ydim,img_in.zdim,xOff,yOff,zOff); else CpuKernels::cpu_translate2D(temp(),img_out(),img_in.zyxdim,img_in.xdim,img_in.ydim,xOff,yOff); #endif } template void normalizeAndTransformImage( AccPtr &img_in, MultidimArray &img_out, MlClass *accMLO, size_t xSize, size_t ySize, size_t zSize) { img_in.cpOnAcc(accMLO->transformer1.reals); runCenterFFT( accMLO->transformer1.reals, (int)accMLO->transformer1.xSize, (int)accMLO->transformer1.ySize, (int)accMLO->transformer1.zSize, false ); accMLO->transformer1.reals.streamSync(); accMLO->transformer1.forward(); accMLO->transformer1.fouriers.streamSync(); size_t FMultiBsize = ( (int) ceilf(( float)accMLO->transformer1.fouriers.getSize()*2/(float)BLOCK_SIZE)); AccUtilities::multiply(FMultiBsize, BLOCK_SIZE, accMLO->transformer1.fouriers.getStream(), (XFLOAT*)~accMLO->transformer1.fouriers, (XFLOAT)1/((XFLOAT)(accMLO->transformer1.reals.getSize())), accMLO->transformer1.fouriers.getSize()*2); //LAUNCH_PRIVATE_ERROR(cudaGetLastError(),accMLO->errorStatus); AccPtr d_Fimg = img_in.make(xSize * ySize * zSize); d_Fimg.allAlloc(); accMLO->transformer1.fouriers.streamSync(); windowFourierTransform2( accMLO->transformer1.fouriers, d_Fimg, accMLO->transformer1.xFSize,accMLO->transformer1.yFSize, accMLO->transformer1.zFSize, //Input dimensions xSize, ySize, zSize //Output dimensions ); accMLO->transformer1.fouriers.streamSync(); d_Fimg.cpToHost(); d_Fimg.streamSync(); img_out.initZeros(zSize, ySize, xSize); for (unsigned long i = 0; i < img_out.nzyxdim; i ++) { img_out.data[i].real = (RFLOAT) d_Fimg[i].x; img_out.data[i].imag = (RFLOAT) d_Fimg[i].y; } } static void softMaskBackgroundValue( AccDataTypes::Image &vol, XFLOAT radius, XFLOAT radius_p, XFLOAT cosine_width, AccPtr &g_sum, AccPtr &g_sum_bg) { int block_dim = 128; //TODO: set balanced (hardware-dep?) #ifdef CUDA cuda_kernel_softMaskBackgroundValue<<>>( ~vol, vol.getxyz(), vol.getx(), vol.gety(), vol.getz(), vol.getx()/2, vol.gety()/2, vol.getz()/2, radius, radius_p, cosine_width, ~g_sum, ~g_sum_bg); #else CpuKernels::softMaskBackgroundValue( block_dim, SOFTMASK_BLOCK_SIZE, ~vol, vol.getxyz(), vol.getx(), vol.gety(), vol.getz(), vol.getx()/2, vol.gety()/2, vol.getz()/2, radius, radius_p, cosine_width, ~g_sum, ~g_sum_bg); #endif } static void cosineFilter( AccDataTypes::Image &vol, bool do_Mnoise, AccDataTypes::Image Noise, XFLOAT radius, XFLOAT radius_p, XFLOAT cosine_width, XFLOAT sum_bg_total) { int block_dim = 128; //TODO: set balanced (hardware-dep?) 
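	// (Descriptive note, inferred from the CPU branch below rather than stated in the
	//  original: block_dim is the number of work chunks / thread blocks, each of
	//  SOFTMASK_BLOCK_SIZE threads; the fixed value of 128 is independent of the volume
	//  size, which is what the TODO above suggests tuning per device.)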
#ifdef CUDA cuda_kernel_cosineFilter<<>>( ~vol, vol.getxyz(), vol.getx(), vol.gety(), vol.getz(), vol.getx()/2, vol.gety()/2, vol.getz()/2, !do_Mnoise, ~Noise, radius, radius_p, cosine_width, sum_bg_total); #else CpuKernels::cosineFilter( block_dim, SOFTMASK_BLOCK_SIZE, ~vol, vol.getxyz(), vol.getx(), vol.gety(), vol.getz(), vol.getx()/2, vol.gety()/2, vol.getz()/2, !do_Mnoise, ~Noise, radius, radius_p, cosine_width, sum_bg_total); #endif } void initOrientations(AccPtr &pdfs, AccPtr &pdf_orientation, AccPtr &pdf_orientation_zeros) { #ifdef CUDA int bs = 512; int gs = ceil(pdfs.getSize()/(float)(bs)); cuda_kernel_initOrientations<<>>(~pdfs, ~pdf_orientation, ~pdf_orientation_zeros, pdfs.getSize()); LAUNCH_HANDLE_ERROR(cudaGetLastError()); #else for(int iorientclass=0; iorientclass< pdfs.getSize(); iorientclass++) { if (pdfs[iorientclass] == 0) { pdf_orientation[iorientclass] = 0.f; pdf_orientation_zeros[iorientclass] = true; } else { pdf_orientation[iorientclass] = log(pdfs[iorientclass]); pdf_orientation_zeros[iorientclass] = false; } } #endif } void centerFFT_2D(int grid_size, int batch_size, int block_size, cudaStream_t stream, XFLOAT *img_in, size_t image_size, int xdim, int ydim, int xshift, int yshift) { #ifdef CUDA dim3 blocks(grid_size, batch_size); cuda_kernel_centerFFT_2D<<>>( img_in, image_size, xdim, ydim, xshift, yshift); #else CpuKernels::centerFFT_2D(batch_size, 0, image_size/2, img_in, image_size, xdim, ydim, xshift, yshift); #endif } void centerFFT_2D(int grid_size, int batch_size, int block_size, XFLOAT *img_in, size_t image_size, int xdim, int ydim, int xshift, int yshift) { #ifdef CUDA dim3 blocks(grid_size, batch_size); cuda_kernel_centerFFT_2D<<>>( img_in, image_size, xdim, ydim, xshift, yshift); #else CpuKernels::centerFFT_2D(batch_size, 0, image_size/2, img_in, image_size, xdim, ydim, xshift, yshift); #endif } void centerFFT_3D(int grid_size, int batch_size, int block_size, cudaStream_t stream, XFLOAT *img_in, size_t image_size, int xdim, int ydim, int zdim, int xshift, int yshift, int zshift) { #ifdef CUDA dim3 blocks(grid_size, batch_size); cuda_kernel_centerFFT_3D<<>>( img_in, image_size, xdim, ydim, zdim, xshift, yshift, zshift); #else CpuKernels::centerFFT_3D(batch_size, (size_t)0, (size_t)image_size/2, img_in, image_size, xdim, ydim, zdim, xshift, yshift, zshift); #endif } void kernel_exponentiate_weights_fine( XFLOAT *g_pdf_orientation, bool *g_pdf_orientation_zeros, XFLOAT *g_pdf_offset, bool *g_pdf_offset_zeros, XFLOAT *g_weights, XFLOAT min_diff2, unsigned long oversamples_orient, unsigned long oversamples_trans, unsigned long *d_rot_id, unsigned long *d_trans_idx, unsigned long *d_job_idx, unsigned long *d_job_num, long int job_num, cudaStream_t stream) { long block_num = ceil((double)job_num / (double)SUMW_BLOCK_SIZE); #ifdef CUDA cuda_kernel_exponentiate_weights_fine<<>>( g_pdf_orientation, g_pdf_orientation_zeros, g_pdf_offset, g_pdf_offset_zeros, g_weights, min_diff2, oversamples_orient, oversamples_trans, d_rot_id, d_trans_idx, d_job_idx, d_job_num, job_num); #else CpuKernels::exponentiate_weights_fine( g_pdf_orientation, g_pdf_orientation_zeros, g_pdf_offset, g_pdf_offset_zeros, g_weights, min_diff2, oversamples_orient, oversamples_trans, d_rot_id, d_trans_idx, d_job_idx, d_job_num, job_num); #endif } }; // namespace AccUtilities void run_griddingCorrect(RFLOAT *vol, int interpolator, RFLOAT rrval, RFLOAT r_min_nn, size_t iX, size_t iY, size_t iZ) { #ifdef CUDA dim3 bs(32,4,2); dim3 gs(ceil(iX/(float)bs.x), ceil(iY/(float)bs.y), 
ceil(iZ/(float)bs.z)); cuda_kernel_griddingCorrect<<>>(vol, interpolator, rrval, r_min_nn, iX, iY, iZ); LAUNCH_HANDLE_ERROR(cudaGetLastError()); #endif } void run_padTranslatedMap( RFLOAT *d_in, RFLOAT *d_out, size_t isX, size_t ieX, size_t isY, size_t ieY, size_t isZ, size_t ieZ, //Input dimensions size_t osX, size_t oeX, size_t osY, size_t oeY, size_t osZ, size_t oeZ, //Output dimensions cudaStream_t stream) { #ifdef CUDA size_t iszX = ieX - isX + 1; size_t iszY = ieY - isY + 1; size_t iszZ = ieZ - isZ + 1; size_t oszX = oeX - osX + 1; size_t oszY = oeY - osY + 1; size_t oszZ = oeZ - osZ + 1; if(iszX == oszX && iszY == oszY && iszZ == oszZ) { cudaCpyDeviceToDevice(d_in, d_out, iszX*iszY*iszZ, stream); } else { dim3 block_dim(16,4,2); dim3 grid_dim(ceil(oszX / (float) block_dim.x), ceil(oszY / (float) block_dim.y), ceil(oszZ / (float) block_dim.z)); cuda_kernel_window_transform<<< grid_dim, block_dim, 0, stream >>>( d_in, d_out, iszX, iszY, iszZ, //Input dimensions isX-osX, isY-osY, isZ-osZ, oszX, oszY, oszZ //Output dimensions ); LAUNCH_HANDLE_ERROR(cudaGetLastError()); } #endif } void run_CenterFFTbySign(Complex *img_in, int xSize, int ySize, int zSize, cudaStream_t stream) { #ifdef CUDA dim3 bs(32,4,2); dim3 gs(ceil(xSize/(float)bs.x), ceil(ySize/(float)bs.y), ceil(zSize/(float)bs.z)); if(sizeof(RFLOAT) == sizeof(double)) cuda_kernel_centerFFTbySign<<>>( (double2*)img_in, xSize, ySize, zSize); else cuda_kernel_centerFFTbySign<<>>( (float2*)img_in, xSize, ySize, zSize); LAUNCH_HANDLE_ERROR(cudaGetLastError()); #endif } #endif //ACC_UTILITIES_H_ relion-3.1.3/src/apps/000077500000000000000000000000001411340063500145355ustar00rootroot00000000000000relion-3.1.3/src/apps/CMakeLists.txt000066400000000000000000000237301411340063500173020ustar00rootroot00000000000000include_directories("${CMAKE_SOURCE_DIR}") message("Running apps/CMakeLists.txt...") if(NOT MKLFFT) include_directories("${FFTW_INCLUDES}") endif(NOT MKLFFT) #include_directories(${CMAKE_BINARY_DIR}/include) find_path (X11_INCLUDES Xdbe.h) message(STATUS "CMAKE_BINARY_DIR:" ${CMAKE_BINARY_DIR}) file(GLOB REL_GUI_SRC "${CMAKE_SOURCE_DIR}/src/manualpicker.cpp" "${CMAKE_SOURCE_DIR}/src/gui_*.cpp" "${CMAKE_SOURCE_DIR}/src/displayer.cpp") # AUTOMATIC VERSIONING # Reference: https://stackoverflow.com/questions/1435953/how-can-i-pass-git-sha1-to-compiler-as-definition-using-cmake list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/") include(GetGitRevisionDescription) get_git_head_revision(GIT_REFSPEC GIT_SHA1_FULL) set(RELION_VERSION_SUFFIX "") if (GIT_SHA1_FULL) string(SUBSTRING ${GIT_SHA1_FULL} 0 6 GIT_SHA1_PREFIX) if (NOT GIT_SHA1_PREFIX STREQUAL "GITDIR") message(STATUS "Git commit ID: ${GIT_SHA1_FULL}") set(RELION_VERSION_SUFFIX "-commit-${GIT_SHA1_PREFIX}") endif() endif() configure_file("${CMAKE_SOURCE_DIR}/src/macros.cpp.in" "${CMAKE_BINARY_DIR}/macros.cpp" @ONLY) if (ALTCPU) file(GLOB REL_SRC "${CMAKE_SOURCE_DIR}/src/*.cpp" "${CMAKE_BINARY_DIR}/macros.cpp" "${CMAKE_SOURCE_DIR}/src/*.c" "${CMAKE_SOURCE_DIR}/src/acc/cpu/*.cpp" "${CMAKE_SOURCE_DIR}/src/acc/cpu/cpu_kernels/*.cpp" ) file(GLOB REL_SRC_H "${CMAKE_SOURCE_DIR}/src/*.h" "${CMAKE_SOURCE_DIR}/src/acc/*.h" "${CMAKE_SOURCE_DIR}/src/acc/cpu/*.h" "${CMAKE_SOURCE_DIR}/src/acc/cpu/cpu_kernels/*.h" ) else() file(GLOB REL_SRC "${CMAKE_SOURCE_DIR}/src/*.cpp" "${CMAKE_BINARY_DIR}/macros.cpp" "${CMAKE_SOURCE_DIR}/src/*.c" "${CMAKE_SOURCE_DIR}/src/acc/*.cpp" ) file(GLOB REL_SRC_H "${CMAKE_SOURCE_DIR}/src/*.h" "${CMAKE_SOURCE_DIR}/src/acc/*.h" ) endif(ALTCPU) # Remove GUI 
files from relion_lib
foreach(GUI_SRC_FILE ${REL_GUI_SRC})
    list(REMOVE_ITEM REL_SRC "${GUI_SRC_FILE}")
endforeach()

file(GLOB REL_JAZ_SRC "${CMAKE_SOURCE_DIR}/src/jaz/*.cpp")
file(GLOB REL_JAZ_H "${CMAKE_SOURCE_DIR}/src/jaz/*.h")
file(GLOB REL_JAZ_OPT_SRC "${CMAKE_SOURCE_DIR}/src/jaz/optimization/*.cpp")
file(GLOB REL_JAZ_OPT_H "${CMAKE_SOURCE_DIR}/src/jaz/optimization/*.h")
file(GLOB REL_JAZ_CTF_SRC "${CMAKE_SOURCE_DIR}/src/jaz/ctf/*.cpp")
file(GLOB REL_JAZ_CTF_H "${CMAKE_SOURCE_DIR}/src/jaz/ctf/*.h")
file(GLOB REL_JAZ_MOTION_SRC "${CMAKE_SOURCE_DIR}/src/jaz/motion/*.cpp")
file(GLOB REL_JAZ_MOTION_H "${CMAKE_SOURCE_DIR}/src/jaz/motion/*.h")
file(GLOB REL_JAZ_MATH_SRC "${CMAKE_SOURCE_DIR}/src/jaz/math/*.cpp")
file(GLOB REL_JAZ_MATH_H "${CMAKE_SOURCE_DIR}/src/jaz/math/*.h")
file(GLOB REL_JAZ_IMG_PROC_SRC "${CMAKE_SOURCE_DIR}/src/jaz/img_proc/*.cpp")
file(GLOB REL_JAZ_IMG_PROC_H "${CMAKE_SOURCE_DIR}/src/jaz/img_proc/*.h")
file(GLOB REL_JAZ_TOMO_SRC "${CMAKE_SOURCE_DIR}/src/jaz/tomo/*.cpp")
file(GLOB REL_JAZ_TOMO_H "${CMAKE_SOURCE_DIR}/src/jaz/tomo/*.h")
file(GLOB REL_JAZ_IO_SRC "${CMAKE_SOURCE_DIR}/src/jaz/io/*.cpp")
file(GLOB REL_JAZ_IO_H "${CMAKE_SOURCE_DIR}/src/jaz/io/*.h")
file(GLOB REL_D3x3_SRC "${CMAKE_SOURCE_DIR}/src/jaz/d3x3/*.c")
file(GLOB REL_D3x3_H "${CMAKE_SOURCE_DIR}/src/jaz/d3x3/*.h")
file(GLOB REL_LBFGS_SRC "${CMAKE_SOURCE_DIR}/src/jaz/lbfgs/*.c")
file(GLOB REL_LBFGS_H "${CMAKE_SOURCE_DIR}/src/jaz/lbfgs/*.h")
file(GLOB REL_HP "${CMAKE_SOURCE_DIR}/src/Healpix_2.15a/*.cc")
file(GLOB RELION_TARGETS "${CMAKE_SOURCE_DIR}/src/apps/*.cpp")

set(GUI_TARGETS maingui display manualpick)

#--Remove apps using X11 if no GUI--
if(NOT GUI)
    foreach(TARGET ${GUI_TARGETS})
        list(REMOVE_ITEM RELION_TARGETS "${CMAKE_SOURCE_DIR}/src/apps/${TARGET}.cpp")
    endforeach()
endif(NOT GUI)

#--Remove apps for testing--
#SET(RELION_TEST TRUE)
set(TEST_TARGETS movie_reconstruct double_reconstruct_openmp cs_fit ctf_nyquist_test
    free_aberration_plot split_stack defocus_stats double_bfac_fit interpolation_test
    motion_diff paper_data_synth Zernike_test vis_delocalisation vis_Ewald_weight)

if(NOT RELION_TEST)
    foreach(TARGET ${TEST_TARGETS})
        list(REMOVE_ITEM RELION_TARGETS "${CMAKE_SOURCE_DIR}/src/apps/${TARGET}.cpp")
    endforeach()
endif(NOT RELION_TEST)

# relion_lib is STATIC or SHARED type based on BUILD_SHARED_LIBS=ON/OFF
# relion_lib only contains non-X11 parts
# relion_gui_lib is where the X11 code is placed
if(BUILD_SHARED_LIBS)
    add_library(relion_lib SHARED ${REL_SRC} ${REL_SRC_H} ${REL_HP}
        ${REL_JAZ_SRC} ${REL_JAZ_H} ${REL_JAZ_OPT_SRC} ${REL_JAZ_OPT_H}
        ${REL_JAZ_CTF_SRC} ${REL_JAZ_CTF_H} ${REL_JAZ_MOTION_SRC} ${REL_JAZ_MOTION_H}
        ${REL_JAZ_MATH_SRC} ${REL_JAZ_MATH_H} ${REL_JAZ_IMG_PROC_SRC} ${REL_JAZ_IMG_PROC_H}
        ${REL_JAZ_TOMO_SRC} ${REL_JAZ_TOMO_H} ${REL_JAZ_IO_SRC} ${REL_JAZ_IO_H}
        ${REL_D3x3_SRC} ${REL_D3x3_H} ${REL_LBFGS_SRC} ${REL_LBFGS_H})
    install(TARGETS relion_lib LIBRARY DESTINATION lib)
    if(GUI)
        add_library(relion_gui_lib SHARED ${REL_GUI_SRC} ${REL_SRC_H} ${REL_HP})
        install(TARGETS relion_gui_lib LIBRARY DESTINATION lib)
    endif(GUI)
else()
    add_library(relion_lib STATIC ${REL_SRC} ${REL_SRC_H} ${REL_HP}
        ${REL_JAZ_SRC} ${REL_JAZ_H} ${REL_JAZ_OPT_SRC} ${REL_JAZ_OPT_H}
        ${REL_JAZ_CTF_SRC} ${REL_JAZ_CTF_H} ${REL_JAZ_MOTION_SRC} ${REL_JAZ_MOTION_H}
        ${REL_JAZ_MATH_SRC} ${REL_JAZ_MATH_H} ${REL_JAZ_IMG_PROC_SRC} ${REL_JAZ_IMG_PROC_H}
        ${REL_JAZ_TOMO_SRC} ${REL_JAZ_TOMO_H} ${REL_JAZ_IO_SRC} ${REL_JAZ_IO_H}
        ${REL_D3x3_SRC} ${REL_D3x3_H} ${REL_LBFGS_SRC} ${REL_LBFGS_H})
    if(GUI)
        add_library(relion_gui_lib STATIC ${REL_GUI_SRC} ${REL_SRC_H} ${REL_HP})
    endif(GUI)
endif()

if(NOT MKLFFT)
    target_link_libraries(relion_lib ${FFTW_LIBRARIES})
    if(BUILD_OWN_FFTW)
        add_dependencies(relion_lib own_fftw_lib)
    endif()
    if(BUILD_OWN_FFTWF)
        add_dependencies(relion_lib own_fftwf_lib)
    endif()
endif(NOT MKLFFT)

if(GUI)
    include_directories("${FLTK_INCLUDE_DIR}")
    target_link_libraries(relion_gui_lib relion_lib ${FLTK_LIBRARIES})
    if(BUILD_OWN_FLTK)
        add_dependencies(relion_gui_lib OWN_FLTK)
    endif()
endif(GUI)

if (CUDA_FOUND)
    file(GLOB REL_CUDA_SRC "${CMAKE_SOURCE_DIR}/src/acc/cuda/*.cu"
        "${CMAKE_SOURCE_DIR}/src/acc/cuda/cuda_kernels/*.cu" )
    cuda_add_library(relion_gpu_util ${REL_CUDA_SRC})

    if (${CMAKE_BUILD_TYPE_LOWER} STREQUAL "profiling")
        find_library(NV_TOOLS_LIBRARIES NAMES nvToolsExt PATHS ${CUDA_TOOLKIT_ROOT_DIR}/lib ${CUDA_TOOLKIT_ROOT_DIR}/lib64)
        list(APPEND EXTRA_LIBS "${NV_TOOLS_LIBRARIES}")
        target_link_libraries(relion_gpu_util ${NV_TOOLS_LIBRARIES})
        message(STATUS "Adding extra library for NVIDIA profiling: ${NV_TOOLS_LIBRARIES}")
    endif()

    # Presently we have a number of (bad) circular dependencies between the gpu util
    # and relion libraries, which cause errors at least on OS X with clang. Tell the
    # compiler to ignore them.
    if(APPLE)
        set(new_link_flags "-undefined suppress -flat_namespace")
        get_target_property(existing_link_flags relion_gpu_util LINK_FLAGS)
        if(existing_link_flags)
            set(new_link_flags "${existing_link_flags} ${new_link_flags}")
        endif()
        set_target_properties(relion_gpu_util PROPERTIES LINK_FLAGS "${new_link_flags}")
    endif()

    list(APPEND EXTRA_LIBS "${CUDA_CUFFT_LIBRARIES}")

    if(BUILD_SHARED_LIBS)
        install (TARGETS relion_gpu_util LIBRARY DESTINATION lib)
    else()
        target_link_libraries(relion_gpu_util relion_lib)
        target_link_libraries(relion_gpu_util ${CUDA_CUFFT_LIBRARIES})
    endif()

    target_link_libraries(relion_lib relion_gpu_util ${CUDA_CUFFT_LIBRARIES})
    target_link_libraries(relion_lib relion_gpu_util ${CUDA_CUFFT_LIBRARIES} ${CUDA_curand_LIBRARY})
endif(CUDA_FOUND)

if(TIFF_FOUND)
    #message("TIFF FOUND")
    include_directories(${TIFF_INCLUDE_DIRS})
    target_link_libraries(relion_lib ${TIFF_LIBRARIES})
else()
    #message("TIFF NOT FOUND")
endif()

if(PNG_FOUND)
    #message("PNG FOUND")
    include_directories(${PNG_INCLUDE_DIRS})
    target_link_libraries(relion_lib ${PNG_LIBRARY})
else()
    #message("PNG NOT FOUND")
endif()

if(BUILD_OWN_TBB)
    add_dependencies(relion_lib OWN_TBB)
endif()

foreach (_target ${RELION_TARGETS})
    GET_FILENAME_COMPONENT(_target "relion_${_target}" NAME_WE) #specify target name WE=WithoutExtension

    if(${_target} STREQUAL "maingui")
        # We no longer want "relion_maingui"
        set(_target "relion")
        add_executable(${_target} maingui.cpp)
    else()
        add_executable(${_target} ${_target}.cpp )
        set_target_properties(${_target} PROPERTIES PREFIX "relion_")
    endif()

    set(LIB relion_lib)
    add_dependencies(${_target} relion_lib)

    if(NOT MKLFFT)
        target_link_libraries(${_target} ${LIB} ${EXTRA_LIBS} ${MPI_LIBRARIES} ${CMAKE_DL_LIBS})
    else()
        target_link_libraries(${_target} ${LIB} ${FFTW_LIBRARIES} ${EXTRA_LIBS} ${MPI_LIBRARIES} ${CMAKE_DL_LIBS})
    endif(NOT MKLFFT)

    if(CUDA_FOUND)
        target_link_libraries(${_target} relion_gpu_util)
    endif(CUDA_FOUND)

    if (ALTCPU)
        target_link_libraries(${_target} ${TBB_LIBRARIES})
    endif(ALTCPU)

    set_target_properties(${_target} PROPERTIES CXX_STANDARD 11 CXX_STANDARD_REQUIRED ON CXX_EXTENSIONS OFF)

    list(FIND GUI_TARGETS ${_target} IS_GUI_TARGET)
    if((NOT ${IS_GUI_TARGET} LESS 0) OR (${_target} STREQUAL "relion"))
        add_dependencies(${_target} relion_gui_lib)
        target_link_libraries(${_target} relion_gui_lib
${FLTK_LIBRARIES} ${X11}) endif() if(TIFF_FOUND) target_link_libraries(${_target} ${TIFF_LIBRARIES}) endif() #message(STATUS "added ${_target}...") install (TARGETS ${_target} RUNTIME DESTINATION bin) endforeach() FIND_PACKAGE( OpenMP REQUIRED) if(OPENMP_FOUND) #message("OPENMP FOUND") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") target_link_libraries(relion_lib ${OpenMP_omp_LIBRARY}) endif() set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x") # Set this flag to activate bounds checking in stl-vectors (incl. strings) # It is useful to do this periodically, as it catches # difficult-to-see and rare-to-manifest bugs # set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_DEBUG") relion-3.1.3/src/apps/Zernike_test.cpp000066400000000000000000000011201411340063500177010ustar00rootroot00000000000000#include #include int main(int argc, char *argv[]) { const int N = 12; const int s = 400; Image out(s,s,N); for (int i = 0; i < N; i++) { int m, n; Zernike::oddIndexToMN(i,m,n); std::cout << i << " -> " << m << ", " << n << "\n"; for (int y = 0; y < s; y++) for (int x = 0; x < s; x++) { double xx = 2.0*x/(double)s - 1.0; double yy = 2.0*y/(double)s - 1.0; out(i,y,x) = Zernike::Z_cart(m,n,xx,yy); } } VtkHelper::writeVTK(out, "Zernike-odd-test.vtk"); return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/align_symmetry.cpp000066400000000000000000000223251411340063500203100ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" and "Takanori Nakane" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include #include #include #include #include #include #include class align_symmetry { private: Matrix2D A3D; MultidimArray F2D; MultidimArray rotated, symmetrised, dummy; FourierTransformer transformer; public: FileName fn_in, fn_out, fn_sym; RFLOAT angpix, maxres, search_step; int nr_uniform, padding_factor, interpolator, r_min_nn, boxsize, search_range; bool keep_centre, only_rot; // I/O Parser IOParser parser; void usage() { parser.writeUsage(std::cerr); } void read(int argc, char **argv) { parser.setCommandLine(argc, argv); int general_section = parser.addSection("Options"); fn_in = parser.getOption("--i", "Input map to be projected"); fn_out = parser.getOption("--o", "Rootname for output projections", "aligned.mrc"); fn_sym = parser.getOption("--sym", "Target point group symmetry"); boxsize = textToInteger(parser.getOption("--box_size", "Working box size in pixels. 
Very small box (such that Nyquist is around 20 A) is usually sufficient.", "64"));
		if (boxsize % 2 != 0)
			REPORT_ERROR("The working box size (--box_size) must be an even number.");

		keep_centre = parser.checkOption("--keep_centre", "Do not re-centre the input");
		angpix = textToFloat(parser.getOption("--angpix", "Pixel size (in Angstroms)", "-1"));
		only_rot = parser.checkOption("--only_rot", "Keep TILT and PSI fixed and search only ROT (rotation along the Z axis)");
		nr_uniform = textToInteger(parser.getOption("--nr_uniform", "Randomly search this many orientations", "400"));
		maxres = textToFloat(parser.getOption("--maxres", "Maximum resolution (in Angstrom) to consider in Fourier space (default Nyquist)", "-1"));
		search_range = textToInteger(parser.getOption("--local_search_range", "Local search range (1 + 2 * this number)", "2"));
		search_step = textToFloat(parser.getOption("--local_search_step", "Local search step (in degrees)", "2"));
		padding_factor = textToInteger(parser.getOption("--pad", "Padding factor", "2"));
		if (parser.checkOption("--NN", "Use nearest-neighbour instead of linear interpolation"))
			interpolator = NEAREST_NEIGHBOUR;
		else
			interpolator = TRILINEAR;

		// Hidden
		r_min_nn = textToInteger(getParameter(argc, argv, "--r_min_nn", "10"));

		// Check for errors in the command-line option
		if (parser.checkForErrors())
			REPORT_ERROR("Errors encountered on the command line (see above), exiting...");
	}

	int search(MetaDataTable &MDang, Projector &projector)
	{
		init_progress_bar(MDang.numberOfObjects());

		long int best_at = 0;
		double best_diff2 = 1E99;
		RFLOAT rot, tilt, psi;

		// TODO: parallelise?
		FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDang)
		{
			MDang.getValue(EMDL_ORIENT_ROT, rot);
			MDang.getValue(EMDL_ORIENT_TILT, tilt);
			MDang.getValue(EMDL_ORIENT_PSI, psi);

			Euler_rotation3DMatrix(rot, tilt, psi, A3D);
			F2D.initZeros();
			projector.get2DFourierTransform(F2D, A3D);
			transformer.inverseFourierTransform();
			CenterFFT(rotated, false);

			symmetrised = rotated;
			symmetriseMap(symmetrised, fn_sym);

			// non-weighted real-space squared difference
			double diff2 = 0;
			FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(rotated)
			{
				diff2 += (DIRECT_MULTIDIM_ELEM(rotated, n) - DIRECT_MULTIDIM_ELEM(symmetrised, n)) *
				         (DIRECT_MULTIDIM_ELEM(rotated, n) - DIRECT_MULTIDIM_ELEM(symmetrised, n));
			}

			if (best_diff2 > diff2)
			{
				best_diff2 = diff2;
				best_at = current_object;
			}

			if (current_object % 30 == 0)
				progress_bar(current_object);

#ifdef DEBUG
			std::cout << rot << " " << tilt << " " << psi << " " << diff2 << std::endl;
#endif
		} // end search

		progress_bar(MDang.numberOfObjects());

		return best_at;
	}

	void project()
	{
		MetaDataTable MDang;
		Image vol_in, vol_work;
		int orig_size;
		RFLOAT work_angpix, r_max, rot, tilt, psi;

		std::cout << " Reading map: " << fn_in << std::endl;
		vol_in.read(fn_in);
		orig_size = XSIZE(vol_in());
		std::cout << " The input box size: " << orig_size << std::endl;
		if (orig_size % 2 != 0)
			REPORT_ERROR("The input box size must be an even number.");
		if (orig_size < boxsize)
			REPORT_ERROR("There is no point using the working box size (--box_size) larger than the input volume.");

		if (angpix < 0.)
		{
			angpix = vol_in.samplingRateX();
			std::cout << " Using the pixel size in the input image header: " << angpix << " A/px" << std::endl;
		}

		if (!keep_centre)
		{
			selfTranslateCenterOfMassToCenter(vol_in(), DONT_WRAP, true);
			std::cout << " Re-centred to the centre of the mass" << std::endl;
		}

		vol_work = vol_in;
		resizeMap(vol_work(), boxsize);
		work_angpix = angpix * orig_size / boxsize;
		std::cout << " Downsampled to the working box size " << boxsize << " px. This corresponds to " << work_angpix << " A/px." << std::endl;

		if (nr_uniform > 0)
		{
			std::cout << " Generating " << nr_uniform << " projections taken randomly from a uniform angular distribution." << std::endl;
			MDang.clear();
			randomize_random_generator();

			tilt = 0;
			psi = 0;
			for (long int i = 0; i < nr_uniform; i++)
			{
				rot = rnd_unif() * 360.;
				if (!only_rot)
				{
					bool ok_tilt = false;
					while (!ok_tilt)
					{
						tilt = rnd_unif() * 180.;
						if (rnd_unif() < fabs(SIND(tilt)))
							ok_tilt = true;
					}
					psi = rnd_unif() * 360.;
				}

				MDang.addObject();
				MDang.setValue(EMDL_ORIENT_ROT, rot);
				MDang.setValue(EMDL_ORIENT_TILT, tilt);
				MDang.setValue(EMDL_ORIENT_PSI, psi);
			}
		}

		// Now that we have the size of the volume, check r_max
		if (maxres < 0.)
			r_max = boxsize;
		else
			r_max = CEIL(boxsize * work_angpix / maxres);

		// Set right size of F2D and initialize to zero
		rotated.reshape(vol_work());
		symmetrised.reshape(vol_work());
		transformer.setReal(rotated);
		transformer.getFourierAlias(F2D);

		// Set up the projector
		int data_dim = 3;
		Projector projector(boxsize, interpolator, padding_factor, r_min_nn, data_dim);
		projector.computeFourierTransformMap(vol_work(), dummy, 2* r_max);

		// Global search
		std::cout << " Searching globally ..." << std::endl;
		int best_at;
		best_at = search(MDang, projector);
		MDang.getValue(EMDL_ORIENT_ROT, rot, best_at);
		MDang.getValue(EMDL_ORIENT_TILT, tilt, best_at);
		MDang.getValue(EMDL_ORIENT_PSI, psi, best_at);
		std::cout << " The best solution is ROT = " << rot << " TILT = " << tilt << " PSI = " << psi << std::endl << std::endl;

		// Local refinement
		std::cout << " Refining locally ..." << std::endl;
		MDang.clear();
		for (int i = -search_range; i <= search_range; i++)
		{
			for (int j = -search_range; j <= search_range; j++)
			{
				if (only_rot && j != 0)
					continue;

				for (int k = -search_range; k <= search_range; k++)
				{
					if (only_rot && k != 0)
						continue;

					MDang.addObject();
					MDang.setValue(EMDL_ORIENT_ROT, rot + i * search_step);
					MDang.setValue(EMDL_ORIENT_TILT, tilt + j * search_step);
					MDang.setValue(EMDL_ORIENT_PSI, psi + k * search_step);
				}
			}
		}

		best_at = search(MDang, projector);
		MDang.getValue(EMDL_ORIENT_ROT, rot, best_at);
		MDang.getValue(EMDL_ORIENT_TILT, tilt, best_at);
		MDang.getValue(EMDL_ORIENT_PSI, psi, best_at);
		std::cout << " The refined solution is ROT = " << rot << " TILT = " << tilt << " PSI = " << psi << std::endl << std::endl;

		std::cout << " Now rotating the original (full size) volume ..."
<< std::endl << std::endl; Projector full_projector(orig_size, interpolator, padding_factor, r_min_nn, data_dim); Image vol_out; FourierTransformer final_transformer; full_projector.computeFourierTransformMap(vol_in(), dummy, 2 * orig_size); Euler_rotation3DMatrix(rot, tilt, psi, A3D); F2D.initZeros(orig_size, orig_size, orig_size / 2 + 1); vol_out().reshape(vol_in()); full_projector.get2DFourierTransform(F2D, A3D); transformer.inverseFourierTransform(F2D, vol_out()); CenterFFT(vol_out(), false); vol_out.setSamplingRateInHeader(angpix); vol_out.write(fn_out); std::cout << " The aligned map has been written to " << fn_out << std::endl; } // end project function }; int main(int argc, char *argv[]) { align_symmetry app; try { app.read(argc, argv); app.project(); } catch (RelionError XE) { //prm.usage(); std::cerr << XE; return RELION_EXIT_FAILURE; } return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/autopick.cpp000066400000000000000000000032261411340063500170630ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include #ifdef CUDA #include #endif int main(int argc, char *argv[]) { AutoPicker prm; try { prm.read(argc, argv); prm.initialise(); #ifdef CUDA if (prm.do_gpu) { std::stringstream didSs; didSs << "AP"; int dev_id = prm.deviceInitialise(); prm.cudaPicker = (void*) new AutoPickerCuda((AutoPicker*)&prm, dev_id, didSs.str().c_str() ); ((AutoPickerCuda*)prm.cudaPicker)->run(); } else #endif { prm.run(); } prm.generatePDFLogfile(); #ifdef TIMING std::cout << "timings:" << std::endl; prm.timer.printTimes(false); #endif } catch (RelionError XE) { //prm.usage(); std::cerr << XE; return RELION_EXIT_FAILURE; } return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/autopick_mpi.cpp000066400000000000000000000034001411340063500177220ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #include #ifdef CUDA #include #endif int main(int argc, char *argv[]) { AutoPickerMpi prm; try { prm.read(argc, argv); prm.initialise(); #ifdef CUDA if (prm.do_gpu) { std::stringstream didSs; didSs << "APr" << prm.getRank(); int dev_id = prm.deviceInitialise(); prm.cudaPicker = (void*) new AutoPickerCuda((AutoPickerMpi*)&prm, dev_id, didSs.str().c_str() ); ((AutoPickerCuda*)prm.cudaPicker)->run(); } else #endif { prm.run(); } MPI_Barrier(MPI_COMM_WORLD); if (prm.getRank() == 0) prm.generatePDFLogfile(); } catch (RelionError XE) { if (prm.verb > 0) //prm.usage(); std::cerr << XE; MPI_Abort(MPI_COMM_WORLD, RELION_EXIT_FAILURE); return RELION_EXIT_FAILURE; } MPI_Barrier(MPI_COMM_WORLD); return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/convert_star.cpp000066400000000000000000000103431411340063500177530ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include #include #include #include class star_converter { public: FileName fn_in, fn_out; IOParser parser; RFLOAT Cs, Q0; void usage() { parser.writeUsage(std::cerr); } void read(int argc, char **argv) { parser.setCommandLine(argc, argv); int general_section = parser.addSection("Options"); fn_in = parser.getOption("--i", "Input STAR file to be converted", "None"); fn_out = parser.getOption("--o", "Output STAR file to be written", "None"); Cs = textToFloat(parser.getOption("--Cs", "Spherical aberration (mm)", "-1")); Q0 = textToFloat(parser.getOption("--Q0", "Amplitude contrast", "-1")); if (fn_in == "None" || fn_out == "None") { usage(); REPORT_ERROR("Please specify input and output file names"); } } void run() { MetaDataTable mdt; MetaDataTable mdtOut, optOut; mdt.read(fn_in); const bool isMotionCorrSTAR = mdt.containsLabel(EMDL_MICROGRAPH_METADATA_NAME); StarConverter::convert_3p0_particlesTo_3p1(mdt, mdtOut, optOut, "", false); // don't die if (mdt.containsLabel(EMDL_IMAGE_NAME)) { std::cout << "The input is a particle STAR file" << std::endl; mdtOut.setName("particles"); } else if (isMotionCorrSTAR) { std::cout << "The input is a STAR file from a MotionCorr job." << std::endl; std::cout << "The (binned) pixel size and the voltage are taken from the first metadata STAR file." 
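		// (Illustrative usage, not from the original source; the binary name follows the
		//  "relion_" prefix rule in src/apps/CMakeLists.txt and the file names are made up:
		//      relion_convert_star --i MotionCorr/job002/corrected_micrographs.star \
		//                          --o micrographs_31.star --Cs 2.7 --Q0 0.1
		//  --Cs and --Q0 supply the two microscope parameters that a MotionCorr-style
		//  STAR file does not carry, as explained in the messages printed below.)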
<< std::endl; FileName fn_meta; if (!mdtOut.getValue(EMDL_MICROGRAPH_METADATA_NAME, fn_meta, 0)) REPORT_ERROR("Failed to find the metadata STAR file"); Micrograph mic(fn_meta); std::cout << "- voltage: " << mic.voltage << std::endl; optOut.setValue(EMDL_CTF_VOLTAGE, mic.voltage); std::cout << "- unbinned pixel size: " << mic.angpix << std::endl; std::cout << "- binning factor: " << mic.getBinningFactor() << std::endl; const RFLOAT angpix = mic.angpix * mic.getBinningFactor(); std::cout << "- binned pixel size: " << angpix << std::endl; optOut.setValue(EMDL_MICROGRAPH_PIXEL_SIZE, angpix); std::cout << "\nThe other microscope parameters must be specified in the command line." << std::endl; if (Cs < 0) REPORT_ERROR("Please specify the spherical aberration (mm) in the --Cs option."); std::cout << "- spherical aberration: " << Cs << std::endl; optOut.setValue(EMDL_CTF_CS, Cs); if (Q0 < 0) REPORT_ERROR("Please specify the amplitude contrast in the --Q0 option"); std::cout << "- amplitude contrast: " << Q0 << std::endl; optOut.setValue(EMDL_CTF_Q0, Q0); std::cout << "\nAll necessary information is ready." << std::endl; mdtOut.setName("micrographs"); } else { std::cout << "The input is a micrograph STAR file with CTF information." << std::endl; mdtOut.setName("micrographs"); } std::ofstream of(fn_out); optOut.write(of); mdtOut.write(of); of.close(); std::cout << "\nWritten " << fn_out << std::endl; std::cout << "Please carefully examine the optics group table at the beginning of the output to make sure the information is correct." << std::endl; } }; int main(int argc, char *argv[]) { star_converter app; try { app.read(argc, argv); app.run(); } catch (RelionError XE) { std::cerr << XE; return RELION_EXIT_FAILURE; } return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/convert_to_tiff.cpp000066400000000000000000000022541411340063500204360ustar00rootroot00000000000000/*************************************************************************** * * Author: "Takanori Nakane" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include int main(int argc, char *argv[]) { TIFFConverter app; try { app.read(argc, argv); app.initialise(); app.run(); } catch (RelionError XE) { std::cerr << XE; return RELION_EXIT_FAILURE; } return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/convert_to_tiff_mpi.cpp000066400000000000000000000027301411340063500213020ustar00rootroot00000000000000/*************************************************************************** * * Author: "Takanori Nakane" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include #include int main(int argc, char *argv[]) { TIFFConverter app; int rank, total_ranks; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &total_ranks); // Handle errors MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN); try { app.read(argc, argv); app.initialise(rank, total_ranks); app.run(); } catch (RelionError XE) { std::cerr << XE; return RELION_EXIT_FAILURE; } MPI_Barrier(MPI_COMM_WORLD); MPI_Finalize(); return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/ctf_mask_test.cpp000066400000000000000000000075301411340063500200740ustar00rootroot00000000000000 #include #include #include #include #include #include #include #include #include #include #include #include #include #include using namespace gravis; int main(int argc, char *argv[]) { std::string starFn, outPath; int s, threads, mg; double rad, step, flankWidth; IOParser parser; try { parser.setCommandLine(argc, argv); parser.addSection("General options"); starFn = parser.getOption("--i", "Input particle *.star file"); s = textToInteger(parser.getOption("--s", "Image size")); rad = textToDouble(parser.getOption("--r", "Particle radius")); step = textToDouble(parser.getOption("--t", "Frequency step")); flankWidth = textToInteger(parser.getOption("--tw", "Filter step width")); threads = textToInteger(parser.getOption("--j", "Number of threads", "1")); mg = textToInteger(parser.getOption("--mg", "Micrograph index", "0")); outPath = parser.getOption("--o", "Output path"); parser.checkForErrors(); } catch (RelionError XE) { parser.writeUsage(std::cout); std::cerr << XE; return RELION_EXIT_FAILURE; } ObservationModel obsModel; MetaDataTable mdt0; ObservationModel::loadSafely(starFn, obsModel, mdt0); std::vector allMdts = StackHelper::splitByMicrographName(mdt0); int opticsGroup = obsModel.getOpticsGroup(allMdts[mg], 0); const int sh = s/2 + 1; Image ctfImg(sh,s), one(sh,s); one.data.initConstant(1.0); const double angpix = obsModel.getPixelSize(opticsGroup); CTF ctf; ctf.readByGroup(allMdts[mg], &obsModel, 0); ctf.getFftwImage(ctfImg(), s, s, angpix); const int tc = (int)(sh/step + 1.0); const int maxBin = sh; std::vector> mask(tc+1), psf(tc+1), maskedCTF(tc+1), slopeHistRad(tc+1); std::vector> slopeHist(tc+1, std::vector(maxBin)); for (int t = 0; t < tc+1; t++) { const double k0 = t * step; const double k1 = (t+1) * step; if (t < tc) { mask[t] = FilterHelper::raisedCosEnvRingFreq2D(one, k0, k1, flankWidth); } else { mask[t] = one; } Image ctfZ(sh,s); Image maskedCTF_half(sh,s); for (int y = 0; y < s; y++) for (int x = 0; x < sh; x++) { maskedCTF_half(y,x) = ctfImg(y,x) * mask[t](y,x); ctfZ(y,x) = ctfImg(y,x) * mask[t](y,x); } FftwHelper::decenterDouble2D(maskedCTF_half.data, maskedCTF[t].data); NewFFT::inverseFourierTransform(ctfZ.data, psf[t].data); const double as = s * angpix; double minSlope = 100, maxSlope = -100; for (int y = 0; y < s; y++) for (int x = 0; x < sh; x++) { double xx = x/as; double yy = y < sh? 
y/as : (y - s)/as; double slope = ctf.getGammaGrad(xx,yy).length()/(as*PI); if (mask[t](y,x) >= 0.5) { if (slope < minSlope) minSlope = slope; if (slope > maxSlope) maxSlope = slope; } int si = slope * sh; if (si < maxBin) { slopeHist[t][si] += mask[t](y,x); } } double maxHist = 0.0; for (int b = 0; b < maxBin; b++) { if (slopeHist[t][b] > maxHist) { maxHist = slopeHist[t][b]; } } if (maxHist > 0.0) { for (int b = 0; b < maxBin; b++) { slopeHist[t][b] /= maxHist; } } std::cout << t << ": " << minSlope << " - " << maxSlope << "\n"; slopeHistRad[t] = NoiseHelper::radialMap(slopeHist[t], false); } JazConfig::writeMrc = false; JazConfig::writeVtk = true; ImageLog::write(maskedCTF, outPath+"_maskedCTF"); ImageLog::write(mask, outPath+"_mask"); ImageLog::write(psf, outPath+"_psf", CenterXY); ImageLog::write(slopeHistRad, outPath+"_slopeHist", CenterXY); } relion-3.1.3/src/apps/ctf_nyquist_test.cpp000066400000000000000000000070111411340063500206470ustar00rootroot00000000000000 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include using namespace gravis; int main(int argc, char *argv[]) { std::string starFn; int s, threads; IOParser parser; try { parser.setCommandLine(argc, argv); parser.addSection("General options"); starFn = parser.getOption("--i", "Input *.star file"); s = textToInteger(parser.getOption("--s", "Image size")); threads = textToInteger(parser.getOption("--j", "Number of threads")); parser.checkForErrors(); } catch (RelionError XE) { parser.writeUsage(std::cout); std::cerr << XE; return RELION_EXIT_FAILURE; } MetaDataTable mdt; mdt.read(starFn); RFLOAT mag, dstep; mdt.getValue(EMDL_CTF_MAGNIFICATION, mag, 0); mdt.getValue(EMDL_CTF_DETECTOR_PIXEL_SIZE, dstep, 0); double angpix = 10000 * dstep / mag; int sh = s/2 + 1; std::vector percentages = {50, 60, 70, 80, 90, 100, 110}; const int perCnt = percentages.size(); std::vector>> countN(perCnt); for (int c = 0; c < perCnt; c++) { countN[c] = std::vector>(threads); for (int t = 0; t < threads; t++) { countN[c][t] = Image(sh,s); } } const int pc = mdt.numberOfObjects(); #pragma omp parallel for num_threads(threads) for (int p = 0; p < pc; p++) { if (p%1000 == 0) std::cout << p << " / " << pc << "\n"; int th = omp_get_thread_num(); CTF ctf; ctf.read(mdt, mdt, p); RFLOAT as = (RFLOAT)s * angpix; for (long int i = 0; i < s; i++) \ for (long int j = 0; j < sh; j++) { const int x = j; const int y = i < sh? i : i - s; RFLOAT cf = ctf.getCtfFreq(x/as, y/as) / (as * PI); double cfa = std::abs(cf); for (int c = 0; c < perCnt; c++) { if (cfa < percentages[c]/100.0) { countN[c][th](i,j) += 1.0/pc; } } } } { std::string command = " mkdir -p ctf_test"; int ret = system(command.c_str()); } for (int c = 0; c < perCnt; c++) { for (int t = 1; t < threads; t++) { for (long int i = 0; i < s; i++) \ for (long int j = 0; j < sh; j++) { countN[c][0](i,j) += countN[c][t](i,j); } } std::stringstream stsc; stsc << percentages[c]; VtkHelper::writeVTK(countN[c][0], "ctf_test/below_"+stsc.str()+"%_nyq.vtk"); } } relion-3.1.3/src/apps/ctf_refine.cpp000066400000000000000000000023211411340063500173430ustar00rootroot00000000000000/*************************************************************************** * * Authors: "Jasenko Zivanov & Sjors H.W. 
Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include int main(int argc, char *argv[]) { CtfRefiner prm; try { prm.read(argc, argv); prm.init(); prm.run(); } catch (RelionError XE) { //prm.usage(); std::cerr << XE; return RELION_EXIT_FAILURE; } return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/ctf_refine_mpi.cpp000066400000000000000000000024641411340063500202200ustar00rootroot00000000000000/*************************************************************************** * * Authors: "Jasenko Zivanov & Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include int main(int argc, char *argv[]) { CtfRefinerMpi prm; try { prm.read(argc, argv); prm.init(); MPI_Barrier(MPI_COMM_WORLD); prm.run(); } catch (RelionError XE) { //prm.usage(); std::cerr << XE; MPI_Abort(MPI_COMM_WORLD, RELION_EXIT_FAILURE); } MPI_Barrier(MPI_COMM_WORLD); return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/ctf_toolbox.cpp000066400000000000000000000166221411340063500175720ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #include #include #include #include #include #include #include class ctf_toolbox_parameters { public: FileName fn_in, fn_out, fn_sim; bool do_intact_ctf_until_first_peak, do_intact_ctf_after_first_peak, do_ctf_pad; RFLOAT profile_angle, sim_angpix, kV, Q0, Cs, defU, defV, defAng, phase_shift; int verb; // I/O Parser IOParser parser; MetaDataTable MD; FourierTransformer transformer; ObservationModel obsModel; // Image size int xdim, ydim, zdim, sim_box, sim_box_large; long int ndim; void usage() { parser.writeUsage(std::cerr); } void read(int argc, char **argv) { parser.setCommandLine(argc, argv); int general_section = parser.addSection("Pre-multiply options"); fn_in = parser.getOption("--i", "Input STAR file with CTF information", ""); fn_out = parser.getOption("--o", "Output rootname (for multiple images: insert this string before each image's extension)", ""); int sim_section = parser.addSection("OR: simulate options"); fn_sim = parser.getOption("--simulate", "Output name for simulated CTF image",""); sim_angpix = textToFloat(parser.getOption("--angpix", "Pixel size (A)", "1.")); sim_box = textToInteger(parser.getOption("--box", "Box size (pix)", "256")); kV = textToFloat(parser.getOption("--kV", "Voltage (kV)", "300")); Q0 = textToFloat(parser.getOption("--Q0", "Amplitude contrast", "0.1")); Cs = textToFloat(parser.getOption("--Cs", "Spherical aberration (mm)", "2.7")); defU = textToFloat(parser.getOption("--defU", "Defocus in U-direction (A)", "20000")); defV = textToFloat(parser.getOption("--defV", "Defocus in V-direction (A, default = defU)", "-1.")); if (defV < 0) defV = defU; defAng = textToFloat(parser.getOption("--defAng", "Defocus angle (deg)", "0.")); phase_shift = textToFloat(parser.getOption("--phase_shift", "Phase shift (deg)", "0.")); int cst_section = parser.addSection("Shared options"); do_intact_ctf_until_first_peak = parser.checkOption("--ctf_intact_first_peak", "Leave CTFs intact until first peak"); do_intact_ctf_after_first_peak = parser.checkOption("--ctf_intact_after_first_peak", "Leave CTFs intact after first peak"); do_ctf_pad = parser.checkOption("--ctf_pad", "Pre-multiply with a 2x finer-sampled CTF that is then downscaled"); // Check for errors in the command-line option if (parser.checkForErrors()) REPORT_ERROR("Errors encountered on the command line (see above), exiting..."); verb = 1; } void run() { // CTF Simulation of a single image if (fn_sim != "") { if (do_ctf_pad) sim_box_large = 2 * sim_box; else sim_box_large = sim_box; Image Ictf(sim_box_large, sim_box_large); CTF ctf; std::cout << " + Input values: " << std::endl; std::cout << " + kV= " << kV << std::endl; std::cout << " + Cs= " << Cs << std::endl; std::cout << " + Q0= " << Q0 << std::endl; std::cout << " + defU= " << defU << std::endl; std::cout << " + defV= " << defV << std::endl; std::cout << " + defAng= " << defAng << std::endl; std::cout << " + phase_shift = " << phase_shift << std::endl; std::cout << " + angpix= " << sim_angpix<< std::endl; std::cout << " + box= " << sim_box<< std::endl; std::cout << " + use CTF padding? " << ((do_ctf_pad) ? 
"true" : "false") << std::endl; std::cout << " + " << std::endl; ctf.setValues(defU, defV, defAng, kV, Cs, Q0, 0., 1., phase_shift); Ictf().setXmippOrigin(); RFLOAT xs = (RFLOAT)sim_box_large * sim_angpix; RFLOAT ys = (RFLOAT)sim_box_large * sim_angpix; FOR_ALL_ELEMENTS_IN_ARRAY2D(Ictf()) { RFLOAT x = (RFLOAT)j / xs; RFLOAT y = (RFLOAT)i / ys; A2D_ELEM(Ictf(), i, j) = ctf.getCTF(x, y, false, false, do_intact_ctf_until_first_peak, true, 0.0, do_intact_ctf_after_first_peak); } resizeMap(Ictf(), sim_box); Ictf.write(fn_sim); std::cout << " + Done! written: " << fn_sim << std::endl; } else { ObservationModel::loadSafely(fn_in, obsModel, MD); bool do_mic_name = (obsModel.opticsMdt.getName() == "micrographs"); long int i_img = 0; if (verb > 0) init_progress_bar(MD.numberOfObjects()); FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD) { CTF ctf; ctf.readByGroup(MD, &obsModel); int og = obsModel.getOpticsGroup(MD); RFLOAT angpix = obsModel.getPixelSize(og); FileName fn_img, my_fn_out; if (do_mic_name) MD.getValue(EMDL_MICROGRAPH_NAME, fn_img); else MD.getValue(EMDL_IMAGE_NAME, fn_img); my_fn_out = fn_img.insertBeforeExtension("_" + fn_out); // Now do the actual work MD.getValue(EMDL_IMAGE_NAME, fn_img); Image img; MultidimArray Fimg; MultidimArray Fctf; img.read(fn_img); transformer.FourierTransform(img(), Fimg, false); Fctf.resize(YSIZE(Fimg), XSIZE(Fimg)); ctf.getFftwImage(Fctf, XSIZE(img()), YSIZE(img()), angpix, false, false, do_intact_ctf_until_first_peak, false, do_ctf_pad, do_intact_ctf_after_first_peak); FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Fimg) { if (!do_intact_ctf_after_first_peak) DIRECT_MULTIDIM_ELEM(Fimg, n) *= DIRECT_MULTIDIM_ELEM(Fctf, n); else { // this is safe because getCTF does not return RELION_EXIT_SUCCESS. DIRECT_MULTIDIM_ELEM(Fimg, n) /= DIRECT_MULTIDIM_ELEM(Fctf, n); } } transformer.inverseFourierTransform(Fimg, img()); // Write out the result // Check whether fn_out has an "@": if so REPLACE the corresponding frame in the output stack! long int n; FileName fn_tmp; my_fn_out.decompose(n, fn_tmp); n--; if (n >= 0) // This is a stack... { // The following assumes the images in the stack come ordered... 
if (n == 0) img.write(fn_tmp, n, true, WRITE_OVERWRITE); // make a new stack else img.write(fn_tmp, n, true, WRITE_APPEND); } else // individual image img.write(my_fn_out); MD.setValue(EMDL_IMAGE_NAME, my_fn_out); obsModel.setCtfPremultiplied(og, true); i_img++; if (verb > 0) progress_bar(i_img); } if (verb > 0) progress_bar(MD.numberOfObjects()); obsModel.save(MD, fn_in.insertBeforeExtension("_"+fn_out)); std::cout << " + written out new particles STAR file in: " << fn_in.insertBeforeExtension("_"+fn_out) << std::endl; } } }; int main(int argc, char *argv[]) { ctf_toolbox_parameters prm; try { prm.read(argc, argv); prm.run(); } catch (RelionError XE) { prm.usage(); std::cerr << XE; return RELION_EXIT_FAILURE; } return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/defocus_stats.cpp000066400000000000000000000021371411340063500201120ustar00rootroot00000000000000 #include #include #include using namespace gravis; int main(int argc, char *argv[]) { IOParser parser; std::string starFn; parser.setCommandLine(argc, argv); parser.addSection("General options"); starFn = parser.getOption("--i", "Input STAR file with a list of particles"); if (parser.checkForErrors()) return RELION_EXIT_FAILURE; MetaDataTable mdt0; mdt0.read(starFn); double mu(0.0), var(0.0); for (int i = 0; i < mdt0.numberOfObjects(); i++) { double u, v; mdt0.getValue(EMDL_CTF_DEFOCUSU, u, i); mdt0.getValue(EMDL_CTF_DEFOCUSV, v, i); double a = 0.5 * (u + v); mu += a; } mu /= (double) mdt0.numberOfObjects(); std::cout << "mu: " << mu << "\n"; for (int i = 0; i < mdt0.numberOfObjects(); i++) { double u, v; mdt0.getValue(EMDL_CTF_DEFOCUSU, u, i); mdt0.getValue(EMDL_CTF_DEFOCUSV, v, i); double a = 0.5 * (u + v); double d = a - mu; var += d*d; } var /= (double) (mdt0.numberOfObjects() - 1); std::cout << "sigma: " << sqrt(var) << "\n"; return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/demodulate.cpp000066400000000000000000000074641411340063500173750ustar00rootroot00000000000000#include #include #include #include #include int main(int argc, char *argv[]) { std::string particlesFn, outPath; MetaDataTable particlesMdt; int nr_omp_threads; bool r31; IOParser parser; try { parser.setCommandLine(argc, argv); parser.addSection("General options"); particlesFn = parser.getOption("--i", "Input STAR file with a list of particles"); nr_omp_threads = textToInteger(parser.getOption("--j", "Number of OMP threads", "6")); r31 = parser.checkOption("--r31", "Write output in Relion-3.1 format"); outPath = parser.getOption("--out", "Output path"); if (parser.checkForErrors()) return RELION_EXIT_FAILURE; } catch (RelionError XE) { parser.writeUsage(std::cout); std::cerr << XE; return RELION_EXIT_FAILURE; } if (outPath[outPath.length()-1] != '/') { outPath += "/"; } std::string command = " mkdir -p " + outPath; int res = system(command.c_str()); ObservationModel obsModel; ObservationModel::loadSafely( particlesFn, obsModel, particlesMdt); particlesMdt.read(particlesFn); std::vector fts(nr_omp_threads); std::vector mdts = StackHelper::splitByStack(&particlesMdt); const int mc = mdts.size(); for (int m = 0; m < mc; m++) { const int pc = mdts[m].numberOfObjects(); std::vector> obs; obs = StackHelper::loadStackFS(mdts[m], "", nr_omp_threads, false); std::string name, fullName; mdts[m].getValue(EMDL_IMAGE_NAME, fullName, 0); name = fullName.substr(fullName.find("@")+1); for (int p = 0; p < pc; p++) { obsModel.demodulatePhase(mdts[m], p, obs[p].data); } std::vector> demodulated = StackHelper::inverseFourierTransform(obs); Image out =
StackHelper::toSingleImage(demodulated); FileName fn_pre, fn_jobnr, fn_post; decomposePipelineFileName(name, fn_pre, fn_jobnr, fn_post); std::string outFn = outPath + fn_post; if (outFn.find_last_of("/") != std::string::npos) { std::string command = " mkdir -p " + outFn.substr(0, outFn.find_last_of("/")); int res = system(command.c_str()); } for (int p = 0; p < pc; p++) { std::stringstream sts; sts << (p+1) << "@" << outFn; mdts[m].setValue(EMDL_IMAGE_NAME, sts.str(), p); } out.write(outFn); } MetaDataTable mdt1; for (int m = 0; m < mc; m++) { mdt1.append(mdts[m]); } if (!r31) { const int tpc = mdt1.numberOfObjects(); std::vector allOpticsLabels_double(0); allOpticsLabels_double.push_back(EMDL_CTF_Q0); allOpticsLabels_double.push_back(EMDL_CTF_CS); allOpticsLabels_double.push_back(EMDL_CTF_VOLTAGE); allOpticsLabels_double.push_back(EMDL_CTF_DETECTOR_PIXEL_SIZE); allOpticsLabels_double.push_back(EMDL_CTF_MAGNIFICATION); for (int l = 0; l < allOpticsLabels_double.size(); l++) { EMDLabel lab = allOpticsLabels_double[l]; mdt1.addLabel(lab); for (int p = 0; p < tpc; p++) { int opticsGroup; mdt1.getValue(EMDL_IMAGE_OPTICS_GROUP, opticsGroup, p); opticsGroup--; double v; obsModel.opticsMdt.getValue(lab, v, opticsGroup); mdt1.setValue(lab, v, p); } } obsModel.opticsMdt.deactivateLabel(EMDL_IMAGE_OPTICS_GROUP); mdt1.setVersion(30000); } else { obsModel.opticsMdt.deactivateLabel(EMDL_IMAGE_BEAMTILT_X); obsModel.opticsMdt.deactivateLabel(EMDL_IMAGE_BEAMTILT_Y); obsModel.opticsMdt.deactivateLabel(EMDL_IMAGE_ODD_ZERNIKE_COEFFS); obsModel.opticsMdt.write(outPath+"demodulated_particles_optics.star"); } mdt1.write(outPath+"demodulated_particles.star"); std::cout << "output written into " << (outPath+"demodulated_particles.star") << "\n"; return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/display.cpp000066400000000000000000000027001411340063500167050ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #include #include #include #include #include #include #include #include #include #include int main(int argc, char *argv[]) { Displayer prm; try { prm.read(argc, argv); if (prm.do_gui) { prm.runGui(); } else { prm.initialise(); prm.run(); } } catch (RelionError XE) { //prm.usage(); std::cerr << XE; return RELION_EXIT_FAILURE; } return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/double_bfac_fit.cpp000066400000000000000000000066071411340063500203410ustar00rootroot00000000000000 #include #include #include #include #include #include #include #include using namespace gravis; int main(int argc, char *argv[]) { IOParser parser; parser.setCommandLine(argc, argv); parser.addSection("General options"); std::string fccFn0 = parser.getOption("--fcc0", "FCC 0"); std::string fccFn1 = parser.getOption("--fcc1", "FCC 1"); int k0 = textToInteger(parser.getOption("--k0", "k0")); int k1 = textToInteger(parser.getOption("--k1", "k1")); double angpix = textToDouble(parser.getOption("--angpix", "pixel size")); std::string outPath = parser.getOption("--o", "output path"); if (parser.checkForErrors()) return RELION_EXIT_FAILURE; Image fcc0, fcc1; fcc0.read(fccFn0); fcc1.read(fccFn1); const int w = fcc0.data.xdim; const int h0 = fcc0.data.ydim; const int h1 = fcc1.data.ydim; const int h = h0 + h1; Image fcc(w,h); for (int y = 0; y < h0; y++) { for (int x = 0; x < w; x++) { fcc(y,x) = fcc0(y,x); } } for (int y = 0; y < h1; y++) { for (int x = 0; x < w; x++) { fcc(y+h0,x) = fcc1(y,x); } } const int sh = w; const int s = 2*(sh-1); const int fc = h; std::pair,std::vector> bkFacs = DamageHelper::fitBkFactors(fcc, k0, k1); mktree(outPath + "/"); Image bfacFit = DamageHelper::renderBkFit(bkFacs, sh, fc); Image bfacFitNoScale = DamageHelper::renderBkFit(bkFacs, sh, fc, true); std::vector> pixelWeights(2, Image(sh,h0)); for (int x = 0; x < sh; x++) { double sum = 0.0; for (int i = 0; i < h0; i++) { sum += bfacFitNoScale(i,x); } for (int i = 0; i < h0; i++) { pixelWeights[0](i,x) = bfacFitNoScale(i,x) / sum; } } ImageLog::write(pixelWeights[0], outPath + "/pixel_weights_0"); for (int x = 0; x < sh; x++) { double sum = 0.0; for (int i = 0; i < h1; i++) { sum += bfacFitNoScale(i+h0,x); } for (int i = 0; i < h1; i++) { pixelWeights[1](i,x) = bfacFitNoScale(i+h0,x) / sum; } } ImageLog::write(pixelWeights[1], outPath + "/pixel_weights_1"); ImageLog::write(bfacFit, outPath + "/glob_Bk-fit"); ImageLog::write(bfacFitNoScale, outPath + "/glob_Bk-fit_noScale"); ImageLog::write(fcc, outPath + "/glob_Bk-data"); std::ofstream bfacsDat(outPath + "/Bfac.dat"); std::ofstream kfacsDat(outPath + "/kfac.dat"); const double cf = 8.0 * angpix * angpix * sh * sh; double avg0 = 0.0; for (int i = 0; i < h0; i++) { double s = bkFacs.first[i].x; double b = -cf/(s*s); bfacsDat << i << " " << b << std::endl; kfacsDat << i << " " << log(bkFacs.first[i].y) << std::endl; avg0 += b/h0; } bfacsDat << "\n"; kfacsDat << "\n"; std::ofstream dfacsDat(outPath + "/Dfac.dat"); for (int i = 0; i < sh; i++) { dfacsDat << i << " " << bkFacs.second[i] << "\n"; } dfacsDat << "\n"; double avg1 = 0.0; for (int i = 0; i < h1; i++) { double s = bkFacs.first[i+h0].x; double b = -cf/(s*s); bfacsDat << i << " " << b << std::endl; kfacsDat << i << " " << log(bkFacs.first[i+h0].y) << std::endl; avg1 += b/h1; } bfacsDat.close(); kfacsDat.close(); std::cout << "avg0 = " << avg0 << "\n"; std::cout << "avg1 = " << avg1 << "\n"; std::cout << "avg diff = " << (avg1 - avg0) << "\n"; return 
RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/double_reconstruct_openmp.cpp000066400000000000000000000554301411340063500225330ustar00rootroot00000000000000/*************************************************************************** * * Authors: Sjors H.W. Scheres and Jasenko Zivanov * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include class reconstruct_parameters { public: FileName fn_out, fn_sel, fn_img, fn_sym, fn_sub; int r_max, r_min_nn, blob_order, ref_dim, interpolator, grid_iters, nr_omp_threads, nr_helical_asu, newbox, width_mask_edge, nr_sectors; RFLOAT blob_radius, blob_alpha, angular_error, shift_error, helical_rise, helical_twist; bool deloc_supp, ctf_phase_flipped, only_flip_phases, intact_ctf_first_peak, do_fom_weighting, do_3d_rot, do_ewald; bool skip_gridding, debug, do_reconstruct_meas, is_positive, read_weights, div_avg; bool no_Wiener, writeWeights, new_Ewald_weight, Ewald_ellipsoid; float padding_factor, mask_diameter_ds, mask_diameter, mask_diameter_filt, flank_width; double padding_factor_2D; // I/O Parser IOParser parser; void usage() { parser.writeUsage(std::cerr); } void read(int argc, char **argv) { parser.setCommandLine(argc, argv); int general_section = parser.addSection("General options"); fn_sel = parser.getOption("--i", "Input STAR file with the projection images and their orientations", ""); fn_out = parser.getOption("--o", "Name for output reconstruction"); fn_sym = parser.getOption("--sym", "Symmetry group", "c1"); padding_factor = textToFloat(parser.getOption("--pad", "Padding factor", "2")); padding_factor_2D = textToDouble(parser.getOption("--pad2D", "Padding factor for 2D images", "1")); mask_diameter_filt = textToFloat(parser.getOption("--filter_diameter", "Diameter of filter-mask applied before division", "-1")); flank_width = textToFloat(parser.getOption("--filter_softness", "Width of filter-mask edge", "30")); nr_omp_threads = textToInteger(parser.getOption("--j", "Number of open-mp threads to use. 
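// The CTF options in the next section control how each particle's CTF is handled during back-projection: optional delocalisation masking (--dm), the phase-flipping behaviour around the first CTF peak, per-frequency weight files, and Ewald-sphere curvature correction (with optional re-boxing via --newbox and sector-wise application via --sectors).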
Memory footprint is multiplied by this value.", "16")); int ctf_section = parser.addSection("CTF options"); deloc_supp = parser.checkOption("--dm", "Apply delocalisation masking"); mask_diameter_ds = textToDouble(parser.getOption("--mask_diameter_ds", "Diameter (in A) of mask for delocalisation suppression", "50")); intact_ctf_first_peak = parser.checkOption("--ctf_intact_first_peak", "Leave CTFs intact until first peak"); ctf_phase_flipped = parser.checkOption("--ctf_phase_flipped", "Images have been phase flipped"); only_flip_phases = parser.checkOption("--only_flip_phases", "Do not correct CTF-amplitudes, only flip phases"); read_weights = parser.checkOption("--read_weights", "Read freq. weight files"); writeWeights = parser.checkOption("--write_weights", "Write the weights volume"); do_ewald = parser.checkOption("--ewald", "Correct for Ewald-sphere curvature (developmental)"); mask_diameter = textToFloat(parser.getOption("--mask_diameter", "Diameter (in A) of mask for Ewald-sphere curvature correction", "-1.")); width_mask_edge = textToInteger(parser.getOption("--width_mask_edge", "Width (in pixels) of the soft edge on the mask", "3")); is_positive = !parser.checkOption("--reverse_curvature", "Try curvature the other way around"); newbox = textToInteger(parser.getOption("--newbox", "Box size of reconstruction after Ewald sphere correction", "-1")); nr_sectors = textToInteger(parser.getOption("--sectors", "Number of sectors for Ewald sphere correction", "2")); int helical_section = parser.addSection("Helical options"); nr_helical_asu = textToInteger(parser.getOption("--nr_helical_asu", "Number of helical asymmetrical units", "1")); helical_rise = textToFloat(parser.getOption("--helical_rise", "Helical rise (in Angstroms)", "0.")); helical_twist = textToFloat(parser.getOption("--helical_twist", "Helical twist (in degrees, + for right-handedness)", "0.")); int expert_section = parser.addSection("Expert options"); fn_sub = parser.getOption("--subtract","Subtract projections of this map from the images used for reconstruction", ""); no_Wiener = parser.checkOption("--legacy", "Use gridding instead of Wiener filter"); new_Ewald_weight = parser.checkOption("--new_Ewald_weight", "Use Ewald weight W that considers Cs as well"); Ewald_ellipsoid = parser.checkOption("--Ewald_ellipsoid", "Allow Ewald sphere to become an ellipsoid under aniso. 
mag."); if (parser.checkOption("--NN", "Use nearest-neighbour instead of linear interpolation before gridding correction")) { interpolator = NEAREST_NEIGHBOUR; } else { interpolator = TRILINEAR; } blob_radius = textToFloat(parser.getOption("--blob_r", "Radius of blob for gridding interpolation", "1.9")); blob_order = textToInteger(parser.getOption("--blob_m", "Order of blob for gridding interpolation", "0")); blob_alpha = textToFloat(parser.getOption("--blob_a", "Alpha-value of blob for gridding interpolation", "15")); grid_iters = textToInteger(parser.getOption("--iter", "Number of gridding-correction iterations", "10")); ref_dim = textToInteger(parser.getOption("--refdim", "Dimension of the reconstruction (2D or 3D)", "3")); angular_error = textToFloat(parser.getOption("--angular_error", "Apply random deviations with this standard deviation (in degrees) to each of the 3 Euler angles", "0.")); shift_error = textToFloat(parser.getOption("--shift_error", "Apply random deviations with this standard deviation (in pixels) to each of the 2 translations", "0.")); do_fom_weighting = parser.checkOption("--fom_weighting", "Weight particles according to their figure-of-merit (_rlnParticleFigureOfMerit)"); do_3d_rot = parser.checkOption("--3d_rot", "Perform 3D rotations instead of backprojections from 2D images"); skip_gridding = !parser.checkOption("--grid", "Perform gridding part of the reconstruction"); div_avg = parser.checkOption("--div_avg", "Divide the per-voxel average by its weight prior to computing the preliminary FSC"); debug = parser.checkOption("--debug", "Write out debugging data"); // Hidden r_min_nn = textToInteger(getParameter(argc, argv, "--r_min_nn", "10")); // Check for errors in the command-line option if (parser.checkForErrors()) { REPORT_ERROR("Errors encountered on the command line (see above), exiting..."); } } void applyCTFPandCTFQ(MultidimArray &Fin, CTF &ctf, FourierTransformer &transformer, MultidimArray &outP, MultidimArray &outQ, double angpix) { //FourierTransformer transformer; outP.resize(Fin); outQ.resize(Fin); float angle_step = 180./nr_sectors; for (float angle = 0.; angle < 180.; angle +=angle_step) { MultidimArray CTFP(Fin), Fapp(Fin); MultidimArray Iapp(YSIZE(Fin), YSIZE(Fin)); // Two passes: one for CTFP, one for CTFQ for (int ipass = 0; ipass < 2; ipass++) { bool is_my_positive = (ipass == 1) ? is_positive : !is_positive; // Get CTFP and multiply the Fapp with it ctf.getCTFPImage(CTFP, YSIZE(Fin), YSIZE(Fin), angpix, is_my_positive, angle); Fapp = Fin * CTFP; // element-wise complex multiplication! // inverse transform and mask out the particle.... transformer.inverseFourierTransform(Fapp, Iapp); CenterFFT(Iapp, false); softMaskOutsideMap(Iapp, ROUND(mask_diameter/(angpix*2.)), (RFLOAT)width_mask_edge); // Re-box to a smaller size if necessary.... if (newbox > 0 && newbox < YSIZE(Fin)) { Iapp.setXmippOrigin(); Iapp.window(FIRST_XMIPP_INDEX(newbox), FIRST_XMIPP_INDEX(newbox), LAST_XMIPP_INDEX(newbox), LAST_XMIPP_INDEX(newbox)); } // Back into Fourier-space CenterFFT(Iapp, true); transformer.FourierTransform(Iapp, Fapp, false); // false means: leave Fapp in the transformer // First time round: resize the output arrays if (ipass == 0 && fabs(angle) < XMIPP_EQUAL_ACCURACY) { outP.resize(Fapp); outQ.resize(Fapp); } // Now set back the right parts into outP (first pass) or outQ (second pass) float anglemin = angle + 90. - (0.5*angle_step); float anglemax = angle + 90. + (0.5*angle_step); // angles larger than 180 bool is_reverse = false; if (anglemin >= 180.) 
{ anglemin -= 180.; anglemax -= 180.; is_reverse = true; } MultidimArray *myCTFPorQ, *myCTFPorQb; if (is_reverse) { myCTFPorQ = (ipass == 0) ? &outQ : &outP; myCTFPorQb = (ipass == 0) ? &outP : &outQ; } else { myCTFPorQ = (ipass == 0) ? &outP : &outQ; myCTFPorQb = (ipass == 0) ? &outQ : &outP; } // Deal with sectors with the Y-axis in the middle of the sector... bool do_wrap_max = false; if (anglemin < 180. && anglemax > 180.) { anglemax -= 180.; do_wrap_max = true; } // use radians instead of degrees anglemin = DEG2RAD(anglemin); anglemax = DEG2RAD(anglemax); FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM2D(CTFP) { RFLOAT x = (RFLOAT)jp; RFLOAT y = (RFLOAT)ip; RFLOAT myangle = (x*x+y*y > 0) ? acos(y/sqrt(x*x+y*y)) : 0; // dot-product with Y-axis: (0,1) // Only take the relevant sector now... if (do_wrap_max) { if (myangle >= anglemin) DIRECT_A2D_ELEM(*myCTFPorQ, i, j) = DIRECT_A2D_ELEM(Fapp, i, j); else if (myangle < anglemax) DIRECT_A2D_ELEM(*myCTFPorQb, i, j) = DIRECT_A2D_ELEM(Fapp, i, j); } else { if (myangle >= anglemin && myangle < anglemax) DIRECT_A2D_ELEM(*myCTFPorQ, i, j) = DIRECT_A2D_ELEM(Fapp, i, j); } } } } } void reconstruct() { int data_dim = (do_3d_rot) ? 3 : 2; MultidimArray dummy; Image vol, sub; ObservationModel obsModel; MetaDataTable mdt0; ObservationModel::loadSafely(fn_sel, obsModel, mdt0); std::vector angpix = obsModel.getPixelSizes(); const int optGroupCount = obsModel.numberOfOpticsGroups(); // Use pixel and box size of first opt. group for output; double angpixOut = angpix[0]; int boxOut; // When doing Ewald-curvature correction: allow reconstructing smaller // box than the input images (which should have large boxes!!) if (do_ewald && newbox > 0) { boxOut = newbox; } else { boxOut = obsModel.getBoxSize(0); } std::vector paddedSizes2D(optGroupCount); std::vector origSizes2D(optGroupCount); for (int i = 0; i < optGroupCount; i++) { paddedSizes2D[i] = (int) (padding_factor_2D * obsModel.getBoxSize(i)); origSizes2D[i] = (int) obsModel.getBoxSize(i); } // Get dimension of the images mdt0.getValue(EMDL_IMAGE_NAME, fn_img, 0); Projector subProjector(sub.data.xdim, interpolator, padding_factor, r_min_nn); r_max = -1; if (fn_sub != "") { sub.read(fn_sub); subProjector.computeFourierTransformMap(sub(), dummy, 2 * r_max); } std::vector mdts = StackHelper::splitByStack(&mdt0); const long gc = mdts.size(); std::vector> prevRefs(2); std::vector> backprojectors(2); for (int j = 0; j < 2; j++) { backprojectors[j] = std::vector(nr_omp_threads); for (int i = 0; i < nr_omp_threads; i++) { backprojectors[j][i] = BackProjector( boxOut, ref_dim, fn_sym, interpolator, padding_factor, r_min_nn, blob_order, blob_radius, blob_alpha, data_dim, skip_gridding); } } std::cout << "Back-projecting all images ..." 
<< std::endl; time_config(); init_progress_bar(gc/nr_omp_threads); #pragma omp parallel num_threads(nr_omp_threads) { int threadnum = omp_get_thread_num(); backprojectors[0][threadnum].initZeros(2 * r_max); backprojectors[1][threadnum].initZeros(2 * r_max); RFLOAT rot, tilt, psi, fom, r_ewald_sphere; Matrix2D A3D; MultidimArray Fctf; Matrix1D trans(2); FourierTransformer transformer; #pragma omp for for (int g = 0; g < gc; g++) { std::vector > obsR; try { obsR = StackHelper::loadStack(&mdts[g]); } catch (RelionError XE) { std::cerr << "warning: unable to load micrograph #" << (g+1) << "\n"; continue; } const long pc = obsR.size(); for (int p = 0; p < pc; p++) { int randSubset; mdts[g].getValue(EMDL_PARTICLE_RANDOM_SUBSET, randSubset, p); randSubset = randSubset - 1; // Rotations if (ref_dim == 2) { rot = tilt = 0.; } else { mdts[g].getValue(EMDL_ORIENT_ROT, rot, p); mdts[g].getValue(EMDL_ORIENT_TILT, tilt, p); } psi = 0.; mdts[g].getValue(EMDL_ORIENT_PSI, psi, p); if (angular_error > 0.) { rot += rnd_gaus(0., angular_error); tilt += rnd_gaus(0., angular_error); psi += rnd_gaus(0., angular_error); //std::cout << rnd_gaus(0., angular_error) << std::endl; } Euler_angles2matrix(rot, tilt, psi, A3D); int opticsGroup = obsModel.getOpticsGroup(mdts[g], p); // If we are considering Ewald sphere curvature, the mag. matrix // has to be provided to the backprojector explicitly // (to avoid creating an Ewald ellipsoid) if (!do_ewald || Ewald_ellipsoid) { A3D = obsModel.applyAnisoMag(A3D, opticsGroup); } A3D = obsModel.applyScaleDifference(A3D, opticsGroup, boxOut, angpixOut); A3D /= padding_factor_2D; // Translations (either through phase-shifts or in real space trans.initZeros(); mdts[g].getValue(EMDL_ORIENT_ORIGIN_X_ANGSTROM, XX(trans), p); mdts[g].getValue(EMDL_ORIENT_ORIGIN_Y_ANGSTROM, YY(trans), p); XX(trans) /= angpix[opticsGroup]; YY(trans) /= angpix[opticsGroup]; if (shift_error > 0.) { XX(trans) += rnd_gaus(0., shift_error); YY(trans) += rnd_gaus(0., shift_error); } if (do_3d_rot) { trans.resize(3); mdts[g].getValue( EMDL_ORIENT_ORIGIN_Z, ZZ(trans), p); if (shift_error > 0.) { ZZ(trans) += rnd_gaus(0., shift_error); } } if (do_fom_weighting) { mdts[g].getValue( EMDL_PARTICLE_FOM, fom, p); } MultidimArray Fsub, F2D, F2DP, F2DQ; CenterFFT(obsR[p](), true); const int sPad2D = paddedSizes2D[opticsGroup]; if (padding_factor_2D > 1.0) { obsR[p] = FilterHelper::padCorner2D(obsR[p], sPad2D, sPad2D); } transformer.FourierTransform(obsR[p](), F2D); if (ABS(XX(trans)) > 0. || ABS(YY(trans)) > 0.) 
{ if (do_3d_rot) { shiftImageInFourierTransform( F2D, F2D, sPad2D, XX(trans), YY(trans), ZZ(trans)); } else { shiftImageInFourierTransform( F2D, F2D, sPad2D, XX(trans), YY(trans)); } } Fctf.resize(F2D); Fctf.initConstant(1.); CTF ctf; ctf.readByGroup(mdts[g], &obsModel, p); ctf.getFftwImage(Fctf, sPad2D, sPad2D, angpix[opticsGroup], ctf_phase_flipped, only_flip_phases, intact_ctf_first_peak, true); if (deloc_supp) { DelocalisationHelper::maskOutsideBox( ctf, mask_diameter_ds/(2.0 * angpix[opticsGroup]), angpix[opticsGroup], origSizes2D[opticsGroup], Fctf, XX(trans), YY(trans)); } obsModel.demodulatePhase(mdts[g], p, F2D); obsModel.divideByMtf(mdts[g], p, F2D); if (do_ewald) { // Ewald-sphere curvature correction applyCTFPandCTFQ(F2D, ctf, transformer, F2DP, F2DQ, angpix[opticsGroup]); // Also calculate W, store again in Fctf if (new_Ewald_weight) { ctf.applyWeightEwaldSphereCurvature_new( Fctf, sPad2D, sPad2D, angpix[opticsGroup], mask_diameter); } else { ctf.applyWeightEwaldSphereCurvature( Fctf, sPad2D, sPad2D, angpix[opticsGroup], mask_diameter); } // Also calculate the radius of the Ewald sphere (in pixels) r_ewald_sphere = boxOut * angpix[opticsGroup] / ctf.lambda; } // Subtract reference projection if (fn_sub != "") { obsModel.predictObservation( subProjector, mdts[g], p, Fsub, true, true, true); FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Fsub) { DIRECT_MULTIDIM_ELEM(F2D, n) -= DIRECT_MULTIDIM_ELEM(Fsub, n); } // Back-project difference image backprojectors[randSubset][threadnum].set2DFourierTransform( F2D, A3D); } else { if (do_ewald) { FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(F2D) { DIRECT_MULTIDIM_ELEM(Fctf, n) *= DIRECT_MULTIDIM_ELEM(Fctf, n); } } // "Normal" reconstruction, multiply X by CTF, and W by CTF^2 else { FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(F2D) { DIRECT_MULTIDIM_ELEM(F2D, n) *= DIRECT_MULTIDIM_ELEM(Fctf, n); DIRECT_MULTIDIM_ELEM(Fctf, n) *= DIRECT_MULTIDIM_ELEM(Fctf, n); } } // Do the following after squaring the CTFs! 
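// With --fom_weighting, both the CTF-multiplied data term (F2D) and the squared-CTF weight term (Fctf) are scaled by the particle's figure of merit (_rlnParticleFigureOfMerit), so low-confidence particles contribute proportionally less to the reconstruction.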
if (do_fom_weighting) { FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(F2D) { DIRECT_MULTIDIM_ELEM(F2D, n) *= fom; DIRECT_MULTIDIM_ELEM(Fctf, n) *= fom; } } DIRECT_A2D_ELEM(F2D, 0, 0) = 0.0; if (do_ewald) { Matrix2D magMat; if (obsModel.hasMagMatrices && !Ewald_ellipsoid) { magMat = obsModel.getMagMatrix(opticsGroup); } else { magMat = Matrix2D(2,2); magMat.initIdentity(); } backprojectors[randSubset][threadnum].set2DFourierTransform( F2DP, A3D, &Fctf, r_ewald_sphere, true, &magMat); backprojectors[randSubset][threadnum].set2DFourierTransform( F2DQ, A3D, &Fctf, r_ewald_sphere, false, &magMat); } else { backprojectors[randSubset][threadnum].set2DFourierTransform( F2D, A3D, &Fctf); } } if (threadnum == 0) { progress_bar(g); } } } } progress_bar(gc/nr_omp_threads); std::vector backprojector(2); for (int j = 0; j < 2; j++) { std::cerr << " + Merging volumes for half-set " << (j+1) << "...\n"; backprojector[j] = &backprojectors[j][0]; for (int bpi = 1; bpi < nr_omp_threads; bpi++) { FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(backprojector[j]->data) { DIRECT_MULTIDIM_ELEM(backprojector[j]->data, n) += DIRECT_MULTIDIM_ELEM(backprojectors[j][bpi].data, n); } backprojectors[j][bpi].data.clear(); FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(backprojector[j]->weight) { DIRECT_MULTIDIM_ELEM(backprojector[j]->weight, n) += DIRECT_MULTIDIM_ELEM(backprojectors[j][bpi].weight, n); } backprojectors[j][bpi].weight.clear(); } std::cerr << " + Symmetrising half-set " << (j+1) << "...\n"; backprojector[j]->symmetrise( nr_helical_asu, helical_twist, helical_rise/angpixOut, nr_omp_threads); } bool do_map = !no_Wiener; bool do_use_fsc = !no_Wiener; MultidimArray fsc(boxOut/2 + 1); if (!no_Wiener) { MultidimArray avg0, avg1; backprojector[0]->getDownsampledAverage(avg0, div_avg); backprojector[1]->getDownsampledAverage(avg1, div_avg); backprojector[0]->calculateDownSampledFourierShellCorrelation(avg0, avg1, fsc); } if (debug) { std::ofstream fscNew(fn_out+"_prelim_FSC.dat"); for (int i = 0; i < fsc.xdim; i++) { fscNew << i << " " << fsc(i) << "\n"; } } for (int j = 0; j < 2; j++) { if (mask_diameter_filt > 0.0) { std::cout << " + Applying spherical mask of diameter " << mask_diameter_filt << " ..." << std::endl; const double r0 = mask_diameter_filt/2.0; const double r1 = r0 + flank_width; Image tempC; Image tempR; BackProjector::decenterWhole(backprojector[j]->data, tempC()); NewFFT::inverseFourierTransform(tempC(), tempR(), NewFFT::FwdOnly, false); tempR = FilterHelper::raisedCosEnvCorner3D(tempR, r0, r1); NewFFT::FourierTransform(tempR(), tempC(), NewFFT::FwdOnly); BackProjector::recenterWhole(tempC(), backprojector[j]->data); BackProjector::decenterWhole(backprojector[j]->weight, tempC()); NewFFT::inverseFourierTransform(tempC(), tempR(), NewFFT::FwdOnly, false); tempR = FilterHelper::raisedCosEnvCorner3D(tempR, r0, r1); NewFFT::FourierTransform(tempR(), tempC(), NewFFT::FwdOnly); BackProjector::recenterWhole(tempC(), backprojector[j]->weight); } Image weightOut; std::cout << " + Starting the reconstruction ..." << std::endl; MultidimArray tau2; if (do_use_fsc) backprojector[j]->updateSSNRarrays(1., tau2, dummy, dummy, dummy, fsc, do_use_fsc, true); backprojector[j]->reconstruct(vol(), grid_iters, do_map, tau2, 1., 1., -1, false, writeWeights? 
&weightOut : 0); if (writeWeights) { std::stringstream sts; sts << (j+1); std::string fnWgh = fn_out + "_half" + sts.str() + "_class001_unfil_weight.mrc"; weightOut.write(fnWgh); } prevRefs[j] = vol; } // halves for (int j = 0; j < 2; j++) { std::stringstream sts; sts << (j+1); std::string fnFull = fn_out + "_half" + sts.str() + "_class001_unfil.mrc"; prevRefs[j].write(fnFull); std::cout << " Done writing map in " << fnFull << "\n"; } } }; int main(int argc, char *argv[]) { reconstruct_parameters prm; try { prm.read(argc, argv); prm.reconstruct(); } catch (RelionError XE) { //prm.usage(); std::cerr << XE; return RELION_EXIT_FAILURE; } return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/estimate_gain.cpp000066400000000000000000000200401411340063500200460ustar00rootroot00000000000000/*************************************************************************** * * Author: "Takanori Nakane" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include #include #include #include #include #include #include #include /* A simple program to estimate the gain reference by averaging all unaligned frames and taking the inverse. * This program should be used only as a last resort when the gain reference was lost or badly calibrated. */ class estimate_gain { public: IOParser parser; FileName fn_movie_star, fn_out; int n_threads, max_frames, eer_upsampling; bool randomise_order, dont_invert; void read(int argc, char **argv) { parser.setCommandLine(argc, argv); int general_section = parser.addSection("Options"); fn_movie_star = parser.getOption("--i", "Input movie STAR file"); fn_out = parser.getOption("--o", "Output file name"); n_threads = textToInteger(parser.getOption("--j", "Number of threads", "1")); max_frames = textToInteger(parser.getOption("--max_frames", "Target number of frames to average (rounded to movies; -1 means use all)", "-1")); randomise_order = parser.checkOption("--random", "Randomise the order of input movies before taking subset"); dont_invert = parser.checkOption("--dont_invert", "Don't take the inverse but simply writes the sum"); eer_upsampling = textToInteger(parser.getOption("--eer_upsampling", "EER upsampling (1 = 4K or 2 = 8K)", "2")); // --eer_upsampling 3 is only for debugging. Hidden. 
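// For EER input, frames are rendered by EERRenderer at the requested --eer_upsampling (1 = physical 4K grid, 2 = super-resolution 8K grid) before the per-pixel counts are summed; for EER the gain reference is the average count itself, which is why the --dont_invert flag is required above.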
if (eer_upsampling != 1 && eer_upsampling != 2 && eer_upsampling != 3) REPORT_ERROR("eer_upsampling must be 1, 2 or 3"); if (parser.checkForErrors()) { REPORT_ERROR("Errors encountered on the command line (see above), exiting..."); } } void run() { MetaDataTable MDin; MDin.read(fn_movie_star, "movies"); // Support non-optics group STAR files if (MDin.numberOfObjects() == 0) MDin.read(fn_movie_star, ""); const int n_total_movies = MDin.numberOfObjects(); if (n_total_movies == 0) REPORT_ERROR("No movies in the input STAR file"); else std::cout << "Number of movies in the STAR file: " << n_total_movies << std::endl; FileName fn_img; if (!MDin.getValue(EMDL_MICROGRAPH_MOVIE_NAME, fn_img, 0)) REPORT_ERROR("The input STAR file does not contain the rlnMicrographMovieName column."); Image Ihead; std::vector> Isums(n_threads); int ny = 0, nx = 0; if (randomise_order) { MDin.randomiseOrder(); std::cout << "Randomised the order of input movies." << std::endl; } MDin.getValue(EMDL_MICROGRAPH_MOVIE_NAME, fn_img, 0); if (EERRenderer::isEER(fn_img)) { EERRenderer renderer; renderer.read(fn_img, eer_upsampling); ny = renderer.getHeight(); nx = renderer.getWidth(); if (!dont_invert) REPORT_ERROR("The input movie is EER. For EER, the gain reference is expected to be the average of counts, not the inverse as in K2/K3. Thus, you need the --dont_invert flag."); } else { Ihead.read(fn_img, false, -1, false, true); // select_img -1, mmap false, is_2D true ny = YSIZE(Ihead()); nx = XSIZE(Ihead()); } std::cout << "The size of the input: NY = " << ny << " NX = " << nx << std::endl; for (int i = 0; i < n_threads; i++) { Isums[i]().resize(ny, nx); Isums[i]().initZeros(ny, nx); } std::cout << "Summing frames ... " << std::endl; long n_frames_used = 0, n_movies_done = 0; if (max_frames > 0) init_progress_bar(max_frames); else init_progress_bar(MDin.numberOfObjects()); FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDin) { MDin.getValue(EMDL_MICROGRAPH_MOVIE_NAME, fn_img); int n_frames; if (!EERRenderer::isEER(fn_img)) { Ihead.read(fn_img, false, -1, false, true); n_frames = NSIZE(Ihead()); if (ny != YSIZE(Ihead()) || nx != XSIZE(Ihead())) { std::cerr << "The size of the movie " + fn_img + " does not match the size of the others. Skipped." << std::endl; continue; } #pragma omp parallel for num_threads(n_threads) for (int iframe = 0; iframe < n_frames; iframe++) { Image Iframe; Iframe.read(fn_img, true, iframe, false, true); // mmap false, is_2D true const int tid = omp_get_thread_num(); FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Iframe()) { DIRECT_MULTIDIM_ELEM(Isums[tid](), n) += DIRECT_MULTIDIM_ELEM(Iframe(), n); } } } else { EERRenderer renderer; renderer.read(fn_img, eer_upsampling); n_frames = renderer.getNFrames(); if (ny != renderer.getHeight() || nx != renderer.getWidth()) { std::cerr << "The size of the movie " + fn_img + " does not match the size of the others. Skipped."
<< std::endl; continue; } const int eer_grouping = (n_frames + n_threads - 1) / n_threads; #pragma omp parallel for num_threads(n_threads) for (int frame = 1; frame < n_frames; frame += eer_grouping) { int frame_end = frame + eer_grouping - 1; if (frame_end > n_frames) frame_end = n_frames; const int tid = omp_get_thread_num(); // std::cout << " Thread " << tid << ": Rendering EER (hardware) frame " << frame << " to " << frame_end << std::endl; MultidimArray buf; // unfortunately this function clears the buffer renderer.renderFrames(frame, frame_end, buf); FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(buf) { DIRECT_MULTIDIM_ELEM(Isums[tid](), n) += DIRECT_MULTIDIM_ELEM(buf, n); } } } n_frames_used += n_frames; n_movies_done++; if (max_frames > 0 && n_frames_used > max_frames) break; if (max_frames > 0) progress_bar(n_frames_used); else progress_bar(n_movies_done); } if (max_frames > 0) progress_bar(max_frames); else progress_bar(MDin.numberOfObjects()); for (int i = 1; i < n_threads; i++) { #pragma omp parallel for num_threads(n_threads) FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Isums[0]()) { DIRECT_MULTIDIM_ELEM(Isums[0](), n) += DIRECT_MULTIDIM_ELEM(Isums[i](), n); } } std::cout << "Summed " << n_frames_used << " frames from " << n_movies_done << " movies." << std::endl; if (dont_invert) { std::cout << "Because of --dont_invert, the sum is written as is." << std::endl; } else { double total_count = 0; #pragma omp parallel for num_threads(n_threads) reduction(+: total_count) FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Isums[0]()) total_count += DIRECT_MULTIDIM_ELEM(Isums[0](), n); const double avg_count = total_count / ((double)nx * ny * n_frames_used); std::cout << "Average count per pixel per frame: " << avg_count << std::endl; #pragma omp parallel for num_threads(n_threads) FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Isums[0]()) { if (DIRECT_MULTIDIM_ELEM(Isums[0](), n) == 0) DIRECT_MULTIDIM_ELEM(Isums[0](), n) = 1.0; else DIRECT_MULTIDIM_ELEM(Isums[0](), n) = n_frames_used / DIRECT_MULTIDIM_ELEM(Isums[0](), n) * avg_count; } } Isums[0].write(fn_out); std::cout << "Written the estimated gain to " << fn_out << std::endl; } }; int main(int argc, char **argv) { estimate_gain app; try { app.read(argc, argv); app.run(); } catch (RelionError XE) { std::cerr << XE; return RELION_EXIT_FAILURE; } return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/external_reconstruct.cpp000066400000000000000000000100731411340063500215170ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #include #include #include #include #include #include #include #include #include "src/jaz/complex_io.h" class ext_recons_parameters { public: FileName fn_star, fn_recons, fn_data_real, fn_data_imag, fn_weight, fn_out; MultidimArray tau2; RFLOAT tau2_fudge; float padding_factor; int ori_size, current_size, ref_dim; int verb; bool skip_gridding, do_map; void read(int argc, char **argv) { if (argc < 2) { REPORT_ERROR(" Usage: relion_external_reconstruct input.star"); } FileName fn_star = argv[1]; if (fn_star.getExtension() != "star") { REPORT_ERROR(" ERROR: input argument does not have a .star extension."); } skip_gridding = checkParameter(argc, argv, "--skip_gridding"); do_map = !checkParameter(argc, argv, "--no_map"); fn_out = (checkParameter(argc, argv, "--o")) ? getParameter(argc, argv, "--o") : ""; MetaDataTable MDlist, MDtau; MDlist.read(fn_star, "external_reconstruct_general"); MDlist.getValue(EMDL_OPTIMISER_EXTERNAL_RECONS_DATA_REAL, fn_data_real); MDlist.getValue(EMDL_OPTIMISER_EXTERNAL_RECONS_DATA_IMAG, fn_data_imag); MDlist.getValue(EMDL_OPTIMISER_EXTERNAL_RECONS_WEIGHT, fn_weight); MDlist.getValue(EMDL_OPTIMISER_EXTERNAL_RECONS_RESULT, fn_recons); MDlist.getValue(EMDL_MLMODEL_TAU2_FUDGE_FACTOR, tau2_fudge); MDlist.getValue(EMDL_MLMODEL_PADDING_FACTOR, padding_factor); MDlist.getValue(EMDL_MLMODEL_DIMENSIONALITY, ref_dim); MDlist.getValue(EMDL_MLMODEL_ORIGINAL_SIZE, ori_size); MDlist.getValue(EMDL_MLMODEL_CURRENT_SIZE, current_size); if (fn_out != "") fn_recons = fn_out; MDtau.read(fn_star, "external_reconstruct_tau2"); tau2.resize(MDtau.numberOfObjects()); int idx = 0; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDtau) { RFLOAT mytau2; MDtau.getValue(EMDL_MLMODEL_TAU2_REF, mytau2); DIRECT_MULTIDIM_ELEM(tau2, idx) = mytau2; idx++; } } void reconstruct() { BackProjector BP(ori_size, ref_dim, "C1", TRILINEAR, padding_factor); BP.initZeros(current_size); if (skip_gridding) BP.skip_gridding = skip_gridding; Image Idata; Image Iweight; std::string fn_ext = "."+fn_data_real.getExtension(); std::string fn_root = fn_data_real.beforeFirstOf("_real"); ComplexIO::read(Idata, fn_root, fn_ext); Iweight.read(fn_weight); // Could there be a 1-pixel different in size? use FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM to be safe const int r_max = current_size / 2; const int r_max2 = ROUND(r_max * padding_factor) * ROUND(r_max * padding_factor); FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM(Idata()) { if (kp*kp + ip*ip + jp*jp < r_max2) { A3D_ELEM(BP.data, kp, ip, jp) = DIRECT_A3D_ELEM(Idata(), k, i, j); A3D_ELEM(BP.weight, kp, ip, jp) = DIRECT_A3D_ELEM(Iweight(), k, i, j); } } BP.reconstruct(Iweight(), 10, do_map, tau2, tau2_fudge); Iweight.write(fn_recons); } }; int main(int argc, char *argv[]) { ext_recons_parameters prm; try { prm.read(argc, argv); prm.reconstruct(); } catch (RelionError XE) { std::cerr << XE; return RELION_EXIT_FAILURE; } return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/find_tiltpairs.cpp000066400000000000000000000345541411340063500202670ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include #include #include #include #include #include #include class angular_error_parameters { public: FileName fn_unt, fn_til, fn_out; MetaDataTable MDunt, MDtil; RFLOAT tilt, tilt0, tiltF, tiltStep; RFLOAT rot, rot0, rotF, rotStep; int size, dim; int x0, xF, xStep; int y0, yF, yStep; RFLOAT acc; int mind2; bool do_opt; RFLOAT best_rot, best_tilt; int best_x, best_y; Matrix2D Pass; std::vector p_unt, p_til, p_map, pairs_t2u; // I/O Parser IOParser parser; void usage() { parser.writeUsage(std::cerr); } void read(int argc, char **argv) { parser.setCommandLine(argc, argv); int general_section = parser.addSection("General Options"); fn_unt = parser.getOption("--u", "STAR file with the untilted xy-coordinates"); fn_til = parser.getOption("--t", "STAR file with the tilted xy-coordinates"); size = textToInteger(parser.getOption("--size", "Largest dimension of the micrograph (in pixels), e.g. 4096")); dim = textToInteger(parser.getOption("--dim", "Dimension of boxed particles (for EMAN .box files in pixels)", "200")); acc = textToFloat(parser.getOption("--acc", "Allowed accuracy (in pixels), e.g. half the particle diameter")); tilt = textToFloat(parser.getOption("--tilt", "Fix tilt angle (in degrees)", "99999.")); rot = textToFloat(parser.getOption("--rot", "Fix direction of the tilt axis (in degrees), 0 = along y, 90 = along x", "99999.")); do_opt = !parser.checkOption("--dont_opt", "Skip optimization of the transformation matrix"); mind2 = ROUND(acc * acc); int angle_section = parser.addSection("Specified tilt axis and translational search ranges"); tilt0 = textToFloat(parser.getOption("--tilt0", "Minimum tilt angle (in degrees)","0.")); tiltF = textToFloat(parser.getOption("--tiltF", "Maximum tilt angle (in degrees)","99999.")); if (tiltF == 99999.) tiltF = tilt0; tiltStep = textToFloat(parser.getOption("--tiltStep", "Tilt angle step size (in degrees)","1.")); rot0 = textToFloat(parser.getOption("--rot0", "Minimum rot angle (in degrees)","0.")); rotF = textToFloat(parser.getOption("--rotF", "Maximum rot angle (in degrees)","99999.")); if (rotF == 99999.) rotF = rot0; rotStep = textToFloat(parser.getOption("--rotStep", "Rot angle step size (in degrees)","1.")); x0 = textToInteger(parser.getOption("--x0", "Minimum X offset (pixels)","-99999")); xF = textToInteger(parser.getOption("--xF", "Maximum X offset (pixels)","99999")); xStep = textToInteger(parser.getOption("--xStep", "X offset step size (pixels)","-1")); y0 = textToInteger(parser.getOption("--y0", "Minimum Y offset (pixels)","-99999")); yF = textToInteger(parser.getOption("--yF", "Maximum Y offset (pixels)","99999")); yStep = textToInteger(parser.getOption("--yStep", "Y offset step size (pixels)","-1")); // Check for errors in the command-line option if (parser.checkForErrors()) REPORT_ERROR("Errors encountered on the command line, exiting..."); // If tilt and rot were given: do not search those if (tilt != 99999.) { tilt0 = tiltF = tilt; tiltStep = 1.; } if (rot != 99999.)
{ rot0 = rotF = rot; rotStep = 1.; } // By default search the entire micrograph x0 = XMIPP_MAX(x0, -size); xF = XMIPP_MIN(xF, size); // By default use a xStep of one third the accuracy if (xStep < 0) xStep = acc / 3; // By default treat y search in the same way as the x-search if (y0 == -99999) y0 = x0; if (yF == 99999) yF = xF; if (yStep < 0) yStep = xStep; // Done reading, now fill p_unt and p_til MDunt.read(fn_unt); MDtil.read(fn_til); // Check for the correct labels if (!MDunt.containsLabel(EMDL_IMAGE_COORD_X) || !MDunt.containsLabel(EMDL_IMAGE_COORD_Y)) REPORT_ERROR("ERROR: Untilted STAR file does not contain the rlnCoordinateX or Y labels"); if (!MDtil.containsLabel(EMDL_IMAGE_COORD_X) || !MDtil.containsLabel(EMDL_IMAGE_COORD_Y)) REPORT_ERROR("ERROR: Tilted STAR file does not contain the rlnCoordinateX or Y labels"); RFLOAT x, y; p_unt.clear(); FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDunt) { MDunt.getValue(EMDL_IMAGE_COORD_X, x); MDunt.getValue(EMDL_IMAGE_COORD_Y, y); p_unt.push_back((int)x); p_unt.push_back((int)y); } p_til.clear(); FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDtil) { MDtil.getValue(EMDL_IMAGE_COORD_X, x); MDtil.getValue(EMDL_IMAGE_COORD_Y, y); p_til.push_back((int)x); p_til.push_back((int)y); } // Initialize best transformation params best_x = best_y = 9999; best_rot = best_tilt = 9999.; } int getNumberOfPairs(int dx=0, int dy=0) { pairs_t2u.clear(); pairs_t2u.resize(p_til.size()/2, -1); int result = 0; for (int u = 0; u < p_map.size()/2; u++) { for (int t = 0; t < p_til.size()/2; t++) { // only search over particles that do not have a pair yet if (pairs_t2u[t] < 0) { int XX = p_map[2*u]-p_til[2*t]+dx; XX*= XX; int YY = p_map[2*u+1]-p_til[2*t+1]+dy; XX += YY*YY; if (XX < mind2) { result++; pairs_t2u[t] = u; //No longer have to search for all the others in q break; } } } } return result; } RFLOAT getAverageDistance(int dx=0, int dy=0) { std::ofstream fh; FileName fn_map; fn_map = "dist.txt"; fh.open(fn_map.c_str(), std::ios::out); RFLOAT result = 0.; int count = 0; for (int t = 0; t < pairs_t2u.size(); t++) { int u = pairs_t2u[t]; if (u >= 0) { int XX = p_map[2*u]-p_til[2*t]+dx; XX*= XX; int YY = p_map[2*u+1]-p_til[2*t+1]+dy; XX += YY*YY; //std::cerr << " sqrt(XX)= " << sqrt(XX) << " t= " << t << " u= " << u << std::endl; result += sqrt(XX); fh << sqrt(XX) << std::endl; count ++; } } result /= (RFLOAT)count; fh.close(); return result; } int prunePairs(int dx=0, int dy=0) { int nprune = 0; // Prune for RFLOAT pairs for (int t = 0; t < pairs_t2u.size(); t++) { int u = pairs_t2u[t]; if (u >= 0) { for (int tp = t+1; tp < pairs_t2u.size(); tp++) { int up = pairs_t2u[tp]; // Find pairs to the same tilted position if (up == u) { nprune++; // Only keep the nearest neighbours as a pair int XX = p_map[2*u]-p_til[2*t]+dx; XX*= XX; int YY = p_map[2*u+1]-p_til[2*t+1]+dy; XX += YY*YY; //up==p int XXp = p_map[2*u]-p_til[2*tp]+dx; XXp*= XXp; int YYp = p_map[2*u+1]-p_til[2*tp+1]+dy; XXp += YYp*YYp; if (XX < XXp) pairs_t2u[tp] = -1; else pairs_t2u[t] = -1; } } } } return nprune; } void mapOntoTilt() { p_map.resize(p_unt.size()); for (int u = 0; u < p_map.size()/2; u++) { RFLOAT xu = (RFLOAT)p_unt[2*u]; RFLOAT yu = (RFLOAT)p_unt[2*u+1]; p_map[2*u] = ROUND(MAT_ELEM(Pass, 0, 0) * xu + MAT_ELEM(Pass, 0, 1) * yu + MAT_ELEM(Pass, 0, 2)); p_map[2*u+1] = ROUND(MAT_ELEM(Pass, 1, 0) * xu + MAT_ELEM(Pass, 1, 1) * yu + MAT_ELEM(Pass, 1, 2)); } } RFLOAT optimiseTransformationMatrix(bool do_optimise_nr_pairs) { std::vector best_pairs_t2u, best_map; RFLOAT score, best_score, best_dist=9999.; if 
(do_optimise_nr_pairs) best_score = 0.; else best_score = -999999.; int nn = XMIPP_MAX(1., (rotF-rot0)/rotStep); nn *= XMIPP_MAX(1., (tiltF-tilt0)/tiltStep); nn *= XMIPP_MAX(1., (xF-x0)/xStep); nn *= XMIPP_MAX(1., (yF-y0)/yStep); int n = 0; init_progress_bar(nn); for (RFLOAT rot = rot0; rot <= rotF; rot+= rotStep) { for (RFLOAT tilt = tilt0; tilt <= tiltF; tilt+= tiltStep) { // Assume tilt-axis lies in-plane... RFLOAT psi = -rot; // Rotate all points correspondingly Euler_angles2matrix(rot, tilt, psi, Pass); //std::cerr << " Pass= " << Pass << std::endl; // Zero-translations for now (these are added in the x-y loops below) MAT_ELEM(Pass, 0, 2) = MAT_ELEM(Pass, 1, 2) = 0.; mapOntoTilt(); for (int x = x0; x <= xF; x += xStep) { for (int y = y0; y <= yF; y += yStep, n++) { if (do_optimise_nr_pairs) score = getNumberOfPairs(x, y); else score = -getAverageDistance(x, y); // negative because smaller distance is better! bool is_best = false; if (do_optimise_nr_pairs && score==best_score) { RFLOAT dist = getAverageDistance(x, y); if (dist < best_dist) { best_dist = dist; is_best = true; } } if (score > best_score || is_best) { best_score = score; best_pairs_t2u = pairs_t2u; best_rot = rot; best_tilt = tilt; best_x = x; best_y = y; } if (n%1000==0) progress_bar(n); } } } } progress_bar(nn); // Update pairs with the best_pairs if (do_optimise_nr_pairs) pairs_t2u = best_pairs_t2u; // Update the Passing matrix and the mapping Euler_angles2matrix(best_rot, best_tilt, -best_rot, Pass); // Zero-translations for now (these are added in the x-y loops below) MAT_ELEM(Pass, 0, 2) = MAT_ELEM(Pass, 1, 2) = 0.; mapOntoTilt(); return best_score; } void optimiseTransformationMatrixContinuous() { // Get coordinates of all pairs: Matrix2D Au, Bt; Au.initZeros(3, 3); Bt.initZeros(3, 3); Pass.initZeros(4,4); // Add all pairs to dependent matrices (adapted from add_point in Xmipps micrograph_mark main_widget_mark.cpp) for (int t = 0; t < pairs_t2u.size(); t++) { int u = pairs_t2u[t]; if (u >= 0) { Au(0, 0) += (RFLOAT)(p_unt[2*u] * p_unt[2*u]); Au(0, 1) += (RFLOAT)(p_unt[2*u] * p_unt[2*u+1]); Au(0, 2) += (RFLOAT)(p_unt[2*u]); Au(1, 0) = Au(0, 1); Au(1, 1) += (RFLOAT)(p_unt[2*u+1] * p_unt[2*u+1]); Au(1, 2) += (RFLOAT)(p_unt[2*u+1]); Au(2, 0) = Au(0, 2); Au(2, 1) = Au(1, 2); Au(2, 2) += 1.; Bt(0, 0) += (RFLOAT)(p_til[2*t] * p_unt[2*u]); Bt(0, 1) += (RFLOAT)(p_til[2*t+1] * p_unt[2*u]); Bt(0, 2) = Au(0, 2); Bt(1, 0) += (RFLOAT)(p_til[2*t] * p_unt[2*u+1]); Bt(1, 1) += (RFLOAT)(p_til[2*t+1] * p_unt[2*u+1]); Bt(1, 2) = Au(1, 2); Bt(2, 0) += (RFLOAT)(p_til[2*t]); Bt(2, 1) += (RFLOAT)(p_til[2*t+1]); Bt(2,2) += 1.; } } // Solve equations solve(Au, Bt, Pass); Pass = Pass.transpose(); std::cout << " Optimised passing matrix= " << Pass << std::endl; //These values can be complete CRAP. Better not show them at all.... //RFLOAT rotp, tiltp, psip; //tiltp = acos(Pass(1,1)); //rotp = acos(Pass(1,0)/sin(tiltp)); //psip = acos(Pass(0,1)/-sin(tiltp)); //std::cout << " Optimised tilt angle= " << RAD2DEG(tiltp) << std::endl; //std::cout << " Optimised in-plane rot angles= " << RAD2DEG(rotp) <<" and "<< RAD2DEG(psip) << std::endl; // Map using the new matrix mapOntoTilt(); } void run() { // First do a crude search over the given parameter optimization space // Optimize the number of pairs here... 
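// Stage 1 (below): a discrete grid search over tilt-axis direction, tilt angle and x/y offsets that maximises the number of untilted/tilted pairs falling within the allowed accuracy. Stage 2 (optimiseTransformationMatrixContinuous, skipped with --dont_opt) then refines the full 2D passing matrix by least squares over the retained pairs.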
int npart = optimiseTransformationMatrix(true); // Get rid of RFLOAT pairs (two different untilted coordinates are close to a tilted coordinate) int nprune = 0; nprune = prunePairs(best_x, best_y); // Calculate average distance between the pairs RFLOAT avgdist = getAverageDistance(best_x, best_y); std::cout << " Before optimization of the passing matrix: "<=0) // std::cerr << " i= " << i << " pairs[i]= " << pairs[i] << std::endl; } fh.close(); #endif if (do_opt) { optimiseTransformationMatrixContinuous(); npart = getNumberOfPairs(); nprune = prunePairs(); avgdist = getAverageDistance(); std::cout << " After optimization of the passing matrix: "<= 0) { MDu.addObject(); MDu.setValue(EMDL_IMAGE_COORD_X, ((RFLOAT)(p_unt[2*u]))); MDu.setValue(EMDL_IMAGE_COORD_Y, ((RFLOAT)(p_unt[2*u+1]))); MDt.addObject(); MDt.setValue(EMDL_IMAGE_COORD_X, ((RFLOAT)(p_til[2*t]))); MDt.setValue(EMDL_IMAGE_COORD_Y, ((RFLOAT)(p_til[2*t+1]))); } } fn_unt = fn_unt.withoutExtension() + "_pairs.star"; fn_til = fn_til.withoutExtension() + "_pairs.star"; MDu.write(fn_unt); MDt.write(fn_til); std::cout << " Written out coordinate STAR files: " << fn_unt << " and " << fn_til < #include "../flex_analyser.h" int main(int argc, char *argv[]) { FlexAnalyser prm; try { prm.read(argc, argv); prm.initialise(); prm.run(); } catch (RelionError XE) { std::cerr << XE; return RELION_EXIT_FAILURE; } return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/flex_analyse_mpi.cpp000066400000000000000000000030241411340063500205570ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include #include "../flex_analyser.h" int main(int argc, char *argv[]) { FlexAnalyser prm; int rank, size; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); // Handle errors MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN); try { prm.read(argc, argv); // Don't put any output to screen for mpi follower prm.verb = (rank == 0) ? 1 : 0; prm.initialise(); prm.run(rank, size); } catch (RelionError XE) { std::cerr << XE; return RELION_EXIT_FAILURE; } MPI_Barrier(MPI_COMM_WORLD); return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/helix_inimodel2d.cpp000066400000000000000000000023111411340063500204550ustar00rootroot00000000000000 /*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include "../helix_inimodel2d.h" int main(int argc, char *argv[]) { HelixAligner prm; try { prm.parseInitial(argc, argv); prm.initialise(); prm.run(); } catch (RelionError XE) { prm.usage(); std::cerr << XE; return RELION_EXIT_FAILURE; } return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/helix_toolbox.cpp000066400000000000000000001226111411340063500201230ustar00rootroot00000000000000/*************************************************************************** * * Author: "Shaoda He" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include #include #include #include #include #include #include #include #include #include #include #define CART_TO_HELICAL_COORDS true #define HELICAL_TO_CART_COORDS false class helix_bilder_parameters { public: IOParser parser; // Available options // PLEASE MAKE SURE THAT ALL THESE OPTIONS ARE INITIALISED IN THE PARSING STEP! 
// ---------------------------------------- bool show_usage_for_an_option; bool do_extract_coords_relion; bool do_extract_coords_ximdisp; bool do_convert_coords_xim2rln; bool do_extract_coords_eman; bool do_convert_coords_emn2rln; bool do_combine_GCTF_results; bool do_apply_spherical_mask_3D; bool do_crop_central_Z; bool do_create_cylinder_3D; bool do_set_default_tilt; bool do_remove_segments_with_bad_tilt; bool do_remove_segments_with_bad_psi; bool do_remove_mics_with_bad_ctf; bool do_simulate_helix_3D; bool do_impose_helical_symmetry; bool do_local_search_helical_symmetry; bool do_PDB_helix; bool do_divide_star_file; bool do_merge_star_files; bool do_sort_datastar_tubeID; bool do_simulate_helical_segments_2D; bool do_cut_out; bool do_set_xmipp_origin; bool do_impose_helical_symmetry_fourier_space; bool do_check_parameters; bool do_normalise_segments; bool do_interpo_3D_curve; bool do_select_3D_subtomo_from_2D_proj; bool do_average_au_2d; bool do_debug; // ---------------------------------------- // Input files FileName fn_in, fn_in1, fn_in2; // Rootnames of input files FileName fn_in_root, fn_in1_root, fn_in2_root; // Output files FileName fn_out; // Rootnames of output files FileName fn_out_root; // Dimensions int Xdim, Ydim, boxdim; // Number of helical subunits int nr_subunits; // Number of helical asymmetrical units int nr_asu; // Rotational symmetry - Cn int sym_Cn; // Number of filaments in a helix with seam (>= 2) int nr_filaments_helix_with_seam; // Helical rise and its local searches RFLOAT rise_A, rise_min_A, rise_max_A, rise_inistep_A; // Helical twist and its local searches RFLOAT twist_deg, twist_min_deg, twist_max_deg, twist_inistep_deg; // Pixel size in Angstroms RFLOAT pixel_size_A; // Width of soft edge RFLOAT width_edge_pix; // % of box size as the 2D / 3D spherical mask RFLOAT sphere_percentage; // % of Zdim as the central Z mask for helices RFLOAT z_percentage; // Inner and outer diameters of Z cylindrical mask RFLOAT cyl_inner_diameter_A, cyl_outer_diameter_A; // Remove segments of bad tilt angles - Maximum deviation of tilt angles allowed (away from 90 degrees) RFLOAT tilt_max_dev_deg; // Remove segments of bad psi angles - Maximum deviation of psi angles allowed (away from psi prior) RFLOAT psi_max_dev_deg; // Translate all atoms in the original PDB file to the center of mass of the molecule? bool do_center_of_mass_each_PDB_molecule; // Divide one into multiple STAR files - Number of output files int nr_outfiles; // Simulate helical segments with a STAR file - Number of helical tubes int nr_tubes; // Simulate helical subtomograms with a STAR file ? bool is_3d_tomo; // Simulate helical segments / subtomograms with a STAR file - sigma tilt, psi and offset RFLOAT sigma_tilt, sigma_psi, sigma_offset; // Simulate helical segments / subtomograms - Standard deviation of added white Gaussian noise RFLOAT white_noise; // Diameter of helical subunits (in Angstroms) RFLOAT subunit_diameter_A; // Minimum threshold of CTF FOM value, lowest resolution of EPA, minimum and maximum of defocus values RFLOAT ctf_fom_min, EPA_lowest_res, df_min, df_max; // Do bimoidal searches of tilt and psi angles in 3D helical reconstruction? bool do_bimodal_searches; // Cut helical tubes into segments? bool do_cut_into_segments; // Ignore helical symmetry in 3D reconstruction? bool ignore_helical_symmetry; // Perform local searches of helical symmetry in 3D reconstruction? bool do_helical_symmetry_local_refinement; // Construct a 3D reference for helical reconstruction with polarity along Z axis? 
bool do_polar_reference; // Top-bottom width ratio for construction of polarised helical reference RFLOAT topbottom_ratio; // Cut out a small part of the helix within this angle (in degrees) RFLOAT ang; // Binning factor used in manual segment picking int binning_factor; // Random seed int random_seed; // Verbosity? bool verb; void initBoolOptions() { show_usage_for_an_option = false; do_extract_coords_relion = false; do_extract_coords_ximdisp = false; do_convert_coords_xim2rln = false; do_extract_coords_eman = false; do_convert_coords_emn2rln = false; do_combine_GCTF_results = false; do_apply_spherical_mask_3D = false; do_crop_central_Z = false; do_create_cylinder_3D = false; do_set_default_tilt = false; do_remove_segments_with_bad_tilt = false; do_remove_segments_with_bad_psi = false; do_remove_mics_with_bad_ctf = false; do_simulate_helix_3D = false; do_impose_helical_symmetry = false; do_local_search_helical_symmetry = false; do_PDB_helix = false; do_divide_star_file = false; do_merge_star_files = false; do_sort_datastar_tubeID = false; do_simulate_helical_segments_2D = false; do_cut_out = false; do_set_xmipp_origin = false; do_impose_helical_symmetry_fourier_space = false; do_check_parameters = false; do_normalise_segments = false; do_interpo_3D_curve = false; do_select_3D_subtomo_from_2D_proj = false; do_average_au_2d = false; do_debug = false; }; helix_bilder_parameters() { clear(); }; ~helix_bilder_parameters() { clear(); }; void usage() { parser.writeUsage(std::cerr); }; void writeCommand(FileName fn_cmd) { std::ofstream ofs; ofs.open(fn_cmd.c_str(), std::ofstream::out | std::ofstream::app); time_t now = time(0); char nodename[64] = "undefined"; gethostname(nodename,sizeof(nodename)); std::string hostname(nodename); ofs << std::endl << " ++++ Executed the following command at host " << hostname << " on " << ctime(&now); ofs << " `which relion_helix_toolbox` " << std::flush; parser.writeCommandLine(ofs); ofs.close(); }; void read(int argc, char **argv) { parser.setCommandLine(argc, argv); int init_section = parser.addSection("Show usage"); show_usage_for_an_option = parser.checkOption("--function_help", "Show usage for the selected function (FEB 19, 2017)"); int options_section = parser.addSection("List of functions (alphabetically ordered)"); do_check_parameters = parser.checkOption("--check", "Check parameters for 3D helical reconstruction in RELION"); do_cut_out = parser.checkOption("--cut_out", "Cut out a small part of the helix"); do_create_cylinder_3D = parser.checkOption("--cylinder", "Create a cylinder as 3D initial reference"); do_impose_helical_symmetry = parser.checkOption("--impose", "Impose helical symmetry (in real space)"); do_interpo_3D_curve = parser.checkOption("--interpo", "Interpolate 3D curve for 3D helical sub-tomogram extraction"); do_normalise_segments = parser.checkOption("--norm", "Normalise 2D/3D helical segments in a STAR file"); do_PDB_helix = parser.checkOption("--pdb_helix", "Simulate a helix from a single PDB file of protein molecule"); do_remove_mics_with_bad_ctf = parser.checkOption("--remove_bad_ctf", "Remove micrographs with poor-quality CTF"); do_remove_segments_with_bad_tilt = parser.checkOption("--remove_bad_tilt", "Remove helical segments with large tilt angle deviation (away from 90 degrees)"); do_remove_segments_with_bad_psi = parser.checkOption("--remove_bad_psi", "Remove helical segments with large psi angle deviation (away from psi prior)"); do_local_search_helical_symmetry = parser.checkOption("--search", "Local search of helical 
symmetry"); do_select_3D_subtomo_from_2D_proj = parser.checkOption("--select_3dtomo", "Select 3D subtomograms given 2D projections"); do_simulate_helix_3D = parser.checkOption("--simulate_helix", "Create a helical 3D reference of spheres"); do_simulate_helical_segments_2D = parser.checkOption("--simulate_segments", "Simulate helical segments using a STAR file"); do_sort_datastar_tubeID = parser.checkOption("--sort_tube_id", "Sort segments in _data.star file according to helical tube IDs"); do_apply_spherical_mask_3D = parser.checkOption("--spherical_mask", "Apply soft spherical mask to 3D helical reference"); do_average_au_2d = parser.checkOption("--average_au_2d", "Average multiple asymmetrical units in 2D along the helical axis?"); int options_old_section = parser.addSection("List of functions which can be called in Relion GUI"); do_combine_GCTF_results = parser.checkOption("--combine_gctf", "Combine Autopicker priors (tilt and psi) with Gctf local search results"); do_crop_central_Z = parser.checkOption("--central_mask", "Crop the central part of a helix"); do_convert_coords_emn2rln = parser.checkOption("--coords_emn2rln", "Convert EMAN2 coordinates of helical segments into RELION STAR format"); do_convert_coords_xim2rln = parser.checkOption("--coords_xim2rln", "Convert XIMDISP coordinates of helical segments into RELION STAR format"); do_divide_star_file = parser.checkOption("--divide", "Divide one huge STAR file into many small ones"); do_extract_coords_eman = parser.checkOption("--extract_emn", "Extract EMAN2 coordinates of helical segments from specified straight tubes"); do_extract_coords_relion = parser.checkOption("--extract_rln", "Extract RELION coordinates of helical segments from specified straight tubes"); do_extract_coords_ximdisp = parser.checkOption("--extract_xim", "Extract XIMDISP coordinates of helical segments from specified straight tubes"); do_impose_helical_symmetry_fourier_space = parser.checkOption("--impose_fourier", "Impose helical symmetry (simulate what is done in 3D reconstruction in Fourier space)"); do_set_default_tilt = parser.checkOption("--init_tilt", "Set tilt angles to 90 degrees for all helical segments"); do_merge_star_files = parser.checkOption("--merge", "Merge small STAR files into a huge one"); do_set_xmipp_origin = parser.checkOption("--set_xmipp_origin", "Set Xmipp origin"); do_debug = parser.checkOption("--debug", "(Debug only)"); int params_section = parser.addSection("Parameters (alphabetically ordered)"); is_3d_tomo = parser.checkOption("--3d_tomo", "Simulate 3D subtomograms using a STAR file?"); ang = textToFloat(parser.getOption("--ang", "Cut out a small part of the helix within this angle (in degrees)", "91.")); pixel_size_A = textToFloat(parser.getOption("--angpix", "Pixel size (in Angstroms)", "1.")); do_bimodal_searches = parser.checkOption("--bimodal", "Do bimodal searches of tilt and psi angles in 3D helical reconstruction?"); binning_factor = textToInteger(parser.getOption("--bin", "Binning factor used in manual segment picking", "1")); boxdim = textToInteger(parser.getOption("--boxdim", "Box size (in pixels)", "-1")); do_center_of_mass_each_PDB_molecule = parser.checkOption("--center_pdb", "Translate all atoms in the original PDB to the center of mass of this molecule?"); ctf_fom_min = textToFloat(parser.getOption("--ctf_fom_min", "Minimum figure-of-merit - threshold used in removing micrographs with bad CTF", "-999")); cyl_inner_diameter_A = textToFloat(parser.getOption("--cyl_inner_diameter", "Inner diameter of the cylindrical 
mask (in Angstroms)", "-1")); cyl_outer_diameter_A = textToFloat(parser.getOption("--cyl_outer_diameter", "Outer diameter of the cylindrical mask (in Angstroms)", "-1")); df_min = textToFloat(parser.getOption("--df_min", "Minimum defocus (in Angstroms)", "-999999.")); df_max = textToFloat(parser.getOption("--df_max", "Maximum defocus (in Angstroms)", "999999.")); EPA_lowest_res = textToFloat(parser.getOption("--EPA_lowest_res", "Lowest EPA resolution (in Angstroms) - threshold used in removing micrographs with bad CTF", "999")); fn_in = parser.getOption("--i", "Input file", "file.in"); fn_in1 = parser.getOption("--i1", "Input file #1", "file01.in"); fn_in2 = parser.getOption("--i2", "Input file #2", "file02.in"); fn_in_root = parser.getOption("--i_root", "Rootname of input files", "_rootnameIn.star"); fn_in1_root = parser.getOption("--i1_root", "Rootname #1 of input files", "_rootnameIn01.star"); fn_in2_root = parser.getOption("--i2_root", "Rootname #2 of input files", "_rootnameIn02.star"); ignore_helical_symmetry = parser.checkOption("--ignore_helical_symmetry", "Ignore helical symmetry in 3D reconstruction?"); nr_asu = textToInteger(parser.getOption("--nr_asu", "Number of helical asymmetrical units", "1")); nr_outfiles = textToInteger(parser.getOption("--nr_outfiles", "Number of output files", "10")); nr_subunits = textToInteger(parser.getOption("--nr_subunits", "Number of helical subunits", "-1")); nr_tubes = textToInteger(parser.getOption("--nr_tubes", "Number of helical tubes", "-1")); fn_out = parser.getOption("--o", "Output file", "file.out"); fn_out_root = parser.getOption("--o_root", "Rootname of output files", "_rootnameOut.star"); do_polar_reference = parser.checkOption("--polar", "Construct a 3D reference for helical reconstruction with polarity along Z axis?"); psi_max_dev_deg = textToFloat(parser.getOption("--psi_max_dev", "Maximum deviation of psi angles allowed (away from psi prior)", "15.")); random_seed = textToFloat(parser.getOption("--random_seed", "Random seed (set to system time if negative)", "-1")); rise_A = textToFloat(parser.getOption("--rise", "Helical rise (in Angstroms)", "-1")); rise_inistep_A = textToFloat(parser.getOption("--rise_inistep", "Initial step of helical rise search (in Angstroms)", "-1")); rise_min_A = textToFloat(parser.getOption("--rise_min", "Minimum helical rise (in Angstroms)", "-1")); rise_max_A = textToFloat(parser.getOption("--rise_max", "Maximum helical rise (in Angstroms)", "-1")); nr_filaments_helix_with_seam = textToInteger(parser.getOption("--seam_nr_filaments", "Number of filaments in a helix with seam (>= 2)", "-1")); do_helical_symmetry_local_refinement = parser.checkOption("--search_sym", "Perform local searches of helical symmetry in 3D reconstruction?"); do_cut_into_segments = parser.checkOption("--segments", "Cut helical tubes into segments?"); sigma_offset = textToFloat(parser.getOption("--sigma_offset", "Sigma of translational offsets (in pixels)", "5.")); sigma_psi = textToFloat(parser.getOption("--sigma_psi", "Sigma of psi angles (in degrees)", "5.")); sigma_tilt = textToFloat(parser.getOption("--sigma_tilt", "Sigma of tilt angles (in degrees)", "5.")); sphere_percentage = textToFloat(parser.getOption("--sphere_percentage", "Diameter of spherical mask divided by the box size (0.10~0.90 or 0.01~0.99)", "0.9")); subunit_diameter_A = textToFloat(parser.getOption("--subunit_diameter", "Diameter of helical subunits (in Angstroms)", "-1")); sym_Cn = textToInteger(parser.getOption("--sym_Cn", "Rotational symmetry Cn", "1")); 
tilt_max_dev_deg = textToFloat(parser.getOption("--tilt_max_dev", "Maximum deviation of tilt angles allowed (away from +90 degrees)", "15.")); topbottom_ratio = textToFloat(parser.getOption("--topbottom_ratio", "Top-bottom width ratio for construction of polarised helical reference", "0.5")); twist_deg = textToFloat(parser.getOption("--twist", "Helical twist (in degrees, + for right-handedness)", "-1")); twist_inistep_deg = textToFloat(parser.getOption("--twist_inistep", "Initial step of helical twist search (in degrees)", "-1")); twist_min_deg = textToFloat(parser.getOption("--twist_min", "Minimum helical twist (in degrees, + for right-handedness)", "-1")); twist_max_deg = textToFloat(parser.getOption("--twist_max", "Maximum helical twist (in degrees, + for right-handedness)", "-1")); verb = parser.checkOption("--verb", "Detailed screen output?"); white_noise = textToFloat(parser.getOption("--white_noise", "Standard deviation of added white Gaussian noise", "1.")); width_edge_pix = textToFloat(parser.getOption("--width", "Width of cosine soft edge (in pixels)", "5.")); Xdim = textToInteger(parser.getOption("--xdim", "Dimension X (in pixels) of the micrographs", "4096")); Ydim = textToInteger(parser.getOption("--ydim", "Dimension Y (in pixels) of the micrographs", "4096")); z_percentage = textToFloat(parser.getOption("--z_percentage", "Percentage of cropped length (along Z axis, 0.1~0.9)", "0.3")); RFLOAT tmp_RFLOAT = 0.; if (rise_min_A > rise_max_A) SWAP(rise_min_A, rise_max_A, tmp_RFLOAT); if (twist_min_deg > twist_max_deg) SWAP(twist_min_deg, twist_max_deg, tmp_RFLOAT); // Check for errors in the command-line option if (parser.checkForErrors()) REPORT_ERROR("Errors encountered on the command line (see above), exiting..."); }; void clear() { parser.clear(); initBoolOptions(); }; void displayEmptyLine() { std::cout << "=========================================================================" << std::endl; } void run() { // Check options int valid_options = 0; valid_options += (do_extract_coords_relion) ? (1) : (0); valid_options += (do_extract_coords_ximdisp) ? (1) : (0); valid_options += (do_extract_coords_eman) ? (1) : (0); valid_options += (do_convert_coords_emn2rln) ? (1) : (0); valid_options += (do_convert_coords_xim2rln) ? (1) : (0); valid_options += (do_combine_GCTF_results) ? (1) : (0); valid_options += (do_apply_spherical_mask_3D) ? (1) : (0); valid_options += (do_crop_central_Z) ? (1) : (0); valid_options += (do_create_cylinder_3D) ? (1) : (0); valid_options += (do_set_default_tilt) ? (1) : (0); valid_options += (do_remove_segments_with_bad_tilt) ? (1) : (0); valid_options += (do_remove_segments_with_bad_psi) ? (1) : (0); valid_options += (do_remove_mics_with_bad_ctf) ? (1) : (0); valid_options += (do_simulate_helix_3D) ? (1) : (0); valid_options += (do_impose_helical_symmetry) ? (1) : (0); valid_options += (do_local_search_helical_symmetry) ? (1) : (0); valid_options += (do_PDB_helix) ? (1) : (0); valid_options += (do_divide_star_file) ? (1) : (0); valid_options += (do_merge_star_files) ? (1) : (0); valid_options += (do_sort_datastar_tubeID) ? (1) : (0); valid_options += (do_simulate_helical_segments_2D) ? (1) : (0); valid_options += (do_cut_out) ? (1) : (0); valid_options += (do_set_xmipp_origin) ? (1) : (0); valid_options += (do_impose_helical_symmetry_fourier_space) ? (1) : (0); valid_options += (do_check_parameters) ? (1) : (0); valid_options += (do_normalise_segments) ? (1) : (0); valid_options += (do_interpo_3D_curve) ? 
(1) : (0); valid_options += (do_select_3D_subtomo_from_2D_proj) ? (1) : (0); valid_options += (do_average_au_2d) ? (1) : (0); valid_options += (do_debug) ? (1) : (0); if (valid_options <= 0) REPORT_ERROR("Please specify one option!"); if (valid_options > 1) REPORT_ERROR("Only one option can be specified at one time! valid_options = " + integerToString(valid_options)); if (do_extract_coords_relion || do_extract_coords_ximdisp || do_extract_coords_eman) { if (show_usage_for_an_option) { displayEmptyLine(); std::cout << " Extract coordinates of helical segments from specified straight tubes" << std::endl; std::cout << " USAGE (EMAN2 format) : --extract_emn --i_root _boxes.txt --o_root _segments.star --nr_asu 30 --rise 1.408 --angpix 1.126 --xdim 4096 --ydim 4096 --boxdim 320 --bimodal --segments" << std::endl; std::cout << " USAGE (RELION format) : --extract_rln --i_root _tubes.star --o_root _segments.star --nr_asu 30 --rise 1.408 --angpix 1.126 --xdim 4096 --ydim 4096 --boxdim 320 --bimodal --segments" << std::endl; std::cout << " USAGE (XIMDISP format): --extract_xim --i_root .mrc.coords --o_root _segments.star --nr_asu 30 --rise 1.408 --angpix 1.126 --xdim 4096 --ydim 4096 --boxdim 320 --bimodal --segments" << std::endl; displayEmptyLine(); return; } int format_tag; if (do_extract_coords_relion) format_tag = RELION_STAR_FORMAT; else if (do_extract_coords_ximdisp) format_tag = XIMDISP_COORDS_FORMAT; else if (do_extract_coords_eman) format_tag = EMAN2_FORMAT; extractHelicalSegmentsFromTubes_Multiple( fn_in_root, fn_out_root, format_tag, nr_asu, rise_A, pixel_size_A, Xdim, Ydim, boxdim, do_bimodal_searches, do_cut_into_segments); } else if (do_convert_coords_emn2rln || do_convert_coords_xim2rln) { if (show_usage_for_an_option) { displayEmptyLine(); std::cout << " Convert EMAN2 / XIMDISP coordinates of helical segments into RELION STAR format" << std::endl; std::cout << " USAGE (EMAN2 format) : --coords_emn2rln --i_root _helix_ptcl_coords.txt --o_root _segments.star --xdim 4096 --ydim 4096 --boxdim 320 --bimodal" << std::endl; std::cout << " USAGE (XIMDISP format): --coords_xim2rln --i_root .mrc.coords --o_root _segments.star --xdim 4096 --ydim 4096 --boxdim 320 --bimodal" << std::endl; displayEmptyLine(); return; } int format_tag; if (do_convert_coords_xim2rln) format_tag = XIMDISP_COORDS_FORMAT; else if (do_convert_coords_emn2rln) format_tag = EMAN2_FORMAT; convertHelicalSegmentCoordsToStarFile_Multiple( fn_in_root, fn_out_root, format_tag, pixel_size_A, Xdim, Ydim, boxdim, do_bimodal_searches); } else if (do_combine_GCTF_results) { if (show_usage_for_an_option) { displayEmptyLine(); std::cout << " Combine Autopicker priors (tilt and psi) with Gctf local search results" << std::endl; std::cout << " USAGE: --combine_gctf --i1_root _autopick.star --i2_root _gctf_local.star --o_root _combined.star" << std::endl; displayEmptyLine(); return; } combineParticlePriorsWithKaiLocalCTF_Multiple( fn_in1_root, fn_in2_root, fn_out_root); } else if (do_apply_spherical_mask_3D) { if (show_usage_for_an_option) { displayEmptyLine(); std::cout << " Apply soft spherical mask to 3D helical reference" << std::endl; std::cout << " USAGE: --spherical_mask --i in.mrc --o out.mrc (--sphere_percentage 0.9 --width 5)" << std::endl; displayEmptyLine(); return; } int box_size; if ( (sphere_percentage < 0.009) || (sphere_percentage > 0.991) ) REPORT_ERROR("Diameter of spherical mask divided by the box size should be within range 0.01~0.99!"); Image img; img.read(fn_in); img().setXmippOrigin(); box_size = ((XSIZE(img())) < 
(YSIZE(img()))) ? (XSIZE(img())) : (YSIZE(img())); box_size = (box_size < (ZSIZE(img()))) ? box_size : (ZSIZE(img())); applySoftSphericalMask( img(), (RFLOAT(box_size) * sphere_percentage), width_edge_pix); img.write(fn_out); } else if (do_crop_central_Z) { if (show_usage_for_an_option) { displayEmptyLine(); std::cout << " Crop the central part of a helix" << std::endl; std::cout << " USAGE: --central_mask --i in.mrc --o out.mrc (--z_percentage 0.3 --width 5)" << std::endl; displayEmptyLine(); return; } Image img; img.read(fn_in); cutZCentralPartOfSoftMask( img(), z_percentage, width_edge_pix); img.write(fn_out); } else if (do_create_cylinder_3D) { if (show_usage_for_an_option) { displayEmptyLine(); std::cout << " Create a cylinder for 3D initial reference" << std::endl; std::cout << " USAGE: --cylinder --o out.mrc --boxdim 300 (--cyl_inner_diameter -1) --cyl_outer_diameter 200 --angpix 1.34 (--polar --topbottom_ratio 0.5) (--sphere_percentage 0.9 --width 5)" << std::endl; displayEmptyLine(); return; } if (pixel_size_A < 0.01) REPORT_ERROR("Pixel size should be larger than 0!"); if (boxdim < 20) REPORT_ERROR("Box size should be larger than 20 pixels!"); if ( (sphere_percentage < 0.009) || (sphere_percentage > 0.991) ) REPORT_ERROR("Diameter of spherical mask divided by the box size should be within range 0.01~0.99!"); Image img; if (!do_polar_reference) topbottom_ratio = 1.; createCylindricalReferenceWithPolarity( img(), boxdim, (cyl_inner_diameter_A / pixel_size_A), (cyl_outer_diameter_A / pixel_size_A), topbottom_ratio, width_edge_pix); applySoftSphericalMask( img(), (RFLOAT(boxdim) * sphere_percentage), width_edge_pix); img.MDMainHeader.setValue(EMDL_IMAGE_SAMPLINGRATE_X, pixel_size_A); img.MDMainHeader.setValue(EMDL_IMAGE_SAMPLINGRATE_Y, pixel_size_A); img.MDMainHeader.setValue(EMDL_IMAGE_SAMPLINGRATE_Z, pixel_size_A); img.write(fn_out); } else if (do_set_default_tilt) { if (show_usage_for_an_option) { displayEmptyLine(); std::cout << " Set tilt angles to 90 degrees for all helical segments" << std::endl; std::cout << " USAGE: --init_tilt --i in.star --o out.star" << std::endl; displayEmptyLine(); return; } setNullTiltPriorsInDataStar( fn_in, fn_out); } else if (do_remove_segments_with_bad_tilt) { if (show_usage_for_an_option) { displayEmptyLine(); std::cout << " Remove helical segments with large tilt angle deviation (away from 90 degrees)" << std::endl; std::cout << " USAGE: --remove_bad_tilt --i in.star --o out.star --tilt_max_dev 15" << std::endl; displayEmptyLine(); return; } removeBadTiltHelicalSegmentsFromDataStar( fn_in, fn_out, tilt_max_dev_deg); } else if (do_remove_segments_with_bad_psi) { if (show_usage_for_an_option) { displayEmptyLine(); std::cout << " Remove helical segments with large psi angle deviation (away from psi prior)" << std::endl; std::cout << " USAGE: --remove_bad_psi --i in.star --o out.star --psi_max_dev 15" << std::endl; displayEmptyLine(); return; } removeBadPsiHelicalSegmentsFromDataStar( fn_in, fn_out, psi_max_dev_deg); } else if (do_remove_mics_with_bad_ctf) { if (show_usage_for_an_option) { displayEmptyLine(); std::cout << " Remove micrographs with poor-quality CTF" << std::endl; std::cout << " USAGE: --remove_bad_ctf --i in.star --o out.star (--ctf_fom_min 0.1 --EPA_lowest_res 5 --df_min 10000 --df_max 30000)" << std::endl; displayEmptyLine(); return; } excludeLowCTFCCMicrographs( fn_in, fn_out, ctf_fom_min, EPA_lowest_res, df_min, df_max); } else if (do_simulate_helix_3D) { if (show_usage_for_an_option) { displayEmptyLine(); std::cout << " Create a 
helical 3D reference of spheres" << std::endl; std::cout << " USAGE: --simulate_helix --o ref.mrc --subunit_diameter 30 --cyl_outer_diameter 200 --angpix 1.126 --rise 1.408 --twist 22.03 --boxdim 300 (--sym_Cn 1) (--polar --topbottom_ratio 0.5 --cyl_inner_diameter 20) (--sphere_percentage 0.9 --width 5) (--seam_nr_filaments 13)" << std::endl; displayEmptyLine(); return; } if (pixel_size_A < 0.01) REPORT_ERROR("Pixel size should be larger than 0!"); if (boxdim < 20) REPORT_ERROR("Box size should be larger than 20 pixels!"); if ( (sphere_percentage < 0.009) || (sphere_percentage > 0.991) ) REPORT_ERROR("Diameter of spherical mask divided by the box size should be within range 0.01~0.99!"); Image img; makeHelicalReference3DWithPolarity( img(), boxdim, pixel_size_A, twist_deg, rise_A, cyl_outer_diameter_A, subunit_diameter_A, (do_polar_reference) ? (cyl_inner_diameter_A) : (subunit_diameter_A), (do_polar_reference) ? (topbottom_ratio) : (1.), sym_Cn, nr_filaments_helix_with_seam); applySoftSphericalMask( img(), (RFLOAT(boxdim) * sphere_percentage), width_edge_pix); img.MDMainHeader.setValue(EMDL_IMAGE_SAMPLINGRATE_X, pixel_size_A); img.MDMainHeader.setValue(EMDL_IMAGE_SAMPLINGRATE_Y, pixel_size_A); img.MDMainHeader.setValue(EMDL_IMAGE_SAMPLINGRATE_Z, pixel_size_A); img.write(fn_out); } else if (do_impose_helical_symmetry) { if (show_usage_for_an_option) { displayEmptyLine(); std::cout << " Impose helical symmetry (in real space)" << std::endl; std::cout << " USAGE: --impose --i in.mrc --o out.mrc (--cyl_inner_diameter -1) --cyl_outer_diameter 200 --angpix 1.126 --rise 1.408 --twist 22.03 (--z_percentage 0.3 --sphere_percentage 0.9 --width 5)" << std::endl; displayEmptyLine(); return; } int box_size; RFLOAT sphere_diameter_A; Image img; img.read(fn_in); box_size = ((XSIZE(img())) < (YSIZE(img()))) ? (XSIZE(img())) : (YSIZE(img())); box_size = (box_size < (ZSIZE(img()))) ? (box_size) : (ZSIZE(img())); sphere_diameter_A = pixel_size_A * sphere_percentage * RFLOAT(box_size); img().setXmippOrigin(); imposeHelicalSymmetryInRealSpace( img(), pixel_size_A, sphere_diameter_A / 2., cyl_inner_diameter_A / 2., cyl_outer_diameter_A / 2., z_percentage, rise_A, twist_deg, width_edge_pix); img.MDMainHeader.setValue(EMDL_IMAGE_SAMPLINGRATE_X, pixel_size_A); img.MDMainHeader.setValue(EMDL_IMAGE_SAMPLINGRATE_Y, pixel_size_A); img.MDMainHeader.setValue(EMDL_IMAGE_SAMPLINGRATE_Z, pixel_size_A); img.write(fn_out); } else if (do_local_search_helical_symmetry) { if (show_usage_for_an_option) { displayEmptyLine(); std::cout << " Local search of helical symmetry" << std::endl; std::cout << " USAGE: --search --i in.mrc (--cyl_inner_diameter -1) --cyl_outer_diameter 200 --angpix 1.126 --rise_min 1.3 --rise_max 1.5 (--rise_inistep -1) --twist_min 20 --twist_max 24 (--twist_inistep -1) (--z_percentage 0.3) (--verb)" << std::endl; displayEmptyLine(); return; } int box_size; RFLOAT sphere_diameter_A, rise_refined_A, twist_refined_deg; Image img; img.read(fn_in); box_size = ((XSIZE(img())) < (YSIZE(img()))) ? (XSIZE(img())) : (YSIZE(img())); box_size = (box_size < (ZSIZE(img()))) ? (box_size) : (ZSIZE(img())); sphere_diameter_A = pixel_size_A * sphere_percentage * RFLOAT(box_size); img().setXmippOrigin(); localSearchHelicalSymmetry( img(), pixel_size_A, sphere_diameter_A / 2., cyl_inner_diameter_A / 2., cyl_outer_diameter_A / 2., z_percentage, rise_min_A, rise_max_A, rise_inistep_A, rise_refined_A, twist_min_deg, twist_max_deg, twist_inistep_deg, twist_refined_deg, ((verb == true) ? 
(&std::cout) : (NULL)) ); std::cout << " Done! Refined helical rise = " << rise_refined_A << " Angstroms, twist = " << twist_refined_deg << " degrees." << std::endl; } else if (do_PDB_helix) { if (show_usage_for_an_option) { displayEmptyLine(); std::cout << " Simulate a helix from a single PDB file of protein molecule" << std::endl; std::cout << " USAGE: --pdb_helix --i in.pdb --o out.pdb --cyl_outer_diameter 50 --rise 1.408 --twist 22.03 --nr_subunits 300 (--center_pdb)" << std::endl; displayEmptyLine(); return; } if ( (fn_in.getExtension() != "pdb") || (fn_out.getExtension() != "pdb") ) REPORT_ERROR("Input and output files should be in .pdb format!"); if (cyl_outer_diameter_A < 0.) // TODO: PLEASE CHECK THIS FOR OTHER OPTIONS ! cyl_outer_diameter_A = 0.; Assembly pdb_ori, pdb_helix; pdb_ori.readPDB(fn_in); makeSimpleHelixFromPDBParticle( pdb_ori, pdb_helix, cyl_outer_diameter_A / 2., twist_deg, rise_A, nr_subunits, do_center_of_mass_each_PDB_molecule); pdb_helix.writePDB(fn_out); } else if (do_divide_star_file) { if (show_usage_for_an_option) { displayEmptyLine(); std::cout << " Divide one huge STAR file into many small ones" << std::endl; std::cout << " USAGE: --divide --i in.star (--nr_outfiles 10)" << std::endl; displayEmptyLine(); return; } divideStarFile(fn_in, nr_outfiles); } else if (do_merge_star_files) { if (show_usage_for_an_option) { displayEmptyLine(); std::cout << " Merge small STAR files into a huge one" << std::endl; std::cout << " USAGE: --merge --i_root _subset.star" << std::endl; displayEmptyLine(); return; } mergeStarFiles(fn_in_root); } else if (do_sort_datastar_tubeID) { if (show_usage_for_an_option) { displayEmptyLine(); std::cout << " Sort segments in _data.star file according to helical tube IDs" << std::endl; std::cout << " USAGE: --sort_tube_id --i in.star --o out.star" << std::endl; displayEmptyLine(); return; } MetaDataTable MD; MD.read(fn_in); sortHelicalTubeID(MD); MD.write(fn_out); } else if (do_simulate_helical_segments_2D) { if (show_usage_for_an_option) { displayEmptyLine(); std::cout << " Simulate helical segments / subtomograms using a STAR file" << std::endl; std::cout << " USAGE: --simulate_segments --i 3dvol-for-projection.mrc --o segments.star --boxdim 200 --nr_subunits 5000 --nr_asu 5 --nr_tubes 20 --twist 22.03 --rise 1.408 --cyl_outer_diameter 200 --angpix 1.126 (--bimodal --3d_tomo --sigma_tilt 5 --sigma_psi 5 --sigma_offset 5 --white_noise 1 --random_seed 1400014000)" << std::endl; std::cout << " BEWARE: '--boxdim' is the shrunk box size of the simulated output (2D or 3D). It should not be bigger than the box size of the input 3D volume." << std::endl; displayEmptyLine(); return; } if ( (pixel_size_A < 0.001) || ((rise_A / pixel_size_A) < 0.001) ) REPORT_ERROR("Helical rise should be larger than 0.001 pixels!"); simulateHelicalSegments( is_3d_tomo, fn_in, fn_out, white_noise, boxdim, nr_subunits, nr_asu, nr_tubes, do_bimodal_searches, cyl_outer_diameter_A, pixel_size_A, rise_A, twist_deg, sigma_psi, sigma_tilt, sigma_offset, random_seed); std::cout << " WARNING: Please check the output STAR files before you execute the .sh script! Use '*_helical_priors.star' or '*_no_priors.star' as the input particle STAR file!" 
<< std::endl; } else if (do_cut_out) { if (show_usage_for_an_option) { displayEmptyLine(); std::cout << " Cut out a small part of the helix" << std::endl; std::cout << " USAGE: --cut_out --i in.mrc --o out.mrc (--boxdim 100 --z_percentage 0.3 --ang 30)" << std::endl; displayEmptyLine(); return; } Image img1, img2; img1.clear(); img2.clear(); img1.read(fn_in); cutOutPartOfHelix(img1(), img2(), boxdim, ang, z_percentage); img2.write(fn_out); } else if (do_set_xmipp_origin) { if (show_usage_for_an_option) { displayEmptyLine(); std::cout << " Set Xmipp origin" << std::endl; std::cout << " USAGE: --set_xmipp_origin --i in.mrc --o out.mrc" << std::endl; displayEmptyLine(); return; } Image img; img.clear(); img.read(fn_in); img().setXmippOrigin(); img.write(fn_out); } else if (do_impose_helical_symmetry_fourier_space) { if (show_usage_for_an_option) { displayEmptyLine(); std::cout << " Impose helical symmetry (Fourier space simulation)" << std::endl; std::cout << " USAGE: --impose_fourier --i in.mrc --o out.mrc --angpix 1.126 --nr_asu 5 --rise 1.408 --twist 22.03" << std::endl; displayEmptyLine(); return; } if (nr_asu <= 1) { std::cout << " Number of asymmetrical units is smaller than 1. Nothing is needed to be done..." << std::endl; return; } MultidimArray Msum, Maux1; Matrix1D transZ(3); Image img; long int Xdim, Ydim, Zdim, Ndim; img.read(fn_in); img().getDimensions(Xdim, Ydim, Zdim, Ndim); img().setXmippOrigin(); if ( (Xdim != Ydim) || (Ydim != Zdim) ) REPORT_ERROR("Error in the input 3D map: DimX != DimY or DimY != DimZ"); Msum.clear(); Msum.initZeros(img()); Msum.setXmippOrigin(); int h_min = -nr_asu / 2; int h_max = -h_min + nr_asu % 2; XX(transZ) = YY(transZ) = 0.; for (int hh = h_min; hh < h_max; hh++) { if (hh == 0) Msum += img(); else { rotate(img(), Maux1, RFLOAT(hh) * twist_deg); ZZ(transZ) = RFLOAT(hh) * rise_A / pixel_size_A; selfTranslate(Maux1, transZ, WRAP); Msum += Maux1; } } img() = Msum / RFLOAT(nr_asu); img.MDMainHeader.setValue(EMDL_IMAGE_SAMPLINGRATE_X, pixel_size_A); img.MDMainHeader.setValue(EMDL_IMAGE_SAMPLINGRATE_Y, pixel_size_A); img.MDMainHeader.setValue(EMDL_IMAGE_SAMPLINGRATE_Z, pixel_size_A); img.write(fn_out); } else if (do_check_parameters) { if (show_usage_for_an_option) { displayEmptyLine(); std::cout << " Check parameters for 3D helical reconstruction in RELION" << std::endl; std::cout << " USAGE: --check --boxdim 300 --angpix 1.126 --sphere_percentage 0.9 (--cyl_inner_diameter 20) --cyl_outer_diameter 240 (--ignore_helical_symmetry --search_sym --z_percentage 0.3 --nr_asu 20 --rise 1.408 --rise_min 1.3 --rise_max 1.5 --twist 22.03 --twist_min 21 --twist_max 23)" << std::endl; displayEmptyLine(); return; } bool result = checkParametersFor3DHelicalReconstruction( ignore_helical_symmetry, do_helical_symmetry_local_refinement, nr_asu, rise_A, rise_min_A, rise_max_A, twist_deg, twist_min_deg, twist_max_deg, boxdim, pixel_size_A, z_percentage, sphere_percentage * boxdim * pixel_size_A, cyl_inner_diameter_A, cyl_outer_diameter_A, true); if (result) std::cout << " Done! All the parameters seem OK for 3D helical reconstruction in RELION." 
<< std::endl; } else if (do_normalise_segments) { if (show_usage_for_an_option) { displayEmptyLine(); std::cout << " Normalise 2D/3D helical segments in a STAR file" << std::endl; std::cout << " USAGE: --norm --i imgs_input.star --o_root _norm --angpix 1.126 --cyl_outer_diameter 200" << std::endl; displayEmptyLine(); return; } normaliseHelicalSegments( fn_in, fn_out_root, cyl_outer_diameter_A, pixel_size_A); } else if (do_interpo_3D_curve) { if (show_usage_for_an_option) { displayEmptyLine(); std::cout << " Interpolate 3D curve for 3D helical sub-tomogram extraction" << std::endl; std::cout << " USAGE: --interpo --i_root Btub_tomo1 --o_root _interpo --nr_asu 1 --rise 52.77 --angpix 2.18 --boxdim 200 --bin 1 (--bimodal)" << std::endl; displayEmptyLine(); return; } Interpolate3DCurves( fn_in_root, fn_out_root, nr_asu, rise_A, pixel_size_A, boxdim, binning_factor, do_bimodal_searches); } else if (do_select_3D_subtomo_from_2D_proj) { if (show_usage_for_an_option) { displayEmptyLine(); std::cout << " Select 3D subtomograms given 2D projections" << std::endl; std::cout << " USAGE: --select_3dtomo --i1 selected_2d_proj.star --i2 particle_3d_subtomo.star --o selected_3d_subtomo.star" << std::endl; displayEmptyLine(); return; } if ( (fn_in1.getExtension() != "star") || (fn_in2.getExtension() != "star") || (fn_out.getExtension() != "star") ) REPORT_ERROR("Input and output files (--i1, --i2, --o) should be in .star format!"); MetaDataTable MD_2d, MD_3d, MD_out; MD_2d.read(fn_in1); MD_3d.read(fn_in2); select3DsubtomoFrom2Dproj(MD_2d, MD_3d, MD_out); MD_out.write(fn_out); std::cout << " Done! " << MD_out.numberOfObjects() << " out of " << MD_3d.numberOfObjects() << " subtomograms have been selected." << std::endl; } else if (do_average_au_2d) { if (show_usage_for_an_option) { displayEmptyLine(); std::cout << " Average asymmetric units in 2D along helical axis" << std::endl; std::cout << " USAGE: --average_au_2d --i input_particles.star --o_root NewParticles/average3au --nr_asu 3 --rise 4.75 " << std::endl; displayEmptyLine(); return; } if (fn_in.getExtension() != "star") REPORT_ERROR("Input file (--i) should be in .star format!"); ObservationModel obsModel; MetaDataTable MDimgs; ObservationModel::loadSafely(fn_in, obsModel, MDimgs, "particles"); averageAsymmetricUnits2D(obsModel, MDimgs, fn_out_root, nr_asu, rise_A); obsModel.save(MDimgs, fn_out_root+"particles.star", "particles"); std::cout << " Done! " << std::endl; } else if (do_debug) { if (show_usage_for_an_option) { displayEmptyLine(); std::cout << " (Debug only)" << std::endl; displayEmptyLine(); return; } //MetaDataTable MD; //MD.read(fn_in); //setPsiFlipRatioInStarFile(MD); //MD.write(fn_out); grabParticleCoordinates_Multiple(fn_in, fn_out); // RECOVER THIS ! //readFileHeader(fn_in, fn_out, 9493); //Image img; //img.read(fn_in); //calculateRadialAvg(img(), pixel_size_A); } else { REPORT_ERROR("Please specify an option!"); } if ( (!show_usage_for_an_option) && (!do_debug) ) { writeCommand("relion_helix_toolbox.log"); } }; }; int main(int argc, char *argv[]) { // time_config(); helix_bilder_parameters prm; try { prm.read(argc, argv); prm.run(); } catch (RelionError XE) { prm.usage(); std::cout << XE; return RELION_EXIT_FAILURE; } return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/image_handler.cpp000066400000000000000000001130371411340063500200250ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W.
Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include #include #include #include #include #include #include #ifdef HAVE_PNG #include #endif #include class image_handler_parameters { public: FileName fn_in, fn_out, fn_sel, fn_img, fn_sym, fn_sub, fn_mult, fn_div, fn_add, fn_subtract, fn_mask, fn_fsc, fn_adjust_power, fn_correct_ampl, fn_fourfilter, fn_cosDPhi; int bin_avg, avg_first, avg_last, edge_x0, edge_xF, edge_y0, edge_yF, filter_edge_width, new_box, minr_ampl_corr, my_new_box_size; bool do_add_edge, do_invert_hand, do_flipXY, do_flipmXY, do_flipZ, do_flipX, do_flipY, do_shiftCOM, do_stats, do_calc_com, do_avg_ampl, do_avg_ampl2, do_avg_ampl2_ali, do_average, do_remove_nan, do_average_all_frames, do_power, do_ignore_optics, do_optimise_scale_subtract; RFLOAT multiply_constant, divide_constant, add_constant, subtract_constant, threshold_above, threshold_below, angpix, requested_angpix, real_angpix, force_header_angpix, lowpass, highpass, logfilter, bfactor, shift_x, shift_y, shift_z, replace_nan, randomize_at, optimise_bfactor_subtract; // PNG options RFLOAT minval, maxval, sigma_contrast; int color_scheme; // There is a global variable called colour_scheme in displayer.h! 
std::string directional; int verb; // I/O Parser IOParser parser; ObservationModel obsModel; Image Iout; Image Iop; Image Imask; MultidimArray avg_ampl; MetaDataTable MD; FourierTransformer transformer; std::map n_images; // Image size int xdim, ydim, zdim; long int ndim; void usage() { parser.writeUsage(std::cerr); } void read(int argc, char **argv) { parser.setCommandLine(argc, argv); int general_section = parser.addSection("General options"); fn_in = parser.getOption("--i", "Input STAR file, image (.mrc) or movie/stack (.mrcs)"); fn_out = parser.getOption("--o", "Output name (for STAR-input: insert this string before each image's extension)", ""); int cst_section = parser.addSection("image-by-constant operations"); multiply_constant = textToFloat(parser.getOption("--multiply_constant", "Multiply the image(s) pixel values by this constant", "1")); divide_constant = textToFloat(parser.getOption("--divide_constant", "Divide the image(s) pixel values by this constant", "1")); add_constant = textToFloat(parser.getOption("--add_constant", "Add this constant to the image(s) pixel values", "0.")); subtract_constant = textToFloat(parser.getOption("--subtract_constant", "Subtract this constant from the image(s) pixel values", "0.")); threshold_above = textToFloat(parser.getOption("--threshold_above", "Set all values higher than this value to this value", "999.")); threshold_below = textToFloat(parser.getOption("--threshold_below", "Set all values lower than this value to this value", "-999.")); int img_section = parser.addSection("image-by-image operations"); fn_mult = parser.getOption("--multiply", "Multiply input image(s) by the pixel values in this image", ""); fn_div = parser.getOption("--divide", "Divide input image(s) by the pixel values in this image", ""); fn_add = parser.getOption("--add", "Add the pixel values in this image to the input image(s) ", ""); fn_subtract = parser.getOption("--subtract", "Subtract the pixel values in this image to the input image(s) ", ""); fn_fsc = parser.getOption("--fsc", "Calculate FSC curve of the input image with this image", ""); do_power = parser.checkOption("--power", "Calculate power spectrum (|F|^2) of the input image"); fn_adjust_power = parser.getOption("--adjust_power", "Adjust the power spectrum of the input image to be the same as this image ", ""); fn_fourfilter = parser.getOption("--fourier_filter", "Multiply the Fourier transform of the input image(s) with this one image ", ""); int subtract_section = parser.addSection("additional subtract options"); do_optimise_scale_subtract = parser.checkOption("--optimise_scale_subtract", "Optimise scale between maps before subtraction?"); optimise_bfactor_subtract = textToFloat(parser.getOption("--optimise_bfactor_subtract", "Search range for relative B-factor for subtraction (in A^2)", "0.")); fn_mask = parser.getOption("--mask_optimise_subtract", "Use only voxels in this mask to optimise scale for subtraction", ""); int four_section = parser.addSection("per-image operations"); do_stats = parser.checkOption("--stats", "Calculate per-image statistics?"); do_calc_com = parser.checkOption("--com", "Calculate center of mass?"); bfactor = textToFloat(parser.getOption("--bfactor", "Apply a B-factor (in A^2)", "0.")); lowpass = textToFloat(parser.getOption("--lowpass", "Low-pass filter frequency (in A)", "-1.")); highpass = textToFloat(parser.getOption("--highpass", "High-pass filter frequency (in A)", "-1.")); directional = parser.getOption("--directional", "Directionality of low-pass filter frequency ('X', 
'Y' or 'Z', default non-directional)", ""); logfilter = textToFloat(parser.getOption("--LoG", "Diameter for optimal response of Laplacian of Gaussian filter (in A)", "-1.")); angpix = textToFloat(parser.getOption("--angpix", "Pixel size (in A)", "-1")); requested_angpix = textToFloat(parser.getOption("--rescale_angpix", "Scale input image(s) to this new pixel size (in A)", "-1.")); real_angpix = -1; force_header_angpix = textToFloat(parser.getOption("--force_header_angpix", "Change the pixel size in the header (in A). Without --rescale_angpix, the image is not scaled.", "-1.")); new_box = textToInteger(parser.getOption("--new_box", "Resize the image(s) to this new box size (in pixel) ", "-1")); filter_edge_width = textToInteger(parser.getOption("--filter_edge_width", "Width of the raised cosine on the low/high-pass filter edge (in resolution shells)", "2")); do_flipX = parser.checkOption("--flipX", "Flip (mirror) a 2D image or 3D map in the X-direction?"); do_flipY = parser.checkOption("--flipY", "Flip (mirror) a 2D image or 3D map in the Y-direction?"); do_flipZ = parser.checkOption("--flipZ", "Flip (mirror) a 3D map in the Z-direction?"); do_invert_hand = parser.checkOption("--invert_hand", "Invert hand by flipping X? Similar to flipX, but preserves the symmetry origin. Edge pixels are wrapped around."); do_shiftCOM = parser.checkOption("--shift_com", "Shift image(s) to their center-of-mass (only on positive pixel values)"); shift_x = textToFloat(parser.getOption("--shift_x", "Shift images this many pixels in the X-direction", "0.")); shift_y = textToFloat(parser.getOption("--shift_y", "Shift images this many pixels in the Y-direction", "0.")); shift_z = textToFloat(parser.getOption("--shift_z", "Shift images this many pixels in the Z-direction", "0.")); do_avg_ampl = parser.checkOption("--avg_ampl", "Calculate average amplitude spectrum for all images?"); do_avg_ampl2 = parser.checkOption("--avg_ampl2", "Calculate average amplitude spectrum for all images?"); do_avg_ampl2_ali = parser.checkOption("--avg_ampl2_ali", "Calculate average amplitude spectrum for all aligned images?"); do_average = parser.checkOption("--average", "Calculate average of all images (without alignment)"); fn_correct_ampl = parser.getOption("--correct_avg_ampl", "Correct all images with this average amplitude spectrum", ""); minr_ampl_corr = textToInteger(parser.getOption("--minr_ampl_corr", "Minimum radius (in Fourier pixels) to apply average amplitudes", "0")); do_remove_nan = parser.checkOption("--remove_nan", "Replace non-numerical values (NaN, inf, etc) in the image(s)"); replace_nan = textToFloat(parser.getOption("--replace_nan", "Replace non-numerical values (NaN, inf, etc) with this value", "0")); randomize_at = textToFloat(parser.getOption("--phase_randomise", "Randomise phases beyond this resolution (in Angstroms)", "-1")); int three_d_section = parser.addSection("3D operations"); fn_sym = parser.getOption("--sym", "Symmetrise 3D map with this point group (e.g. 
D6)", ""); int preprocess_section = parser.addSection("2D-micrograph (or movie) operations"); do_flipXY = parser.checkOption("--flipXY", "Flip the image(s) in the XY direction?"); do_flipmXY = parser.checkOption("--flipmXY", "Flip the image(s) in the -XY direction?"); do_add_edge = parser.checkOption("--add_edge", "Add a barcode-like edge to the micrograph/movie frames?"); edge_x0 = textToInteger(parser.getOption("--edge_x0", "Pixel column to be used for the left edge", "0")); edge_y0 = textToInteger(parser.getOption("--edge_y0", "Pixel row to be used for the top edge", "0")); edge_xF = textToInteger(parser.getOption("--edge_xF", "Pixel column to be used for the right edge", "4095")); edge_yF = textToInteger(parser.getOption("--edge_yF", "Pixel row to be used for the bottom edge", "4095")); int avg_section = parser.addSection("Movie-frame averaging options"); bin_avg = textToInteger(parser.getOption("--avg_bin", "Width (in frames) for binning average, i.e. of every so-many frames", "-1")); avg_first = textToInteger(parser.getOption("--avg_first", "First frame to include in averaging", "-1")); avg_last = textToInteger(parser.getOption("--avg_last", "Last frame to include in averaging", "-1")); do_average_all_frames = parser.checkOption("--average_all_movie_frames", "Average all movie frames of all movies in the input STAR file."); int png_section = parser.addSection("PNG options"); minval = textToFloat(parser.getOption("--black", "Pixel value for black (default is auto-contrast)", "0")); maxval = textToFloat(parser.getOption("--white", "Pixel value for white (default is auto-contrast)", "0")); sigma_contrast = textToFloat(parser.getOption("--sigma_contrast", "Set white and black pixel values this many times the image stddev from the mean", "0")); if (parser.checkOption("--colour_fire", "Show images in black-grey-white-red colour scheme (highlight high signal)?")) color_scheme = BLACKGREYREDSCALE; else if (parser.checkOption("--colour_ice", "Show images in blue-black-grey-white colour scheme (highlight low signal)?")) color_scheme = BLUEGREYWHITESCALE; else if (parser.checkOption("--colour_fire-n-ice", "Show images in blue-grey-red colour scheme (highlight high&low signal)?")) color_scheme = BLUEGREYREDSCALE; else if (parser.checkOption("--colour_rainbow", "Show images in cyan-blue-black-red-yellow colour scheme?")) color_scheme = RAINBOWSCALE; else if (parser.checkOption("--colour_difference", "Show images in cyan-blue-black-red-yellow colour scheme (for difference images)?")) color_scheme = CYANBLACKYELLOWSCALE; else color_scheme = GREYSCALE; // Hidden fn_cosDPhi = getParameter(argc, argv, "--cos_dphi", ""); // Check for errors in the command-line option if (parser.checkForErrors()) REPORT_ERROR("Errors encountered on the command line (see above), exiting..."); verb = (do_stats || do_calc_com || fn_fsc !="" || fn_cosDPhi != "" | do_power) ? 0 : 1; if (fn_out == "" && verb == 1) REPORT_ERROR("Please specify the output file name with --o."); } void perImageOperations(Image &Iin, FileName &my_fn_out, RFLOAT psi = 0.) 
{ Image Iout; Iout().resize(Iin()); bool isPNG = FileName(my_fn_out.getExtension()).toLowercase() == "png"; if (isPNG && (ZSIZE(Iout()) > 1 || NSIZE(Iout()) > 1)) REPORT_ERROR("You can only write a 2D image to a PNG file."); if (angpix < 0 && (requested_angpix > 0 || fn_fsc != "" || randomize_at > 0 || do_power || fn_cosDPhi != "" || fn_correct_ampl != "" || fabs(bfactor) > 0 || logfilter > 0 || lowpass > 0 || highpass > 0 || fabs(optimise_bfactor_subtract) > 0)) { angpix = Iin.samplingRateX(); std::cerr << "WARNING: You did not specify --angpix. The pixel size in the image header, " << angpix << " A/px, is used." << std::endl; } if (do_add_edge) { // Treat X-boundaries FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY2D(Iin()) { if (j < edge_x0) DIRECT_A2D_ELEM(Iin(), i, j) = DIRECT_A2D_ELEM(Iin(), i, edge_x0); else if (j > edge_xF) DIRECT_A2D_ELEM(Iin(), i, j) = DIRECT_A2D_ELEM(Iin(), i, edge_xF); } // Treat Y-boundaries FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY2D(Iin()) { if (i < edge_y0) DIRECT_A2D_ELEM(Iin(), i, j) = DIRECT_A2D_ELEM(Iin(), edge_y0, j); else if (i > edge_yF) DIRECT_A2D_ELEM(Iin(), i, j) = DIRECT_A2D_ELEM(Iin(), edge_yF, j); } } // Flipping: this needs to be done from Iin to Iout (i.e. can't be done on-line on Iout only!) if (do_flipXY) { // Flip X/Y FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY2D(Iin()) { DIRECT_A2D_ELEM(Iout(), i, j) = DIRECT_A2D_ELEM(Iin(), j, i); } } else if (do_flipmXY) { // Flip mX/Y FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY2D(Iin()) { DIRECT_A2D_ELEM(Iout(), i, j) = DIRECT_A2D_ELEM(Iin(), XSIZE(Iin()) - 1 - j, YSIZE(Iin()) - 1 - i); } } else { Iout = Iin; } // From here on also 3D options if (do_remove_nan) { Iout().setXmippOrigin(); FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY3D(Iout()) { if (std::isnan(DIRECT_A3D_ELEM(Iout(), k, i, j)) || std::isinf(DIRECT_A3D_ELEM(Iout(), k, i, j))) DIRECT_A3D_ELEM(Iout(), k, i, j) = replace_nan; } } if (randomize_at > 0.) { int iran = XSIZE(Iin())* angpix / randomize_at; Iout = Iin; randomizePhasesBeyond(Iout(), iran); } if (fabs(multiply_constant - 1.) > 0.) { FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY3D(Iin()) { DIRECT_A3D_ELEM(Iout(), k, i, j) *= multiply_constant; } } else if (fabs(divide_constant - 1.) > 0.) { FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY3D(Iin()) { DIRECT_A3D_ELEM(Iout(), k, i, j) /= divide_constant; } } else if (fabs(add_constant) > 0.) { FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY3D(Iin()) { DIRECT_A3D_ELEM(Iout(), k, i, j) += add_constant; } } else if (fabs(subtract_constant) > 0.) { FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY3D(Iin()) { DIRECT_A3D_ELEM(Iout(), k, i, j) -= subtract_constant; } } else if (fn_mult != "") { FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY3D(Iin()) { DIRECT_A3D_ELEM(Iout(), k, i, j) *= DIRECT_A3D_ELEM(Iop(), k, i, j); } } else if (fn_div != "") { bool is_first = true; FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY3D(Iin()) { if (ABS(DIRECT_A3D_ELEM(Iop(), k, i, j)) < 1e-10) { if (is_first) { std::cout << "Warning: ignore very small pixel values in divide image..." << std::endl; is_first = false; } DIRECT_A3D_ELEM(Iout(), k, i, j) = 0.; } else DIRECT_A3D_ELEM(Iout(), k, i, j) /= DIRECT_A3D_ELEM(Iop(), k, i, j); } } else if (fn_add != "") { FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY3D(Iin()) { DIRECT_A3D_ELEM(Iout(), k, i, j) += DIRECT_A3D_ELEM(Iop(), k, i, j); } } else if (fn_subtract != "") { RFLOAT my_scale = 1., best_diff2 ; if (do_optimise_scale_subtract) { if (fn_mask == "") { Imask(). resize(Iop()); Imask().initConstant(1.); } if (optimise_bfactor_subtract > 0.) 
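// B-factor search (added comment describing the loop below): trial B-factors are scanned in 10 A^2
// steps over +/- optimise_bfactor_subtract; for each trial the map to be subtracted is B-factor
// filtered, the least-squares scale sum(w*x*a)/sum(w*a*a) is computed (with w the squared mask
// value), and the (B-factor, scale) pair giving the smallest mask-weighted squared difference to
// the input image is kept and applied before the subtraction.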
{ MultidimArray< Complex > FTop, FTop_bfac; FourierTransformer transformer; MultidimArray Isharp(Iop()); transformer.FourierTransform(Iop(), FTop); RFLOAT my_bfac, smallest_diff2=99.e99; for (RFLOAT bfac = -optimise_bfactor_subtract; bfac <= optimise_bfactor_subtract; bfac+= 10.) { FTop_bfac = FTop; applyBFactorToMap(FTop_bfac, XSIZE(Iop()), bfac, angpix); transformer.inverseFourierTransform(FTop_bfac, Isharp); RFLOAT scale, diff2; RFLOAT sum_aa = 0., sum_xa = 0., sum_xx = 0.; FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Iin()) { RFLOAT w = DIRECT_MULTIDIM_ELEM(Imask(), n) * DIRECT_MULTIDIM_ELEM(Imask(), n); RFLOAT x = DIRECT_MULTIDIM_ELEM(Iin(), n); RFLOAT a = DIRECT_MULTIDIM_ELEM(Isharp, n); sum_aa += w*a*a; sum_xa += w*x*a; sum_xx += w*x*x; } scale = sum_xa/sum_aa; diff2 = 0.; FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Iin()) { RFLOAT w = DIRECT_MULTIDIM_ELEM(Imask(), n); RFLOAT x = DIRECT_MULTIDIM_ELEM(Iin(), n); RFLOAT a = DIRECT_MULTIDIM_ELEM(Isharp, n); diff2 += w * w * (x - scale * a) * (x - scale * a); } if (diff2 < smallest_diff2) { smallest_diff2 = diff2; my_bfac = bfac; my_scale = scale; } } std::cout << " Optimised bfactor = " << my_bfac << "; optimised scale = " << my_scale << std::endl; applyBFactorToMap(FTop, XSIZE(Iop()), my_bfac, angpix); transformer.inverseFourierTransform(FTop, Iop()); } else { RFLOAT sum_aa = 0., sum_xa = 0.; FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Iin()) { RFLOAT w = DIRECT_MULTIDIM_ELEM(Imask(), n); RFLOAT x = DIRECT_MULTIDIM_ELEM(Iin(), n); RFLOAT a = DIRECT_MULTIDIM_ELEM(Iop(), n); sum_aa += w*w*a*a; sum_xa += w*w*x*a; } my_scale = sum_xa/sum_aa; std::cout << " Optimised scale = " << my_scale << std::endl; } } FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY3D(Iin()) { DIRECT_A3D_ELEM(Iout(), k, i, j) -= my_scale * DIRECT_A3D_ELEM(Iop(), k, i, j); } } else if (fn_fsc != "") { MultidimArray fsc; MetaDataTable MDfsc; getFSC(Iout(), Iop(), fsc); MDfsc.setName("fsc"); FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY1D(fsc) { MDfsc.addObject(); RFLOAT res = (i > 0) ? (XSIZE(Iout()) * angpix / (RFLOAT)i) : 999.; MDfsc.setValue(EMDL_SPECTRAL_IDX, (int)i); MDfsc.setValue(EMDL_RESOLUTION, 1./res); MDfsc.setValue(EMDL_RESOLUTION_ANGSTROM, res); MDfsc.setValue(EMDL_POSTPROCESS_FSC_GENERAL, DIRECT_A1D_ELEM(fsc, i)); } MDfsc.write(std::cout); } else if (do_power) { MultidimArray spectrum; getSpectrum(Iout(), spectrum, POWER_SPECTRUM); MetaDataTable MDpower; MDpower.setName("power"); FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY1D(spectrum) { if (i > XSIZE(Iout()) / 2 + 1) break; // getSpectrum returns beyond Nyquist!! MDpower.addObject(); RFLOAT res = (i > 0) ? (XSIZE(Iout()) * angpix / (RFLOAT)i) : 999.; MDpower.setValue(EMDL_SPECTRAL_IDX, (int)i); MDpower.setValue(EMDL_RESOLUTION, 1./res); MDpower.setValue(EMDL_RESOLUTION_ANGSTROM, res); MDpower.setValue(EMDL_MLMODEL_POWER_REF, DIRECT_A1D_ELEM(spectrum, i)); } MDpower.write(std::cout); } else if (fn_adjust_power != "") { MultidimArray spectrum; getSpectrum(Iop(), spectrum, AMPLITUDE_SPECTRUM); adaptSpectrum(Iin(), Iout(), spectrum, AMPLITUDE_SPECTRUM); } else if (fn_cosDPhi != "") { MultidimArray cosDPhi; MetaDataTable MDcos; MultidimArray< Complex > FT1, FT2; FourierTransformer transformer; transformer.FourierTransform(Iout(), FT1); transformer.FourierTransform(Iop(), FT2); getCosDeltaPhase(FT1, FT2, cosDPhi); MDcos.setName("cos"); FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY1D(cosDPhi) { MDcos.addObject(); RFLOAT res = (i > 0) ? 
(XSIZE(Iout()) * angpix / (RFLOAT)i) : 999.; MDcos.setValue(EMDL_SPECTRAL_IDX, (int)i); MDcos.setValue(EMDL_RESOLUTION, 1./res); MDcos.setValue(EMDL_RESOLUTION_ANGSTROM, res); MDcos.setValue(EMDL_POSTPROCESS_FSC_GENERAL, DIRECT_A1D_ELEM(cosDPhi, i)); } MDcos.write(std::cout); } else if (fn_correct_ampl != "") { MultidimArray FT; transformer.FourierTransform(Iin(), FT, false); FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(FT) { DIRECT_MULTIDIM_ELEM(FT, n) /= DIRECT_MULTIDIM_ELEM(avg_ampl, n); } transformer.inverseFourierTransform(); Iout = Iin; } else if (fn_fourfilter != "") { MultidimArray FT; transformer.FourierTransform(Iin(), FT, false); // Note: only 2D rotations are done! 3D application assumes zero rot and tilt! Matrix2D A; rotation2DMatrix(psi, A); Iop().setXmippOrigin(); FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM(FT) { int jpp = ROUND(jp * A(0, 0) + ip * A(0, 1)); int ipp = ROUND(jp * A(1, 0) + ip * A(1, 1)); int kpp = kp; RFLOAT fil; if (jpp >= STARTINGX(Iop()) && jpp <= FINISHINGX(Iop()) && ipp >= STARTINGY(Iop()) && ipp <= FINISHINGY(Iop())) fil = A3D_ELEM(Iop(), kpp, ipp, jpp); else fil = 0.; DIRECT_A3D_ELEM(FT, k, i, j) *= fil; } transformer.inverseFourierTransform(); Iout = Iin; } if (fabs(bfactor) > 0.) applyBFactorToMap(Iout(), bfactor, angpix); if (logfilter > 0.) { LoGFilterMap(Iout(), logfilter, angpix); RFLOAT avg, stddev, minval, maxval; //Iout().statisticsAdjust(0,1); } if (lowpass > 0.) { if (directional != "") directionalFilterMap(Iout(), lowpass, angpix, directional, filter_edge_width); else lowPassFilterMap(Iout(), lowpass, angpix, filter_edge_width); } if (highpass > 0.) highPassFilterMap(Iout(), highpass, angpix, filter_edge_width); if (do_flipX) { // For input: 0, 1, 2, 3, 4, 5 (XSIZE = 6) // This gives: 5, 4, 3, 2, 1, 0 FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY3D(Iin()) { DIRECT_A3D_ELEM(Iout(), k, i, j) = A3D_ELEM(Iin(), k, i, XSIZE(Iin()) - 1 - j); } } else if (do_flipY) { FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY3D(Iin()) { DIRECT_A3D_ELEM(Iout(), k, i, j) = A3D_ELEM(Iin(), k, YSIZE(Iin()) - 1 - i, j); } } else if (do_flipZ) { if (ZSIZE(Iout()) < 2) REPORT_ERROR("ERROR: this is not a 3D map, so cannot be flipped in Z"); FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY3D(Iin()) { DIRECT_A3D_ELEM(Iout(), k, i, j) = A3D_ELEM(Iin(), ZSIZE(Iin()) - 1 - k, i, j); } } else if (do_invert_hand) { // For input: 0, 1, 2, 3, 4, 5 (XSIZE = 6) // This gives: 0, 5, 4, 3, 2, 1 FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY3D(Iin()) { long int dest_x = (j == 0) ? 0 : (XSIZE(Iin()) - j); DIRECT_A3D_ELEM(Iout(), k, i, j) = A3D_ELEM(Iin(), k, i, dest_x); } } // Shifting if (do_shiftCOM) selfTranslateCenterOfMassToCenter(Iout(), DONT_WRAP, true); // verbose=true! else if (fabs(shift_x) > 0. || fabs(shift_y) > 0. || fabs(shift_z) > 0.) { Matrix1D shift(2); XX(shift) = shift_x; YY(shift) = shift_y; if (zdim > 1) { shift.resize(3); ZZ(shift) = shift_z; } selfTranslate(Iout(), shift, DONT_WRAP); } // Re-scale if (requested_angpix > 0.) 
{ int oldxsize = XSIZE(Iout()); int oldysize = YSIZE(Iout()); int oldsize = oldxsize; if (oldxsize != oldysize && Iout().getDim() == 2) { oldsize = XMIPP_MAX( oldxsize, oldysize ); Iout().setXmippOrigin(); Iout().window(FIRST_XMIPP_INDEX(oldsize), FIRST_XMIPP_INDEX(oldsize), LAST_XMIPP_INDEX(oldsize), LAST_XMIPP_INDEX(oldsize)); } int newsize = ROUND(oldsize * (angpix / requested_angpix)); newsize -= newsize % 2; //make even in case it is not already real_angpix = oldsize * angpix / newsize; if (fabs(real_angpix - requested_angpix) / requested_angpix > 0.001) std::cerr << "WARNING: Although the requested pixel size (--rescale_angpix) is " << requested_angpix << " A/px, the actual pixel size will be " << real_angpix << " A/px due to rounding of the box size to an even number. The latter value is set to the image header. You can overwrite the header pixel size by --force_header_angpix." << std::endl; resizeMap(Iout(), newsize); my_new_box_size = newsize; if (oldxsize != oldysize && Iout().getDim() == 2) { int newxsize = ROUND(oldxsize * (angpix / real_angpix)); int newysize = ROUND(oldysize * (angpix / real_angpix));; newxsize -= newxsize%2; //make even in case it is not already newysize -= newysize%2; //make even in case it is not already Iout().setXmippOrigin(); Iout().window(FIRST_XMIPP_INDEX(newysize), FIRST_XMIPP_INDEX(newxsize), LAST_XMIPP_INDEX(newysize), LAST_XMIPP_INDEX(newxsize)); } // Also reset the sampling rate in the header Iout.setSamplingRateInHeader(real_angpix); } // Re-window if (new_box > 0 && XSIZE(Iout()) != new_box) { Iout().setXmippOrigin(); if (Iout().getDim() == 2) { Iout().window(FIRST_XMIPP_INDEX(new_box), FIRST_XMIPP_INDEX(new_box), LAST_XMIPP_INDEX(new_box), LAST_XMIPP_INDEX(new_box)); } else if (Iout().getDim() == 3) { Iout().window(FIRST_XMIPP_INDEX(new_box), FIRST_XMIPP_INDEX(new_box), FIRST_XMIPP_INDEX(new_box), LAST_XMIPP_INDEX(new_box), LAST_XMIPP_INDEX(new_box), LAST_XMIPP_INDEX(new_box)); } my_new_box_size = new_box; } if (fn_sym != "") symmetriseMap(Iout(), fn_sym); // Thresholding (can be done after any other operation) if (fabs(threshold_above - 999.) > 0.) { FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY3D(Iout()) { if (DIRECT_A3D_ELEM(Iout(), k, i, j) > threshold_above) DIRECT_A3D_ELEM(Iout(), k, i, j) = threshold_above; } } if (fabs(threshold_below + 999.) > 0.) { FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY3D(Iout()) { if (DIRECT_A3D_ELEM(Iout(), k, i, j) < threshold_below) DIRECT_A3D_ELEM(Iout(), k, i, j) = threshold_below; } } if (force_header_angpix > 0) { Iout.setSamplingRateInHeader(force_header_angpix); std::cout << "As requested by --force_header_angpix, the pixel size in the image header is set to " << force_header_angpix << " A/px." << std::endl; } // Write out the result // Check whether fn_out has an "@": if so REPLACE the corresponding frame in the output stack! long int n; FileName fn_tmp; my_fn_out.decompose(n, fn_tmp); n--; if (!isPNG) { if (n >= 0) // This is a stack... { // The following assumes the images in the stack come ordered... 
if (n == 0) Iout.write(fn_tmp, n, true, WRITE_OVERWRITE); // make a new stack else Iout.write(fn_tmp, n, true, WRITE_APPEND); } else Iout.write(my_fn_out); } else { #ifdef HAVE_PNG RFLOAT this_minval = minval, this_maxval = maxval; // User setting getImageContrast(Iout(), this_minval, this_maxval, sigma_contrast); // Update if neecssary const RFLOAT range = this_maxval - this_minval; const RFLOAT step = range / 255; gravis::tImage pngOut(XSIZE(Iout()), YSIZE(Iout())); pngOut.fill(gravis::bRGB(0)); FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Iout()) { const unsigned char val = FLOOR((DIRECT_MULTIDIM_ELEM(Iout(), n) - this_minval) / step); unsigned char r, g, b; greyToRGB(color_scheme, val, r, g, b); pngOut[n] = gravis::bRGB(r, g, b); } pngOut.writePNG(my_fn_out); #else REPORT_ERROR("You cannot write PNG images because libPNG was not linked during compilation."); #endif } } void run() { my_new_box_size = -1; long int slice_id; std::string fn_stem; fn_in.decompose(slice_id, fn_stem); bool input_is_stack = (fn_in.getExtension() == "mrcs" || fn_in.getExtension() == "tif" || fn_in.getExtension() == "tiff") && (slice_id == -1); bool input_is_star = (fn_in.getExtension() == "star"); // By default: write single output images // Get a MetaDataTable if (input_is_star) { do_ignore_optics = false; ObservationModel::loadSafely(fn_in, obsModel, MD, "discover", verb, false); // false means don't die upon failure if (obsModel.opticsMdt.numberOfObjects() == 0) { do_ignore_optics = true; std::cout << " + WARNING: reading input STAR file without optics groups ..." << std::endl; MD.read(fn_in); } if (fn_out.getExtension() != "mrcs") std::cout << "NOTE: the input (--i) is a STAR file but the output (--o) does not have .mrcs extension. The output is treated as a suffix, not a path." << std::endl; FileName fn_img; MD.getValue(EMDL_IMAGE_NAME, fn_img, 0); fn_img.decompose(slice_id, fn_stem); input_is_stack = (fn_in.getExtension() == "mrcs" || fn_in.getExtension() == "tif" || fn_in.getExtension() == "tiff") && (slice_id == -1); } else if (input_is_stack) { if (bin_avg > 0 || (avg_first >= 0 && avg_last >= 0)) { MD.addObject(); MD.setValue(EMDL_IMAGE_NAME, fn_in); } else { // Read the header to get the number of images inside the stack and generate that many lines in the MD Image tmp; FileName fn_tmp; tmp.read(fn_in, false); //false means do not read image now, only header for (int i = 1; i <= NSIZE(tmp()); i++) { MD.addObject(); fn_tmp.compose(i, fn_in); MD.setValue(EMDL_IMAGE_NAME, fn_tmp); } } } else { // Just individual image input MD.addObject(); MD.setValue(EMDL_IMAGE_NAME, fn_in); } int i_img = 0; time_config(); if (verb > 0) init_progress_bar(MD.numberOfObjects()); bool do_md_out = false; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD) { FileName fn_img; if (do_average_all_frames) { MD.getValue(EMDL_MICROGRAPH_MOVIE_NAME, fn_img); } else { MD.getValue(EMDL_IMAGE_NAME, fn_img); } // For fourfilter... RFLOAT psi; if (!MD.getValue(EMDL_ORIENT_PSI, psi)) psi =0.; Image Iin; // Initialise for the first image if (i_img == 0) { Image Ihead; Ihead.read(fn_img, false); Ihead.getDimensions(xdim, ydim, zdim, ndim); if (zdim > 1 && (do_add_edge || do_flipXY || do_flipmXY)) REPORT_ERROR("ERROR: you cannot perform 2D operations like --add_edge, --flipXY or --flipmXY on 3D maps. If you intended to operate on a movie, use .mrcs extensions for stacks!"); if (zdim > 1 && (bin_avg > 0 || (avg_first >= 0 && avg_last >= 0))) REPORT_ERROR("ERROR: you cannot perform movie-averaging operations on 3D maps. 
If you intended to operate on a movie, use .mrcs extensions for stacks!"); if (fn_mult != "") Iop.read(fn_mult); else if (fn_div != "") Iop.read(fn_div); else if (fn_add != "") Iop.read(fn_add); else if (fn_subtract != "") { Iop.read(fn_subtract); if (do_optimise_scale_subtract && fn_mask != "") Imask.read(fn_mask); } else if (fn_fsc != "") Iop.read(fn_fsc); else if (fn_cosDPhi != "") Iop.read(fn_cosDPhi); else if (fn_adjust_power != "") Iop.read(fn_adjust_power); else if (fn_fourfilter != "") Iop.read(fn_fourfilter); else if (fn_correct_ampl != "") { Iop.read(fn_correct_ampl); // Calculate by the radial average in the Fourier domain MultidimArray<RFLOAT> spectrum, count; spectrum.initZeros(YSIZE(Iop())); count.initZeros(YSIZE(Iop())); FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM(Iop()) { long int idx = ROUND(sqrt(kp*kp + ip*ip + jp*jp)); spectrum(idx) += dAkij(Iop(), k, i, j); count(idx) += 1.; } FOR_ALL_ELEMENTS_IN_ARRAY1D(spectrum) { if (A1D_ELEM(count, i) > 0.) A1D_ELEM(spectrum, i) /= A1D_ELEM(count, i); } FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM(Iop()) { long int idx = ROUND(sqrt(kp*kp + ip*ip + jp*jp)); if (idx > minr_ampl_corr) dAkij(Iop(), k, i, j) /= spectrum(idx); else dAkij(Iop(), k, i, j) = 1.; } avg_ampl = Iop(); Iop.write("test.mrc"); } if (fn_mult != "" || fn_div != "" || fn_add != "" || fn_subtract != "" || fn_fsc != "" || fn_adjust_power != "" || fn_fourfilter != "") if (XSIZE(Iop()) != xdim || YSIZE(Iop()) != ydim || ZSIZE(Iop()) != zdim) REPORT_ERROR("Error: operate-image is not of the correct size"); if (do_avg_ampl || do_avg_ampl2 || do_avg_ampl2_ali) { avg_ampl.initZeros(zdim, ydim, xdim/2+1); } else if (do_average || do_average_all_frames) { avg_ampl.initZeros(zdim, ydim, xdim); } } if (do_stats) // only write statistics to screen { Iin.read(fn_img); RFLOAT avg, stddev, minval, maxval, header_angpix; Iin().computeStats(avg, stddev, minval, maxval); header_angpix = Iin.samplingRateX(); std::cout << fn_img << " : (x,y,z,n)= " << XSIZE(Iin()) << " x "<< YSIZE(Iin()) << " x "<< ZSIZE(Iin()) << " x "<< NSIZE(Iin()) << " ; avg= " << avg << " stddev= " << stddev << " minval= " << minval << " maxval= " << maxval << " ; angpix= " << header_angpix << std::endl; } else if (do_calc_com) { Iin.read(fn_img); Iin().setXmippOrigin(); Matrix1D<RFLOAT> com(Iin().getDim()); Iin().centerOfMass(com); std::cout << fn_img << " : center of mass (in pixels) x " << XX(com); if (VEC_XSIZE(com) > 1) std::cout << " y " << YY(com); if (VEC_XSIZE(com) > 2) std::cout << " z " << ZZ(com); std::cout << std::endl; } else if (do_avg_ampl || do_avg_ampl2 || do_avg_ampl2_ali) { Iin.read(fn_img); if (do_avg_ampl2_ali) { RFLOAT xoff = 0.; RFLOAT yoff = 0.; RFLOAT psi = 0.; MD.getValue(EMDL_ORIENT_ORIGIN_X, xoff); MD.getValue(EMDL_ORIENT_ORIGIN_Y, yoff); MD.getValue(EMDL_ORIENT_PSI, psi); // Apply the actual transformation Matrix2D<RFLOAT> A; rotation2DMatrix(psi, A); MAT_ELEM(A,0, 2) = xoff; MAT_ELEM(A,1, 2) = yoff; selfApplyGeometry(Iin(), A, IS_NOT_INV, DONT_WRAP); } MultidimArray<Complex> FT; transformer.FourierTransform(Iin(), FT); if (do_avg_ampl) { FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(FT) { DIRECT_MULTIDIM_ELEM(avg_ampl, n) += abs(DIRECT_MULTIDIM_ELEM(FT, n)); } } else if (do_avg_ampl2 || do_avg_ampl2_ali) { FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(FT) { DIRECT_MULTIDIM_ELEM(avg_ampl, n) += norm(DIRECT_MULTIDIM_ELEM(FT, n)); } } } else if (do_average) { Iin.read(fn_img); FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Iin()) { DIRECT_MULTIDIM_ELEM(avg_ampl, n) += DIRECT_MULTIDIM_ELEM(Iin(), n); } } else if (do_average_all_frames) { Iin.read(fn_img); for (int n = 0; n < ndim; n++) { FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY3D(avg_ampl) { DIRECT_A3D_ELEM(avg_ampl, k, i, j) += DIRECT_NZYX_ELEM(Iin(), n, k, i, j); } } } else if (bin_avg > 0 || (avg_first >= 0 && avg_last >= 0)) { // movie-frame averaging operations int avgndim = 1; if (bin_avg > 0) {
avgndim = ndim / bin_avg; } Image Iavg(xdim, ydim, zdim, avgndim); if (ndim == 1) REPORT_ERROR("ERROR: you are trying to perform movie-averaging options on a single image/volume"); FileName fn_ext = fn_out.getExtension(); if (NSIZE(Iavg()) > 1 && ( fn_ext.contains("mrc") && !fn_ext.contains("mrcs") ) ) REPORT_ERROR("ERROR: trying to write a stack into an MRC image. Use .mrcs extensions for stacks!"); for (long int nn = 0; nn < ndim; nn++) { Iin.read(fn_img, true, nn); if (bin_avg > 0) { int myframe = nn / bin_avg; if (myframe < avgndim) { FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY2D(Iin()) { DIRECT_NZYX_ELEM(Iavg(),myframe,0,i,j) += DIRECT_A2D_ELEM(Iin(), i, j); // just store sum } } } else if (avg_first >= 0 && avg_last >= 0 && nn+1 >= avg_first && nn+1 <= avg_last) // add one to start counting at 1 { FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Iin()) { DIRECT_MULTIDIM_ELEM(Iavg(), n) += DIRECT_MULTIDIM_ELEM(Iin(), n); // just store sum } } } Iavg.write(fn_out); } else { Iin.read(fn_img); FileName my_fn_out; if (fn_out.getExtension() == "mrcs" && !fn_out.contains("@")) { // current_object starts counting from 0, thus needs to be incremented. my_fn_out.compose(current_object + 1, fn_out); } else { if (input_is_stack) { my_fn_out = fn_img.insertBeforeExtension("_" + fn_out); long int dummy; FileName fn_tmp; my_fn_out.decompose(dummy, fn_tmp); n_images[fn_tmp]++; // this is safe. see https://stackoverflow.com/questions/16177596/stdmapstring-int-default-initialization-of-value. my_fn_out.compose(n_images[fn_tmp], fn_tmp); } else if (input_is_star) { my_fn_out = fn_img.insertBeforeExtension("_" + fn_out); } else { my_fn_out = fn_out; } } perImageOperations(Iin, my_fn_out, psi); do_md_out = true; MD.setValue(EMDL_IMAGE_NAME, my_fn_out); } i_img+=ndim; if (verb > 0) progress_bar(i_img/ndim); } if (do_avg_ampl || do_avg_ampl2 || do_avg_ampl2_ali || do_average || do_average_all_frames) { avg_ampl /= (RFLOAT)i_img; Iout() = avg_ampl; Iout.write(fn_out); } if (verb > 0) progress_bar(MD.numberOfObjects()); if (do_md_out && fn_in.getExtension() == "star") { FileName fn_md_out = fn_in.insertBeforeExtension("_" + fn_out); if (do_ignore_optics) { MD.write(fn_md_out); } else { if (my_new_box_size > 0) { FOR_ALL_OBJECTS_IN_METADATA_TABLE(obsModel.opticsMdt) { obsModel.opticsMdt.setValue(EMDL_IMAGE_SIZE, my_new_box_size); } } if (real_angpix > 0) { FOR_ALL_OBJECTS_IN_METADATA_TABLE(obsModel.opticsMdt) { obsModel.opticsMdt.setValue(EMDL_IMAGE_PIXEL_SIZE, real_angpix); } } obsModel.save(MD, fn_md_out); } std::cout << " Written out new STAR file: " << fn_md_out << std::endl; } } }; int main(int argc, char *argv[]) { image_handler_parameters prm; try { prm.read(argc, argv); prm.run(); } catch (RelionError XE) { //prm.usage(); std::cerr << XE; return RELION_EXIT_FAILURE; } return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/import.cpp000066400000000000000000000301651411340063500165600ustar00rootroot00000000000000 /*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include #include #include class import_parameters { public: FileName fn_in, fn_odir, fn_out, fn_mtf; bool do_write_types, do_continue, do_movies, do_micrographs, do_coordinates, do_halfmaps, do_particles, do_other; FileName optics_group_name, node_type, particles_optics_group_name; RFLOAT kV, Cs, Q0, beamtilt_x, beamtilt_y, pixel_size; // I/O Parser IOParser parser; void usage() { parser.writeUsage(std::cerr); } void read(int argc, char **argv) { parser.setCommandLine(argc, argv); int general_section = parser.addSection("General options"); fn_in = parser.getOption("--i", "Input (wildcard) filename"); fn_odir = parser.getOption("--odir", "Output directory (e.g. \"Import/job001/\""); fn_out = parser.getOption("--ofile", "Output file name (e.g. \"movies.star\""); do_movies = parser.checkOption("--do_movies", "Import movies"); do_micrographs = parser.checkOption("--do_micrographs", "Import micrographs"); do_coordinates = parser.checkOption("--do_coordinates", "Import coordinates"); do_halfmaps = parser.checkOption("--do_halfmaps", "Import unfiltered half maps"); do_particles = parser.checkOption("--do_particles", "Import particle STAR files"); particles_optics_group_name = parser.getOption("--particles_optics_group_name", "Rename optics group for all imported particles (e.g. \"opticsGroupLMBjan2019\"", ""); do_other = parser.checkOption("--do_other", "Import anything else"); int mic_section = parser.addSection("Specific options for movies or micrographs"); optics_group_name = parser.getOption("--optics_group_name", "Name for this optics group", "opticsGroup1"); fn_mtf = parser.getOption("--optics_group_mtf", "Name for this optics group's MTF", ""); pixel_size = textToFloat(parser.getOption("--angpix", "Pixel size (Angstrom)", "1.0")); kV = textToFloat(parser.getOption("--kV", "Voltage (kV)", "300")); Cs = textToFloat(parser.getOption("--Cs", "Spherical aberration (mm)", "2.7")); Q0 = textToFloat(parser.getOption("--Q0", "Amplitude contrast", "0.1")); beamtilt_x = textToFloat(parser.getOption("--beamtilt_x", "Beam tilt (X; mrad)", "0.0")); beamtilt_y = textToFloat(parser.getOption("--beamtilt_y", "Beam tilt (Y; mrad)", "0.0")); do_continue = parser.checkOption("--continue", "Continue and old run, add more files to the same import directory"); // Check for errors in the command-line option if (parser.checkForErrors()) REPORT_ERROR("Errors encountered on the command line (see above), exiting..."); if (pixel_size <= 0) REPORT_ERROR("Pixel size must be positive!"); if (kV <= 0) REPORT_ERROR("Acceleration voltage must be positive!"); } void run() { std::string command; MetaDataTable MDout, MDopt; std::vector fns_in; long nr_input_files = fn_in.globFiles(fns_in); std::ofstream fh; int nr_count = 0; if (do_movies) nr_count++; if (do_micrographs) nr_count++; if (do_coordinates) nr_count++; if (do_other || do_halfmaps || do_particles) nr_count++; if (nr_count != 1) { REPORT_ERROR("ERROR: you can only use only one, and at least one, of the options --do_movies, --do_micrographs, --do_coordinates, --do_halfmaps or --do_other"); } std::cout << " importing..." 
<< std::endl; // For micrographs or movies if (do_movies || do_micrographs) { if (fn_in.rfind("../") != std::string::npos) // Forbid at any place REPORT_ERROR("Please don't import files outside the project directory.\nPlease make a symbolic link by an absolute path before importing."); if (fn_in.rfind("/", 0) == 0) // Forbid only at the beginning REPORT_ERROR("Please import files by a relative path.\nIf you want to import files outside the project directory, make a symbolic link by an absolute path and\nimport the symbolic link by a relative path."); std::string tablename = (do_movies) ? "movies" : "micrographs"; bool do_new_optics_group = true; int old_optics_group_number, optics_group_number = 1; long old_nr_files = 0; // When continuing old jobs in the pipeliner, the old names are moved out of the way. Read it in anyway! FileName old_fn_out = fn_odir + fn_out; if (do_continue && exists(old_fn_out)) { MDopt.read(old_fn_out, "optics"); MDout.read(old_fn_out, tablename); old_nr_files = MDout.numberOfObjects(); std::string old_optics_group_name; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDopt) { MDopt.getValue(EMDL_IMAGE_OPTICS_GROUP_NAME, old_optics_group_name); if (old_optics_group_name == optics_group_name) { do_new_optics_group = false; MDopt.getValue(EMDL_IMAGE_OPTICS_GROUP, optics_group_number); break; } } if (do_new_optics_group) { optics_group_number = MDopt.numberOfObjects() + 1; } } if (do_new_optics_group) { if (!optics_group_name.validateCharactersStrict()) REPORT_ERROR("The optics group name may contain only numbers, alphabets and hyphen(-)."); // Generate MDopt for the optics group MDopt.setName("optics"); MDopt.addObject(); MDopt.setValue(EMDL_IMAGE_OPTICS_GROUP_NAME, optics_group_name); MDopt.setValue(EMDL_IMAGE_OPTICS_GROUP, optics_group_number); if (fn_mtf != "") MDopt.setValue(EMDL_IMAGE_MTF_FILENAME, fn_mtf); if (do_micrographs) MDopt.setValue(EMDL_MICROGRAPH_PIXEL_SIZE, pixel_size); MDopt.setValue(EMDL_MICROGRAPH_ORIGINAL_PIXEL_SIZE, pixel_size); MDopt.setValue(EMDL_CTF_VOLTAGE, kV); MDopt.setValue(EMDL_CTF_CS, Cs); MDopt.setValue(EMDL_CTF_Q0, Q0); if (fabs(beamtilt_x) + fabs(beamtilt_y) > 0.001) { MDopt.setValue(EMDL_IMAGE_BEAMTILT_X, beamtilt_x); MDopt.setValue(EMDL_IMAGE_BEAMTILT_Y, beamtilt_y); } } // Fill in the actual data (movies/micrographs) table MDout.setName(tablename); EMDLabel mylabel = (do_movies) ? EMDL_MICROGRAPH_MOVIE_NAME : EMDL_MICROGRAPH_NAME; for (long i = 0; i < nr_input_files; i++) { // Check this file was not yet present in the input STAR file // TODO: this N^2 algorithm might get too expensive with large data sets.... 
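/*
 * A possible way to avoid the quadratic membership test below, should it ever become a
 * bottleneck (hypothetical sketch, not part of the original code; it only replaces the
 * file-name lookup, the optics-group consistency check would remain inside the loop):
 *
 *   std::set<FileName> old_files;
 *   for (long j = 0; j < old_nr_files; j++)
 *   {
 *       FileName oldfile;
 *       MDout.getValue(mylabel, oldfile, j);
 *       old_files.insert(oldfile);
 *   }
 *   // later, inside the loop over fns_in:
 *   // bool already_there = (old_files.count(fns_in[i]) > 0);
 */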
bool already_there = false; for (long j = 0; j < old_nr_files; j++) { FileName oldfile; MDout.getValue(mylabel, oldfile, j); if (oldfile == fns_in[i]) { already_there = true; int old_optics_group_number; MDout.getValue(EMDL_IMAGE_OPTICS_GROUP, old_optics_group_number, j); if (old_optics_group_number != optics_group_number) { std::cerr << " fns_in[i]= " << fns_in[i] << " old_optics_group_number= " << old_optics_group_number << " optics_group_number= " << optics_group_number << std::endl; REPORT_ERROR("ERROR: trying to add an pre-existing image with a different optics group!"); } break; } } if (!already_there) { MDout.addObject(); MDout.setValue(mylabel, fns_in[i]); MDout.setValue(EMDL_IMAGE_OPTICS_GROUP, optics_group_number); } } // Write output STAR file fh.open((fn_odir + fn_out).c_str(), std::ios::out); if (!fh) REPORT_ERROR( (std::string)"MlModel::write: Cannot write file: " + fn_odir + fn_out); MDopt.write(fh); MDout.write(fh); fh.close(); long nr_new_files = MDout.numberOfObjects(); std::cout << " Written " << (fn_odir + fn_out) << " with " << nr_new_files << " items (" << (nr_new_files - old_nr_files) << " new items)" << std::endl; } else if (do_coordinates) { // Make the same directory structure of the coordinates // Copy all coordinate files into the same subdirectory in the Import directory // But remove directory structure from pipeline if that exists // Dereference symbolic links if needed FileName fn_dir = fn_in; if (fn_dir.contains("/")) fn_dir = fn_dir.beforeLastOf("/"); else fn_dir = "."; FileName fn_pre, fn_jobnr, fn_post; if (decomposePipelineSymlinkName(fn_dir, fn_pre, fn_jobnr, fn_post)) { // Make the output directory command = "mkdir -p " + fn_odir + fn_post; if (system(command.c_str())) REPORT_ERROR("ERROR: there was an error executing: " + command); // Copy the coordinate files one by one to prevent problems of too long command line for (long i = 0; i < nr_input_files; i++) { command = "cp " + fns_in[i] + " " + fn_odir + fn_post; if (system(command.c_str())) REPORT_ERROR("ERROR: there was an error executing: " + command); } } else { // Copy the coordinate files one by one to prevent problems of too long command line for (long i = 0; i < nr_input_files; i++) { command = "cp --parents " + fns_in[i] + " " + fn_odir; if (system(command.c_str())) REPORT_ERROR("ERROR: there was an error executing: " + command); } } // Make a suffix file, which contains the actual suffix as a suffix // Get the coordinate-file suffix FileName fn_suffix2 = fn_in.beforeLastOf("*"); fh.open((fn_odir + fn_out).c_str(), std::ios::out); if (!fh) REPORT_ERROR( (std::string)"Import: Cannot write file: " + fn_odir + fn_out); fh << fn_suffix2 << "*.mrc" << std::endl; fh.close(); } else if (do_particles) { ObservationModel obsModel; MetaDataTable MD; ObservationModel::loadSafely(fn_in, obsModel, MD); // Make sure rlnOpticsGroupName is set to this value // This is only a valid option if there was a single optics_group in the input file if (particles_optics_group_name != "") { if (!particles_optics_group_name.validateCharactersStrict()) REPORT_ERROR("The optics group name may contain only numbers, alphabets and hyphen(-)."); if (obsModel.opticsMdt.numberOfObjects() != 1) { obsModel.opticsMdt.write(std::cerr); REPORT_ERROR(" ERROR: cannot rename particles optics groups when multiple ones are imported!"); } obsModel.opticsMdt.setValue(EMDL_IMAGE_OPTICS_GROUP_NAME, particles_optics_group_name, 0); } FileName fnt = "/" + fn_in; fnt = fn_odir + fnt.afterLastOf("/"); obsModel.save(MD, fnt, "particles"); } else 
if (do_other || do_halfmaps) { if (nr_input_files > 1) { REPORT_ERROR("ERROR: Multiple files (i.e. filename wildcards) are not allowed for the import of other types."); } // For all the rest of the imports, just copy the files in the Import/jobxxx/ directory with the same name FileName fnt = "/" + fn_in; fnt = fnt.afterLastOf("/"); command = "cp " + fn_in + " " + fn_odir + fnt; if (system(command.c_str())) REPORT_ERROR("ERROR: there was an error executing: " + command); if (do_halfmaps) { // For unfiltered half-maps, also get the other half-map FileName fn_inb = fn_in; size_t pos = fn_inb.find("half1"); if (pos != std::string::npos) { fn_inb.replace(pos, 5, "half2"); } else { pos = fn_inb.find("half2"); if (pos != std::string::npos) { fn_inb.replace(pos, 5, "half1"); } } fnt = "/" + fn_inb; fnt = fnt.afterLastOf("/"); command = "cp " + fn_inb + " " + fn_odir + fnt; if (system(command.c_str())) REPORT_ERROR("ERROR: there was an error executing: " + command); } } std::cout << " done!" << std::endl; } }; int main(int argc, char *argv[]) { import_parameters prm; try { prm.read(argc, argv); prm.run(); } catch (RelionError XE) { //prm.usage(); std::cerr << XE; return RELION_EXIT_FAILURE; } return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/interpolation_test.cpp000066400000000000000000000002021411340063500211610ustar00rootroot00000000000000#include int main(int argc, char *argv[]) { Interpolation::test2D(); return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/localsym.cpp000066400000000000000000000022671411340063500170730ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include "src/local_symmetry.h" int main(int argc, char *argv[]) { local_symmetry_parameters prm; try { prm.read(argc, argv); prm.run(); } catch (RelionError XE) { prm.usage(); std::cerr << XE; return RELION_EXIT_FAILURE; } return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/localsym_mpi.cpp000066400000000000000000000005561411340063500177370ustar00rootroot00000000000000#include "src/local_symmetry_mpi.h" int main(int argc, char *argv[]) { local_symmetry_parameters_mpi prm; try { prm.read(argc, argv); prm.run(); } catch (RelionError XE) { if (prm.verb > 0) prm.usage(); std::cerr << XE; MPI_Abort(MPI_COMM_WORLD, RELION_EXIT_FAILURE); } MPI_Barrier(MPI_COMM_WORLD); return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/maingui.cpp000066400000000000000000000062621411340063500167000ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. 
Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include #include #include "src/gui_mainwindow.h" #include #ifdef HAVE_CONFIG_H #include "config.h" #endif int main(int argc, char *argv[]) { Fl::scheme("gtk+"); #define _MAX_PATH 200 char my_dir[_MAX_PATH]; char short_dir[49]; char* res = getcwd(my_dir, _MAX_PATH); // Get last 45 characters of my_dir to fit in titlebar of window if (strlen(my_dir) > 45) { short_dir[0]=short_dir[1]=short_dir[2]='.'; int j = 3; for (int i = strlen(my_dir)-45; i < strlen(my_dir); i++, j++) { short_dir[j] = my_dir[i]; } short_dir[j] = '\0'; } else { int i; for (i = 0; i < strlen(my_dir); i++) short_dir[i] = my_dir[i]; short_dir[i] = '\0'; } char titletext[256]; snprintf(titletext, 256, "RELION-%s", g_RELION_VERSION); #ifdef PACKAGE_VERSION strcat(titletext,PACKAGE_VERSION); #endif strcat(titletext,": "); strcat (titletext, short_dir); try { // Fill the window if (checkParameter(argc, argv, "--help")) { std::cerr << " [--refresh 2] : refresh rate in seconds" << std::endl; std::cerr << " [--idle 3600] : quit GUI after this many second" << std::endl; std::cerr << " [--readonly] : limited version of GUI that does not touch any files" << std::endl; std::cerr << " [--version] : show the version of this program" << std::endl; exit(0); } else if (checkParameter(argc, argv, "--version")) { // Although our parser checks for --version, we do it here. Otherwise GuiMainWindow asks for a new project directory. PRINT_VERSION_INFO(); exit(0); } FileName fn_pipe = getParameter(argc, argv, "--pipeline", "default"); FileName fn_sched = getParameter(argc, argv, "--schedule", ""); if (fn_sched != "") fn_sched = "Schedules/" + fn_sched; int _update_every_sec = textToInteger(getParameter(argc, argv, "--refresh", "2")); int _exit_after_sec = textToInteger(getParameter(argc, argv, "--idle", "3600")); bool _do_read_only = checkParameter(argc, argv, "--readonly"); GuiMainWindow window(GUIWIDTH, GUIHEIGHT_EXT, titletext, fn_pipe, fn_sched, _update_every_sec, _exit_after_sec, _do_read_only); // Show and run the window window.show(); Fl::run(); } catch (RelionError XE) { std::cerr << XE; return RELION_EXIT_FAILURE; } //return Fl::run(); return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/manualpick.cpp000066400000000000000000000026131411340063500173670ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include #include #include #include #include #include #include #include #include #include int main(int argc, char *argv[]) { ManualPicker prm; try { prm.read(argc, argv); prm.initialise(); prm.run(); } catch (RelionError XE) { //prm.usage(); std::cerr << XE; return RELION_EXIT_FAILURE; } return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/mask_create.cpp000066400000000000000000000205361411340063500175250ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #include #include #include #include #include #include #include #include #include class mask_create_parameters { public: FileName fn_apply_in, fn_mask, fn_apply_out, fn_thr, fn_omask, fn_and, fn_or, fn_andnot, fn_ornot; RFLOAT ini_threshold, extend_ini_mask, width_soft_edge, lowpass, angpix, helical_z_percentage; RFLOAT inner_radius, outer_radius, center_x, center_y, center_z; bool do_invert, do_helix, do_denovo; int n_threads, box_size; IOParser parser; void usage() { parser.writeUsage(std::cerr); } void read(int argc, char **argv) { parser.setCommandLine(argc, argv); int create_section = parser.addSection("Mask creation options"); fn_thr = parser.getOption("--i", "Input map to use for thresholding to generate initial binary mask",""); fn_omask = parser.getOption("--o", "Output mask","mask.mrc"); fn_and = parser.getOption("--and", "Pixels in the initial mask will be one if the input AND this map are above the --ini_threshold value",""); fn_or = parser.getOption("--or", "Pixels in the initial mask will be one if the input OR this map are above the --ini_threshold value",""); fn_andnot = parser.getOption("--and_not", "Pixels in the initial mask will be one if the input is above the --ini_threshold AND this map is below it",""); fn_ornot = parser.getOption("--or_not", "Pixels in the initial mask will be one if the input is above the --ini_threshold OR this map is below it",""); ini_threshold = textToFloat(parser.getOption("--ini_threshold", "Initial threshold for binarization","0.01")); extend_ini_mask = textToFloat(parser.getOption("--extend_inimask", "Extend initial binary mask this number of pixels","0")); width_soft_edge = textToFloat(parser.getOption("--width_soft_edge", "Width (in pixels) of the additional soft edge on the binary mask", "0")); do_invert = parser.checkOption("--invert", "Invert the final mask"); do_helix = parser.checkOption("--helix", "Generate a mask for 3D helix"); lowpass = textToFloat(parser.getOption("--lowpass", "Lowpass filter (in Angstroms) for the input map, prior to binarization (default is none)", "-1")); angpix = textToFloat(parser.getOption("--angpix", "Pixel size (in Angstroms) for the lowpass filter", "-1")); helical_z_percentage = textToFloat(parser.getOption("--z_percentage", "This box length along the center of Z axis contains good information of the helix", "0.3")); n_threads = textToInteger(parser.getOption("--j", "Number of threads", "1")); int denovo_section = parser.addSection("De novo mask creation"); do_denovo = parser.checkOption("--denovo", "Create a mask de novo"); box_size = textToInteger(parser.getOption("--box_size", "The box size of the mask in pixels", "-1")); inner_radius = textToFloat(parser.getOption("--inner_radius", "Inner radius of the masked region in pixels", "0")); outer_radius = textToFloat(parser.getOption("--outer_radius", "Outer radius of the mask region in pixels", "99999")); center_x = textToFloat(parser.getOption("--center_x", "X coordinate of the center of the mask in pixels", "0")); center_y = textToFloat(parser.getOption("--center_y", "Y coordinate of the center of the mask in pixels", "0")); center_z = textToFloat(parser.getOption("--center_z", "Z coordinate of the center of the mask in pixels", "0")); // Check for errors in the command-line option if (parser.checkForErrors()) REPORT_ERROR("Errors encountered on the command line, exiting..."); if (fn_thr == "" && fn_apply_in == "" && !do_denovo) REPORT_ERROR("Either provide --i to apply a 
mask, OR --create_from or --denovo to create a new mask"); if (do_denovo && box_size < 0) REPORT_ERROR("For de novo mask creation, please specify the box size in --box_size"); } void run() { Image Iout; if (do_denovo) { makeMaskFromScratch(Iout); if (angpix < 0) { std::cerr << "WARNING: The pixel size was not specified. 1.00 is set to the output mask." << std::endl; angpix = 1.0; } } else { makeMaskFromFile(Iout); } if (do_invert) { FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Iout()) { DIRECT_MULTIDIM_ELEM(Iout(), n) = 1. - DIRECT_MULTIDIM_ELEM(Iout(), n); } } // Set header and write outmap map Iout.setStatisticsInHeader(); Iout.setSamplingRateInHeader(angpix, angpix); Iout.write(fn_omask); std::cout << " Done creating mask! Written out: " << fn_omask << std::endl; } void makeMaskFromScratch(Image &Iout) { Iout().reshape(box_size, box_size, box_size); raisedCrownMask(Iout(), inner_radius, outer_radius, width_soft_edge, center_x, center_y, center_z); } void makeMaskFromFile(Image &Iout) { Image Iin, Ip; std:: cout << " Creating a mask ..." << std::endl; Iin.read(fn_thr); if (angpix < 0) { angpix = Iin.samplingRateX(); std::cerr << "WARNING: The pixel size (--angpix) was not specified." << std::endl; std::cerr << " The value in the input image header (= " << angpix << ") is used instead." << std::endl; } if (lowpass > 0) { lowPassFilterMap(Iin(), lowpass, angpix); } Iin().setXmippOrigin(); if (fn_and != "") { Ip.read(fn_and); Ip().setXmippOrigin(); if (!Ip().sameShape(Iin())) REPORT_ERROR("ERROR: --i and --and maps are different shapes!"); FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Ip()) { if (DIRECT_MULTIDIM_ELEM(Ip(), n) > ini_threshold && DIRECT_MULTIDIM_ELEM(Iin(), n) > ini_threshold) DIRECT_MULTIDIM_ELEM(Iin(), n) = ini_threshold + 1.; else DIRECT_MULTIDIM_ELEM(Iin(), n) = ini_threshold - 1.; } } else if (fn_or != "") { Ip.read(fn_or); Ip().setXmippOrigin(); if (!Ip().sameShape(Iin())) REPORT_ERROR("ERROR: --i and --or maps are different shapes!"); FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Ip()) { if (DIRECT_MULTIDIM_ELEM(Ip(), n) > ini_threshold || DIRECT_MULTIDIM_ELEM(Iin(), n) > ini_threshold) DIRECT_MULTIDIM_ELEM(Iin(), n) = ini_threshold + 1.; else DIRECT_MULTIDIM_ELEM(Iin(), n) = ini_threshold - 1.; } } else if (fn_andnot != "") { Ip.read(fn_andnot); Ip().setXmippOrigin(); if (!Ip().sameShape(Iin())) REPORT_ERROR("ERROR: --i and --not maps are different shapes!"); FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Ip()) { if (DIRECT_MULTIDIM_ELEM(Iin(), n) > ini_threshold && DIRECT_MULTIDIM_ELEM(Ip(), n) < ini_threshold) DIRECT_MULTIDIM_ELEM(Iin(), n) = ini_threshold + 1.; else DIRECT_MULTIDIM_ELEM(Iin(), n) = ini_threshold - 1.; } } else if (fn_ornot != "") { Ip.read(fn_ornot); Ip().setXmippOrigin(); if (!Ip().sameShape(Iin())) REPORT_ERROR("ERROR: --i and --not maps are different shapes!"); FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Ip()) { if (DIRECT_MULTIDIM_ELEM(Iin(), n) > ini_threshold || DIRECT_MULTIDIM_ELEM(Ip(), n) < ini_threshold) DIRECT_MULTIDIM_ELEM(Iin(), n) = ini_threshold + 1.; else DIRECT_MULTIDIM_ELEM(Iin(), n) = ini_threshold - 1.; } } autoMask(Iin(), Iout(), ini_threshold, extend_ini_mask, width_soft_edge, true, n_threads); // true sets verbosity if (do_helix) { cutZCentralPartOfSoftMask(Iout(), helical_z_percentage, width_soft_edge); } } }; int main(int argc, char *argv[]) { mask_create_parameters prm; try { prm.read(argc, argv); prm.run(); } catch (RelionError XE) { std::cerr << XE; //prm.usage(); return RELION_EXIT_FAILURE; } return RELION_EXIT_SUCCESS; } 
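/*
 * Illustrative usage of the relion_mask_create options defined above. These command
 * lines are hypothetical examples (file names and numeric values are made up); the
 * option names are the ones parsed in read():
 *
 *   # Binarise a low-pass filtered map at a threshold, then extend and soften the mask:
 *   relion_mask_create --i run_class001.mrc --o mask.mrc --lowpass 15 \
 *                      --ini_threshold 0.01 --extend_inimask 3 --width_soft_edge 6
 *
 *   # Create a spherical (raised-crown) mask de novo in a 200-pixel box:
 *   relion_mask_create --denovo --box_size 200 --outer_radius 80 \
 *                      --width_soft_edge 5 --angpix 1.1 --o sphere_mask.mrc
 */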
relion-3.1.3/src/apps/merge_particles.cpp000066400000000000000000000033111411340063500204040ustar00rootroot00000000000000#include #include #include int main(int argc, char *argv[]) { if (argc < 4) { std::cerr << "usage: relion_merge_particles ... \n"; return RELION_EXIT_FAILURE; } const int srcCount = argc - 2; std::string destFn = argv[argc-1]; std::vector particleMdts(srcCount); std::vector obsModels(srcCount); std::cout << "merging: " << std::endl; for (int i = 0; i < srcCount; i++) { std::string srcFn = argv[i+1]; std::cout << " " << srcFn << std::endl; ObservationModel::loadSafely(srcFn, obsModels[i], particleMdts[i]); } std::cout << "into: " << destFn << std::endl; std::vector> optGrTransl(srcCount); MetaDataTable particleOut, opticsOut; for (int i = 0; i < srcCount; i++) { const int ogc = obsModels[i].opticsMdt.numberOfObjects(); optGrTransl[i].resize(ogc); for (int g = 0; g < ogc; g++) { opticsOut.addObject(obsModels[i].opticsMdt.getObject(g)); const int ogNew = opticsOut.numberOfObjects() - 1; opticsOut.setValue(EMDL_IMAGE_OPTICS_GROUP, ogNew+1, ogNew); optGrTransl[i][g] = ogNew; } const int pc = particleMdts[i].numberOfObjects(); for (int p = 0; p < pc; p++) { particleOut.addObject(particleMdts[i].getObject(p)); const int pNew = particleOut.numberOfObjects() - 1; int og0; particleOut.getValue(EMDL_IMAGE_OPTICS_GROUP, og0, pNew); og0--; int og1 = optGrTransl[i][og0]; particleOut.setValue(EMDL_IMAGE_OPTICS_GROUP, og1+1, pNew); } } ObservationModel::saveNew(particleOut, opticsOut, destFn); return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/motion_refine.cpp000066400000000000000000000023351411340063500201010ustar00rootroot00000000000000/*************************************************************************** * * Authors: "Jasenko Zivanov & Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include int main(int argc, char *argv[]) { MotionRefiner prm; try { prm.read(argc, argv); prm.init(); prm.run(); } catch (RelionError XE) { //prm.usage(); std::cerr << XE; return RELION_EXIT_FAILURE; } return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/motion_refine_mpi.cpp000066400000000000000000000024601411340063500207450ustar00rootroot00000000000000/*************************************************************************** * * Authors: "Jasenko Zivanov & Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include int main(int argc, char *argv[]) { MotionRefinerMpi prm; try { prm.read(argc, argv); prm.init(); MPI_Barrier(MPI_COMM_WORLD); prm.run(); } catch (RelionError XE) { std::cerr << XE; MPI_Abort(MPI_COMM_WORLD, RELION_EXIT_FAILURE); } MPI_Barrier(MPI_COMM_WORLD); return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/movie_reconstruct.cpp000066400000000000000000000653151411340063500210250ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" "Takanori Nakane" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #include #include #include #include #include #include #include #include #include #include class MovieReconstructor { public: // I/O Parser IOParser parser; FileName fn_out, fn_sym, fn_sel, traj_path, fn_corrmic; MetaDataTable DF; ObservationModel obsModel; int r_max, r_min_nn, blob_order, ref_dim, interpolator, iter, debug_ori_size, debug_size, nr_threads, requested_eer_grouping, ctf_dim, nr_helical_asu, width_mask_edge, nr_sectors, chosen_class, data_dim, output_boxsize, movie_boxsize, verb, frame; RFLOAT blob_radius, blob_alpha, angular_error, shift_error, angpix, maxres, coord_angpix, movie_angpix, helical_rise, helical_twist; std::vector data_angpixes; bool do_ctf, ctf_phase_flipped, only_flip_phases, intact_ctf_first_peak, do_ewald, skip_weighting, skip_mask, no_barcode; bool skip_gridding, is_reverse, read_weights, do_external_reconstruct; float padding_factor, mask_diameter; // All backprojectors needed for parallel reconstruction BackProjector backprojector[2]; std::map mic2meta; public: MovieReconstructor() { } // Read command line arguments void read(int argc, char **argv); // Initialise some stuff after reading void initialise(); // Execute void run(); // Loop over all particles to be back-projected void backproject(int rank = 0, int size = 1); // For parallelisation purposes void backprojectOneParticle(MetaDataTable &mdt, long int ipart, MultidimArray &F2D, int subset); // perform the gridding reconstruction void reconstruct(); void applyCTFPandCTFQ(MultidimArray &Fin, CTF &ctf, FourierTransformer &transformer, MultidimArray &outP, MultidimArray &outQ, bool skip_mask=false); }; int main(int argc, char *argv[]) { MovieReconstructor app; try { app.read(argc, argv); app.initialise(); app.run(); } catch (RelionError XE) { std::cerr << XE; return RELION_EXIT_FAILURE; } return RELION_EXIT_SUCCESS; } void MovieReconstructor::run() { backproject(0, 1); reconstruct(); } void MovieReconstructor::read(int argc, char **argv) { parser.setCommandLine(argc, argv); int general_section = parser.addSection("General options"); fn_sel = parser.getOption("--i", "Input STAR file with the projection images and their orientations", ""); fn_out = parser.getOption("--o", "Name for output reconstruction","relion.mrc"); fn_sym = parser.getOption("--sym", "Symmetry group", "c1"); maxres = textToFloat(parser.getOption("--maxres", "Maximum resolution (in Angstrom) to consider in Fourier space (default Nyquist)", "-1")); padding_factor = textToFloat(parser.getOption("--pad", "Padding factor", "2")); fn_corrmic = parser.getOption("--corr_mic", "Motion correction STAR file", ""); traj_path = parser.getOption("--traj_path", "Trajectory path prefix", ""); movie_angpix = textToFloat(parser.getOption("--movie_angpix", "Pixel size in the movie", "-1")); coord_angpix = textToFloat(parser.getOption("--coord_angpix", "Pixel size of particle coordinates", "-1")); frame = textToInteger(parser.getOption("--frame", "Movie frame to reconstruct (1-indexed)", "1")); requested_eer_grouping = textToInteger(parser.getOption("--eer_grouping", "Override EER grouping (--frame is in this new grouping)", "-1")); movie_boxsize = textToInteger(parser.getOption("--window", "Box size to extract from raw movies", "-1")); output_boxsize = textToInteger(parser.getOption("--scale", "Box size after down-sampling", "-1")); nr_threads = textToInteger(parser.getOption("--j", "Number of threads (1 or 2)", "2")); int ctf_section = parser.addSection("CTF options"); do_ctf 
= parser.checkOption("--ctf", "Apply CTF correction"); intact_ctf_first_peak = parser.checkOption("--ctf_intact_first_peak", "Leave CTFs intact until first peak"); ctf_phase_flipped = parser.checkOption("--ctf_phase_flipped", "Images have been phase flipped"); only_flip_phases = parser.checkOption("--only_flip_phases", "Do not correct CTF-amplitudes, only flip phases"); int ewald_section = parser.addSection("Ewald-sphere correction options"); do_ewald = parser.checkOption("--ewald", "Correct for Ewald-sphere curvature (developmental)"); mask_diameter = textToFloat(parser.getOption("--mask_diameter", "Diameter (in A) of mask for Ewald-sphere curvature correction", "-1.")); width_mask_edge = textToInteger(parser.getOption("--width_mask_edge", "Width (in pixels) of the soft edge on the mask", "3")); is_reverse = parser.checkOption("--reverse_curvature", "Try curvature the other way around"); nr_sectors = textToInteger(parser.getOption("--sectors", "Number of sectors for Ewald sphere correction", "2")); skip_mask = parser.checkOption("--skip_mask", "Do not apply real space mask during Ewald sphere correction"); skip_weighting = parser.checkOption("--skip_weighting", "Do not apply weighting during Ewald sphere correction"); int helical_section = parser.addSection("Helical options"); nr_helical_asu = textToInteger(parser.getOption("--nr_helical_asu", "Number of helical asymmetrical units", "1")); helical_rise = textToFloat(parser.getOption("--helical_rise", "Helical rise (in Angstroms)", "0.")); helical_twist = textToFloat(parser.getOption("--helical_twist", "Helical twist (in degrees, + for right-handedness)", "0.")); int expert_section = parser.addSection("Expert options"); if (parser.checkOption("--NN", "Use nearest-neighbour instead of linear interpolation before gridding correction")) interpolator = NEAREST_NEIGHBOUR; else interpolator = TRILINEAR; blob_radius = textToFloat(parser.getOption("--blob_r", "Radius of blob for gridding interpolation", "1.9")); blob_order = textToInteger(parser.getOption("--blob_m", "Order of blob for gridding interpolation", "0")); blob_alpha = textToFloat(parser.getOption("--blob_a", "Alpha-value of blob for gridding interpolation", "15")); iter = textToInteger(parser.getOption("--iter", "Number of gridding-correction iterations", "10")); ref_dim = 3; skip_gridding = parser.checkOption("--skip_gridding", "Skip gridding part of the reconstruction"); no_barcode = parser.checkOption("--no_barcode", "Don't apply barcode-like extension when extracting outside a micrograph"); verb = textToInteger(parser.getOption("--verb", "Verbosity", "1")); // Hidden r_min_nn = textToInteger(getParameter(argc, argv, "--r_min_nn", "10")); // Check for errors in the command-line option if (parser.checkForErrors()) REPORT_ERROR("Errors encountered on the command line (see above), exiting..."); if (movie_angpix < 0) REPORT_ERROR("For this program, you have to explicitly specify the movie pixel size (--movie_angpix)."); if (coord_angpix < 0) REPORT_ERROR("For this program, you have to explicitly specify the coordinate pixel size (--coord_angpix)."); if (movie_boxsize < 0 || movie_boxsize % 2 != 0) REPORT_ERROR("You have to specify the extraction box size (--window) as an even number."); if (output_boxsize < 0 || output_boxsize % 2 != 0) REPORT_ERROR("You have to specify the reconstruction box size (--scale) as an even number."); if (nr_threads < 0 || nr_threads > 2) REPORT_ERROR("Number of threads (--j) must be 1 or 2"); if (verb > 0 && do_ewald && mask_diameter < 0 && !(skip_mask && 
skip_weighting)) REPORT_ERROR("To apply Ewald sphere correction (--ewald), you have to specify the mask diameter(--mask_diameter)."); } void MovieReconstructor::initialise() { angpix = movie_angpix * movie_boxsize / output_boxsize; std::cout << "Movie box size = " << movie_boxsize << " px at " << movie_angpix << " A/px" << std::endl; std::cout << "Reconstruction box size = " << output_boxsize << " px at " << angpix << " A/px" << std::endl; std::cout << "Coordinate pixel size = " << coord_angpix << " A/px" << std::endl; // TODO: movie angpix and coordinate angpix can be read from metadata STAR files // Load motion correction STAR file. FIXME: code duplication from MicrographHandler MetaDataTable corrMic; // Don't die even if conversion failed. Polishing does not use obsModel from a motion correction STAR file ObservationModel::loadSafely(fn_corrmic, obsModel, corrMic, "micrographs", verb, false); FOR_ALL_OBJECTS_IN_METADATA_TABLE(corrMic) { std::string micName, metaName; corrMic.getValueToString(EMDL_MICROGRAPH_NAME, micName); corrMic.getValueToString(EMDL_MICROGRAPH_METADATA_NAME, metaName); // remove the pipeline job prefix FileName fn_pre, fn_jobnr, fn_post; decomposePipelineFileName(micName, fn_pre, fn_jobnr, fn_post); // std::cout << fn_post << " => " << metaName << std::endl; mic2meta[fn_post] = metaName; } // Read MetaData file, which should have the image names and their angles! ObservationModel::loadSafely(fn_sel, obsModel, DF, "particles", 0, false); std::cout << "Read " << DF.numberOfObjects() << " particles." << std::endl; data_angpixes = obsModel.getPixelSizes(); if (verb > 0 && !DF.containsLabel(EMDL_PARTICLE_RANDOM_SUBSET)) { REPORT_ERROR("The rlnRandomSubset column is missing in the input STAR file."); } if (verb > 0 && (chosen_class >= 0) && !DF.containsLabel(EMDL_PARTICLE_CLASS)) { REPORT_ERROR("The rlnClassNumber column is missing in the input STAR file."); } if (do_ewald) do_ctf = true; data_dim = 2; if (maxres < 0.) r_max = -1; else r_max = CEIL(output_boxsize * angpix / maxres); } void MovieReconstructor::backproject(int rank, int size) { for (int i = 0; i < 2; i++) { backprojector[i] = BackProjector(output_boxsize, ref_dim, fn_sym, interpolator, padding_factor, r_min_nn, blob_order, blob_radius, blob_alpha, data_dim, skip_gridding); backprojector[i].initZeros(2 * r_max); } std::vector mdts = StackHelper::splitByMicrographName(DF); const int nr_movies= mdts.size(); if (verb > 0) { std::cout << " + Back-projecting all images ..." 
<< std::endl; time_config(); init_progress_bar(nr_movies); } FileName fn_mic, fn_traj, fn_movie, prev_gain; FourierTransformer transformer[2]; Image Iframe, Igain; int frame_no = frame; // 1-indexed for (int imov = 0; imov < nr_movies; imov++) { mdts[imov].getValue(EMDL_MICROGRAPH_NAME, fn_mic, 0); FileName fn_pre, fn_jobnr, fn_post; decomposePipelineFileName(fn_mic, fn_pre, fn_jobnr, fn_post); // std::cout << "fn_post = " << fn_post << std::endl; if (mic2meta[fn_post] == "") REPORT_ERROR("Cannot get metadata STAR file for " + fn_mic); Micrograph mic(mic2meta[fn_post]); fn_movie = mic.getMovieFilename(); fn_traj = traj_path + "/" + fn_post.withoutExtension() + "_tracks.star"; //#define DEBUG #ifdef DEBUG std::cout << "fn_mic = " << fn_mic << "\n\tfn_traj = " << fn_traj << "\n\tfn_movie = " << fn_movie << std::endl; #endif const bool isEER = EERRenderer::isEER(fn_movie); int eer_upsampling, orig_eer_grouping, eer_grouping; if (isEER) { eer_upsampling = mic.getEERUpsampling(); orig_eer_grouping = mic.getEERGrouping(); if (requested_eer_grouping <= 0) eer_grouping = orig_eer_grouping; else eer_grouping = requested_eer_grouping; } FileName fn_gain = mic.getGainFilename(); if (fn_gain != prev_gain) { if (isEER) EERRenderer::loadEERGain(fn_gain, Igain(), eer_upsampling); else Igain.read(fn_gain); prev_gain = fn_gain; } // Read trajectories. Both particle ID and frame ID are 0-indexed in this array. std::vector> trajectories = MotionHelper::readTracksInPix(fn_traj, movie_angpix); // TODO: loop over relevant frames with per-frame shifts if (isEER) { EERRenderer renderer; renderer.read(fn_movie, eer_upsampling); const int frame_start = (frame_no - 1) * eer_grouping + 1; const int frame_end = frame_start + eer_grouping - 1; // std::cout << "EER orig grouping = " << orig_eer_grouping << " new grouping = " << eer_grouping << " range " << frame_start << " - " << frame_end << std::endl; renderer.setFramesOfInterest(frame_start, frame_end); renderer.renderFrames(frame_start, frame_end, Iframe()); } else { FileName fn_frame; fn_frame.compose(frame_no, fn_movie); Iframe.read(fn_frame); } const int w0 = XSIZE(Iframe()); const int h0 = YSIZE(Iframe()); // Apply gain correction // Probably we can ignore defect correction, because we are not re-aligning.
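// The multiplication below does two things at once: it applies the gain reference (when one is
// available) and flips the sign of every pixel; presumably this contrast inversion is there so the
// re-extracted frames match the inverted-contrast convention of the extracted particles.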
if (fn_gain == "") FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Iframe()) DIRECT_MULTIDIM_ELEM(Iframe(), n) *= -1; else FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Iframe()) DIRECT_MULTIDIM_ELEM(Iframe(), n) *= -DIRECT_MULTIDIM_ELEM(Igain(), n); #pragma omp parallel for num_threads(nr_threads) for (int subset = 1; subset <= 2; subset++) { long int stack_id; FileName fn_img, fn_stack; Image Iparticle; Image Fparticle; int n_processed = 0; // FOR_ALL_OBJECTS_IN_METADATA_TABLE(mdts[imov]) // You cannot do this within omp parallel (because current_object changes) for (long int ipart = 0; ipart < mdts[imov].numberOfObjects(); ipart++) { #ifndef DEBUG progress_bar(imov); #endif int this_subset = 0; mdts[imov].getValue(EMDL_PARTICLE_RANDOM_SUBSET, this_subset, ipart); if (subset >= 1 && subset <= 2 && this_subset != subset) continue; n_processed++; const int opticsGroup = obsModel.getOpticsGroup(mdts[imov], ipart); // 0-indexed const RFLOAT data_angpix = data_angpixes[opticsGroup]; mdts[imov].getValue(EMDL_IMAGE_NAME, fn_img, ipart); fn_img.decompose(stack_id, fn_stack); #ifdef DEBUG std::cout << "\tstack_id = " << stack_id << " fn_stack = " << fn_stack << std::endl; #endif if (stack_id > trajectories.size()) REPORT_ERROR("Missing trajectory!"); RFLOAT coord_x, coord_y, origin_x, origin_y, traj_x, traj_y; mdts[imov].getValue(EMDL_IMAGE_COORD_X, coord_x, ipart); // in micrograph pixel mdts[imov].getValue(EMDL_IMAGE_COORD_Y, coord_y, ipart); mdts[imov].getValue(EMDL_ORIENT_ORIGIN_X_ANGSTROM, origin_x, ipart); // in Angstrom mdts[imov].getValue(EMDL_ORIENT_ORIGIN_Y_ANGSTROM, origin_y, ipart); #ifdef DEBUG std::cout << "\t\tcoord_mic_px = (" << coord_x << ", " << coord_y << ")"; std::cout << " origin_angst = (" << origin_x << ", " << origin_y << ")"; std::cout << " traj_movie_px = (" << trajectories[stack_id - 1][frame_no - 1].x << ", " << trajectories[stack_id - 1][frame_no - 1].y << ")" << std::endl; #endif // Below might look overly complicated but is necessary to have the same rounding behaviour as Extract & Polish. Iparticle().initZeros(movie_boxsize, movie_boxsize); // Revised code: use data_angpix // pixel coordinate of the top left corner of the extraction box after down-sampling double xpO = (int)(coord_x * coord_angpix / data_angpix); double ypO = (int)(coord_y * coord_angpix / data_angpix); // pixel coordinate in the movie int x0 = (int)round(xpO * data_angpix / movie_angpix) - movie_boxsize / 2; int y0 = (int)round(ypO * data_angpix / movie_angpix) - movie_boxsize / 2; // pixel coordinate in the movie: cleaner but not compatible with existing files... 
// int x0N = (int)round(coord_x * coord_angpix / movie_angpix) - movie_boxsize / 2; // int y0N = (int)round(coord_y * coord_angpix / movie_angpix) - movie_boxsize / 2; #ifdef DEBUG std::cout << "DEBUG: xpO = " << xpO << " ypO = " << ypO << std::endl; std::cout << "DEBUG: x0 = " << x0 << " y0 = " << y0 << " data_angpix = " << data_angpix << " angpix = " << angpix << std::endl; // std::cout << "DEBUG: x0N = " << x0N << " y0N = " << y0N << std::endl; #endif double dxM, dyM; if (isEER) { const int eer_frame = (frame_no - 1) * eer_grouping; // 0 indexed const double eer_frame_in_old_grouping = (double)eer_frame / orig_eer_grouping; const int src1 = int(floor(eer_frame_in_old_grouping)); const int src2 = src1 + 1; const double frac = eer_frame_in_old_grouping - src1; if (src2 == trajectories[0].size()) // beyond end { dxM = trajectories[stack_id - 1][src1].x; dyM = trajectories[stack_id - 1][src1].y; } else { dxM = trajectories[stack_id - 1][src1].x * (1 - frac) + trajectories[stack_id - 1][src2].x * frac; dyM = trajectories[stack_id - 1][src1].y * (1 - frac) + trajectories[stack_id - 1][src2].y * frac; } // std::cout << "eer_frame_in_old_grouping = " << eer_frame_in_old_grouping << " src1 = " << src1 << " " << trajectories[stack_id - 1][src1] << " src2 = " << src2 << " " << trajectories[stack_id - 1][src2] << " interp = " << dxM << " " << dyM << std::endl; } else { dxM = trajectories[stack_id - 1][frame_no - 1].x; dyM = trajectories[stack_id - 1][frame_no - 1].y; } int dxI = (int)round(dxM); int dyI = (int)round(dyM); x0 += dxI; y0 += dyI; for (long int y = 0; y < movie_boxsize; y++) for (long int x = 0; x < movie_boxsize; x++) { int xx = x0 + x; int yy = y0 + y; if (!no_barcode) { if (xx < 0) xx = 0; else if (xx >= w0) xx = w0 - 1; if (yy < 0) yy = 0; else if (yy >= h0) yy = h0 - 1; } else { // No barcode if (xx < 0 || xx >= w0 || yy < 0 || yy >= h0) continue; } DIRECT_NZYX_ELEM(Iparticle(), 0, 0, y, x) = DIRECT_NZYX_ELEM(Iframe(), 0, 0, yy, xx); } // Residual shifts in Angstrom. They don't contain OriginX/Y. Note the NEGATIVE sign. double dxR = - (dxM - dxI) * movie_angpix; double dyR = - (dyM - dyI) * movie_angpix; // Further shifts by OriginX/Y. Note that OriginX/Y are applied as they are // (defined as "how much shift" we have to move particles). dxR += origin_x; dyR += origin_y; Iparticle().setXmippOrigin(); transformer[this_subset - 1].FourierTransform(Iparticle(), Fparticle()); if (output_boxsize != movie_boxsize) Fparticle = FilterHelper::cropCorner2D(Fparticle, output_boxsize / 2 + 1, output_boxsize); shiftImageInFourierTransform(Fparticle(), Fparticle(), output_boxsize, dxR / angpix, dyR / angpix); CenterFFTbySign(Fparticle()); backprojectOneParticle(mdts[imov], ipart, Fparticle(), this_subset); } // particle } // subset } // movie if (verb > 0) progress_bar(nr_movies); } void MovieReconstructor::backprojectOneParticle(MetaDataTable &mdt, long int p, MultidimArray &F2D, int this_subset) { RFLOAT rot, tilt, psi, fom, r_ewald_sphere; Matrix2D A3D; MultidimArray Fctf; Matrix1D trans(2); FourierTransformer transformer; // Rotations mdt.getValue(EMDL_ORIENT_ROT, rot, p); mdt.getValue(EMDL_ORIENT_TILT, tilt, p); mdt.getValue(EMDL_ORIENT_PSI, psi, p); Euler_angles2matrix(rot, tilt, psi, A3D); // If we are considering Ewald sphere curvature, the mag. 
matrix // has to be provided to the backprojector explicitly // (to avoid creating an Ewald ellipsoid) const bool ctf_premultiplied = false; const int opticsGroup = obsModel.getOpticsGroup(mdt, p); #pragma omp critical(MovieReconstructor_backprojectOneParticle) { if (obsModel.getPixelSize(opticsGroup) != angpix) obsModel.setPixelSize(opticsGroup, angpix); if (obsModel.getBoxSize(opticsGroup) != output_boxsize) obsModel.setBoxSize(opticsGroup, output_boxsize); } //ctf_premultiplied = obsModel.getCtfPremultiplied(opticsGroup); if (do_ewald && ctf_premultiplied) REPORT_ERROR("We cannot perform Ewald sphere correction on CTF premultiplied particles."); Matrix2D magMat; if (!do_ewald) { A3D = obsModel.applyAnisoMag(A3D, opticsGroup); } // We don't need this, since we are backprojecting as is. /* std::cout << "before: " << A3D << std::endl; A3D = obsModel.applyScaleDifference(A3D, opticsGroup, output_boxsize, angpix); std::cout << "after: " << A3D << std::endl; */ MultidimArray F2DP, F2DQ; FileName fn_img; Fctf.resize(F2D); Fctf.initConstant(1.); // Apply CTF if necessary if (do_ctf) { { CTF ctf; ctf.readByGroup(mdt, &obsModel, p); ctf.getFftwImage(Fctf, output_boxsize, output_boxsize, angpix, ctf_phase_flipped, only_flip_phases, intact_ctf_first_peak, true); obsModel.demodulatePhase(mdt, p, F2D); // This internally uses angpix!! obsModel.divideByMtf(mdt, p, F2D); // Ewald-sphere curvature correction if (do_ewald) { applyCTFPandCTFQ(F2D, ctf, transformer, F2DP, F2DQ, skip_mask); if (!skip_weighting) { // Also calculate W, store again in Fctf ctf.applyWeightEwaldSphereCurvature_noAniso(Fctf, output_boxsize, output_boxsize, angpix, mask_diameter); } // Also calculate the radius of the Ewald sphere (in pixels) r_ewald_sphere = output_boxsize * angpix / ctf.lambda; } } } if (true) // not subtract { if (do_ewald) { FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(F2D) { DIRECT_MULTIDIM_ELEM(Fctf, n) *= DIRECT_MULTIDIM_ELEM(Fctf, n); } } else if (do_ctf) // "Normal" reconstruction, multiply X by CTF, and W by CTF^2 { if (!ctf_premultiplied) { FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(F2D) { DIRECT_MULTIDIM_ELEM(F2D, n) *= DIRECT_MULTIDIM_ELEM(Fctf, n); } } FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Fctf) { DIRECT_MULTIDIM_ELEM(Fctf, n) *= DIRECT_MULTIDIM_ELEM(Fctf, n); } } DIRECT_A2D_ELEM(F2D, 0, 0) = 0.0; if (do_ewald) { Matrix2D magMat; if (obsModel.hasMagMatrices) { magMat = obsModel.getMagMatrix(opticsGroup); } else { magMat = Matrix2D(2,2); magMat.initIdentity(); } backprojector[this_subset - 1].set2DFourierTransform(F2DP, A3D, &Fctf, r_ewald_sphere, true, &magMat); backprojector[this_subset - 1].set2DFourierTransform(F2DQ, A3D, &Fctf, r_ewald_sphere, false, &magMat); } else { backprojector[this_subset - 1].set2DFourierTransform(F2D, A3D, &Fctf); } } } void MovieReconstructor::reconstruct() { bool do_map = false; bool do_use_fsc = false; if (verb > 0) std::cout << " + Starting the reconstruction ..." << std::endl; #pragma omp parallel for num_threads(nr_threads) for (int i = 0; i < 2; i++) { MultidimArray fsc, dummy; Image vol; fsc.resize(output_boxsize/2+1); backprojector[i].symmetrise(nr_helical_asu, helical_twist, helical_rise / angpix); MultidimArray tau2; backprojector[i].reconstruct(vol(), iter, do_map, tau2); vol.setSamplingRateInHeader(angpix); FileName fn_half = fn_out.withoutExtension() + "_half" + integerToString(i + 1) + ".mrc"; vol.write(fn_half); if (verb > 0) std::cout << " + Done! 
Written output map in: " << fn_half << std::endl; } } void MovieReconstructor::applyCTFPandCTFQ(MultidimArray &Fin, CTF &ctf, FourierTransformer &transformer, MultidimArray &outP, MultidimArray &outQ, bool skip_mask) { //FourierTransformer transformer; outP.resize(Fin); outQ.resize(Fin); float angle_step = 180./nr_sectors; for (float angle = 0.; angle < 180.; angle +=angle_step) { MultidimArray CTFP(Fin), Fapp(Fin); MultidimArray Iapp(YSIZE(Fin), YSIZE(Fin)); // Two passes: one for CTFP, one for CTFQ for (int ipass = 0; ipass < 2; ipass++) { bool is_my_positive = (ipass == 1) ? is_reverse : !is_reverse; // Get CTFP and multiply the Fapp with it ctf.getCTFPImage(CTFP, YSIZE(Fin), YSIZE(Fin), angpix, is_my_positive, angle); Fapp = Fin * CTFP; // element-wise complex multiplication! if (!skip_mask) { // inverse transform and mask out the particle.... CenterFFTbySign(Fapp); transformer.inverseFourierTransform(Fapp, Iapp); softMaskOutsideMap(Iapp, ROUND(mask_diameter/(angpix*2.)), (RFLOAT)width_mask_edge); // Re-box to a smaller size if necessary.... if (output_boxsize < YSIZE(Fin)) { Iapp.setXmippOrigin(); Iapp.window(FIRST_XMIPP_INDEX(output_boxsize), FIRST_XMIPP_INDEX(output_boxsize), LAST_XMIPP_INDEX(output_boxsize), LAST_XMIPP_INDEX(output_boxsize)); } // Back into Fourier-space transformer.FourierTransform(Iapp, Fapp, false); // false means: leave Fapp in the transformer CenterFFTbySign(Fapp); } // First time round: resize the output arrays if (ipass == 0 && fabs(angle) < XMIPP_EQUAL_ACCURACY) { outP.resize(Fapp); outQ.resize(Fapp); } // Now set back the right parts into outP (first pass) or outQ (second pass) float anglemin = angle + 90. - (0.5*angle_step); float anglemax = angle + 90. + (0.5*angle_step); // angles larger than 180 bool is_reverse = false; if (anglemin >= 180.) { anglemin -= 180.; anglemax -= 180.; is_reverse = true; } MultidimArray *myCTFPorQ, *myCTFPorQb; if (is_reverse) { myCTFPorQ = (ipass == 0) ? &outQ : &outP; myCTFPorQb = (ipass == 0) ? &outP : &outQ; } else { myCTFPorQ = (ipass == 0) ? &outP : &outQ; myCTFPorQb = (ipass == 0) ? &outQ : &outP; } // Deal with sectors with the Y-axis in the middle of the sector... bool do_wrap_max = false; if (anglemin < 180. && anglemax > 180.) { anglemax -= 180.; do_wrap_max = true; } // use radians instead of degrees anglemin = DEG2RAD(anglemin); anglemax = DEG2RAD(anglemax); FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM2D(CTFP) { RFLOAT x = (RFLOAT)jp; RFLOAT y = (RFLOAT)ip; RFLOAT myangle = (x*x+y*y > 0) ? acos(y/sqrt(x*x+y*y)) : 0; // dot-product with Y-axis: (0,1) // Only take the relevant sector now... 
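// myangle below is the angle between the Fourier pixel (x, y) and the Y axis (acos of the normalised
// dot product), so it lies between 0 and 180 degrees (expressed in radians here). In the normal case
// only pixels with anglemin <= myangle < anglemax are written in this pass; when the sector straddles
// the 180-degree boundary (do_wrap_max), its two halves are routed to the two output arrays separately.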
if (do_wrap_max) { if (myangle >= anglemin) DIRECT_A2D_ELEM(*myCTFPorQ, i, j) = DIRECT_A2D_ELEM(Fapp, i, j); else if (myangle < anglemax) DIRECT_A2D_ELEM(*myCTFPorQb, i, j) = DIRECT_A2D_ELEM(Fapp, i, j); } else { if (myangle >= anglemin && myangle < anglemax) DIRECT_A2D_ELEM(*myCTFPorQ, i, j) = DIRECT_A2D_ELEM(Fapp, i, j); } } } } } relion-3.1.3/src/apps/mrc2vtk.cpp000066400000000000000000000007741411340063500166410ustar00rootroot00000000000000#include #include int main(int argc, char *argv[]) { if (argc < 2) { std::cerr << "usage: relion_mrc2vtk X.(mrc/mrcs/tiff/spi)\n -> X.vtk\n"; return RELION_EXIT_FAILURE; } std::string fn(argv[1]), fn2; if (fn.find_last_of('.') != std::string::npos) { fn2 = fn.substr(0, fn.find_last_of('.')) + ".vtk"; } else { fn2 = fn + ".vtk"; } Image img; img.read(fn); Image imgZ = VtkHelper::allToZ(img); VtkHelper::writeVTK(imgZ, fn2); } relion-3.1.3/src/apps/paper_data_synth.cpp000066400000000000000000000033461411340063500205740ustar00rootroot00000000000000#include #include #include #include #include using namespace gravis; int main(int argc, char *argv[]) { const int w = 200; const int n = 100; const double mu = 25; const double sigR = 5; const double sigN = 2.0; std::vector ref(w); std::vector> arrays(n); std::string out = "illustration/"; mktree(out); std::ofstream refOut(out+"ref.dat"); for (int i = 0; i < w; i++) { const double x = (i-mu)/sigR; ref[i] = exp(-x*x); refOut << i << " " << ref[i] << "\n"; } std::vector avg = std::vector(w, 0.0); std::ofstream maxOut(out+"maxima.dat"); d2Vector maxAvg(0.0, 0.0); for (int k = 0; k < n; k++) { std::ofstream refOut(out+"noise_"+integerToString(k, 3, '0')+".dat"); arrays[k] = std::vector(w); int imax = 0; double vmax = -100.0; for (int i = 0; i < w; i++) { arrays[k][i] = ref[i] + DistributionHelper::sampleGauss(0, sigN); refOut << i << " " << arrays[k][i] << "\n"; if (vmax < arrays[k][i]) { vmax = arrays[k][i]; imax = i; } avg[i] += arrays[k][i] / (double)n; } refOut << "\n" << imax << " " << vmax << "\n"; maxOut << imax << " " << vmax << "\n\n"; maxAvg.x += imax / (double)n; maxAvg.y += vmax / n; } std::ofstream avgOut(out+"avg.dat"); int imaxA = 0; double vmaxA = -100.0; for (int i = 0; i < w; i++) { avgOut << i << " " << avg[i] << "\n"; if (vmaxA < avg[i]) { vmaxA = avg[i]; imaxA = i; } } avgOut << "\n" << imaxA << " " << vmaxA << "\n"; avgOut << "\n" << maxAvg.x << " " << maxAvg.y << "\n"; return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/particle_FCC.cpp000066400000000000000000000063561411340063500175310ustar00rootroot00000000000000 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include using namespace gravis; int main(int argc, char *argv[]) { std::string starFn, outPath; double minFreqPx; bool oppositeHalf, predictCTF; int minMG, maxMG, threads; ReferenceMap reference; IOParser parser; try { parser.setCommandLine(argc, argv); parser.addSection("General options"); starFn = parser.getOption("--i", "Input particle *.star file"); reference.read(parser, argc, argv); minFreqPx = textToDouble(parser.getOption("--min_freq", "Min. 
image frequency [px]", "0")); oppositeHalf = parser.checkOption("--opposite_half", "Correlate with opposite half-set"); predictCTF = parser.checkOption("--predict_CTF", "Modulate prediction by CTF"); minMG = textToInteger(parser.getOption("--min_MG", "First micrograph index", "0")); maxMG = textToInteger(parser.getOption("--max_MG", "Last micrograph index (default is to process all)", "-1")); threads = textToInteger(parser.getOption("--j", "Number of threads", "1")); outPath = parser.getOption("--o", "Output path"); parser.checkForErrors(); } catch (RelionError XE) { parser.writeUsage(std::cout); std::cerr << XE; return RELION_EXIT_FAILURE; } ObservationModel obsModel; MetaDataTable mdt0; ObservationModel::loadSafely(starFn, obsModel, mdt0); std::vector allMdts = StackHelper::splitByMicrographName(mdt0); reference.load(1, false); const int s = reference.s; const int sh = s/2 + 1; if (maxMG < 0) maxMG = allMdts.size() - 1; std::vector num(sh, 0.0), denom0(sh, 0.0), denom1(sh, 0.0); for (int m = 0; m <= maxMG; m++) { std::vector> obs, pred; int opticsGroup; allMdts[m].getValue(EMDL_IMAGE_OPTICS_GROUP, opticsGroup, 0); opticsGroup--; // both defocus_tit and tilt_fit need the same observations obs = StackHelper::loadStackFS(allMdts[m], "", threads, true, &obsModel); pred = reference.predictAll( allMdts[m], obsModel, oppositeHalf? ReferenceMap::Opposite : ReferenceMap::Own, threads, predictCTF, true, false); const int pc = obs.size(); for (int p = 0; p < pc; p++) { for (int y = 0; y < s; y++) for (int x = 0; x < sh; x++) { const double xx = x; const double yy = (y + s/2) % s - s/2; const int r = (int)(sqrt(xx*xx + yy*yy) + 0.5); if (r >= sh) continue; const Complex z_pred = pred[p](y,x); const Complex z_obs = obs[p](y,x); num[r] += z_pred.real * z_obs.real + z_pred.imag * z_obs.imag; denom0[r] += z_pred.norm(); denom1[r] += z_obs.norm(); } } } std::ofstream os(outPath+"_FCC.dat"); for (int r = minFreqPx; r < sh; r++) { double wgh = denom0[r] * denom1[r]; if (wgh > 0.0) { double fcc = num[r] / sqrt(wgh); os << r << " " << fcc << "\n"; } } return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/particle_reposition.cpp000066400000000000000000000406131411340063500213230ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #include #include #include #include class particle_reposition_parameters { public: FileName fn_in, fn_opt, fn_out, fn_dat, fn_odir; RFLOAT micrograph_background; int norm_radius; bool do_invert, do_ctf, do_subtract; ObservationModel obsModelMics; // I/O Parser IOParser parser; void usage() { parser.writeUsage(std::cerr); } void read(int argc, char **argv) { parser.setCommandLine(argc, argv); int general_section = parser.addSection("Options"); fn_in = parser.getOption("--i", "Input STAR file with rlnMicrographName's "); fn_out = parser.getOption("--o", "Output rootname, to be added to input micrograph names", ""); fn_odir = parser.getOption("--odir", "Output directory (default is same as input micrographs directory", ""); fn_opt = parser.getOption("--opt", "Optimiser STAR file with the 2D classes or 3D maps to be repositioned"); fn_dat = parser.getOption("--data", "Data STAR file with selected particles (default is to use all particles)", ""); micrograph_background = textToFloat(parser.getOption("--background", "The fraction of micrograph background noise in the output micrograph", "0.1")); do_invert= parser.checkOption("--invert", "Invert the contrast in the references?"); do_ctf = parser.checkOption("--ctf", "Apply CTF for each particle to the references?"); norm_radius = textToFloat(parser.getOption("--norm_radius", "Radius of the circle used for background normalisation (in pixels)", "-1")); do_subtract = parser.checkOption("--subtract", "Subtract repositioned micrographs from the input ones?"); // Check for errors in the command-line option if (parser.checkForErrors()) REPORT_ERROR("Errors encountered on the command line (see above), exiting..."); } void run() { if (fn_out == "" && fn_odir == "") REPORT_ERROR("ERROR: You need to provide either --o or --odir"); if (fn_odir.length() > 0 && fn_odir[fn_odir.length()-1] != '/') fn_odir += "/"; int xdim, ydim, radius; MetaDataTable DFi, DFopt, MDmics_out; ObservationModel::loadSafely(fn_in, obsModelMics, DFi, "micrographs"); MlOptimiser optimiser; optimiser.do_preread_images = false; optimiser.read(fn_opt); optimiser.mymodel.setFourierTransformMaps(false); // Use a user-provided subset of particles instead of all of them? if (fn_dat != "") { std::cout <<" Reading data ..." 
<< std::endl; MetaDataTable MDdata; MDdata.read(fn_dat); optimiser.mydata.MDimg = MDdata; } // Loop over all micrographs int barstep = XMIPP_MAX(1, DFi.numberOfObjects()/ 60); init_progress_bar(DFi.numberOfObjects()); long int imgno = 0; FileName fn_prevdir=""; FOR_ALL_OBJECTS_IN_METADATA_TABLE(DFi) { FileName fn_mic, fn_mic_out; DFi.getValue(EMDL_MICROGRAPH_NAME, fn_mic); if (fn_out != "") fn_mic_out = fn_mic.insertBeforeExtension("_" + fn_out); else fn_mic_out = fn_mic; if (fn_odir != "") { FileName fn_pre, fn_jobnr, fn_post; if (decomposePipelineFileName(fn_mic_out, fn_pre, fn_jobnr, fn_post)) fn_mic_out = fn_odir + fn_post; else fn_mic_out = fn_odir + fn_mic_out; FileName fn_onlydir = fn_mic_out.beforeLastOf("/"); if (fn_onlydir != fn_prevdir) { std::string command = " mkdir -p " + fn_onlydir; int res = system(command.c_str()); fn_prevdir = fn_onlydir; } } FourierTransformer transformer; MetaDataTable MDcoord; Image Imic_in, Imic_out; MultidimArray Imic_sum; // Read in the first micrograph Imic_in.read(fn_mic); Imic_in().setXmippOrigin(); Imic_out().initZeros(Imic_in()); Imic_sum.initZeros(Imic_in()); Imic_sum.setXmippOrigin(); // Get mean and stddev of the input micrograph RFLOAT stddev_mic, mean_mic, dummy; Imic_in().computeStats(mean_mic, stddev_mic, dummy, dummy); int optics_group_mic; DFi.getValue(EMDL_IMAGE_OPTICS_GROUP, optics_group_mic); RFLOAT mic_pixel_size=-1.; for (int i = 0; i < obsModelMics.opticsMdt.numberOfObjects(); i++) { int my_optics_group; obsModelMics.opticsMdt.getValue(EMDL_IMAGE_OPTICS_GROUP, my_optics_group); if (my_optics_group == optics_group_mic) { obsModelMics.opticsMdt.getValue(EMDL_MICROGRAPH_PIXEL_SIZE, mic_pixel_size); break; } } if (mic_pixel_size<0.) REPORT_ERROR("ERROR: could not find correct optics group in micrograph star file..."); FileName fn_mic_pre, fn_mic_jobnr, fn_mic_post; decomposePipelineFileName(fn_mic, fn_mic_pre, fn_mic_jobnr, fn_mic_post); // Loop over all particles bool found_one = false; for (long int part_id = 0; part_id < optimiser.mydata.numberOfParticles(); part_id++) { long int ori_img_id = optimiser.mydata.particles[part_id].images[0].id; int optics_group = optimiser.mydata.getOpticsGroup(part_id, 0); RFLOAT my_pixel_size = optimiser.mydata.getImagePixelSize(part_id, 0); int my_image_size = optimiser.mydata.getOpticsImageSize(optics_group); if (do_subtract && fabs(my_pixel_size - mic_pixel_size) > 1e-6) REPORT_ERROR("ERROR: subtract code has only been validated with same pixel size for particles and micrographs... 
Sorry!"); FileName fn_mic2; optimiser.mydata.MDimg.getValue(EMDL_MICROGRAPH_NAME, fn_mic2, ori_img_id); FileName fn_mic2_pre, fn_mic2_jobnr, fn_mic2_post; decomposePipelineFileName(fn_mic2, fn_mic2_pre, fn_mic2_jobnr, fn_mic2_post); if (fn_mic2_post == fn_mic_post) { found_one = true; // Prepare transformer MultidimArray Fref; MultidimArray Mref; if (optimiser.mymodel.data_dim == 3) { Mref.resize(my_image_size, my_image_size, my_image_size); Fref.resize(my_image_size, my_image_size, my_image_size/2 + 1); } else { Mref.resize(my_image_size, my_image_size); Fref.resize(my_image_size, my_image_size/2 + 1); } RFLOAT rot, tilt, psi, xcoord=0., ycoord=0., zcoord=0.; int iclass; Matrix2D A; Matrix1D offsets(3); MDcoord.addObject(); MDcoord.setObject(optimiser.mydata.MDimg.getObject(ori_img_id)); MDcoord.setValue(EMDL_MICROGRAPH_NAME,fn_mic_out); optimiser.mydata.MDimg.getValue(EMDL_IMAGE_COORD_X, xcoord, ori_img_id); optimiser.mydata.MDimg.getValue(EMDL_IMAGE_COORD_Y, ycoord, ori_img_id); optimiser.mydata.MDimg.getValue(EMDL_ORIENT_ROT, rot, ori_img_id); optimiser.mydata.MDimg.getValue(EMDL_ORIENT_TILT, tilt, ori_img_id); optimiser.mydata.MDimg.getValue(EMDL_ORIENT_PSI, psi, ori_img_id); optimiser.mydata.MDimg.getValue(EMDL_ORIENT_ORIGIN_X_ANGSTROM, XX(offsets), ori_img_id); optimiser.mydata.MDimg.getValue(EMDL_ORIENT_ORIGIN_Y_ANGSTROM, YY(offsets), ori_img_id); if (optimiser.mymodel.data_dim == 3) { optimiser.mydata.MDimg.getValue(EMDL_ORIENT_ORIGIN_Z_ANGSTROM, ZZ(offsets), ori_img_id); optimiser.mydata.MDimg.getValue(EMDL_IMAGE_COORD_Z, zcoord, ori_img_id); } else { ZZ(offsets) = zcoord = 0.; } // Offsets in pixels offsets /= my_pixel_size; optimiser.mydata.MDimg.getValue(EMDL_PARTICLE_CLASS, iclass, ori_img_id); iclass--; Euler_angles2matrix(rot, tilt, psi, A); if (do_ctf) { A = optimiser.mydata.obsModel.applyAnisoMag(A, optics_group); A = optimiser.mydata.obsModel.applyScaleDifference(A, optics_group, optimiser.mymodel.ori_size, optimiser.mymodel.pixel_size); } // Get the 2D image (in its ori_size) (optimiser.mymodel.PPref[iclass]).get2DFourierTransform(Fref, A); if (optimiser.mymodel.data_dim == 2) shiftImageInFourierTransform(Fref, Fref, my_image_size, -XX(offsets), -YY(offsets)); else shiftImageInFourierTransform(Fref, Fref, my_image_size, -XX(offsets), -YY(offsets), -ZZ(offsets)); if (do_ctf) { MultidimArray Fctf; Fctf.resize(Fref); CTF ctf; if (optimiser.mymodel.data_dim == 3) { Image Ictf; FileName fn_ctf; optimiser.mydata.MDimg.getValue(EMDL_CTF_IMAGE, fn_ctf, ori_img_id); Ictf.read(fn_ctf); // If there is a redundant half, get rid of it if (XSIZE(Ictf()) == YSIZE(Ictf())) { Ictf().setXmippOrigin(); FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM(Fctf) { // Use negative kp,ip and jp indices, because the origin in the ctf_img lies half a pixel to the right of the actual center.... 
DIRECT_A3D_ELEM(Fctf, k, i, j) = A3D_ELEM(Ictf(), -kp, -ip, -jp); } } // otherwise, just window the CTF to the current resolution else if (XSIZE(Ictf()) == YSIZE(Ictf()) / 2 + 1) { windowFourierTransform(Ictf(), Fctf, YSIZE(Fctf)); } // if dimensions are neither cubical nor FFTW, stop else { REPORT_ERROR("3D CTF volume must be either cubical or adhere to FFTW format!"); } } else { ctf.readByGroup(optimiser.mydata.MDimg, &optimiser.mydata.obsModel, ori_img_id); ctf.getFftwImage(Fctf, my_image_size, my_image_size, my_pixel_size, optimiser.ctf_phase_flipped, false, optimiser.intact_ctf_first_peak, true); } if (optimiser.mydata.obsModel.getCtfPremultiplied(optics_group)) { FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Fref) DIRECT_MULTIDIM_ELEM(Fref, n) *= (DIRECT_MULTIDIM_ELEM(Fctf, n) * DIRECT_MULTIDIM_ELEM(Fctf, n)); } else { FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Fref) { DIRECT_MULTIDIM_ELEM(Fref, n) *= DIRECT_MULTIDIM_ELEM(Fctf, n); } } // Also do phase modulation, for beam tilt correction and other asymmetric aberrations optimiser.mydata.obsModel.demodulatePhase(optics_group, Fref, true); // true means do_modulate_instead optimiser.mydata.obsModel.divideByMtf(optics_group, Fref, true); // true means do_multiply_instead } // end if do_ctf if (optimiser.do_scale_correction) { int group_id = optimiser.mydata.getGroupId(part_id, 0); RFLOAT myscale = optimiser.mymodel.scale_correction[group_id]; FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Fref) { DIRECT_MULTIDIM_ELEM(Fref, n) *= myscale; } } // Take inverse transform transformer.inverseFourierTransform(Fref, Mref); CenterFFT(Mref, false); Mref.setXmippOrigin(); int mic_image_size = CEIL(my_image_size * my_pixel_size / mic_pixel_size); MultidimArray Mpart_mic = Mref; if (mic_image_size != my_image_size) { resizeMap(Mpart_mic, mic_image_size); Mpart_mic.setXmippOrigin(); } //Image It; //It()=Mpart_mic; //It.write("It.spi"); //exit(1); // To keep raw micrograph and reference projections on the same scale, need to re-obtain // the multiplicative normalisation of the background area (outside circle) again RFLOAT norm_factor = 1.; if (norm_radius > 0) { Image Ipart; Ipart().resize(Mpart_mic); Ipart().initConstant(mean_mic); // set areas outside the micrograph to average of micrograph (just like in preprocessing) Imic_in().xinit = -ROUND(xcoord); Imic_in().yinit = -ROUND(ycoord); Imic_in().zinit = -ROUND(zcoord); FOR_ALL_ELEMENTS_IN_ARRAY3D(Mpart_mic) { // check the particles do not go off the side int kp = (k) - STARTINGZ(Imic_in()); int ip = (i) - STARTINGY(Imic_in()); int jp = (j) - STARTINGX(Imic_in()); if (kp >= 0 && kp < ZSIZE(Imic_in()) && ip >= 0 && ip < YSIZE(Imic_in()) && jp >= 0 && jp < XSIZE(Imic_in()) ) { A3D_ELEM(Ipart(), k, i, j) = A3D_ELEM(Imic_in(), k, i, j); } } RFLOAT psi_deg = 0., tilt_deg = 90.; RFLOAT part_avg, part_stdev; if (optimiser.do_helical_refine) { optimiser.mydata.MDimg.getValue(EMDL_ORIENT_TILT_PRIOR, tilt_deg, ori_img_id); optimiser.mydata.MDimg.getValue(EMDL_ORIENT_PSI_PRIOR, psi_deg, ori_img_id); } calculateBackgroundAvgStddev(Ipart, part_avg, norm_factor, norm_radius, optimiser.do_helical_refine, optimiser.helical_tube_outer_diameter/(2.*mic_pixel_size), tilt_deg, psi_deg); // Apply the per-particle norm_correction term if (optimiser.do_norm_correction) { RFLOAT mynorm; optimiser.mydata.MDimg.getValue(EMDL_IMAGE_NORM_CORRECTION, mynorm, ori_img_id); // TODO: check whether this is the right way around!!! 
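// For context: norm_factor is used further down to scale the re-projected reference before it is
// added into the output micrograph, so the factor mynorm / avg_norm_correction rescales this particle
// by its rlnNormCorrection relative to the dataset average (the TODO above questions whether that
// ratio should perhaps be inverted).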
norm_factor *= mynorm/optimiser.mymodel.avg_norm_correction; } } // Reposition Mpart_mic back into the micrograph Imic_out().xinit = -ROUND(xcoord); Imic_out().yinit = -ROUND(ycoord); Imic_out().zinit = -ROUND(zcoord); Imic_sum.xinit = -ROUND(xcoord); Imic_sum.yinit = -ROUND(ycoord); Imic_sum.zinit = -ROUND(zcoord); radius = optimiser.particle_diameter / (2. * mic_pixel_size); FOR_ALL_ELEMENTS_IN_ARRAY3D(Mpart_mic) { long int idx = ROUND(sqrt(k*k + i*i + j*j)); if (idx < radius) { // check the particles do not go off the side int kp = (k) - STARTINGZ(Imic_sum); int ip = (i) - STARTINGY(Imic_sum); int jp = (j) - STARTINGX(Imic_sum); if (kp >= 0 && kp < ZSIZE(Imic_sum) && ip >= 0 && ip < YSIZE(Imic_sum) && jp >= 0 && jp < XSIZE(Imic_sum) ) { A3D_ELEM(Imic_out(), k, i, j) += norm_factor * A3D_ELEM(Mpart_mic, k, i, j); A3D_ELEM(Imic_sum, k, i, j) += 1.; } } } } } // end loop over all particles in the mydata.MDimg table if (found_one) { FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Imic_out()) { if (DIRECT_MULTIDIM_ELEM(Imic_sum, n) > 0.) DIRECT_MULTIDIM_ELEM(Imic_out(), n) /= DIRECT_MULTIDIM_ELEM(Imic_sum, n); if (do_invert) DIRECT_MULTIDIM_ELEM(Imic_out(), n) *= -1.; if (do_subtract) { DIRECT_MULTIDIM_ELEM(Imic_out(), n) = DIRECT_MULTIDIM_ELEM(Imic_in(), n) - DIRECT_MULTIDIM_ELEM(Imic_out(), n); } else if (micrograph_background > 0.) { // normalize Imic_in on the fly DIRECT_MULTIDIM_ELEM(Imic_in(), n) -= mean_mic; DIRECT_MULTIDIM_ELEM(Imic_in(), n) /= stddev_mic; // And add a precentage to Imic_out DIRECT_MULTIDIM_ELEM(Imic_out(), n) *= (1. - micrograph_background); DIRECT_MULTIDIM_ELEM(Imic_out(), n) += micrograph_background * DIRECT_MULTIDIM_ELEM(Imic_in(), n); } } // Write out the new micrograph Imic_out.write(fn_mic_out); MDmics_out.addObject(); MDmics_out.setObject(DFi.getObject()); MDmics_out.setValue(EMDL_MICROGRAPH_NAME, fn_mic_out); // Also write out a STAR file with the particles used FileName fn_coord_out = fn_mic_out.withoutExtension()+ "_coord.star"; MDcoord.write(fn_coord_out); MDcoord.clear(); } else { MDmics_out.addObject(); MDmics_out.setObject(DFi.getObject()); } if (imgno%barstep==0) progress_bar(imgno); imgno++; } // end loop over input MetadataTable progress_bar(DFi.numberOfObjects()); FileName fn_star_out = fn_odir + "micrographs_reposition.star"; if (fn_out != "") fn_star_out = fn_star_out.insertBeforeExtension("_" + fn_out); std::cout << "Writing out star file with the new micrographs: " << fn_star_out << std::endl; obsModelMics.save(MDmics_out, fn_star_out, "micrographs"); std::cout << " Done!" 
<< std::endl; }// end run function }; int main(int argc, char *argv[]) { time_config(); particle_reposition_parameters prm; try { prm.read(argc, argv); prm.run(); } catch (RelionError XE) { //prm.usage(); std::cerr << XE; return RELION_EXIT_FAILURE; } return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/particle_select.cpp000066400000000000000000000105371411340063500204110ustar00rootroot00000000000000#include #include #include using namespace gravis; int main(int argc, char *argv[]) { IOParser parser; parser.setCommandLine(argc, argv); int gen_section = parser.addSection("General options"); std::string sourceFn = parser.getOption("--i", "Input STAR file containing the source particles"); std::string refFn = parser.getOption("--i_ref", "Input STAR file containing reference particles"); const bool copyAngles = parser.checkOption("--angles", "Copy particle viewing angles from reference"); const bool copyOffsets = parser.checkOption("--offsets", "Copy particle offsets from reference"); std::string outFn = parser.getOption("--o", "Output path", "selected.star"); if (parser.checkForErrors()) { REPORT_ERROR("Errors encountered on the command line (see above), exiting..."); } MetaDataTable sourceAll, refAll; sourceAll.read(sourceFn); refAll.read(refFn); std::vector sourceByMic = StackHelper::splitByMicrographName(sourceAll); std::vector refByMic = StackHelper::splitByMicrographName(refAll); std::map micToSource; for (int m = 0; m < sourceByMic.size(); m++) { std::string micName; sourceByMic[m].getValue(EMDL_MICROGRAPH_NAME, micName, 0); micToSource[micName] = &sourceByMic[m]; } MetaDataTable out; for (int m = 0; m < refByMic.size(); m++) { std::string micName; refByMic[m].getValue(EMDL_MICROGRAPH_NAME, micName, 0); if (micToSource.find(micName) == micToSource.end()) { std::cerr << "Warning: " << micName << " not found.\n"; continue; } MetaDataTable* src = micToSource[micName]; const int pcRef = refByMic[m].numberOfObjects(); const int pcSrc = src->numberOfObjects(); std::vector posSrc(pcSrc); for (int p = 0; p < pcSrc; p++) { src->getValue(EMDL_IMAGE_COORD_X, posSrc[p].x, p); src->getValue(EMDL_IMAGE_COORD_Y, posSrc[p].y, p); } std::vector posRef(pcRef); for (int p = 0; p < pcRef; p++) { refByMic[m].getValue(EMDL_IMAGE_COORD_X, posRef[p].x, p); refByMic[m].getValue(EMDL_IMAGE_COORD_Y, posRef[p].y, p); } int missing = 0, multiple = 0; for (int p = 0; p < pcRef; p++) { int qBest = -1; for (int q = 0; q < pcSrc; q++) { double dist = (posRef[p] - posSrc[q]).length(); if (dist < 1.0) { if (qBest == -1) { qBest = q; } else { qBest = -2; } } } if (qBest >= 0) { out.addObject(src->getObject(qBest)); const int qNew = out.numberOfObjects() - 1; int randSubsetSrc; src->getValue(EMDL_PARTICLE_RANDOM_SUBSET, randSubsetSrc, qBest); int randSubsetRef; refByMic[m].getValue(EMDL_PARTICLE_RANDOM_SUBSET, randSubsetRef, p); if (randSubsetSrc != randSubsetRef) { if (copyAngles && copyOffsets) { out.setValue(EMDL_PARTICLE_RANDOM_SUBSET, randSubsetRef, qNew); } else if (copyAngles != copyOffsets) { REPORT_ERROR_STR("Unable to copy only angles or only offsets, since the " << "particles belong to different random subsets.\n"); } } if (copyAngles) { double rot, tilt, psi; refByMic[m].getValue(EMDL_ORIENT_ROT, rot, p); refByMic[m].getValue(EMDL_ORIENT_TILT, tilt, p); refByMic[m].getValue(EMDL_ORIENT_PSI, psi, p); out.setValue(EMDL_ORIENT_ROT, rot, qNew); out.setValue(EMDL_ORIENT_TILT, tilt, qNew); out.setValue(EMDL_ORIENT_PSI, psi, qNew); } if (copyOffsets) { double xoff, yoff; refByMic[m].getValue(EMDL_ORIENT_ORIGIN_X, 
xoff, p); refByMic[m].getValue(EMDL_ORIENT_ORIGIN_Y, yoff, p); out.setValue(EMDL_ORIENT_ORIGIN_X, xoff, qNew); out.setValue(EMDL_ORIENT_ORIGIN_Y, yoff, qNew); } } else if (qBest == -1) { missing++; } else // -2 { multiple++; } } if (missing > 0) { std::cerr << " Warning: " << missing << " of " << pcRef << " particles missing from micrograph " << m << "\n"; } if (multiple > 0) { std::cerr << " Warning: " << multiple << " out of " << pcRef << " particles found multiple times in micrograph " << m << "\n" << " (all will be ignored)\n"; } } out.write(outFn); return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/particle_subtract.cpp000066400000000000000000000025071411340063500207570ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include #include "../particle_subtractor.h" int main(int argc, char *argv[]) { ParticleSubtractor prm; try { prm.read(argc, argv); prm.initialise(); if (prm.fn_revert != "") { prm.revert(); } else { prm.run(); prm.saveStarFile(); prm.combineStarFile(); } } catch (RelionError XE) { std::cerr << XE; return RELION_EXIT_FAILURE; } return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/particle_subtract_mpi.cpp000066400000000000000000000044361411340063500216270ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #include #include "../particle_subtractor.h" #include "../mpi.h" int main(int argc, char *argv[]) { ParticleSubtractor prm; int rank, size; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); // Handle errors MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN); MPI_Status status; try { prm.read(argc, argv); prm.initialise(rank, size); if (prm.fn_revert != "") REPORT_ERROR("You cannot use MPI for reverting subtraction."); prm.run(); if (prm.do_ssnr) { MultidimArray Maux(prm.sum_S2); MPI_Allreduce(MULTIDIM_ARRAY(prm.sum_S2), MULTIDIM_ARRAY(Maux), MULTIDIM_SIZE(prm.sum_S2), MY_MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); prm.sum_S2 = Maux; MPI_Allreduce(MULTIDIM_ARRAY(prm.sum_N2), MULTIDIM_ARRAY(Maux), MULTIDIM_SIZE(prm.sum_N2), MY_MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); prm.sum_N2 = Maux; MPI_Allreduce(MULTIDIM_ARRAY(prm.sum_count), MULTIDIM_ARRAY(Maux), MULTIDIM_SIZE(prm.sum_count), MY_MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); prm.sum_count=Maux; } prm.saveStarFile(rank); MPI_Barrier(MPI_COMM_WORLD); if (rank == 0) prm.combineStarFile(rank); MPI_Barrier(MPI_COMM_WORLD); } catch (RelionError XE) { std::cerr << XE; return RELION_EXIT_FAILURE; } MPI_Barrier(MPI_COMM_WORLD); MPI_Finalize(); return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/particle_symmetry_expand.cpp000066400000000000000000000151451411340063500223620ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #include #include #include #include #include #include class particle_symmetry_expand_parameters { public: FileName fn_sym, fn_in, fn_out; // Helical symmetry bool do_helix, do_ignore_optics; RFLOAT twist, rise, angpix; int nr_asu, frac_sampling; RFLOAT frac_range; ObservationModel obsModel; // I/O Parser IOParser parser; void usage() { parser.writeUsage(std::cerr); } void read(int argc, char **argv) { parser.setCommandLine(argc, argv); int general_section = parser.addSection("Options"); fn_in = parser.getOption("--i", "Input particle STAR file"); fn_out = parser.getOption("--o", "Output expanded particle STAR file", "expanded.star"); fn_sym = parser.getOption("--sym", "Symmetry point group", "C1"); // Helical symmetry int helical_section = parser.addSection("Helix"); do_helix = parser.checkOption("--helix", "Do helical symmetry expansion"); twist = textToFloat(parser.getOption("--twist", "Helical twist (deg)", "0.")); rise = textToFloat(parser.getOption("--rise", "Helical rise (A)", "0.")); angpix = textToFloat(parser.getOption("--angpix", "Pixel size (A)", "1.")); nr_asu = textToFloat(parser.getOption("--asu", "Number of asymmetrical units to expand", "1")); frac_sampling = textToFloat(parser.getOption("--frac_sampling", "Number of samplings in between a single asymmetrical unit", "1")); frac_range = textToFloat(parser.getOption("--frac_range", "Range of the rise [-0.5, 0.5> to be sampled", "0.5")); do_ignore_optics = parser.checkOption("--ignore_optics", "Provide this option for relion-3.0 functionality, without optics groups"); // Check for errors in the command-line option if (parser.checkForErrors()) REPORT_ERROR("Errors encountered on the command line (see above), exiting..."); if (do_helix) { if (fn_sym != "C1") REPORT_ERROR("Provide either --sym OR --helix, but not both!"); if ((nr_asu > 1 && frac_sampling > 1) || (nr_asu == 1 && frac_sampling == 1)) REPORT_ERROR("Provide either --asu OR --frac_sampling, but not both!"); } } void run() { MetaDataTable DFi, DFo; RFLOAT rot, tilt, psi, x, y; RFLOAT rotp, tiltp, psip, xp, yp; Matrix2D L(3,3), R(3,3); // A matrix from the list RFLOAT z_start, z_stop, z_step; // for helices SymList SL; // For helices, pre-calculate expansion range if (do_helix) { if (nr_asu > 1) { // Z_start and z_stop and z_step are in fractions of the rise! int istart = -(nr_asu-1)/2; int istop = nr_asu/2; z_start = (RFLOAT)istart; z_stop = (RFLOAT)istop; z_step = 1.; } else if (frac_sampling > 1) { z_start = -frac_range; z_stop = (frac_range - 0.001); z_step = 1. / frac_sampling; } std::cout << " Helical: z_start= " << z_start << " z_stop= " << z_stop << " z_step= " << z_step << std::endl; } else { SL.read_sym_file(fn_sym); if (SL.SymsNo() < 1) REPORT_ERROR("ERROR Nothing to do. Provide a point group with symmetry!"); } if (do_ignore_optics) { DFi.read(fn_in); } else { ObservationModel::loadSafely(fn_in, obsModel, DFi, "particles", 1, false); if (obsModel.opticsMdt.numberOfObjects() == 0) { std::cerr << " + WARNGING: could not read optics groups table, proceeding without it ..." 
<< std::endl; DFi.read(fn_in); do_ignore_optics = true; } } int barstep = XMIPP_MAX(1, DFi.numberOfObjects()/ 60); init_progress_bar(DFi.numberOfObjects()); DFo.clear(); long int imgno = 0; FOR_ALL_OBJECTS_IN_METADATA_TABLE(DFi) { DFi.getValue(EMDL_ORIENT_ROT, rot); DFi.getValue(EMDL_ORIENT_TILT, tilt); DFi.getValue(EMDL_ORIENT_PSI, psi); DFi.getValue(EMDL_ORIENT_ORIGIN_X_ANGSTROM, x); DFi.getValue(EMDL_ORIENT_ORIGIN_Y_ANGSTROM, y); if (do_helix) { for (RFLOAT z_pos = z_start; z_pos <= z_stop; z_pos += z_step) { // TMP //if (fabs(z_pos) > 0.01) { // Translation along the X-axis in the rotated image is along the helical axis in 3D. // Tilted images shift less: sin(tilt) RFLOAT xxt = SIND(tilt) * z_pos * rise; xp = x + COSD(-psi) * xxt; yp = y + SIND(-psi) * xxt; rotp = rot - z_pos * twist; DFo.addObject(); DFo.setObject(DFi.getObject()); DFo.setValue(EMDL_ORIENT_ROT, rotp); DFo.setValue(EMDL_ORIENT_ORIGIN_X_ANGSTROM, xp); DFo.setValue(EMDL_ORIENT_ORIGIN_Y_ANGSTROM, yp); } } } else { // Get the original line from the STAR file DFo.addObject(); DFo.setObject(DFi.getObject()); // And loop over all symmetry mates for (int isym = 0; isym < SL.SymsNo(); isym++) { SL.get_matrices(isym, L, R); L.resize(3, 3); // Erase last row and column R.resize(3, 3); // as only the relative orientation is useful and not the translation Euler_apply_transf(L, R, rot, tilt, psi, rotp, tiltp, psip); DFo.addObject(); DFo.setObject(DFi.getObject()); DFo.setValue(EMDL_ORIENT_ROT, rotp); DFo.setValue(EMDL_ORIENT_TILT, tiltp); DFo.setValue(EMDL_ORIENT_PSI, psip); } } if (imgno%barstep==0) progress_bar(imgno); imgno++; } // end loop over input MetadataTable progress_bar(DFi.numberOfObjects()); if (do_ignore_optics) { DFo.write(fn_out); } else { obsModel.save(DFo, fn_out, "particles"); } std::cout << " Done! Written: " << fn_out << " with the expanded particle set." << std::endl; }// end run function }; int main(int argc, char *argv[]) { time_config(); particle_symmetry_expand_parameters prm; try { prm.read(argc, argv); prm.run(); } catch (RelionError XE) { //prm.usage(); std::cerr << XE; return RELION_EXIT_FAILURE; } return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/pipeliner.cpp000066400000000000000000000141131411340063500172300ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #include "src/pipeliner.h" #include #ifdef HAVE_CONFIG_H #include "config.h" #endif class pipeliner_parameters { public: FileName fn_sched, fn_jobids, fn_options, fn_alias, run_schedule, abort_schedule, add_job_star; int nr_repeat; bool do_check_complete, do_overwrite_current; long int minutes_wait, minutes_wait_before, seconds_wait_after, gentle_clean, harsh_clean; std::string add_type; // The actual pipeline PipeLine pipeline; // I/O Parser IOParser parser; void usage() { parser.writeUsage(std::cerr); } void read(int argc, char **argv) { parser.setCommandLine(argc, argv); // Fill the window, but don't show it! int check_section = parser.addSection("Check job completion options"); do_check_complete = parser.checkOption("--check_job_completion", "Use this flag to only check whether running jobs have completed"); int add_section = parser.addSection("Add scheduled jobs options"); add_job_star = parser.getOption("--addJobFromStar", "Add a job with the type and options as in this job.star to the pipeline", ""); add_type = parser.getOption("--addJob", "Add a job of this type to the pipeline", ""); fn_options = parser.getOption("--addJobOptions", "Options for this job (either through --addJobFromStar or --addJob)", ""); fn_alias = parser.getOption("--setJobAlias", "Set an alias to this job", ""); int run_section = parser.addSection("Run scheduled jobs options"); fn_jobids = parser.getOption("--RunJobs", "Run these jobs", ""); fn_sched = parser.getOption("--schedule", "Name of the scheduler for running the scheduled jobs", ""); do_overwrite_current = parser.checkOption("--overwrite_jobs", "Use this flag to overwrite existing jobs, instead of continuing them"); nr_repeat = textToInteger(parser.getOption("--repeat", "Run the scheduled jobs this many times", "1")); minutes_wait = textToInteger(parser.getOption("--min_wait", "Wait at least this many minutes between each repeat", "0")); minutes_wait_before = textToInteger(parser.getOption("--min_wait_before", "Wait this many minutes before starting the running the first job", "0")); seconds_wait_after = textToInteger(parser.getOption("--sec_wait_after", "Wait this many seconds after a process finishes (workaround for slow IO)", "10")); int expert_section = parser.addSection("Expert options"); pipeline.name = parser.getOption("--pipeline", "Name of the pipeline", "default"); gentle_clean = textToInteger(parser.getOption("--gentle_clean", "Gentle clean this job", "-1")); harsh_clean = textToInteger(parser.getOption("--harsh_clean", "Harsh clean this job", "-1")); // Check for errors in the command-line option if (parser.checkForErrors()) REPORT_ERROR("Errors encountered on the command line (see above), exiting..."); } void run() { pipeline.read(DO_LOCK); pipeline.write(DO_LOCK); if (do_check_complete) { pipeline.checkProcessCompletion(); } else if (add_job_star != "") { RelionJob job; bool is_continue; job.read(add_job_star, is_continue, true); // true = do_initialise job.is_continue = false; int job_num = pipeline.addScheduledJob(job, fn_options); if (fn_alias != "") { std::string error_message; if (!pipeline.setAliasJob(job_num, fn_alias, error_message)) { std::cerr << "WARNING: Failed to set the job alias to " << fn_alias << ". The job name remains the default." 
<< std::endl; } } } else if (add_type != "") { int job_num = pipeline.addScheduledJob(add_type, fn_options); if (fn_alias != "") { std::string error_message; if (!pipeline.setAliasJob(job_num, fn_alias, error_message)) { std::cerr << "WARNING: Failed to set the job alias to " << fn_alias << ". The job name remains the default." << std::endl; } } } else if (gentle_clean > 0 || harsh_clean > 0) { bool found = false; for (int i = 0, ilim = pipeline.processList.size(); i < ilim; i++) { // std::cout << i << " " << pipeline.processList[i].name << std::endl; FileName fn_pre, fn_jobnr, fn_post; if (!decomposePipelineFileName(pipeline.processList[i].name, fn_pre, fn_jobnr, fn_post)) continue; int job_nr = textToInteger(fn_jobnr.afterFirstOf("job").beforeLastOf("/")); if (!(job_nr == gentle_clean || job_nr == harsh_clean)) continue; found = true; // std::cout << "Gentle clean " << pipeline.processList[i].name << std::endl; std::string error_message; if (!pipeline.cleanupJob(i, (job_nr == harsh_clean), error_message)) { std::cerr << "Failed to clean!" << std::endl; REPORT_ERROR(error_message); } break; } if (!found) { if (gentle_clean > 0) std::cerr << "Could not find job to gentle clean: " << gentle_clean << std::endl; else std::cerr << "Could not find job harsh clean: " << harsh_clean << std::endl; } } else if (nr_repeat > 0) { pipeline.runScheduledJobs(fn_sched, fn_jobids, nr_repeat, minutes_wait, minutes_wait_before, seconds_wait_after, do_overwrite_current); } } }; int main(int argc, char *argv[]) { pipeliner_parameters prm; try { prm.read(argc, argv); prm.run(); } catch (RelionError XE) { std::cerr << XE; return RELION_EXIT_FAILURE; } return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/plot_delocalisation.cpp000066400000000000000000000127241411340063500212770ustar00rootroot00000000000000 #include #include #include #include #include #include #include #include #include #include #include #include #include #include using namespace gravis; int main(int argc, char *argv[]) { std::string starFn, outPath, name; int threads, s, optGroup; double rad, maxFreqAng, minFreqAng; bool allParts; IOParser parser; try { parser.setCommandLine(argc, argv); parser.addSection("General options"); starFn = parser.getOption("--i", "Input particle *.star file"); rad = textToDouble(parser.getOption("--rad", "Particle radius [Å]")); optGroup = textToInteger(parser.getOption("--og", "Optics group", "1")) - 1; maxFreqAng = textToDouble(parser.getOption("--max_freq", "Max. image frequency [Å] (default is Nyquist)", "-1")); minFreqAng = textToDouble(parser.getOption("--min_freq", "Min. image frequency [Å]", "0")); name = parser.getOption("--name", "Name of dataset (for the plot)", ""); allParts = parser.checkOption("--all_part", "Consider all particles, instead of only the first one in each micrograph"); s = textToInteger(parser.getOption("--s", "Square size for estimation", "256")); threads = textToInteger(parser.getOption("--j", "Number of threads", "1")); outPath = parser.getOption("--o", "Output path"); parser.checkForErrors(); } catch (RelionError XE) { parser.writeUsage(std::cout); std::cerr << XE; return RELION_EXIT_FAILURE; } ObservationModel obsModel; MetaDataTable mdt0; ObservationModel::loadSafely(starFn, obsModel, mdt0); std::vector allMdts = StackHelper::splitByMicrographName(mdt0); const int sh = s/2 + 1; const double angpix = obsModel.getPixelSize(optGroup); if (maxFreqAng < 0) maxFreqAng = 2*angpix; const double r2max = 1.0 / (maxFreqAng * maxFreqAng); const double r2min = minFreqAng > 0? 
1.0 / (minFreqAng * minFreqAng) : -1; const int radPx = (int)(rad / angpix + 0.5); const int maxBin = 5*s; const double as = s * angpix; std::vector histCent(maxBin, 0.0); std::vector histWorst(maxBin, 0.0); for (int m = 0; m < allMdts.size(); m++) { const int pc = allMdts[m].numberOfObjects(); const double mgContrib = allParts? 1.0 : pc; const int p_max = allParts? pc : 1; for (int p = 0; p < p_max; p++) { int ogp = obsModel.getOpticsGroup(allMdts[m], p); if (ogp != optGroup) continue; CTF ctf; ctf.readByGroup(allMdts[m], &obsModel, p); for (int y = 0; y < s; y++) for (int x = 0; x < sh; x++) { double xx = x/as; double yy = y < sh? y/as : (y - s)/as; const double r2 = xx*xx + yy*yy; if (r2 > r2max || r2 < r2min) continue; t2Vector delocCent = RFLOAT(1.0 / (2 * angpix * PI)) * ctf.getGammaGrad(xx,yy); double delocCentVal = delocCent.normLInf(); int sic = (int)(delocCentVal + 0.5); if (sic >= maxBin) sic = maxBin - 1; histCent[sic] += mgContrib; d2Vector delocWorst(std::abs(delocCent.x) + radPx, std::abs(delocCent.y) + radPx); double delocWorstVal = delocWorst.normLInf(); int siw = (int)(delocWorstVal + 0.5); if (siw >= maxBin) siw = maxBin - 1; histWorst[siw] += mgContrib; } } } std::vector histCentCumul(maxBin, 0.0), histWorstCumul(maxBin, 0.0); double cumulC = 0.0, cumulW = 0.0; int first = -1; for (int b = maxBin-1; b >= 0; b--) { cumulC += histCent[b]; histCentCumul[b] = cumulC; cumulW += histWorst[b]; histWorstCumul[b] = cumulW; if (first < 0 && cumulW > 0.0) { first = b; } } if (first < 0) { std::cerr << "No data found!\n"; return RELION_EXIT_FAILURE; } CPlot2D plot2D(""); std::stringstream ogsts; ogsts << (optGroup + 1); std::string title = "Delocalisation"; if (name != "") title = title + " for " + name + " (opt. gr. " + ogsts.str() + ")"; std::stringstream pssts; pssts << angpix; std::stringstream frq0sts; frq0sts << minFreqAng; std::stringstream frq1sts; frq1sts << maxFreqAng; title = title + " at " + pssts.str() + " A/px"; if (minFreqAng <= 0) { title = title + " (up to " + frq1sts.str() + " A)"; } else { title = title + " (" + frq0sts.str() + " A - " + frq1sts.str() + " A)"; } plot2D.SetTitle(title); plot2D.SetDrawLegend(true); CDataSet center; center.SetDrawMarker(false); center.SetDatasetColor(0.0,0.0,0.0); center.SetLineWidth(0.5); center.SetDatasetTitle("particle center"); std::stringstream radsts; radsts << rad; CDataSet edge; edge.SetDrawMarker(false); edge.SetDatasetColor(0.3,0.3,0.6); edge.SetLineWidth(0.5); edge.SetDatasetTitle("worst periphery point (radius " + radsts.str() + " A)"); for (int i = 0; i < first + radPx + 1; i++) { if (i < maxBin && i <= first) { CDataPoint point(2*i, histCentCumul[i]/histCentCumul[0]); center.AddDataPoint(point); } if (i < maxBin && i <= first) { CDataPoint point(2*i, histWorstCumul[i]/histWorstCumul[0]); edge.AddDataPoint(point); } } plot2D.AddDataSet(center); plot2D.AddDataSet(edge); plot2D.SetXAxisTitle("box size (pixels)"); plot2D.SetYAxisTitle("fraction of pixels outside of box"); plot2D.OutputPostScriptPlot(outPath+".eps"); return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/postprocess.cpp000066400000000000000000000023421411340063500176260ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. 
Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include int main(int argc, char *argv[]) { Postprocessing prm; try { prm.read(argc, argv); if (prm.do_locres) prm.run_locres(); else prm.run(); } catch (RelionError XE) { //prm.usage(); std::cerr << XE; return RELION_EXIT_FAILURE; } return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/postprocess_mpi.cpp000066400000000000000000000031111411340063500204660ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include int main(int argc, char *argv[]) { Postprocessing prm; try { int rank, size; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); // Handle errors MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN); prm.read(argc, argv); // Don't put any output to screen for mpi followers prm.verb = (rank == 0) ? 1 : 0; if (prm.do_locres) prm.run_locres(rank, size); else prm.run(); } catch (RelionError XE) { //prm.usage(); std::cerr << XE; return RELION_EXIT_FAILURE; } MPI_Barrier(MPI_COMM_WORLD); MPI_Finalize(); return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/prepare_subtomo.cpp000066400000000000000000001336371411340063500204640ustar00rootroot00000000000000/*************************************************************************** * * Author: "Shaoda He" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. 
Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include #include #include #include #include //#define DEBUG class prepare_subtomo { public: IOParser parser; // Directory of IMOD executables 'extractilts' and 'newstack' FileName dir_imod; // Tomogram list STAR file FileName fn_tomo_list; // CTFFIND and Gctf executables FileName fn_ctffind_exe, fn_gctf_exe; // Extracted particle STAR file FileName fn_part; // Alias of the particle extraction job FileName fn_extract_job_alias; bool continue_old, do_skip_ctf_correction; bool do_use_trials_for_ctffind, do_use_only_lower_tilt_defoci; RFLOAT lower_tilt_defoci_limit; RFLOAT bfactor; bool is_coords_star_file; bool show_usage; bool dont_check_input_files; ////// CTFFIND parameters // Size of the box to calculate FFTw RFLOAT box_size; // Minimum and maximum resolution (in A) to be taken into account RFLOAT resol_min, resol_max; // Defocus search parameters (in A, positive is underfocus) RFLOAT min_defocus, max_defocus, step_defocus; // Amount of astigmatism (in A) RFLOAT amount_astigmatism; // Voltage (kV) RFLOAT Voltage; // Spherical aberration RFLOAT Cs; // Amplitude contrast (e.g. 0.07) RFLOAT AmplitudeConstrast; // Magnification RFLOAT Magnification; // Detector pixel size (um) RFLOAT PixelSize; // For Gctf: directly provide angpix! RFLOAT angpix; ////// Additional Gctf Parameters bool do_use_gctf, do_ignore_ctffind_params, do_EPA, do_validation; std::string additional_gctf_options, gpu_ids; prepare_subtomo() { clear(); }; ~prepare_subtomo() { clear(); }; void usage() { parser.writeUsage(std::cerr); }; void read(int argc, char **argv) { parser.setCommandLine(argc, argv); int general_section = parser.addSection("General"); show_usage = parser.checkOption("--help", "Show usage"); dont_check_input_files = parser.checkOption("--dont_check", "(Not recommended) Don't check input files in the initialisation step"); fn_tomo_list = parser.getOption("--i", "Tomogram STAR file", "all_tomograms.star"); fn_extract_job_alias = parser.getOption("--o_extract", "Extract job alias. Alias of the job given during particle extraction for RELION 2.0", "extract_tomo"); dir_imod = parser.getOption("--imod_dir", "Directory of IMOD executables", "/public/EM/imod/imod-4.5.8/IMOD/bin"); continue_old = parser.checkOption("--only_do_unfinished", "Only extract individual frames, estimate CTFs for those micrographs for which there is not yet a logfile with Final values. Only write out .sh commands where CTF .mrc files have not yet been reconstructed."); do_skip_ctf_correction = parser.checkOption("--skip_ctf", "Skip CTF correction? The 3D CTF model will have no CTF modulations, but will still use the Tilt and Bfactor weighting."); do_use_trials_for_ctffind = parser.checkOption("--use_trials", "Use trials for CTFFIND. Please keep a tomogram.trial stack in the Tomograms directory containing two trials from either side of the record region. 
Please note that the tilt order of the files should be same as the aligned stack."); do_use_only_lower_tilt_defoci = parser.checkOption("--use_low_tilt", "If you don't have extra trials, then maybe you can set an upper limit of abs(tilt), over which the average defocus value from lower tilts is used."); // default true lower_tilt_defoci_limit = textToFloat(parser.getOption("--low_tilt_limit", "(See above)", "30.")); bfactor = textToFloat(parser.getOption("--bfac", "3D CTF model weighting B-factor per e-/A2", "4.")); int ctffind_section = parser.addSection("CTFFIND parameters (CTFFIND is used as default)"); fn_ctffind_exe = parser.getOption("--ctffind_exe", "Location of ctffind executable (or through RELION_CTFFIND_EXECUTABLE environment variable)", "/public/EM/ctffind/ctffind.exe"); Cs = textToFloat(parser.getOption("--CS", "Spherical Aberration (mm) ","2.7")); Voltage = textToFloat(parser.getOption("--HT", "Voltage (kV)","300")); AmplitudeConstrast = textToFloat(parser.getOption("--AmpCnst", "Amplitude constrast", "0.07")); Magnification = textToFloat(parser.getOption("--XMAG", "Magnification", "53000")); PixelSize = textToFloat(parser.getOption("--DStep", "Detector pixel size (um)", "11.57")); box_size = textToFloat(parser.getOption("--Box", "Size of the boxes to calculate FFTs", "256")); resol_min = textToFloat(parser.getOption("--ResMin", "Minimum resolution (in A) to include in calculations", "50")); resol_max = textToFloat(parser.getOption("--ResMax", "Maximum resolution (in A) to include in calculations", "8")); min_defocus = textToFloat(parser.getOption("--dFMin", "Minimum defocus value (in A) to search", "20000")); max_defocus = textToFloat(parser.getOption("--dFMax", "Maximum defocus value (in A) to search", "50000")); step_defocus = textToFloat(parser.getOption("--FStep", "defocus step size (in A) for search", "1000")); amount_astigmatism = textToFloat(parser.getOption("--dAst", "amount of astigmatism (in A)", "2000")); int gctf_section = parser.addSection("Gctf parameters (ignored if CTFFIND is used)"); do_use_gctf = parser.checkOption("--use_gctf", "Use Gctf instead of CTFFIND to estimate the CTF parameters"); fn_gctf_exe = parser.getOption("--gctf_exe", "Location of Gctf executable (or through RELION_GCTF_EXECUTABLE environment variable)", "/lmb/home/kzhang/Public/Gctf/bin/Gctf-v0.50_sm_30_cu7.5_x86_64"); angpix = textToFloat(parser.getOption("--angpix", "Magnified pixel size in Angstroms", "2.18302")); do_ignore_ctffind_params = parser.checkOption("--ignore_ctffind_params", "Use Gctf default parameters instead of CTFFIND parameters"); do_EPA = parser.checkOption("--EPA", "Use equi-phase averaging to calculate Thon rinds in Gctf"); do_validation = parser.checkOption("--do_validation", "Use validation inside Gctf to analyse quality of the fit?"); gpu_ids = parser.getOption("--gpu", "(DOUBLE QUOTES NEEDED) Device ids for each MPI-thread, e.g \"0:1:2:3\"", ""); additional_gctf_options = parser.getOption("--extra_gctf_options", "(DOUBLE QUOTES NEEDED) Additional options for Gctf (e.g. \"--refine_local_astm\")", ""); }; void clear() { parser.clear(); }; void showMessages() { // Messages std::cout << std::endl; std::cout << " ### RELION 2.0 sub-tomogram averaging - 23:59, FEB 19, 2014 ###" << std::endl; std::cout << " # The original python script was written by Tanmay Bharat to support sub-tomogram averaging in RELION." << std::endl; std::cout << " # This 'relion_prepare_subtomo' executable was written by Shaoda He in Sjors Scheres' lab." 
<< std::endl; std::cout << " # Please ensure that you have provided the directory containing IMOD executables 'extracttilts' and 'newstack'" << std::endl; std::cout << " # Please provide either CTFFIND or Gctf executable." << std::endl; std::cout << " # Please report bugs and comments to tbharat@mrc-lmb.cam.ac.uk or scheres@mrc-lmb.cam.ac.uk" << std::endl; std::cout << " # Please read the documentation on the RELION wiki, several questions are answered there." << std::endl; std::cout << " # This version can set defocus values above a certain tilt to the defocus value of the zero degree tilt." << std::endl; std::cout << " # This version will write out all the CTF reconstruction commands in the master file." << std::endl; std::cout << " # This version supports RELION 2.0 only. For compatibility with older RELION, please use the original python script." << std::endl; std::cout << " # This version depends on IMOD executables (extracttilts, newstack) and CTFFIND or Gctf." << std::endl; std::cout << std::endl; std::cout << " ### RELION 2.0 sub-tomogram averaging - Usage (also refer to RELION wiki) ###" << std::endl; std::cout << " # Before running the program: " << std::endl; std::cout << " # 1. Create a directory 'Tomogram/tomo\?\?\?' for each reconstructed 3D tomogram." << std::endl; std::cout << " # 2. In each of the individual tomogram directories you need:" << std::endl; std::cout << " # a. tomo.mrc : the actual reconstructed tomogram." << std::endl; std::cout << " # b. tomo.mrcs : the aligned tilt series in MRC-stack format (Please rename if they are in .st format!)" << std::endl; std::cout << " # c. tomo.star : a STAR file with at least 3 columns: _rlnCoordinateX, Y and Z. (e.g. STAR file generated by 'relion_helix_toolbox --interpo')" << std::endl; std::cout << " # OR (if STAR file exists then .coords file will be ignored)" << std::endl; std::cout << " # tomo.coords : a text file with 3 columns: the X, Y and Z coordinates of each subtomogram (e.g. save this from IMOD)." << std::endl; std::cout << " # d. tomo.order : a text file with 2 columns: the tilt angle of each image in tomo.mrcs and the accumulated dose in e-/A2 for that image." << std::endl; std::cout << " # e. tomo.tlt : (OPTIONAL) a text file with the final tilt angles from IMOD. If this is not provided then the extended header of the .mrcs will be read." << std::endl; std::cout << " # 3. Run the program. (Input files will be checked in the initialisation step. Please pay attention if error messages pop up.)" << std::endl; std::cout << " # 4. Check the contents of 'do_all_reconstruct_ctfs.sh', (split it into multiple files for parallelisation) and run the .sh script (please provide the reconstruction box size)." << std::endl; std::cout << " # 5. Process the data with RELION 2.0 GUI." << std::endl; std::cout << std::endl; if (show_usage) REPORT_ERROR("All the available parameters and their default values are listed above."); }; void initialChecks() { FileName fn1, fn2, fn3; std::ifstream fin1; MetaDataTable MD_tomo; std::string line, command; std::vector words; std::vector fns_tomo; RFLOAT calc_angpix = 0.; int res = 0; std::cout << " ###################################################################" << std::endl; std::cout << " Checking input data ..." << std::endl; if (Magnification < 1.) REPORT_ERROR("Invalid magnification!"); calc_angpix = 10000. 
* PixelSize / Magnification; std::cout << " Calculated pixel size (10000 * DPix / Mag) = " << calc_angpix << " Angstrom(s)" << std::endl; if (calc_angpix < 0.001) REPORT_ERROR("Calculated pixel size is smaller than 0.001!"); // Check CTFFIND or Gctf executables if (do_skip_ctf_correction) { Cs = 0.; AmplitudeConstrast = 1.; do_use_trials_for_ctffind = false; do_use_only_lower_tilt_defoci = false; } else { if (do_use_gctf) { //REPORT_ERROR("Gctf is not currently supported!"); // Get the GCTF executable if (fn_gctf_exe == "") { char* penv = getenv ("RELION_GCTF_EXECUTABLE"); if (penv != NULL) fn_gctf_exe = (std::string)penv; } if ( (fn_gctf_exe.length() < 2) && (!exists(fn_gctf_exe)) ) REPORT_ERROR("Cannot find Gctf executable " + fn_gctf_exe); #ifdef DEBUG if (gpu_ids != "") { std::cout << " str_gpu_ids = " << gpu_ids << std::endl; //if (gpu_ids.length() < 2) // REPORT_ERROR("Invalid GPU ids!"); //if ( (gpu_ids[0] != '\"') || (gpu_ids[gpu_ids.length() - 1] != '\"') ) // REPORT_ERROR("GPU ids should come with double quotes outside! (e.g. \"0:1:2:3\")"); } if (additional_gctf_options != "") { std::cout << " str_additional_gctf_options = " << additional_gctf_options << std::endl; //if (additional_gctf_options.length() < 2) // REPORT_ERROR("Invalid additional gctf options!"); //if ( (additional_gctf_options[0] != '\"') || (additional_gctf_options[additional_gctf_options.length() - 1] != '\"') ) // REPORT_ERROR("Additional gctf options should come with double quotes outside! (e.g. \"--refine_local_astm\")"); } return RELION_EXIT_FAILURE; #endif std::cout << " Pixel size used in Gctf = " << angpix << " Angstrom(s)" << std::endl; RFLOAT ratio = angpix / calc_angpix; // calc_angpix >= 0.001, no need to check again if ( (ratio < 0.99) || (ratio > 1.01) ) REPORT_ERROR("Calculated and user-defined pixel sizes are different (> 1% of error)!"); } else { // Get the CTFFIND executable if (fn_ctffind_exe == "") { char* penv = getenv ("RELION_CTFFIND_EXECUTABLE"); if (penv != NULL) fn_ctffind_exe = (std::string)penv; } if ( (fn_ctffind_exe.length() < 2) && (!exists(fn_ctffind_exe)) ) REPORT_ERROR("Cannot find CTFFIND executable " + fn_ctffind_exe); } // TODO: output CTF parameters! 
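/* Worked example of the pixel-size consistency check above, using the
 * command-line defaults of this program (--DStep 11.57, --XMAG 53000,
 * --angpix 2.18302); only the arithmetic is illustrated here:
 *
 *   calc_angpix = 10000. * PixelSize / Magnification
 *               = 10000. * 11.57 / 53000.        // ~2.18302 A/px
 *
 * With Gctf, the user-supplied --angpix is then compared against this value:
 *
 *   RFLOAT ratio = angpix / calc_angpix;          // here ~1.0
 *   if (ratio < 0.99 || ratio > 1.01)             // more than 1% off
 *       REPORT_ERROR("Calculated and user-defined pixel sizes are different (> 1% of error)!");
 */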
} if (do_use_only_lower_tilt_defoci) { if (!(lower_tilt_defoci_limit > 0.)) REPORT_ERROR("Lower tilt defoci limit should be larger than 0.0!"); // TODO: Report lower_tilt_defoci_limit } // Check IMOD executables: 'extracttilts' and 'newstack fn1 = dir_imod + "/extracttilts"; if (!exists(fn1)) REPORT_ERROR("Cannot find IMOD 'extractilts' executable " + fn1); fn1 = dir_imod + "/newstack"; if (!exists(fn1)) REPORT_ERROR("Cannot find IMOD 'newstack' executable " + fn1); // Check tomogram list if (!exists(fn_tomo_list)) REPORT_ERROR("Cannot find the STAR file with all the tomograms " + fn_tomo_list); MD_tomo.clear(); MD_tomo.read(fn_tomo_list); if (MD_tomo.numberOfObjects() < 1) REPORT_ERROR("Tomogram STAR file " + fn_tomo_list + " is empty!"); if (!MD_tomo.containsLabel(EMDL_MICROGRAPH_NAME)) REPORT_ERROR("Tomogram STAR file " + fn_tomo_list + " should contain _rlnMicrographName!"); // Check whether each tomogram sits in a separate folder fns_tomo.clear(); FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD_tomo) { MD_tomo.getValue(EMDL_MICROGRAPH_NAME, fn1); fns_tomo.push_back(fn1); } if (fns_tomo.size() < 1) REPORT_ERROR("Tomogram STAR file " + fn_tomo_list + " is empty!"); std::stable_sort(fns_tomo.begin(), fns_tomo.end()); for (size_t idx = 0; idx < (fns_tomo.size() - 1); idx++) { fn1 = fns_tomo[idx]; fn2 = fns_tomo[idx + 1]; if (fn1 == fn2) REPORT_ERROR("Tomogram " + fn1 + " appears in the tomogram STAR file more than once!"); if (fn1.beforeLastOf("/") == fn2.beforeLastOf("/")) REPORT_ERROR("Tomograms " + fn1 + " and " + fn2 + " are located in the same folder!"); } // Check dependent files MetaDataTable MD_tmp; Image img; int xdim = 0, ydim = 0, zdim = 0; long int ndim = 0, nr_frames = 0, nr_lines = 0; RFLOAT xx = 0., yy = 0., zz = 0.; bool is_star_coords = false, is_txt_coords = false; MD_tmp.clear(); img.clear(); FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD_tomo) { MD_tomo.getValue(EMDL_MICROGRAPH_NAME, fn1); std::cout << " 3D reconstructed tomogram in STAR file: " << fn1 << std::flush; // Check 3D reconstructed tomogram if (!exists(fn1)) REPORT_ERROR("Cannot find 3D reconstructed tomogram " + fn1); img.read(fn1, false); img.getDimensions(xdim, ydim, zdim, ndim); std::cout << " , Dimensions XYZN = " << xdim << " * " << ydim << " * " << zdim << " * " << ndim << std::endl; if ( (zdim > 1) && (ndim > 1) ) REPORT_ERROR("Reconstructed 3D tomogram " + fn1 + " is 4D!"); if ( (xdim < box_size) || (ydim < box_size) ) REPORT_ERROR("X and/or Y dimensions of reconstructed 3D tomogram " + fn1 + " is smaller than CTF box_size " + integerToString(box_size)); // TODO: consider Gctf box_size? 
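/* Illustrative sketch of the tomogram list these checks expect (tomogram and
 * folder names are hypothetical; only the layout matters). The --i STAR file
 * (default all_tomograms.star) holds one _rlnMicrographName entry per
 * tomogram, and every tomogram must sit in its own directory next to its
 * .mrcs, .order, .coords/.star (and optional .tlt) files, e.g.:
 *
 *   data_
 *   loop_
 *   _rlnMicrographName
 *   Tomograms/tomo001/tomo001.mrc
 *   Tomograms/tomo002/tomo002.mrc
 */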
// Check original tilt series fn2 = fn1.withoutExtension() + ".mrcs"; if (!exists(fn2)) REPORT_ERROR("Cannot find original tilt series (.mrcs) " + fn2); std::cout << " Tilt series : " << fn2 << std::flush; img.read(fn2, false); img.getDimensions(xdim, ydim, zdim, ndim); std::cout << " , Dimensions XYZN = " << xdim << " * " << ydim << " * " << zdim << " * " << ndim << std::endl; if ( (zdim > 1) && (ndim > 1) ) REPORT_ERROR("Tilt series " + fn2 + " is 4D!"); if ( (xdim < box_size) || (ydim < box_size) ) REPORT_ERROR("X and/or Y dimensions of tilt series " + fn2 + " is smaller than CTF box_size " + integerToString(box_size)); nr_frames = zdim * ndim; if (nr_frames < 2) REPORT_ERROR("Tilt series " + fn2 + " contains less than 2 frames!"); // Check .tlt (optional) fn2 = fn1.withoutExtension() + ".tlt"; if (exists(fn2)) { std::cout << " File .tlt : " << fn2 << " (optional)" << std::endl; nr_lines = 0; fin1.open(fn2.c_str(), std::ios_base::in); if (fin1.fail()) REPORT_ERROR("Cannot open .tlt file: " + fn2); while (getline(fin1, line, '\n')) { words.clear(); tokenize(line, words); if (words.size() == 0) // Empty line continue; if (words.size() != 1) // 1 blocks: tilt angle REPORT_ERROR("Invalid .tlt file: " + fn2); xx = textToFloat(words[0]); nr_lines++; // A valid line } fin1.close(); if (nr_lines != nr_frames) REPORT_ERROR("Tilt series has " + integerToString(nr_frames) + " frames but .tlt file " + fn2 + " has " + integerToString(nr_lines) + " lines!"); } // Check .order fn2 = fn1.withoutExtension() + ".order"; if (!exists(fn2)) REPORT_ERROR("Cannot find .order file " + fn2); std::cout << " File .order : " << fn2 << std::endl; nr_lines = 0; fin1.open(fn2.c_str(), std::ios_base::in); if (fin1.fail()) REPORT_ERROR("Cannot open .order file: " + fn2); while (getline(fin1, line, '\n')) { words.clear(); tokenize(line, words); if (words.size() == 0) // Empty line continue; if (words.size() != 2) // 2 blocks: tilt angle, accumulated dose REPORT_ERROR("Invalid .order file: " + fn2); xx = textToFloat(words[0]); yy = textToFloat(words[1]); nr_lines++; // A valid line } fin1.close(); if (nr_lines != nr_frames) REPORT_ERROR("Tilt series has " + integerToString(nr_frames) + " frames but .order file " + fn2 + " has " + integerToString(nr_lines) + " lines!"); // Check txt or STAR coords fn2 = fn1.withoutExtension() + ".coords"; fn3 = fn1.withoutExtension() + ".star"; if ( (!exists(fn2)) && (!exists(fn3)) ) REPORT_ERROR("Cannot find .coord OR .star file " + fn2 + " OR " + fn3); if (exists(fn3)) { std::cout << " Coords STAR : " << fn3 << std::endl; MetaDataTable MD_this_tomo; MD_this_tomo.clear(); MD_this_tomo.read(fn3); if (MD_this_tomo.numberOfObjects() < 1) REPORT_ERROR("Coordinates STAR file " + fn3 + " is empty!"); if (MD_tmp.numberOfObjects() < 1) // MD_tmp is empty. 'fn3' is the first STAR file ever read. 
MD_tmp = MD_this_tomo; else { if (!MetaDataTable::compareLabels(MD_tmp, MD_this_tomo)) REPORT_ERROR("Coordinates STAR file " + fn3 + " has a different set of activeLabels!"); } if ( (!MD_this_tomo.containsLabel(EMDL_IMAGE_COORD_X)) || (!MD_this_tomo.containsLabel(EMDL_IMAGE_COORD_Y)) || (!MD_this_tomo.containsLabel(EMDL_IMAGE_COORD_Z)) ) REPORT_ERROR("Coordinates STAR file " + fn3 + " should contain _rlnCoordinateX Y and Z!"); is_star_coords = true; if (is_txt_coords) REPORT_ERROR("All coordinates files should have the same extensions (either all .star or all .coords)!"); } else { std::cout << " File .coords : " << fn2 << std::endl; nr_lines = 0; fin1.open(fn2.c_str(), std::ios_base::in); if (fin1.fail()) REPORT_ERROR("Cannot open .coords file: " + fn2); while (getline(fin1, line, '\n')) { words.clear(); tokenize(line, words); if (words.size() == 0) // Empty line continue; if (words.size() != 3) // 3 blocks: x, y, z REPORT_ERROR("Invalid .coords file: " + fn2); xx = textToFloat(words[0]); yy = textToFloat(words[1]); zz = textToFloat(words[2]); nr_lines++; // A valid line } fin1.close(); is_txt_coords = true; if (is_star_coords) REPORT_ERROR("All coordinates files should have the same extensions (either all .star or all .coords)!"); } // Check .trial if (do_use_trials_for_ctffind) { fn2 = fn1.withoutExtension() + ".trial"; if (!exists(fn2)) REPORT_ERROR("Cannot find .trial file " + fn2); std::cout << " File .trial : " << fn2 << std::flush; fn2 += ":mrcs"; // Open this file as .mrcs stack img.read(fn2, false); img.getDimensions(xdim, ydim, zdim, ndim); std::cout << " , Dimensions XYZN = " << xdim << " * " << ydim << " * " << zdim << " * " << ndim << std::endl; if ( (zdim > 1) && (ndim > 1) ) REPORT_ERROR("Trial series " + fn2 + " is 4D!"); if ( (xdim < box_size) || (ydim < box_size) ) REPORT_ERROR("X and/or Y dimensions of trial series " + fn2 + " is smaller than CTF box_size " + integerToString(box_size)); if ( (zdim * ndim) != (nr_frames * 2)) REPORT_ERROR("Trial series has " + integerToString(zdim * ndim) + " frames, not 2X the total frames " + integerToString(nr_frames) + " in the tilt series!"); } // Create folders for CTF correction fn2 = fn1.beforeLastOf("/") + "/Ctffind/Results"; std::cout << " Folder containing CTF correction results: " << fn2 << std::endl; if (!exists(fn2)) { std::cout << " This folder does not exist. Create it." << std::endl; command = "mkdir -p " + fn2; #ifdef DEBUG std::cout << " " << command << std::endl; #endif res = system(command.c_str()); } // Create folders for particle extraction fn2 = "Particles/" + fn1.beforeLastOf("/"); std::cout << " Folder containing extracted particles: " << fn2 << std::endl; if (!exists(fn2)) { std::cout << " This folder does not exist. Create it." << std::endl; command = "mkdir -p " + fn2; #ifdef DEBUG std::cout << " " << command << std::endl; #endif res = system(command.c_str()); } } std::cout << " Input data checked." 
<< std::endl; std::cout << " ###################################################################" << std::endl; }; void run() { std::ifstream fin1; std::ofstream fout1; std::string line, command; std::vector words; int res = 0; FileName fn_ctf_recon = "do_all_reconstruct_ctfs.sh"; MetaDataTable MD_tomo, MD_part; MD_tomo.clear(); MD_tomo.read(fn_tomo_list); // Initialise MetaDataTable for particles MD_part.clear(); MD_part.addLabel(EMDL_MICROGRAPH_NAME); MD_part.addLabel(EMDL_IMAGE_COORD_X); MD_part.addLabel(EMDL_IMAGE_COORD_Y); MD_part.addLabel(EMDL_IMAGE_COORD_Z); MD_part.addLabel(EMDL_IMAGE_NAME); MD_part.addLabel(EMDL_CTF_IMAGE); MD_part.addLabel(EMDL_CTF_MAGNIFICATION); MD_part.addLabel(EMDL_CTF_DETECTOR_PIXEL_SIZE); // Open output file fn_ctf_recon fout1.open(fn_ctf_recon.c_str(), std::ios_base::out); if (fout1.fail()) REPORT_ERROR("Cannot open output file: " + (std::string)(fn_ctf_recon)); if (continue_old) { fout1 << "# Option '--only_do_unfinished' is enabled." << std::endl; fout1 << "# Commands are commented if reconstructed CTF files exist." << std::endl; fout1 << std::endl; } FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD_tomo) { std::cout << std::endl; FileName fn_tomo; MD_tomo.getValue(EMDL_MICROGRAPH_NAME, fn_tomo); std::cout << "#### Processing tomogram " << fn_tomo << " ... ####" << std::endl; FileName dir_ctf = fn_tomo.beforeLastOf("/") + "/Ctffind"; FileName dir_ctf_results = fn_tomo.beforeLastOf("/") + "/Ctffind/Results"; FileName fn_order = fn_tomo.withoutExtension() + ".order"; FileName fn_stack = fn_tomo.withoutExtension() + ".mrcs"; FileName fn_trial = fn_tomo.withoutExtension() + ".trial"; // optional FileName fn_tilt = fn_tomo.withoutExtension() + ".tlt"; // optional FileName fn_coords = fn_tomo.withoutExtension() + ".star"; if (!exists(fn_coords)) fn_coords = fn_tomo.withoutExtension() + ".coords"; bool have_tilt = exists(fn_tilt); // ExtractTilt if (!have_tilt) { std::cout << " File .tlt does not exist. Call IMOD extracttilt." << std::endl; command = dir_imod + "/extracttilts -InputFile " + fn_stack + " -tilts -OutputFile " + dir_ctf + "/tiltangles.txt > " + dir_ctf + "/tiltangles_scratch.txt"; //#ifdef DEBUG std::cout << " " << command << std::endl; //#endif res = system(command.c_str()); } else { std::cout << " File .tlt file exists. Copy it to CTFFIND folder." << std::endl; command = "cp " + fn_tilt + " " + dir_ctf + "/tiltangles.txt"; //#ifdef DEBUG std::cout << " " << command << std::endl; //#endif res = system(command.c_str()); } if (do_use_trials_for_ctffind) { std::cout << " Use trails in CTFFIND. Call IMOD extracttilt." << std::endl; command = dir_imod + "/extracttilts -InputFile " + fn_trial + " -tilts -OutputFile " + dir_ctf + "/trial_tiltangles.txt > " + dir_ctf + "/trial_tiltangles_scratch.txt"; //#ifdef DEBUG std::cout << " " << command << std::endl; //#endif res = system(command.c_str()); } std::cout << " Tilt values have been extracted." 
<< std::endl; // CTF correction //FileName fn_ctf = fn_tomo.withoutExtension() + "_images.star"; // OLD FileName fn_ctf = fn_tomo.beforeLastOf("/") + "/Ctffind/" + (fn_tomo.afterLastOf("/")).withoutExtension() + "_images.star"; // NEW #ifdef DEBUG std::cout << " fn_ctf = " << fn_ctf << std::endl; #endif FileName fn_tilt_txt = dir_ctf + "/tiltangles.txt"; FileName fn_trial_tilt_txt = dir_ctf + "/trial_tiltangles.txt"; std::vector tilts, trial_tilts; MetaDataTable MD_ctf, MD_ctf_results; tilts.clear(); trial_tilts.clear(); MD_ctf.clear(); MD_ctf_results.clear(); MD_ctf.addLabel(EMDL_MICROGRAPH_NAME); fin1.open(fn_tilt_txt.c_str(), std::ios_base::in); if (fin1.fail()) REPORT_ERROR("Cannot open input file: " + (std::string)(fn_tilt_txt)); // Get tilt angles (without trials) while (getline(fin1, line, '\n')) { words.clear(); tokenize(line, words); if (words.size() == 0) // Empty line continue; if (words.size() != 1) REPORT_ERROR("Invalid .tlt file: " + fn_tilt_txt); tilts.push_back(textToFloat(words[0])); } fin1.close(); // Get tilt angles (with trials) if (do_use_trials_for_ctffind) { fin1.open(fn_trial_tilt_txt.c_str(), std::ios_base::in); if (fin1.fail()) REPORT_ERROR("Cannot open input file: " + (std::string)(fn_trial_tilt_txt)); while (getline(fin1, line, '\n')) { words.clear(); tokenize(line, words); if (words.size() == 0) // Empty line continue; if (words.size() != 1) REPORT_ERROR("Invalid .tlt trial file: " + fn_trial_tilt_txt); trial_tilts.push_back(textToFloat(words[0])); } fin1.close(); if ( (tilts.size() * 2) != trial_tilts.size()) REPORT_ERROR("Invalid .tlt and/or .tlt trial files: " + fn_tilt_txt + " and " + fn_trial_tilt_txt + " . The trial file should contain 2X lines."); } // Extract individual tilt frames using IMOD executable 'newstack' or 'relion_image_handler' // Note that frame id starts from 0 in' newstack' but 1 in 'relion_image_handler' command = "touch " + dir_ctf + "/temp_newstack_out.txt"; #ifdef DEBUG std::cout << " " << command << std::endl; #endif for (int ida = 0; ida < tilts.size(); ida++) { FileName fn_sec; if (do_use_trials_for_ctffind) { // Command 1 fn_sec = dir_ctf + "/" + (fn_trial.afterLastOf("/")).withoutExtension() + "_image" + floatToString(tilts[ida]) + "_" + integerToString(2 * ida) + ".mrc"; if ( (continue_old) && (exists(fn_sec)) ) {} else { // TODO: I want to use relion_image_handler but failed. It does not produce the same set of individual frames. command = "relion_image_handler --i " + integerToString(2 * ida + 1) + "@" + fn_trial + ":mrcs --o " + fn_sec; //command = dir_imod + "/newstack -secs " + integerToString(2 * ida) + " " + fn_trial + " " + fn_sec + " >> " + dir_ctf + "/temp_newstack_out.txt"; //#ifdef DEBUG std::cout << " " << command << std::endl; //#endif res = system(command.c_str()); } MD_ctf.addObject(); MD_ctf.setValue(EMDL_MICROGRAPH_NAME, fn_sec); // Command 2 fn_sec = dir_ctf + "/" + (fn_trial.afterLastOf("/")).withoutExtension() + "_image" + floatToString(tilts[ida]) + "_" + integerToString(2 * ida + 1) + ".mrc"; if ( (continue_old) && (exists(fn_sec)) ) {} else { // TODO: I want to use relion_image_handler but failed. It does not produce the same set of individual frames. 
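/* Worked example of the slice numbering used for frame extraction below
 * (stack name hypothetical): for tilt index ida = 2, the second trial frame
 * is section 2*ida+1 = 5 in IMOD's 0-based 'newstack -secs' convention, but
 * slice 2*ida+2 = 6 in RELION's 1-based "N@stack" notation used by
 * relion_image_handler, i.e. "--i 6@tomo001.trial:mrcs".
 */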
command = "relion_image_handler --i " + integerToString(2 * ida + 2) + "@" + fn_trial + ":mrcs --o " + fn_sec; //command = dir_imod + "/newstack -secs " + integerToString(2 * ida + 1) + " " + fn_trial + " " + fn_sec + " >> " + dir_ctf + "/temp_newstack_out.txt"; //#ifdef DEBUG std::cout << " " << command << std::endl; //#endif res = system(command.c_str()); } MD_ctf.addObject(); MD_ctf.setValue(EMDL_MICROGRAPH_NAME, fn_sec); } else { fn_sec = dir_ctf + "/" + (fn_stack.afterLastOf("/")).withoutExtension() + "_image" + floatToString(tilts[ida]) + "_" + integerToString(ida) + ".mrc"; if ( (continue_old) && (exists(fn_sec)) ) {} else { // TODO: I want to use relion_image_handler but failed. It does not produce the same set of individual frames. command = "relion_image_handler --i " + integerToString(ida + 1) + "@" + fn_stack + ":mrcs --o " + fn_sec; //command = dir_imod + "/newstack -secs " + integerToString(ida) + " " + fn_stack + " " + fn_sec + " >> " + dir_ctf + "/temp_newstack_out.txt"; //#ifdef DEBUG std::cout << " " << command << std::endl; //#endif res = system(command.c_str()); } MD_ctf.addObject(); MD_ctf.setValue(EMDL_MICROGRAPH_NAME, fn_sec); } } MD_ctf.write(fn_ctf); // Run CTFFIND and store the estimated parameters if (do_skip_ctf_correction) { MD_ctf_results.clear(); MD_ctf_results = MD_ctf; MD_ctf_results.addLabel(EMDL_CTF_DEFOCUSU); MD_ctf_results.addLabel(EMDL_CTF_DEFOCUSV); //MD_ctf_results.addLabel(EMDL_CTF_DEFOCUS_ANGLE); FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD_ctf_results) { MD_ctf_results.setValue(EMDL_CTF_DEFOCUSU, 0.); MD_ctf_results.setValue(EMDL_CTF_DEFOCUSV, 0.); //MD_ctf_results.setValue(EMDL_CTF_DEFOCUS_ANGLE, 0.); } MD_ctf_results.write(dir_ctf_results + "/micrographs_ctf.star"); } else { command = "relion_run_ctffind --i " + fn_ctf + " --o " + dir_ctf_results + "/ --CS " + floatToString(Cs) + " --HT " + floatToString(Voltage) + " --ctfWin -1 --AmpCnst " + floatToString(AmplitudeConstrast) + " --DStep " + floatToString(PixelSize) + " --XMAG " + floatToString(Magnification) + " --Box " + floatToString(box_size) + " --dFMin " + floatToString(min_defocus) + " --dFMax " + floatToString(max_defocus) + " --FStep " + floatToString(step_defocus) + " --dAst " + floatToString(amount_astigmatism) + " --ResMin " + floatToString(resol_min) + " --ResMax " + floatToString(resol_max); if (continue_old) command += " --only_do_unfinished"; if (do_use_gctf) { command += " --use_gctf --gctf_exe " + fn_gctf_exe; command += " --angpix " + floatToString(angpix); if (do_ignore_ctffind_params) command += " --ignore_ctffind_params"; if (do_EPA) command += " --EPA"; if (do_validation) command += " --do_validation"; //if (gpu_ids.length() < 2) // gpu_ids = "\"\""; // If gpu_ids is empty, put double quotes outside command += " --gpu \"" + gpu_ids + "\""; // TODO: User needs to provide double quotes outside if (additional_gctf_options.length() > 0) command += " --extra_gctf_options \"" + additional_gctf_options + "\""; // TODO: User needs to provide double quotes outside } else { command += " --ctffind_exe \"" + fn_ctffind_exe + " --omp-num-threads 1 --old-school-input\""; } //#ifdef DEBUG std::cout << " " << command << std::endl; //#endif res = system(command.c_str()); // TODO: Support GCTF ? } // Making .star files for each 3D CTF Volume //FileName dir_rec = "Particles/" + fn_tomo.beforeLastOf("/"); // I don't need this... //FileName fn_rec = "Particles/" + fn_tomo.withoutExtension() + "_rec_CTF_volumes.sh"; // I don't need this... 
std::vector order_tilts, accu_dose, avg_defoci; RFLOAT du = 0., dv = 0.; order_tilts.clear(); accu_dose.clear(); avg_defoci.clear(); std::cout << " Reading tilt series order file " + fn_order + " for dose dependent B-Factor weighting..." << std::endl; fin1.open(fn_order.c_str(), std::ios_base::in); if (fin1.fail()) REPORT_ERROR("Cannot open input file: " + (std::string)(fn_order)); while (getline(fin1, line, '\n')) { words.clear(); tokenize(line, words); if (words.size() == 0) // Empty line continue; if (words.size() != 2) REPORT_ERROR("Invalid input file!"); order_tilts.push_back(textToFloat(words[0])); accu_dose.push_back(textToFloat(words[1])); } fin1.close(); MD_ctf_results.clear(); MD_ctf_results.read(dir_ctf_results + "/micrographs_ctf.star"); if ( (!MD_ctf_results.containsLabel(EMDL_CTF_DEFOCUSU)) || (!MD_ctf_results.containsLabel(EMDL_CTF_DEFOCUSV)) || (!MD_ctf_results.containsLabel(EMDL_MICROGRAPH_NAME)) ) REPORT_ERROR("micrographs_ctf.star should contain _rlnDefocusU, _rlnDefocusV and _rlnMicrographName! Please check whether CTF estimation was done successfully."); FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD_ctf_results) { MD_ctf_results.getValue(EMDL_CTF_DEFOCUSU, du); MD_ctf_results.getValue(EMDL_CTF_DEFOCUSV, dv); avg_defoci.push_back(du); // TODO: Why just read in defocusU but not with defocusV and defocusAngle ??? } if (do_use_trials_for_ctffind) { #ifdef DEBUG std::cout << " trial_tilts.size() = " << trial_tilts.size() << " , order_tilts.size() = " << order_tilts.size() << std::endl; #endif if (trial_tilts.size() != (order_tilts.size() * 2)) REPORT_ERROR("Tilt series are different in .tlt (or stack header) and .order files: " + fn_trial_tilt_txt + " and " + fn_order); #ifdef DEBUG std::cout << " avg_defoci.size() = " << avg_defoci.size() << " , trial_tilts.size() = " << trial_tilts.size() << std::endl; #endif if (avg_defoci.size() != trial_tilts.size()) REPORT_ERROR("Tilt series are different in .tlt (or stack header) and micrographs_ctf.star files: " + fn_trial_tilt_txt + " and " + dir_ctf_results + "/micrographs_ctf.star"); std::vector tmp_vec; tmp_vec.clear(); for (int id = 0; id < avg_defoci.size(); id += 2) tmp_vec.push_back((avg_defoci[id] + avg_defoci[id + 1]) / 2.); avg_defoci.clear(); avg_defoci = tmp_vec; } else { if (tilts.size() != order_tilts.size()) REPORT_ERROR("Tilt series are different in .tlt (or stack header) and .order files: " + fn_tilt_txt + " and " + fn_order); if (avg_defoci.size() != tilts.size()) REPORT_ERROR("Tilt series are different in .tlt (or stack header) and micrographs_ctf.star files!"); } if (avg_defoci.size() != tilts.size()) REPORT_ERROR("Tilt series are different in .tlt (or stack header) and micrographs_ctf.star files: " + fn_tilt_txt + " and " + dir_ctf_results + "/micrographs_ctf.star"); // Deal with lower tilts if (do_use_only_lower_tilt_defoci) { RFLOAT sum_defoci = 0., nr_defoci = 0.; for (int id = 0; id < tilts.size(); id++) { if (fabs(tilts[id]) < lower_tilt_defoci_limit) { sum_defoci += avg_defoci[id]; nr_defoci += 1.; } } if (nr_defoci > 0.5) sum_defoci /= nr_defoci; std::cout << " " << fn_tomo << " : Average defocus from the lower tilt images below " << lower_tilt_defoci_limit << " is " << sum_defoci << std::endl; for (int id = 0; id < tilts.size(); id++) { // TODO: if (fabs(tilts[id]) > lower_tilt_defoci_limit) // python script { //if (!(fabs(tilts[id]) < lower_tilt_defoci_limit)) // Shaoda's idea avg_defoci[id] = sum_defoci; } } } // TODO: consider Y/Z flipped tomograms ? Maybe not. 
The coordinates have already been flipped when processing the .mod files. Image img; int xdim = 0, ydim = 0, zdim = 0; long int ndim = 0; RFLOAT calc_angpix = 10000. * PixelSize / Magnification; std::cout << " Calculated pixel size = " << calc_angpix << " Angstrom(s)" << std::endl; std::cout << " Extract XYZN dimensions of the tomogram " << fn_tomo << std::endl; img.read(fn_tomo, false); img.getDimensions(xdim, ydim, zdim, ndim); std::cout << " Tomogram XYZN dimensions = " << xdim << " * " << ydim << " * " << zdim << " * " << ndim << std::endl; std::cout << " Writing out .star files to make 3D CTF volumes..." << std::endl; RFLOAT xx = 0., yy = 0., zz = 0.; int nr_subtomo = 0; bool write_star_file = false; FileName fn_subtomo_star, fn_subtomo_mrc; MetaDataTable MD_coords, MD_this_subtomo; // Load coordinates into MD_coords MD_coords.clear(); #ifdef DEBUG std::cout << " fn_coords = " << fn_coords << std::endl; #endif if (fn_coords.getExtension() != "star") { MD_coords.addLabel(EMDL_IMAGE_COORD_X); MD_coords.addLabel(EMDL_IMAGE_COORD_Y); MD_coords.addLabel(EMDL_IMAGE_COORD_Z); fin1.open(fn_coords.c_str(), std::ios_base::in); while (getline(fin1, line, '\n')) { words.clear(); tokenize(line, words); if (words.size() == 0) // Empty line continue; if (words.size() != 3) REPORT_ERROR("Invalid input file: " + fn_coords); MD_coords.addObject(); MD_coords.setValue(EMDL_IMAGE_COORD_X, textToFloat(words[0])); MD_coords.setValue(EMDL_IMAGE_COORD_Y, textToFloat(words[1])); MD_coords.setValue(EMDL_IMAGE_COORD_Z, textToFloat(words[2])); } fin1.close(); } else { // All MD_coords have the same set of EMDLabels MD_coords.read(fn_coords); // Append extra columns from MD_coords std::vector labels = MD_coords.getActiveLabels(); for (size_t idx = 0; idx < labels.size(); idx++) { if (!MD_part.containsLabel(labels[idx])) MD_part.addLabel(labels[idx]); } } // Loop over every picked 3D point if (MD_coords.numberOfObjects() < 1) REPORT_ERROR("MD_coords is empty! It reads from .coord or .star file: " + fn_coords); nr_subtomo = 0; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD_coords) { nr_subtomo++; MD_coords.getValue(EMDL_IMAGE_COORD_X, xx); MD_coords.getValue(EMDL_IMAGE_COORD_Y, yy); MD_coords.getValue(EMDL_IMAGE_COORD_Z, zz); write_star_file = ((do_skip_ctf_correction) && (nr_subtomo == 1)) || (!do_skip_ctf_correction); if (!write_star_file) continue; // TODO: check this! OK. I think it is fine. 
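/* Example of the two output naming schemes used just below (the tomogram name
 * tomo001 is hypothetical):
 *  - with --skip_ctf, a single 3D CTF model STAR file per tomogram is written
 *    (for the first picked point only):
 *      Particles/Tomograms/tomo001/tomo001_ctf.star  and  _ctf.mrc
 *  - otherwise, one per sub-tomogram, with a 6-digit 1-based index:
 *      Particles/Tomograms/tomo001/tomo001_ctf000001.star  and  _ctf000001.mrc
 */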
// Only do once if do_skip_ctf_correction if ( (do_skip_ctf_correction) && (nr_subtomo == 1) ) { fn_subtomo_star = "Particles/" + fn_tomo.withoutExtension() + "_ctf.star"; fn_subtomo_mrc = "Particles/" + fn_tomo.withoutExtension() + "_ctf.mrc"; #ifdef DEBUG std::cout << " fn_subtomo_star = " << fn_subtomo_star << " , fn_subtomo_mrc = " << fn_subtomo_mrc << std::endl; #endif } // For every sub-tomogram if (!do_skip_ctf_correction) { fn_subtomo_star = "Particles/" + fn_tomo.withoutExtension() + "_ctf" + integerToString(nr_subtomo, 6, '0') + ".star"; fn_subtomo_mrc = "Particles/" + fn_tomo.withoutExtension() + "_ctf" + integerToString(nr_subtomo, 6, '0') + ".mrc"; #ifdef DEBUG std::cout << " fn_subtomo_star = " << fn_subtomo_star << " , fn_subtomo_mrc = " << fn_subtomo_mrc << std::endl; #endif } MD_this_subtomo.clear(); MD_this_subtomo.addLabel(EMDL_CTF_DEFOCUSU); MD_this_subtomo.addLabel(EMDL_CTF_VOLTAGE); MD_this_subtomo.addLabel(EMDL_CTF_CS); MD_this_subtomo.addLabel(EMDL_CTF_Q0); MD_this_subtomo.addLabel(EMDL_ORIENT_ROT); MD_this_subtomo.addLabel(EMDL_ORIENT_TILT); MD_this_subtomo.addLabel(EMDL_ORIENT_PSI); MD_this_subtomo.addLabel(EMDL_CTF_BFACTOR); MD_this_subtomo.addLabel(EMDL_CTF_SCALEFACTOR); // Find minimum and maximum tilts RFLOAT min_tilt = 999999., max_tilt = -999999.; for (int id = 0; id < tilts.size(); id++) { if (tilts[id] < min_tilt) min_tilt = tilts[id]; if (tilts[id] > max_tilt) max_tilt = tilts[id]; } RFLOAT defoci = 0., tilt_deg = 0., tilt_rad = 0., xxtomo = 0., zztomo = 0.; RFLOAT xximg = 0., deltaD = 0., ptcldefocus = 0., tilt_scale = 0.; RFLOAT tilt_step = 0., tilt_diff = 0., best_tilt_diff = 0., dose_w = 0.; RFLOAT cur_accu_dose = 0.; for (int ida = 0; ida < tilts.size(); ida++) { defoci = avg_defoci[ida]; tilt_deg = tilts[ida]; tilt_rad = DEG2RAD(tilt_deg); xxtomo = float(xx - (xdim / 2) ) * calc_angpix; zztomo = float(zz - (zdim / 2) ) * calc_angpix; // Calculating the height difference of the particle from the tilt axis xximg = xxtomo * cos(tilt_rad) + zztomo * sin(tilt_rad); deltaD = xximg * sin(tilt_rad); ptcldefocus = defoci + deltaD; if (do_skip_ctf_correction) ptcldefocus = 0.; // TODO: Should be 0.000. I think it is fine. // Now weighting the 3D CTF model using the tilt dependent scale factor and the dose dependent B-Factor tilt_scale = cos(fabs(tilt_rad)); if (tilts.size() <= 1) REPORT_ERROR("Less than 2 tilt angles are found in std::vector tilts. Tilt angles are read from " + fn_tilt_txt); // This is checked in the initialisation step. tilt_step = (max_tilt - min_tilt) / (RFLOAT(tilts.size()) - 1.); // Ensure that the denominator is always >= 1.0 best_tilt_diff = tilt_step + 0.5; for (int idb = 0; idb < order_tilts.size(); idb++) { tilt_diff = fabs(tilt_deg - order_tilts[idb]); if (tilt_diff < (tilt_step + 0.25)) { if (tilt_diff < best_tilt_diff) { best_tilt_diff = tilt_diff; cur_accu_dose = accu_dose[idb]; // TODO: cur_accu_dose always reset? Copied from the python script. Not a good way of C/C++ coding. But I think it is fine. } } } dose_w = cur_accu_dose * bfactor; // TODO: check this bfactor. T think it is fine. 
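/* Worked example of the per-tilt defocus and weighting computed above (the
 * particle position and dose are hypothetical): for a particle at
 * xxtomo = +500 A, zztomo = +100 A from the tomogram centre and a 30-degree
 * tilt (th),
 *
 *   xximg  = 500.*cos(th) + 100.*sin(th) = 483.0 A
 *   deltaD = 483.0*sin(th)               = 241.5 A   // "height difference from the tilt axis"
 *   ptcldefocus = defoci + 241.5                      // defocus assigned to this frame
 *
 * and this frame of the 3D CTF model is weighted by
 *   tilt_scale = cos(30 deg) ~ 0.866
 *   dose_w     = accumulated dose * --bfac, e.g. 20 e-/A2 * 4 = 80 A2 B-factor.
 */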
MD_this_subtomo.addObject(); MD_this_subtomo.setValue(EMDL_CTF_DEFOCUSU, ptcldefocus); MD_this_subtomo.setValue(EMDL_CTF_VOLTAGE, Voltage); MD_this_subtomo.setValue(EMDL_CTF_CS, Cs); MD_this_subtomo.setValue(EMDL_CTF_Q0, AmplitudeConstrast); MD_this_subtomo.setValue(EMDL_ORIENT_ROT, 0.); MD_this_subtomo.setValue(EMDL_ORIENT_TILT, tilt_deg); MD_this_subtomo.setValue(EMDL_ORIENT_PSI, 0.); MD_this_subtomo.setValue(EMDL_CTF_BFACTOR, dose_w); MD_this_subtomo.setValue(EMDL_CTF_SCALEFACTOR, tilt_scale); } MD_this_subtomo.write(fn_subtomo_star); // Write a new line to fn_ctf_recon command = ""; if ( (continue_old) && (exists(fn_subtomo_mrc)) ) // If the reconstructed CTF .mrc file exists, comment that line command += "# "; command += "relion_reconstruct --i " + fn_subtomo_star + " --o " + fn_subtomo_mrc + " --reconstruct_ctf $1 --angpix " + floatToString(calc_angpix); fout1 << command << std::endl; // Write a new object to MD_part FileName fn_extract_part = "Extract/" + fn_extract_job_alias + "/" + fn_tomo.withoutExtension() + integerToString(nr_subtomo, 6, '0') + ".mrc"; if (fn_coords.getExtension() == "star") MD_part.addObject(MD_coords.getObject()); // Append extra information from MD_coords else MD_part.addObject(); // Otherwise, add an empty object MD_part.setValue(EMDL_MICROGRAPH_NAME, fn_tomo); MD_part.setValue(EMDL_IMAGE_COORD_X, xx); MD_part.setValue(EMDL_IMAGE_COORD_Y, yy); MD_part.setValue(EMDL_IMAGE_COORD_Z, zz); MD_part.setValue(EMDL_IMAGE_NAME, fn_extract_part); MD_part.setValue(EMDL_CTF_IMAGE, fn_subtomo_mrc); MD_part.setValue(EMDL_CTF_MAGNIFICATION, Magnification); MD_part.setValue(EMDL_CTF_DETECTOR_PIXEL_SIZE, PixelSize); } // Close fn_rec here? And chmod u+x? I don't need this. } MD_part.write("particles_subtomo.star"); fout1.close(); command = "chmod u+x " + fn_ctf_recon; #ifdef DEBUG std::cout << " " << command << std::endl; #endif res = system(command.c_str()); // Check whether all files are closed in time. I have checked. Fine. if (do_use_gctf) // Delete Gctf temporary files { if (exists("micrographs_all_gctf.star")) { command = "rm -rf micrographs_all_gctf.star"; res = system(command.c_str()); } } std::cout << std::endl; std::cout << " All done!" << std::endl; std::cout << std::endl; std::cout << " Please extract sub-tomograms using the RELION GUI. Remember to use the same subtomoname '--o_extract' as you gave in this script: " << fn_extract_job_alias << std::endl; //std::cout << " Please run the 3D CTF model volume reconstructions using the .sh scripts written in the working directory." 
<< std::endl; std::cout << " Check, (split into multiple files for parallelisation) and run the script(s) from the command line:" << std::endl; std::cout << " --> do_all_reconstruct_ctfs.sh SubtomogramSize" << std::endl; std::cout << " (If you want to rescale the particle boxes in extraction, for example, from 200 to 100, then use 200 when running the script and rewindow the reconstructed CTF .mrc files with 'relion_image_handler --new_box 100')" << std::endl; std::cout << " STAR file to use for refinement (after sub-tomogram extraction and 3D CTF volume reconstruction): particles_subtomo.star" << std::endl; std::cout << std::endl; return; }; }; int main(int argc, char *argv[]) { // time_config(); prepare_subtomo prm; try { prm.read(argc, argv); prm.showMessages(); if (prm.show_usage) return RELION_EXIT_SUCCESS; if (!prm.dont_check_input_files) prm.initialChecks(); prm.run(); } catch (RelionError XE) { //prm.usage(); std::cout << XE; return RELION_EXIT_FAILURE; } return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/preprocess.cpp000066400000000000000000000023001411340063500174210ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include int main(int argc, char *argv[]) { Preprocessing prm; try { prm.read(argc, argv); prm.initialise(); prm.run(); } catch (RelionError XE) { //prm.usage(); std::cerr << XE; return RELION_EXIT_FAILURE; } return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/preprocess_mpi.cpp000066400000000000000000000024311411340063500202730ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #include int main(int argc, char *argv[]) { PreprocessingMpi prm; try { prm.read(argc, argv); prm.initialise(); prm.run(); } catch (RelionError XE) { //prm.usage(); std::cerr << XE; MPI_Abort(MPI_COMM_WORLD, RELION_EXIT_FAILURE); } MPI_Barrier(MPI_COMM_WORLD); return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/project.cpp000066400000000000000000000546311411340063500167200ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include class project_parameters { public: FileName fn_map, fn_ang, fn_out, fn_img, fn_model, fn_sym, fn_mask, fn_ang_simulate; RFLOAT rot, tilt, psi, xoff, yoff, zoff, angpix, maxres, stddev_white_noise, particle_diameter, ana_prob_range, ana_prob_step, sigma_offset; int padding_factor; int r_max, r_min_nn, interpolator, nr_uniform; bool do_only_one, do_ctf, do_ctf2, ctf_phase_flipped, do_ctf_intact_1st_peak, do_timing, do_add_noise, do_subtract_exp, do_ignore_particle_name, do_3d_rot; bool do_simulate; RFLOAT simulate_SNR; // I/O Parser IOParser parser; MlModel model; ObservationModel obsModel; void usage() { parser.writeUsage(std::cerr); } void read(int argc, char **argv) { parser.setCommandLine(argc, argv); int general_section = parser.addSection("Options"); fn_map = parser.getOption("--i", "Input map to be projected"); fn_out = parser.getOption("--o", "Rootname for output projections", "proj"); do_ctf = parser.checkOption("--ctf", "Apply CTF to reference projections"); ctf_phase_flipped = parser.checkOption("--ctf_phase_flip", "Flip phases of the CTF in the output projections"); do_ctf_intact_1st_peak = parser.checkOption("--ctf_intact_first_peak", "Ignore CTFs until their first peak?"); angpix = textToFloat(parser.getOption("--angpix", "Pixel size (in Angstroms)", "-1")); fn_mask = parser.getOption("--mask", "Mask that will be applied to the input map prior to making projections", ""); fn_ang = parser.getOption("--ang", "Particle STAR file with orientations and CTF for multiple projections (if None, assume single projection)", "None"); nr_uniform = textToInteger(parser.getOption("--nr_uniform", " OR get this many random samples from a uniform angular distribution", "-1")); sigma_offset = textToFloat(parser.getOption("--sigma_offset", "Apply Gaussian errors with this stddev to the XY-offsets", "0")); rot = textToFloat(parser.getOption("--rot", "First Euler angle (for a single projection)", "0")); tilt = textToFloat(parser.getOption("--tilt", "Second Euler angle (for a single projection)", 
"0")); psi = textToFloat(parser.getOption("--psi", "Third Euler angle (for a single projection)", "0")); xoff = textToFloat(parser.getOption("--xoff", "Origin X-offsets (in pixels) (for a single projection)", "0")); yoff = textToFloat(parser.getOption("--yoff", "Origin Y-offsets (in pixels) (for a single projection)", "0")); zoff = textToFloat(parser.getOption("--zoff", "Origin Z-offsets (in pixels) (for a single 3D rotation)", "0")); do_add_noise = parser.checkOption("--add_noise", "Add noise to the output projections (only with --ang)"); stddev_white_noise = textToFloat(parser.getOption("--white_noise", "Standard deviation of added white Gaussian noise", "0")); fn_model = parser.getOption("--model_noise", "Model STAR file with power spectra for coloured Gaussian noise", ""); do_subtract_exp = parser.checkOption("--subtract_exp", "Subtract projections from experimental images (in --ang)"); do_ignore_particle_name = parser.checkOption("--ignore_particle_name", "Ignore the rlnParticleName column (in --ang)"); do_only_one = (fn_ang == "None" && nr_uniform < 0); do_3d_rot = parser.checkOption("--3d_rot", "Perform 3D rotations instead of projection into 2D images"); do_simulate = parser.checkOption("--simulate", "Simulate data with known ground-truth by subtracting signal and adding projection in random orientation."); simulate_SNR = textToFloat(parser.getOption("--adjust_simulation_SNR", "Relative SNR compared to input images for realistic simulation of data", "1.")); fn_ang_simulate = parser.getOption("--ang_simulate", "STAR file with orientations for projections of realistic simulations (random from --ang STAR file by default)", ""); maxres = textToFloat(parser.getOption("--maxres", "Maximum resolution (in Angstrom) to consider in Fourier space (default Nyquist)", "-1")); padding_factor = textToInteger(parser.getOption("--pad", "Padding factor", "2")); do_ctf2 = parser.checkOption("--ctf2", "Apply CTF*CTF to reference projections"); if (parser.checkOption("--NN", "Use nearest-neighbour instead of linear interpolation")) interpolator = NEAREST_NEIGHBOUR; else interpolator = TRILINEAR; // Hidden r_min_nn = textToInteger(getParameter(argc, argv, "--r_min_nn", "10")); if (do_simulate) { do_ctf = true; } // Check for errors in the command-line option if (parser.checkForErrors()) REPORT_ERROR("Errors encountered on the command line (see above), exiting..."); } void project() { MetaDataTable DFo, MDang, MDang_sim; Matrix2D A3D; FileName fn_expimg; MultidimArray F3D, F2D, Fexpimg; MultidimArray Fctf, dummy; Image vol, img, expimg; FourierTransformer transformer, transformer_expimg; std::cout << " Reading map: " << fn_map << std::endl; vol.read(fn_map); std::cout << " Done reading map!" << std::endl; if (fn_mask != "") { Image msk; msk.read(fn_mask); if (!msk().sameShape(vol())) REPORT_ERROR("project ERROR: mask and map have different sizes!"); FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(vol()) DIRECT_MULTIDIM_ELEM(vol(), n) *= DIRECT_MULTIDIM_ELEM(msk(), n); } if (nr_uniform > 0) { std::cout << " Generating " << nr_uniform << " projections taken randomly from a uniform angular distribution ..." 
<< std::endl; MDang.clear(); randomize_random_generator(); for (long int i = 0; i < nr_uniform; i++) { RFLOAT rot, tilt, psi, xoff, yoff; rot = rnd_unif() * 360.; bool ok_tilt = false; while (!ok_tilt) { tilt = rnd_unif() * 180.; if (rnd_unif() < fabs(SIND(tilt))) ok_tilt = true; } psi = rnd_unif() * 360.; xoff = rnd_gaus(0., sigma_offset); yoff = rnd_gaus(0., sigma_offset); MDang.addObject(); MDang.setValue(EMDL_ORIENT_ROT, rot); MDang.setValue(EMDL_ORIENT_TILT, tilt); MDang.setValue(EMDL_ORIENT_PSI, psi); MDang.setValue(EMDL_ORIENT_ORIGIN_X, xoff); MDang.setValue(EMDL_ORIENT_ORIGIN_Y, yoff); MDang.setValue(EMDL_IMAGE_OPTICS_GROUP, 1); } std::cout << " Setting default values for optics table, though CTFs are not used in the projections ... " << std::endl; MetaDataTable MDopt; MDopt.addObject(); MDopt.setValue(EMDL_IMAGE_OPTICS_GROUP, 1); std::string name = "optics1"; MDopt.setValue(EMDL_IMAGE_OPTICS_GROUP_NAME, name); MDopt.setValue(EMDL_CTF_VOLTAGE, 300.); MDopt.setValue(EMDL_CTF_CS, 2.7); vol.MDMainHeader.getValue(EMDL_IMAGE_SAMPLINGRATE_X, angpix); MDopt.setValue(EMDL_IMAGE_PIXEL_SIZE, angpix); MDopt.setValue(EMDL_IMAGE_SIZE, XSIZE(vol())); int mydim = (do_3d_rot) ? 3 : 2; MDopt.setValue(EMDL_IMAGE_DIMENSIONALITY, mydim); obsModel = ObservationModel(MDopt); } else if (!do_only_one) { std::cout << " Reading STAR file with all angles " << fn_ang << std::endl; ObservationModel::loadSafely(fn_ang, obsModel, MDang); std::cout << " Done reading STAR file!" << std::endl; if (do_simulate && fn_ang_simulate != "") { std::cout << " Reading STAR file with angles for simulated images " << fn_ang << std::endl; MDang_sim.read(fn_ang_simulate); std::cout << " Done reading STAR file with angles for simulated images!" << std::endl; if (MDang_sim.numberOfObjects() < MDang.numberOfObjects()) { REPORT_ERROR("ERROR: STAR file with angles for simulated images has fewer entries than the input STAR file with all angles."); } } } if (angpix < 0.) { if (!do_only_one) { // Get angpix from the first optics group in the obsModel angpix = obsModel.getPixelSize(0); std::cout << " + Using pixel size from the first optics group in the --ang STAR file: " << angpix << std::endl; } else { angpix = vol.samplingRateX(); std::cerr << "WARNING: The pixel size (--angpix) was not specified." << std::endl; std::cerr << " The value in the input image header (= " << angpix << ") is used instead." << std::endl; } } // Now that we have the size of the volume, check r_max if (maxres < 0.) r_max = XSIZE(vol()); else r_max = CEIL(XSIZE(vol()) * angpix / maxres); // Set right size of F2D and initialize to zero if (do_3d_rot) img().resize(ZSIZE(vol()), YSIZE(vol()), XSIZE(vol())); else img().resize(YSIZE(vol()), XSIZE(vol())); transformer.setReal(img()); transformer.getFourierAlias(F2D); // Set up the projector int data_dim = (do_3d_rot) ? 
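/*
  The --nr_uniform branch above draws rot and psi uniformly in [0, 360) and keeps a candidate
  tilt only with probability |sin(tilt)|, which is what makes the resulting orientations
  uniform over the sphere instead of clustered at the poles. A minimal standalone sketch of
  that rejection step, using only the standard library (the names Euler and
  sampleUniformOrientation are illustrative, not part of RELION):

    #include <cmath>
    #include <random>

    struct Euler { double rot, tilt, psi; };

    Euler sampleUniformOrientation(std::mt19937 &gen)
    {
        const double deg2rad = 3.14159265358979323846 / 180.0;
        std::uniform_real_distribution<double> u(0.0, 1.0);
        Euler e;
        e.rot = 360.0 * u(gen);
        e.psi = 360.0 * u(gen);
        do
        {
            e.tilt = 180.0 * u(gen);                                        // propose tilt uniformly in [0, 180)
        } while (u(gen) >= std::fabs(std::sin(e.tilt * deg2rad)));          // accept with probability |sin(tilt)|
        return e;
    }

  The XY offsets are then drawn from a Gaussian with stddev --sigma_offset, as in the loop above.
*/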
3 : 2; Projector projector((int)XSIZE(vol()), interpolator, padding_factor, r_min_nn, data_dim); projector.computeFourierTransformMap(vol(), dummy, 2* r_max); if (do_only_one) { Euler_rotation3DMatrix(rot, tilt, psi, A3D); F2D.initZeros(); projector.get2DFourierTransform(F2D, A3D); if (ABS(xoff) > 0.001 || ABS(yoff) > 0.001 || (do_3d_rot && ABS(zoff) > 0.001) ) { Matrix1D shift(2); XX(shift) = -xoff; YY(shift) = -yoff; if (do_3d_rot) { shift.resize(3); ZZ(shift) = -zoff; shiftImageInFourierTransform(F2D, F2D, XSIZE(vol()), XX(shift), YY(shift), ZZ(shift)); } else shiftImageInFourierTransform(F2D, F2D, XSIZE(vol()), XX(shift), YY(shift)); } // Feb 01,2017 - Shaoda, add white noise to 2D / 3D single images if (do_add_noise) { if ( (!(stddev_white_noise > 0.)) || (fn_model != "") ) REPORT_ERROR("ERROR: Only add --white_noise to a single image!"); // fftw normalization and factor sqrt(2) for two-dimensionality of complex plane // TODO: sqrt(2) ??? Why ??? stddev_white_noise /= (data_dim == 3) ? (XSIZE(vol()) * XSIZE(vol())) : (XSIZE(vol()) * sqrt(2)); // Add white noise FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM(F2D) { DIRECT_A3D_ELEM(F2D, k, i, j).real += rnd_gaus(0., stddev_white_noise); DIRECT_A3D_ELEM(F2D, k, i, j).imag += rnd_gaus(0., stddev_white_noise); } } transformer.inverseFourierTransform(F2D, img()); // Shift the image back to the center... CenterFFT(img(), false); img.setSamplingRateInHeader(angpix); img.write(fn_out); std::cout<<" Done writing "< 0.) stddev_white_noise /= XSIZE(vol()) * sqrt(2); // fftw normalization and factor sqrt(2) for two-dimensionality of complex plane else REPORT_ERROR("ERROR: When adding noise provide either --model_noise or --white_noise"); } long int imgno = 0; long int max_imgno = MDang.numberOfObjects() - 1; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDang) { MDang.getValue(EMDL_ORIENT_ROT, rot); MDang.getValue(EMDL_ORIENT_TILT, tilt); MDang.getValue(EMDL_ORIENT_PSI, psi); MDang.getValue(EMDL_ORIENT_ORIGIN_X_ANGSTROM, xoff); MDang.getValue(EMDL_ORIENT_ORIGIN_Y_ANGSTROM, yoff); if (do_3d_rot) MDang.getValue(EMDL_ORIENT_ORIGIN_Z_ANGSTROM, zoff); xoff /= angpix; yoff /= angpix; zoff /= angpix; Euler_rotation3DMatrix(rot, tilt, psi, A3D); F2D.initZeros(); projector.get2DFourierTransform(F2D, A3D); if (ABS(xoff) > 0.001 || ABS(yoff) > 0.001 || (do_3d_rot && ABS(zoff) > 0.001) ) { Matrix1D shift(2); XX(shift) = -xoff; YY(shift) = -yoff; if (do_3d_rot) { shift.resize(3); ZZ(shift) = -zoff; shiftImageInFourierTransform(F2D, F2D, XSIZE(vol()), XX(shift), YY(shift), ZZ(shift) ); } else shiftImageInFourierTransform(F2D, F2D, XSIZE(vol()), XX(shift), YY(shift) ); } // Apply CTF if necessary CTF ctf; if (do_ctf || do_ctf2) { if (do_3d_rot) { Image Ictf; FileName fn_ctf; MDang.getValue(EMDL_CTF_IMAGE, fn_ctf); Ictf.read(fn_ctf); // Set the CTF-image in Fctf Fctf.resize(F2D); // If there is a redundant half, get rid of it if (XSIZE(Ictf()) == YSIZE(Ictf())) { Ictf().setXmippOrigin(); // Set the CTF-image in Fctf FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM(Fctf) { // Use negative kp,ip and jp indices, because the origin in the ctf_img lies half a pixel to the right of the actual center.... 
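/*
  In both the single-projection branch above and the particle loop that follows, sub-pixel
  offsets are applied by shiftImageInFourierTransform rather than by real-space interpolation:
  by the Fourier shift theorem, translating an image multiplies each Fourier component by a
  phase ramp. A minimal sketch of that idea for an N x (N/2 + 1) half transform in FFTW
  layout; the function name, storage layout and sign convention here are illustrative
  assumptions and may differ from RELION's actual implementation:

    #include <cmath>
    #include <complex>
    #include <vector>

    void applyShiftInFourierSpace(std::vector<std::complex<double> > &F,
                                  int N, double sx, double sy)
    {
        const double twopi = 2.0 * 3.14159265358979323846;
        const int NH = N / 2 + 1;
        for (int iy = 0; iy < N; iy++)
        {
            int ky = (iy < NH) ? iy : iy - N;              // signed frequency index along y
            for (int ix = 0; ix < NH; ix++)                // x stores only non-negative frequencies
            {
                double phase = -twopi * (ix * sx + ky * sy) / N;
                F[(size_t)iy * NH + ix] *= std::complex<double>(std::cos(phase), std::sin(phase));
            }
        }
    }

  Because only phases change, no interpolation error is introduced, which is why the offsets
  can be applied after the central slice has been extracted by projector.get2DFourierTransform.
*/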
DIRECT_A3D_ELEM(Fctf, k, i, j) = A3D_ELEM(Ictf(), -kp, -ip, -jp); } } // otherwise, just window the CTF to the current resolution else if (XSIZE(Ictf()) == YSIZE(Ictf()) / 2 + 1) { windowFourierTransform(Ictf(), Fctf, YSIZE(Fctf)); } // if dimensions are neither cubical nor FFTW, stop else { REPORT_ERROR("3D CTF volume must be either cubical or adhere to FFTW format!"); } } else { ctf.readByGroup(MDang, &obsModel); // This MDimg only contains one particle! Fctf.resize(F2D); ctf.getFftwImage(Fctf, XSIZE(vol()), XSIZE(vol()), angpix, ctf_phase_flipped, false, do_ctf_intact_1st_peak, true); } FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(F2D) { DIRECT_MULTIDIM_ELEM(F2D, n) *= DIRECT_MULTIDIM_ELEM(Fctf, n); if (do_ctf2) DIRECT_MULTIDIM_ELEM(F2D, n) *= DIRECT_MULTIDIM_ELEM(Fctf, n); } } // Apply Gaussian noise if (do_add_noise) { if (fn_model !="") { //// 23MAY2014: for preparation of 1.3 release: removed reading a exp_model, replaced by just reading MDang // This does however mean that I no longer know mic_id of this image: replace by 0.... FileName fn_group; if (MDang.containsLabel(EMDL_MLMODEL_GROUP_NAME)) { MDang.getValue(EMDL_MLMODEL_GROUP_NAME, fn_group); } else { if (MDang.containsLabel(EMDL_MICROGRAPH_NAME)) { FileName fn_orig, fn_pre, fn_jobnr; MDang.getValue(EMDL_MICROGRAPH_NAME, fn_orig); if (!decomposePipelineFileName(fn_orig, fn_pre, fn_jobnr, fn_group)) { fn_group = fn_orig; // Not a pipeline filename; use as is } } else { REPORT_ERROR("ERROR: cannot find rlnGroupName or rlnMicrographName in the input --ang file..."); } } int my_mic_id = -1; for (int mic_id = 0; mic_id < model.group_names.size(); mic_id++) { if (fn_group == model.group_names[mic_id]) { my_mic_id = mic_id; break; } } if (my_mic_id < 0) REPORT_ERROR("ERROR: cannot find " + fn_group + " in the input model file..."); RFLOAT normcorr = 1.; if (MDang.containsLabel(EMDL_IMAGE_NORM_CORRECTION)) { MDang.getValue(EMDL_IMAGE_NORM_CORRECTION, normcorr); } // Add coloured noise FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM(F2D) { int ires = ROUND( sqrt( (RFLOAT)(kp*kp + ip*ip + jp*jp) ) ); ires = XMIPP_MIN(ires, model.ori_size/2); // at freqs higher than Nyquist: use last sigma2 value RFLOAT sigma = sqrt(DIRECT_A1D_ELEM(model.sigma2_noise[my_mic_id], ires)); DIRECT_A3D_ELEM(F2D, k, i, j).real += rnd_gaus(0., sigma); DIRECT_A3D_ELEM(F2D, k, i, j).imag += rnd_gaus(0., sigma); } } else { // Add white noise FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM(F2D) { DIRECT_A3D_ELEM(F2D, k, i, j).real += rnd_gaus(0., stddev_white_noise); DIRECT_A3D_ELEM(F2D, k, i, j).imag += rnd_gaus(0., stddev_white_noise); } } } img().initZeros(); transformer.inverseFourierTransform(F2D, img()); // Shift the image back to the center... CenterFFT(img(), false); // Subtract the projection from the corresponding experimental image if (do_subtract_exp || do_simulate) { MDang.getValue(EMDL_IMAGE_NAME, fn_expimg); MDang.setValue(EMDL_IMAGE_ORI_NAME, fn_expimg); // Store fn_expimg in rlnOriginalParticleName expimg.read(fn_expimg); img() = expimg() - img(); } // If we're simulating realistic images, then now add CTF-affected projection again if (do_simulate) { // Take random orientation from the input STAR file is fn_ang_simulate is empty. 
Otherwise, use fn_ang_simulate if (fn_ang_simulate == "") { long int random_imgno = -1; while (random_imgno < 0 || random_imgno > max_imgno) { random_imgno = rnd_unif()*max_imgno; } MDang.getValue(EMDL_ORIENT_ROT, rot, random_imgno); MDang.getValue(EMDL_ORIENT_TILT, tilt, random_imgno); MDang.getValue(EMDL_ORIENT_PSI, psi, random_imgno); MDang.getValue(EMDL_ORIENT_ORIGIN_X_ANGSTROM, xoff, random_imgno); MDang.getValue(EMDL_ORIENT_ORIGIN_Y_ANGSTROM, yoff, random_imgno); if (do_3d_rot) MDang.getValue(EMDL_ORIENT_ORIGIN_Z_ANGSTROM, zoff, random_imgno); xoff /= angpix; yoff /= angpix; zoff /= angpix; } else { MDang_sim.getValue(EMDL_ORIENT_ROT, rot, imgno); MDang_sim.getValue(EMDL_ORIENT_TILT, tilt, imgno); MDang_sim.getValue(EMDL_ORIENT_PSI, psi, imgno); MDang_sim.getValue(EMDL_ORIENT_ORIGIN_X_ANGSTROM, xoff, imgno); MDang_sim.getValue(EMDL_ORIENT_ORIGIN_Y_ANGSTROM, yoff, imgno); if (do_3d_rot) MDang_sim.getValue(EMDL_ORIENT_ORIGIN_Z_ANGSTROM, zoff, imgno); xoff /= angpix; yoff /= angpix; zoff /= angpix; } Euler_rotation3DMatrix(rot, tilt, psi, A3D); F2D.initZeros(); projector.get2DFourierTransform(F2D, A3D); if (ABS(xoff) > 0.001 || ABS(yoff) > 0.001 || (do_3d_rot && ABS(zoff) > 0.001) ) { Matrix1D shift(2); XX(shift) = -xoff; YY(shift) = -yoff; if (do_3d_rot) { shift.resize(3); ZZ(shift) = -zoff; shiftImageInFourierTransform(F2D, F2D, XSIZE(vol()), XX(shift), YY(shift), ZZ(shift) ); } else shiftImageInFourierTransform(F2D, F2D, XSIZE(vol()), XX(shift), YY(shift) ); } // Apply CTF CTF ctf; if (do_ctf || do_ctf2) { if (do_3d_rot) { Image Ictf; FileName fn_ctf; MDang.getValue(EMDL_CTF_IMAGE, fn_ctf); Ictf.read(fn_ctf); Ictf().setXmippOrigin(); // If there is a redundant half, get rid of it if (XSIZE(Ictf()) == YSIZE(Ictf())) { Ictf().setXmippOrigin(); FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM(Fctf) { // Use negative kp,ip and jp indices, because the origin in the ctf_img lies half a pixel to the right of the actual center.... DIRECT_A3D_ELEM(Fctf, k, i, j) = A3D_ELEM(Ictf(), -kp, -ip, -jp); } } // otherwise, just window the CTF to the current resolution else if (XSIZE(Ictf()) == YSIZE(Ictf()) / 2 + 1) { windowFourierTransform(Ictf(), Fctf, YSIZE(Fctf)); } // if dimensions are neither cubical nor FFTW, stop else { REPORT_ERROR("3D CTF volume must be either cubical or adhere to FFTW format!"); } } else { ctf.read(MDang, MDang, imgno); Fctf.resize(F2D); ctf.getFftwImage(Fctf, XSIZE(vol()), XSIZE(vol()), angpix, ctf_phase_flipped, false, do_ctf_intact_1st_peak, true); } FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(F2D) { DIRECT_MULTIDIM_ELEM(F2D, n) *= DIRECT_MULTIDIM_ELEM(Fctf, n); if (do_ctf2) DIRECT_MULTIDIM_ELEM(F2D, n) *= DIRECT_MULTIDIM_ELEM(Fctf, n); } } expimg().initZeros(); transformer.inverseFourierTransform(F2D, expimg()); // Shift the image back to the center... CenterFFT(expimg(), false); // Modify the strength of the signal if (fabs(simulate_SNR - 1.) 
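/*
  The --add_noise path above supports two noise models: plain white noise with stddev
  --white_noise, and coloured noise whose per-shell variance is read from the sigma2_noise
  spectrum of a --model_noise STAR file, with the shell index clamped at Nyquist. A minimal
  sketch of the coloured case for an N x (N/2 + 1) half transform; the container, indexing and
  function name are illustrative assumptions, not RELION code:

    #include <cmath>
    #include <complex>
    #include <random>
    #include <vector>

    void addColouredNoise(std::vector<std::complex<double> > &F, int N,
                          const std::vector<double> &sigma2,      // noise power per resolution shell
                          std::mt19937 &gen)
    {
        const int NH = N / 2 + 1;
        std::normal_distribution<double> gauss(0.0, 1.0);
        for (int iy = 0; iy < N; iy++)
        {
            int ky = (iy < NH) ? iy : iy - N;
            for (int ix = 0; ix < NH; ix++)
            {
                int shell = (int)std::round(std::sqrt(double(ix * ix + ky * ky)));
                if (shell >= (int)sigma2.size()) shell = (int)sigma2.size() - 1;   // clamp beyond Nyquist
                const double s = std::sqrt(sigma2[shell]);
                F[(size_t)iy * NH + ix] += std::complex<double>(s * gauss(gen), s * gauss(gen));
            }
        }
    }

  Real and imaginary parts receive independent Gaussian noise, exactly as in the
  FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM loop above.
*/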
> 0.000001) { expimg() *= simulate_SNR; } img() += expimg(); } img.setSamplingRateInHeader(angpix); if (do_3d_rot) { fn_img.compose(fn_out, imgno+1,"mrc"); img.write(fn_img); } else { // Write this particle to the stack on disc // First particle: write stack in overwrite mode, from then on just append to it fn_img.compose(imgno+1,fn_out+".mrcs"); if (imgno == 0) img.write(fn_img, -1, false, WRITE_OVERWRITE); else img.write(fn_img, -1, false, WRITE_APPEND); } // Set the image name to the output STAR file DFo.addObject(); DFo.setObject(MDang.getObject()); DFo.setValue(EMDL_IMAGE_NAME,fn_img); if (do_simulate) { DFo.setValue(EMDL_ORIENT_ROT, rot); DFo.setValue(EMDL_ORIENT_TILT, tilt); DFo.setValue(EMDL_ORIENT_PSI, psi); DFo.setValue(EMDL_ORIENT_ORIGIN_X_ANGSTROM, xoff * angpix); DFo.setValue(EMDL_ORIENT_ORIGIN_Y_ANGSTROM, yoff * angpix); if (do_3d_rot) DFo.setValue(EMDL_ORIENT_ORIGIN_Z_ANGSTROM, zoff * angpix); } if (imgno%60==0) progress_bar(imgno); imgno++; } progress_bar(MDang.numberOfObjects()); // Write out STAR file with all information fn_img = fn_out + ".star"; obsModel.save(DFo, fn_img); std::cout<<" Done writing "< int main(int argc, char *argv[]) { Reconstructor prm; try { prm.read(argc, argv); prm.run(); } catch (RelionError XE) { std::cerr << XE; return RELION_EXIT_FAILURE; } return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/reconstruct_mpi.cpp000066400000000000000000000024051411340063500204620ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include #include int main(int argc, char *argv[]) { ReconstructorMpi prm; try { prm.read(argc, argv); prm.run(); } catch (RelionError XE) { if (prm.verb > 0) std::cerr << XE; MPI_Abort(MPI_COMM_WORLD, RELION_EXIT_FAILURE); } MPI_Barrier(MPI_COMM_WORLD); return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/refine.cpp000066400000000000000000000027311411340063500165140ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. 
Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include /************************************************************************** Main **************************************************************************/ int main(int argc, char **argv) { MlOptimiser optimiser; try { // Read in parameters from the command line optimiser.read(argc, argv); // Set up things optimiser.initialise(); // Do the real work optimiser.iterate(); } catch (RelionError XE) { //optimiser.usage(); std::cerr << XE; return RELION_EXIT_FAILURE; } return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/refine_mpi.cpp000066400000000000000000000025261411340063500173630ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include int main(int argc, char **argv) { MlOptimiserMpi optimiser; try { // Read in parameters from the command line optimiser.read(argc, argv); // Set things up optimiser.initialise(); // Iterate optimiser.iterate(); } catch (RelionError XE) { std::cerr << XE; MPI_Abort(MPI_COMM_WORLD, RELION_EXIT_FAILURE); return RELION_EXIT_FAILURE; } return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/reposition.cpp000066400000000000000000000075451411340063500174470ustar00rootroot00000000000000#include #include #include #include #include #include #include #include #include #include #include using namespace gravis; int main(int argc, char *argv[]) { ReferenceMap reference; IOParser parser; parser.setCommandLine(argc, argv); int gen_section = parser.addSection("General options"); std::string starFn = parser.getOption("--i", "Input STAR file containing the particles"); const int max_shift = textToInteger(parser.getOption("--max_shift", "Maximal allowed shift", "10")); const double pad = textToDouble(parser.getOption("--cc_pad", "Cross-correlation padding", "1")); reference.read(parser, argc, argv); const int nr_omp_threads = textToInteger(parser.getOption("--j", "Number of (OMP) threads", "1")); std::string outFn = parser.getOption("--o", "Output path", "repositioned/"); if (parser.checkForErrors()) { REPORT_ERROR("Errors encountered on the command line (see above), exiting..."); } reference.load(1, true); reference.projectors[0].interpolator = TRILINEAR; // Get dimensions const int s = reference.s; const int sh = s/2 + 1; ObservationModel obsModel; MetaDataTable mdt0; ObservationModel::loadSafely(starFn, obsModel, mdt0); std::vector allMdts = StackHelper::splitByMicrographName(mdt0); const int mgc = allMdts.size(); Image freqWgh(sh,s); freqWgh.data.initConstant(1.0); freqWgh = FilterHelper::raisedCosEnvFreq2D( freqWgh, 
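/*
  reposition band-limits the comparison with a raised-cosine envelope around reference.k_out,
  and the per-particle loop below multiplies the observed transform by the conjugate of the
  predicted reference projection, inverse transforms the product into a cross-correlation map,
  and reads the shift from a quadratically interpolated peak within --max_shift; the refined
  shift is then subtracted from the stored origin offsets via the optics-group pixel size.
  Below is a sketch of one common way to do the sub-pixel peak refinement; the exact formula
  used by Interpolation::quadraticMaxWrapXY may differ, and the function name here is only
  illustrative:

    #include <cmath>

    // Given a correlation peak value c0 at an integer position and its two neighbours
    // cm (one pixel before) and cp (one pixel after), fit a parabola through the three
    // samples and return the sub-pixel offset of its maximum relative to the integer position.
    double parabolicPeakOffset(double cm, double c0, double cp)
    {
        const double denom = cm - 2.0 * c0 + cp;
        return (std::fabs(denom) > 1e-12) ? 0.5 * (cm - cp) / denom : 0.0;
    }

  Applied independently along x and y, this yields the (padding-scaled) sub-pixel shift that
  is read off the cross-correlation map.
*/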
reference.k_out-1, reference.k_out+1); std::vector> ccsFs(nr_omp_threads); std::vector> ccsRs(nr_omp_threads); const int s2 = (int)(pad * s); const int sh2 = s2/2 + 1; for (int t = 0; t < nr_omp_threads; t++) { ccsFs[t] = Image(sh2,s2); ccsRs[t] = Image(s2,s2); } NewFFT::DoublePlan plan(s2,s2,1); MetaDataTable outMdt; for (int m = 0; m < mgc; m++) { std::cout << m << " / " << mgc << "\n"; const int pc = allMdts[m].numberOfObjects(); std::vector> pred = reference.predictAll( allMdts[m], obsModel, ReferenceMap::Own, nr_omp_threads, true, true, false); std::vector> obs = StackHelper::loadStackFS( allMdts[m], "", nr_omp_threads, true, &obsModel); #pragma omp parallel for num_threads(nr_omp_threads) for (int p = 0; p < pc; p++) { int t = omp_get_thread_num(); int opticsGroup = obsModel.getOpticsGroup(allMdts[m], p); double angpix = obsModel.getPixelSize(opticsGroup); for (int y = 0; y < s; y++) for (int x = 0; x < sh; x++) { Complex z = obs[p](y,x) * freqWgh(y,x) * pred[p](y,x).conj(); const int yy = y < sh? y : s2 - (s - y); ccsFs[t](yy,x) = dComplex(z.real, z.imag); } NewFFT::inverseFourierTransform(ccsFs[t](), ccsRs[t](), plan); d2Vector shift = Interpolation::quadraticMaxWrapXY( ccsRs[t], 1e-12, pad * max_shift, pad * max_shift); if (shift.x >= sh2) shift.x -= s2; if (shift.y >= sh2) shift.y -= s2; double xoff, yoff; allMdts[m].getValue(EMDL_ORIENT_ORIGIN_X_ANGSTROM, xoff, p); allMdts[m].getValue(EMDL_ORIENT_ORIGIN_Y_ANGSTROM, yoff, p); xoff /= angpix; yoff /= angpix; xoff -= shift.x; yoff -= shift.y; xoff *= angpix; yoff *= angpix; allMdts[m].setValue(EMDL_ORIENT_ORIGIN_X_ANGSTROM, xoff, p); allMdts[m].setValue(EMDL_ORIENT_ORIGIN_Y_ANGSTROM, yoff, p); } outMdt.append(allMdts[m]); } if (outFn.length() > 0 && outFn[outFn.length()-1] != '/') { outFn = outFn + "/"; } std::string command = " mkdir -p " + outFn; int res = system(command.c_str()); outMdt.write(outFn+"repositioned.star"); return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/run_ctffind.cpp000066400000000000000000000023011411340063500175360ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include int main(int argc, char *argv[]) { CtffindRunner prm; try { prm.read(argc, argv); prm.initialise(); prm.run(); } catch (RelionError XE) { //prm.usage(); std::cerr << XE; return RELION_EXIT_FAILURE; } return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/run_ctffind_mpi.cpp000066400000000000000000000024271411340063500204140ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. 
Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include int main(int argc, char *argv[]) { CtffindRunnerMpi prm; try { prm.read(argc, argv); prm.initialise(); prm.run(); } catch (RelionError XE) { if (prm.verb > 0) //prm.usage(); std::cerr << XE; MPI_Abort(MPI_COMM_WORLD, RELION_EXIT_FAILURE); } MPI_Barrier(MPI_COMM_WORLD); return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/run_motioncorr.cpp000066400000000000000000000023071411340063500203220ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include int main(int argc, char *argv[]) { MotioncorrRunner prm; try { prm.read(argc, argv); prm.initialise(); prm.run(); } catch (RelionError XE) { //prm.usage(); std::cerr << XE; return RELION_EXIT_FAILURE; } return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/run_motioncorr_mpi.cpp000066400000000000000000000024371411340063500211730ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #include int main(int argc, char *argv[]) { MotioncorrRunnerMpi prm; try { prm.read(argc, argv); prm.initialise(); prm.run(); } catch (RelionError XE) { //if (prm.verb > 0) //prm.usage(); std::cerr << XE; MPI_Abort(MPI_COMM_WORLD, RELION_EXIT_FAILURE); } MPI_Barrier(MPI_COMM_WORLD); return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/scheduler.cpp000066400000000000000000000247471411340063500172350ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include "src/scheduler.h" #include #include "src/strings.h" class scheduler_parameters { public: FileName mydir, newname; float myconstant; bool do_reset, do_run, do_abort, has_ori_value; int verb; std::string add, set_var, set_mode, start_node, current_node, email, type, name, value, ori_value, mode, input, input2, output, output2, boolvar; std::string run_pipeline; // The actual pipeline Schedule schedule; // I/O Parser IOParser parser; void usage() { parser.writeUsage(std::cerr); std::cerr << std::endl; std::cerr << " Different ways of using this program: " << std::endl; std::cerr << std::endl << " ++ Add a variable (of type float, bool or file): " << std::endl; std::cerr << " --schedule test --add variable --name iter --value 0" << std::endl; std::cerr << " --schedule test --add variable --name is_finished --value False" << std::endl; std::cerr << " --schedule test --add variable --name initial_model --value map.mrc" << std::endl; std::cerr << std::endl << " ++ Add an operator node (of type float, bool or file): " << std::endl; std::cerr << " --schedule test --add operator --type " << SCHEDULE_FLOAT_OPERATOR_PLUS << " --i iter --i2 iter_step --o iter" << std::endl; std::cerr << " --schedule test --add operator --type " << SCHEDULE_FLOAT_OPERATOR_PLUS << " --i iter --i2 1 --o iter" << std::endl; std::cerr << " --schedule test --add operator --type " << SCHEDULE_OPERATOR_TOUCH_FILE << " --i initial_model" << std::endl; std::cerr << " --schedule test --add operator --type " << SCHEDULE_BOOLEAN_OPERATOR_GT << " --i iter --i2 10 --o is_finished" << std::endl; std::cerr << " --schedule test --add operator --type " << SCHEDULE_BOOLEAN_OPERATOR_FILE_EXISTS << " --i initial_model --o is_finished" << std::endl; std::cerr << std::endl << " ++ Add a job node: " << std::endl; std::cerr << " --schedule test --add job --i my_import --mode continue/new/overwrite" << std::endl; std::cerr << " --schedule test --add job --i exit" << std::endl; std::cerr << std::endl << " ++ Add an edge: " << std::endl; std::cerr << " --schedule test --add edge --i inputnodename --o outputnodename" << std::endl; std::cerr << 
std::endl << " ++ Add a fork: " << std::endl; std::cerr << " --schedule test --add fork --i inputnodename --bool boolvar --o outputnodename --o2 outputnodename_if_false" << std::endl; std::cerr << "TODO: add information about setting variables etc too!" << std::endl; std::cerr << std::endl; exit(1); } void read(int argc, char **argv) { parser.setCommandLine(argc, argv); // Fill the window, but don't show it! int gen_section = parser.addSection("General options"); mydir = parser.getOption("--schedule", "Directory name of the schedule"); mydir = "Schedules/" + mydir; newname = parser.getOption("--copy", "Make a copy of the schedule into this directory", ""); int add_section = parser.addSection("Add elements to the schedule"); add = parser.getOption("--add", "Specify category of element to add to the schedule (variable, operator, job, edge or fork)", ""); type = parser.getOption("--type", "Specify type of that element to add to the schedule", ""); input = parser.getOption("--i", "Specify input to the element ", ""); input2 = parser.getOption("--i2", "Specify 2nd input to the element ", ""); boolvar = parser.getOption("--bool", "Name of boolean variable (for forks)", ""); output = parser.getOption("--o", "Specify output of the element ", ""); output2 = parser.getOption("--o2", "Specify 2nd output of the element ", ""); name = parser.getOption("--name", "Name of the variable or job to be added",""); value = parser.getOption("--value", "Value of the variable to be added",""); ori_value = parser.getOption("--original_value", "Original value of the variable to be added",""); mode = parser.getOption("--mode", "Mode (for jobs): new, overwrite or continue",""); int set_section = parser.addSection("Set values of variables in the schedule"); do_reset = parser.checkOption("--reset", "Reset all variables to their original values"); do_abort = parser.checkOption("--abort", "Abort a schedule that is running"); set_var = parser.getOption("--set_var", "Name of a variable to set (using also the --value argument)", ""); set_mode = parser.getOption("--set_job_mode", "Name of a job whose mode to set (using also the --value argument)", ""); current_node = parser.getOption("--set_current_node", "Name of a node to which to set current_node", ""); int run_section = parser.addSection("Run the scheduler within a pipeline"); do_run = parser.checkOption("--run", "Run the scheduler"); verb = textToInteger(parser.getOption("--verb", "Running verbosity: 0, 1, 2 or 3)", "1")); run_pipeline = parser.getOption("--run_pipeline", "Name of the pipeline in which to run this schedule", "default"); // Someone could give an empty-string ori_value.... 
has_ori_value = checkParameter(argc, argv, "--original_value"); // Check for errors in the command-line option if (argc==1) usage(); else if (parser.checkForErrors()) REPORT_ERROR("Errors encountered on the command line (see above), exiting..."); } void run() { // Make sure mydir ends with a slash, and that it exists if (mydir[mydir.length()-1] != '/') mydir += "/"; schedule.setName(mydir); schedule.do_read_only = false; if (do_run) { // Remove the abort signal if it exists FileName myabort = schedule.name + RELION_JOB_ABORT_NOW; if (exists(myabort)) std::remove(myabort.c_str()); PipeLine pipeline; pipeline.setName(run_pipeline); if (exists(pipeline.name + "_pipeline.star")) { std::string lock_message = "mainGUI constructor"; pipeline.read(DO_LOCK, lock_message); // With the locking system, each read needs to be followed soon with a write pipeline.write(DO_LOCK); } else { pipeline.write(); } schedule.read(DO_LOCK); // lock for the entire duration of the run!! schedule.verb = verb; schedule.run(pipeline); schedule.write(DO_LOCK); return; // exit now } if (!exists(mydir)) { std::string command = "mkdir -p " + mydir; int res = system(command.c_str()); } if (do_abort) { schedule.read(); schedule.abort(); return; } // read in schedule if it exists if (exists(mydir + "schedule.star")) { schedule.read(DO_LOCK); schedule.write(DONT_LOCK, mydir + "schedule.star.bck"); // just save another copy of the starfile ... } if (newname != "") { schedule.copy(newname); return; } else if (add != "") { if (add == "variable") { schedule.setVariable(name, value); if (has_ori_value) schedule.setOriginalVariable(name, ori_value); } else if (add == "operator") { std::string error; SchedulerOperator myop = schedule.initialiseOperator(type, input, input2, output, error); if (error != "") REPORT_ERROR(error); else schedule.addOperator(myop); } else if (add == "job") { RelionJob myjob; bool dummy; myjob.read(input, dummy, true); schedule.addJob(myjob, input, mode); } else if (add == "edge") { schedule.addEdge(input, output); } else if (add == "fork") { schedule.addFork(input, boolvar, output, output2); } } else if (do_reset) { schedule.reset(); } else if (set_var != "") { if (isBooleanVariable(set_var)) { if (!(value == "true" || value == "True" || value == "false" || value == "False")) REPORT_ERROR("ERROR: invalid value for Boolean variable for --value: " + value); bool myval = (value == "true" || value == "True"); schedule.setBooleanVariableValue(set_var, myval); if (has_ori_value) { if (!(ori_value == "true" || ori_value == "True" || ori_value == "false" || ori_value == "False")) REPORT_ERROR("ERROR: invalid value for Boolean variable for --original_value: " + ori_value); myval = (ori_value == "true" || ori_value == "True"); schedule.setBooleanOriginalVariableValue(set_var, myval); } } else if (isFloatVariable(set_var)) { float floatval; if (!sscanf(value.c_str(), "%f", &floatval)) // is this a number? REPORT_ERROR("ERROR: invalid value for Float variable for --value: " + value); schedule.setFloatVariableValue(set_var, floatval); if (has_ori_value) { if (!sscanf(ori_value.c_str(), "%f", &floatval)) // is this a number? 
REPORT_ERROR("ERROR: invalid value for Float variable for --original_value: " + ori_value); schedule.setFloatOriginalVariableValue(set_var, floatval); } } else if (isStringVariable(set_var)) { schedule.setStringVariableValue(set_var, value); if (has_ori_value) schedule.setStringOriginalVariableValue(set_var, ori_value); } else REPORT_ERROR("ERROR: unrecognised variable whose value to set: " + set_var); } else if (set_mode != "") { if (schedule.isJob(set_mode)) { if (!(value == SCHEDULE_NODE_JOB_MODE_NEW || value == SCHEDULE_NODE_JOB_MODE_CONTINUE || value == SCHEDULE_NODE_JOB_MODE_OVERWRITE)) REPORT_ERROR("ERROR: unvalid option for job mode: " + value); schedule.jobs[set_mode].mode = value; } else REPORT_ERROR("ERROR: invalid jobname to set mode: " + set_mode); } else if (current_node != "") { schedule.current_node = current_node; } else REPORT_ERROR(" ERROR: nothing to do!"); schedule.write(exists(mydir + "schedule.star")); // only LOCK if the file already existed } }; int main(int argc, char *argv[]) { scheduler_parameters prm; try { prm.read(argc, argv); prm.run(); } catch (RelionError XE) { std::cerr << XE; return RELION_EXIT_FAILURE; } return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/split_stack.cpp000066400000000000000000000015061411340063500175630ustar00rootroot00000000000000 #include #include #include #include #include #include #include #include #include #include #include #include using namespace gravis; int main(int argc, char *argv[]) { std::string inPath = "./"; std::string inName = "TS_03"; Image img0; img0.read(inPath+inName+".st:mrcs", true); Image img1(img0.data.xdim, img0.data.ydim, 1, 1); for (int i = 0; i < img0.data.ndim; i++) { SliceHelper::extractStackSlice(img0, img1, i); std::stringstream sts; sts << i; std::string fn; sts >> fn; img1.write(inPath+"frames/"+inName+"_f"+fn+".mrc"); } return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/stack_create.cpp000066400000000000000000000210151411340063500176700ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #include #include #include #include #include #include #include #include // TODO: set pixel sizes in the outputs class stack_create_parameters { public: FileName fn_star, fn_root, fn_ext; MetaDataTable MD; // I/O Parser IOParser parser; bool do_spider, do_split_per_micrograph, do_apply_trans, do_apply_trans_only, do_ignore_optics, do_one_by_one; ObservationModel obsModel; void usage() { parser.writeUsage(std::cerr); } void read(int argc, char **argv) { parser.setCommandLine(argc, argv); int general_section = parser.addSection("General options"); fn_star = parser.getOption("--i", "Input STAR file with the images (as rlnImageName) to be saved in a stack"); fn_root = parser.getOption("--o", "Output rootname","output"); do_spider = parser.checkOption("--spider_format", "Write out in SPIDER stack format (by default MRC stack format)"); do_split_per_micrograph = parser.checkOption("--split_per_micrograph", "Write out separate stacks for each micrograph (needs rlnMicrographName in STAR file)"); do_apply_trans = parser.checkOption("--apply_transformation", "Apply the inplane-transformations (needs _rlnOriginX/Y and _rlnAnglePsi in STAR file) by real space interpolation"); do_apply_trans_only = parser.checkOption("--apply_rounded_offsets_only", "Apply the rounded translations only (so-recentering without interpolation; needs _rlnOriginX/Y in STAR file)"); do_ignore_optics = parser.checkOption("--ignore_optics", "Ignore optics groups. This allows you to read and write RELION 3.0 STAR files but does NOT allow you to convert 3.1 STAR files back to the 3.0 format."); do_one_by_one = parser.checkOption("--one_by_one", "Write particles one by one. This saves memory but can be slower."); if (do_apply_trans) std::cerr << "WARNING: --apply_transformation uses real space interpolation. It also invalidates CTF parameters (e.g. beam tilt & astigmatism). This can degrade the resolution. USE WITH CARE!!" << std::endl; fn_ext = (do_spider) ? ".spi" : ".mrcs"; // Check for errors in the command-line option if (parser.checkForErrors()) REPORT_ERROR("Errors encountered on the command line, exiting..."); } void run() { if (do_ignore_optics && (do_apply_trans || do_apply_trans_only)) REPORT_ERROR("ERROR: you cannot ignore optics and apply transformations"); if (do_ignore_optics) MD.read(fn_star); else ObservationModel::loadSafely(fn_star, obsModel, MD, "particles"); // Check for rlnImageName label if (!MD.containsLabel(EMDL_IMAGE_NAME)) REPORT_ERROR("ERROR: Input STAR file does not contain the rlnImageName label. 
Aren't you reading RELION 3.1 STAR files with --ignore_optics?"); if (do_split_per_micrograph && !MD.containsLabel(EMDL_MICROGRAPH_NAME)) REPORT_ERROR("ERROR: Input STAR file does not contain the rlnMicrographName label"); Image in; FileName fn_img, fn_mic; std::vector fn_mics; std::vector mics_ndims; // First get number of images and their size int ndim=0; bool is_first=true; int xdim, ydim, zdim; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD) { if (is_first) { MD.getValue(EMDL_IMAGE_NAME, fn_img); in.read(fn_img); xdim=XSIZE(in()); ydim=YSIZE(in()); zdim=ZSIZE(in()); is_first=false; } if (do_split_per_micrograph) { MD.getValue(EMDL_MICROGRAPH_NAME, fn_mic); bool have_found = false; for (int m = 0; m < fn_mics.size(); m++) { if (fn_mic == fn_mics[m]) { have_found = true; mics_ndims[m]++; break; } } if (!have_found) { fn_mics.push_back(fn_mic); mics_ndims.push_back(1); } } ndim++; } // If not splitting, just fill fn_mics and mics_ndim with one entry (to re-use loop below) if (!do_split_per_micrograph) { fn_mics.push_back(""); mics_ndims.push_back(ndim); } // Loop over all micrographs for (int m = 0; m < fn_mics.size(); m++) { ndim = mics_ndims[m]; fn_mic = fn_mics[m]; Image out; if (!do_one_by_one) { // Resize the output image std::cout << "Resizing the output stack to "<< ndim<<" images of size: "< A; rotation2DMatrix(psi, A); MAT_ELEM(A, 0, 2) = COSD(psi) * xoff - SIND(psi) * yoff; MAT_ELEM(A, 1, 2) = COSD(psi) * yoff + SIND(psi) * xoff; selfApplyGeometry(in(), A, IS_NOT_INV, DONT_WRAP); MD.setValue(EMDL_ORIENT_ORIGIN_X_ANGSTROM, (ori_xoff - xoff)*angpix); MD.setValue(EMDL_ORIENT_ORIGIN_Y_ANGSTROM, (ori_yoff - yoff)*angpix); MD.setValue(EMDL_ORIENT_PSI, ori_psi - psi); } FileName fn_img; fn_img.compose(n+1, fn_out); MD.setValue(EMDL_IMAGE_NAME, fn_img); if (!do_one_by_one) { out().printShape(); in().printShape(); out().setImage(n, in()); } else { if (n == 0) in.write(fn_img, -1, false, WRITE_OVERWRITE); else in.write(fn_img, -1, true, WRITE_APPEND); } n++; if (n%100==0) progress_bar(n); } } progress_bar(ndim); if (!do_one_by_one) out.write(fn_out); std::cout << "Written out: " << fn_out << std::endl; } if (do_ignore_optics) MD.write(fn_root+".star"); else obsModel.save(MD, fn_root+".star", "particles"); std::cout << "Written out: " << fn_root << ".star" << std::endl; std::cout << "Done!" 
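/*
  When --apply_transformation is given, the loop above rotates each particle by psi and
  translates it by the rotated (xoff, yoff) offsets through a single 2D affine matrix, then
  writes only the residual offsets and residual angle back into the STAR file (offsets
  converted back to Angstroms). A minimal sketch of how such a matrix can be composed; the
  row-major 2 x 3 layout and the sense of rotation are illustrative assumptions rather than
  RELION's rotation2DMatrix convention, but the translation column mirrors the MAT_ELEM
  assignments above:

    #include <cmath>

    void composeInplaneTransform(double psi_deg, double xoff, double yoff, double A[2][3])
    {
        const double r = psi_deg * 3.14159265358979323846 / 180.0;
        const double c = std::cos(r), s = std::sin(r);
        A[0][0] =  c;  A[0][1] = -s;  A[0][2] = c * xoff - s * yoff;   // rotated x-offset
        A[1][0] =  s;  A[1][1] =  c;  A[1][2] = c * yoff + s * xoff;   // rotated y-offset
    }

  Because the image itself is resampled in real space, the option's help text above warns that
  this interpolation can degrade resolution and invalidates CTF parameters such as beam tilt.
*/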
< #include #include #include #include #include #include class star_handler_parameters { public: FileName fn_in, tablename_in, fn_out, fn_compare, tablename_compare, fn_label1, fn_label2, fn_label3, select_label, select_str_label, discard_label, fn_check, fn_operate, fn_operate2, fn_operate3, fn_set; std::string remove_col_label, add_col_label, add_col_value, add_col_from, hist_col_label, select_include_str, select_exclude_str; RFLOAT eps, select_minval, select_maxval, multiply_by, add_to, center_X, center_Y, center_Z, hist_min, hist_max; bool do_ignore_optics, do_combine, do_split, do_center, do_random_order, show_frac, show_cumulative, do_discard; long int nr_split, size_split, nr_bin, random_seed; RFLOAT discard_sigma, duplicate_threshold, extract_angpix, cl_angpix; ObservationModel obsModel; // I/O Parser IOParser parser; void usage() { parser.writeUsage(std::cerr); } void read(int argc, char **argv) { parser.setCommandLine(argc, argv); int general_section = parser.addSection("General options"); fn_in = parser.getOption("--i", "Input STAR file"); fn_out = parser.getOption("--o", "Output STAR file", "out.star"); do_ignore_optics = parser.checkOption("--ignore_optics", "Provide this option for relion-3.0 functionality, without optics groups"); cl_angpix = textToFloat(parser.getOption("--angpix", "Pixel size in Angstrom, for when ignoring the optics groups in the input star file", "1.")); tablename_in = parser.getOption("--i_tablename", "If ignoring optics, then read table with this name", ""); int compare_section = parser.addSection("Compare options"); fn_compare = parser.getOption("--compare", "STAR file name to compare the input STAR file with", ""); fn_label1 = parser.getOption("--label1", "1st metadata label for the comparison (may be string, int or RFLOAT)", ""); fn_label2 = parser.getOption("--label2", "2nd metadata label for the comparison (RFLOAT only) for 2D/3D-distance)", ""); fn_label3 = parser.getOption("--label3", "3rd metadata label for the comparison (RFLOAT only) for 3D-distance)", ""); eps = textToFloat(parser.getOption("--max_dist", "Maximum distance to consider a match (for int and RFLOAT only)", "0.")); int subset_section = parser.addSection("Select options"); select_label = parser.getOption("--select", "Metadata label (number) to base output selection on (e.g. rlnCtfFigureOfMerit)", ""); select_minval = textToFloat(parser.getOption("--minval", "Minimum acceptable value for this label (inclusive)", "-99999999.")); select_maxval = textToFloat(parser.getOption("--maxval", "Maximum acceptable value for this label (inclusive)", "99999999.")); select_str_label = parser.getOption("--select_by_str", "Metadata label (string) to base output selection on (e.g. 
rlnMicrographname)", ""); select_include_str = parser.getOption("--select_include", "select rows that contains this string in --select_by_str ", ""); select_exclude_str = parser.getOption("--select_exclude", "exclude rows that contains this string in --select_by_str ", ""); int discard_section = parser.addSection("Discard based on image statistics options"); do_discard = parser.checkOption("--discard_on_stats", "Discard images if their average/stddev deviates too many sigma from the ensemble average"); discard_label = parser.getOption("--discard_label", "MetaDataLabel that points to the images to be used for discarding based on statistics", "rlnImageName"); discard_sigma = textToFloat(parser.getOption("--discard_sigma", "Discard images with average or stddev values that lie this many sigma away from the ensemble average", "4.")); int combine_section = parser.addSection("Combine options"); do_combine = parser.checkOption("--combine", "Combine input STAR files (multiple individual filenames, all within double-quotes after --i)"); fn_check = parser.getOption("--check_duplicates", "MetaDataLabel (for a string only!) to check for duplicates, e.g. rlnImageName", ""); int split_section = parser.addSection("Split options"); do_split = parser.checkOption("--split", "Split the input STAR file into one or more smaller output STAR files"); do_random_order = parser.checkOption("--random_order", "Perform splits on randomised order of the input STAR file"); random_seed = textToInteger(parser.getOption("--random_seed", "Random seed for randomisation.", "-1")); nr_split = textToInteger(parser.getOption("--nr_split", "Split into this many equal-sized STAR files", "-1")); size_split = textToLongLong(parser.getOption("--size_split", "AND/OR split into subsets of this many lines", "-1")); int operate_section = parser.addSection("Operate options"); fn_operate = parser.getOption("--operate", "Operate on this metadata label", ""); fn_operate2 = parser.getOption("--operate2", "Operate also on this metadata label", ""); fn_operate3 = parser.getOption("--operate3", "Operate also on this metadata label", ""); fn_set = parser.getOption("--set_to", "Set all the values for the --operate label(s) to this value", ""); multiply_by = textToFloat(parser.getOption("--multiply_by", "Multiply all the values for the --operate label(s) by this value", "1.")); add_to = textToFloat(parser.getOption("--add_to", "Add this value to all the values for the --operate label(s)", "0.")); int center_section = parser.addSection("Center options"); do_center = parser.checkOption("--center", "Perform centering of particles according to a position in the reference."); center_X = textToFloat(parser.getOption("--center_X", "X-coordinate in the reference to center particles on (in pix)", "0.")); center_Y = textToFloat(parser.getOption("--center_Y", "Y-coordinate in the reference to center particles on (in pix)", "0.")); center_Z = textToFloat(parser.getOption("--center_Z", "Z-coordinate in the reference to center particles on (in pix)", "0.")); int column_section = parser.addSection("Column options"); remove_col_label = parser.getOption("--remove_column", "Remove the column with this metadata label from the input STAR file.", ""); add_col_label = parser.getOption("--add_column", "Add a column with this metadata label from the input STAR file.", ""); add_col_value = parser.getOption("--add_column_value", "Set this value in all rows for the added column", ""); add_col_from = parser.getOption("--copy_column_from", "Copy values in this column to the 
added column", ""); hist_col_label = parser.getOption("--hist_column", "Calculate histogram of values in the column with this metadata label", ""); show_frac = parser.checkOption("--in_percent", "Show a histogram in percent (need --hist_column)"); show_cumulative = parser.checkOption("--show_cumulative", "Show a histogram of cumulative distribution (need --hist_column)"); nr_bin = textToInteger(parser.getOption("--hist_bins", "Number of bins for the histogram. By default, determined automatically by Freedman–Diaconis rule.", "-1")); hist_min = textToFloat(parser.getOption("--hist_min", "Minimum value for the histogram (needs --hist_bins)", "-inf")); hist_max = textToFloat(parser.getOption("--hist_max", "Maximum value for the histogram (needs --hist_bins)", "inf")); int duplicate_section = parser.addSection("Duplicate removal"); duplicate_threshold = textToFloat(parser.getOption("--remove_duplicates","Remove duplicated particles within this distance [Angstrom]. Negative values disable this.", "-1")); extract_angpix = textToFloat(parser.getOption("--image_angpix", "For down-sampled particles, specify the pixel size [A/pix] of the original images used in the Extract job", "-1")); // Check for errors in the command-line option if (parser.checkForErrors()) REPORT_ERROR("Errors encountered on the command line, exiting..."); } void run() { int c = 0; if (fn_compare != "") c++; if (select_label != "") c++; if (select_str_label != "") c++; if (do_discard) c++; if (do_combine) c++; if (do_split) c++; if (fn_operate != "") c++; if (do_center) c++; if (remove_col_label != "") c++; if (add_col_label != "") c++; if (hist_col_label != "") c++; if (duplicate_threshold > 0) c++; if (c != 1) { MetaDataTable MD; read_check_ignore_optics(MD, fn_in); write_check_ignore_optics(MD, fn_out, MD.getName()); //REPORT_ERROR("ERROR: specify (only and at least) one of the following options: --compare, --select, --select_by_str, --combine, --split, --operate, --center, --remove_column, --add_column, --hist_column or --remove_duplicates."); } if (fn_out == "" && hist_col_label == "") REPORT_ERROR("ERROR: specify the output file name (--o)"); if (fn_compare != "") compare(); if (select_label != "") select(); if (select_str_label != "") select_by_str(); if (do_discard) discard_on_image_stats(); if (do_combine) combine(); if (do_split) split(); if (fn_operate != "") operate(); if (do_center) center(); if (remove_col_label!= "") remove_column(); if (add_col_label!= "") add_column(); if (hist_col_label != "") hist_column(); if (duplicate_threshold > 0) remove_duplicate(); std::cout << " Done!" << std::endl; } void read_check_ignore_optics(MetaDataTable &MD, FileName fn, std::string tablename = "discover") { if (do_ignore_optics) { MD.read(fn, tablename); } else { ObservationModel::loadSafely(fn, obsModel, MD, tablename, 1, false); if (obsModel.opticsMdt.numberOfObjects() == 0) { std::cerr << " + WARNGING: could not read optics groups table, proceeding without it ..." << std::endl; MD.read(fn, tablename); do_ignore_optics = true; } } } void write_check_ignore_optics(MetaDataTable &MD, FileName fn, std::string tablename) { if (do_ignore_optics) MD.write(fn); else obsModel.save(MD, fn, tablename); } void compare() { MetaDataTable MD1, MD2, MDonly1, MDonly2, MDboth; EMDLabel label1, label2, label3; // Read in the observationModel read_check_ignore_optics(MD2, fn_compare); // read_check_ignore_optics() overwrites the member variable obsModel (BAD DESIGN!) // so we have to back up. 
ObservationModel obsModelCompare = obsModel; read_check_ignore_optics(MD1, fn_in); label1 = EMDL::str2Label(fn_label1); label2 = (fn_label2 == "") ? EMDL_UNDEFINED : EMDL::str2Label(fn_label2); label3 = (fn_label3 == "") ? EMDL_UNDEFINED : EMDL::str2Label(fn_label3); compareMetaDataTable(MD1, MD2, MDboth, MDonly1, MDonly2, label1, eps, label2, label3); std::cout << MDboth.numberOfObjects() << " entries occur in both input STAR files." << std::endl; std::cout << MDonly1.numberOfObjects() << " entries occur only in the 1st input STAR file." << std::endl; std::cout << MDonly2.numberOfObjects() << " entries occur only in the 2nd input STAR file." << std::endl; write_check_ignore_optics(MDboth, fn_out.insertBeforeExtension("_both"), MD1.getName()); std::cout << " Written: " << fn_out.insertBeforeExtension("_both") << std::endl; write_check_ignore_optics(MDonly1, fn_out.insertBeforeExtension("_only1"), MD1.getName()); std::cout << " Written: " << fn_out.insertBeforeExtension("_only1") << std::endl; // Use MD2's optics group for MDonly2. obsModel = obsModelCompare; write_check_ignore_optics(MDonly2, fn_out.insertBeforeExtension("_only2"), MD1.getName()); std::cout << " Written: " << fn_out.insertBeforeExtension("_only2") << std::endl; } void select() { MetaDataTable MDin, MDout; read_check_ignore_optics(MDin, fn_in); MDout = subsetMetaDataTable(MDin, EMDL::str2Label(select_label), select_minval, select_maxval); write_check_ignore_optics(MDout, fn_out, MDin.getName()); std::cout << " Written: " << fn_out << " with " << MDout.numberOfObjects() << " item(s)" << std::endl; } void select_by_str() { int c = 0; if (select_include_str != "") c++; if (select_exclude_str != "") c++; if (c != 1) REPORT_ERROR("You must specify only and at least one of --select_include and --select_exclude"); MetaDataTable MDin, MDout; read_check_ignore_optics(MDin, fn_in); if (select_include_str != "") MDout = subsetMetaDataTable(MDin, EMDL::str2Label(select_str_label), select_include_str, false); else MDout = subsetMetaDataTable(MDin, EMDL::str2Label(select_str_label), select_exclude_str, true); write_check_ignore_optics(MDout, fn_out, MDin.getName()); std::cout << " Written: " << fn_out << std::endl; } void discard_on_image_stats() { MetaDataTable MDin, MDout; read_check_ignore_optics(MDin, fn_in); std::cout << " Calculating average and stddev for all images ... 
" << std::endl; time_config(); init_progress_bar(MDin.numberOfObjects()); RFLOAT sum_avg = 0.; RFLOAT sum2_avg = 0.; RFLOAT sum_stddev = 0.; RFLOAT sum2_stddev = 0.; RFLOAT sum_n = 0.; std::vector avgs, stddevs; long int ii = 0; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDin) { Image img; FileName fn_img; RFLOAT avg, stddev, minval, maxval; MDin.getValue(EMDL::str2Label(discard_label), fn_img); img.read(fn_img); img().computeStats(avg, stddev, minval, maxval); sum_avg += avg; sum2_avg += avg * avg; sum_stddev += stddev; sum2_stddev += stddev * stddev; sum_n += 1.; avgs.push_back(avg); stddevs.push_back(stddev); ii++; if (ii%100 == 0) progress_bar(ii); } progress_bar(MDin.numberOfObjects()); sum_avg /= sum_n; sum_stddev /= sum_n; sum2_avg = sqrt(sum2_avg/sum_n - sum_avg*sum_avg); sum2_stddev = sqrt(sum2_stddev/sum_n - sum_stddev*sum_stddev); std::cout << " [ Average , stddev ] of the average Image value = [ " << sum_avg<< " , " << sum2_avg << " ] " << std::endl; std::cout << " [ Average , stddev ] of the stddev Image value = [ " << sum_stddev<< " , " << sum2_stddev << " ] " << std::endl; long int i = 0, nr_discard = 0; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDin) { if (avgs[i] > sum_avg - discard_sigma * sum2_avg && avgs[i] < sum_avg + discard_sigma * sum2_avg && stddevs[i] > sum_stddev - discard_sigma * sum2_stddev && stddevs[i] < sum_stddev + discard_sigma * sum2_stddev) { MDout.addObject(MDin.getObject(current_object)); } else { nr_discard++; } i++; } std::cout << " Discarded " << nr_discard << " Images because of too large or too small average/stddev values " << std::endl; write_check_ignore_optics(MDout, fn_out, MDin.getName()); std::cout << " Written: " << fn_out << std::endl; } void combine() { std::vector fns_in; std::vector words; tokenize(fn_in, words); for (int iword = 0; iword < words.size(); iword++) { FileName fnt = words[iword]; fnt.globFiles(fns_in, false); } MetaDataTable MDin, MDout; std::vector MDsin, MDoptics; std::vector obsModels; // Read the first table into the global obsModel read_check_ignore_optics(MDin, fns_in[0]); MDsin.push_back(MDin); // Read all the rest of the tables into local obsModels for (int i = 1; i < fns_in.size(); i++) { ObservationModel myobsModel; if (do_ignore_optics) MDin.read(fns_in[i], tablename_in); else ObservationModel::loadSafely(fns_in[i], myobsModel, MDin, "discover", 1); MDsin.push_back(MDin); obsModels.push_back(myobsModel); } // Combine optics groups with the same EMDL_IMAGE_OPTICS_GROUP_NAME, make new ones for those with a different name if (!do_ignore_optics) { std::vector optics_group_uniq_names; // Initialise optics_group_uniq_names with the first table FOR_ALL_OBJECTS_IN_METADATA_TABLE(obsModel.opticsMdt) { std::string myname; obsModel.opticsMdt.getValue(EMDL_IMAGE_OPTICS_GROUP_NAME, myname); optics_group_uniq_names.push_back(myname); } // Now check uniqueness of the other tables for (int MDs_id = 1; MDs_id < fns_in.size(); MDs_id++) { const int obs_id = MDs_id - 1; std::vector new_optics_groups; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDsin[MDs_id]) { int tmp; MDsin[MDs_id].getValue(EMDL_IMAGE_OPTICS_GROUP, tmp); new_optics_groups.push_back(tmp); } MetaDataTable unique_opticsMdt; unique_opticsMdt.addMissingLabels(&obsModels[obs_id].opticsMdt); FOR_ALL_OBJECTS_IN_METADATA_TABLE(obsModels[obs_id].opticsMdt) { std::string myname; int my_optics_group; obsModels[obs_id].opticsMdt.getValue(EMDL_IMAGE_OPTICS_GROUP_NAME, myname); obsModels[obs_id].opticsMdt.getValue(EMDL_IMAGE_OPTICS_GROUP, my_optics_group); // Check whether this name is unique bool 
is_uniq = true; int new_group; for (new_group = 0; new_group < optics_group_uniq_names.size(); new_group++) { if (optics_group_uniq_names[new_group] == myname) { is_uniq = false; break; } } new_group ++; // start counting of groups at 1, not 0! if (is_uniq) { std::cout << " + Adding new optics_group with name: " << myname << std::endl; optics_group_uniq_names.push_back(myname); // Add the line to the global obsModel obsModels[obs_id].opticsMdt.setValue(EMDL_IMAGE_OPTICS_GROUP, new_group); unique_opticsMdt.addObject(); unique_opticsMdt.setObject(obsModels[obs_id].opticsMdt.getObject()); } else { std::cout << " + Joining optics_groups with the same name: " << myname << std::endl; std::cerr << " + WARNING: if these are different data sets, you might want to rename optics groups instead of joining them!" << std::endl; std::cerr << " + WARNING: if so, manually edit the rlnOpticsGroupName column in the optics_groups table of your input STAR files." << std::endl; } if (my_optics_group != new_group) { std::cout << " + Renumbering group " << myname << " from " << my_optics_group << " to " << new_group << std::endl; } // Update the optics_group entry for all particles in the MDsin for (long int current_object2 = MDsin[MDs_id].firstObject(); current_object2 < MDsin[MDs_id].numberOfObjects() && current_object2 >= 0; current_object2 = MDsin[MDs_id].nextObject()) { int old_optics_group; MDsin[MDs_id].getValue(EMDL_IMAGE_OPTICS_GROUP, old_optics_group, current_object2); if (old_optics_group == my_optics_group) new_optics_groups[current_object2] = new_group; } } obsModels[obs_id].opticsMdt = unique_opticsMdt; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDsin[MDs_id]) { MDsin[MDs_id].setValue(EMDL_IMAGE_OPTICS_GROUP, new_optics_groups[current_object]); // Also rename the rlnGroupName to not have groups overlapping from different optics groups std::string name; if (MDsin[MDs_id].getValue(EMDL_MLMODEL_GROUP_NAME, name)) { name = "optics"+integerToString(new_optics_groups[current_object])+"_"+name; MDsin[MDs_id].setValue(EMDL_MLMODEL_GROUP_NAME, name); } } } // Make one vector for combination of the optics tables MDoptics.push_back(obsModel.opticsMdt); for (int i = 1; i < fns_in.size(); i++) { MDoptics.push_back(obsModels[i - 1].opticsMdt); } // Check if anisotropic magnification and/or beam_tilt are present in some optics groups, but not in others. 
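// In outline: any label that only some optics tables carry (beam tilt, the 2x2
// anisotropic magnification matrix, odd/even Zernike coefficients, or the
// CTF-premultiplied flag) is added to the remaining tables with neutral defaults
// (zero tilt, identity matrix, zero coefficients, 'false'), so that every optics
// table ends up with the same set of columns before they are combined into one.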
// If so, initialise the others correctly bool has_beamtilt = false, has_not_beamtilt = false; bool has_anisomag = false, has_not_anisomag = false; bool has_odd_zernike = false, has_not_odd_zernike = false; bool has_even_zernike = false, has_not_even_zernike = false; bool has_ctf_premultiplied = false, has_not_ctf_premultiplied = false; for (int i = 0; i < fns_in.size(); i++) { if (MDoptics[i].containsLabel(EMDL_IMAGE_BEAMTILT_X) || MDoptics[i].containsLabel(EMDL_IMAGE_BEAMTILT_Y)) { has_beamtilt = true; } else { has_not_beamtilt = true; } if (MDoptics[i].containsLabel(EMDL_IMAGE_MAG_MATRIX_00) && MDoptics[i].containsLabel(EMDL_IMAGE_MAG_MATRIX_01) && MDoptics[i].containsLabel(EMDL_IMAGE_MAG_MATRIX_10) && MDoptics[i].containsLabel(EMDL_IMAGE_MAG_MATRIX_11)) { has_anisomag = true; } else { has_not_anisomag = true; } if (MDoptics[i].containsLabel(EMDL_IMAGE_ODD_ZERNIKE_COEFFS)) { has_odd_zernike = true; } else { has_not_odd_zernike = true; } if (MDoptics[i].containsLabel(EMDL_IMAGE_EVEN_ZERNIKE_COEFFS)) { has_even_zernike = true; } else { has_not_even_zernike = true; } if (MDoptics[i].containsLabel(EMDL_OPTIMISER_DATA_ARE_CTF_PREMULTIPLIED)) { has_ctf_premultiplied = true; } else { has_not_ctf_premultiplied = true; } } #ifdef DEBUG printf("has_beamtilt = %d, has_not_beamtilt = %d, has_anisomag = %d, has_not_anisomag = %d, has_odd_zernike = %d, has_not_odd_zernike = %d, has_even_zernike = %d, has_not_even_zernike = %d, has_ctf_premultiplied = %d, has_not_ctf_premultiplied = %d\n", has_beamtilt, has_not_beamtilt, has_anisomag, has_not_anisomag, has_odd_zernike, has_not_odd_zernike, has_even_zernike, has_not_even_zernike, has_ctf_premultiplied, has_not_ctf_premultiplied); #endif for (int i = 0; i < fns_in.size(); i++) { if (has_beamtilt && has_not_beamtilt) { if (!MDoptics[i].containsLabel(EMDL_IMAGE_BEAMTILT_X)) { FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDoptics[i]) { MDoptics[i].setValue(EMDL_IMAGE_BEAMTILT_X, 0.); } } if (!MDoptics[i].containsLabel(EMDL_IMAGE_BEAMTILT_Y)) { FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDoptics[i]) { MDoptics[i].setValue(EMDL_IMAGE_BEAMTILT_Y, 0.); } } } if (has_anisomag && has_not_anisomag) { if (!(MDoptics[i].containsLabel(EMDL_IMAGE_MAG_MATRIX_00) && MDoptics[i].containsLabel(EMDL_IMAGE_MAG_MATRIX_01) && MDoptics[i].containsLabel(EMDL_IMAGE_MAG_MATRIX_10) && MDoptics[i].containsLabel(EMDL_IMAGE_MAG_MATRIX_11)) ) { FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDoptics[i]) { MDoptics[i].setValue(EMDL_IMAGE_MAG_MATRIX_00, 1.); MDoptics[i].setValue(EMDL_IMAGE_MAG_MATRIX_01, 0.); MDoptics[i].setValue(EMDL_IMAGE_MAG_MATRIX_10, 0.); MDoptics[i].setValue(EMDL_IMAGE_MAG_MATRIX_11, 1.); } } } if (has_odd_zernike && has_not_odd_zernike) { std::vector six_zeros(6, 0); if (!MDoptics[i].containsLabel(EMDL_IMAGE_ODD_ZERNIKE_COEFFS)) { FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDoptics[i]) { MDoptics[i].setValue(EMDL_IMAGE_ODD_ZERNIKE_COEFFS, six_zeros); } } } if (has_even_zernike && has_not_even_zernike) { std::vector nine_zeros(9, 0); if (!MDoptics[i].containsLabel(EMDL_IMAGE_EVEN_ZERNIKE_COEFFS)) { FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDoptics[i]) { MDoptics[i].setValue(EMDL_IMAGE_EVEN_ZERNIKE_COEFFS, nine_zeros); } } } if (has_ctf_premultiplied && has_not_ctf_premultiplied) { if (!MDoptics[i].containsLabel(EMDL_OPTIMISER_DATA_ARE_CTF_PREMULTIPLIED)) { FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDoptics[i]) { MDoptics[i].setValue(EMDL_OPTIMISER_DATA_ARE_CTF_PREMULTIPLIED, false); } } } } // Now combine all optics tables into one obsModel.opticsMdt = MetaDataTable::combineMetaDataTables(MDoptics); } // Combine the 
particles tables MDout = MetaDataTable::combineMetaDataTables(MDsin); //Deactivate the group_name column MDout.deactivateLabel(EMDL_MLMODEL_GROUP_NO); if (fn_check != "") { EMDLabel label = EMDL::str2Label(fn_check); if (!MDout.containsLabel(label)) REPORT_ERROR("ERROR: the output file does not contain the label to check for duplicates. Is it present in all input files?"); /// Don't want to mess up original order, so make a MDsort with only that label... FileName fn_this, fn_prev = ""; MetaDataTable MDsort; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDout) { MDout.getValue(label, fn_this); MDsort.addObject(); MDsort.setValue(label, fn_this); } // sort on the label MDsort.newSort(label); long int nr_duplicates = 0; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDsort) { MDsort.getValue(label, fn_this); if (fn_this == fn_prev) { nr_duplicates++; std::cerr << " WARNING: duplicate entry: " << fn_this << std::endl; } fn_prev = fn_this; } if (nr_duplicates > 0) std::cerr << " WARNING: Total number of duplicate "<< fn_check << " entries: " << nr_duplicates << std::endl; } write_check_ignore_optics(MDout, fn_out, MDin.getName()); std::cout << " Written: " << fn_out << std::endl; } void split() { MetaDataTable MD; read_check_ignore_optics(MD, fn_in); // Randomise if neccesary if (do_random_order) { if (random_seed < 0) randomize_random_generator(); else init_random_generator(random_seed); MD.randomiseOrder(); } long int n_obj = MD.numberOfObjects(); if (n_obj == 0) { REPORT_ERROR("ERROR: empty STAR file..."); } if (nr_split < 0 && size_split < 0) { REPORT_ERROR("ERROR: nr_split and size_split are both zero. Set at least one of them to be positive."); } else if (nr_split < 0 && size_split > 0) { nr_split = CEIL(1. * n_obj / size_split); } else if (nr_split > 0 && size_split < 0) { size_split = CEIL(1. 
* n_obj / nr_split); } std::vector MDouts; MDouts.resize(nr_split); long int n = 0; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD) { int my_split = n / size_split; if (my_split < nr_split) { MDouts[my_split].addObject(MD.getObject(current_object)); } else { break; } n++; } // Sjors 19jun2019: write out a star file with the output nodes MetaDataTable MDnodes; MDnodes.setName("output_nodes"); FileName fnt0; fnt0 = integerToString(nr_split); for (int isplit = 0; isplit < nr_split; isplit ++) { FileName fnt = fn_out.insertBeforeExtension("_split"+integerToString(isplit+1)); write_check_ignore_optics(MDouts[isplit], fnt, MD.getName()); std::cout << " Written: " < my_center(3); XX(my_center) = center_X; YY(my_center) = center_Y; ZZ(my_center) = center_Z; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD) { int optics_group; Matrix1D my_projected_center(3); Matrix2D A3D; RFLOAT xoff, yoff, zoff, rot, tilt, psi, angpix; if (do_ignore_optics) { angpix = cl_angpix; } else { MD.getValue(EMDL_IMAGE_OPTICS_GROUP, optics_group); optics_group--; angpix = obsModel.getPixelSize(optics_group); } MD.getValue(EMDL_ORIENT_ORIGIN_X_ANGSTROM, xoff); MD.getValue(EMDL_ORIENT_ORIGIN_Y_ANGSTROM, yoff); MD.getValue(EMDL_ORIENT_ROT, rot); MD.getValue(EMDL_ORIENT_TILT, tilt); MD.getValue(EMDL_ORIENT_PSI, psi); xoff /= angpix; yoff /= angpix; // Project the center-coordinates Euler_angles2matrix(rot, tilt, psi, A3D, false); my_projected_center = A3D * my_center; xoff -= XX(my_projected_center); yoff -= YY(my_projected_center); // Set back the new centers MD.setValue(EMDL_ORIENT_ORIGIN_X_ANGSTROM, xoff*angpix); MD.setValue(EMDL_ORIENT_ORIGIN_Y_ANGSTROM, yoff*angpix); // also allow 3D data (subtomograms) if (do_contains_z) { MD.getValue(EMDL_ORIENT_ORIGIN_Z_ANGSTROM, zoff); zoff /= angpix; zoff -= ZZ(my_projected_center); MD.setValue(EMDL_ORIENT_ORIGIN_Z_ANGSTROM, zoff*angpix); } } write_check_ignore_optics(MD, fn_out, MD.getName()); std::cout << " Written: " << fn_out << std::endl; } void remove_column() { MetaDataTable MD; read_check_ignore_optics(MD, fn_in); MD.deactivateLabel(EMDL::str2Label(remove_col_label)); write_check_ignore_optics(MD, fn_out, MD.getName()); std::cout << " Written: " << fn_out << std::endl; } void add_column() { if ((add_col_value == "" && add_col_from == "") || (add_col_value != "" && add_col_from != "")) REPORT_ERROR("ERROR: you need to specify either --add_column_value or --copy_column_from when adding a column."); bool set_value = (add_col_value != ""); MetaDataTable MD; EMDLabel label = EMDL::str2Label(add_col_label); EMDLabel source_label; read_check_ignore_optics(MD, fn_in); MD.addLabel(label); if (add_col_from != "") { source_label = EMDL::str2Label(add_col_from); if (!MD.containsLabel(source_label)) REPORT_ERROR("ERROR: The column specified in --add_column_from is not present in the input STAR file."); } FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD) { if (EMDL::isDouble(label)) { RFLOAT aux; if (set_value) aux = textToFloat(add_col_value); else MD.getValue(source_label, aux); MD.setValue(label, aux); } else if (EMDL::isInt(label)) { long aux; if (set_value) aux = textToInteger(add_col_value); else MD.getValue(source_label, aux); MD.setValue(label, aux); } else if (EMDL::isBool(label)) { bool aux; if (set_value) aux = (bool)textToInteger(add_col_value); else MD.getValue(source_label, aux); MD.setValue(label, aux); } else if (EMDL::isString(label)) { std::string aux; if (set_value) aux = add_col_value; else MD.getValue(source_label, aux); MD.setValue(label, add_col_value); } else if (EMDL::isString(label)) { 
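// Here the value is transferred as plain text via getValueToString()/setValueFromString().
// Note that auxStr is filled from either --add_column_value or the source column, yet the
// setValueFromString() call below passes add_col_value rather than auxStr, so copying a
// string column requested with --copy_column_from would not take effect as written.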
std::string auxStr; if (set_value) auxStr = add_col_value; else MD.getValueToString(source_label, auxStr); MD.setValueFromString(label, add_col_value); } } write_check_ignore_optics(MD, fn_out, MD.getName()); std::cout << " Written: " << fn_out << std::endl; } void hist_column() { MetaDataTable MD; EMDLabel label = EMDL::str2Label(hist_col_label); std::vector values; read_check_ignore_optics(MD, fn_in); if (!MD.containsLabel(label)) REPORT_ERROR("ERROR: The column specified in --hist_column is not present in the input STAR file."); std::vector histX,histY; CPlot2D *plot2D=new CPlot2D(""); MD.columnHistogram(label, histY, histX, 1, plot2D, nr_bin, hist_min, hist_max, show_frac, show_cumulative); FileName fn_eps = fn_out.withoutExtension()+".eps"; plot2D->OutputPostScriptPlot(fn_eps); std::cout << " Done! written out " << fn_eps << std::endl; delete plot2D; } void remove_duplicate() { if (do_ignore_optics) REPORT_ERROR("Duplicate removal is not compatible with --ignore_optics"); MetaDataTable MD; read_check_ignore_optics(MD, fn_in, "particles"); EMDLabel mic_label; if (MD.containsLabel(EMDL_MICROGRAPH_NAME)) mic_label = EMDL_MICROGRAPH_NAME; else REPORT_ERROR("The input STAR file does not contain rlnMicrographName column."); RFLOAT particle_angpix = 1.0; // rlnOriginX/YAngst is always 1 A/px. if (obsModel.numberOfOpticsGroups() > 1) std::cerr << "WARNING: The input contains multiple optics groups. We assume that the pixel sizes of original micrographs before extraction are all the same. If this is not the case, you have to split the input and remove duplicates separately." << std::endl; if (extract_angpix > 0) { std::cout << " + Using the provided pixel size for original micrographs before extraction: " << extract_angpix << std::endl; } else { extract_angpix = obsModel.getPixelSize(0); std::cout << " + Assuming the pixel size of original micrographs before extraction is " << extract_angpix << std::endl; } RFLOAT scale = particle_angpix / extract_angpix; RFLOAT duplicate_threshold_in_px = duplicate_threshold / extract_angpix; std::cout << " + The minimum inter-particle distance " << duplicate_threshold << " A corresponds to " << duplicate_threshold_in_px << " px in the micrograph coordinate (rlnCoordinateX/Y)." << std::endl; std::cout << " + The particle shifts (rlnOriginXAngst, rlnOriginYAngst) are multiplied by " << scale << " to bring it to the same scale as rlnCoordinateX/Y." << std::endl; FileName fn_removed = fn_out.withoutExtension() + "_removed.star"; MetaDataTable MDout = removeDuplicatedParticles(MD, mic_label, duplicate_threshold_in_px, scale, fn_removed, true); write_check_ignore_optics(MDout, fn_out, "particles"); std::cout << " Written: " << fn_out << std::endl; } }; int main(int argc, char *argv[]) { star_handler_parameters prm; try { prm.read(argc, argv); prm.run(); } catch (RelionError XE) { std::cerr << XE; //prm.usage(); return RELION_EXIT_FAILURE; } return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/tiltpair_plot.cpp000066400000000000000000000220571411340063500201350ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include #include #include #include #include #include #include #include #include #include #include #include class tiltpair_plot_parameters { public: FileName fn_unt, fn_til, fn_eps, fn_sym; MetaDataTable MDu, MDt; RFLOAT exp_tilt, exp_beta, dist_from_alpha, dist_from_tilt, plot_max_tilt, plot_spot_radius; // I/O Parser IOParser parser; SymList SL; std::ofstream fh_eps; void usage() { parser.writeUsage(std::cerr); } void read(int argc, char **argv) { parser.setCommandLine(argc, argv); int general_section = parser.addSection("General options"); fn_unt = parser.getOption("--u", "Input STAR file with untilted particles"); fn_til = parser.getOption("--t", "Input STAR file with tilted particles"); fn_eps = parser.getOption("--o", "Output EPS file ", "tiltpair.eps"); fn_sym = parser.getOption("--sym", "Symmetry point group", "C1"); exp_tilt = textToFloat(parser.getOption("--exp_tilt", "Choose symmetry operator that gives tilt angle closest to this value", "0.")); exp_beta = textToFloat(parser.getOption("--exp_beta", "Choose symmetry operator that gives beta angle closest to this value", "0.")); dist_from_alpha = textToFloat(parser.getOption("--dist_from_alpha", "Direction (alpha angle) of tilt axis from which to calculate distance", "0.")); dist_from_tilt = textToFloat(parser.getOption("--dist_from_tilt", "Tilt angle from which to calculate distance", "0.")); plot_max_tilt = textToFloat(parser.getOption("--max_tilt", "Maximum tilt angle to plot in the EPS file", "90.")); plot_spot_radius = textToInteger(parser.getOption("--spot_radius", "Radius in pixels of the spots in the tiltpair plot", "3")); // Check for errors in the command-line option if (parser.checkForErrors()) REPORT_ERROR("Errors encountered on the command line, exiting..."); } void initialise() { // Get the MDs for both untilted and tilted particles MDu.read(fn_unt); MDt.read(fn_til); if (MDu.numberOfObjects() != MDt.numberOfObjects()) REPORT_ERROR("Tiltpair plot ERROR: untilted and tilted STAR files have unequal number of entries."); // Get the symmetry point group int pgGroup, pgOrder; SL.isSymmetryGroup(fn_sym, pgGroup, pgOrder); SL.read_sym_file(fn_sym); // Make postscript header fh_eps.open(fn_eps.c_str(), std::ios::out); if (!fh_eps) REPORT_ERROR("Tiltpair plot ERROR: Cannot open " + fn_eps + " for output"); fh_eps << "%%!PS-Adobe-2.0\n"; fh_eps << "%% Creator: Tilt pair analysis \n"; fh_eps << "%% Pages: 1\n"; fh_eps << "0 setgray\n"; fh_eps << "0.1 setlinewidth\n"; // Draw circles on postscript: 250pixels=plot_max_tilt fh_eps << "300 400 83 0 360 arc closepath stroke\n"; fh_eps << "300 400 167 0 360 arc closepath stroke\n"; fh_eps << "300 400 250 0 360 arc closepath stroke\n"; fh_eps << "300 150 newpath moveto 300 650 lineto stroke\n"; fh_eps << "50 400 newpath moveto 550 400 lineto stroke\n"; } void add_to_postscript(RFLOAT tilt_angle, RFLOAT alpha, RFLOAT beta) { RFLOAT rr, th, x, y, r, g, b; rr = (tilt_angle / plot_max_tilt)* 250; x = 300. + rr * COSD(alpha); y = 400. 
+ rr * SIND(alpha); value_to_redblue_scale(ABS(90.-beta), 0., 90., r, g, b); fh_eps << x << " " << y << " " << plot_spot_radius << " 0 360 arc closepath "< L(4, 4), R(4, 4); // A matrix from the list RFLOAT best_ang_dist = 3600; RFLOAT best_rot2, best_tilt2, best_psi2; RFLOAT tilt_angle, alpha, beta; for (int i = 0; i < imax; i++) { RFLOAT rot2p, tilt2p, psi2p; if (i == 0) { rot2p = rot2; tilt2p = tilt2; psi2p = psi2; } else { SL.get_matrices(i - 1, L, R); L.resize(3, 3); // Erase last row and column R.resize(3, 3); // as only the relative orientation is useful and not the translation Euler_apply_transf(L, R, rot2, tilt2, psi2, rot2p, tilt2p, psi2p); } RFLOAT ang_dist = check_tilt_pairs(rot1, tilt1, psi1, rot2p, tilt2p, psi2p); if (ang_dist < best_ang_dist) { best_ang_dist = ang_dist; best_rot2 = rot2p; best_tilt2 = tilt2p; best_psi2 = psi2p; } } rot2 = best_rot2; tilt2 = best_tilt2; psi2 = best_psi2; return best_ang_dist; } RFLOAT check_tilt_pairs(RFLOAT rot1, RFLOAT tilt1, RFLOAT psi1, RFLOAT &alpha, RFLOAT &tilt_angle, RFLOAT &beta) { // Transformation matrices Matrix1D axis(3); Matrix2D E1, E2; axis.resize(3); RFLOAT aux, sine_tilt_angle; RFLOAT rot2 = alpha, tilt2 = tilt_angle, psi2 = beta; // Calculate the transformation from one setting to the second one. Euler_angles2matrix(psi1, tilt1, rot1, E1); Euler_angles2matrix(psi2, tilt2, rot2, E2); E2 = E2 * E1.inv(); // Get the tilt angle (and its sine) aux = ( E2(0,0) + E2(1,1) + E2(2,2) - 1. ) / 2.; if (ABS(aux) - 1. > XMIPP_EQUAL_ACCURACY) REPORT_ERROR("BUG: aux>1"); tilt_angle = ACOSD(aux); sine_tilt_angle = 2. * SIND(tilt_angle); // Get the tilt axis direction in angles alpha and beta if (sine_tilt_angle > XMIPP_EQUAL_ACCURACY) { axis(0) = ( E2(2,1) - E2(1,2) ) / sine_tilt_angle; axis(1) = ( E2(0,2) - E2(2,0) ) / sine_tilt_angle; axis(2) = ( E2(1,0) - E2(0,1) ) / sine_tilt_angle; } else { axis(0) = axis(1) = 0.; axis(2) = 1.; } // Apply E1.inv() to the axis to get everyone in the same coordinate system again axis = E1.inv() * axis; // Convert to alpha and beta angle Euler_direction2angles(axis, alpha, beta); // Enforce positive beta: choose the other Euler angle combination to express the same direction if (beta < 0.) { beta = -beta; alpha+= 180.; } // Let alpha go from 0 to 360 degrees alpha = realWRAP(alpha, 0., 360.); // Return the value that needs to be optimized RFLOAT minimizer=0.; if (exp_beta < 999.) minimizer = ABS(beta - exp_beta); if (exp_tilt < 999.) 
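// (the tilt-angle term only contributes when an expected tilt below the 999-degree cut-off was supplied)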
minimizer += ABS(tilt_angle - exp_tilt); return minimizer; } void run() { int iline = 0; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDu) { // Read input data RFLOAT rot1, tilt1, psi1; RFLOAT rot2, tilt2, psi2; RFLOAT rot2p, tilt2p, psi2p; RFLOAT best_tilt, best_alpha, best_beta; RFLOAT distp; MDu.getValue(EMDL_ORIENT_ROT, rot1); MDt.getValue(EMDL_ORIENT_ROT, rot2, iline); MDu.getValue(EMDL_ORIENT_TILT, tilt1); MDt.getValue(EMDL_ORIENT_TILT, tilt2, iline); MDu.getValue(EMDL_ORIENT_PSI, psi1); MDt.getValue(EMDL_ORIENT_PSI, psi2, iline); iline++; // Bring both angles to a normalized set rot1 = realWRAP(rot1, -180, 180); tilt1 = realWRAP(tilt1, -180, 180); psi1 = realWRAP(psi1, -180, 180); rot2 = realWRAP(rot2, -180, 180); tilt2 = realWRAP(tilt2, -180, 180); psi2 = realWRAP(psi2, -180, 180); // Apply rotations to find the minimum distance angles rot2p = rot2; tilt2p = tilt2; psi2p = psi2; distp = check_symmetries(rot1, tilt1, psi1, rot2p, tilt2p, psi2p); // Calculate distance to user-defined point RFLOAT xp, yp, x, y; Matrix1D aux2(4); xp = dist_from_tilt * COSD(dist_from_alpha); yp = dist_from_tilt * SIND(dist_from_alpha); x = tilt2p * COSD(rot2p); y = tilt2p * SIND(rot2p); aux2(3) = sqrt((xp-x)*(xp-x) + (yp-y)*(yp-y)); aux2(0) = tilt2p; aux2(1) = rot2p; aux2(2) = psi2p; add_to_postscript(tilt2p, rot2p, psi2p); } // Close the EPS file to write it to disk fh_eps << "showpage\n"; fh_eps.close(); } }; int main(int argc, char *argv[]) { tiltpair_plot_parameters prm; try { prm.read(argc, argv); prm.initialise(); prm.run(); } catch (RelionError XE) { std::cerr << XE; //prm.usage(); return RELION_EXIT_FAILURE; } return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/tomo_test.cpp000066400000000000000000000101611411340063500172550ustar00rootroot00000000000000 #include #include #include #include using namespace gravis; void drawPoint(Volume& dest, d3Vector p, RFLOAT val); int main(int argc, char *argv[]) { const double angpix = 1.177; const double bin = 4.0; const int frameCount = 40; const double fullWidth = 3710.0; std::string tomoFn = "frames/TS_03_f*.mrc"; std::string tltFn = "TS_03.tlt"; std::string xfFn = "TS_03.xf"; std::string aliFn = "TS_03_output.txt"; std::string fiducialsFile = "TS_03_3dmod.txt"; std::string particlesStar = "allmotl_TS_03.star"; const bool prescaled = true; TomoStack ts; if (!prescaled) { ts = TomoStack( tomoFn, frameCount, tltFn, xfFn, aliFn, angpix, 1.0); std::cout << "loading done.\n"; std::stringstream sts; sts << bin; ts.downsample(bin); ts.saveImages("frames/bin"+sts.str()+"_*.mrc"); std::cout << "downsampling done.\n"; } else { std::stringstream sts; sts << bin; ts = TomoStack( "frames/bin"+sts.str()+"_*.mrc", frameCount, tltFn, xfFn, aliFn, angpix*bin, 1.0/bin); std::cout << "loading done.\n"; } const int w = 400; const int h = 400; const int d = 200; d3Vector origin(0.0, 0.0, -400.0); const double spacing = fullWidth/w; Volume dest(w,h,d); Volume maskDest(w,h,d); dest.fill(0.0); maskDest.fill(0.0); std::cout << "filling done.\n"; BackprojectionHelper::backprojectRaw(ts, dest, maskDest, origin, spacing); // write clean tomogram into test00.vtk VtkHelper::writeVTK( dest, "test00.vtk", origin.x, origin.y, origin.z, spacing, spacing, spacing); Image destImg; VolumeConverter::convert(dest, destImg); destImg.write("test00.mrc"); // add fiducials std::ifstream fidFile(fiducialsFile); std::vector fids; char text[4096]; while (fidFile.getline(text, 4096)) { std::stringstream line(text); d3Vector fid; line >> fid.x; line >> fid.y; line >> fid.z; fids.push_back(fid); } for (int f = 0; f 
< fids.size(); f++) { d3Vector fidG = (2.0 * fids[f] - origin) / spacing; drawPoint(dest, fidG, 1000); } // write tomogram with fiducials into test01.vtk VtkHelper::writeVTK( dest, "test01.vtk", origin.x, origin.y, origin.z, spacing, spacing, spacing); // add particles const double z_offset = 450.0; MetaDataTable partMdt; partMdt.read(particlesStar); for (int p = 0; p < partMdt.numberOfObjects(); p++) { d3Vector partCoord, partOff; partMdt.getValue(EMDL_IMAGE_COORD_X, partCoord.x, p); partMdt.getValue(EMDL_IMAGE_COORD_Y, partCoord.y, p); partMdt.getValue(EMDL_IMAGE_COORD_Z, partCoord.z, p); partMdt.getValue(EMDL_ORIENT_ORIGIN_X, partOff.x, p); partMdt.getValue(EMDL_ORIENT_ORIGIN_Y, partOff.y, p); partMdt.getValue(EMDL_ORIENT_ORIGIN_Z, partOff.z, p); d3Vector partPos = partCoord + partOff; partPos.z -= z_offset; d3Vector posVol = (partPos - origin) / spacing; drawPoint(dest, posVol, 1000); } VtkHelper::writeVTK( dest, "test02.vtk", origin.x, origin.y, origin.z, spacing, spacing, spacing); return RELION_EXIT_SUCCESS; } void drawPoint(Volume& dest, d3Vector p, RFLOAT val) { const int w = dest.dimx; const int h = dest.dimy; const int d = dest.dimz; if (p.x > 0.0 && p.y > 0.0 && p.z > 0.0 && p.x < w-1 && p.y < h-1 && p.z < d-1) { int x0 = (int) p.x; int y0 = (int) p.y; int z0 = (int) p.z; int x1 = (int) p.x + 1; int y1 = (int) p.y + 1; int z1 = (int) p.z + 1; const double ix1 = p.x - x0; const double iy1 = p.y - y0; const double iz1 = p.z - z0; const double ix0 = 1.0 - ix1; const double iy0 = 1.0 - iy1; const double iz0 = 1.0 - iz1; dest(x0,y0,z0) += val * ix0 * iy0 * iz0; dest(x1,y0,z0) += val * ix1 * iy0 * iz0; dest(x0,y1,z0) += val * ix0 * iy1 * iz0; dest(x1,y1,z0) += val * ix1 * iy1 * iz0; dest(x0,y0,z1) += val * ix0 * iy0 * iz1; dest(x1,y0,z1) += val * ix1 * iy0 * iz1; dest(x0,y1,z1) += val * ix0 * iy1 * iz1; dest(x1,y1,z1) += val * ix1 * iy1 * iz1; } } relion-3.1.3/src/apps/vis_Ewald_weight.cpp000066400000000000000000000047471411340063500205410ustar00rootroot00000000000000 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_PNG #include #endif #include using namespace gravis; int main(int argc, char *argv[]) { std::string starFn, outPath; int threads, mg_index, part_index, s_cl; double mask_rad; IOParser parser; try { parser.setCommandLine(argc, argv); parser.addSection("General options"); starFn = parser.getOption("--i", "Input particle *.star file"); mask_rad = textToDouble(parser.getOption("--rad", "Mask radius [A]", "50")); mg_index = textToInteger(parser.getOption("--m", "Micrograph index", "0")); part_index = textToInteger(parser.getOption("--p", "Particle index", "0")); s_cl = textToInteger(parser.getOption("--s", "Box size (overrides particle file)", "-1")); threads = textToInteger(parser.getOption("--j", "Number of threads", "1")); outPath = parser.getOption("--o", "Output path"); parser.checkForErrors(); } catch (RelionError XE) { parser.writeUsage(std::cout); std::cerr << XE; return RELION_EXIT_FAILURE; } ObservationModel obsModel; MetaDataTable mdt0; ObservationModel::loadSafely(starFn, obsModel, mdt0); std::vector allMdts = StackHelper::splitByMicrographName(mdt0); const int optGroup = obsModel.getOpticsGroup(allMdts[mg_index], part_index); const int s = s_cl > 0? 
s_cl : obsModel.getBoxSize(optGroup); const int sh = s/2 + 1; const double angpix = obsModel.getPixelSize(optGroup); CTF ctf; ctf.readByGroup(allMdts[mg_index], &obsModel, part_index); std::cout << "drawing A...\n"; Image img0(sh,s), img1(sh,s); ctf.applyWeightEwaldSphereCurvature(img0.data, s, s, angpix, 2*mask_rad); ctf.applyWeightEwaldSphereCurvature_new(img1.data, s, s, angpix, 2*mask_rad); Image img0Full, img1Full; FftwHelper::decenterDouble2D(img0(), img0Full()); FftwHelper::decenterDouble2D(img1(), img1Full()); VtkHelper::writeVTK(img0Full(), outPath + "_Ewald_weight_0.vtk"); VtkHelper::writeVTK(img1Full(), outPath + "_Ewald_weight_1.vtk"); return RELION_EXIT_SUCCESS; } relion-3.1.3/src/apps/vis_delocalisation.cpp000066400000000000000000000134261411340063500211220ustar00rootroot00000000000000 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_PNG #include #endif #include using namespace gravis; Image visualiseBand( CTF& ctf, int s, double r0, double r1, double flank, double angpix, std::string outPath, std::string tag, bool delocSupp, double mask_rad, bool writeVtks); void pasteImage(const Image& src, Image& dest, int x0, int y0); int main(int argc, char *argv[]) { std::string starFn, outPath; int threads, mg_index, part_index, s_cl; double band_width, band_step, mask_rad; bool writeVtks, delocSupp; IOParser parser; try { parser.setCommandLine(argc, argv); parser.addSection("General options"); starFn = parser.getOption("--i", "Input particle *.star file"); delocSupp = parser.checkOption("--ds", "Apply delocalisation suppression"); mask_rad = textToDouble(parser.getOption("--rad", "Mask radius [Px]", "50")); mg_index = textToInteger(parser.getOption("--m", "Micrograph index", "0")); part_index = textToInteger(parser.getOption("--p", "Particle index", "0")); s_cl = textToInteger(parser.getOption("--s", "Box size (overrides particle file)", "-1")); band_width = textToDouble(parser.getOption("--bw", "Width of each frequency band [Px]", "50")); band_step = textToDouble(parser.getOption("--bs", "Falloff of frequency bands [Px]", "15")); writeVtks = parser.checkOption("--write_vtk", "Write VTK files for individual images"); threads = textToInteger(parser.getOption("--j", "Number of threads", "1")); outPath = parser.getOption("--o", "Output path"); parser.checkForErrors(); } catch (RelionError XE) { parser.writeUsage(std::cout); std::cerr << XE; return RELION_EXIT_FAILURE; } ObservationModel obsModel; MetaDataTable mdt0; ObservationModel::loadSafely(starFn, obsModel, mdt0); std::vector allMdts = StackHelper::splitByMicrographName(mdt0); const int optGroup = obsModel.getOpticsGroup(allMdts[mg_index], part_index); const int s = s_cl > 0? 
s_cl : obsModel.getBoxSize(optGroup); const int sh = s/2 + 1; const double angpix = obsModel.getPixelSize(optGroup); CTF ctf; ctf.readByGroup(allMdts[mg_index], &obsModel, part_index); int bandCount = sh / band_width + 1; Image output(3 * s, (2 + bandCount)*s); Image square = visualiseBand( ctf, s, 0, 2*s, band_step, angpix, outPath, "full-square", delocSupp, mask_rad, writeVtks); Image circle = visualiseBand( ctf, s, 0, sh, band_step, angpix, outPath, "full-circle", delocSupp, mask_rad, writeVtks); pasteImage(square, output, 0, 0*s); pasteImage(circle, output, 0, 1*s); for (int b = 0; b < bandCount; b++) { double r0 = b * band_width; double r1 = (b + 1) * band_width; std::stringstream sts; sts << b; Image band = visualiseBand( ctf, s, r0, r1, band_step, angpix, outPath, "band_"+sts.str(), delocSupp, mask_rad, writeVtks); pasteImage(band, output, 0, (b+2)*s); } #ifdef HAVE_PNG { tImage pngOut(output.data.xdim, output.data.ydim); pngOut.fill(dRGB(0.f)); for (int y = 0; y < output.data.ydim; y++) for (int x = 0; x < output.data.xdim; x++) { double c = output(y,x); //pngOut(x,y) = fRGB(std::max(c,0.0), c*c, std::max(-c,0.0)); pngOut(x,y) = ColorHelper::signedToRedBlue(c); } pngOut.writePNG(outPath + "_all.png"); } #endif return RELION_EXIT_SUCCESS; } Image visualiseBand( CTF& ctf, int s, double r0, double r1, double flank, double angpix, std::string outPath, std::string tag, bool delocSupp, double mask_rad, bool writeVtks) { const int sh = s/2 + 1; Image one(sh,s); one.data.initConstant(1); Image mask = FilterHelper::raisedCosEnvRingFreq2D(one, r0, r1, flank); Image ctfImg(sh,s), ctfImgFull(s,s); ctf.getFftwImage(ctfImg(), s, s, angpix); if (delocSupp) { DelocalisationHelper::maskOutsideBox(ctf, mask_rad, angpix, s, ctfImg(), 0.0, 0.0); } ctfImg.data *= mask.data; FftwHelper::decenterDouble2D(ctfImg(), ctfImgFull()); if (writeVtks) VtkHelper::writeVTK(ctfImgFull, outPath + "_" + tag + "_ctf.vtk"); Image ctfImgComplex(sh,s); for (int y = 0; y < s; y++) for (int x = 0; x < sh; x++) { ctfImgComplex(y,x) = ctfImg(y,x); } Image psf, psfFull; NewFFT::inverseFourierTransform(ctfImgComplex(), psf(), NewFFT::Both); FftwHelper::decenterFull(psf(), psfFull()); if (writeVtks) VtkHelper::writeVTK(psfFull, outPath + "_" + tag + "_psf.vtk"); Image deloc = DelocalisationHelper::plotDelocalisation(ctf, mask, angpix); Image delocFull; FftwHelper::decenterFull(deloc(), delocFull()); if (writeVtks) VtkHelper::writeVTK(delocFull, outPath + "_" + tag + "_deloc.vtk"); Image out(3*s, s); pasteImage(ctfImgFull, out, 0, 0); pasteImage(FilterHelper::normaliseToUnitIntervalSigned(psfFull), out, s, 0); pasteImage(FilterHelper::normaliseToUnitIntervalSigned(delocFull), out, 2*s, 0); return out; } void pasteImage(const Image& src, Image& dest, int x0, int y0) { const int ws = src.data.xdim; const int hs = src.data.ydim; const int wd = dest.data.xdim; const int hd = dest.data.ydim; for (int y = 0; y < hs; y++) for (int x = 0; x < ws; x++) { const int xx = x0 + x; const int yy = y0 + y; if (xx >= 0 && xx < wd && yy >= 0 && yy < hd) { dest(yy,xx) = src(y,x); } } } relion-3.1.3/src/args.cpp000066400000000000000000000273001411340063500152340ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. 
Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ /*************************************************************************** * * Authors: Carlos Oscar S. Sorzano (coss@cnb.csic.es) * * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA * 02111-1307 USA * * All comments concerning this program package may be sent to the * e-mail address 'xmipp@cnb.csic.es' ***************************************************************************/ #include "src/args.h" #include "src/gcc_version.h" #include "src/matrix1d.h" #include // Get parameters from the command line ==================================== std::string getParameter(int argc, char **argv, const std::string param, const std::string option) { int i = 0; int i_found = -1; while (i < argc) { // std::cout << i << " " << i_found << " " << argv[i] << " looking for " << param << std::endl; if (strcmp(param.c_str(), argv[i]) == 0) { if (i_found != -1) { std::cerr << "WARNING: Command-line option " << param << " was specified more than once. The value specified the last is used." 
<< std::endl; } i_found = i; } i++; } if (i_found > 0 && i_found < argc - 1) { return argv[i_found + 1]; } else { if (option != "NULL") { return (std::string)option; } else { std::string auxstr; auxstr = (std::string)"Argument " + param + " not found or invalid argument"; REPORT_ERROR(auxstr); } } } // Checks if a boolean parameter was included the command line ============= bool checkParameter(int argc, char **argv, std::string param) { int i = 0; while ((i < argc) && (strcmp(param.c_str(), argv[i]) != 0)) i++; if (i < argc) return(true); else return(false); } IOParser::IOParser() { clear(); } IOParser::IOParser(const IOParser &in) { copy(in); } IOParser& IOParser::operator= (const IOParser &in) { copy(in); return (*this); } IOParser::~IOParser() { clear(); } void IOParser::copy(const IOParser &in) { options = in.options; usages = in.usages; optionals = in.optionals; defaultvalues = in.defaultvalues; argc = in.argc; argv = in.argv; error_messages = in.error_messages; warning_messages = in.warning_messages; current_section = in.current_section; section_names = in.section_names; section_numbers = in.section_numbers; } void IOParser::clear() { argc = 0; argv = NULL; options.clear(); usages.clear(); optionals.clear(); defaultvalues.clear(); error_messages.clear(); warning_messages.clear(); section_names.clear(); section_numbers.clear(); current_section = 0; } void IOParser::setCommandLine(int _argc, char** _argv) { argc = _argc; argv = _argv; // Print version of software and exit if ( checkParameter(argc, argv, "--version")) { PRINT_VERSION_INFO(); exit(0); } // Dirty hack to get pipeline control for all programs... if (checkParameter(argc, argv, "--pipeline_control")) pipeline_control_outputname = getParameter(argc, argv, "--pipeline_control"); else pipeline_control_outputname = ""; } void IOParser::addOption(std::string option, std::string usage, std::string defaultvalue, bool hidden) { if (hidden) { hiddenOptions.push_back(option); } else { if (section_names.size() == 0) REPORT_ERROR("IOParser::addOption: ERROR First add a section to the parser, then the options!"); options.push_back(option); usages.push_back(usage); section_numbers.push_back(current_section); if (defaultvalue == "NULL") { optionals.push_back(false); defaultvalues.push_back(" "); } else { optionals.push_back(true); defaultvalues.push_back((std::string)defaultvalue); } } } int IOParser::addSection(std::string name) { current_section = section_names.size(); section_names.push_back(name); return current_section; } /** Set the current section to this number */ void IOParser::setSection(int number) { current_section = number; } bool IOParser::optionExists(std::string option) { for (int ii = 0; ii < options.size(); ii++) if (strcmp((options[ii]).c_str(), option.c_str()) == 0) return true; for (int ii = 0; ii < hiddenOptions.size(); ii++) if (strcmp((hiddenOptions[ii]).c_str(), option.c_str()) == 0) return true; return false; } std::string IOParser::getOption(std::string option, std::string usage, std::string defaultvalue, bool hidden) { // If this option did not exist yet, add it to the list if (!optionExists(option)) addOption(option, usage, defaultvalue, hidden); int i = 0; int i_found = -1; while (i < argc) { if (strcmp(option.c_str(), argv[i]) == 0) { if (i_found != -1) { std::cerr << "WARNING: Command-line option " << option << " was specified more than once. The value specified the last is used." 
<< std::endl; } i_found = i; } i++; } if (i_found > 0 && i_found < argc - 1) { return argv[i_found + 1]; } else { if (defaultvalue != "NULL") { return (std::string)defaultvalue; } else { std::string auxstr; auxstr = (std::string)"ERROR: Argument " + option + " not found or invalid argument"; error_messages.push_back(auxstr); return ""; } } } // Checks if a boolean parameter was included the command line ============= bool IOParser::checkOption(std::string option, std::string usage, std::string defaultvalue, bool hidden) { // If this option did not exist yet, add it to the list if (!optionExists(option)) addOption(option, usage, defaultvalue, hidden); return checkParameter(argc, argv, option); } void IOParser::writeCommandLine(std::ostream &out) { for (int i = 1; i < argc; i++) out << argv[i] << " "; out << std::endl; } bool IOParser::checkForErrors(int verb) { if(checkParameter(argc, argv, "--version")) { std::cout << "RELION version " << g_RELION_VERSION << std::endl; exit(0); } if(argc==1 || (argc==2 && checkParameter(argc, argv, "--continue")) || checkParameter(argc, argv, "--help") || checkParameter(argc, argv, "-h")) { writeUsage(std::cout); exit(0); } // First check the command line for unknown arguments checkForUnknownArguments(); // First print warning messages if (warning_messages.size() > 0) { if (verb > 0) { std::cerr << "The following warnings were encountered upon command-line parsing: " << std::endl; for (unsigned int i = 0; i < warning_messages.size(); ++i) std::cerr << warning_messages[i] << std::endl; } } // Then check for error messages if (error_messages.size() > 0) { if (verb > 0) { std::cerr << "The following errors were encountered upon command-line parsing: " << std::endl; for (unsigned int i = 0; i < error_messages.size(); ++i) std::cerr << error_messages[i] << std::endl; } return true; } else { return false; } } void IOParser::checkForUnknownArguments() { for (int i = 1; i < argc; i++) { // Valid options should start with "--" bool is_ok = true; if (strncmp("--", argv[i], 2) == 0) { if (!optionExists((std::string)argv[i]) && !(strncmp("--pipeline_control", argv[i], 18) == 0) ) { is_ok = false; } } // If argv[i] starts with one "-": check it is a number and argv[i-1] is a valid option // or whether this is perhaps else if (strncmp("--", argv[i], 1) == 0) { float testval; // test whether this is a number int is_a_number = sscanf(argv[i], "%f", &testval); if (is_a_number) { // check whether argv[i-1] is a valid option if (!optionExists(argv[i-1])) is_ok = false; } else is_ok = false; } if (!is_ok) { std::string auxstr; auxstr = (std::string)"WARNING: Option " + argv[i] + "\tis not a valid RELION argument"; warning_messages.push_back(auxstr); } } } void IOParser::writeUsageOneLine(int i, std::ostream &out) { std::string aux = " "; aux += options[i]; if (optionals[i]) { aux += " ("; aux += defaultvalues[i]; aux += ")"; } out << std::setw(35) << aux; out << " : "; out << usages[i]; out << std::endl; } void IOParser::writeUsageOneSection(int section, std::ostream &out) { // First write all compulsory options //out << "+++ Compulsory:" << std::endl; for (int i = 0; i < options.size(); i++) { if (!optionals[i] && section_numbers[i] == section) writeUsageOneLine(i, out); } // Then write optional ones //out << "+++ Optional (defaults between parentheses):" << std::endl; for (int i = 0; i < options.size(); i++) { if (optionals[i] && section_numbers[i] == section) writeUsageOneLine(i, out); } } void IOParser::writeUsage(std::ostream &out) { out << "+++ RELION: command line 
arguments (with defaults for optional ones between parantheses) +++"< > &untangled) { // Handle GPU (device) assignments for each rank, if speficied size_t pos = 0; std::string delim = ":"; std::vector < std::string > allRankIDs; std::string thisRankIDs, thisThreadID; while ((pos = tangled.find(delim)) != std::string::npos) { thisRankIDs = tangled.substr(0, pos); // std::cout << "in loop " << thisRankIDs << std::endl; tangled.erase(0, pos + delim.length()); allRankIDs.push_back(thisRankIDs); } allRankIDs.push_back(tangled); untangled.resize(allRankIDs.size()); //Now handle the thread assignements in each rank for (int i = 0; i < allRankIDs.size(); i++) { pos=0; delim = ","; // std::cout << "in 2nd loop "<< allRankIDs[i] << std::endl; while ((pos = allRankIDs[i].find(delim)) != std::string::npos) { thisThreadID = allRankIDs[i].substr(0, pos); // std::cout << "in 3rd loop " << thisThreadID << std::endl; allRankIDs[i].erase(0, pos + delim.length()); untangled[i].push_back(thisThreadID); } untangled[i].push_back(allRankIDs[i]); } #ifdef DEBUG std::cout << "untangled.size() == " << untangled.size() << std::endl; for (int irank = 0; irank < untangled.size(); irank++) { std::cout << "untangled[" << irank << "]: "; for (int ithread = 0; ithread < untangled[irank].size(); ithread++) std::cout << untangled[irank][ithread] << " "; std::cout << std::endl; } #endif } relion-3.1.3/src/args.h000066400000000000000000000167661411340063500147170ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ /*************************************************************************** * * Authors: Carlos Oscar S. Sorzano (coss@cnb.csic.es) * * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA * 02111-1307 USA * * All comments concerning this program package may be sent to the * e-mail address 'xmipp@cnb.csic.es' ***************************************************************************/ #ifndef ARGS_H #define ARGS_H #include #include #include #include #include //ROB #include #include "src/funcs.h" #include "src/matrix1d.h" template class Matrix1D; /** @defgroup Arguments Functions for parsing the command line * * These functions help you to manage the command line parameters */ /** Get parameters from the command line. * @ingroup CommandLineFunctions * * This function assumes that the command line is structured in such a way that * for each parameter a block like "-param " is defined. The label * "param" can be substituted by any other one you like. If the parameter is * optional then this function allows you to define a default value. If no * default value is supplied and the parameter is not specified in the command * line, then an exception is thrown. You may change the default exception. * * You may also indicate that in case of error no exception is raised and force * the program to abort (use the exit variable). * * @code * m_param = textToFloat(getParameter(argc, argv, "-m")); * * // Get compulsory parameter "-m" * m_param = textToFloat(getParameter(argc, argv, "-m","2.65")); * * // Optional parameter, if no parameter is given it takes 2.65 by default * m_param = textToFloat(getParameter(argc, argv, "-m", NULL, 6001, "-m parameter not \ * found. I'm going out", TRUE); * * // Compulsory parameter, if not found give an special error message and exit * // the program * * @endcode */ std::string getParameter(int argc, char** argv, std::string param, std::string option = "NULL"); /** Get boolean parameters from the command line. * @ingroup CommandLineFunctions * * This function assumes that the command line is structured in such a way that * for each parameter a block like "-param" is defined. The label "param" can be * substituted by any other one you like. It might be used to look for a boolean * parameter, for instance: * * -verbose means that verbose functionality is set (TRUE) * * @code * verbose = checkParameter(argc, argv, "-verbose")); * * // checks if "-verbose" was supplied in the command line. 
If -verbose was * // supplied the function returns TRUE (1), otherwise returns FALSE (0) * @endcode */ bool checkParameter(int argc, char** argv, std::string param); class IOParser { private: std::vector options; std::vector hiddenOptions; std::vector usages; std::vector optionals; std::vector defaultvalues; std::vector section_numbers; std::vector section_names; std::vector error_messages; std::vector warning_messages; int current_section; // The original command line int argc; char** argv; public: /** Constructor */ IOParser(); /** Copy constructor */ IOParser(const IOParser &in); /**Assignment operator */ IOParser& operator= (const IOParser &in); /** Destructor */ ~IOParser(); /** Copy everything from input to this */ void copy(const IOParser &in); /** Clear object */ void clear(); /** Store pointer to command line */ void setCommandLine(int _argc, char** _argv); /** Check whether option exists in the stored options */ bool optionExists(std::string option); /** Add a section to the parser, and set the current section to the newly created one, returns number of current section */ int addSection(std::string name); /** Set the current section to this number */ void setSection(int number); /** Get the current section to this number */ int getSection() { return current_section; } /** Add an option to the object list */ void addOption(std::string option, std::string usage, std::string defaultvalue = "NULL", bool hidden = false); /** Get the value from the command line, and adds option to the list if it did not yet exist */ std::string getOption(std::string option, std::string usage, std::string defaultvalue = "NULL", bool hidden = false); /** Returns true if option was given and false if not, and adds option to the list if it did not yet exist */ bool checkOption(std::string option, std::string usage, std::string defaultvalue = "false", bool hidden = false); /** Checks the whole command line and reports an error if it contains an undefined option */ bool commandLineContainsUndefinedOption(); /** Write the stored command line to outstream */ void writeCommandLine(std::ostream &outstream); /** Returns true is there were any error messages (and prints them if verb>0 */ bool checkForErrors(int verb = 1); /** Check the whole command line for invalid arguments, if found add to the error messages */ void checkForUnknownArguments(); /** Write one line of the usage to outstream */ void writeUsageOneLine(int i, std::ostream &out); /** Write one section of the usage to outstream */ void writeUsageOneSection(int section, std::ostream &out); /** Write the usage for all options to outstream */ void writeUsage(std::ostream &outstream); }; /* * Takes a string with device-indices which is * : delimited for ranks * , delimited for threads within each rank * and outputs a rank-major array which supplies * a mapping as input for distribution of ranks * and threads over the availiable/specfied GPUs. */ void untangleDeviceIDs(std::string &tangled, std::vector < std::vector < std::string > > &untangled); #endif relion-3.1.3/src/assembly.cpp000066400000000000000000000361741411340063500161300ustar00rootroot00000000000000/* * assembly.cpp * * Created on: Apr 16, 2013 * Author: "Sjors H.W. 
Scheres" */ #include "src/assembly.h" void Atom::clear() { name = ""; occupancy = bfactor = 0.; coords.clear(); } Matrix1D Atom::getCoordinates() { return coords; } void Residue::clear() { number = -1; name = ""; atoms.clear(); } long int Residue::addAtom(std::string atomname, RFLOAT x, RFLOAT y, RFLOAT z, RFLOAT occ, RFLOAT bfac) { Atom atom(atomname); atom.coords = vectorR3(x,y,z); atom.occupancy = occ; atom.bfactor = bfac; long int result = atoms.size(); atoms.push_back(atom); return result; } void Molecule::clear() { name = ""; alt_name = ""; residues.clear(); } long int Molecule::insertResidue(Residue &res, int pos) { long int result = residues.size(); residues.insert(residues.begin() + pos, res); return result; } long int Molecule::addResidue(Residue &res) { long int result = residues.size(); residues.push_back(res); return result; } long int Molecule::addResidue(std::string name, int resnum) { Residue residue(name, resnum); long int result = residues.size(); residues.push_back(residue); return result; } void Molecule::insertResidues(Molecule add, int residue_start, int residue_end) { int add_nResidues, this_nResidues; int ires_start = -1, ires_end = -1; add_nResidues = add.numberOfResidues(); if (residue_start < 0 && residue_end < 0) { // Add whole chain ires_start = 0; ires_end = add_nResidues-1; } else { // Find beginning and ending ires for (int ires = 0; ires < add_nResidues; ires++) { if (residue_start == add.residues[ires].number) ires_start = ires; if (residue_end == add.residues[ires].number) ires_end = ires; } } if (ires_start < 0 || ires_end < 0) { std::cerr << " ires_start= " << ires_start << " ires_end= " << ires_end << std::endl; std::cerr << " residue_start= " << residue_start << " residue_end= " << residue_end << std::endl; REPORT_ERROR("OrigamiBuilder::insertBases ERROR: negative ires_start or ires_end"); } for (int ires = ires_start; ires <= ires_end; ires++) { int my_res = add.residues[ires].number; bool have_inserted=false; for (int ii = 0; ii < numberOfResidues(); ii++) { if (residues[ii].number > my_res) { insertResidue(add.residues[ires], ii); have_inserted = true; break; } } if (!have_inserted) addResidue(add.residues[ires]); } } void Assembly::clear() { name = ""; molecules.clear(); } long int Assembly::addMolecule(std::string _name, std::string alt_name) { Molecule molecule(_name, alt_name); long int result = molecules.size(); molecules.push_back(molecule); return result; } long int Assembly::addMolecule(Molecule &toadd) { std::string ori_toadd_name = simplify(toadd.name); // Check whether the name of this molecule is unique // If not add a suffix to it bool is_uniq = false; int suffix = 0; while (!is_uniq) { suffix++; is_uniq = true; for (int imol = 0; imol < molecules.size(); imol++) { if (molecules[imol].name == toadd.name) { is_uniq = false; break; } } if (!is_uniq) { toadd.name = ori_toadd_name + integerToString(suffix); } } long int result = molecules.size(); molecules.push_back(toadd); return result; } long int Assembly::numberOfMolecules() const { return molecules.size(); } long int Assembly::numberOfResidues() const { long int sum = 0; for (int imol = 0; imol < molecules.size(); imol++) sum += molecules[imol].residues.size(); return sum; } long int Assembly::numberOfAtoms() const { long int sum = 0; for (int imol = 0; imol < molecules.size(); imol++) for (int ires = 0; ires < molecules[imol].residues.size(); ires++) sum += molecules[imol].residues[ires].atoms.size(); return sum; } void Assembly::printInformation(std::ostream& out) const { out << " 
Assembly: " << name << std::endl; out << " - Number of molecules : " << numberOfMolecules() << std::endl; out << " - Number of residues : " << numberOfResidues() << std::endl; out << " - Number of atoms : " << numberOfAtoms() << std::endl; } void Assembly::readPDB(std::string filename, bool use_segid_instead_of_chainid, bool do_sort) { // Clear existing object clear(); std::ifstream fh(filename.c_str(), std::ios_base::in); if (fh.fail()) REPORT_ERROR( (std::string) "Assembly::read: File " + filename + " does not exists" ); char line[100]; bool is_sorted = true; int old_resnum = -1; long int mol_id = -1; long int res_id = -1; std::string molname, alt_molname, old_molname=""; fh.seekg(0); // Loop over all lines while (fh.getline (line, 600)) { // Only look at lines with an ATOM label std::string record(line,0,6); if (record == "ATOM ") { // ======================== OLD VERSION ======================== /* char snum[5]={'\0'}; char atomname[5]={'\0'}; char resname[4]={'\0'}; char chainID[1]={'\0'}; int resnum=-1; char insertion_residue_code; float x,y,z; float occupancy, bfactor; char segID[5]={'\0'}, element[3]={'\0'}, charge[3]={'\0'}; int nr= sscanf(line, "ATOM %5s %4s %3s %1s%4d%1c %8f%8f%8f%6f%6f %4s%2s%2s", snum, atomname, resname, chainID, &resnum, &insertion_residue_code, &x, &y, &z, &occupancy, &bfactor, segID, element, charge); */ // ======================== OLD VERSION ======================== // ============= May 7, 2015 - Shaoda - Modified according to wwPDB Format v3.3, v3.2 and v2.3 ================ // sscanf: spaces are ignored! May not get variables at correct subscripts in the string! // last 3 objects are incorrect in the old version? No segID in PDB documentation... if(strlen(line) < 20) { std::string str(line); REPORT_ERROR("Assembly::readPDB ERROR: too few entries on ATOM line:" + str); } char snum[6] = "", atomname[5] = "", altLoc[2] = "", resname[4] = "", chainID[2] = ""; int resnum = -1; char insertion_residue_code[2] = ""; float x, y, z, occupancy, bfactor; char segID[5] = "", element[3] = "", charge[3] = ""; /* int nr= sscanf(line, "ATOM %5s %4s%1s%3s %1s%4d%1s %8f%8f%8f%6f%6f %4s%2s%2s", snum, atomname, altLoc, resname, chainID, &resnum, insertion_residue_code, &x, &y, &z, &occupancy, &bfactor, segID, element, charge); */ int nr = 0; nr += sscanf(line + 6, "%5[^\n]s", snum); nr += sscanf(line + 12, "%4[^\n]s", atomname); nr += sscanf(line + 16, "%1[^\n]s", altLoc); nr += sscanf(line + 17, "%3[^\n]s", resname); nr += sscanf(line + 21, "%1[^\n]s", chainID); nr += sscanf(line + 22, "%4d", &resnum); nr += sscanf(line + 26, "%1[^\n]s", insertion_residue_code); nr += sscanf(line + 30, "%8f%8f%8f%6f%6f", &x, &y, &z, &occupancy, &bfactor); nr += sscanf(line + 72, "%4[^\n]s", segID); nr += sscanf(line + 76, "%2[^\n]s", element); nr += sscanf(line + 78, "%2[^\n]s", charge); snum[5] = '\0'; atomname[4] = '\0'; altLoc[1] = '\0'; resname[3] = '\0'; chainID[1] = '\0'; insertion_residue_code[1] = '\0'; segID[4] = '\0'; element[2] = '\0'; charge[2] = '\0'; /* std::cout << "snum = " << snum << ", " << "atomname = " << atomname << ", " << "altLoc = " << altLoc << ", " << "resname = " << resname << ", " << "chainID = " << chainID << ", " << "resnum = " << resnum << ", " << "insertion_residue_code = " << insertion_residue_code << ", " << "x = " << x << ", " << "y = " << y << ", " << "z = " << z << ", " << "occupancy = " << occupancy << ", " << "bfactor = " << bfactor << ", " << "segID = " << segID << ", " << "element = " << element << ", " << "charge = " << charge << ", " << std::endl; 
*/ // ============= May 7, 2015 - Shaoda - Modified according to wwPDB Format v3.3, v3.2 and v2.3 ================ //#define DEBUG if (nr < 5) { std::string str(line); REPORT_ERROR("Assembly::readPDB ERROR: too few entries on ATOM line:" + str); } if (resnum < 0) { REPORT_ERROR("Assembly::readPDB ERROR: negative residue number encountered"); } std::string str_chainID(chainID); std::string str_segID(segID); std::string str_atomname(atomname); std::string str_resname(resname); // 1. Get mol_id: to which molecule does this atom belong? // Allow for non-ordered atoms belonging to the same molecule... // To speed up things: first check whether the chainID/segID is the same as the previous line molname = (use_segid_instead_of_chainid) ? str_segID : str_chainID; alt_molname = (use_segid_instead_of_chainid) ? str_chainID : str_segID; #ifdef DEBUG std::cerr << " molname= " << molname << " alt_molname= " << alt_molname << " str_chainID= " << str_chainID << " chainID= "<< chainID< 99999) atomnum -= 99999; char chainID = molecules[imol].name[0]; // ======================== OLD VERSION ======================== /* fprintf(file, "ATOM %5d %-4s %3s %1c%4d %8.3f%8.3f%8.3f%6.2f%6.2f %4s\n", atomnum, molecules[imol].residues[ires].atoms[iatom].name.c_str(), molecules[imol].residues[ires].name.c_str(), chainID, molecules[imol].residues[ires].number, XX(molecules[imol].residues[ires].atoms[iatom].coords), YY(molecules[imol].residues[ires].atoms[iatom].coords), ZZ(molecules[imol].residues[ires].atoms[iatom].coords), molecules[imol].residues[ires].atoms[iatom].occupancy, molecules[imol].residues[ires].atoms[iatom].bfactor, molecules[imol].name.c_str()); */ // ======================== OLD VERSION ======================== // ============= May 7, 2015 - Shaoda - Modified according to wwPDB Format v3.3, v3.2 and v2.3 ================ // last 3 objects are incorrect in the old version? No segID in PDB documentation... char atomname[5] = "", element[2] = ""; strcpy(atomname, molecules[imol].residues[ires].atoms[iatom].name.c_str()); element[0] = ' '; element[1] = '\0'; for(int ii = 0; ii < 4; ii++) { if(atomname[ii] != ' ') { element[0] = atomname[ii]; break; } } fprintf(file, "ATOM %5ld %-4s %3s %1c%4d %8.3f%8.3f%8.3f%6.2f%6.2f %2s \n", atomnum, molecules[imol].residues[ires].atoms[iatom].name.c_str(), molecules[imol].residues[ires].name.c_str(), chainID, molecules[imol].residues[ires].number, XX(molecules[imol].residues[ires].atoms[iatom].coords), YY(molecules[imol].residues[ires].atoms[iatom].coords), ZZ(molecules[imol].residues[ires].atoms[iatom].coords), molecules[imol].residues[ires].atoms[iatom].occupancy, molecules[imol].residues[ires].atoms[iatom].bfactor, element); // ============= May 7, 2015 - Shaoda - Modified according to wwPDB Format v3.3, v3.2 and v2.3 ================ } } if (imol + 1 < molecules.size()) fprintf(file, "%s\n", "TER"); } fprintf(file, "%s\n", "END"); fclose(file); } void Assembly::join(Assembly &tojoin) { for (int imol = 0; imol < tojoin.molecules.size(); imol++) addMolecule(tojoin.molecules[imol]); } void Assembly::sortResidues() { // Loop over all molecules for (int imol = 0; imol < molecules.size(); imol++) { // A. 
Sort all Residues std::vector > vp; for (int ires = 0; ires < molecules[imol].residues.size(); ires++) { vp.push_back(std::make_pair(molecules[imol].residues[ires].number, ires)); } std::sort(vp.begin(), vp.end()); std::vector new_residues; for (int ires = 0; ires < molecules[imol].residues.size(); ires++) { new_residues.push_back(molecules[imol].residues[vp[ires].second]); } molecules[imol].residues = new_residues; } } void Assembly::applyTransformation(Matrix2D &mat, Matrix1D &shift) { for (int imol = 0; imol < molecules.size(); imol++) { for (int ires = 0; ires < molecules[imol].residues.size(); ires++) { for (int iatom = 0; iatom < molecules[imol].residues[ires].atoms.size(); iatom++) { (molecules[imol].residues[ires].atoms[iatom]).coords = mat * (molecules[imol].residues[ires].atoms[iatom]).coords; (molecules[imol].residues[ires].atoms[iatom]).coords += shift; } } } } relion-3.1.3/src/assembly.h000066400000000000000000000130741411340063500155670ustar00rootroot00000000000000/* * assembly.h * * Created on: Apr 16, 2013 * Author: "Sjors H.W. Scheres" */ #ifndef ASSEMBLY_H_ #define ASSEMBLY_H_ #include #include #include #include #include #include #include #include "src/args.h" #include "src/matrix2d.h" /* * Hierarchical model for a macromolecular assembly, e.g. a DNA origami object * * Assembly * -> Molecule * -> Residue * -> Atom (either a true one or a coarse-grain pseudo-atom) * * */ class Atom { public: // Name of this Atom std::string name; // Coordinates Matrix1D coords; // Occupancy RFLOAT occupancy; // B-factor RFLOAT bfactor; // Empty constructor Atom() { clear(); } // Named constructor Atom(std::string in_name) { clear(); name = in_name; } // Destructor needed for work with vectors ~Atom() { clear(); } // Initialize void clear(); // Get the 3D corrdinates as a POint3D Matrix1D getCoordinates(); }; class Residue { public: // Name of this Residue std::string name; // Number of this Residue int number; // All the Atoms in this Residue std::vector atoms; // Empty Constructor Residue() { clear(); } // Constructor Residue(std::string in_name, int in_number) { clear(); name = in_name; number = in_number; } // Destructor needed for work with vectors ~Residue() { clear(); } // Initialize void clear(); // Add an Atom to this Residue; long int addAtom(std::string atomname, RFLOAT x, RFLOAT y, RFLOAT z, RFLOAT occ = 1.0, RFLOAT bfac = 0.0); int numberOfAtoms() { return atoms.size(); } }; class Molecule { public: // Name of this Molecule std::string name; // Alternative name of this Molecule (either chainID or segID) std::string alt_name; // All the Residues in this Molecule std::vector residues; // Empty Constructor Molecule() { clear(); } // Constructor Molecule(std::string in_name, std::string in_alt_name="") { clear(); name = in_name; alt_name = in_alt_name; } // Destructor needed for work with vectors ~Molecule() { clear(); } // Initialize void clear(); // Number of residues in the molecule long int numberOfResidues() { return residues.size(); } // Insert a Residue at the specified position in this Molecule long int insertResidue(Residue &res, int pos); // Add a Residue to this Molecule long int addResidue(Residue &res); // Add a Residue to this Molecule long int addResidue(std::string name, int resnum); // Insert a stretch of residues from another Molecule based on consecutive residue numbering // If start and end residues are negative: just add the entire molecule void insertResidues(Molecule add, int residue_start = -1, int residue_end = -1); }; class Assembly { public: // Name of 
this Assembly std::string name; // All the Molecules in this Assembly std::vector molecules; // Empty Constructor Assembly() { clear(); } // Named Constructor Assembly(std::string in_name) { clear(); name = in_name; } // Copy constructor Assembly(const Assembly& op) { clear(); *this = op; } // Destructor needed for work with vectors ~Assembly() { clear(); } // Initialize void clear(); // Add a Molecule to this Assembly long int addMolecule(std::string name, std::string alt_name); // Add a Molecule to this Assembly long int addMolecule(Molecule &toadd); // return number of Molecules in the Assembly long int numberOfMolecules() const; // Total number of Atoms long int numberOfAtoms() const; // Total number of Residues long int numberOfResidues() const; // Print some information about the assembly void printInformation(std::ostream& out = std::cout) const; // Read PDB format void readPDB(std::string filename, bool use_segid_instead_of_chainid = false, bool do_sort = true); // Write the Assembly to a PDB file void writePDB(std::string filename); // Combine this Assembly with another one // If there are identical Molecule.name instances, add a number-suffix to the new Assembly's Molecule.name (in the segID) void join(Assembly &tojoin); // Make sure that all Residues within each Molecule are in order w.r.t. their residue number void sortResidues(); // Break Molecules into separate ones if a break larger than maximum_residue_break occurs in the residue numbering // TODO void checkBreaksInResidueNumbering(int maximum_residue_break = 500); // Apply a transformation (first rotation, then shift) void applyTransformation(Matrix2D &mat, Matrix1D &shift); }; #endif /* ASSEMBLY_H_ */ relion-3.1.3/src/autopicker.cpp000066400000000000000000003744661411340063500164700ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include "src/autopicker.h" //#define DEBUG //#define DEBUG_HELIX void ccfPeak::clear() { id = ref = nr_peak_pixel = -1; x = y = r = area_percentage = fom_max = psi = dist = fom_thres = (-1.); ccf_pixel_list.clear(); } bool ccfPeak::isValid() const { // Invalid parameters if ( (r < 0.) || (area_percentage < 0.) || (ccf_pixel_list.size() < 1) ) return false; // TODO: check ccf values in ccf_pixel_list? 
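 // A peak counts as valid only when at least one of its stored pixels exceeds
 // fom_thres; the loop below returns true at the first such pixel.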
for (int id = 0; id < ccf_pixel_list.size(); id++) { if (ccf_pixel_list[id].fom > fom_thres) return true; } return false; } bool ccfPeak::operator<(const ccfPeak& b) const { if (fabs(r - b.r) < 0.01) return (fom_max < b.fom_max); return (r < b.r); } bool ccfPeak::refresh() { RFLOAT x_avg, y_avg; int nr_valid_pixel; area_percentage = (-1.); if (ccf_pixel_list.size() < 1) return false; fom_max = (-99.e99); nr_valid_pixel = 0; x_avg = y_avg = 0.; for (int id = 0; id < ccf_pixel_list.size(); id++) { if (ccf_pixel_list[id].fom > fom_thres) { nr_valid_pixel++; if (ccf_pixel_list[id].fom > fom_max) fom_max = ccf_pixel_list[id].fom; x_avg += ccf_pixel_list[id].x; y_avg += ccf_pixel_list[id].y; } } nr_peak_pixel = nr_valid_pixel; if (nr_valid_pixel < 1) return false; x = x_avg / (RFLOAT)(nr_valid_pixel); y = y_avg / (RFLOAT)(nr_valid_pixel); area_percentage = (RFLOAT)(nr_valid_pixel) / ccf_pixel_list.size(); return true; }; void AutoPicker::read(int argc, char **argv) { parser.setCommandLine(argc, argv); int gen_section = parser.addSection("General options"); fn_in = parser.getOption("--i", "Micrograph STAR file OR filenames from which to autopick particles, e.g. \"Micrographs/*.mrc\""); fn_out = parser.getOption("--pickname", "Rootname for coordinate STAR files", "autopick"); fn_odir = parser.getOption("--odir", "Output directory for coordinate files (default is to store next to micrographs)", "AutoPick/"); angpix = textToFloat(parser.getOption("--angpix", "Pixel size of the micrographs in Angstroms", "1")); particle_diameter = textToFloat(parser.getOption("--particle_diameter", "Diameter of the circular mask that will be applied to the experimental images (in Angstroms, default=automatic)", "-1")); decrease_radius = textToInteger(parser.getOption("--shrink_particle_mask", "Shrink the particle mask by this many pixels (to detect Einstein-from-noise classes)", "2")); outlier_removal_zscore= textToFloat(parser.getOption("--outlier_removal_zscore", "Remove pixels that are this many sigma away from the mean", "8.")); do_write_fom_maps = parser.checkOption("--write_fom_maps", "Write calculated probability-ratio maps to disc (for re-reading in subsequent runs)"); no_fom_limit = parser.checkOption("--no_fom_limit", "Ignore default maximum limit of 30 fom maps being written","false"); do_read_fom_maps = parser.checkOption("--read_fom_maps", "Skip probability calculations, re-read precalculated maps from disc"); do_optimise_scale = !parser.checkOption("--skip_optimise_scale", "Skip the optimisation of the micrograph scale for better prime factors in the FFTs. 
This runs slower, but at exactly the requested resolution."); do_only_unfinished = parser.checkOption("--only_do_unfinished", "Only autopick those micrographs for which the coordinate file does not yet exist"); do_gpu = parser.checkOption("--gpu", "Use GPU acceleration when availiable"); gpu_ids = parser.getOption("--gpu", "Device ids for each MPI-thread","default"); #ifndef CUDA if(do_gpu) { std::cerr << "+ WARNING : Relion was compiled without CUDA of at least version 7.0 - you do NOT have support for GPUs" << std::endl; do_gpu = false; } #endif int ref_section = parser.addSection("References options"); fn_ref = parser.getOption("--ref", "STAR file with the reference names, or an MRC stack with all references, or \"gauss\" for blob-picking",""); angpix_ref = textToFloat(parser.getOption("--angpix_ref", "Pixel size of the references in Angstroms (default is same as micrographs)", "-1")); do_invert = parser.checkOption("--invert", "Density in micrograph is inverted w.r.t. density in template"); psi_sampling = textToFloat(parser.getOption("--ang", "Angular sampling (in degrees); use 360 for no rotations", "10")); lowpass = textToFloat(parser.getOption("--lowpass", "Lowpass filter in Angstroms for the references (prevent Einstein-from-noise!)","-1")); highpass = textToFloat(parser.getOption("--highpass", "Highpass filter in Angstroms for the micrographs","-1")); do_ctf = parser.checkOption("--ctf", "Perform CTF correction on the references?"); intact_ctf_first_peak = parser.checkOption("--ctf_intact_first_peak", "Ignore CTFs until their first peak?"); gauss_max_value = textToFloat(parser.getOption("--gauss_max", "Value of the peak in the Gaussian blob reference","0.1")); healpix_order = textToInteger(parser.getOption("--healpix_order", "Healpix order for projecting a 3D reference (hp0=60deg; hp1=30deg; hp2=15deg)", "1")); symmetry = parser.getOption("--sym", "Symmetry point group for a 3D reference","C1"); int log_section = parser.addSection("Laplacian-of-Gaussian options"); do_LoG = parser.checkOption("--LoG", "Use Laplacian-of-Gaussian filter-based picking, instead of template matching"); LoG_min_diameter = textToFloat(parser.getOption("--LoG_diam_min", "Smallest particle diameter (in Angstroms) for blob-detection by Laplacian-of-Gaussian filter", "-1")); LoG_max_diameter = textToFloat(parser.getOption("--LoG_diam_max", "Largest particle diameter (in Angstroms) for blob-detection by Laplacian-of-Gaussian filter", "-1")); LoG_neighbour_fudge = textToFloat(parser.getOption("--LoG_neighbour", "Avoid neighbouring particles within (the detected diameter + the minimum diameter) times this percent", "100")); LoG_neighbour_fudge /= 100.0; LoG_invert = parser.checkOption("--Log_invert", "Use this option if the particles are white instead of black"); LoG_adjust_threshold = textToFloat(parser.getOption("--LoG_adjust_threshold", "Use this option to adjust the picking threshold: positive for less particles, negative for more", "0.")); LoG_upper_limit = textToFloat(parser.getOption("--LoG_upper_threshold", "Use this option to set the upper limit of the picking threshold", "99999")); LoG_use_ctf = parser.checkOption("--LoG_use_ctf", "Use CTF until the first peak in Laplacian-of-Gaussian picker"); if (do_gpu && do_LoG) { REPORT_ERROR("The Laplacian-of-Gaussian picker does not support GPU acceleration. Please remove --gpu option."); } int helix_section = parser.addSection("Helix options"); autopick_helical_segments = parser.checkOption("--helix", "Are the references 2D helical segments? 
If so, in-plane rotation angles (psi) are estimated for the references."); helical_tube_curvature_factor_max = textToFloat(parser.getOption("--helical_tube_kappa_max", "Factor of maximum curvature relative to that of a circle", "0.25")); helical_tube_diameter = textToFloat(parser.getOption("--helical_tube_outer_diameter", "Tube diameter in Angstroms", "-1")); helical_tube_length_min = textToFloat(parser.getOption("--helical_tube_length_min", "Minimum tube length in Angstroms", "-1")); do_amyloid = parser.checkOption("--amyloid", "Activate specific algorithm for amyloid picking?"); max_local_avg_diameter = textToFloat(parser.getOption("----max_diam_local_avg", "Maximum diameter to calculate local average density in Angstroms", "-1")); int peak_section = parser.addSection("Peak-search options"); min_fraction_expected_Pratio = textToFloat(parser.getOption("--threshold", "Fraction of expected probability ratio in order to consider peaks?", "0.25")); min_particle_distance = textToFloat(parser.getOption("--min_distance", "Minimum distance (in A) between any two particles (default is half the box size)","-1")); max_stddev_noise = textToFloat(parser.getOption("--max_stddev_noise", "Maximum standard deviation in the noise area to use for picking peaks (default is no maximum)","-1")); min_avg_noise = textToFloat(parser.getOption("--min_avg_noise", "Minimum average in the noise area to use for picking peaks (default is no minimum)","-999.")); autopick_skip_side = textToInteger(parser.getOption("--skip_side", "Keep this many extra pixels (apart from particle_size/2) away from the edge of the micrograph ","0")); int expert_section = parser.addSection("Expert options"); verb = textToInteger(parser.getOption("--verb", "Verbosity", "1")); padding = textToInteger(parser.getOption("--pad", "Padding factor for Fourier transforms", "2")); random_seed = textToInteger(parser.getOption("--random_seed", "Number for the random seed generator", "1")); workFrac = textToFloat(parser.getOption("--shrink", "Reduce micrograph to this fraction size, during correlation calc (saves memory and time)", "1.0")); LoG_max_search = textToFloat(parser.getOption("--Log_max_search", "Maximum diameter in LoG-picking multi-scale approach is this many times the min/max diameter", "5.")); extra_padding = textToInteger(parser.getOption("--extra_pad", "Number of pixels for additional padding of the original micrograph", "0")); // Check for errors in the command-line option if (parser.checkForErrors()) REPORT_ERROR("Errors encountered on the command line (see above), exiting..."); if (autopick_helical_segments) { if ( (helical_tube_curvature_factor_max < 0.0001) || (helical_tube_curvature_factor_max > 1.0001) ) REPORT_ERROR("Error: Maximum curvature factor should be 0~1!"); if (!(min_particle_distance > 0.)) REPORT_ERROR("Error: Helical rise and the number of asymmetrical units between neighbouring helical segments should be positive!"); } } void AutoPicker::usage() { parser.writeUsage(std::cout); } void AutoPicker::initialise() { #ifdef TIMING TIMING_A0 = timer.setNew("Initialise()"); TIMING_A1 = timer.setNew("--Init"); TIMING_A2 = timer.setNew("--Read Reference(s)"); TIMING_A3 = timer.setNew("--Read Micrograph(s)"); TIMING_A4 = timer.setNew("--Prep projectors"); TIMING_A5 = timer.setNew("autoPickOneMicrograph()"); TIMING_A6 = timer.setNew("--Read Micrographs(s)"); TIMING_A7 = timer.setNew("--Micrograph computestats"); TIMING_A8 = timer.setNew("--CTF-correct micrograph"); TIMING_A9 = timer.setNew("--Resize CCF and PSI-maps"); TIMING_B1 = 
timer.setNew("--FOM prep"); TIMING_B2 = timer.setNew("--Read reference(s) via FOM"); TIMING_B3 = timer.setNew("--Psi-dep correlation calc"); TIMING_B4 = timer.setNew("----ctf-correction"); TIMING_B5 = timer.setNew("----first psi"); TIMING_B6 = timer.setNew("----rest of psis"); TIMING_B7 = timer.setNew("----write fom maps"); TIMING_B8 = timer.setNew("----peak-prune/-search"); TIMING_B9 = timer.setNew("--final peak-prune"); #endif #ifdef TIMING timer.tic(TIMING_A0); timer.tic(TIMING_A1); #endif if (random_seed == -1) random_seed = time(NULL); if (fn_in.isStarFile()) { ObservationModel::loadSafely(fn_in, obsModel, MDmic, "micrographs", verb); fn_micrographs.clear(); FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDmic) { FileName fn_mic; MDmic.getValue(EMDL_MICROGRAPH_NAME, fn_mic); fn_micrographs.push_back(fn_mic); } // Check all optics groups have the same pixel size (check for same micrograph size is performed while running through all of them) if (!obsModel.opticsMdt.containsLabel(EMDL_MICROGRAPH_PIXEL_SIZE)) REPORT_ERROR("The input does not contain the rlnMicrographPixelSize column."); obsModel.opticsMdt.getValue(EMDL_MICROGRAPH_PIXEL_SIZE, angpix, 0); for (int optics_group = 1; optics_group < obsModel.numberOfOpticsGroups(); optics_group++) { RFLOAT my_angpix; obsModel.opticsMdt.getValue(EMDL_MICROGRAPH_PIXEL_SIZE, my_angpix, optics_group); if (fabs(angpix - my_angpix) > 0.01) { REPORT_ERROR("ERROR: different pixel size for the different optics groups, perform autopicking separately per optics group."); } } } else { if (do_ctf) REPORT_ERROR("AutoPicker::initialise ERROR: use an input STAR file with the CTF information when using --ctf"); fn_in.globFiles(fn_micrographs); if (fn_micrographs.size() == 0) REPORT_ERROR("Cannot find any micrograph called: "+fns_autopick); } fn_ori_micrographs = fn_micrographs; // If we're continuing an old run, see which micrographs have not been finished yet... if (do_only_unfinished) { if (verb > 0) std::cout << " + Skipping those micrographs for which coordinate file already exists" << std::endl; std::vector fns_todo; for (long int imic = 0; imic < fn_micrographs.size(); imic++) { FileName fn_tmp = getOutputRootName(fn_micrographs[imic]) + "_" + fn_out + ".star"; if (!exists(fn_tmp)) fns_todo.push_back(fn_micrographs[imic]); } fn_micrographs = fns_todo; } // If there is nothing to do, then go out of initialise todo_anything = true; if (fn_micrographs.size() == 0) { if (verb > 0) std::cout << " + No new micrographs to do, so exiting autopicking ..." << std::endl; todo_anything = false; return; } if (verb > 0) { if((fn_micrographs.size() > 30 && do_write_fom_maps) && !no_fom_limit) { REPORT_ERROR("\n If you really want to write this many (" + integerToString(fn_micrographs.size()) + ") FOM-maps, add --no_fom_limit"); } std::cout << " + Run autopicking on the following micrographs: " << std::endl; for(unsigned int i = 0; i < fn_micrographs.size(); ++i) std::cout << " * " << fn_micrographs[i] << std::endl; } #ifdef TIMING timer.toc(TIMING_A1); #endif #ifdef TIMING timer.tic(TIMING_A2); #endif // Make sure that psi-sampling is even around the circle RFLOAT old_sampling = psi_sampling; int n_sampling = ROUND(360. / psi_sampling); psi_sampling = 360. 
/ (RFLOAT) n_sampling; if (verb > 0 && fabs(old_sampling - psi_sampling) > 1e-3) std::cout << " + Changed psi-sampling rate to: " << psi_sampling << std::endl; // Read in the references Mrefs.clear(); if (do_LoG) { if (LoG_min_diameter < 0) REPORT_ERROR("ERROR: Provide --LoG_diam_min when using the LoG-filter for autopicking"); if (LoG_max_diameter < 0) REPORT_ERROR("ERROR: Provide --LoG_diam_max when using the LoG-filter for autopicking"); // Always use skip_side, as algorithms tends to pick on the sides of micrographs autopick_skip_side = XMIPP_MAX(autopick_skip_side, 0.5*LoG_min_diameter/angpix); // Fill vector with diameters to be searched diams_LoG.clear(); for (int i = LoG_max_search; i > 1; i--) diams_LoG.push_back(ROUND(LoG_min_diameter/(RFLOAT)(i))); diams_LoG.push_back(LoG_min_diameter); diams_LoG.push_back((LoG_max_diameter+LoG_min_diameter)/2.); diams_LoG.push_back(LoG_max_diameter); for (int i = 2; i <= LoG_max_search; i++) diams_LoG.push_back(ROUND(LoG_max_diameter*(RFLOAT)(i))); if (verb > 0) { std::cout << " + Will use following diameters for Laplacian-of-Gaussian filter: " << std::endl; for (int i = 0; i < diams_LoG.size(); i++) { RFLOAT myd = diams_LoG[i]; if (myd < LoG_min_diameter) std::cout << " * " << myd << " (too low)" << std::endl; else if (myd > LoG_max_diameter) std::cout << " * " << myd << " (too high)" << std::endl; else std::cout << " * " << myd << " (ok)" << std::endl; } } } else if (fn_ref == "") { REPORT_ERROR("ERROR: Provide either --ref or use --LoG."); } else if (fn_ref == "gauss") { if (verb > 0) std::cout << " + Will use Gaussian blob as reference, with peak value of " << gauss_max_value << std::endl; if(particle_diameter<=0) CRITICAL(ERR_GAUSSBLOBSIZE); // Set particle boxsize to be 1.5x bigger than circle with particle_diameter particle_size = 1.5 * ROUND(particle_diameter/angpix); particle_size += particle_size%2; psi_sampling = 360.; do_ctf = false; Image Iref; Iref().initZeros(particle_size, particle_size); Iref().setXmippOrigin(); // Make a Gaussian reference. sigma is 1/6th of the particle size, such that 3 sigma is at the image edge RFLOAT normgauss = gaussian1D(0., particle_size/6., 0.); FOR_ALL_ELEMENTS_IN_ARRAY2D(Iref()) { double r = sqrt((RFLOAT)(i*i + j*j)); A2D_ELEM(Iref(), i, j) = gauss_max_value * gaussian1D(r, particle_size/6., 0.) / normgauss; } Mrefs.push_back(Iref()); } else if (fn_ref.isStarFile()) { MetaDataTable MDref; MDref.read(fn_ref); FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDref) { // Get all reference images and their names Image Iref; FileName fn_img; if (!MDref.getValue(EMDL_MLMODEL_REF_IMAGE, fn_img)) { if (!MDref.getValue(EMDL_IMAGE_NAME, fn_img)) REPORT_ERROR("AutoPicker::initialise ERROR: either provide rlnReferenceImage or rlnImageName in the reference STAR file!"); } #ifdef DEBUG std::cerr << " Reference fn= " << fn_img << std::endl; #endif Iref.read(fn_img); Iref().setXmippOrigin(); Mrefs.push_back(Iref()); if (Mrefs.size() == 1) // Check only the first reference { // Check pixel size in the header is consistent with angpix_ref. Otherwise, raise a warning RFLOAT angpix_header = Iref.samplingRateX(); if (angpix_ref < 0) { if (verb > 0 && fabs(angpix_header - angpix) > 1e-3) { std::cout << " + Using pixel size in reference image header= " << angpix_header << std::endl; } angpix_ref = angpix_header; } else { if (verb > 0 && fabs(angpix_header - angpix_ref) > 1e-3) { std::cerr << " WARNING!!! 
Pixel size in reference image header= " << angpix_header << " but you have provided --angpix_ref " << angpix_ref << std::endl; } } } } } else { Image Istk, Iref; Istk.read(fn_ref); // Check pixel size in the header is consistent with angpix_ref. Otherwise, raise a warning RFLOAT angpix_header = Istk.samplingRateX(); if (verb > 0) { if (angpix_ref < 0) { if (fabs(angpix_header - angpix) > 1e-3) { std::cerr << " WARNING!!! Pixel size in reference image header= " << angpix_header << " but you have not provided --angpix_ref." << std::endl; std::cerr << " The pixel size of the reference is assumed to be the same as that of the input micrographs (= " << angpix << ")" << std::endl; } } else { if (fabs(angpix_header - angpix_ref) > 1e-3) { std::cerr << " WARNING!!! Pixel size in reference image header= " << angpix_header << " but you have provided --angpix_ref " << angpix_ref << std::endl; } } } if (ZSIZE(Istk()) > 1) { if (autopick_helical_segments) { REPORT_ERROR("Filament picker (--helix) does not support 3D references. Please use 2D class averages instead."); } // Re-scale references if necessary if (angpix_ref < 0) angpix_ref = angpix; HealpixSampling sampling; sampling.healpix_order = healpix_order; sampling.fn_sym = symmetry; sampling.perturbation_factor = 0.; sampling.offset_step = 1; sampling.limit_tilt = -91.; sampling.is_3D = true; sampling.initialise(); if (verb > 0) { std::cout << " Projecting a 3D reference with " << symmetry << " symmetry, using angular sampling rate of " << sampling.getAngularSampling() << " degrees, i.e. in " << sampling.NrDirections() << " directions ... " << std::endl; } int my_ori_size = XSIZE(Istk()); Projector projector(my_ori_size, TRILINEAR, padding); MultidimArray dummy; int lowpass_size = 2 * CEIL(my_ori_size * angpix_ref / lowpass); projector.computeFourierTransformMap(Istk(), dummy, lowpass_size); MultidimArray Mref(my_ori_size, my_ori_size); MultidimArray Fref; FourierTransformer transformer; transformer.setReal(Mref); transformer.getFourierAlias(Fref); Image Iprojs; FileName fn_img, fn_proj = fn_odir + "reference_projections.mrcs"; for (long int idir = 0; idir < sampling.NrDirections(); idir++) { RFLOAT rot = sampling.rot_angles[idir]; RFLOAT tilt = sampling.tilt_angles[idir]; Matrix2D A; Euler_angles2matrix(rot, tilt, 0., A, false); Fref.initZeros(); projector.get2DFourierTransform(Fref, A); // Shift the image back to the center... CenterFFTbySign(Fref); transformer.inverseFourierTransform(); Mref.setXmippOrigin(); Mrefs.push_back(Mref); if (verb > 0) { // Also write out a stack with the 2D reference projections Iprojs()=Mref; fn_img.compose(idir+1,fn_proj); if (idir == 0) Iprojs.write(fn_img, -1, false, WRITE_OVERWRITE); else Iprojs.write(fn_img, -1, false, WRITE_APPEND); } } } else { // Stack of 2D references for (int n = 0; n < NSIZE(Istk()); n++) { Istk().getImage(n, Iref()); Iref().setXmippOrigin(); Mrefs.push_back(Iref()); } } } #ifdef TIMING timer.toc(TIMING_A2); #endif #ifdef TIMING timer.tic(TIMING_A3); #endif if (!do_LoG) { // Re-scale references if necessary if (angpix_ref < 0) angpix_ref = angpix; // Automated determination of bg_radius (same code as in particle_sorter.cpp!) if (particle_diameter < 0.) 
{ RFLOAT sumr=0.; for (int iref = 0; iref < Mrefs.size(); iref++) { RFLOAT cornerval = DIRECT_MULTIDIM_ELEM(Mrefs[iref], 0); // Look on the central X-axis, which first and last values are NOT equal to the corner value bool has_set_first=false; bool has_set_last=false; int last_corner=FINISHINGX(Mrefs[iref]), first_corner=STARTINGX(Mrefs[iref]); for (long int j=STARTINGX(Mrefs[iref]); j<=FINISHINGX(Mrefs[iref]); j++) { if (!has_set_first) { if (fabs(A3D_ELEM(Mrefs[iref], 0,0,j) - cornerval) > 1e-6) { first_corner = j; has_set_first = true; } } else if (!has_set_last) { if (fabs(A3D_ELEM(Mrefs[iref], 0,0,j) - cornerval) < 1e-6) { last_corner = j - 1; has_set_last = true; } } } sumr += (last_corner - first_corner); } particle_diameter = sumr / Mrefs.size(); // diameter is in Angstroms particle_diameter *= angpix_ref; if (verb>0) { std::cout << " + Automatically set the background diameter to " << particle_diameter << " Angstrom" << std::endl; std::cout << " + You can override this by providing --particle_diameter (in Angstroms)" << std::endl; } } // Now bring Mrefs from angpix_ref to angpix! if (fabs(angpix_ref - angpix) > 1e-3) { int halfoldsize = XSIZE(Mrefs[0]) / 2; int newsize = ROUND(halfoldsize * (angpix_ref/angpix)); newsize *= 2; RFLOAT rescale_greyvalue = 1.; // If the references came from downscaled particles, then those were normalised differently // (the stddev is N times smaller after downscaling N times) // This needs to be corrected again RFLOAT rescale_factor = 1.; if (newsize > XSIZE(Mrefs[0])) rescale_factor *= (RFLOAT)(XSIZE(Mrefs[0]))/(RFLOAT)newsize; for (int iref = 0; iref < Mrefs.size(); iref++) { resizeMap(Mrefs[iref], newsize); Mrefs[iref] *= rescale_factor; Mrefs[iref].setXmippOrigin(); } } // Get particle boxsize from the input reference images particle_size = XSIZE(Mrefs[0]); if (particle_diameter > particle_size * angpix) { std::cerr << " mask_diameter (A): " << particle_diameter << " box_size (pix): " << particle_size << " pixel size (A): " << angpix << std::endl; REPORT_ERROR("ERROR: the particle mask diameter is larger than the size of the box."); } if ( (verb > 0) && (autopick_helical_segments)) { std::cout << " + Helical tube diameter = " << helical_tube_diameter << " Angstroms " << std::endl; } if ( (autopick_helical_segments) && (helical_tube_diameter > particle_diameter) ) { REPORT_ERROR("Error: Helical tube diameter should be smaller than the particle mask diameter!"); } if (autopick_helical_segments && do_amyloid) { amyloid_max_psidiff = RAD2DEG(helical_tube_curvature_factor_max*2.); if (verb > 0) std::cout << " + Setting amyloid max_psidiff to: " << amyloid_max_psidiff << std::endl; if (max_local_avg_diameter < 0.) { max_local_avg_diameter = 3. * helical_tube_diameter; if (verb > 0) std::cout << " + Setting amyloid max_local_avg_diameter to: " << max_local_avg_diameter << std::endl; } } // Get the squared particle radius (in integer pixels) particle_radius2 = ROUND(particle_diameter/(2. * angpix)); particle_radius2 -= decrease_radius; particle_radius2*= particle_radius2; #ifdef DEBUG std::cerr << " particle_size= " << particle_size << " sqrt(particle_radius2)= " << sqrt(particle_radius2) << std::endl; #endif // Invert references if necessary (do this AFTER automasking them!) 
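 // (--invert simply multiplies each masked reference by -1, so that the references
 // match micrographs whose particle density has the opposite sign to the templates.)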
if (do_invert) { for (int iref = 0; iref < Mrefs.size(); iref++) { Mrefs[iref] *= -1.; } } } // end if !do_LoG // Get micrograph_size Image Imic; Imic.read(fn_micrographs[0], false); micrograph_xsize = XSIZE(Imic()); micrograph_ysize = YSIZE(Imic()); micrograph_size = (micrograph_xsize != micrograph_ysize) ? XMIPP_MAX(micrograph_xsize, micrograph_ysize) : micrograph_xsize; if (extra_padding > 0) micrograph_size += 2*extra_padding; if (lowpass < 0.) { downsize_mic = micrograph_size; } else { downsize_mic = 2 * ROUND(micrograph_size * angpix / lowpass); } /* * Here we set the size of the micrographs during cross-correlation calculation. The final size is still the same size as * the input micrographs, we simply adjust the frequencies used in fourier space by cropping the frequency-space images in * intermediate calculations. */ if(workFrac>1) // set size directly { int tempFrac = (int)ROUND(workFrac); tempFrac -= tempFrac%2; if(tempFrac0) { int tempFrac = (int)ROUND(workFrac*(RFLOAT)micrograph_size); tempFrac -= tempFrac%2; workSize = getGoodFourierDims(tempFrac,micrograph_size); } else if(workFrac==0) { workSize = getGoodFourierDims((int)downsize_mic,micrograph_size); } else REPORT_ERROR("negative workFrac (--shrink) cannot be used. Choose a fraction 0 0 && workSize < downsize_mic) { std::cout << " + WARNING: The calculations will be done at a lower resolution than requested." << std::endl; } if ( verb > 0 && (autopick_helical_segments) && (!do_amyloid) && ((float(workSize) / float(micrograph_size)) < 0.4999) ) { std::cerr << " + WARNING: Please consider using a shrink value 0.5~1 for picking helical segments. Smaller values may lead to poor results." << std::endl; } //printf("workSize = %d, corresponding to a resolution of %g for these settings. \n", workSize, 2*(((RFLOAT)micrograph_size*angpix)/(RFLOAT)workSize)); if (min_particle_distance < 0) { min_particle_distance = particle_size * angpix / 2.; } #ifdef TIMING timer.toc(TIMING_A3); #endif #ifdef TIMING timer.tic(TIMING_A4); #endif // Pre-calculate and store Projectors for all references at the right size if (!do_read_fom_maps && !do_LoG) { if (verb > 0) { std::cout << " Initialising FFTs for the references and masks ... 
" << std::endl; } // Calculate a circular mask based on the particle_diameter and then store its FT FourierTransformer transformer; MultidimArray Mcirc_mask(particle_size, particle_size); MultidimArray Maux(micrograph_size, micrograph_size); Mcirc_mask.setXmippOrigin(); Maux.setXmippOrigin(); // Sjors 17jan2018; also make a specific circular mask to calculate local average value, for removal of carbon areas with helices if (autopick_helical_segments) { Mcirc_mask.initConstant(1.); nr_pixels_avg_mask = Mcirc_mask.nzyxdim; long int inner_radius = ROUND(helical_tube_diameter/(2.*angpix)); FOR_ALL_ELEMENTS_IN_ARRAY2D(Mcirc_mask) { if (i*i + j*j < inner_radius*inner_radius) { A2D_ELEM(Mcirc_mask, i, j) = 0.; nr_pixels_avg_mask--; } } if (max_local_avg_diameter > 0) { long int outer_radius = ROUND(max_local_avg_diameter/(2.*angpix)); FOR_ALL_ELEMENTS_IN_ARRAY2D(Mcirc_mask) { if (i*i + j*j > outer_radius*outer_radius) { A2D_ELEM(Mcirc_mask, i, j) = 0.; nr_pixels_avg_mask--; } } } // Now set the mask in the large square and store its FFT Maux.initZeros(); FOR_ALL_ELEMENTS_IN_ARRAY2D(Mcirc_mask) { A2D_ELEM(Maux, i, j ) = A2D_ELEM(Mcirc_mask, i, j); } transformer.FourierTransform(Maux, Favgmsk); CenterFFTbySign(Favgmsk); } // For squared difference, need the mask of the background to locally normalise the micrograph nr_pixels_circular_invmask = 0; Mcirc_mask.initZeros(); FOR_ALL_ELEMENTS_IN_ARRAY2D(Mcirc_mask) { if (i*i + j*j >= particle_radius2) { A2D_ELEM(Mcirc_mask, i, j) = 1.; nr_pixels_circular_invmask++; } } // Now set the mask in the large square and store its FFT Maux.initZeros(); FOR_ALL_ELEMENTS_IN_ARRAY2D(Mcirc_mask) { A2D_ELEM(Maux, i, j ) = A2D_ELEM(Mcirc_mask, i, j); } transformer.FourierTransform(Maux, Finvmsk); CenterFFTbySign(Finvmsk); // Also get the particle-area mask nr_pixels_circular_mask = 0; Mcirc_mask.initZeros(); FOR_ALL_ELEMENTS_IN_ARRAY2D(Mcirc_mask) { if (i*i + j*j < particle_radius2) { A2D_ELEM(Mcirc_mask, i, j) = 1.; nr_pixels_circular_mask++; } } #ifdef DEBUG std::cerr << " min_particle_distance= " << min_particle_distance << " micrograph_size= " << micrograph_size << " downsize_mic= " << downsize_mic << std::endl; std::cerr << " nr_pixels_circular_mask= " << nr_pixels_circular_mask << " nr_pixels_circular_invmask= " << nr_pixels_circular_invmask << std::endl; #endif PPref.clear(); if (verb > 0) init_progress_bar(Mrefs.size()); Projector PP(micrograph_size, TRILINEAR, padding); MultidimArray dummy; for (int iref = 0; iref < Mrefs.size(); iref++) { // (Re-)apply the mask to the references FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Mrefs[iref]) { DIRECT_MULTIDIM_ELEM(Mrefs[iref], n) *= DIRECT_MULTIDIM_ELEM(Mcirc_mask, n); } // Set reference in the large box of the micrograph Maux.initZeros(); Maux.setXmippOrigin(); FOR_ALL_ELEMENTS_IN_ARRAY2D(Mrefs[iref]) { A2D_ELEM(Maux, i, j) = A2D_ELEM(Mrefs[iref], i, j); } // And compute its Fourier Transform inside the Projector PP.computeFourierTransformMap(Maux, dummy, downsize_mic, 1, false); PPref.push_back(PP); if (verb > 0) progress_bar(iref+1); } if (verb > 0) progress_bar(Mrefs.size()); } #ifdef TIMING timer.toc(TIMING_A4); timer.toc(TIMING_A0); #endif #ifdef DEBUG std::cerr << "Finishing initialise" << std::endl; #endif } #ifdef CUDA int AutoPicker::deviceInitialise() { int devCount; cudaGetDeviceCount(&devCount); std::vector < std::vector < std::string > > allThreadIDs; untangleDeviceIDs(gpu_ids, allThreadIDs); // Sequential initialisation of GPUs on all ranks int dev_id; if (!std::isdigit(*gpu_ids.begin())) dev_id = 0; 
else dev_id = textToInteger((allThreadIDs[0][0]).c_str()); if (verb>0) { std::cout << " + Using GPU device " << dev_id << std::endl; } return(dev_id); } #endif void AutoPicker::run() { int barstep; if (verb > 0) { std::cout << " Autopicking ..." << std::endl; init_progress_bar(fn_micrographs.size()); barstep = XMIPP_MAX(1, fn_micrographs.size() / 60); } FileName fn_olddir=""; for (long int imic = 0; imic < fn_micrographs.size(); imic++) { // Abort through the pipeline_control system if (pipeline_control_check_abort_job()) exit(RELION_EXIT_ABORTED); if (verb > 0 && imic % barstep == 0) progress_bar(imic); // Check new-style outputdirectory exists and make it if not! FileName fn_dir = getOutputRootName(fn_micrographs[imic]); fn_dir = fn_dir.beforeLastOf("/"); if (fn_dir != fn_olddir) { // Make a Particles directory int res = system(("mkdir -p " + fn_dir).c_str()); fn_olddir = fn_dir; } #ifdef TIMING timer.tic(TIMING_A5); #endif if (do_LoG) autoPickLoGOneMicrograph(fn_micrographs[imic], imic); else autoPickOneMicrograph(fn_micrographs[imic], imic); #ifdef TIMING timer.toc(TIMING_A5); #endif } if (verb > 0) progress_bar(fn_micrographs.size()); } void AutoPicker::generatePDFLogfile() { long int barstep = XMIPP_MAX(1, fn_ori_micrographs.size() / 60); if (verb > 0) { std::cout << " Generating logfile.pdf ... " << std::endl; init_progress_bar(fn_ori_micrographs.size()); } MetaDataTable MDresult; long total_nr_picked = 0; for (long int imic = 0; imic < fn_ori_micrographs.size(); imic++) { MetaDataTable MD; FileName fn_pick = getOutputRootName(fn_ori_micrographs[imic]) + "_" + fn_out + ".star"; if (exists(fn_pick)) { MD.read(fn_pick); long nr_pick = MD.numberOfObjects(); total_nr_picked += nr_pick; if (MD.containsLabel(EMDL_PARTICLE_AUTOPICK_FOM)) { RFLOAT fom, avg_fom = 0.; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD) { MD.getValue(EMDL_PARTICLE_AUTOPICK_FOM, fom); avg_fom += fom; } avg_fom /= nr_pick; // mis-use MetadataTable to conveniently make histograms and value-plots MDresult.addObject(); MDresult.setValue(EMDL_MICROGRAPH_NAME, fn_ori_micrographs[imic]); MDresult.setValue(EMDL_PARTICLE_AUTOPICK_FOM, avg_fom); MDresult.setValue(EMDL_MLMODEL_GROUP_NR_PARTICLES, nr_pick); } } if (verb > 0 && imic % 60 == 0) progress_bar(imic); } if (verb > 0 ) { progress_bar(fn_ori_micrographs.size()); std::cout << " Total number of particles from " << fn_ori_micrographs.size() << " micrographs is " << total_nr_picked << std::endl; long avg = 0; if (fn_ori_micrographs.size() > 0) avg = ROUND((RFLOAT)total_nr_picked/fn_ori_micrographs.size()); std::cout << " i.e. 
on average there were " << avg << " particles per micrograph" << std::endl; } // Values for all micrographs FileName fn_eps; std::vector all_fn_eps; std::vector histX, histY; MDresult.write(fn_odir + "summary.star"); CPlot2D *plot2Db=new CPlot2D("Nr of picked particles for all micrographs"); MDresult.addToCPlot2D(plot2Db, EMDL_UNDEFINED, EMDL_MLMODEL_GROUP_NR_PARTICLES, 1.); plot2Db->SetDrawLegend(false); fn_eps = fn_odir + "all_nr_parts.eps"; plot2Db->OutputPostScriptPlot(fn_eps); all_fn_eps.push_back(fn_eps); delete plot2Db; if (MDresult.numberOfObjects() > 3) { CPlot2D *plot2D=new CPlot2D(""); MDresult.columnHistogram(EMDL_MLMODEL_GROUP_NR_PARTICLES,histX,histY,0, plot2D); fn_eps = fn_odir + "histogram_nrparts.eps"; plot2D->SetTitle("Histogram of nr of picked particles per micrograph"); plot2D->OutputPostScriptPlot(fn_eps); all_fn_eps.push_back(fn_eps); delete plot2D; } CPlot2D *plot2Dc=new CPlot2D("Average autopick FOM for all micrographs"); MDresult.addToCPlot2D(plot2Dc, EMDL_UNDEFINED, EMDL_PARTICLE_AUTOPICK_FOM, 1.); plot2Dc->SetDrawLegend(false); fn_eps = fn_odir + "all_FOMs.eps"; plot2Dc->OutputPostScriptPlot(fn_eps); all_fn_eps.push_back(fn_eps); delete plot2Dc; if (MDresult.numberOfObjects() > 3) { CPlot2D *plot2Dd=new CPlot2D(""); MDresult.columnHistogram(EMDL_PARTICLE_AUTOPICK_FOM,histX,histY,0, plot2Dd); fn_eps = fn_odir + "histogram_FOMs.eps"; plot2Dd->SetTitle("Histogram of average autopick FOM per micrograph"); plot2Dd->OutputPostScriptPlot(fn_eps); all_fn_eps.push_back(fn_eps); delete plot2Dd; } joinMultipleEPSIntoSinglePDF(fn_odir + "logfile.pdf", all_fn_eps); if (verb > 0) { std::cout << " Done! Written: " << fn_odir << "logfile.pdf " << std::endl; } } std::vector AutoPicker::findNextCandidateCoordinates(AmyloidCoord &mycoord, std::vector &circle, RFLOAT threshold_value, RFLOAT max_psidiff, int skip_side, float scale, MultidimArray &Mccf, MultidimArray &Mpsi) { std::vector result; int new_micrograph_xsize = (int)((float)micrograph_xsize*scale); int new_micrograph_ysize = (int)((float)micrograph_ysize*scale); int skip_side_pix = ROUND(skip_side * scale); Matrix2D A2D; Matrix1D vec_c(2), vec_p(2); rotation2DMatrix(-mycoord.psi, A2D, false); for (int icoor = 0; icoor < circle.size(); icoor++) { // Rotate the circle-vector coordinates along the mycoord.psi XX(vec_c) = (circle[icoor]).x; YY(vec_c) = (circle[icoor]).y; vec_p = A2D * vec_c; long int jj = ROUND(mycoord.x + XX(vec_p)); long int ii = ROUND(mycoord.y + YY(vec_p)); if ( (jj >= (FIRST_XMIPP_INDEX(new_micrograph_xsize) + skip_side_pix + 1)) && (jj < (LAST_XMIPP_INDEX(new_micrograph_xsize) - skip_side_pix - 1)) && (ii >= (FIRST_XMIPP_INDEX(new_micrograph_ysize) + skip_side_pix + 1)) && (ii < (LAST_XMIPP_INDEX(new_micrograph_ysize) - skip_side_pix - 1)) ) { RFLOAT myccf = A2D_ELEM(Mccf, ii, jj); RFLOAT mypsi = A2D_ELEM(Mpsi, ii, jj); // Small difference in psi-angle with mycoord RFLOAT psidiff = fabs(mycoord.psi - mypsi); psidiff = realWRAP(psidiff, 0., 360.); if (psidiff > 180.) psidiff -= 180.; if (psidiff > 90.) 
psidiff -= 180.; if (fabs(psidiff) < max_psidiff && myccf > threshold_value) { AmyloidCoord newcoord; newcoord.x = mycoord.x + XX(vec_p); newcoord.y = mycoord.y + YY(vec_p); newcoord.psi = A2D_ELEM(Mpsi, ii, jj); newcoord.fom = myccf; //std::cerr << " myccf= " << myccf << " psi= " << newcoord.psi << std::endl; result.push_back(newcoord); } } } return result; } AmyloidCoord AutoPicker::findNextAmyloidCoordinate(AmyloidCoord &mycoord, std::vector &circle, RFLOAT threshold_value, RFLOAT max_psidiff, RFLOAT amyloid_diameter_pix, int skip_side, float scale, MultidimArray &Mccf, MultidimArray &Mpsi) { int new_micrograph_xsize = (int)((float)micrograph_xsize*scale); int new_micrograph_ysize = (int)((float)micrograph_ysize*scale); int skip_side_pix = ROUND(skip_side * scale); // Return if this one has been done already.. AmyloidCoord result; result.x = result.y = result.psi = 0.; result.fom = -999.; if (A2D_ELEM(Mccf, ROUND(mycoord.y), ROUND(mycoord.x)) < threshold_value) return result; // Set FOM to small value in circle around mycoord int myrad = ROUND(0.5*helical_tube_diameter/angpix*scale); float myrad2 = (float)myrad * (float)myrad; for (int ii = -myrad; ii <= myrad; ii++) { for (int jj = -myrad; jj <= myrad; jj++) { float r2 = (float)(ii*ii) + (float)(jj*jj); if (r2 < myrad2) { long int jp = ROUND(mycoord.x + jj); long int ip = ROUND(mycoord.y + ii); //std::cerr << " jp= " << jp << " ip= " << ip << " jj= " << jj << " ii= " << ii<< std::endl; //std::cerr << " FIRST_XMIPP_INDEX(new_micrograph_xsize)= " << FIRST_XMIPP_INDEX(new_micrograph_xsize) + skip_side_pix + 1<< " LAST_XMIPP_INDEX(new_micrograph_xsize)= " << LAST_XMIPP_INDEX(new_micrograph_xsize)- skip_side_pix - 1 << std::endl; //std::cerr << " FIRST_XMIPP_INDEX(new_micrograph_ysize)= " << FIRST_XMIPP_INDEX(new_micrograph_ysize) + skip_side_pix + 1<< " LAST_XMIPP_INDEX(new_micrograph_ysize)= " << LAST_XMIPP_INDEX(new_micrograph_ysize)- skip_side_pix - 1 << std::endl; if ( (jp >= (FIRST_XMIPP_INDEX(XSIZE(Mccf)) )) && (jp <= (LAST_XMIPP_INDEX(XSIZE(Mccf)) )) && (ip >= (FIRST_XMIPP_INDEX(YSIZE(Mccf)) )) && (ip <= (LAST_XMIPP_INDEX(YSIZE(Mccf)) )) ) A2D_ELEM(Mccf, ip, jp) = -999.; } } } // See how far we can grow in any of the circle directions.... // Recursive call to findNextCandidateCoordinates.... // Let's search 3 layers deep... 
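 // In practice the nested loops below score chains of up to four consecutive
 // candidate steps (new1..new4), summing their FOMs; only the first step of the
 // best-scoring chain (best_inew1) is returned and added to the growing filament.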
std::vector new1coords; new1coords = findNextCandidateCoordinates(mycoord, circle, threshold_value, max_psidiff, skip_side, scale, Mccf, Mpsi); long int N = new1coords.size(); std::vector max_depths(N, 0); std::vector max_sumfoms(N, -9999.); RFLOAT sumfom = 0.; RFLOAT max_sumfom = -9999.; int best_inew1=-1; for (int inew1 = 0; inew1 < new1coords.size(); inew1++) { sumfom = new1coords[inew1].fom; if (sumfom > max_sumfom) { max_sumfom = sumfom; best_inew1 = inew1; } std::vector new2coords; new2coords = findNextCandidateCoordinates(new1coords[inew1], circle, threshold_value, max_psidiff, skip_side, scale, Mccf, Mpsi); for (int inew2 = 0; inew2 < new2coords.size(); inew2++) { sumfom = new1coords[inew1].fom + new2coords[inew2].fom; if (sumfom > max_sumfom) { max_sumfom = sumfom; best_inew1 = inew1; } std::vector new3coords; new3coords = findNextCandidateCoordinates(new2coords[inew2], circle, threshold_value, max_psidiff, skip_side, scale, Mccf, Mpsi); for (int inew3 = 0; inew3 < new3coords.size(); inew3++) { sumfom = new1coords[inew1].fom + new2coords[inew2].fom + new3coords[inew3].fom; if (sumfom > max_sumfom) { max_sumfom = sumfom; best_inew1 = inew1; } std::vector new4coords; new4coords = findNextCandidateCoordinates(new3coords[inew3], circle, threshold_value, max_psidiff, skip_side, scale, Mccf, Mpsi); for (int inew4 = 0; inew4 < new4coords.size(); inew4++) { sumfom = new1coords[inew1].fom + new2coords[inew2].fom + new3coords[inew3].fom + new4coords[inew4].fom; if (sumfom > max_sumfom) { max_sumfom = sumfom; best_inew1 = inew1; } } } } } if (best_inew1 < 0) { return result; } else { /* RFLOAT prevpsi = (best_inew1 > 0) ? new1coords[best_inew1-1].psi : -99999.; RFLOAT nextpsi = (new1coords.size() - best_inew1 > 1) ? new1coords[best_inew1+1].psi : -99999.; RFLOAT nextpsidiff = -9999., prevpsidiff=-9999.; if (prevpsi > -999.) { RFLOAT psidiff = fabs(mycoord.psi - prevpsi); psidiff = realWRAP(psidiff, 0., 360.); if (psidiff > 180.) psidiff -= 180.; if (psidiff > 90.) psidiff -= 180.; prevpsidiff = psidiff; } if (nextpsi > -999.) { RFLOAT psidiff = fabs(mycoord.psi - nextpsi); psidiff = realWRAP(psidiff, 0., 360.); if (psidiff > 180.) psidiff -= 180.; if (psidiff > 90.) psidiff -= 180.; nextpsidiff = psidiff; } std::cerr << " new1coords[best_inew1].fom= " << new1coords[best_inew1].fom << " x= " << new1coords[best_inew1].x << " y= " << new1coords[best_inew1].y << " myx= " << mycoord.x << " myy= " << mycoord.y << " mypsi= " << mycoord.psi << " new1coords[best_inew1].psi= " << new1coords[best_inew1].psi << " prevpsi= " << prevpsi << " prevpsidiff= " << prevpsidiff << " nextpsi= " << nextpsi << " nextpsidiff= " << nextpsidiff << std::endl; */ return new1coords[best_inew1]; } } void AutoPicker::pickAmyloids( MultidimArray& Mccf, MultidimArray& Mpsi, MultidimArray& Mstddev, MultidimArray& Mavg, RFLOAT threshold_value, RFLOAT max_psidiff, FileName& fn_mic_in, FileName& fn_star_out, RFLOAT amyloid_width, int skip_side, float scale) { // Set up a vector with coordinates of feasible next coordinates regarding distance and psi-angle std::vector circle; int myrad = ROUND(0.5*helical_tube_diameter/angpix*scale); int myradb = myrad + 1; float myrad2 = (float)myrad * (float)myrad; float myradb2 = (float)myradb * (float)myradb; for (int ii = -myradb; ii <= myradb; ii++) { for (int jj = -myradb; jj <= myradb; jj++) { float r2 = (float)(ii*ii) + (float)(jj*jj); if (r2 > myrad2 && r2 <= myradb2) { float myang = RAD2DEG(atan2((float)(ii),(float)(jj))); if (myang > 90.) myang -= 180.; if (myang < -90.) 
myang += 180.; if (fabs(myang) < max_psidiff) { AmyloidCoord circlecoord; circlecoord.x = (RFLOAT)jj; circlecoord.y = (RFLOAT)ii; circlecoord.fom =0.; circlecoord.psi =myang; circle.push_back(circlecoord); //std::cerr << " circlecoord.x= " << circlecoord.x << " circlecoord.y= " << circlecoord.y << " psi= " << circlecoord.psi << std::endl; } } } } std::vector< std::vector > helices; bool no_more_ccf_peaks = false; while (!no_more_ccf_peaks) { long int imax, jmax; float myccf = Mccf.maxIndex(imax, jmax); float mypsi = Mpsi(imax, jmax); // Stop searching if all pixels are below min_ccf! //std::cerr << " myccf= " << myccf << " imax= " << imax << " jmax= " << jmax << std::endl; //std::cerr << " helices.size()= " << helices.size() << " threshold_value= " << threshold_value << " mypsi= " << mypsi << std::endl; if (myccf < threshold_value) no_more_ccf_peaks = true; std::vector helix; AmyloidCoord coord, newcoord; coord.x = jmax; coord.y = imax; coord.fom = myccf; coord.psi = mypsi; helix.push_back(coord); bool is_done_start = false; bool is_done_end = false; while ( (!is_done_start) || (!is_done_end) ) { if (!is_done_start) { newcoord = findNextAmyloidCoordinate(helix[0], circle, threshold_value, max_psidiff, helical_tube_diameter/angpix, ROUND(skip_side), scale, Mccf, Mpsi); //std::cerr << " START newcoord.x= " << newcoord.x << " newcoord.y= " << newcoord.y << " newcoord.fom= " << newcoord.fom // << " stddev = " << A2D_ELEM(Mstddev, ROUND(newcoord.y), ROUND(newcoord.x)) // << " avg= " << A2D_ELEM(Mavg, ROUND(newcoord.y), ROUND(newcoord.x)) << std::endl; // Also check for Mstddev value if (newcoord.fom > threshold_value && !(max_stddev_noise > 0. && A2D_ELEM(Mstddev, ROUND(newcoord.y), ROUND(newcoord.x)) > max_stddev_noise) && !(min_avg_noise > -900. && A2D_ELEM(Mavg, ROUND(newcoord.y), ROUND(newcoord.x)) < min_avg_noise) ) helix.insert(helix.begin(), newcoord); else is_done_start = true; } if (!is_done_end) { newcoord = findNextAmyloidCoordinate(helix[helix.size()-1], circle, threshold_value, max_psidiff, helical_tube_diameter/angpix, ROUND(skip_side), scale, Mccf, Mpsi); //std::cerr << " END newcoord.x= " << newcoord.x << " newcoord.y= " << newcoord.y << " newcoord.fom= " << newcoord.fom << std::endl; if (newcoord.fom > threshold_value && !(max_stddev_noise > 0. && A2D_ELEM(Mstddev, ROUND(newcoord.y), ROUND(newcoord.x)) > max_stddev_noise) && !(min_avg_noise > -900. && A2D_ELEM(Mavg, ROUND(newcoord.y), ROUND(newcoord.x)) < min_avg_noise) ) helix.push_back(newcoord); else is_done_end = true; } //std::cerr << " is_done_start= " << is_done_start << " is_done_end= " << is_done_end << std::endl; } //std::cerr << " helix.size()= " << helix.size() << std::endl; if (helical_tube_diameter*0.5*helix.size() > helical_tube_length_min) { helices.push_back(helix); /* std::cerr << "PUSHING BACK HELIX " << helices.size() << " << WITH SIZE= " << helix.size() << std::endl; char c; std::cerr << " helices.size()= " << helices.size() << std::endl; std::cerr << "press any key" << std::endl; //std::cin >> c; Image It; It()=Mccf; It.write("Mccf.spi"); // TMP //no_more_ccf_peaks=true; */ } } // end while (!no_more_ccf_peaks) // Now write out in a STAR file // Write out a STAR file with the coordinates FileName fn_tmp; MetaDataTable MDout; // Only output STAR header if there are no tubes... 
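 // Each picked segment written below gets: X/Y coordinates (rescaled back to the
 // original micrograph), the autopick FOM, a 1-based helical tube ID, tilt/psi
 // priors (tilt fixed at 90 degrees), the cumulative track length along the tube
 // in Angstroms, and 0.5 flip ratios for the psi and rot priors.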
MDout.clear(); MDout.addLabel(EMDL_IMAGE_COORD_X); MDout.addLabel(EMDL_IMAGE_COORD_Y); MDout.addLabel(EMDL_PARTICLE_AUTOPICK_FOM); MDout.addLabel(EMDL_PARTICLE_HELICAL_TUBE_ID); MDout.addLabel(EMDL_ORIENT_TILT_PRIOR); MDout.addLabel(EMDL_ORIENT_PSI_PRIOR); MDout.addLabel(EMDL_PARTICLE_HELICAL_TRACK_LENGTH_ANGSTROM); MDout.addLabel(EMDL_ORIENT_PSI_PRIOR_FLIP_RATIO); MDout.addLabel(EMDL_ORIENT_ROT_PRIOR_FLIP_RATIO); // KThurber float interbox_dist = (min_particle_distance / angpix); // Write out segments all all helices int helixid = 0; for (int ihelix = 0; ihelix < helices.size(); ihelix++) { RFLOAT leftover_dist = 0.; RFLOAT tube_length = 0.; for (long int iseg = 0; iseg < helices[ihelix].size()-1; iseg++) //for (long int iseg = 0; iseg < helices[ihelix].size(); iseg++) { /* RFLOAT xval = (helices[ihelix][iseg].x / scale) - (RFLOAT)(FIRST_XMIPP_INDEX(micrograph_xsize)); RFLOAT yval = (helices[ihelix][iseg].y / scale) - (RFLOAT)(FIRST_XMIPP_INDEX(micrograph_ysize)); MDout.addObject(); MDout.setValue(EMDL_IMAGE_COORD_X, xval); MDout.setValue(EMDL_IMAGE_COORD_Y, yval); MDout.setValue(EMDL_PARTICLE_HELICAL_TUBE_ID, ihelix+1); // start counting at 1 MDout.setValue(EMDL_ORIENT_PSI_PRIOR, helices[ihelix][iseg].psi); */ // Distance to next segment float dx = (float)(helices[ihelix][iseg+1].x - helices[ihelix][iseg].x)/ scale; float dy = (float)(helices[ihelix][iseg+1].y - helices[ihelix][iseg].y)/ scale; float distnex = sqrt(dx*dx + dy*dy); float myang = -1. * RAD2DEG(atan2(dy,dx)); for (float position = leftover_dist; position < distnex; position+= interbox_dist) { RFLOAT frac = position/distnex; RFLOAT xval = (helices[ihelix][iseg].x / scale) - (RFLOAT)(FIRST_XMIPP_INDEX(micrograph_xsize)) + frac * dx; RFLOAT yval = (helices[ihelix][iseg].y / scale) - (RFLOAT)(FIRST_XMIPP_INDEX(micrograph_ysize)) + frac * dy; MDout.addObject(); MDout.setValue(EMDL_IMAGE_COORD_X, xval); MDout.setValue(EMDL_IMAGE_COORD_Y, yval); MDout.setValue(EMDL_PARTICLE_AUTOPICK_FOM, helices[ihelix][iseg].fom); MDout.setValue(EMDL_PARTICLE_HELICAL_TUBE_ID, ihelix+1); // start counting at 1 MDout.setValue(EMDL_ORIENT_TILT_PRIOR, 90.); MDout.setValue(EMDL_ORIENT_PSI_PRIOR, myang); MDout.setValue(EMDL_PARTICLE_HELICAL_TRACK_LENGTH_ANGSTROM, angpix * tube_length); MDout.setValue(EMDL_ORIENT_PSI_PRIOR_FLIP_RATIO, 0.5); MDout.setValue(EMDL_ORIENT_ROT_PRIOR_FLIP_RATIO, 0.5); // KThurber leftover_dist = interbox_dist + (distnex - position); tube_length += interbox_dist; } } helixid++; } fn_tmp = getOutputRootName(fn_mic_in) + "_" + fn_star_out + ".star"; MDout.write(fn_tmp); } void AutoPicker::pickCCFPeaks( const MultidimArray& Mccf, const MultidimArray& Mstddev, const MultidimArray& Mavg, const MultidimArray& Mclass, RFLOAT threshold_value, int peak_r_min, RFLOAT particle_diameter_pix, std::vector& ccf_peak_list, MultidimArray& Mccfplot, int skip_side, float scale) { MultidimArray Mrec; std::vector ccf_pixel_list; ccfPeak ccf_peak_small, ccf_peak_big; std::vector ccf_peak_list_aux; int new_micrograph_xsize = (int)((float)micrograph_xsize*scale); int new_micrograph_ysize = (int)((float)micrograph_ysize*scale); int nr_pixels; RFLOAT ratio; // Rescale skip_side and particle_diameter_pix skip_side = (int)((float)skip_side*scale); particle_diameter_pix *= scale; //int micrograph_core_size = XMIPP_MIN(micrograph_xsize, micrograph_ysize) - skip_side * 2 - 2; if ( (NSIZE(Mccf) != 1) || (ZSIZE(Mccf) != 1) || (YSIZE(Mccf) != XSIZE(Mccf)) ) REPORT_ERROR("autopicker.cpp::pickCCFPeaks: The micrograph should be a 2D square!"); if ( (XSIZE(Mccf) < 
new_micrograph_xsize) || (YSIZE(Mccf) < new_micrograph_ysize) ) REPORT_ERROR("autopicker.cpp::pickCCFPeaks: Invalid dimensions for Mccf!"); //if (micrograph_core_size < 100*scale) // REPORT_ERROR("autopicker.cpp::pickCCFPeaks: Size of the micrograph is too small compared to that of the particle box!"); if ( (STARTINGY(Mccf) != FIRST_XMIPP_INDEX(YSIZE(Mccf))) || (STARTINGX(Mccf) != FIRST_XMIPP_INDEX(XSIZE(Mccf))) ) REPORT_ERROR("autopicker.cpp::pickCCFPeaks: The origin of input 3D MultidimArray is not at the center (use v.setXmippOrigin() before calling this function)!"); if (Mccf.sameShape(Mclass) == false) REPORT_ERROR("autopicker.cpp::pickCCFPeaks: Mccf and Mclass should have the same shape!"); if (peak_r_min < 1) REPORT_ERROR("autopicker.cpp::pickCCFPeaks: Radii of peak should be positive!"); if (particle_diameter_pix < 5. * scale) REPORT_ERROR("autopicker.cpp::pickCCFPeaks: Particle diameter should be larger than 5 pixels!"); // Init output ccf_peak_list.clear(); Mccfplot.clear(); Mccfplot.resize(Mccf); Mccfplot.initZeros(); Mccfplot.setXmippOrigin(); RFLOAT avg0, stddev0, maxccf0, minccf0; Mccf.computeStats(avg0, stddev0, minccf0, maxccf0); // Collect all high ccf pixels Mrec.clear(); Mrec.resize(Mccf); Mrec.initConstant(0); Mrec.setXmippOrigin(); nr_pixels = 0; for (int ii = FIRST_XMIPP_INDEX(new_micrograph_ysize) + skip_side; ii <= LAST_XMIPP_INDEX(new_micrograph_ysize) - skip_side; ii++) { for (int jj = FIRST_XMIPP_INDEX(new_micrograph_xsize) + skip_side; jj <= LAST_XMIPP_INDEX(new_micrograph_xsize) - skip_side; jj++) { // Only check stddev in the noise areas if max_stddev_noise is positive! if (max_stddev_noise > 0. && A2D_ELEM(Mstddev, ii, jj) > max_stddev_noise) continue; if (min_avg_noise > -900. && A2D_ELEM(Mavg, ii, jj) < min_avg_noise) continue; RFLOAT fom = A2D_ELEM(Mccf, ii, jj); nr_pixels++; if (fom > threshold_value) { A2D_ELEM(Mrec, ii, jj) = 1; ccf_pixel_list.push_back(ccfPixel(jj, ii, fom)); } } } std::sort(ccf_pixel_list.begin(), ccf_pixel_list.end()); #ifdef DEBUG_HELIX std::cout << " nr_high_ccf_pixels= " << ccf_pixel_list.size() << std::endl; #endif // Do not do anything if nr_high_ccf_pixels is too small or too large! Thres is restricted to 0.01%-10% beforehand. if ( (nr_pixels < 100) || (ccf_pixel_list.size() < 10) ) { ccf_peak_list.clear(); return; } ratio = ((RFLOAT)(ccf_pixel_list.size())) / ((RFLOAT)(nr_pixels)); #ifdef DEBUG_HELIX std::cout << " ratio= " << ratio << std::endl; #endif // Sjors changed ratio threshold to 0.5 on 21nov2017 for tau filaments //if (ratio > 0.1) if (ratio > 0.5) { ccf_peak_list.clear(); return; } // Find all peaks! (From the highest fom values) ccf_peak_list.clear(); for (int id = ccf_pixel_list.size() - 1; id >= 0; id--) { int x_new, y_new, x_old, y_old, rmax, rmax2, iref; int rmax_min = peak_r_min; int rmax_max; int iter_max = 3; RFLOAT fom_max; RFLOAT area_percentage_min = 0.8; // Deal with very small shrink values. But it still not performs well if workFrac < 0.2 if ( (scale < 0.5) && (scale > 0.2) ) area_percentage_min = 0.2 + (2. * (scale - 0.2)); else if (scale < 0.2) area_percentage_min = 0.2; // Check if this ccf pixel is covered by another peak x_old = x_new = ROUND(ccf_pixel_list[id].x); y_old = y_new = ROUND(ccf_pixel_list[id].y); if (A2D_ELEM(Mrec, y_new, x_new) == 0) continue; iref = A2D_ELEM(Mclass, y_new, x_new); fom_max = A2D_ELEM(Mccf, y_new, x_new); // Pick a peak starting from this ccf pixel ccf_peak_small.clear(); ccf_peak_big.clear(); rmax_max = ROUND(particle_diameter_pix / 2.); // Sep29,2015 ???????????? 
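 // rmax_max caps the peak-radius search at half the (already rescaled) particle
 // diameter; rmax grows from peak_r_min until the fraction of above-threshold
 // pixels inside the circle drops below area_percentage_min.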
// Sjors 21nov2017 try to adapt for tau fibrils .... //if (rmax_max < 100) // rmax_max = 100; for (rmax = rmax_min; rmax < rmax_max; rmax++) { // Record the smaller peak ccf_peak_small = ccf_peak_big; //std::cout << " id= " << id << ", rmax= " << rmax << ", p= " << ccf_peak_small.area_percentage << std::endl; // 5 iterations to guarantee convergence?????????????? // Require 5 iterations for stablising the center of this peak under this rmax for (int iter = 0; iter < iter_max; iter++) { // Empty this peak ccf_peak_big.clear(); // New rmax rmax2 = rmax * rmax; // Get all ccf pixels within this rmax for (int dx = -rmax; dx <= rmax; dx++) { for (int dy = -rmax; dy <= rmax; dy++) { // Boundary checks if ( (dx * dx + dy * dy) > rmax2) continue; x_new = x_old + dx; y_new = y_old + dy; if ( (x_new < (FIRST_XMIPP_INDEX(new_micrograph_xsize) + skip_side + 1)) || (x_new > (LAST_XMIPP_INDEX(new_micrograph_xsize) - skip_side - 1)) || (y_new < (FIRST_XMIPP_INDEX(new_micrograph_ysize) + skip_side + 1)) || (y_new > (LAST_XMIPP_INDEX(new_micrograph_ysize) - skip_side - 1)) ) continue; // Push back all ccf pixels within this rmax RFLOAT ccf = A2D_ELEM(Mccf, y_new, x_new); if (A2D_ELEM(Mrec, y_new, x_new) == 0) ccf = minccf0; ccf_peak_big.ccf_pixel_list.push_back(ccfPixel(x_new, y_new, ccf)); } } // Check ccf_peak.ccf_pixel_list.size() below! // Refresh ccf_peak_big.r = rmax; ccf_peak_big.fom_thres = threshold_value; if (ccf_peak_big.refresh() == false) { //std::cout << " x_old, y_old = " << x_old << ", " << y_old << std::endl; //REPORT_ERROR("autopicker.cpp::CFFPeaks(): BUG No ccf pixels found within the small circle!"); break; } x_new = ROUND(ccf_peak_big.x); y_new = ROUND(ccf_peak_big.y); // Out of range if ( (x_new < (FIRST_XMIPP_INDEX(new_micrograph_xsize) + skip_side + 1)) || (x_new > (LAST_XMIPP_INDEX(new_micrograph_xsize) - skip_side - 1)) || (y_new < (FIRST_XMIPP_INDEX(new_micrograph_ysize) + skip_side + 1)) || (y_new > (LAST_XMIPP_INDEX(new_micrograph_ysize) - skip_side - 1)) ) break; // Converge if ( (x_old == x_new) && (y_old == y_new) ) break; x_old = x_new; y_old = y_new; } // iter++ ends // Peak finding is over if peak area does not expand if (ccf_peak_big.area_percentage < area_percentage_min) break; } // rmax++ ends // A peak is found if (ccf_peak_small.isValid()) { for (int ii = 0; ii < ccf_peak_small.ccf_pixel_list.size(); ii++) { x_new = ROUND(ccf_peak_small.ccf_pixel_list[ii].x); y_new = ROUND(ccf_peak_small.ccf_pixel_list[ii].y); A2D_ELEM(Mrec, y_new, x_new) = 0; } // TODO: if r > ...? do not include this peak? ccf_peak_small.ref = iref; ccf_peak_small.fom_max = fom_max; ccf_peak_list.push_back(ccf_peak_small); //std::cout << ccf_peak_list.size() << ", "<< std::flush; } }// id-- ends // Sort the peaks from the weakest to the strongest std::sort(ccf_peak_list.begin(), ccf_peak_list.end()); #ifdef DEBUG_HELIX std::cout << " nr_peaks= " << ccf_peak_list.size() << std::endl; #endif // Remove too close peaks (retain the stronger ones while remove the weaker) Mrec.clear(); Mrec.resize(Mccf); Mrec.initConstant(-1); Mrec.setXmippOrigin(); // Sort the peaks from the weakest to the strongest for (int new_id = 0; new_id < ccf_peak_list.size(); new_id++) { int x, y, peak_r, old_id; RFLOAT dist2; RFLOAT peak_r2 = ccf_peak_list[new_id].r * ccf_peak_list[new_id].r; if (ccf_peak_list[new_id].r > 0.) peak_r = CEIL(ccf_peak_list[new_id].r); else peak_r = (-1.); // Remove peaks with too small/big radii! 
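// A peak is discarded by setting its radius negative. Because the list was sorted from the
// weakest to the strongest peak, a stronger peak visited later that paints over a pixel
// already claimed in Mrec invalidates the weaker peak recorded there.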
if ( (peak_r <= 1) || (peak_r > (particle_diameter_pix / 2.)) ) { ccf_peak_list[new_id].r = (-1.); continue; } for (int dx = -peak_r; dx <= peak_r; dx++) { for (int dy = -peak_r; dy <= peak_r; dy++) { dist2 = (RFLOAT)(dx * dx + dy * dy); if (dist2 > peak_r2) continue; x = dx + ROUND(ccf_peak_list[new_id].x); y = dy + ROUND(ccf_peak_list[new_id].y); // Out of range if ( (x < (FIRST_XMIPP_INDEX(new_micrograph_xsize) + skip_side + 1)) || (x > (LAST_XMIPP_INDEX(new_micrograph_xsize) - skip_side - 1)) || (y < (FIRST_XMIPP_INDEX(new_micrograph_ysize) + skip_side + 1)) || (y > (LAST_XMIPP_INDEX(new_micrograph_ysize) - skip_side - 1)) ) continue; old_id = A2D_ELEM(Mrec, y, x); if (old_id >= 0) ccf_peak_list[old_id].r = (-1.); A2D_ELEM(Mrec, y, x) = new_id; } } } // Collect all valid peaks ccf_peak_list_aux.clear(); for (int id = 0; id < ccf_peak_list.size(); id++) { if (ccf_peak_list[id].isValid()) ccf_peak_list_aux.push_back(ccf_peak_list[id]); } ccf_peak_list.clear(); ccf_peak_list = ccf_peak_list_aux; ccf_peak_list_aux.clear(); #ifdef DEBUG_HELIX std::cout << " nr_peaks_pruned= " << ccf_peak_list.size() << std::endl; #endif // TODO: Remove all discrete peaks (one peak should have at least two neighbouring peaks within r=particle_radius) // Plot for (int ii = 0; ii < ccf_peak_list.size(); ii++) { for (int jj = 0; jj < ccf_peak_list[ii].ccf_pixel_list.size(); jj++) { int x, y; if (ccf_peak_list[ii].ccf_pixel_list[jj].fom < ccf_peak_list[ii].fom_thres) continue; x = ROUND(ccf_peak_list[ii].ccf_pixel_list[jj].x); y = ROUND(ccf_peak_list[ii].ccf_pixel_list[jj].y); A2D_ELEM(Mccfplot, y, x) = 1.; } } return; } void AutoPicker::extractHelicalTubes( std::vector& peak_list, std::vector >& tube_coord_list, std::vector& tube_len_list, std::vector >& tube_track_list, RFLOAT particle_diameter_pix, RFLOAT curvature_factor_max, RFLOAT interbox_distance_pix, RFLOAT tube_diameter_pix, float scale) { std::vector is_peak_on_other_tubes; std::vector is_peak_on_this_tube; int tube_id; RFLOAT curvature_max; tube_coord_list.clear(); tube_len_list.clear(); tube_track_list.clear(); //Rescaling particle_diameter_pix *= scale; interbox_distance_pix *= scale; tube_diameter_pix *= scale; if (particle_diameter_pix < 5. * scale) REPORT_ERROR("autopicker.cpp::extractHelicalTubes: Particle diameter should be larger than 5 pixels!"); if ( (curvature_factor_max < 0.0001) || (curvature_factor_max > 1.0001) ) REPORT_ERROR("autopicker.cpp::extractHelicalTubes: Factor of curvature should be 0~1!"); if ( (interbox_distance_pix < 0.9999) || (interbox_distance_pix > particle_diameter_pix) ) REPORT_ERROR("autopicker.cpp::extractHelicalTubes: Interbox distance should be > 1 pixel and < particle diameter!"); if ( (tube_diameter_pix < 1.) || (tube_diameter_pix > particle_diameter_pix) ) REPORT_ERROR("autopicker.cpp::extractHelicalTubes: Tube diameter should be > 1 pixel and < particle diameter!"); if (peak_list.size() < 5) return; // Calculate the maximum curvature curvature_max = curvature_factor_max / (particle_diameter_pix / 2.); //curvature_max = (sqrt(1. 
/ scale)) * curvature_factor_max / (particle_diameter_pix / 2.); // Sort the peaks from the weakest to the strongest std::sort(peak_list.begin(), peak_list.end()); is_peak_on_other_tubes.resize(peak_list.size()); is_peak_on_this_tube.resize(peak_list.size()); for (int peak_id0 = 0; peak_id0 < is_peak_on_other_tubes.size(); peak_id0++) is_peak_on_other_tubes[peak_id0] = is_peak_on_this_tube[peak_id0] = -1; // Traverse peaks from the strongest to the weakest tube_id = 0; for (int peak_id0 = peak_list.size() - 1; peak_id0 >= 0; peak_id0--) { RFLOAT rmax2; std::vector selected_peaks; // Check whether this peak is included on other tubes if (is_peak_on_other_tubes[peak_id0] > 0) continue; // Probably a new tube tube_id++; is_peak_on_other_tubes[peak_id0] = tube_id; for (int peak_id1 = 0; peak_id1 < peak_list.size(); peak_id1++) is_peak_on_this_tube[peak_id1] = -1; is_peak_on_this_tube[peak_id0] = tube_id; // Gather all neighboring peaks around selected_peaks.clear(); // don't push itself in? No do not push itself!!! rmax2 = particle_diameter_pix * particle_diameter_pix / 4.; for (int peak_id1 = 0; peak_id1 < peak_list.size(); peak_id1++) { if (peak_id0 == peak_id1) continue; if (is_peak_on_other_tubes[peak_id1] > 0) continue; RFLOAT dx, dy, dist2; dx = peak_list[peak_id1].x - peak_list[peak_id0].x; dy = peak_list[peak_id1].y - peak_list[peak_id0].y; dist2 = dx * dx + dy * dy; if (dist2 < rmax2) { ccfPeak myPeak = peak_list[peak_id1]; myPeak.dist = sqrt(dist2); if ( (fabs(dx) < 0.01) && (fabs(dy) < 0.01) ) myPeak.psi = 0.; else myPeak.psi = RAD2DEG(atan2(dy, dx)); selected_peaks.push_back(myPeak); } } // Sep29,2015 ???????????? // If less than 3 neighboring peaks are found, this is not a peak along a helical tube! if (selected_peaks.size() <= 2) continue; // This peak has >=2 neighboring peaks! Try to find an orientation! RFLOAT local_psi, local_dev, best_local_psi, best_local_dev, dev0, dev1, dev_weights; RFLOAT local_psi_sampling = 0.1; std::vector selected_peaks_dir1, selected_peaks_dir2, helical_track_dir1, helical_track_dir2, helical_track, helical_segments; RFLOAT psi_dir1, psi_dir2, len_dir1, len_dir2; selected_peaks_dir1.clear(); selected_peaks_dir2.clear(); // Find the averaged psi best_local_psi = -1.; best_local_dev = (1e30); // Traverse every possible value of local_psi and calculate the dev for (local_psi = 0.; local_psi < 180.; local_psi += local_psi_sampling) { local_dev = 0.; dev_weights = 0.; for (int peak_id1 = 0; peak_id1 < selected_peaks.size(); peak_id1++) { dev0 = ABS(selected_peaks[peak_id1].psi - local_psi); if (dev0 > 180.) dev0 = ABS(dev0 - 360.); if (dev0 > 90.) dev0 = ABS(dev0 - 180.); RFLOAT pixel_count = (RFLOAT)(selected_peaks[peak_id1].nr_peak_pixel); if (pixel_count < 1.) pixel_count = 1.; local_dev += dev0 * pixel_count; dev_weights += pixel_count; } local_dev /= dev_weights; // Refresh if a better local psi is found if (local_dev < best_local_dev) { best_local_psi = local_psi; best_local_dev = local_dev; } } // Sort all peaks into dir1, dir2 and others psi_dir1 = psi_dir2 = 0.; for (int peak_id1 = 0; peak_id1 < selected_peaks.size(); peak_id1++) { dev0 = ABS(selected_peaks[peak_id1].psi - best_local_psi); dev1 = dev0; if (dev1 > 180.) dev1 = ABS(dev1 - 360.); if (dev1 > 90.) 
dev1 = ABS(dev1 - 180.); RFLOAT curvature1 = DEG2RAD(dev1) / selected_peaks[peak_id1].dist; // Cannot fall into the estimated direction if (curvature1 > curvature_max) continue; // Psi direction or the opposite direction if (fabs(dev1 - dev0) < 0.1) { selected_peaks_dir2.push_back(selected_peaks[peak_id1]); psi_dir2 += selected_peaks[peak_id1].psi; } else { selected_peaks_dir1.push_back(selected_peaks[peak_id1]); psi_dir1 += selected_peaks[peak_id1].psi; } } RFLOAT xc, yc, xc_new, yc_new, xc_old, yc_old, dist_max, nr_psi_within_range; //std::cout << " nr Dir1 peaks= " << selected_peaks_dir1.size() << std::endl; // Dir1 if (selected_peaks_dir1.size() >= 1) { // Init psi_dir1 /= selected_peaks_dir1.size(); dist_max = -1.; for (int peak_id1 = 0; peak_id1 < selected_peaks_dir1.size(); peak_id1++) { if (selected_peaks_dir1[peak_id1].dist > dist_max) dist_max = selected_peaks_dir1[peak_id1].dist; } len_dir1 = 0.; xc_old = peak_list[peak_id0].x; yc_old = peak_list[peak_id0].y; helical_track_dir1.clear(); while(1) { // A new center along helical track dir1 is found, record it xc_new = xc_old + cos(DEG2RAD(psi_dir1)) * dist_max; yc_new = yc_old + sin(DEG2RAD(psi_dir1)) * dist_max; len_dir1 += dist_max; ccfPeak myPeak; myPeak.x = xc_new; myPeak.y = yc_new; myPeak.psi = psi_dir1; helical_track_dir1.push_back(myPeak); //std::cout << " Dir1 new center: x, y, psi= " << xc << ", " << yc << ", " << psi_dir1 << std::endl; // TODO: other parameters to add? // TODO: mark peaks along helical tracks xc = (xc_old + xc_new) / 2.; yc = (yc_old + yc_new) / 2.; rmax2 = ((dist_max + tube_diameter_pix) / 2.) * ((dist_max + tube_diameter_pix) / 2.); bool is_new_peak_found = false; bool is_combined_with_another_tube = true; for (int peak_id1 = 0; peak_id1 < peak_list.size(); peak_id1++) { RFLOAT dx, dy, dist, dist2, dpsi, h, r; dx = peak_list[peak_id1].x - xc; dy = peak_list[peak_id1].y - yc; dist2 = dx * dx + dy * dy; if (dist2 > rmax2) continue; if ( (fabs(dx) < 0.01) && (fabs(dy) < 0.01) ) // atan2(0,0) dpsi = 0.; else dpsi = RAD2DEG(atan2(dy, dx)) - psi_dir1; dist = sqrt(dist2); h = dist * fabs(cos(DEG2RAD(dpsi))); r = dist * fabs(sin(DEG2RAD(dpsi))); if ( (h < ((dist_max + tube_diameter_pix) / 2.)) && (r < (tube_diameter_pix / 2.)) ) { if (is_peak_on_this_tube[peak_id1] < 0) { is_new_peak_found = true; is_peak_on_this_tube[peak_id1] = tube_id; if (is_peak_on_other_tubes[peak_id1] < 0) { is_combined_with_another_tube = false; is_peak_on_other_tubes[peak_id1] = tube_id; } } } } if ( (is_new_peak_found == false) || (is_combined_with_another_tube == true) ) { // TODO: delete the end of this track list or not? 
//helical_track_dir1.pop_back(); break; } // TODO: try to find another new center if possible xc_old = xc_new; yc_old = yc_new; rmax2 = particle_diameter_pix * particle_diameter_pix / 4.; selected_peaks_dir1.clear(); for (int peak_id1 = 0; peak_id1 < peak_list.size(); peak_id1++) { if (is_peak_on_this_tube[peak_id1] > 0) continue; RFLOAT dx, dy, dist, dist2, dpsi, h, r; dx = peak_list[peak_id1].x - xc_old; dy = peak_list[peak_id1].y - yc_old; dist2 = dx * dx + dy * dy; if (dist2 < rmax2) { myPeak = peak_list[peak_id1]; myPeak.dist = sqrt(dist2); if ( (fabs(dx) < 0.01) && (fabs(dy) < 0.01) ) // atan2(0,0) myPeak.psi = 0.; else myPeak.psi = RAD2DEG(atan2(dy, dx)); selected_peaks_dir1.push_back(myPeak); } } dist_max = -1.; RFLOAT psi_sum = 0.; RFLOAT psi_weights = 0.; nr_psi_within_range = 0.; int id_peak_dist_max; for (int peak_id1 = 0; peak_id1 < selected_peaks_dir1.size(); peak_id1++) { //std::cout << " Peak id " << selected_peaks_dir1[ii].id << " x, y, r, psi, psidir1= " << selected_peaks_dir1[ii].x << ", " << selected_peaks_dir1[ii].y // << ", " << selected_peaks_dir1[ii].r << ", " << selected_peaks_dir1[ii].psi << ", " << psi_dir1 << std::endl; RFLOAT curvature = DEG2RAD(ABS(selected_peaks_dir1[peak_id1].psi - psi_dir1)) / selected_peaks_dir1[peak_id1].dist; if (curvature < curvature_max) { nr_psi_within_range += 1.; RFLOAT pixel_count = (RFLOAT)(selected_peaks_dir1[peak_id1].nr_peak_pixel); if (pixel_count < 1.) pixel_count = 1.; psi_sum += selected_peaks_dir1[peak_id1].psi * pixel_count; psi_weights += pixel_count; if (selected_peaks_dir1[peak_id1].dist > dist_max) { dist_max = selected_peaks_dir1[peak_id1].dist; id_peak_dist_max = peak_id1; } } } // If no peaks are found in this round, the helical track stops, exit if (nr_psi_within_range < 0.5) break; psi_dir1 = psi_sum / psi_weights; } } //std::cout << " nr Dir2 peaks= " << selected_peaks_dir2.size() << std::endl; // Dir2 // ================================================================================================ if (selected_peaks_dir2.size() >= 1) { // Init psi_dir2 /= selected_peaks_dir2.size(); dist_max = -1.; for (int peak_id1 = 0; peak_id1 < selected_peaks_dir2.size(); peak_id1++) { if (selected_peaks_dir2[peak_id1].dist > dist_max) dist_max = selected_peaks_dir2[peak_id1].dist; } len_dir2 = 0.; xc_old = peak_list[peak_id0].x; yc_old = peak_list[peak_id0].y; helical_track_dir2.clear(); while(1) { // A new center along helical track dir1 is found, record it xc_new = xc_old + cos(DEG2RAD(psi_dir2)) * dist_max; yc_new = yc_old + sin(DEG2RAD(psi_dir2)) * dist_max; len_dir2 += dist_max; ccfPeak myPeak; myPeak.x = xc_new; myPeak.y = yc_new; myPeak.psi = psi_dir2; helical_track_dir2.push_back(myPeak); //std::cout << " Dir1 new center: x, y, psi= " << xc << ", " << yc << ", " << psi_dir1 << std::endl; // TODO: other parameters to add? // TODO: mark peaks along helical tracks xc = (xc_old + xc_new) / 2.; yc = (yc_old + yc_new) / 2.; rmax2 = ((dist_max + tube_diameter_pix) / 2.) 
* ((dist_max + tube_diameter_pix) / 2.); bool is_new_peak_found = false; bool is_combined_with_another_tube = true; for (int peak_id1 = 0; peak_id1 < peak_list.size(); peak_id1++) { RFLOAT dx, dy, dist, dist2, dpsi, h, r; dx = peak_list[peak_id1].x - xc; dy = peak_list[peak_id1].y - yc; dist2 = dx * dx + dy * dy; if (dist2 > rmax2) continue; if ( (fabs(dx) < 0.01) && (fabs(dy) < 0.01) ) // atan2(0,0) dpsi = 0.; else dpsi = RAD2DEG(atan2(dy, dx)) - psi_dir2; dist = sqrt(dist2); h = dist * fabs(cos(DEG2RAD(dpsi))); r = dist * fabs(sin(DEG2RAD(dpsi))); if ( (h < ((dist_max + tube_diameter_pix) / 2.)) && (r < (tube_diameter_pix / 2.)) ) { if (is_peak_on_this_tube[peak_id1] < 0) { is_new_peak_found = true; is_peak_on_this_tube[peak_id1] = tube_id; if (is_peak_on_other_tubes[peak_id1] < 0) { is_combined_with_another_tube = false; is_peak_on_other_tubes[peak_id1] = tube_id; } } } } if ( (is_new_peak_found == false) || (is_combined_with_another_tube == true) ) { // TODO: delete the end of this track list or not? break; } // TODO: try to find another new center if possible xc_old = xc_new; yc_old = yc_new; rmax2 = particle_diameter_pix * particle_diameter_pix / 4.; selected_peaks_dir2.clear(); for (int peak_id1 = 0; peak_id1 < peak_list.size(); peak_id1++) { if (is_peak_on_this_tube[peak_id1] > 0) continue; RFLOAT dx, dy, dist, dist2, dpsi, h, r; dx = peak_list[peak_id1].x - xc_old; dy = peak_list[peak_id1].y - yc_old; dist2 = dx * dx + dy * dy; if (dist2 < rmax2) { myPeak = peak_list[peak_id1]; myPeak.dist = sqrt(dist2); if ( (fabs(dx) < 0.01) && (fabs(dy) < 0.01) ) // atan2(0,0) myPeak.psi = 0.; else myPeak.psi = RAD2DEG(atan2(dy, dx)); selected_peaks_dir2.push_back(myPeak); } } dist_max = -1.; RFLOAT psi_sum = 0.; RFLOAT psi_weights = 0.; nr_psi_within_range = 0.; int id_peak_dist_max; for (int peak_id1 = 0; peak_id1 < selected_peaks_dir2.size(); peak_id1++) { //std::cout << " Peak id " << selected_peaks_dir2[ii].id << " x, y, r, psi, psidir2= " << selected_peaks_dir2[ii].x << ", " << selected_peaks_dir2[ii].y // << ", " << selected_peaks_dir2[ii].r << ", " << selected_peaks_dir2[ii].psi << ", " << psi_dir2 << std::endl; RFLOAT curvature = DEG2RAD(ABS(selected_peaks_dir2[peak_id1].psi - psi_dir2)) / selected_peaks_dir2[peak_id1].dist; if (curvature < curvature_max) { nr_psi_within_range += 1.; RFLOAT pixel_count = (RFLOAT)(selected_peaks_dir2[peak_id1].nr_peak_pixel); if (pixel_count < 1.) pixel_count = 1.; psi_sum += selected_peaks_dir2[peak_id1].psi * pixel_count; psi_weights += pixel_count; if (selected_peaks_dir2[peak_id1].dist > dist_max) { dist_max = selected_peaks_dir2[peak_id1].dist; id_peak_dist_max = peak_id1; } } } // If no peaks are found in this round, the helical track stops, exit if (nr_psi_within_range < 0.5) break; psi_dir2 = psi_sum / psi_weights; } } // Get a full track helical_track.clear(); for (int ii = helical_track_dir2.size() - 1; ii >= 0; ii--) helical_track.push_back(helical_track_dir2[ii]); helical_track.push_back(peak_list[peak_id0]); for (int ii = 0; ii < helical_track_dir1.size(); ii++) helical_track.push_back(helical_track_dir1[ii]); // TODO: check below !!! 
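// Convert the recovered track into evenly spaced picking coordinates: walk along the track
// and place a segment every interbox_distance_pix, carrying the left-over distance
// (dist_left) from one track point to the next so the spacing stays constant across kinks.
// Tracks that are too short are discarded altogether.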
if ( (helical_track.size() < 3) || ((len_dir1 + len_dir2) < particle_diameter_pix) || ((len_dir1 + len_dir2) < interbox_distance_pix) ) { helical_track.clear(); } else { ccfPeak newSegment; RFLOAT dist_left, len_total; helical_segments.clear(); // Get the first segment newSegment.x = helical_track[0].x; newSegment.y = helical_track[0].y; newSegment.psi = RAD2DEG(atan2(helical_track[1].y - helical_track[0].y, helical_track[1].x - helical_track[0].x)); newSegment.ref = helical_track[0].ref; helical_segments.push_back(newSegment); // Get segments along the track dist_left = 0.; for (int inext = 1; inext < helical_track.size(); inext++) { RFLOAT x0, y0, dx, dy, dist, dist_total, psi, nr_segments_float; int nr_segments_int; x0 = helical_track[inext - 1].x; y0 = helical_track[inext - 1].y; dx = helical_track[inext].x - helical_track[inext - 1].x; dy = helical_track[inext].y - helical_track[inext - 1].y; psi = RAD2DEG(atan2(dy, dx)); dist_total = sqrt(dx * dx + dy * dy); nr_segments_float = (dist_left + dist_total) / interbox_distance_pix; nr_segments_int = FLOOR(nr_segments_float); if (nr_segments_int >= 1) { for (int iseg = 1; iseg <= nr_segments_int; iseg++) { dist = (RFLOAT)(iseg) * interbox_distance_pix - dist_left; dx = cos(DEG2RAD(psi)) * dist; dy = sin(DEG2RAD(psi)) * dist; newSegment.x = x0 + dx; newSegment.y = y0 + dy; newSegment.psi = psi; if ( (iseg * 2) < nr_segments_int) newSegment.ref = helical_track[inext - 1].ref; else newSegment.ref = helical_track[inext].ref; helical_segments.push_back(newSegment); } } dist_left = (dist_left + dist_total) - ((RFLOAT)(nr_segments_int) * interbox_distance_pix); } // Get the last segment and mark it as invalid (different from what I did for the first segment) int last_id = helical_track.size(); last_id -= 1; newSegment.x = helical_track[last_id].x; newSegment.y = helical_track[last_id].y; newSegment.psi = (1e30); newSegment.ref = helical_track[last_id].ref; helical_segments.push_back(newSegment); len_total = len_dir1 + len_dir2; tube_coord_list.push_back(helical_segments); tube_len_list.push_back(len_total); tube_track_list.push_back(helical_track); // DEBUG #ifdef DEBUG_HELIX for (int ii = 0; ii < helical_track.size(); ii++) std::cout << "Track point x, y, psi= " << helical_track[ii].x << ", " << helical_track[ii].y << ", " << helical_track[ii].psi << std::endl; std::cout << " Track length= " << (len_dir1 + len_dir2) << std::endl; #endif } } return; } void AutoPicker::exportHelicalTubes( const MultidimArray& Mccf, MultidimArray& Mccfplot, const MultidimArray& Mclass, std::vector >& tube_coord_list, std::vector >& tube_track_list, std::vector& tube_len_list, FileName& fn_mic_in, FileName& fn_star_out, RFLOAT particle_diameter_pix, RFLOAT tube_length_min_pix, int skip_side, float scale) { // Rescale particle_diameter_pix, tube_length_min_pix, skip_side tube_length_min_pix *= scale; particle_diameter_pix *= scale; skip_side = (int)((float)skip_side*scale); if ( (tube_coord_list.size() != tube_track_list.size()) || (tube_track_list.size() != tube_len_list.size()) ) REPORT_ERROR("autopicker.cpp::exportHelicalTubes: BUG tube_coord_list.size() != tube_track_list.size() != tube_len_list.size()"); if ( (STARTINGY(Mccf) != FIRST_XMIPP_INDEX(YSIZE(Mccf))) || (STARTINGX(Mccf) != FIRST_XMIPP_INDEX(XSIZE(Mccf))) ) REPORT_ERROR("autopicker.cpp::exportHelicalTubes: The origin of input 3D MultidimArray is not at the center (use v.setXmippOrigin() before calling this function)!"); if (particle_diameter_pix < 5.) // TODO: 5? 
REPORT_ERROR("autopicker.cpp::exportHelicalTubes: Particle diameter should be larger than 5 pixels!"); // Mark tracks on Mccfplot Mccfplot.setXmippOrigin(); for (int itrack = 0; itrack < tube_track_list.size(); itrack++) { for (int icoord = 1; icoord < tube_track_list[itrack].size(); icoord++) { RFLOAT x0, y0, x1, y1, dx, dy, psi_rad, dist_total; int x_int, y_int; x0 = tube_track_list[itrack][icoord - 1].x; y0 = tube_track_list[itrack][icoord - 1].y; x1 = tube_track_list[itrack][icoord].x; y1 = tube_track_list[itrack][icoord].y; dx = x1 - x0; dy = y1 - y0; if ( (fabs(dx) < 0.1) && (fabs(dy) < 0.1) ) psi_rad = 0.; psi_rad = atan2(dy, dx); dist_total = sqrt(dx * dx + dy * dy); if (dist_total < 2.) continue; for (RFLOAT fdist = 1.; fdist < dist_total; fdist += 1.) { dx = cos(psi_rad) * fdist; dy = sin(psi_rad) * fdist; x1 = x0 + dx; y1 = y0 + dy; x_int = ROUND(x1); y_int = ROUND(y1); if ( (x_int < (FIRST_XMIPP_INDEX(micrograph_xsize) + 1)) || (x_int > (LAST_XMIPP_INDEX(micrograph_xsize) - 1)) || (y_int < (FIRST_XMIPP_INDEX(micrograph_ysize) + 1)) || (y_int > (LAST_XMIPP_INDEX(micrograph_ysize) - 1)) ) continue; A2D_ELEM(Mccfplot, y_int, x_int) = 1.; } } } // Detect crossovers RFLOAT dist2_min = particle_diameter_pix * particle_diameter_pix / 4.; for (int itube1 = 0; (itube1 + 1) < tube_coord_list.size(); itube1++) { for (int icoord1 = 0; icoord1 < tube_coord_list[itube1].size(); icoord1++) { // Coord1 selected for (int itube2 = (itube1 + 1); itube2 < tube_coord_list.size(); itube2++) { for (int icoord2 = 0; icoord2 < tube_coord_list[itube2].size(); icoord2++) { // Coord2 selected RFLOAT x1, y1, x2, y2, dist2; x1 = tube_coord_list[itube1][icoord1].x; y1 = tube_coord_list[itube1][icoord1].y; x2 = tube_coord_list[itube2][icoord2].x; y2 = tube_coord_list[itube2][icoord2].y; dist2 = (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1); // If this point is around the crossover if (dist2 < dist2_min) tube_coord_list[itube1][icoord1].psi = tube_coord_list[itube2][icoord2].psi = (1e30); } } } } // Cancel segments close to the ends of tubes /* for (int itube = 0; itube < tube_coord_list.size(); itube++) { if (tube_track_list[itube].size() < 2) continue; RFLOAT x_start, y_start, x_end, y_end, particle_radius_pix2; int last_id; last_id = tube_track_list[itube].size(); last_id -= 1; x_start = tube_track_list[itube][0].x; y_start = tube_track_list[itube][0].y; x_end = tube_track_list[itube][last_id].x; y_end = tube_track_list[itube][last_id].y; particle_radius_pix2 = particle_diameter_pix * particle_diameter_pix / 4.; for (int icoord = 0; icoord < tube_coord_list[itube].size(); icoord++) { if (fabs(tube_coord_list[itube][icoord].psi) > 360.) continue; RFLOAT x, y, dx1, dy1, dx2, dy2, dist21, dist22; x = tube_coord_list[itube][icoord].x; y = tube_coord_list[itube][icoord].y; dx1 = x - x_start; dy1 = y - y_start; dx2 = x - x_end; dy2 = y - y_end; dist21 = dx1 * dx1 + dy1 * dy1; dist22 = dx2 * dx2 + dy2 * dy2; if ( (dist21 < particle_radius_pix2) || (dist22 < particle_radius_pix2) ) tube_coord_list[itube][icoord].psi = (1e30); } } */ // Write out a STAR file with the coordinates FileName fn_tmp; MetaDataTable MDout; int helical_tube_id; RFLOAT helical_tube_len; // Only output STAR header if there are no tubes... 
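// Each surviving segment (crossovers and out-of-range points are skipped) becomes one row
// in the output STAR file: the picked coordinate rescaled back to the original micrograph,
// reference class, autopick FOM, helical tube id, a fixed tilt prior of 90 degrees, the
// sign-flipped psi prior, the accumulated track length in Angstroms and the bimodal prior
// flip ratios.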
MDout.clear(); MDout.addLabel(EMDL_IMAGE_COORD_X); MDout.addLabel(EMDL_IMAGE_COORD_Y); MDout.addLabel(EMDL_PARTICLE_CLASS); MDout.addLabel(EMDL_PARTICLE_AUTOPICK_FOM); MDout.addLabel(EMDL_PARTICLE_HELICAL_TUBE_ID); MDout.addLabel(EMDL_ORIENT_TILT_PRIOR); MDout.addLabel(EMDL_ORIENT_PSI_PRIOR); MDout.addLabel(EMDL_PARTICLE_HELICAL_TRACK_LENGTH_ANGSTROM); MDout.addLabel(EMDL_ORIENT_PSI_PRIOR_FLIP_RATIO); MDout.addLabel(EMDL_ORIENT_ROT_PRIOR_FLIP_RATIO); //KThurber helical_tube_id = 0; for (int itube = 0; itube < tube_coord_list.size(); itube++) { if (tube_length_min_pix > particle_diameter_pix) { if (tube_len_list[itube] < tube_length_min_pix) continue; } helical_tube_id++; helical_tube_len = 0.; for (int icoord = 0; icoord < tube_coord_list[itube].size(); icoord++) { int x_int, y_int, iref; RFLOAT fom; if (icoord > 0) { RFLOAT dx = ((RFLOAT)(tube_coord_list[itube][icoord].x)) - ((RFLOAT)(tube_coord_list[itube][icoord - 1].x)); RFLOAT dy = ((RFLOAT)(tube_coord_list[itube][icoord].y)) - ((RFLOAT)(tube_coord_list[itube][icoord - 1].y)); helical_tube_len += sqrt(dx * dx + dy * dy); } // Invalid psi (crossover) if (fabs(tube_coord_list[itube][icoord].psi) > 360.) continue; x_int = ROUND(tube_coord_list[itube][icoord].x); y_int = ROUND(tube_coord_list[itube][icoord].y); // Out of range if ( (x_int < (FIRST_XMIPP_INDEX(micrograph_xsize) + skip_side + 1)) || (x_int > (LAST_XMIPP_INDEX(micrograph_xsize) - skip_side - 1)) || (y_int < (FIRST_XMIPP_INDEX(micrograph_ysize) + skip_side + 1)) || (y_int > (LAST_XMIPP_INDEX(micrograph_ysize) - skip_side - 1)) ) continue; iref = A2D_ELEM(Mclass, y_int, x_int); fom = A2D_ELEM(Mccf, y_int, x_int); MDout.addObject(); RFLOAT xval = (tube_coord_list[itube][icoord].x / scale) - (RFLOAT)(FIRST_XMIPP_INDEX(micrograph_xsize)); RFLOAT yval = (tube_coord_list[itube][icoord].y / scale) - (RFLOAT)(FIRST_XMIPP_INDEX(micrograph_ysize)); MDout.setValue(EMDL_IMAGE_COORD_X, xval); MDout.setValue(EMDL_IMAGE_COORD_Y, yval); MDout.setValue(EMDL_PARTICLE_CLASS, iref + 1); // start counting at 1 MDout.setValue(EMDL_PARTICLE_AUTOPICK_FOM, fom); MDout.setValue(EMDL_PARTICLE_HELICAL_TUBE_ID, helical_tube_id); MDout.setValue(EMDL_ORIENT_TILT_PRIOR, 90.); MDout.setValue(EMDL_ORIENT_PSI_PRIOR, (-1.) * (tube_coord_list[itube][icoord].psi)); // Beware! Multiplied by -1! MDout.setValue(EMDL_PARTICLE_HELICAL_TRACK_LENGTH_ANGSTROM, angpix * helical_tube_len); MDout.setValue(EMDL_ORIENT_PSI_PRIOR_FLIP_RATIO, BIMODAL_PSI_PRIOR_FLIP_RATIO); MDout.setValue(EMDL_ORIENT_ROT_PRIOR_FLIP_RATIO, BIMODAL_PSI_PRIOR_FLIP_RATIO); // KThurber } } fn_tmp = getOutputRootName(fn_mic_in) + "_" + fn_star_out + ".star"; MDout.write(fn_tmp); return; } void AutoPicker::autoPickLoGOneMicrograph(FileName &fn_mic, long int imic) { Image Imic; MultidimArray Fmic, Faux; FourierTransformer transformer; MultidimArray Mbest_size, Mbest_fom; float scale = (float)workSize / (float)micrograph_size; Mbest_size.resize(workSize, workSize); Mbest_size.initConstant(-999.); Mbest_size.setXmippOrigin(); Mbest_fom.resize(workSize, workSize); Mbest_fom.initConstant(-999.); Mbest_fom.setXmippOrigin(); if (!do_read_fom_maps) { // Always use the same random seed init_random_generator(random_seed + imic); // Read in the micrograph Imic.read(fn_mic); Imic().setXmippOrigin(); // Let's just check the square size again.... RFLOAT my_size, my_xsize, my_ysize; my_xsize = XSIZE(Imic()); my_ysize = YSIZE(Imic()); my_size = (my_xsize != my_ysize) ? 
XMIPP_MAX(my_xsize, my_ysize) : my_xsize; if (my_size != micrograph_size || my_xsize != micrograph_xsize || my_ysize != micrograph_ysize) { Imic().printShape(); std::cerr << " micrograph_size= " << micrograph_size << " micrograph_xsize= " << micrograph_xsize << " micrograph_ysize= " << micrograph_ysize << std::endl; REPORT_ERROR("AutoPicker::autoPickOneMicrograph ERROR: No differently sized micrographs are allowed in one run, sorry you will have to run separately for each size..."); } // Set mean to zero and stddev to 1 to prevent numerical problems with one-sweep stddev calculations.... RFLOAT avg0, stddev0, minval0, maxval0; Imic().computeStats(avg0, stddev0, minval0, maxval0); FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Imic()) { // Remove pixel values that are too far away from the mean if ( ABS(DIRECT_MULTIDIM_ELEM(Imic(), n) - avg0) / stddev0 > outlier_removal_zscore) DIRECT_MULTIDIM_ELEM(Imic(), n) = avg0; DIRECT_MULTIDIM_ELEM(Imic(), n) = (DIRECT_MULTIDIM_ELEM(Imic(), n) - avg0) / stddev0; } // Have positive LoG maps if (!LoG_invert) Imic() *= -1.; if (micrograph_xsize != micrograph_size || micrograph_ysize != micrograph_size) { // Window non-square micrographs to be a square with the largest side rewindow(Imic, micrograph_size); // Fill region outside the original window with white Gaussian noise to prevent all-zeros in Mstddev FOR_ALL_ELEMENTS_IN_ARRAY2D(Imic()) { if (i < FIRST_XMIPP_INDEX(micrograph_ysize) || i > LAST_XMIPP_INDEX(micrograph_ysize) || j < FIRST_XMIPP_INDEX(micrograph_xsize) || j > LAST_XMIPP_INDEX(micrograph_xsize) ) A2D_ELEM(Imic(), i, j) = rnd_gaus(0.,1.); } } // Fourier Transform (and downscale) Imic() transformer.FourierTransform(Imic(), Faux); // Use downsized FFTs windowFourierTransform(Faux, Fmic, workSize); if (LoG_use_ctf) { MultidimArray Fctf(YSIZE(Fmic), XSIZE(Fmic)); CTF ctf; // Search for this micrograph in the metadata table bool found = false; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDmic) { FileName fn_tmp; MDmic.getValue(EMDL_MICROGRAPH_NAME, fn_tmp); if (fn_tmp == fn_mic) { ctf.readByGroup(MDmic, &obsModel); found = true; break; } } if (!found) REPORT_ERROR("Logic error: failed to find CTF information for " + fn_mic); ctf.getFftwImage(Fctf, micrograph_size, micrograph_size, angpix, false, false, false, false, false, true); FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Fmic) { // this is safe because getCTF does not return 0. 
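// Divide the micrograph's Fourier components by the CTF so that the LoG filters below see
// an approximately CTF-corrected image; as noted above, the CTF values are never exactly
// zero, so the division is well defined.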
DIRECT_MULTIDIM_ELEM(Fmic, n) /= DIRECT_MULTIDIM_ELEM(Fctf, n); } } Image Maux(workSize, workSize); // transformer.inverseFourierTransform(Fmic, Maux()); // Maux.write("LoG-ctf-filtered.mrc"); // REPORT_ERROR("stop"); // Make the diameter of the LoG filter larger in steps of LoG_incr_search (=1.5) // Search sizes from LoG_min_diameter to LoG_max_search (=5) * LoG_max_diameter for (int i = 0; i < diams_LoG.size(); i++) { RFLOAT myd = diams_LoG[i]; Faux = Fmic; LoGFilterMap(Faux, micrograph_size, myd, angpix); transformer.inverseFourierTransform(Faux, Maux()); if (do_write_fom_maps) { FileName fn_tmp=getOutputRootName(fn_mic)+"_"+fn_out+"_LoG"+integerToString(ROUND(myd))+".spi"; Maux.write(fn_tmp); } FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Maux()) { if (DIRECT_MULTIDIM_ELEM(Maux(), n) > DIRECT_MULTIDIM_ELEM(Mbest_fom, n)) { DIRECT_MULTIDIM_ELEM(Mbest_fom, n) = DIRECT_MULTIDIM_ELEM(Maux(), n); DIRECT_MULTIDIM_ELEM(Mbest_size, n) = myd; } } } } // end if !do_read_fom_maps else { Image Maux; // Read back in pre-calculated LoG maps for (int i = 0; i < diams_LoG.size(); i++) { RFLOAT myd = diams_LoG[i]; FileName fn_tmp=getOutputRootName(fn_mic)+"_"+fn_out+"_LoG"+integerToString(ROUND(myd))+".spi"; Maux.read(fn_tmp); FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Maux()) { if (DIRECT_MULTIDIM_ELEM(Maux(), n) > DIRECT_MULTIDIM_ELEM(Mbest_fom, n)) { DIRECT_MULTIDIM_ELEM(Mbest_fom, n) = DIRECT_MULTIDIM_ELEM(Maux(), n); DIRECT_MULTIDIM_ELEM(Mbest_size, n) = myd; } } } } Image Maux2; FileName fn_tmp; if (do_write_fom_maps) { Maux2() = Mbest_fom; fn_tmp=getOutputRootName(fn_mic)+"_"+fn_out+"_bestLoG.spi"; Maux2.write(fn_tmp); Maux2() = Mbest_size; fn_tmp=getOutputRootName(fn_mic)+"_"+fn_out+"_bestSize.spi"; Maux2.write(fn_tmp); } // Skip the sides if necessary int my_skip_side = (int)((float)autopick_skip_side*scale); if (my_skip_side > 0) { MultidimArray Mbest_fom_new(Mbest_fom); Mbest_fom_new.initZeros(); for (int i = FIRST_XMIPP_INDEX((int)((float)micrograph_ysize*scale)) + my_skip_side; i <= LAST_XMIPP_INDEX((int)((float)micrograph_ysize*scale)) - my_skip_side; i++) { for (int j = FIRST_XMIPP_INDEX((int)((float)micrograph_xsize*scale)) + my_skip_side; j <= LAST_XMIPP_INDEX((int)((float)micrograph_xsize*scale)) - my_skip_side; j++) { A2D_ELEM(Mbest_fom_new, i, j) = A2D_ELEM(Mbest_fom, i, j); } } Mbest_fom = Mbest_fom_new; } // See which pixels have the best diameters within the desired diameter range // Also store average and stddev of FOMs outside that range, in order to idea of the noise in the FOMs RFLOAT sum_fom_low = 0.; RFLOAT sum_fom_high = 0.; RFLOAT sum_fom_ok = 0.; RFLOAT sum2_fom_low = 0.; RFLOAT sum2_fom_high = 0.; RFLOAT sum2_fom_ok = 0.; RFLOAT count_low = 0.; RFLOAT count_high = 0.; RFLOAT count_ok = 0.; FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Mbest_size) { if (DIRECT_MULTIDIM_ELEM(Mbest_size, n) > LoG_max_diameter) { sum_fom_high += DIRECT_MULTIDIM_ELEM(Mbest_fom, n); sum2_fom_high += DIRECT_MULTIDIM_ELEM(Mbest_fom, n) * DIRECT_MULTIDIM_ELEM(Mbest_fom, n); count_high += 1.; DIRECT_MULTIDIM_ELEM(Mbest_fom, n) = 0.; } else if (DIRECT_MULTIDIM_ELEM(Mbest_size, n) < LoG_min_diameter) { sum_fom_low += DIRECT_MULTIDIM_ELEM(Mbest_fom, n); sum2_fom_low += DIRECT_MULTIDIM_ELEM(Mbest_fom, n) * DIRECT_MULTIDIM_ELEM(Mbest_fom, n); count_low += 1.; DIRECT_MULTIDIM_ELEM(Mbest_fom, n) = 0.; } else { sum_fom_ok += DIRECT_MULTIDIM_ELEM(Mbest_fom, n); sum2_fom_ok += DIRECT_MULTIDIM_ELEM(Mbest_fom, n) * DIRECT_MULTIDIM_ELEM(Mbest_fom, n); count_ok += 1.; } } if (do_write_fom_maps) { Maux2() = Mbest_fom; 
fn_tmp=getOutputRootName(fn_mic)+"_"+fn_out+"_bestLoGb.spi"; Maux2.write(fn_tmp); } // Average of FOMs outside desired diameter range RFLOAT sum_fom_outside = (sum_fom_low + sum_fom_high) / (count_low + count_high); sum_fom_low /= count_low; sum_fom_high /= count_high; sum_fom_ok /= count_ok; // Variance of FOMs outside desired diameter range sum2_fom_low = sum2_fom_low/count_low - sum_fom_low*sum_fom_low; sum2_fom_high = sum2_fom_high/count_high - sum_fom_high*sum_fom_high; sum2_fom_ok = sum2_fom_ok/count_ok - sum_fom_ok*sum_fom_ok; //float my_threshold = sum_fom_low + LoG_adjust_threshold * sqrt(sum2_fom_low); //Sjors 25May2018: better have threshold only depend on fom_ok, as in some cases fom_low/high are on very different scale... float my_threshold = sum_fom_ok + LoG_adjust_threshold * sqrt(sum2_fom_ok); float my_upper_limit = sum_fom_ok + LoG_upper_limit * sqrt(sum2_fom_ok); #ifdef DEBUG_LOG std::cerr << " avg_fom_low= " << sum_fom_low << " stddev_fom_low= " << sqrt(sum2_fom_low) << " N= "<< count_low << std::endl; std::cerr << " avg_fom_high= " << sum_fom_high<< " stddev_fom_high= " << sqrt(sum2_fom_high) << " N= "<< count_high << std::endl; std::cerr << " avg_fom_ok= " << sum_fom_ok<< " stddev_fom_ok= " << sqrt(sum2_fom_ok) << " N= "<< count_ok<< std::endl; std::cerr << " avg_fom_outside= " << sum_fom_outside << std::endl; std::cerr << " my_threshold= " << my_threshold << " LoG_adjust_threshold= "<< LoG_adjust_threshold << std::endl; #endif // Threshold the best_fom map FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Mbest_fom) { if (DIRECT_MULTIDIM_ELEM(Mbest_fom, n) < my_threshold) { DIRECT_MULTIDIM_ELEM(Mbest_fom, n) = 0.; } } if (do_write_fom_maps) { Maux2() = Mbest_fom; fn_tmp=getOutputRootName(fn_mic)+"_"+fn_out+"_bestLoGc.spi"; Maux2.write(fn_tmp); } // Now just start from the biggest peak: put a particle coordinate there, remove all neighbouring pixels within corresponding Mbest_size and loop MetaDataTable MDout; long int imax, jmax; while (Mbest_fom.maxIndex(imax, jmax) > 0.) 
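// Greedy extraction of the LoG picks: repeatedly take the current global maximum of
// Mbest_fom, store it as a coordinate unless its FOM exceeds my_upper_limit (abnormally
// strong responses are skipped), and then zero out a surrounding disc whose radius is
// derived from the locally best LoG diameter (plus LoG_min_diameter and a fudge factor),
// so that no two picks can end up too close together.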
{ RFLOAT fom_here = A2D_ELEM(Mbest_fom, imax, jmax); if (fom_here < my_upper_limit) { MDout.addObject(); long int xx = jmax - FIRST_XMIPP_INDEX((int)((float)micrograph_xsize*scale)); long int yy = imax - FIRST_XMIPP_INDEX((int)((float)micrograph_ysize*scale)); MDout.setValue(EMDL_IMAGE_COORD_X, (RFLOAT)(xx) / scale); MDout.setValue(EMDL_IMAGE_COORD_Y, (RFLOAT)(yy) / scale); MDout.setValue(EMDL_PARTICLE_AUTOPICK_FOM, A2D_ELEM(Mbest_fom, imax, jmax)); MDout.setValue(EMDL_PARTICLE_CLASS, 0); // Dummy values to avoid problems in JoinStar MDout.setValue(EMDL_ORIENT_PSI, 0.0); } // Now set all pixels of Mbest_fom within a distance of 0.5* the corresponding Mbest_size to zero // Exclude a bit more radius, such that no very close neighbours are allowed long int myrad = ROUND(scale * (A2D_ELEM(Mbest_size, imax, jmax) + LoG_min_diameter) * LoG_neighbour_fudge / 2 / angpix); long int myrad2 = myrad * myrad; // std::cout << "scale = " << scale << " Mbest_size = " << A2D_ELEM(Mbest_size, imax, jmax) << " myrad " << myrad << std::endl; for (long int ii = imax - myrad; ii <= imax + myrad; ii++) { for (long int jj = jmax - myrad; jj <= jmax + myrad; jj++) { long int r2 = (imax - ii) * (imax - ii) + (jmax - jj) * (jmax - jj); if (r2 < myrad2 && ii >= STARTINGY(Mbest_fom) && jj >= STARTINGX(Mbest_fom) && ii <= FINISHINGY(Mbest_fom) && jj <= FINISHINGX(Mbest_fom)) A2D_ELEM(Mbest_fom, ii, jj) = 0.; } } } if (verb > 1) std::cerr << "Picked " << MDout.numberOfObjects() << " of particles " << std::endl; fn_tmp = getOutputRootName(fn_mic) + "_" + fn_out + ".star"; MDout.write(fn_tmp); } void AutoPicker::autoPickOneMicrograph(FileName &fn_mic, long int imic) { Image Imic; MultidimArray Faux, Faux2, Fmic; MultidimArray Maux, Mstddev, Mmean, Mstddev2, Mavg, Mdiff2, MsumX2, Mccf_best, Mpsi_best, Fctf, Mccf_best_combined, Mpsi_best_combined; MultidimArray Mclass_best_combined; FourierTransformer transformer; RFLOAT sum_ref_under_circ_mask, sum_ref2_under_circ_mask; int my_skip_side = autopick_skip_side + particle_size/2; CTF ctf; int min_distance_pix = ROUND(min_particle_distance / angpix); float scale = (float)workSize / (float)micrograph_size; // Always use the same random seed init_random_generator(random_seed + imic); #ifdef DEBUG Image tt; tt().resize(micrograph_size, micrograph_size); std::cerr << " fn_mic= " << fn_mic << std::endl; #endif #ifdef TIMING timer.tic(TIMING_A6); #endif // Read in the micrograph Imic.read(fn_mic); Imic().setXmippOrigin(); #ifdef TIMING timer.toc(TIMING_A6); #endif // Let's just check the square size again.... RFLOAT my_size, my_xsize, my_ysize; my_xsize = XSIZE(Imic()); my_ysize = YSIZE(Imic()); my_size = (my_xsize != my_ysize) ? XMIPP_MAX(my_xsize, my_ysize) : my_xsize; if (extra_padding > 0) my_size += 2 * extra_padding; if (my_size != micrograph_size || my_xsize != micrograph_xsize || my_ysize != micrograph_ysize) { Imic().printShape(); std::cerr << " micrograph_size= " << micrograph_size << " micrograph_xsize= " << micrograph_xsize << " micrograph_ysize= " << micrograph_ysize << std::endl; REPORT_ERROR("AutoPicker::autoPickOneMicrograph ERROR: No differently sized micrographs are allowed in one run, sorry you will have to run separately for each size..."); } #ifdef TIMING timer.tic(TIMING_A7); #endif // Set mean to zero and stddev to 1 to prevent numerical problems with one-sweep stddev calculations.... 
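// Values more than outlier_removal_zscore standard deviations away from the mean (e.g. hot
// or dead pixels) are replaced by the mean before the normalisation.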
RFLOAT avg0, stddev0, minval0, maxval0; Imic().computeStats(avg0, stddev0, minval0, maxval0); FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Imic()) { // Remove pixel values that are too far away from the mean if ( ABS(DIRECT_MULTIDIM_ELEM(Imic(), n) - avg0) / stddev0 > outlier_removal_zscore) DIRECT_MULTIDIM_ELEM(Imic(), n) = avg0; DIRECT_MULTIDIM_ELEM(Imic(), n) = (DIRECT_MULTIDIM_ELEM(Imic(), n) - avg0) / stddev0; } if (micrograph_xsize != micrograph_size || micrograph_ysize != micrograph_size) { // Window non-square micrographs to be a square with the largest side rewindow(Imic, micrograph_size); // Fill region outside the original window with white Gaussian noise to prevent all-zeros in Mstddev FOR_ALL_ELEMENTS_IN_ARRAY2D(Imic()) { if (i < FIRST_XMIPP_INDEX(micrograph_ysize) || i > LAST_XMIPP_INDEX(micrograph_ysize) || j < FIRST_XMIPP_INDEX(micrograph_xsize) || j > LAST_XMIPP_INDEX(micrograph_xsize) ) A2D_ELEM(Imic(), i, j) = rnd_gaus(0.,1.); } } #ifdef TIMING timer.toc(TIMING_A7); #endif #ifdef TIMING timer.tic(TIMING_A8); #endif // Read in the CTF information if needed if (do_ctf) { // Search for this micrograph in the metadata table bool found = false; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDmic) { FileName fn_tmp; MDmic.getValue(EMDL_MICROGRAPH_NAME, fn_tmp); if (fn_tmp==fn_mic) { ctf.readByGroup(MDmic, &obsModel); Fctf.resize(downsize_mic, downsize_mic/2 + 1); ctf.getFftwImage(Fctf, micrograph_size, micrograph_size, angpix, false, false, intact_ctf_first_peak, true); found = true; break; } } if (!found) REPORT_ERROR("Logic error: failed to find CTF information for " + fn_mic); #ifdef DEBUG std::cerr << " Read CTF info from" << fn_mic.withoutExtension()<<"_ctf.star" << std::endl; Image Ictf; Ictf()=Fctf; Ictf.write("Mmic_ctf.spi"); #endif } #ifdef TIMING timer.toc(TIMING_A8); #endif #ifdef TIMING timer.tic(TIMING_A9); #endif Mccf_best.resize(workSize, workSize); Mpsi_best.resize(workSize, workSize); #ifdef TIMING timer.toc(TIMING_A9); #endif #ifdef TIMING timer.tic(TIMING_B1); #endif //Sjors 18apr2016 RFLOAT normfft = (RFLOAT)(micrograph_size * micrograph_size) / (RFLOAT)nr_pixels_circular_mask; if (do_read_fom_maps) { FileName fn_tmp=getOutputRootName(fn_mic)+"_"+fn_out+"_stddevNoise.spi"; Image It; It.read(fn_tmp); if (autopick_helical_segments) Mstddev2 = It(); else Mstddev = It(); fn_tmp=getOutputRootName(fn_mic)+"_"+fn_out+"_avgNoise.spi"; It.read(fn_tmp); if (autopick_helical_segments) Mavg = It(); else Mmean = It(); } else { /* * Squared difference FOM: * Sum ( (X-mu)/sig - A )^2 = * = Sum((X-mu)/sig)^2 - 2 Sum (A*(X-mu)/sig) + Sum(A)^2 * = (1/sig^2)*Sum(X^2) - (2*mu/sig^2)*Sum(X) + (mu^2/sig^2)*Sum(1) - (2/sig)*Sum(AX) + (2*mu/sig)*Sum(A) + Sum(A^2) * * However, the squared difference with an "empty" ie all-zero reference is: * Sum ( (X-mu)/sig)^2 * * The ratio of the probabilities thereby becomes: * P(ref) = 1/sqrt(2pi) * exp (( (X-mu)/sig - A )^2 / -2 ) // assuming sigma = 1! * P(zero) = 1/sqrt(2pi) * exp (( (X-mu)/sig )^2 / -2 ) * * P(ref)/P(zero) = exp(( (X-mu)/sig - A )^2 / -2) / exp ( ( (X-mu)/sig )^2 / -2) * = exp( (- (2/sig)*Sum(AX) + (2*mu/sig)*Sum(A) + Sum(A^2)) / - 2 ) * * Therefore, I do not need to calculate (X-mu)/sig beforehand!!! * */ // Fourier Transform (and downscale) Imic() transformer.FourierTransform(Imic(), Fmic); if (highpass > 0.) { lowPassFilterMap(Fmic, micrograph_size, highpass, angpix, 2, true); // true means highpass instead of lowpass! 
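// Below, the Fourier transforms of the micrograph and of its square are convolved with the
// mask(s) in calculateStddevAndMeanUnderMask to obtain the local mean and standard
// deviation of the pixels under the mask at every position: the variance follows from
// E[X^2] - E[X]^2, with both expectations evaluated as mask convolutions rather than by an
// explicit loop per position.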
transformer.inverseFourierTransform(Fmic, Imic()); // also calculate inverse transform again for squared calculation below } CenterFFTbySign(Fmic); // Also calculate the FFT of the squared micrograph Maux.resize(micrograph_size,micrograph_size); FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Maux) { DIRECT_MULTIDIM_ELEM(Maux, n) = DIRECT_MULTIDIM_ELEM(Imic(), n) * DIRECT_MULTIDIM_ELEM(Imic(), n); } MultidimArray Fmic2; transformer.FourierTransform(Maux, Fmic2); CenterFFTbySign(Fmic2); Maux.resize(workSize,workSize); #ifdef DEBUG std::cerr << " nr_pixels_circular_invmask= " << nr_pixels_circular_invmask << std::endl; std::cerr << " nr_pixels_circular_mask= " << nr_pixels_circular_mask << std::endl; windowFourierTransform(Finvmsk, Faux2, micrograph_size); CenterFFTbySign(Faux2); transformer.inverseFourierTransform(Faux2, tt()); tt.write("Minvmask.spi"); #endif // The following calculate mu and sig under the solvent area at every position in the micrograph if (autopick_helical_segments) calculateStddevAndMeanUnderMask(Fmic, Fmic2, Favgmsk, nr_pixels_avg_mask, Mstddev2, Mavg); calculateStddevAndMeanUnderMask(Fmic, Fmic2, Finvmsk, nr_pixels_circular_invmask, Mstddev, Mmean); if (do_write_fom_maps) { FileName fn_tmp=getOutputRootName(fn_mic)+"_"+fn_out+"_stddevNoise.spi"; Image It; It() = (autopick_helical_segments) ? Mstddev2 : Mstddev; It.write(fn_tmp); fn_tmp=getOutputRootName(fn_mic)+"_"+fn_out+"_avgNoise.spi"; It() = (autopick_helical_segments) ? Mavg : Mmean; It.write(fn_tmp); } // From now on use downsized Fmic, as the cross-correlation with the references can be done at lower resolution windowFourierTransform(Fmic, Faux, downsize_mic); Fmic = Faux; }// end if do_read_fom_maps #ifdef TIMING timer.toc(TIMING_B1); #endif // Now start looking for the peaks of all references // Clear the output vector with all peaks std::vector peaks; peaks.clear(); if (autopick_helical_segments) { if (do_read_fom_maps) { FileName fn_tmp; Image It_float; Image It_int; fn_tmp = getOutputRootName(fn_mic)+"_"+fn_out+"_combinedCCF.spi"; It_float.read(fn_tmp); Mccf_best_combined = It_float(); if (do_amyloid) { fn_tmp = getOutputRootName(fn_mic)+"_"+fn_out+"_combinedPSI.spi"; It_float.read(fn_tmp); Mpsi_best_combined = It_float(); } else { fn_tmp = getOutputRootName(fn_mic)+"_"+fn_out+"_combinedCLASS.spi"; It_int.read(fn_tmp); Mclass_best_combined = It_int(); } } else { Mccf_best_combined.clear(); Mccf_best_combined.resize(workSize, workSize); Mccf_best_combined.initConstant(-99.e99); Mpsi_best_combined.clear(); Mpsi_best_combined.resize(workSize, workSize); Mpsi_best_combined.initConstant(-99.e99); Mclass_best_combined.clear(); Mclass_best_combined.resize(workSize, workSize); Mclass_best_combined.initConstant(-1.); } } for (int iref = 0; iref < Mrefs.size(); iref++) { RFLOAT expected_Pratio; // the expectedFOM for this (ctf-corrected) reference if (do_read_fom_maps) { #ifdef TIMING timer.tic(TIMING_B2); #endif if (!autopick_helical_segments) { FileName fn_tmp; Image It; fn_tmp.compose(getOutputRootName(fn_mic)+"_"+fn_out+"_ref", iref,"_bestCCF.spi"); It.read(fn_tmp); Mccf_best = It(); It.MDMainHeader.getValue(EMDL_IMAGE_STATS_MAX, expected_Pratio); // Retrieve expected_Pratio from the header of the image fn_tmp.compose(getOutputRootName(fn_mic)+"_"+fn_out+"_ref", iref,"_bestPSI.spi"); It.read(fn_tmp); Mpsi_best = It(); } #ifdef TIMING timer.toc(TIMING_B2); #endif } //end else if do_read_fom_maps else { #ifdef TIMING timer.tic(TIMING_B3); #endif Mccf_best.initConstant(-LARGE_NUMBER); bool is_first_psi = true; for (RFLOAT 
psi = 0. ; psi < 360.; psi+=psi_sampling) { // Get the Euler matrix Matrix2D A(3,3); Euler_angles2matrix(0., 0., psi, A); // Now get the FT of the rotated (non-ctf-corrected) template Faux.initZeros(downsize_mic, downsize_mic/2 + 1); PPref[iref].get2DFourierTransform(Faux, A); #ifdef DEBUG std::cerr << " psi= " << psi << std::endl; windowFourierTransform(Faux, Faux2, micrograph_size); CenterFFTbySign(Faux2); tt().resize(micrograph_size, micrograph_size); transformer.inverseFourierTransform(Faux2, tt()); tt.write("Mref_rot.spi"); windowFourierTransform(Fmic, Faux2, micrograph_size); CenterFFTbySign(Faux2); transformer.inverseFourierTransform(Faux2, tt()); tt.write("Mmic.spi"); #endif #ifdef TIMING timer.tic(TIMING_B4); #endif // Apply the CTF on-the-fly (so same PPref can be used for many different micrographs) if (do_ctf) { FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Faux) { DIRECT_MULTIDIM_ELEM(Faux, n) *= DIRECT_MULTIDIM_ELEM(Fctf, n); } #ifdef TIMING timer.toc(TIMING_B4); #endif #ifdef DEBUG MultidimArray ttt(micrograph_size, micrograph_size); windowFourierTransform(Faux, Faux2, micrograph_size); CenterFFTbySign(Faux2); transformer.inverseFourierTransform(Faux2, ttt); ttt.setXmippOrigin(); tt().resize(particle_size, particle_size); tt().setXmippOrigin(); FOR_ALL_ELEMENTS_IN_ARRAY2D(tt()) { A2D_ELEM(tt(), i, j) = A2D_ELEM(ttt, i, j); } tt.write("Mref_rot_ctf.spi"); #endif } if (is_first_psi) { #ifdef TIMING timer.tic(TIMING_B5); #endif // Calculate the expected ratio of probabilities for this CTF-corrected reference // and the sum_ref_under_circ_mask and sum_ref_under_circ_mask2 // Do this also if we're not recalculating the fom maps... // This calculation needs to be done on an "non-shrinked" micrograph, in order to get the correct I^2 statistics windowFourierTransform(Faux, Faux2, micrograph_size); CenterFFTbySign(Faux2); Maux.resize(micrograph_size, micrograph_size); transformer.inverseFourierTransform(Faux2, Maux); Maux.setXmippOrigin(); #ifdef DEBUG Image ttt; ttt()=Maux; ttt.write("Maux.spi"); #endif sum_ref_under_circ_mask = 0.; sum_ref2_under_circ_mask = 0.; RFLOAT suma2 = 0.; RFLOAT sumn = 1.; MultidimArray Mctfref(particle_size, particle_size); Mctfref.setXmippOrigin(); FOR_ALL_ELEMENTS_IN_ARRAY2D(Mctfref) // only loop over smaller Mctfref, but take values from large Maux! { if (i*i + j*j < particle_radius2) { suma2 += A2D_ELEM(Maux, i, j) * A2D_ELEM(Maux, i, j); suma2 += 2. * A2D_ELEM(Maux, i, j) * rnd_gaus(0., 1.); sum_ref_under_circ_mask += A2D_ELEM(Maux, i, j); sum_ref2_under_circ_mask += A2D_ELEM(Maux, i, j) * A2D_ELEM(Maux, i, j); sumn += 1.; } #ifdef DEBUG A2D_ELEM(Mctfref, i, j) = A2D_ELEM(Maux, i, j); #endif } sum_ref_under_circ_mask /= sumn; sum_ref2_under_circ_mask /= sumn; expected_Pratio = exp(suma2 / (2. * sumn)); #ifdef DEBUG std::cerr << " expected_Pratio["< 1E-10) diff2 /= DIRECT_MULTIDIM_ELEM(Mstddev, n); diff2 += sum_ref2_under_circ_mask; diff2 = exp(- diff2 / 2.); // exponentiate to reflect the Gaussian error model. sigma=1 after normalization, 0.4=1/sqrt(2pi) // Store fraction of (1 - probability-ratio) wrt (1 - expected Pratio) diff2 = (diff2 - 1.) / (expected_Pratio - 1.); #ifdef DEBUG DIRECT_MULTIDIM_ELEM(Maux, n) = diff2; #endif if (diff2 > DIRECT_MULTIDIM_ELEM(Mccf_best, n)) { DIRECT_MULTIDIM_ELEM(Mccf_best, n) = diff2; DIRECT_MULTIDIM_ELEM(Mpsi_best, n) = psi; } } #ifdef DEBUG std::cerr << " Maux.computeMax()= " << Maux.computeMax() << std::endl; tt()=Maux; tt.write("Mccf.spi"); std::cerr << " Press any key to continue... 
" << std::endl; char c; std::cin >> c; #endif is_first_psi = false; #ifdef TIMING timer.toc(TIMING_B6); #endif } // end for psi #ifdef TIMING timer.toc(TIMING_B3); #endif #ifdef TIMING timer.tic(TIMING_B7); #endif if (do_write_fom_maps && !autopick_helical_segments) { FileName fn_tmp; Image It; It() = Mccf_best; It.MDMainHeader.setValue(EMDL_IMAGE_STATS_MAX, expected_Pratio); // Store expected_Pratio in the header of the image fn_tmp.compose(getOutputRootName(fn_mic)+"_"+fn_out+"_ref", iref,"_bestCCF.spi"); It.write(fn_tmp); It() = Mpsi_best; fn_tmp.compose(getOutputRootName(fn_mic)+"_"+fn_out+"_ref", iref,"_bestPSI.spi"); It.write(fn_tmp); // for (long int n=0; n<((Mccf_best).nzyxdim/10); n+=1) // { // std::cerr << DIRECT_MULTIDIM_ELEM(Mccf_best, n) << std::endl; // } // exit(0); } // end if do_write_fom_maps #ifdef TIMING timer.toc(TIMING_B7); #endif } // end if do_read_fom_maps #ifdef TIMING timer.tic(TIMING_B8); #endif if (autopick_helical_segments) { if (!do_read_fom_maps) { // Combine Mccf_best and Mpsi_best from all refs FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Mccf_best) { RFLOAT new_ccf = DIRECT_MULTIDIM_ELEM(Mccf_best, n); RFLOAT old_ccf = DIRECT_MULTIDIM_ELEM(Mccf_best_combined, n); if (new_ccf > old_ccf) { DIRECT_MULTIDIM_ELEM(Mccf_best_combined, n) = new_ccf; if (do_amyloid) DIRECT_MULTIDIM_ELEM(Mpsi_best_combined, n) = DIRECT_MULTIDIM_ELEM(Mpsi_best, n); else DIRECT_MULTIDIM_ELEM(Mclass_best_combined, n) = iref; } } } } else { // Now that we have Mccf_best and Mpsi_best, get the peaks std::vector my_ref_peaks; Mstddev.setXmippOrigin(); Mmean.setXmippOrigin(); Mccf_best.setXmippOrigin(); Mpsi_best.setXmippOrigin(); peakSearch(Mccf_best, Mpsi_best, Mstddev, Mmean, iref, my_skip_side, my_ref_peaks, scale); prunePeakClusters(my_ref_peaks, min_distance_pix, scale); peaks.insert(peaks.end(), my_ref_peaks.begin(), my_ref_peaks.end()); // append the peaks of this reference to all the other peaks } #ifdef TIMING timer.toc(TIMING_B8); #endif } // end for iref if (autopick_helical_segments) { RFLOAT thres = min_fraction_expected_Pratio; int peak_r_min = 1; std::vector ccf_peak_list; std::vector > tube_coord_list, tube_track_list; std::vector tube_len_list; MultidimArray Mccfplot; if (do_write_fom_maps) { FileName fn_tmp; Image It_float; Image It_int; It_float() = Mccf_best_combined; fn_tmp = getOutputRootName(fn_mic) + "_" + fn_out + "_combinedCCF.spi"; It_float.write(fn_tmp); if (do_amyloid) { It_float() = Mpsi_best_combined; fn_tmp = getOutputRootName(fn_mic) + "_" + fn_out + "_combinedPSI.spi"; It_float.write(fn_tmp); } else { It_int() = Mclass_best_combined; fn_tmp = getOutputRootName(fn_mic) + + "_" + fn_out + "_combinedCLASS.spi"; It_int.write(fn_tmp); } } // end if do_write_fom_maps Mccf_best_combined.setXmippOrigin(); Mclass_best_combined.setXmippOrigin(); Mpsi_best_combined.setXmippOrigin(); Mstddev2.setXmippOrigin(); Mavg.setXmippOrigin(); if (do_amyloid) { pickAmyloids(Mccf_best_combined, Mpsi_best_combined, Mstddev2, Mavg, thres, amyloid_max_psidiff, fn_mic, fn_out, (helical_tube_diameter / angpix), autopick_skip_side, scale); } else { pickCCFPeaks(Mccf_best_combined, Mstddev2, Mavg, Mclass_best_combined, thres, peak_r_min, (particle_diameter / angpix), ccf_peak_list, Mccfplot, my_skip_side, scale); extractHelicalTubes(ccf_peak_list, tube_coord_list, tube_len_list, tube_track_list, (particle_diameter / angpix), helical_tube_curvature_factor_max, (min_particle_distance / angpix), (helical_tube_diameter / angpix), scale); exportHelicalTubes(Mccf_best_combined, Mccfplot, 
Mclass_best_combined, tube_coord_list, tube_track_list, tube_len_list, fn_mic, fn_out, (particle_diameter / angpix), (helical_tube_length_min / angpix), my_skip_side, scale); } if ((do_write_fom_maps || do_read_fom_maps) && !do_amyloid) { FileName fn_tmp; Image It; It() = Mccfplot; fn_tmp = getOutputRootName(fn_mic) + "_" + fn_out + "_combinedPLOT.spi"; It.write(fn_tmp); } } else { #ifdef TIMING timer.tic(TIMING_B9); #endif //Now that we have done all references, prune the list again... prunePeakClusters(peaks, min_distance_pix, scale); // And remove all too close neighbours removeTooCloselyNeighbouringPeaks(peaks, min_distance_pix, scale); // Write out a STAR file with the coordinates MetaDataTable MDout; for (int ipeak =0; ipeak < peaks.size(); ipeak++) { MDout.addObject(); MDout.setValue(EMDL_IMAGE_COORD_X, (RFLOAT)(peaks[ipeak].x) / scale); MDout.setValue(EMDL_IMAGE_COORD_Y, (RFLOAT)(peaks[ipeak].y) / scale); MDout.setValue(EMDL_PARTICLE_CLASS, peaks[ipeak].ref + 1); // start counting at 1 MDout.setValue(EMDL_PARTICLE_AUTOPICK_FOM, peaks[ipeak].fom); MDout.setValue(EMDL_ORIENT_PSI, peaks[ipeak].psi); } FileName fn_tmp = getOutputRootName(fn_mic) + "_" + fn_out + ".star"; MDout.write(fn_tmp); #ifdef TIMING timer.toc(TIMING_B9); #endif } } FileName AutoPicker::getOutputRootName(FileName fn_mic) { FileName fn_pre, fn_jobnr, fn_post; decomposePipelineFileName(fn_mic, fn_pre, fn_jobnr, fn_post); return fn_odir + fn_post.withoutExtension(); } void AutoPicker::calculateStddevAndMeanUnderMask(const MultidimArray &_Fmic, const MultidimArray &_Fmic2, MultidimArray &_Fmsk, int nr_nonzero_pixels_mask, MultidimArray &_Mstddev, MultidimArray &_Mmean) { MultidimArray Faux, Faux2; MultidimArray Maux(workSize, workSize); FourierTransformer transformer; _Mstddev.initZeros(workSize, workSize); RFLOAT normfft = (RFLOAT)(micrograph_size * micrograph_size) / (RFLOAT)nr_nonzero_pixels_mask; // Calculate convolution of micrograph and mask, to get average under mask at all points Faux.resize(_Fmic); #ifdef DEBUG Image tt; #endif FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Faux) { DIRECT_MULTIDIM_ELEM(Faux, n) = DIRECT_MULTIDIM_ELEM(_Fmic, n) * conj(DIRECT_MULTIDIM_ELEM(_Fmsk, n)); } windowFourierTransform(Faux, Faux2, workSize); CenterFFTbySign(Faux2); transformer.inverseFourierTransform(Faux2, Maux); Maux *= normfft; _Mmean = Maux; #ifdef DEBUG tt()=Maux; tt.write("Mavg_mic.spi"); #endif FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(_Mstddev) { // store minus average-squared already in _Mstddev DIRECT_MULTIDIM_ELEM(_Mstddev, n) = -DIRECT_MULTIDIM_ELEM(Maux, n) * DIRECT_MULTIDIM_ELEM(Maux, n); } // Calculate convolution of micrograph-squared and mask FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Faux) { DIRECT_MULTIDIM_ELEM(Faux, n) = DIRECT_MULTIDIM_ELEM(_Fmic2, n) * conj(DIRECT_MULTIDIM_ELEM(_Fmsk, n)); } windowFourierTransform(Faux, Faux2, workSize); CenterFFTbySign(Faux2); transformer.inverseFourierTransform(Faux2, Maux); FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(_Mstddev) { // we already stored minus average-squared in _Mstddev DIRECT_MULTIDIM_ELEM(_Mstddev, n) += normfft * DIRECT_MULTIDIM_ELEM(Maux, n); if (DIRECT_MULTIDIM_ELEM(_Mstddev, n) > (RFLOAT)1E-10) DIRECT_MULTIDIM_ELEM(_Mstddev, n) = sqrt(DIRECT_MULTIDIM_ELEM(_Mstddev, n) ); else DIRECT_MULTIDIM_ELEM(_Mstddev, n) = 1.; } #ifdef DEBUG tt()=_Mstddev; tt.write("Msig_mic.spi"); #endif } void AutoPicker::peakSearch(const MultidimArray &Mfom, const MultidimArray &Mpsi, const MultidimArray &Mstddev, const MultidimArray &Mmean, int iref, int skip_side, std::vector 
<Peak> &peaks, float scale) { peaks.clear(); Peak peak; peak.ref = iref; skip_side = (int)((float)skip_side*scale); // Skip the pixels along the side of the micrograph! // At least 1, so we don't have to check for the borders! skip_side = XMIPP_MAX(1, skip_side); for (int i = FIRST_XMIPP_INDEX((int)((float)micrograph_ysize*scale)) + skip_side; i <= LAST_XMIPP_INDEX((int)((float)micrograph_ysize*scale)) - skip_side; i++) { for (int j = FIRST_XMIPP_INDEX((int)((float)micrograph_xsize*scale)) + skip_side; j <= LAST_XMIPP_INDEX((int)((float)micrograph_xsize*scale)) - skip_side; j++) { RFLOAT myval = A2D_ELEM(Mfom, i, j); // check if this element is above the threshold if (myval >= min_fraction_expected_Pratio) { // Only check stddev in the noise areas if max_stddev_noise is positive! if (max_stddev_noise > 0. && A2D_ELEM(Mstddev, i, j) > max_stddev_noise) continue; if (min_avg_noise > -900. && A2D_ELEM(Mmean, i, j) < min_avg_noise) continue; if (scale < 1.) { // When we use shrink, then often peaks aren't 5 pixels big anymore.... if (A2D_ELEM(Mfom, i-1, j) > myval ) continue; if (A2D_ELEM(Mfom, i+1, j) > myval ) continue; if (A2D_ELEM(Mfom, i, j-1) > myval ) continue; if (A2D_ELEM(Mfom, i, j+1) > myval ) continue; } else { // This is a peak if all four neighbours are also above the threshold, AND have lower values than myval if (A2D_ELEM(Mfom, i-1, j) < min_fraction_expected_Pratio || A2D_ELEM(Mfom, i-1, j) > myval ) continue; if (A2D_ELEM(Mfom, i+1, j) < min_fraction_expected_Pratio || A2D_ELEM(Mfom, i+1, j) > myval ) continue; if (A2D_ELEM(Mfom, i, j-1) < min_fraction_expected_Pratio || A2D_ELEM(Mfom, i, j-1) > myval ) continue; if (A2D_ELEM(Mfom, i, j+1) < min_fraction_expected_Pratio || A2D_ELEM(Mfom, i, j+1) > myval ) continue; } peak.x = j - FIRST_XMIPP_INDEX((int)((float)micrograph_xsize*scale)); peak.y = i - FIRST_XMIPP_INDEX((int)((float)micrograph_ysize*scale)); peak.psi = A2D_ELEM(Mpsi, i, j); peak.fom = A2D_ELEM(Mfom, i, j); peak.relative_fom = myval; peaks.push_back(peak); } } } } void AutoPicker::prunePeakClusters(std::vector<Peak> &peaks, int min_distance, float scale) { float mind2 = ((float)min_distance*(float)min_distance)*scale*scale; int nclus = 0; std::vector<Peak> pruned_peaks; while (peaks.size() > 0) { nclus++; std::vector<Peak> cluster; cluster.push_back(peaks[0]); peaks.erase(peaks.begin()); for (int iclus = 0; iclus < cluster.size(); iclus++) { int my_x = cluster[iclus].x; int my_y = cluster[iclus].y; for (int ipeakp = 0; ipeakp < peaks.size(); ipeakp++) { float dx = (float)(my_x - peaks[ipeakp].x); float dy = (float)(my_y - peaks[ipeakp].y); if (dx*dx + dy*dy < ( (float)(particle_radius2)*scale*scale )) { // Put ipeakp in the cluster, and remove from the peaks list cluster.push_back(peaks[ipeakp]); peaks.erase(peaks.begin()+ipeakp); ipeakp--; } } } // Now search for the peak from the cluster with the best ccf.
// Then search again if there are any other peaks in the cluster that are further than particle_diameter apart from the selected peak // If so, again search for the maximum int ipass = 0; while (cluster.size() > 0) { RFLOAT best_relative_fom=-1.; Peak bestpeak; for (int iclus = 0; iclus < cluster.size(); iclus++) { if ( cluster[iclus].relative_fom > best_relative_fom) { best_relative_fom = cluster[iclus].relative_fom; bestpeak = cluster[iclus]; } } // Store this peak as pruned pruned_peaks.push_back(bestpeak); // Remove all peaks within mind2 from the clusters for (int iclus = 0; iclus < cluster.size(); iclus++) { float dx = (float)(cluster[iclus].x - bestpeak.x); float dy = (float)(cluster[iclus].y - bestpeak.y); if (dx*dx + dy*dy < mind2) { cluster.erase(cluster.begin()+iclus); iclus--; } } ipass++; } } // end while peaks.size > 0 // Set the pruned peaks back into the input vector peaks = pruned_peaks; } void AutoPicker::removeTooCloselyNeighbouringPeaks(std::vector &peaks, int min_distance, float scale) { // Now only keep those peaks that are at least min_particle_distance number of pixels from any other peak std::vector pruned_peaks; float mind2 = ((float)min_distance*(float)min_distance)*scale*scale; for (int ipeak = 0; ipeak < peaks.size(); ipeak++) { int my_x = peaks[ipeak].x; int my_y = peaks[ipeak].y; float my_mind2 = 9999999999.; for (int ipeakp = 0; ipeakp < peaks.size(); ipeakp++) { if (ipeakp != ipeak) { int dx = peaks[ipeakp].x - my_x; int dy = peaks[ipeakp].y - my_y; int d2 = dx*dx + dy*dy; if ( d2 < my_mind2 ) my_mind2 = d2; } } if (my_mind2 > mind2) pruned_peaks.push_back(peaks[ipeak]); } // Set the pruned peaks back into the input vector peaks = pruned_peaks; } int AutoPicker::largestPrime(int query) { int i(2), primeF(query); while (i*i<=primeF) { if(primeF%i!=0) i+=1; else primeF /= i; } return primeF; } int AutoPicker::getGoodFourierDims(int requestedSizeRealX, int lim) { if (!do_optimise_scale) return requestedSizeRealX; int inputPrimeF =XMIPP_MAX(largestPrime(requestedSizeRealX),largestPrime(requestedSizeRealX/2+1)); if(inputPrimeF<=LARGEST_ACCEPTABLE_PRIME) { if (verb > 0) std::cout << " + Will use micrographs scaled to " << requestedSizeRealX << " pixels as requested. The largest prime factor in FFTs is " << inputPrimeF << std::endl; return requestedSizeRealX; } int S_up = LARGEST_ACCEPTABLE_PRIME; int S_down = LARGEST_ACCEPTABLE_PRIME; // Search upwards - can take a long time if unlucky and/or small LARGEST_ACCEPTABLE_PRIME int currentU = requestedSizeRealX; S_up = largestPrime(currentU); S_up = XMIPP_MAX(largestPrime(currentU/2+1),S_up); while(S_up>=LARGEST_ACCEPTABLE_PRIME && currentU<=(lim+2)) { currentU += 2; S_up = largestPrime(currentU); S_up = XMIPP_MAX(largestPrime(currentU/2+1),S_up); } // Search downwards - guaranteed to find in reasonable time int currentD = requestedSizeRealX; S_down = largestPrime(currentD); S_down = XMIPP_MAX(largestPrime(currentD/2+1),S_down); while(S_down>=LARGEST_ACCEPTABLE_PRIME) { currentD -= 2; S_down = largestPrime(currentD); S_down = XMIPP_MAX(largestPrime(currentD/2+1),S_down); } if (verb > 0) { std::cout << " + WARNING: Requested rescale of micrographs is " << requestedSizeRealX << " pixels. 
The largest prime factor in FFTs is " << inputPrimeF << std::endl; } if((currentU-requestedSizeRealX)>(requestedSizeRealX-currentD) || (currentU>lim)) { if (verb > 0) { std::cout << " + WARNING: Will change rescaling of micrographs to " << currentD << " pixels, because the prime factor then becomes " << S_down << std::endl; std::cout << " + WARNING: add --skip_optimise_scale to your autopick command to prevent rescaling " << std::endl; } return currentD; } else { if (verb > 0) { std::cout << " + WARNING: Will change rescaling of micrographs to " << currentU << " pixels, because the prime factor then becomes " << S_up << std::endl; std::cout << " + WARNING: add --skip_optimise_scale to your autopick command to prevent rescaling " << std::endl; } return currentU; } } relion-3.1.3/src/autopicker.h000066400000000000000000000257751411340063500161310ustar00rootroot00000000000000/* * autopicker.h * * Created on: Sep 18, 2013 * Author: "Sjors H.W. Scheres" */ #ifndef AUTOPICKER_H_ #define AUTOPICKER_H_ #include "src/image.h" #include "src/multidim_array.h" #include "src/metadata_table.h" #include "src/projector.h" #include "src/healpix_sampling.h" #include "src/projector.h" #include "src/jaz/obs_model.h" #include "src/ctf.h" #include "src/fftw.h" #include "src/time.h" #include "src/mask.h" #include "src/macros.h" #include "src/helix.h" #ifdef CUDA #include "src/acc/cuda/cuda_mem_utils.h" #include "src/acc/acc_projector.h" #include "src/acc/cuda/cuda_settings.h" #include "src/acc/cuda/cuda_fft.h" #include "src/acc/cuda/cuda_benchmark_utils.h" #endif //#define OUTPUT_MEAN_MAP_ONLY 1 //#define OUTPUT_STDDEV_MAP_ONLY 2 //#define OUTPUT_BOTH_MEAN_AND_STDDEV_MAPS 3 //#define TIMING class ccfPixel { public: RFLOAT x, y, fom; //RFLOAT x, y, fom, psi; ccfPixel() : x(-1.), y(-1.), fom(-1.) {}; ccfPixel(RFLOAT _x, RFLOAT _y, RFLOAT _fom) : x(_x), y(_y), fom(_fom) {}; //ccfPixel() : x(-1.), y(-1.), fom(-1.), psi(-1.) 
{}; //ccfPixel(RFLOAT _x, RFLOAT _y, RFLOAT _fom, RFLOAT _psi) : x(_x), y(_y), fom(_fom), psi(_psi) {}; bool operator<(const ccfPixel& b) const { return (fom < b.fom); }; }; class ccfPeak { public: int id, ref, nr_peak_pixel; RFLOAT x, y, r, area_percentage, fom_max, psi, dist, fom_thres; std::vector<ccfPixel> ccf_pixel_list; void clear(); ccfPeak() { clear(); }; ~ccfPeak() { clear(); }; bool isValid() const; bool operator<(const ccfPeak& b) const; bool refresh(); }; struct Peak { int x, y, ref; RFLOAT psi, fom, relative_fom; }; struct AmyloidCoord { RFLOAT x, y, psi, fom; }; class AutoPicker { public: // For GPU-acceleration void* cudaPicker; // Available memory (in Gigabyte) RFLOAT available_memory; RFLOAT available_gpu_memory; RFLOAT requested_gpu_memory; // I/O Parser IOParser parser; // Verbosity int verb; // Random seed long int random_seed; // Input & Output rootname FileName fn_in, fn_ref, fns_autopick, fn_odir, fn_out; // Pixel size for the micrographs (for low-pass filter and particle diameter) RFLOAT angpix; // Pixel size for the references (for low-pass filter and particle diameter) RFLOAT angpix_ref; // Angular sampling rate for projection of 3D reference (hp=0: 60 deg, hp=1: 30 deg; hp=2: 15deg) int healpix_order; // Symmetry point group for 3D reference std::string symmetry; // Metadata of the micrographs MetaDataTable MDmic; // Optics group information ObservationModel obsModel; // Particle diameter (in Angstroms) RFLOAT particle_diameter; int particle_radius2, decrease_radius; // Maximum diameter for local average density calculation RFLOAT max_local_avg_diameter; // Low pass filter cutoff (in Angstroms) RFLOAT lowpass; // High pass filter cutoff (in Angstroms) RFLOAT highpass; // Original size of the reference images int particle_size; // Dimension of the filtered image int current_size; // Padding to use for Projectors int padding; // Maximum value in the Gaussian blob reference RFLOAT gauss_max_value; // Vector with all original reference images std::vector<MultidimArray<RFLOAT> > Mrefs; // FTs of the reference images (either for autopicking or for feature calculation) std::vector<Projector> PPref; // Use Laplacian-of-Gaussian filters instead of template-based picking bool do_LoG; // Diameter for features to be detected by the LoG filter RFLOAT LoG_min_diameter, LoG_max_diameter, LoG_neighbour_fudge; // How many times the LoG_max_diameter is searched? RFLOAT LoG_max_search; // How many sigma to adjust the FOM threshold? RFLOAT LoG_adjust_threshold, LoG_upper_limit; // Input signal is white bool LoG_invert, LoG_use_ctf; // Vector with all LoG filter FFTs std::vector<MultidimArray<Complex> > FT_LoGs; // Vector with all diameters to be sampled std::vector<RFLOAT> diams_LoG; //// Specific amyloid picker bool do_amyloid; /// Maximum psi-angle difference in subsequent amyloid segments (in degrees) RFLOAT amyloid_max_psidiff; ///// Autopicking stuff // Re-read precalculated best_localCCF and SPI arrays from disc bool do_read_fom_maps; // Write precalculated best_localCCF and SPI arrays to disc bool do_write_fom_maps; // We impose a limit to not write an insane number of images by mistake, but you can override through --fom_override bool no_fom_limit; /// Only autopick those micrographs for which the coordinate file does not yet exist bool do_only_unfinished; // Is there any work to be done?
bool todo_anything; // All micrographs to autopick from std::vector<FileName> fn_micrographs, fn_ori_micrographs; // Original size of the micrographs int micrograph_size, micrograph_xsize, micrograph_ysize, micrograph_minxy_size; // decreased size micrograph int workSize; float workFrac; // Is density in micrograph inverted wrt templates? bool do_invert; // Correct the references for CTF effects? bool do_ctf; // use GPU hardware? bool do_gpu; // Which GPU devices to use? std::string gpu_ids; // Keep the CTFs unchanged until the first peak? bool intact_ctf_first_peak; // Are the templates 2D helical segments? If so, in-plane rotation angles (psi) are estimated for the references. bool autopick_helical_segments; RFLOAT helical_tube_curvature_factor_max; RFLOAT helical_tube_diameter; RFLOAT helical_tube_length_min; // Apart from keeping particle_size/2 away from the sides, should we exclude more? E.g. to get rid of Polara bar code? int autopick_skip_side; // Extra padding around the micrographs, of this many pixels int extra_padding; // In-plane rotational sampling (in degrees) RFLOAT psi_sampling; // Fraction of expected probability ratio to consider as peaks RFLOAT min_fraction_expected_Pratio; // Number of Angstroms any 2 particle peaks need to be apart RFLOAT min_particle_distance; // Maximum standard deviation of the noise prior to normalization to pick peaks from RFLOAT max_stddev_noise; // Minimum average background density of the noise to pick peaks from RFLOAT min_avg_noise; // Removal of outlier pixel values RFLOAT outlier_removal_zscore; // Size of the downsize micrographs for autopicking int downsize_mic; // Number of non-zero pixels in the circular mask, and of its inverse (for background normalisation in do_diff2) int nr_pixels_circular_mask, nr_pixels_avg_mask, nr_pixels_circular_invmask; // Array with Fourier-transform of the inverse of the (circular) mask MultidimArray<Complex> Finvmsk; // Array with Fourier-transform of the mask to calculate average density MultidimArray<Complex> Favgmsk; // Perform optimisation of the scale factor?
bool do_optimise_scale; #ifdef TIMING Timer timer; int TIMING_A0, TIMING_A1, TIMING_A2, TIMING_A3, TIMING_A4, TIMING_A5, TIMING_A6, TIMING_A7, TIMING_A8, TIMING_A9; int TIMING_B1, TIMING_B2, TIMING_B3, TIMING_B4, TIMING_B5, TIMING_B6, TIMING_B7, TIMING_B8, TIMING_B9; #endif public: AutoPicker(): available_memory(0), available_gpu_memory(0), requested_gpu_memory(0) {} // Read command line arguments void read(int argc, char **argv); // Print usage instructions void usage(); // Initialise some general stuff after reading void initialise(); // Set device-affinity int deviceInitialise(); // General function to decide what to do void run(); // Make a PDF file with plots of numbers of particles per micrograph, average FOMs etc void generatePDFLogfile(); std::vector findNextCandidateCoordinates(AmyloidCoord &mycoord, std::vector &circle, RFLOAT threshold_value, RFLOAT max_psidiff, int skip_side, float scale, MultidimArray &Mccf, MultidimArray &Mpsi); AmyloidCoord findNextAmyloidCoordinate(AmyloidCoord &mycoord, std::vector &circle, RFLOAT threshold_value, RFLOAT max_psidiff, RFLOAT amyloid_diameter_pix, int skip_side, float scale, MultidimArray &Mccf, MultidimArray &Mpsi); void pickAmyloids( MultidimArray& Mccf, MultidimArray& Mpsi, MultidimArray& Mstddev, MultidimArray& Mavg, RFLOAT threshold_value, RFLOAT max_psidiff, FileName& fn_mic_in, FileName& fn_star_out, RFLOAT amyloid_width, int skip_side, float scale); void pickCCFPeaks( const MultidimArray& Mccf, const MultidimArray& Mstddev, const MultidimArray& Mavg, const MultidimArray& Mclass, RFLOAT threshold_value, int peak_r_min, RFLOAT particle_diameter_pix, std::vector& ccf_peak_list, MultidimArray& Mccfplot, int skip_side, float scale); void extractHelicalTubes( std::vector& ccf_peak_list, std::vector >& tube_coord_list, std::vector& tube_len_list, std::vector >& tube_track_list, RFLOAT particle_diameter_pix, RFLOAT curvature_factor_max, RFLOAT interbox_distance_pix, RFLOAT tube_diameter_pix, float scale); void exportHelicalTubes( const MultidimArray& Mccf, MultidimArray& Mccfplot, const MultidimArray& Mclass, std::vector >& tube_coord_list, std::vector >& tube_track_list, std::vector& tube_len_list, FileName& fn_mic_in, FileName& fn_star_out, RFLOAT particle_diameter_pix, RFLOAT tube_length_min_pix, int skip_side, float scale); void autoPickLoGOneMicrograph(FileName &fn_mic, long int imic); void autoPickOneMicrograph(FileName &fn_mic, long int imic); // Get the output coordinate filename given the micrograph filename FileName getOutputRootName(FileName fn_mic); // Uses Roseman2003 formulae to calculate stddev under the mask through FFTs // The FFTs of the micrograph (Fmic), micrograph-squared (Fmic2) and the mask (Fmsk) need to be provided at downsize_mic // The putput (Mstddev) will be at (binned) micrograph_size void calculateStddevAndMeanUnderMask( const MultidimArray &Fmic, const MultidimArray &Fmic2, MultidimArray &Fmsk, int nr_nonzero_pixels_mask, MultidimArray &Mstddev, MultidimArray &Mmean); // Peak search for all pixels above a given threshold in the map void peakSearch(const MultidimArray &Mccf, const MultidimArray &Mpsi, const MultidimArray &Mstddev, const MultidimArray &Mmean, int iref, int skip_side, std::vector &peaks, float scale); // Now prune the coordinates: within min_particle_distance: all peaks are the same cluster // From each cluster, take the single peaks with the highest ccf // If then, there is another peaks at a distance of at least min_particle_distance: take that one as well, and so forth... 
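// A rough sketch of the pruning strategy, as implemented in autopicker.cpp:
//   while peaks remain:
//     seed a cluster with the first remaining peak;
//     repeatedly move into the cluster every remaining peak that lies within the
//     (scaled) particle radius of any peak already in the cluster;
//     within the cluster, keep the peak with the highest relative_fom, drop all cluster
//     members closer than min_distance to it, and repeat until the cluster is empty.
// Distances are compared squared: against particle_radius2*scale*scale when growing a
// cluster, and against (min_distance*scale)^2 when pruning within it.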
void prunePeakClusters(std::vector &peaks, int min_distance, float scale); // Only keep those peaks that are at the given distance apart from each other void removeTooCloselyNeighbouringPeaks(std::vector &peaks, int min_distance, float scale); #define LARGEST_ACCEPTABLE_PRIME 43 int largestPrime(int query); int getGoodFourierDims(int requestedSizeRealX, int lim); }; #endif /* AUTOPICKER_H_ */ relion-3.1.3/src/autopicker_mpi.cpp000066400000000000000000000070661411340063500173220ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include "src/autopicker_mpi.h" void AutoPickerMpi::read(int argc, char **argv) { // Define a new MpiNode node = new MpiNode(argc, argv); if (node->isLeader()) PRINT_VERSION_INFO(); // First read in non-parallelisation-dependent variables AutoPicker::read(argc, argv); // Don't put any output to screen for mpi followers if (!node->isLeader()) verb = 0; if (do_write_fom_maps && node->isLeader()) std::cerr << "WARNING : --write_fom_maps is very heavy on disc I/O and is not advised in parallel execution. If possible, using --shrink 0 and lowpass makes I/O less significant." << std::endl; // Possibly also read parallelisation-dependent variables here // Print out MPI info printMpiNodesMachineNames(*node); } #ifdef CUDA int AutoPickerMpi::deviceInitialise() { int devCount; cudaGetDeviceCount(&devCount); std::vector < std::vector < std::string > > allThreadIDs; untangleDeviceIDs(gpu_ids, allThreadIDs); // Sequential initialisation of GPUs on all ranks int dev_id; if (!std::isdigit(*gpu_ids.begin())) dev_id = node->rank%devCount; else dev_id = textToInteger((allThreadIDs[node->rank][0]).c_str()); for (int follower = 0; follower < node->size; follower++) { if (follower == node->rank) { std::cout << " + Using GPU device: " << dev_id << " on MPI node: " << node->rank << std::endl; std::cout.flush(); } node->barrierWait(); } return(dev_id); } #endif void AutoPickerMpi::run() { // Each node does part of the work long int my_first_micrograph, my_last_micrograph, my_nr_micrographs; divide_equally(fn_micrographs.size(), node->size, node->rank, my_first_micrograph, my_last_micrograph); my_nr_micrographs = my_last_micrograph - my_first_micrograph + 1; int barstep; if (verb > 0) { std::cout << " Autopicking ..." 
<< std::endl; init_progress_bar(my_nr_micrographs); barstep = XMIPP_MAX(1, my_nr_micrographs / 60); } FileName fn_olddir=""; for (long int imic = my_first_micrograph; imic <= my_last_micrograph; imic++) { // Abort through the pipeline_control system if (pipeline_control_check_abort_job()) MPI_Abort(MPI_COMM_WORLD, RELION_EXIT_ABORTED); if (verb > 0 && imic % barstep == 0) progress_bar(imic); // Check new-style outputdirectory exists and make it if not! FileName fn_dir = getOutputRootName(fn_micrographs[imic]); fn_dir = fn_dir.beforeLastOf("/"); if (fn_dir != fn_olddir) { // Make a Particles directory int res = system(("mkdir -p " + fn_dir).c_str()); fn_olddir = fn_dir; } if (do_LoG) autoPickLoGOneMicrograph(fn_micrographs[imic], imic); else autoPickOneMicrograph(fn_micrographs[imic], imic); } if (verb > 0) progress_bar(my_nr_micrographs); } relion-3.1.3/src/autopicker_mpi.h000066400000000000000000000013131411340063500167540ustar00rootroot00000000000000/* * autopicker_mpi.h * * Created on: Sep 18, 2013 * Author: "Sjors H.W. Scheres" */ #ifndef AUTOPICKER_MPI_H_ #define AUTOPICKER_MPI_H_ #include "src/mpi.h" #include "src/autopicker.h" #include "src/parallel.h" class AutoPickerMpi: public AutoPicker { private: MpiNode *node; public: /** Destructor, calls MPI_Finalize */ ~AutoPickerMpi() { delete node; } /** Read * This could take care of mpi-parallelisation-dependent variables */ void read(int argc, char **argv); // Set device-affinity int deviceInitialise(); // Parallelized run function void run(); int getRank() { return(node->rank); } MpiNode * getNode() { return(node); } }; #endif /* AUTOPICKER_MPI_H_ */ relion-3.1.3/src/backprojector.cpp000066400000000000000000002251751411340063500171420ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ /* * backprojector.cpp * * Created on: 24 Aug 2010 * Author: scheres */ #include "src/backprojector.h" #ifdef TIMING #define RCTIC(timer,label) (timer.tic(label)) #define RCTOC(timer,label) (timer.toc(label)) #else #define RCTIC(timer,label) #define RCTOC(timer,label) #endif void BackProjector::initialiseDataAndWeight(int current_size) { initialiseData(current_size); weight.resize(data); } void BackProjector::initZeros(int current_size) { initialiseDataAndWeight(current_size); data.initZeros(); weight.initZeros(); } void BackProjector::backproject2Dto3D(const MultidimArray &f2d, const Matrix2D &A, const MultidimArray *Mweight, RFLOAT r_ewald_sphere, bool is_positive_curvature, Matrix2D* magMatrix) { RFLOAT m00, m10, m01, m11; if (magMatrix != 0) { m00 = (*magMatrix)(0,0); m10 = (*magMatrix)(1,0); m01 = (*magMatrix)(0,1); m11 = (*magMatrix)(1,1); } else { m00 = 1.0; m10 = 0.0; m01 = 0.0; m11 = 1.0; } // Use the inverse matrix Matrix2D Ainv; Ainv = A.inv(); // Go from the 2D slice coordinates to the 3D coordinates Ainv *= (RFLOAT)padding_factor; // take scaling into account directly // max_r2 and min_r2_nn are defined in 3D-space const int max_r2 = ROUND(r_max * padding_factor) * ROUND(r_max * padding_factor); const int min_r2_nn = ROUND(r_min_nn * padding_factor) * ROUND(r_min_nn * padding_factor); // precalculated coefficients for ellipse determination (see further down) // first, make sure A contains 2D distortion (lowercase 2D, uppercase 3D): const RFLOAT Am_Xx = Ainv(0,0) * m00 + Ainv(0,1) * m10; const RFLOAT Am_Xy = Ainv(0,0) * m01 + Ainv(0,1) * m11; const RFLOAT Am_Yx = Ainv(1,0) * m00 + Ainv(1,1) * m10; const RFLOAT Am_Yy = Ainv(1,0) * m01 + Ainv(1,1) * m11; const RFLOAT Am_Zx = Ainv(2,0) * m00 + Ainv(2,1) * m10; const RFLOAT Am_Zy = Ainv(2,0) * m01 + Ainv(2,1) * m11; // next, precompute (Am)^t Am into AtA: const RFLOAT AtA_xx = Am_Xx * Am_Xx + Am_Yx * Am_Yx + Am_Zx * Am_Zx; const RFLOAT AtA_xy = Am_Xx * Am_Xy + Am_Yx * Am_Yy + Am_Zx * Am_Zy; const RFLOAT AtA_yy = Am_Xy * Am_Xy + Am_Yy * Am_Yy + Am_Zy * Am_Zy; const RFLOAT AtA_xy2 = AtA_xy * AtA_xy; //#define DEBUG_BACKP #ifdef DEBUG_BACKP std::cerr << " XSIZE(f2d)= "<< XSIZE(f2d) << std::endl; std::cerr << " YSIZE(f2d)= "<< YSIZE(f2d) << std::endl; std::cerr << " XSIZE(data)= "<< XSIZE(data) << std::endl; std::cerr << " YSIZE(data)= "<< YSIZE(data) << std::endl; std::cerr << " STARTINGX(data)= "<< STARTINGX(data) << std::endl; std::cerr << " STARTINGY(data)= "<< STARTINGY(data) << std::endl; std::cerr << " STARTINGZ(data)= "<< STARTINGZ(data) << std::endl; std::cerr << " r_max= "<< r_max << std::endl; std::cerr << " Ainv= " << Ainv << std::endl; #endif // precalculate inverse of Ewald sphere diameter RFLOAT inv_diam_ewald = (r_ewald_sphere > 0.0)? 1.0 / (2.0 * r_ewald_sphere) : 0.0; if (!is_positive_curvature) { inv_diam_ewald *= -1.0; } const int s = YSIZE(f2d); const int sh = XSIZE(f2d); for (int i = 0; i < s; i++) { int y, first_allowed_x; if (i < sh) { y = i; first_allowed_x = 0; } else { y = i - s; // x == 0 plane is stored twice in the FFTW format. Don't set it twice in backprojection! first_allowed_x = 1; } // Only iterate over the ellipse in the 2D-image corresponding to the sphere in 3D. // Find the x-range inside that ellipse for every given y: // |A*v|^2 <= R^2 (for v = (x,y)^t) // = v^t A^t A v =: v^t AtA v // <=> // (AtA_xx) x^2 + (2 AtA_xy y) x + (AtA_yy y^2 - R^2) <= 0 (quadratic eq. 
in x) // <=> // x in [q - d, q + d], // where: q := -AtA_xy y / AtA_xx, // d := sqrt((AtA_xy y)^2 - AtA_xx (AtA_yy y^2 - R^2)) / AtA_xx RFLOAT discr = AtA_xy2 * y * y - AtA_xx * (AtA_yy * y * y - max_r2); if (discr < 0.0) continue; // no points inside ellipse for this y RFLOAT d = sqrt(discr) / AtA_xx; RFLOAT q = - AtA_xy * y / AtA_xx; int first_x = CEIL(q - d); int last_x = FLOOR(q + d); if (first_x < first_allowed_x) first_x = first_allowed_x; if (last_x > sh - 1) last_x = sh - 1; for (int x = first_x; x <= last_x; x++) { // Get the value from the input image Complex my_val = DIRECT_A2D_ELEM(f2d, i, x); RFLOAT my_weight; // Get the weight if (Mweight != NULL) { my_weight = DIRECT_A2D_ELEM(*Mweight, i, x); } else { my_weight = 1.0; } if (my_weight <= 0.) continue; /* In our implementation, (x, y) are not scaled because: x_on_ewald = x * r / sqrt(x * x + y * y + r * r) = x / sqrt(1 + (x * x + y * y) / (r * r)) ~ x * (1 - (x * x + y * y) / (2 * r * r) + O(1/r^4)) # binomial expansion = x + O(1/r^2) same for y_on_ewald z_on_ewald = r - r * r / sqrt(x * x + y * y + r * r) ~ r - r * (1 - (x * x + y * y) / (2 * r * r) + O(1/r^4)) # binomial expansion = (x * x + y * y) / (2 * r) + O(1/r^3) The error is < 0.0005 reciprocal voxel even for extreme cases like 200kV, 1500 A particle, 1 A / pix. */ // Get logical coordinates in the 3D map. // Make sure that the Ewald sphere is spherical even under anisotropic mag // by first undistorting (x,y) to obtain the true frequencies (xu,yu) RFLOAT xu = m00 * x + m01 * y; RFLOAT yu = m10 * x + m11 * y; RFLOAT z_on_ewaldp = inv_diam_ewald * (xu * xu + yu * yu); RFLOAT xp = Ainv(0,0) * xu + Ainv(0,1) * yu + Ainv(0,2) * z_on_ewaldp; RFLOAT yp = Ainv(1,0) * xu + Ainv(1,1) * yu + Ainv(1,2) * z_on_ewaldp; RFLOAT zp = Ainv(2,0) * xu + Ainv(2,1) * yu + Ainv(2,2) * z_on_ewaldp; double r2_3D = xp*xp + yp*yp + zp*zp; // redundant: if (r2_3D > max_r2) { continue; } if (interpolator == TRILINEAR || r2_3D < min_r2_nn) { bool is_neg_x; // Only asymmetric half is stored if (xp < 0) { // Get complex conjugated hermitian symmetry pair xp = -xp; yp = -yp; zp = -zp; is_neg_x = true; } else { is_neg_x = false; } // Trilinear interpolation (with physical coords) // Subtract STARTINGY and STARTINGZ to accelerate access to data (STARTINGX=0) // In that way use DIRECT_A3D_ELEM, rather than A3D_ELEM int x0 = FLOOR(xp); RFLOAT fx = xp - x0; int x1 = x0 + 1; int y0 = FLOOR(yp); RFLOAT fy = yp - y0; y0 -= STARTINGY(data); int y1 = y0 + 1; int z0 = FLOOR(zp); RFLOAT fz = zp - z0; z0 -= STARTINGZ(data); int z1 = z0 + 1; if (x0 < 0 || x0+1 >= data.xdim || y0 < 0 || y0+1 >= data.ydim || z0 < 0 || z0+1 >= data.zdim) { continue; } RFLOAT mfx = 1. - fx; RFLOAT mfy = 1. - fy; RFLOAT mfz = 1. 
- fz; RFLOAT dd000 = mfz * mfy * mfx; RFLOAT dd001 = mfz * mfy * fx; RFLOAT dd010 = mfz * fy * mfx; RFLOAT dd011 = mfz * fy * fx; RFLOAT dd100 = fz * mfy * mfx; RFLOAT dd101 = fz * mfy * fx; RFLOAT dd110 = fz * fy * mfx; RFLOAT dd111 = fz * fy * fx; if (is_neg_x) { my_val = conj(my_val); } // Store slice in 3D weighted sum DIRECT_A3D_ELEM(data, z0, y0, x0) += dd000 * my_val; DIRECT_A3D_ELEM(data, z0, y0, x1) += dd001 * my_val; DIRECT_A3D_ELEM(data, z0, y1, x0) += dd010 * my_val; DIRECT_A3D_ELEM(data, z0, y1, x1) += dd011 * my_val; DIRECT_A3D_ELEM(data, z1, y0, x0) += dd100 * my_val; DIRECT_A3D_ELEM(data, z1, y0, x1) += dd101 * my_val; DIRECT_A3D_ELEM(data, z1, y1, x0) += dd110 * my_val; DIRECT_A3D_ELEM(data, z1, y1, x1) += dd111 * my_val; // Store corresponding weights DIRECT_A3D_ELEM(weight, z0, y0, x0) += dd000 * my_weight; DIRECT_A3D_ELEM(weight, z0, y0, x1) += dd001 * my_weight; DIRECT_A3D_ELEM(weight, z0, y1, x0) += dd010 * my_weight; DIRECT_A3D_ELEM(weight, z0, y1, x1) += dd011 * my_weight; DIRECT_A3D_ELEM(weight, z1, y0, x0) += dd100 * my_weight; DIRECT_A3D_ELEM(weight, z1, y0, x1) += dd101 * my_weight; DIRECT_A3D_ELEM(weight, z1, y1, x0) += dd110 * my_weight; DIRECT_A3D_ELEM(weight, z1, y1, x1) += dd111 * my_weight; } // endif TRILINEAR else if (interpolator == NEAREST_NEIGHBOUR ) { int x0 = ROUND(xp); int y0 = ROUND(yp); int z0 = ROUND(zp); bool is_neg_x; if (x0 < 0) { // Get complex conjugated hermitian symmetry pair x0 = -x0; y0 = -y0; z0 = -z0; is_neg_x = true; } else { is_neg_x = false; } const int xr = x0 - STARTINGX(data); const int yr = y0 - STARTINGY(data); const int zr = z0 - STARTINGZ(data); if (xr < 0 || xr >= data.xdim || yr < 0 || yr >= data.ydim || zr < 0 || zr >= data.zdim) { continue; } if (is_neg_x) { DIRECT_A3D_ELEM(data, zr, yr, xr) += conj(my_val); DIRECT_A3D_ELEM(weight, zr, yr, xr) += my_weight; } else { DIRECT_A3D_ELEM(data, zr, yr, xr) += my_val; DIRECT_A3D_ELEM(weight, zr, yr, xr) += my_weight; } } // endif NEAREST_NEIGHBOUR else { REPORT_ERROR("FourierInterpolator::backproject%%ERROR: unrecognized interpolator "); } } // endif x-loop } // endif y-loop } void BackProjector::backproject1Dto2D(const MultidimArray &f1d, const Matrix2D &A, const MultidimArray *Mweight) { Matrix2D Ainv = A.inv(); Ainv *= (RFLOAT)padding_factor; // take scaling into account directly const int r_max_src = XSIZE(f1d) - 1; const int r_max_ref = r_max * padding_factor; const int r_max_ref_2 = r_max_ref * r_max_ref; // currently not used for some reason //const int r_min_NN_ref_2 = r_min_nn * r_min_nn * padding_factor * padding_factor; for (int x = 0; x <= r_max_src; x++) { RFLOAT my_weight; if (Mweight != NULL) { my_weight = DIRECT_A1D_ELEM(*Mweight, x); if (my_weight <= 0.) 
continue; } else { my_weight = 1.; } Complex my_val = DIRECT_A1D_ELEM(f1d, x); // Get logical coordinates in the 3D map RFLOAT xp = Ainv(0,0) * x; RFLOAT yp = Ainv(1,0) * x; const RFLOAT r_ref_2 = xp*xp + yp*yp; if (r_ref_2 > r_max_ref_2) continue; if (interpolator == TRILINEAR /* && r_ref_2 < r_min_NN_ref_2*/) { // Only asymmetric half is stored const bool is_neg_x = xp < 0; if (is_neg_x) { // Get complex conjugated hermitian symmetry pair xp = -xp; yp = -yp; } // Trilinear interpolation (with physical coords) // Subtract STARTINGY to accelerate access to data (STARTINGX=0) // In that way use DIRECT_A2D_ELEM, rather than A2D_ELEM const int x0 = FLOOR(xp); const RFLOAT fx = xp - x0; const int x1 = x0 + 1; int y0 = FLOOR(yp); const RFLOAT fy = yp - y0; y0 -= STARTINGY(data); const int y1 = y0 + 1; const RFLOAT mfx = 1. - fx; const RFLOAT mfy = 1. - fy; const RFLOAT dd00 = mfy * mfx; const RFLOAT dd01 = mfy * fx; const RFLOAT dd10 = fy * mfx; const RFLOAT dd11 = fy * fx; if (is_neg_x) { my_val = conj(my_val); } // Store slice in 3D weighted sum DIRECT_A2D_ELEM(data, y0, x0) += dd00 * my_val; DIRECT_A2D_ELEM(data, y0, x1) += dd01 * my_val; DIRECT_A2D_ELEM(data, y1, x0) += dd10 * my_val; DIRECT_A2D_ELEM(data, y1, x1) += dd11 * my_val; // Store corresponding weights DIRECT_A2D_ELEM(weight, y0, x0) += dd00 * my_weight; DIRECT_A2D_ELEM(weight, y0, x1) += dd01 * my_weight; DIRECT_A2D_ELEM(weight, y1, x0) += dd10 * my_weight; DIRECT_A2D_ELEM(weight, y1, x1) += dd11 * my_weight; } // endif TRILINEAR else if (interpolator == NEAREST_NEIGHBOUR ) { const int x0 = ROUND(xp); const int y0 = ROUND(yp); if (x0 < 0) { A2D_ELEM(data, -y0, -x0) += conj(my_val); A2D_ELEM(weight, -y0, -x0) += my_weight; } else { A2D_ELEM(data, y0, x0) += my_val; A2D_ELEM(weight, y0, x0) += my_weight; } } // endif NEAREST_NEIGHBOUR else { REPORT_ERROR("FourierInterpolator::backproject1Dto2D%%ERROR: unrecognized interpolator "); } } // endif x-loop } void BackProjector::backrotate2D(const MultidimArray &f2d, const Matrix2D &A, const MultidimArray *Mweight, Matrix2D* magMatrix) { Matrix2D Ainv = A.inv(); Ainv *= (RFLOAT)padding_factor; // take scaling into account directly RFLOAT m00, m10, m01, m11; if (magMatrix != 0) { m00 = (*magMatrix)(0,0); m10 = (*magMatrix)(1,0); m01 = (*magMatrix)(0,1); m11 = (*magMatrix)(1,1); } else { m00 = 1.0; m10 = 0.0; m01 = 0.0; m11 = 1.0; } const int r_max_ref = r_max * padding_factor; const int r_max_ref_2 = r_max_ref * r_max_ref; int min_r2_nn = r_min_nn * r_min_nn * padding_factor * padding_factor; // precalculated coefficients for ellipse determination (see further down) // first, make sure A contains 2D distortion (lowercase 2D, uppercase 3D): const RFLOAT Am_Xx = Ainv(0,0) * m00 + Ainv(0,1) * m10; const RFLOAT Am_Xy = Ainv(0,0) * m01 + Ainv(0,1) * m11; const RFLOAT Am_Yx = Ainv(1,0) * m00 + Ainv(1,1) * m10; const RFLOAT Am_Yy = Ainv(1,0) * m01 + Ainv(1,1) * m11; // next, precompute (Am)^t Am into AtA: const RFLOAT AtA_xx = Am_Xx * Am_Xx + Am_Yx * Am_Yx; const RFLOAT AtA_xy = Am_Xx * Am_Xy + Am_Yx * Am_Yy; const RFLOAT AtA_yy = Am_Xy * Am_Xy + Am_Yy * Am_Yy; const RFLOAT AtA_xy2 = AtA_xy * AtA_xy; //#define DEBUG_BACKROTATE #ifdef DEBUG_BACKROTATE std::cerr << " XSIZE(f2d)= "<< XSIZE(f2d) << std::endl; std::cerr << " YSIZE(f2d)= "<< YSIZE(f2d) << std::endl; std::cerr << " XSIZE(data)= "<< XSIZE(data) << std::endl; std::cerr << " YSIZE(data)= "<< YSIZE(data) << std::endl; std::cerr << " STARTINGX(data)= "<< STARTINGX(data) << std::endl; std::cerr << " STARTINGY(data)= "<< STARTINGY(data) << 
std::endl; std::cerr << " STARTINGZ(data)= "<< STARTINGZ(data) << std::endl; std::cerr << " max_r= "<< r_max << std::endl; std::cerr << " Ainv= " << Ainv << std::endl; #endif const int s = YSIZE(f2d); const int sh = XSIZE(f2d); for (int i = 0; i < s; i++) { int y, first_allowed_x; if (i < sh) { y = i; first_allowed_x = 0; } else { y = i - s; // x == 0 plane is stored twice in the FFTW format. Don't set it twice in backprojection! first_allowed_x = 1; } // Only iterate over the ellipse in the 2D-image corresponding to the sphere in 3D. // Find the x-range inside that ellipse for every given y: // |A*v|^2 <= R^2 (for v = (x,y)^t) // = v^t A^t A v =: v^t AtA v // <=> // (AtA_xx) x^2 + (2 AtA_xy y) x + (AtA_yy y^2 - R^2) <= 0 (quadratic eq. in x) // <=> // x in [q - d, q + d], // where: q := -AtA_xy y / AtA_xx, // d := sqrt((AtA_xy y)^2 - AtA_xx (AtA_yy y^2 - R^2)) / AtA_xx RFLOAT discr = AtA_xy2 * y*y - AtA_xx * (AtA_yy * y*y - r_max_ref_2); if (discr < 0.0) continue; // no points inside ellipse for this y RFLOAT d = sqrt(discr) / AtA_xx; RFLOAT q = - AtA_xy * y / AtA_xx; int first_x = CEIL(q - d); int last_x = FLOOR(q + d); if (first_x < first_allowed_x) first_x = first_allowed_x; if (last_x > sh - 1) last_x = sh - 1; for (int x = first_x; x <= last_x; x++) { RFLOAT my_weight; if (Mweight != NULL) { my_weight = DIRECT_A2D_ELEM(*Mweight, i, x); if (my_weight <= 0.f) continue; } else { my_weight = 1.; } // Get the relevant value in the input image Complex my_val = DIRECT_A2D_ELEM(f2d, i, x); // Get logical coordinates in the 3D map RFLOAT xu = m00 * x + m01 * y; RFLOAT yu = m10 * x + m11 * y; RFLOAT xp = Ainv(0,0) * xu + Ainv(0,1) * yu; RFLOAT yp = Ainv(1,0) * xu + Ainv(1,1) * yu; RFLOAT r_ref_2 = xp * xp + yp * yp; if (interpolator == TRILINEAR || r_ref_2 < min_r2_nn) { const bool is_neg_x = xp < 0; // Only asymmetric half is stored if (is_neg_x) { // Get complex conjugated hermitian symmetry pair xp = -xp; yp = -yp; } // Trilinear interpolation (with physical coords) // Subtract STARTINGY to accelerate access to data (STARTINGX=0) // In that way use DIRECT_A2D_ELEM, rather than A2D_ELEM const int x0 = FLOOR(xp); const RFLOAT fx = xp - x0; const int x1 = x0 + 1; int y0 = FLOOR(yp); const RFLOAT fy = yp - y0; y0 -= STARTINGY(data); const int y1 = y0 + 1; const RFLOAT mfx = 1. - fx; const RFLOAT mfy = 1. 
- fy; const RFLOAT dd00 = mfy * mfx; const RFLOAT dd01 = mfy * fx; const RFLOAT dd10 = fy * mfx; const RFLOAT dd11 = fy * fx; if (is_neg_x) { my_val = conj(my_val); } // Store slice in 3D weighted sum DIRECT_A2D_ELEM(data, y0, x0) += dd00 * my_val; DIRECT_A2D_ELEM(data, y0, x1) += dd01 * my_val; DIRECT_A2D_ELEM(data, y1, x0) += dd10 * my_val; DIRECT_A2D_ELEM(data, y1, x1) += dd11 * my_val; // Store corresponding weights DIRECT_A2D_ELEM(weight, y0, x0) += dd00 * my_weight; DIRECT_A2D_ELEM(weight, y0, x1) += dd01 * my_weight; DIRECT_A2D_ELEM(weight, y1, x0) += dd10 * my_weight; DIRECT_A2D_ELEM(weight, y1, x1) += dd11 * my_weight; } // endif TRILINEAR else if (interpolator == NEAREST_NEIGHBOUR ) { const int x0 = ROUND(xp); const int y0 = ROUND(yp); if (x0 < 0) { A2D_ELEM(data, -y0, -x0) += conj(my_val); A2D_ELEM(weight, -y0, -x0) += my_weight; } else { A2D_ELEM(data, y0, x0) += my_val; A2D_ELEM(weight, y0, x0) += my_weight; } } // endif NEAREST_NEIGHBOUR else { REPORT_ERROR("FourierInterpolator::backrotate2D%%ERROR: unrecognized interpolator "); } } // endif x-loop } // endif y-loop } void BackProjector::backrotate3D(const MultidimArray &f3d, const Matrix2D &A, const MultidimArray *Mweight) { // f3d should already be in the right size (ori_size,orihalfdim) // AND the points outside max_r should already be zero. Matrix2D Ainv = A.inv(); Ainv *= (RFLOAT)padding_factor; // take scaling into account directly const int r_max_src = XSIZE(f3d) - 1; const int r_max_src_2 = r_max_src * r_max_src; const int r_max_ref = r_max * padding_factor; const int r_max_ref_2 = r_max_ref * r_max_ref; const int r_min_NN_ref_2 = r_min_nn * r_min_nn * padding_factor * padding_factor; //#define DEBUG_BACKROTATE #ifdef DEBUG_BACKROTATE std::cerr << " XSIZE(f3d)= "<< XSIZE(f3d) << std::endl; std::cerr << " YSIZE(f3d)= "<< YSIZE(f3d) << std::endl; std::cerr << " XSIZE(data)= "<< XSIZE(data) << std::endl; std::cerr << " YSIZE(data)= "<< YSIZE(data) << std::endl; std::cerr << " STARTINGX(data)= "<< STARTINGX(data) << std::endl; std::cerr << " STARTINGY(data)= "<< STARTINGY(data) << std::endl; std::cerr << " STARTINGZ(data)= "<< STARTINGZ(data) << std::endl; std::cerr << " max_r= "<< r_max << std::endl; std::cerr << " Ainv= " << Ainv << std::endl; #endif for (int k = 0; k < ZSIZE(f3d); k++) { int z, x_min; // Don't search beyond square with side max_r if (k <= r_max_src) { z = k; x_min = 0; } else { z = k - ZSIZE(f3d); /// TODO: still check this better in the 3D case!!! // x==0 (y,z)-plane is stored twice in the FFTW format. Don't set it twice in BACKPROJECTION! x_min = 1; } int z2 = z * z; for (int i = 0; i < YSIZE(f3d); i++) { int y = (i <= r_max_src)? i : i - YSIZE(f3d); int y2 = y * y; const RFLOAT yz2 = y2 + z2; // avoid negative square root if (yz2 > r_max_src_2) continue; const int x_max = FLOOR(sqrt(r_max_src_2 - yz2)); for (int x = x_min; x <= x_max; x++) { // Get logical coordinates in the 3D map RFLOAT xp = Ainv(0,0) * x + Ainv(0,1) * y + Ainv(0,2) * z; RFLOAT yp = Ainv(1,0) * x + Ainv(1,1) * y + Ainv(1,2) * z; RFLOAT zp = Ainv(2,0) * x + Ainv(2,1) * y + Ainv(2,2) * z; const int r_ref_2 = xp*xp + yp*yp + zp*zp; if (r_ref_2 > r_max_ref_2) continue; RFLOAT my_weight; // Get the weight if (Mweight != NULL) { my_weight = DIRECT_A3D_ELEM(*Mweight, k, i, x); if (my_weight <= 0.) 
continue; } else { my_weight = 1.; } Complex my_val = DIRECT_A3D_ELEM(f3d, k, i, x); if (interpolator == TRILINEAR || r_ref_2 < r_min_NN_ref_2) { // Only asymmetric half is stored bool is_neg_x = xp < 0; if (is_neg_x) { // Get complex conjugated hermitian symmetry pair xp = -xp; yp = -yp; zp = -zp; } // Trilinear interpolation (with physical coords) // Subtract STARTINGY to accelerate access to data (STARTINGX=0) // In that way use DIRECT_A3D_ELEM, rather than A3D_ELEM const int x0 = FLOOR(xp); const RFLOAT fx = xp - x0; const int x1 = x0 + 1; int y0 = FLOOR(yp); const RFLOAT fy = yp - y0; y0 -= STARTINGY(data); const int y1 = y0 + 1; int z0 = FLOOR(zp); const RFLOAT fz = zp - z0; z0 -= STARTINGZ(data); const int z1 = z0 + 1; const RFLOAT mfx = 1. - fx; const RFLOAT mfy = 1. - fy; const RFLOAT mfz = 1. - fz; const RFLOAT dd000 = mfz * mfy * mfx; const RFLOAT dd001 = mfz * mfy * fx; const RFLOAT dd010 = mfz * fy * mfx; const RFLOAT dd011 = mfz * fy * fx; const RFLOAT dd100 = fz * mfy * mfx; const RFLOAT dd101 = fz * mfy * fx; const RFLOAT dd110 = fz * fy * mfx; const RFLOAT dd111 = fz * fy * fx; if (is_neg_x) { my_val = conj(my_val); } // Store slice in 3D weighted sum DIRECT_A3D_ELEM(data, z0, y0, x0) += dd000 * my_val; DIRECT_A3D_ELEM(data, z0, y0, x1) += dd001 * my_val; DIRECT_A3D_ELEM(data, z0, y1, x0) += dd010 * my_val; DIRECT_A3D_ELEM(data, z0, y1, x1) += dd011 * my_val; DIRECT_A3D_ELEM(data, z1, y0, x0) += dd100 * my_val; DIRECT_A3D_ELEM(data, z1, y0, x1) += dd101 * my_val; DIRECT_A3D_ELEM(data, z1, y1, x0) += dd110 * my_val; DIRECT_A3D_ELEM(data, z1, y1, x1) += dd111 * my_val; // Store corresponding weights DIRECT_A3D_ELEM(weight, z0, y0, x0) += dd000 * my_weight; DIRECT_A3D_ELEM(weight, z0, y0, x1) += dd001 * my_weight; DIRECT_A3D_ELEM(weight, z0, y1, x0) += dd010 * my_weight; DIRECT_A3D_ELEM(weight, z0, y1, x1) += dd011 * my_weight; DIRECT_A3D_ELEM(weight, z1, y0, x0) += dd100 * my_weight; DIRECT_A3D_ELEM(weight, z1, y0, x1) += dd101 * my_weight; DIRECT_A3D_ELEM(weight, z1, y1, x0) += dd110 * my_weight; DIRECT_A3D_ELEM(weight, z1, y1, x1) += dd111 * my_weight; } // endif TRILINEAR else if (interpolator == NEAREST_NEIGHBOUR ) { const int x0 = ROUND(xp); const int y0 = ROUND(yp); const int z0 = ROUND(zp); if (x0 < 0) { A3D_ELEM(data, -z0, -y0, -x0) += conj(my_val); A3D_ELEM(weight, -z0, -y0, -x0) += my_weight; } else { A3D_ELEM(data, z0, y0, x0) += my_val; A3D_ELEM(weight, z0, y0, x0) += my_weight; } } // endif NEAREST_NEIGHBOUR else { REPORT_ERROR("BackProjector::backrotate3D%%ERROR: unrecognized interpolator "); } } // endif x-loop } // endif y-loop } // endif z-loop } void BackProjector::getLowResDataAndWeight(MultidimArray &lowres_data, MultidimArray &lowres_weight, int lowres_r_max) { const int lowres_r2_max = ROUND(padding_factor * lowres_r_max) * ROUND(padding_factor * lowres_r_max); const int lowres_pad_size = 2 * (ROUND(padding_factor * lowres_r_max) + 1) + 1; // Check lowres_r_max is not too big if (lowres_r_max > r_max) REPORT_ERROR("BackProjector::getLowResDataAndWeight%%ERROR: lowres_r_max is bigger than r_max"); // Initialize lowres_data and low_res_weight arrays lowres_data.clear(); lowres_weight.clear(); if (ref_dim == 2) { lowres_data.resize(lowres_pad_size, lowres_pad_size / 2 + 1); lowres_weight.resize(lowres_pad_size, lowres_pad_size / 2 + 1); } else { lowres_data.resize(lowres_pad_size, lowres_pad_size, lowres_pad_size / 2 + 1); lowres_weight.resize(lowres_pad_size, lowres_pad_size, lowres_pad_size / 2 + 1); } lowres_data.setXmippOrigin(); lowres_data.xinit=0; 
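// Note that, like data and weight, these arrays store only the non-negative x half of
// the FFTW-format transform: the Xmipp origin is therefore centred in y (and z), while
// x keeps its first index at zero (hence the explicit xinit = 0 after setXmippOrigin()).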
lowres_weight.setXmippOrigin(); lowres_weight.xinit=0; // fill lowres arrays with relevant values FOR_ALL_ELEMENTS_IN_ARRAY3D(lowres_data) { if (k*k + i*i + j*j <= lowres_r2_max) { A3D_ELEM(lowres_data, k, i, j) = A3D_ELEM(data, k , i, j); A3D_ELEM(lowres_weight, k, i, j) = A3D_ELEM(weight, k , i, j); } } } void BackProjector::setLowResDataAndWeight(MultidimArray &lowres_data, MultidimArray &lowres_weight, int lowres_r_max) { const int lowres_r2_max = ROUND(padding_factor * lowres_r_max) * ROUND(padding_factor * lowres_r_max); const int lowres_pad_size = 2 * (ROUND(padding_factor * lowres_r_max) + 1) + 1; // Check lowres_r_max is not too big if (lowres_r_max > r_max) REPORT_ERROR("BackProjector::getLowResDataAndWeight%%ERROR: lowres_r_max is bigger than r_max"); // Check sizes of lowres_data and lowres_weight if (YSIZE(lowres_data) != lowres_pad_size || XSIZE(lowres_data) != lowres_pad_size / 2 + 1 || (ref_dim ==3 && ZSIZE(lowres_data) != lowres_pad_size) ) REPORT_ERROR("BackProjector::setLowResDataAndWeight%%ERROR: lowres_data is not of expected size..."); if (YSIZE(lowres_weight) != lowres_pad_size || XSIZE(lowres_weight) != lowres_pad_size / 2 + 1 || (ref_dim ==3 && ZSIZE(lowres_weight) != lowres_pad_size) ) REPORT_ERROR("BackProjector::setLowResDataAndWeight%%ERROR: lowres_weight is not of expected size..."); // Re-set origin to the expected place lowres_data.setXmippOrigin(); lowres_data.xinit=0; lowres_weight.setXmippOrigin(); lowres_weight.xinit=0; // Overwrite data and weight with the lowres arrays FOR_ALL_ELEMENTS_IN_ARRAY3D(lowres_data) { if (k*k + i*i + j*j <= lowres_r2_max) { A3D_ELEM(data, k, i, j) = A3D_ELEM(lowres_data, k , i, j); A3D_ELEM(weight, k, i, j) = A3D_ELEM(lowres_weight, k , i, j); } } } void BackProjector::getDownsampledAverage(MultidimArray& avg, bool divide) const { MultidimArray down_weight; // Pre-set down_data and down_weight sizes const int down_size = 2 * (r_max + 1) + 1; // Short side of data array switch (ref_dim) { case 2: avg.initZeros(down_size, down_size / 2 + 1); break; case 3: avg.initZeros(down_size, down_size, down_size / 2 + 1); break; default: REPORT_ERROR("BackProjector::getDownsampledAverage%%ERROR: Dimension of the data array should be 2 or 3"); } // Set origin in the y.z-center, but on the left side for x. avg.setXmippOrigin(); avg.xinit=0; // Resize down_weight the same as down_data down_weight.initZeros(avg); // Now calculate the down-sized sum int kp, ip, jp; FOR_ALL_ELEMENTS_IN_ARRAY3D(data) { kp = ROUND((RFLOAT)k/padding_factor); ip = ROUND((RFLOAT)i/padding_factor); jp = ROUND((RFLOAT)j/padding_factor); // TMP //#define CHECK_SIZE #ifdef CHECK_SIZE if (kp > FINISHINGZ(avg) || ip > FINISHINGY(avg) || jp > FINISHINGX(avg) || kp < STARTINGZ(avg) || ip < STARTINGY(avg) || jp < STARTINGX(avg)) { std::cerr << " kp= " << kp << " ip= " << ip << " jp= " << jp << std::endl; avg.printShape(); REPORT_ERROR("BackProjector::getDownsampledAverage: indices out of range"); } #endif A3D_ELEM(avg, kp, ip, jp) += A3D_ELEM(data, k , i, j); A3D_ELEM(down_weight, kp, ip, jp) += (divide? A3D_ELEM(weight, k , i, j) : 1.0); } // Calculate the straightforward average in the downsampled arrays FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(avg) { if (DIRECT_MULTIDIM_ELEM(down_weight, n) > 0.) 
{ DIRECT_MULTIDIM_ELEM(avg, n) /= DIRECT_MULTIDIM_ELEM(down_weight, n); } else { DIRECT_MULTIDIM_ELEM(avg, n) = 0.; } } } void BackProjector::calculateDownSampledFourierShellCorrelation(const MultidimArray& avg1, const MultidimArray& avg2, MultidimArray& fsc) const { if (!avg1.sameShape(avg2)) REPORT_ERROR("ERROR BackProjector::calculateDownSampledFourierShellCorrelation: two arrays have different sizes"); MultidimArray num, den1, den2; num.initZeros(ori_size/2 + 1); den1.initZeros(num); den2.initZeros(num); fsc.initZeros(num); FOR_ALL_ELEMENTS_IN_ARRAY3D(avg1) { const RFLOAT R = sqrt(k*k + i*i + j*j); if (R > r_max) continue; int idx = ROUND(R); Complex z1 = A3D_ELEM(avg1, k, i, j); Complex z2 = A3D_ELEM(avg2, k, i, j); RFLOAT nrmz1 = z1.norm(); RFLOAT nrmz2 = z2.norm(); num(idx) += z1.real * z2.real + z1.imag * z2.imag; den1(idx) += nrmz1; den2(idx) += nrmz2; } FOR_ALL_ELEMENTS_IN_ARRAY1D(fsc) { if (den1(i)*den2(i) > 0.) { fsc(i) = num(i)/sqrt(den1(i)*den2(i)); } } // Always set zero-resolution shell to FSC=1 // Raimond Ravelli reported a problem with FSC=1 at res=0 on 13feb2013... // (because of a suboptimal normalisation scheme, but anyway) fsc(0) = 1.; } void BackProjector::updateSSNRarrays(RFLOAT tau2_fudge, MultidimArray &tau2_io, MultidimArray &sigma2_out, MultidimArray &data_vs_prior_out, MultidimArray &fourier_coverage_out, const MultidimArray& fsc, bool update_tau2_with_fsc, bool is_whole_instead_of_half) { // never rely on references (handed to you from the outside) for computation: // they could be the same (i.e. reconstruct(..., dummy, dummy, dummy, dummy, ...); ) MultidimArray sigma2, data_vs_prior, fourier_coverage; MultidimArray tau2 = tau2_io; MultidimArray counter; const int max_r2 = ROUND(r_max * padding_factor) * ROUND(r_max * padding_factor); RFLOAT oversampling_correction = (ref_dim == 3) ? 
(padding_factor * padding_factor * padding_factor) : (padding_factor * padding_factor); // First calculate the radial average of the (inverse of the) power of the noise in the reconstruction // This is the left-hand side term in the nominator of the Wiener-filter-like update formula // and it is stored inside the weight vector // Then, if (do_map) add the inverse of tau2-spectrum values to the weight sigma2.initZeros(ori_size/2 + 1); counter.initZeros(ori_size/2 + 1); FOR_ALL_ELEMENTS_IN_ARRAY3D(weight) { const int r2 = k * k + i * i + j * j; if (r2 < max_r2) { int ires = ROUND(sqrt((RFLOAT)r2) / padding_factor); RFLOAT invw = oversampling_correction * A3D_ELEM(weight, k, i, j); DIRECT_A1D_ELEM(sigma2, ires) += invw; DIRECT_A1D_ELEM(counter, ires) += 1.; } } // Average (inverse of) sigma2 in reconstruction FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY1D(sigma2) { if (DIRECT_A1D_ELEM(sigma2, i) > 1e-10) DIRECT_A1D_ELEM(sigma2, i) = DIRECT_A1D_ELEM(counter, i) / DIRECT_A1D_ELEM(sigma2, i); else if (DIRECT_A1D_ELEM(sigma2, i) == 0) DIRECT_A1D_ELEM(sigma2, i) = 0.; else { std::cerr << " DIRECT_A1D_ELEM(sigma2, i)= " << DIRECT_A1D_ELEM(sigma2, i) << std::endl; REPORT_ERROR("BackProjector::reconstruct: ERROR: unexpectedly small, yet non-zero sigma2 value, this should not happen...a"); } } tau2.reshape(ori_size/2 + 1); data_vs_prior.initZeros(ori_size/2 + 1); fourier_coverage.initZeros(ori_size/2 + 1); counter.initZeros(ori_size/2 + 1); if (update_tau2_with_fsc) { // Then calculate new tau2 values, based on the FSC if (!fsc.sameShape(sigma2) || !fsc.sameShape(tau2)) { fsc.printShape(std::cerr); tau2.printShape(std::cerr); sigma2.printShape(std::cerr); REPORT_ERROR("ERROR BackProjector::reconstruct: sigma2, tau2 and fsc have different sizes"); } FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY1D(sigma2) { // FSC cannot be negative or zero for conversion into tau2 RFLOAT myfsc = XMIPP_MAX(0.001, DIRECT_A1D_ELEM(fsc, i)); if (is_whole_instead_of_half) { // Factor two because of twice as many particles // Sqrt-term to get 60-degree phase errors.... myfsc = sqrt(2. * myfsc / (myfsc + 1.)); } myfsc = XMIPP_MIN(0.999, myfsc); RFLOAT myssnr = myfsc / (1. - myfsc); // Sjors 29nov2017 try tau2_fudge for pulling harder on Refine3D runs... myssnr *= tau2_fudge; RFLOAT fsc_based_tau = myssnr * DIRECT_A1D_ELEM(sigma2, i); DIRECT_A1D_ELEM(tau2, i) = fsc_based_tau; // data_vs_prior is merely for reporting: it is not used for anything in the reconstruction DIRECT_A1D_ELEM(data_vs_prior, i) = myssnr; } } // Now accumulate data_vs_prior if (!update_tau2_with_fsc) // Also accumulate fourier_coverage FOR_ALL_ELEMENTS_IN_ARRAY3D(weight) { int r2 = k * k + i * i + j * j; if (r2 < max_r2) { int ires = ROUND( sqrt((RFLOAT)r2) / padding_factor ); RFLOAT invw = A3D_ELEM(weight, k, i, j); RFLOAT invtau2; if (DIRECT_A1D_ELEM(tau2, ires) > 0.) { // Calculate inverse of tau2 invtau2 = 1. / (oversampling_correction * tau2_fudge * DIRECT_A1D_ELEM(tau2, ires)); } else if (DIRECT_A1D_ELEM(tau2, ires) == 0.) 
{ // If tau2 is zero, use small value instead invtau2 = 1./ ( 0.001 * invw); } else { std::cerr << " sigma2= " << sigma2 << std::endl; std::cerr << " fsc= " << fsc << std::endl; std::cerr << " tau2= " << tau2 << std::endl; REPORT_ERROR("ERROR BackProjector::reconstruct: Negative or zero values encountered for tau2 spectrum!"); } // Keep track of spectral evidence-to-prior ratio and remaining noise in the reconstruction if (!update_tau2_with_fsc) { DIRECT_A1D_ELEM(data_vs_prior, ires) += invw / invtau2; } // Keep track of the coverage in Fourier space if (invw / invtau2 >= 1.) { DIRECT_A1D_ELEM(fourier_coverage, ires) += 1.; } DIRECT_A1D_ELEM(counter, ires) += 1.; } } // Average data_vs_prior if (!update_tau2_with_fsc) { FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY1D(data_vs_prior) { if (i > r_max) DIRECT_A1D_ELEM(data_vs_prior, i) = 0.; else if (DIRECT_A1D_ELEM(counter, i) < 0.001) DIRECT_A1D_ELEM(data_vs_prior, i) = 999.; else DIRECT_A1D_ELEM(data_vs_prior, i) /= DIRECT_A1D_ELEM(counter, i); } } // Calculate Fourier coverage in each shell FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY1D(fourier_coverage) { if (DIRECT_A1D_ELEM(counter, i) > 0.) DIRECT_A1D_ELEM(fourier_coverage, i) /= DIRECT_A1D_ELEM(counter, i); } // Send back the output tau2_io = tau2; sigma2_out = sigma2; data_vs_prior_out = data_vs_prior; fourier_coverage_out = fourier_coverage; } void BackProjector::externalReconstruct(MultidimArray &vol_out, FileName &fn_out, MultidimArray &fsc_halves_io, MultidimArray &tau2_io, MultidimArray &sigma2_ref, MultidimArray &data_vs_prior, bool is_whole_instead_of_half, RFLOAT tau2_fudge, int verb) { FileName fn_recons = fn_out+"_external_reconstruct.mrc"; FileName fn_star = fn_out+"_external_reconstruct.star"; FileName fn_out_star = fn_out+"_external_reconstruct_out.star"; MultidimArray fsc_halves = fsc_halves_io; MultidimArray tau2 = tau2_io; const int max_r2 = ROUND(r_max * padding_factor) * ROUND(r_max * padding_factor); int padoridim = ROUND(padding_factor * ori_size); // Write out data array Image Idata; if (ref_dim == 2) Idata().resize(pad_size, pad_size/2+1); else Idata().resize(pad_size, pad_size, pad_size/2+1); Projector::decenter(data, Idata(), max_r2); windowFourierTransform(Idata(), padoridim); ComplexIO::write(Idata(), fn_out+"_external_reconstruct_data", ".mrc"); Idata.clear(); // Write out weight array Image Iweight; if (ref_dim == 2) Iweight().resize(pad_size, pad_size/2+1); else Iweight().resize(pad_size, pad_size, pad_size/2+1); Projector::decenter(weight, Iweight(), max_r2); windowFourierTransform(Iweight(), padoridim); Iweight.write(fn_out+"_external_reconstruct_weight.mrc"); Iweight.clear(); // Write out STAR file for input to external reconstruction program MetaDataTable MDlist, MDtau; MDlist.setName("external_reconstruct_general"); MDlist.setIsList(true); MDlist.addObject(); MDlist.setValue(EMDL_OPTIMISER_EXTERNAL_RECONS_DATA_REAL, fn_out+"_external_reconstruct_data_real.mrc"); MDlist.setValue(EMDL_OPTIMISER_EXTERNAL_RECONS_DATA_IMAG, fn_out+"_external_reconstruct_data_imag.mrc"); MDlist.setValue(EMDL_OPTIMISER_EXTERNAL_RECONS_WEIGHT, fn_out+"_external_reconstruct_weight.mrc"); MDlist.setValue(EMDL_OPTIMISER_EXTERNAL_RECONS_RESULT, fn_recons); MDlist.setValue(EMDL_OPTIMISER_EXTERNAL_RECONS_NEWSTAR, fn_out_star); MDlist.setValue(EMDL_MLMODEL_TAU2_FUDGE_FACTOR, tau2_fudge); MDlist.setValue(EMDL_MLMODEL_PADDING_FACTOR, padding_factor); MDlist.setValue(EMDL_MLMODEL_DIMENSIONALITY, ref_dim); MDlist.setValue(EMDL_MLMODEL_ORIGINAL_SIZE, ori_size); MDlist.setValue(EMDL_MLMODEL_CURRENT_SIZE, 
2*r_max); MDtau.setName("external_reconstruct_tau2"); for (int ii = 0; ii < XSIZE(tau2); ii++) { MDtau.addObject(); MDtau.setValue(EMDL_SPECTRAL_IDX, ii); MDtau.setValue(EMDL_MLMODEL_TAU2_REF, tau2(ii)); MDtau.setValue(EMDL_MLMODEL_FSC_HALVES_REF, fsc_halves(ii)); } std::ofstream fh; fh.open((fn_star).c_str(), std::ios::out); if (!fh) REPORT_ERROR( (std::string)"BackProjector::externalReconstruct: Cannot write file: " + fn_star); MDlist.write(fh); MDtau.write(fh); fh.close(); // Make the system call: program name plus the STAR file for the external reconstruction program as its first argument char *my_exec = getenv ("RELION_EXTERNAL_RECONSTRUCT_EXECUTABLE"); char default_exec[]=DEFAULT_EXTERNAL_RECONSTRUCT; if (my_exec == NULL) { my_exec = default_exec; } std::string command = std::string(my_exec) + " " + fn_star; if (verb > 0) std::cout << std::endl << " + Making system call for external reconstruction: " << command << std::endl; int res = system(command.c_str()); if (res) REPORT_ERROR(" ERROR: there was something wrong with system call: " + command); else if (verb > 0) std::cout << " + External reconstruction finished successfully, reading result back in ... " << std::endl; // Read the resulting map back into memory Iweight.read(fn_recons); vol_out = Iweight(); vol_out.setXmippOrigin(); if (exists(fn_out_star)) { MetaDataTable MDnewtau; MDnewtau.read(fn_out_star); if (!MDnewtau.containsLabel(EMDL_SPECTRAL_IDX)) REPORT_ERROR("ERROR: external reconstruct output STAR file does not contain spectral idx!"); // Directly update tau2 spectrum int idx; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDnewtau) { MDnewtau.getValue(EMDL_SPECTRAL_IDX, idx); if (idx >= XSIZE(tau2_io)) continue; if (MDnewtau.containsLabel(EMDL_MLMODEL_TAU2_REF)) { MDnewtau.getValue(EMDL_MLMODEL_TAU2_REF, tau2_io(idx)); data_vs_prior(idx) = tau2_io(idx) / sigma2_ref(idx); } else if (MDnewtau.containsLabel(EMDL_POSTPROCESS_FSC_GENERAL)) { MDnewtau.getValue(EMDL_SPECTRAL_IDX, idx); MDnewtau.getValue(EMDL_POSTPROCESS_FSC_GENERAL, fsc_halves_io(idx)); RFLOAT myfsc = XMIPP_MAX(0.001, fsc_halves_io(idx)); if (is_whole_instead_of_half) { // Factor two because of twice as many particles // Sqrt-term to get 60-degree phase errors.... myfsc = sqrt(2. * myfsc / (myfsc + 1.)); } myfsc = XMIPP_MIN(0.999, myfsc); RFLOAT myssnr = myfsc / (1. - myfsc); myssnr *= tau2_fudge; tau2_io(idx) = myssnr * sigma2_ref(idx); data_vs_prior(idx) = myssnr; } else { REPORT_ERROR("ERROR: output STAR file from external reconstruct does not contain tau2 or FSC array"); } } if (verb > 0) std::cout << " + External reconstruction successfully updated external tau2 array ... 
" << std::endl; } } void BackProjector::reconstruct(MultidimArray &vol_out, int max_iter_preweight, bool do_map, const MultidimArray &tau2, RFLOAT tau2_fudge, RFLOAT normalise, int minres_map, bool printTimes, Image* weight_out) { #ifdef TIMING Timer ReconTimer; int ReconS_1 = ReconTimer.setNew(" RcS1_Init "); int ReconS_2 = ReconTimer.setNew(" RcS2_Shape&Noise "); int ReconS_2_5 = ReconTimer.setNew(" RcS2.5_Regularize "); int ReconS_3 = ReconTimer.setNew(" RcS3_skipGridding "); int ReconS_4 = ReconTimer.setNew(" RcS4_doGridding_norm "); int ReconS_5 = ReconTimer.setNew(" RcS5_doGridding_init "); int ReconS_6 = ReconTimer.setNew(" RcS6_doGridding_iter "); int ReconS_7 = ReconTimer.setNew(" RcS7_doGridding_apply "); int ReconS_8 = ReconTimer.setNew(" RcS8_blobConvolute "); int ReconS_9 = ReconTimer.setNew(" RcS9_blobResize "); int ReconS_10 = ReconTimer.setNew(" RcS10_blobSetReal "); int ReconS_11 = ReconTimer.setNew(" RcS11_blobSetTemp "); int ReconS_12 = ReconTimer.setNew(" RcS12_blobTransform "); int ReconS_13 = ReconTimer.setNew(" RcS13_blobCenterFFT "); int ReconS_14 = ReconTimer.setNew(" RcS14_blobNorm1 "); int ReconS_15 = ReconTimer.setNew(" RcS15_blobSoftMask "); int ReconS_16 = ReconTimer.setNew(" RcS16_blobNorm2 "); int ReconS_17 = ReconTimer.setNew(" RcS17_WindowReal "); int ReconS_18 = ReconTimer.setNew(" RcS18_GriddingCorrect "); int ReconS_19 = ReconTimer.setNew(" RcS19_tauInit "); int ReconS_20 = ReconTimer.setNew(" RcS20_tausetReal "); int ReconS_21 = ReconTimer.setNew(" RcS21_tauTransform "); int ReconS_22 = ReconTimer.setNew(" RcS22_tautauRest "); int ReconS_23 = ReconTimer.setNew(" RcS23_tauShrinkToFit "); int ReconS_24 = ReconTimer.setNew(" RcS24_extra "); #endif RCTIC(ReconTimer,ReconS_1); const int max_r2 = ROUND(r_max * padding_factor) * ROUND(r_max * padding_factor); RFLOAT oversampling_correction = (ref_dim == 3) ? (padding_factor * padding_factor * padding_factor) : (padding_factor * padding_factor); //#define DEBUG_RECONSTRUCT #ifdef DEBUG_RECONSTRUCT Image ttt; FileName fnttt; ttt()=weight; ttt.write("reconstruct_initial_weight.spi"); std::cerr << " pad_size= " << pad_size << " padding_factor= " << padding_factor << " max_r2= " << max_r2 << std::endl; #endif // Set Fconv to the right size if (ref_dim == 2) vol_out.setDimensions(pad_size, pad_size, 1, 1); else // Too costly to actually allocate the space // Trick transformer with the right dimensions vol_out.setDimensions(pad_size, pad_size, pad_size, 1); FourierTransformer transformer; transformer.setReal(vol_out); // Fake set real. 1. Allocate space for Fconv 2. calculate plans. MultidimArray& Fconv = transformer.getFourierReference(); vol_out.clear(); // Reset dimensions to 0 RCTOC(ReconTimer,ReconS_1); RCTIC(ReconTimer,ReconS_2); // Go from projector-centered to FFTW-uncentered MultidimArray Fweight; Fweight.reshape(Fconv); Projector::decenter(weight, Fweight, max_r2); RCTOC(ReconTimer,ReconS_2); RCTIC(ReconTimer,ReconS_2_5); // Apply MAP-additional term to the Fweight array // This will regularise the actual reconstruction if (do_map) { // Then, add the inverse of tau2-spectrum values to the weight FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM(Fconv) { int r2 = kp * kp + ip * ip + jp * jp; if (r2 < max_r2) { int ires = ROUND(sqrt((RFLOAT)r2) / padding_factor); RFLOAT invw = DIRECT_A3D_ELEM(Fweight, k, i, j); RFLOAT invtau2; if (DIRECT_A1D_ELEM(tau2, ires) > 0.) { // Calculate inverse of tau2 invtau2 = 1. 
/ (oversampling_correction * tau2_fudge * DIRECT_A1D_ELEM(tau2, ires)); } else if (DIRECT_A1D_ELEM(tau2, ires) < 1e-20) { // If tau2 is zero, use small value instead if (invw > 1e-20) invtau2 = 1./ ( 0.001 * invw); else invtau2 = 0.; } else { std::cerr << " tau2= " << tau2 << std::endl; REPORT_ERROR("ERROR BackProjector::reconstruct: Negative or zero values encountered for tau2 spectrum!"); } // Only for (ires >= minres_map) add Wiener-filter like term if (ires >= minres_map) { // Now add the inverse-of-tau2_class term invw += invtau2; // Store the new weight again in Fweight DIRECT_A3D_ELEM(Fweight, k, i, j) = invw; } } } } //end if do_map RCTOC(ReconTimer,ReconS_2_5); if (skip_gridding) { RCTIC(ReconTimer,ReconS_3); Fconv.initZeros(); // to remove any stuff from the input volume Projector::decenter(data, Fconv, max_r2); // Prevent divisions by zero: set Fweight to at least 1/1000th of the radially averaged weight at that resolution // beyond r_max, set Fweight to at least 1/1000th of the radially averaged weight at r_max; MultidimArray radavg_weight(r_max), counter(r_max); const int round_max_r2 = ROUND(r_max * padding_factor * r_max * padding_factor); FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM(Fweight) { const int r2 = kp * kp + ip * ip + jp * jp; // Note that (r < ires) != (r2 < max_r2), because max_r2 = ROUND(r_max * padding_factor)^2. // We have to use round_max_r2 = ROUND((r_max * padding_factor)^2). // e.g. k = 0, i = 7, j = 28, max_r2 = 841, r_max = 16, padding_factor = 18. if (r2 < round_max_r2) { const int ires = FLOOR(sqrt((RFLOAT)r2) / padding_factor); if (ires >= XSIZE(radavg_weight)) { std::cerr << " k= " << k << " i= " << i << " j= " << j << std::endl; std::cerr << " ires= " << ires << " XSIZE(radavg_weight)= " << XSIZE(radavg_weight) << std::endl; REPORT_ERROR("BUG: ires >=XSIZE(radavg_weight) "); } DIRECT_A1D_ELEM(radavg_weight, ires) += DIRECT_A3D_ELEM(Fweight, k, i, j); DIRECT_A1D_ELEM(counter, ires) += 1.; } } // Calculate 1/1000th of radial averaged weight FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY1D(radavg_weight) { if (DIRECT_A1D_ELEM(counter, i) > 0. || DIRECT_A1D_ELEM(radavg_weight, i) > 0.) { DIRECT_A1D_ELEM(radavg_weight, i) /= 1000.* DIRECT_A1D_ELEM(counter, i); } else { std::cerr << " counter= " << counter << std::endl; std::cerr << " radavg_weight= " << radavg_weight << std::endl; REPORT_ERROR("BUG: zeros in counter or radavg_weight!"); } } bool have_warned = false; // perform XMIPP_MAX on all weight elements, and do division of data/weight FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM(Fweight) { const int r2 = kp * kp + ip * ip + jp * jp; const int ires = FLOOR(sqrt((RFLOAT)r2) / padding_factor); const RFLOAT weight = XMIPP_MAX(DIRECT_A3D_ELEM(Fweight, k, i, j), DIRECT_A1D_ELEM(radavg_weight, (ires < r_max) ? ires : (r_max - 1))); if (weight == 0) { if (!have_warned) { std::cerr << " WARNING: ignoring divide by zero in skip_gridding: ires = " << ires << " kp = " << kp << " ip = " << ip << " jp = " << jp << std::endl; std::cerr << " max_r2 = " << max_r2 << " r_max = " << r_max << " padding_factor = " << padding_factor << " ROUND(sqrt(max_r2)) = " << ROUND(sqrt(max_r2)) << " ROUND(r_max * padding_factor) = " << ROUND(r_max * padding_factor) << std::endl; have_warned = true; } } else { DIRECT_A3D_ELEM(Fconv, k, i, j) /= weight; } } } else { RCTIC(ReconTimer,ReconS_4); // Divide both data and Fweight by normalisation factor to prevent FFT's with very large values.... 
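// (Both arrays are divided by the same caller-supplied 'normalise' factor, so the
//  data-to-weight ratio that the gridding iterations effectively invert is preserved;
//  the division only keeps the values passed through the FFTs in a numerically safe
//  range. Schematic, with made-up numbers:
//      data(k) = 1e8;  weight(k) = 2e8;  normalise = 1e6;
//      data(k)   /= normalise;   // -> 100.
//      weight(k) /= normalise;   // -> 200.   ratio still 0.5 )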
#ifdef DEBUG_RECONSTRUCT std::cerr << " normalise= " << normalise << std::endl; #endif FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Fweight) { DIRECT_MULTIDIM_ELEM(Fweight, n) /= normalise; } FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(data) { DIRECT_MULTIDIM_ELEM(data, n) /= normalise; } RCTOC(ReconTimer,ReconS_4); RCTIC(ReconTimer,ReconS_5); // Initialise Fnewweight with 1's and 0's. (also see comments below) FOR_ALL_ELEMENTS_IN_ARRAY3D(weight) { if (k * k + i * i + j * j < max_r2) A3D_ELEM(weight, k, i, j) = 1.; else A3D_ELEM(weight, k, i, j) = 0.; } // Fnewweight can become too large for a float: always keep this one in double-precision MultidimArray Fnewweight; Fnewweight.reshape(Fconv); decenter(weight, Fnewweight, max_r2); RCTOC(ReconTimer,ReconS_5); // Iterative algorithm as in Eq. [14] in Pipe & Menon (1999) // or Eq. (4) in Matej (2001) for (int iter = 0; iter < max_iter_preweight; iter++) { //std::cout << " iteration " << (iter+1) << "/" << max_iter_preweight << "\n"; RCTIC(ReconTimer,ReconS_6); // Set Fnewweight * Fweight in the transformer // In Matej et al (2001), weights w_P^i are convoluted with the kernel, // and the initial w_P^0 are 1 at each sampling point // Here the initial weights are also 1 (see initialisation Fnewweight above), // but each "sampling point" counts "Fweight" times! // That is why Fnewweight is multiplied by Fweight prior to the convolution FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Fconv) { DIRECT_MULTIDIM_ELEM(Fconv, n) = DIRECT_MULTIDIM_ELEM(Fnewweight, n) * DIRECT_MULTIDIM_ELEM(Fweight, n); } // convolute through Fourier-transform (as both grids are rectangular) // Note that convoluteRealSpace acts on the complex array inside the transformer convoluteBlobRealSpace(transformer, false); RFLOAT w, corr_min = LARGE_NUMBER, corr_max = -LARGE_NUMBER, corr_avg=0., corr_nn=0.; FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM(Fconv) { if (kp * kp + ip * ip + jp * jp < max_r2) { // Make sure no division by zero can occur.... w = XMIPP_MAX(1e-6, abs(DIRECT_A3D_ELEM(Fconv, k, i, j))); // Monitor min, max and avg conv_weight corr_min = XMIPP_MIN(corr_min, w); corr_max = XMIPP_MAX(corr_max, w); corr_avg += w; corr_nn += 1.; // Apply division of Eq. 
[14] in Pipe & Menon (1999) DIRECT_A3D_ELEM(Fnewweight, k, i, j) /= w; } } RCTOC(ReconTimer,ReconS_6); #ifdef DEBUG_RECONSTRUCT std::cerr << " PREWEIGHTING ITERATION: "<< iter + 1 << " OF " << max_iter_preweight << std::endl; // report of maximum and minimum values of current conv_weight std::cerr << " corr_avg= " << corr_avg / corr_nn << std::endl; std::cerr << " corr_min= " << corr_min << std::endl; std::cerr << " corr_max= " << corr_max << std::endl; #endif } RCTIC(ReconTimer,ReconS_7); #ifdef DEBUG_RECONSTRUCT Image tttt; tttt()=Fnewweight; tttt.write("reconstruct_gridding_weight.spi"); FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Fconv) { DIRECT_MULTIDIM_ELEM(ttt(), n) = abs(DIRECT_MULTIDIM_ELEM(Fconv, n)); } ttt.write("reconstruct_gridding_correction_term.spi"); #endif // Clear memory Fweight.clear(); // Note that Fnewweight now holds the approximation of the inverse of the weights on a regular grid // Now do the actual reconstruction with the data array // Apply the iteratively determined weight Fconv.initZeros(); // to remove any stuff from the input volume Projector::decenter(data, Fconv, max_r2); FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Fconv) { #ifdef RELION_SINGLE_PRECISION // Prevent numerical instabilities in single-precision reconstruction with very unevenly sampled orientations if (DIRECT_MULTIDIM_ELEM(Fnewweight, n) > 1e20) DIRECT_MULTIDIM_ELEM(Fnewweight, n) = 1e20; #endif DIRECT_MULTIDIM_ELEM(Fconv, n) *= DIRECT_MULTIDIM_ELEM(Fnewweight, n); } // Clear memory Fnewweight.clear(); RCTOC(ReconTimer,ReconS_7); } // end if !skip_gridding // Gridding theory says one now has to interpolate the fine grid onto the coarse one using a blob kernel // and then do the inverse transform and divide by the FT of the blob (i.e. do the gridding correction) // In practice, this gives all types of artefacts (perhaps I never found the right implementation?!) // Therefore, window the Fourier transform and then do the inverse transform //#define RECONSTRUCT_CONVOLUTE_BLOB #ifdef RECONSTRUCT_CONVOLUTE_BLOB // Apply the same blob-convolution as above to the data array // Mask real-space map beyond its original size to prevent aliasing in the downsampling step below RCTIC(ReconTimer,ReconS_8); convoluteBlobRealSpace(transformer, true); RCTOC(ReconTimer,ReconS_8); RCTIC(ReconTimer,ReconS_9); // Now just pick every 3rd pixel in Fourier-space (i.e. down-sample) // and do a final inverse FT if (ref_dim == 2) vol_out.resize(ori_size, ori_size); else vol_out.resize(ori_size, ori_size, ori_size); RCTOC(ReconTimer,ReconS_9); RCTIC(ReconTimer,ReconS_10); FourierTransformer transformer2; MultidimArray Ftmp; transformer2.setReal(vol_out); // cannot use the first transformer because Fconv is inside there!! 
transformer2.getFourierAlias(Ftmp); RCTOC(ReconTimer,ReconS_10); RCTIC(ReconTimer,ReconS_11); FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM(Ftmp) { if (kp * kp + ip * ip + jp * jp < r_max * r_max) { DIRECT_A3D_ELEM(Ftmp, k, i, j) = FFTW_ELEM(Fconv, kp * padding_factor, ip * padding_factor, jp * padding_factor); } else { DIRECT_A3D_ELEM(Ftmp, k, i, j) = 0.; } } RCTOC(ReconTimer,ReconS_11); RCTIC(ReconTimer,ReconS_13); CenterFFTbySign(Ftmp); RCTOC(ReconTimer,ReconS_13); RCTIC(ReconTimer,ReconS_12); // inverse FFT leaves result in vol_out transformer2.inverseFourierTransform(); RCTOC(ReconTimer,ReconS_12); RCTIC(ReconTimer,ReconS_14); // Un-normalize FFTW (because original FFTs were done with the size of 2D FFTs) if (ref_dim==3) vol_out /= ori_size; RCTOC(ReconTimer,ReconS_14); RCTIC(ReconTimer,ReconS_15); // Mask out corners to prevent aliasing artefacts softMaskOutsideMap(vol_out); RCTOC(ReconTimer,ReconS_15); RCTIC(ReconTimer,ReconS_16); // Gridding correction for the blob RFLOAT normftblob = tab_ftblob(0.); FOR_ALL_ELEMENTS_IN_ARRAY3D(vol_out) { RFLOAT r = sqrt((RFLOAT)(k*k+i*i+j*j)); RFLOAT rval = r / (ori_size * padding_factor); A3D_ELEM(vol_out, k, i, j) /= tab_ftblob(rval) / normftblob; //if (k==0 && i==0) // std::cerr << " j= " << j << " rval= " << rval << " tab_ftblob(rval) / normftblob= " << tab_ftblob(rval) / normftblob << std::endl; } RCTOC(ReconTimer,ReconS_16); #else // rather than doing the blob-convolution to downsample the data array, do a windowing operation: // This is the same as convolution with a SINC. It seems to give better maps. // Then just make the blob look as much as a SINC as possible.... // The "standard" r1.9, m2 and a15 blob looks quite like a sinc until the first zero (perhaps that's why it is standard?) //for (RFLOAT r = 0.1; r < 10.; r+=0.01) //{ // RFLOAT sinc = sin(PI * r / padding_factor ) / ( PI * r / padding_factor); // std::cout << " r= " << r << " sinc= " << sinc << " blob= " << blob_val(r, blob) << std::endl; //} // Now do inverse FFT and window to original size in real-space // Pass the transformer to prevent making and clearing a new one before clearing the one declared above.... // The latter may give memory problems as detected by electric fence.... RCTIC(ReconTimer,ReconS_17); windowToOridimRealSpace(transformer, vol_out, printTimes); RCTOC(ReconTimer,ReconS_17); #endif #ifdef DEBUG_RECONSTRUCT ttt()=vol_out; ttt.write("reconstruct_before_gridding_correction.spi"); #endif // Correct for the linear/nearest-neighbour interpolation that led to the data array RCTIC(ReconTimer,ReconS_18); griddingCorrect(vol_out); RCTOC(ReconTimer,ReconS_18); RCTIC(ReconTimer,ReconS_23); // Completely empty the transformer object transformer.cleanup(); // Now can use extra mem to move data into smaller array space vol_out.shrinkToFit(); RCTOC(ReconTimer,ReconS_23); #ifdef TIMING if(printTimes) ReconTimer.printTimes(true); #endif #ifdef DEBUG_RECONSTRUCT std::cerr<<"done with reconstruct"<data = MultidimArray(1, ori_size, ori_size, ori_size/2+1); Image count(ori_size/2+1, ori_size, ori_size); count.data.initZeros(); // downsample while considering padding: for (long int z = 0; z < Fweight.zdim; z++) for (long int y = 0; y < Fweight.ydim; y++) for (long int x = 0; x < Fweight.xdim; x++) { int xl = x; int yl = y < Fweight.ydim/2? y : y - Fweight.ydim; int zl = z < Fweight.zdim/2? 
z : z - Fweight.zdim; if (xl == Fweight.xdim - 1 || yl == Fweight.ydim/2 || yl == -Fweight.ydim/2 - 1 || zl == Fweight.zdim/2 || zl == -Fweight.zdim/2 - 1) { continue; } int xx = ROUND(xl / padding_factor); int yy = (ROUND(yl / padding_factor) + ori_size) % ori_size; int zz = (ROUND(zl / padding_factor) + ori_size) % ori_size; if (xx >= 0 && xx < ori_size/2+1 && yy >= 0 && yy < ori_size && zz >= 0 && zz < ori_size) { DIRECT_A3D_ELEM(weight_out->data, zz, yy, xx) += DIRECT_A3D_ELEM(Fweight, z, y, x); DIRECT_A3D_ELEM(count.data, zz, yy, xx) += 1.0; } } const double pad3 = padding_factor * padding_factor * padding_factor; for (long int z = 0; z < ori_size; z++) for (long int y = 0; y < ori_size; y++) for (long int x = 0; x < ori_size/2 + 1; x++) { const RFLOAT c = DIRECT_A3D_ELEM(count.data, z, y, x); if (c > 0.0) { DIRECT_A3D_ELEM(weight_out->data, z, y, x) *= pad3/c; } } } } void BackProjector::symmetrise(int nr_helical_asu, RFLOAT helical_twist, RFLOAT helical_rise, int threads) { // First make sure the input arrays are obeying Hermitian symmetry, // which is assumed in the rotation operators of both helical and point group symmetry enforceHermitianSymmetry(); // Then apply helical and point group symmetry (order irrelevant?) applyHelicalSymmetry(nr_helical_asu, helical_twist, helical_rise); applyPointGroupSymmetry(threads); } void BackProjector::enforceHermitianSymmetry() { for (int iz = STARTINGZ(data); iz <=FINISHINGZ(data); iz++) { // Make sure all points are only included once. int starty = (iz < 0) ? 0 : 1; for (int iy = starty; iy <= FINISHINGY(data); iy++) { // I just need to sum the two points, not divide by 2! Complex fsum = (A3D_ELEM(data, iz, iy, 0) + conj(A3D_ELEM(data, -iz, -iy, 0))); A3D_ELEM(data, iz, iy, 0) = fsum; A3D_ELEM(data, -iz, -iy, 0) = conj(fsum); RFLOAT sum = (A3D_ELEM(weight, iz, iy, 0) + A3D_ELEM(weight, -iz, -iy, 0)); A3D_ELEM(weight, iz, iy, 0) = sum; A3D_ELEM(weight, -iz, -iy, 0) = sum; } } } void BackProjector::applyHelicalSymmetry(int nr_helical_asu, RFLOAT helical_twist, RFLOAT helical_rise) { if ( (nr_helical_asu < 2) || (ref_dim != 3) ) return; int rmax2 = ROUND(r_max * padding_factor) * ROUND(r_max * padding_factor); Matrix2D R(4, 4); // A matrix from the list MultidimArray sum_weight; MultidimArray sum_data; RFLOAT x, y, z, fx, fy, fz, xp, yp, zp, r2; bool is_neg_x; int x0, x1, y0, y1, z0, z1; Complex d000, d001, d010, d011, d100, d101, d110, d111; Complex dx00, dx01, dx10, dx11, dxy0, dxy1, ddd; RFLOAT dd000, dd001, dd010, dd011, dd100, dd101, dd110, dd111; RFLOAT ddx00, ddx01, ddx10, ddx11, ddxy0, ddxy1; // First symmetry operator (not stored in SL) is the identity matrix sum_weight = weight; sum_data = data; int h_min = -nr_helical_asu/2; int h_max = -h_min + nr_helical_asu%2; for (int hh = h_min; hh < h_max; hh++) { if (hh != 0) // h==0 is done before the for loop (where sum_data = data) { RFLOAT rot_ang = hh * (-helical_twist); rotation3DMatrix(rot_ang, 'Z', R); R.setSmallValuesToZero(); // TODO: invert rotation matrix? // Loop over all points in the output (i.e. rotated, or summed) array FOR_ALL_ELEMENTS_IN_ARRAY3D(sum_weight) { x = (RFLOAT)j; // STARTINGX(sum_weight) is zero! 
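// (x can be taken directly from j because only the x >= 0 half of the Hermitian
//  transform is stored, i.e. STARTINGX == 0, whereas y and z run over both
//  negative and positive logical indices.)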
y = (RFLOAT)i; z = (RFLOAT)k; r2 = x*x + y*y + z*z; if (r2 <= rmax2) { // coords_output(x,y) = A * coords_input (xp,yp) xp = x * R(0, 0) + y * R(0, 1) + z * R(0, 2); yp = x * R(1, 0) + y * R(1, 1) + z * R(1, 2); zp = x * R(2, 0) + y * R(2, 1) + z * R(2, 2); // Only asymmetric half is stored if (xp < 0) { // Get complex conjugated hermitian symmetry pair xp = -xp; yp = -yp; zp = -zp; is_neg_x = true; } else { is_neg_x = false; } // Trilinear interpolation (with physical coords) // Subtract STARTINGY and STARTINGZ to accelerate access to data (STARTINGX=0) // In that way use DIRECT_A3D_ELEM, rather than A3D_ELEM x0 = FLOOR(xp); fx = xp - x0; x1 = x0 + 1; y0 = FLOOR(yp); fy = yp - y0; y0 -= STARTINGY(data); y1 = y0 + 1; z0 = FLOOR(zp); fz = zp - z0; z0 -= STARTINGZ(data); z1 = z0 + 1; #ifdef CHECK_SIZE if (x0 < 0 || y0 < 0 || z0 < 0 || x1 < 0 || y1 < 0 || z1 < 0 || x0 >= XSIZE(data) || y0 >= YSIZE(data) || z0 >= ZSIZE(data) || x1 >= XSIZE(data) || y1 >= YSIZE(data) || z1 >= ZSIZE(data) ) { std::cerr << " x0= " << x0 << " y0= " << y0 << " z0= " << z0 << std::endl; std::cerr << " x1= " << x1 << " y1= " << y1 << " z1= " << z1 << std::endl; data.printShape(); REPORT_ERROR("BackProjector::applyPointGroupSymmetry: checksize!!!"); } #endif // First interpolate (complex) data d000 = DIRECT_A3D_ELEM(data, z0, y0, x0); d001 = DIRECT_A3D_ELEM(data, z0, y0, x1); d010 = DIRECT_A3D_ELEM(data, z0, y1, x0); d011 = DIRECT_A3D_ELEM(data, z0, y1, x1); d100 = DIRECT_A3D_ELEM(data, z1, y0, x0); d101 = DIRECT_A3D_ELEM(data, z1, y0, x1); d110 = DIRECT_A3D_ELEM(data, z1, y1, x0); d111 = DIRECT_A3D_ELEM(data, z1, y1, x1); dx00 = LIN_INTERP(fx, d000, d001); dx01 = LIN_INTERP(fx, d100, d101); dx10 = LIN_INTERP(fx, d010, d011); dx11 = LIN_INTERP(fx, d110, d111); dxy0 = LIN_INTERP(fy, dx00, dx10); dxy1 = LIN_INTERP(fy, dx01, dx11); // Take complex conjugated for half with negative x ddd = LIN_INTERP(fz, dxy0, dxy1); if (is_neg_x) ddd = conj(ddd); // Also apply a phase shift for helical translation along Z if (ABS(helical_rise) > 0.) 
{ RFLOAT zshift = hh * helical_rise; zshift /= - ori_size * (RFLOAT)padding_factor; RFLOAT dotp = 2 * PI * (z * zshift); RFLOAT a = cos(dotp); RFLOAT b = sin(dotp); RFLOAT c = ddd.real; RFLOAT d = ddd.imag; RFLOAT ac = a * c; RFLOAT bd = b * d; RFLOAT ab_cd = (a + b) * (c + d); ddd = Complex(ac - bd, ab_cd - ac - bd); } // Accumulated sum of the data term A3D_ELEM(sum_data, k, i, j) += ddd; // Then interpolate (real) weight dd000 = DIRECT_A3D_ELEM(weight, z0, y0, x0); dd001 = DIRECT_A3D_ELEM(weight, z0, y0, x1); dd010 = DIRECT_A3D_ELEM(weight, z0, y1, x0); dd011 = DIRECT_A3D_ELEM(weight, z0, y1, x1); dd100 = DIRECT_A3D_ELEM(weight, z1, y0, x0); dd101 = DIRECT_A3D_ELEM(weight, z1, y0, x1); dd110 = DIRECT_A3D_ELEM(weight, z1, y1, x0); dd111 = DIRECT_A3D_ELEM(weight, z1, y1, x1); ddx00 = LIN_INTERP(fx, dd000, dd001); ddx01 = LIN_INTERP(fx, dd100, dd101); ddx10 = LIN_INTERP(fx, dd010, dd011); ddx11 = LIN_INTERP(fx, dd110, dd111); ddxy0 = LIN_INTERP(fy, ddx00, ddx10); ddxy1 = LIN_INTERP(fy, ddx01, ddx11); A3D_ELEM(sum_weight, k, i, j) += LIN_INTERP(fz, ddxy0, ddxy1); } // end if r2 <= rmax2 } // end loop over all elements of sum_weight } // end if hh!=0 } // end loop over hh data = sum_data; weight = sum_weight; } void BackProjector::applyPointGroupSymmetry(int threads) { //#define DEBUG_SYMM #ifdef DEBUG_SYMM std::cerr << " SL.SymsNo()= " << SL.SymsNo() << std::endl; std::cerr << " SL.true_symNo= " << SL.true_symNo << std::endl; #endif int rmax2 = ROUND(r_max * padding_factor) * ROUND(r_max * padding_factor); if (SL.SymsNo() > 0 && ref_dim == 3) { Matrix2D L(4, 4), R(4, 4); // A matrix from the list MultidimArray sum_weight; MultidimArray sum_data; // First symmetry operator (not stored in SL) is the identity matrix sum_weight = weight; sum_data = data; // Loop over all other symmetry operators for (int isym = 0; isym < SL.SymsNo(); isym++) { SL.get_matrices(isym, L, R); #ifdef DEBUG_SYMM std::cerr << " isym= " << isym << " R= " << R << std::endl; #endif // Loop over all points in the output (i.e. rotated, or summed) array #pragma omp parallel for num_threads(threads) for (long int k=STARTINGZ(sum_weight); k<=FINISHINGZ(sum_weight); k++) for (long int i=STARTINGY(sum_weight); i<=FINISHINGY(sum_weight); i++) for (long int j=STARTINGX(sum_weight); j<=FINISHINGX(sum_weight); j++) { RFLOAT x = (RFLOAT)j; // STARTINGX(sum_weight) is zero! 
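// Note on the Friedel-mate handling below: only the xp >= 0 half of the Hermitian
// transform is stored and F(-k) = conj(F(k)), so a rotated point with xp < 0 is
// read from (-xp, -yp, -zp) and the interpolated value is conjugated afterwards.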
RFLOAT y = (RFLOAT)i; RFLOAT z = (RFLOAT)k; RFLOAT r2 = x*x + y*y + z*z; if (r2 <= rmax2) { // coords_output(x,y) = A * coords_input (xp,yp) RFLOAT xp = x * R(0, 0) + y * R(0, 1) + z * R(0, 2); RFLOAT yp = x * R(1, 0) + y * R(1, 1) + z * R(1, 2); RFLOAT zp = x * R(2, 0) + y * R(2, 1) + z * R(2, 2); bool is_neg_x; // Only asymmetric half is stored if (xp < 0) { // Get complex conjugated hermitian symmetry pair xp = -xp; yp = -yp; zp = -zp; is_neg_x = true; } else { is_neg_x = false; } // Trilinear interpolation (with physical coords) // Subtract STARTINGY and STARTINGZ to accelerate access to data (STARTINGX=0) // In that way use DIRECT_A3D_ELEM, rather than A3D_ELEM int x0 = FLOOR(xp); RFLOAT fx = xp - x0; int x1 = x0 + 1; int y0 = FLOOR(yp); RFLOAT fy = yp - y0; y0 -= STARTINGY(data); int y1 = y0 + 1; int z0 = FLOOR(zp); RFLOAT fz = zp - z0; z0 -= STARTINGZ(data); int z1 = z0 + 1; #ifdef CHECK_SIZE if (x0 < 0 || y0 < 0 || z0 < 0 || x1 < 0 || y1 < 0 || z1 < 0 || x0 >= XSIZE(data) || y0 >= YSIZE(data) || z0 >= ZSIZE(data) || x1 >= XSIZE(data) || y1 >= YSIZE(data) || z1 >= ZSIZE(data) ) { std::cerr << " x0= " << x0 << " y0= " << y0 << " z0= " << z0 << std::endl; std::cerr << " x1= " << x1 << " y1= " << y1 << " z1= " << z1 << std::endl; data.printShape(); REPORT_ERROR("BackProjector::applyPointGroupSymmetry: checksize!!!"); } #endif // First interpolate (complex) data Complex d000 = DIRECT_A3D_ELEM(data, z0, y0, x0); Complex d001 = DIRECT_A3D_ELEM(data, z0, y0, x1); Complex d010 = DIRECT_A3D_ELEM(data, z0, y1, x0); Complex d011 = DIRECT_A3D_ELEM(data, z0, y1, x1); Complex d100 = DIRECT_A3D_ELEM(data, z1, y0, x0); Complex d101 = DIRECT_A3D_ELEM(data, z1, y0, x1); Complex d110 = DIRECT_A3D_ELEM(data, z1, y1, x0); Complex d111 = DIRECT_A3D_ELEM(data, z1, y1, x1); Complex dx00 = LIN_INTERP(fx, d000, d001); Complex dx01 = LIN_INTERP(fx, d100, d101); Complex dx10 = LIN_INTERP(fx, d010, d011); Complex dx11 = LIN_INTERP(fx, d110, d111); Complex dxy0 = LIN_INTERP(fy, dx00, dx10); Complex dxy1 = LIN_INTERP(fy, dx01, dx11); // Take complex conjugated for half with negative x if (is_neg_x) { A3D_ELEM(sum_data, k, i, j) += conj(LIN_INTERP(fz, dxy0, dxy1)); } else { A3D_ELEM(sum_data, k, i, j) += LIN_INTERP(fz, dxy0, dxy1); } // Then interpolate (real) weight RFLOAT dd000 = DIRECT_A3D_ELEM(weight, z0, y0, x0); RFLOAT dd001 = DIRECT_A3D_ELEM(weight, z0, y0, x1); RFLOAT dd010 = DIRECT_A3D_ELEM(weight, z0, y1, x0); RFLOAT dd011 = DIRECT_A3D_ELEM(weight, z0, y1, x1); RFLOAT dd100 = DIRECT_A3D_ELEM(weight, z1, y0, x0); RFLOAT dd101 = DIRECT_A3D_ELEM(weight, z1, y0, x1); RFLOAT dd110 = DIRECT_A3D_ELEM(weight, z1, y1, x0); RFLOAT dd111 = DIRECT_A3D_ELEM(weight, z1, y1, x1); RFLOAT ddx00 = LIN_INTERP(fx, dd000, dd001); RFLOAT ddx01 = LIN_INTERP(fx, dd100, dd101); RFLOAT ddx10 = LIN_INTERP(fx, dd010, dd011); RFLOAT ddx11 = LIN_INTERP(fx, dd110, dd111); RFLOAT ddxy0 = LIN_INTERP(fy, ddx00, ddx10); RFLOAT ddxy1 = LIN_INTERP(fy, ddx01, ddx11); A3D_ELEM(sum_weight, k, i, j) += LIN_INTERP(fz, ddxy0, ddxy1); } // end if r2 <= rmax2 } // end loop over all elements of sum_weight } // end loop over symmetry operators data = sum_data; weight = sum_weight; // Average // The division should only be done if we would search all (C1) directions, not if we restrict the angular search! 
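// (If enabled, the block below would divide the accumulated sums by the total
//  number of applied operators: the SL.SymsNo() matrices in the list plus the
//  identity, hence the (SL.SymsNo() + 1) denominator.)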
/* FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(data) { DIRECT_MULTIDIM_ELEM(data, n) = DIRECT_MULTIDIM_ELEM(sum_data, n) / (RFLOAT)(SL.SymsNo() + 1); DIRECT_MULTIDIM_ELEM(weight, n) = DIRECT_MULTIDIM_ELEM(sum_weight, n) / (RFLOAT)(SL.SymsNo() + 1); } */ } } void BackProjector::convoluteBlobRealSpace(FourierTransformer &transformer, bool do_mask) { MultidimArray Mconv; int padhdim = pad_size / 2; // Set up right dimension of real-space array // TODO: resize this according to r_max!!! if (ref_dim==2) Mconv.reshape(pad_size, pad_size); else Mconv.reshape(pad_size, pad_size, pad_size); // inverse FFT transformer.setReal(Mconv); transformer.inverseFourierTransform(); // Blob normalisation in Fourier space RFLOAT normftblob = tab_ftblob(0.); // TMP DEBUGGING //struct blobtype blob; //blob.order = 0; //blob.radius = 1.9 * padding_factor; //blob.alpha = 15; // Multiply with FT of the blob kernel FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY3D(Mconv) { int kp = (k < padhdim) ? k : k - pad_size; int ip = (i < padhdim) ? i : i - pad_size; int jp = (j < padhdim) ? j : j - pad_size; RFLOAT rval = sqrt ( (RFLOAT)(kp * kp + ip * ip + jp * jp) ) / (ori_size * padding_factor); //if (kp==0 && ip==0 && jp > 0) // std::cerr << " jp= " << jp << " rval= " << rval << " tab_ftblob(rval) / normftblob= " << tab_ftblob(rval) / normftblob << " ori_size/2= " << ori_size/2 << std::endl; // In the final reconstruction: mask the real-space map beyond its original size to prevent aliasing ghosts // Note that rval goes until 1/2 in the oversampled map if (do_mask && rval > 1./(2. * padding_factor)) DIRECT_A3D_ELEM(Mconv, k, i, j) = 0.; else DIRECT_A3D_ELEM(Mconv, k, i, j) *= (tab_ftblob(rval) / normftblob); } // forward FFT to go back to Fourier-space transformer.FourierTransform(); } void BackProjector::windowToOridimRealSpace(FourierTransformer &transformer, MultidimArray &Mout, bool printTimes) { #ifdef TIMING Timer OriDimTimer; int OriDim1 = OriDimTimer.setNew(" OrD1_getFourier "); int OriDim2 = OriDimTimer.setNew(" OrD2_windowFFT "); int OriDim3 = OriDimTimer.setNew(" OrD3_reshape "); int OriDim4 = OriDimTimer.setNew(" OrD4_setReal "); int OriDim5 = OriDimTimer.setNew(" OrD5_invFFT "); int OriDim6 = OriDimTimer.setNew(" OrD6_centerFFT "); int OriDim7 = OriDimTimer.setNew(" OrD7_window "); int OriDim8 = OriDimTimer.setNew(" OrD8_norm "); int OriDim9 = OriDimTimer.setNew(" OrD9_softMask "); #endif RCTIC(OriDimTimer,OriDim1); MultidimArray& Fin = transformer.getFourierReference(); RCTOC(OriDimTimer,OriDim1); RCTIC(OriDimTimer,OriDim2); MultidimArray Ftmp; // Size of padded real-space volume int padoridim = ROUND(padding_factor * ori_size); // make sure padoridim is even padoridim += padoridim%2; RFLOAT normfft; //#define DEBUG_WINDOWORIDIMREALSPACE #ifdef DEBUG_WINDOWORIDIMREALSPACE Image tt; tt().reshape(ZSIZE(Fin), YSIZE(Fin), XSIZE(Fin)); FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Fin) { DIRECT_MULTIDIM_ELEM(tt(), n) = abs(DIRECT_MULTIDIM_ELEM(Fin, n)); } tt.write("windoworidim_Fin.spi"); #endif // Resize incoming complex array to the correct size windowFourierTransform(Fin, padoridim); RCTOC(OriDimTimer,OriDim2); RCTIC(OriDimTimer,OriDim3); if (ref_dim == 2) { Mout.reshape(padoridim, padoridim); if (data_dim == 2) normfft = (RFLOAT)(padding_factor * padding_factor); else normfft = (RFLOAT)(padding_factor * padding_factor * ori_size); } else { Mout.reshape(padoridim, padoridim, padoridim); if (data_dim == 3) normfft = (RFLOAT)(padding_factor * padding_factor * padding_factor); else normfft = (RFLOAT)(padding_factor * padding_factor * 
padding_factor * ori_size); } Mout.setXmippOrigin(); RCTOC(OriDimTimer,OriDim3); #ifdef DEBUG_WINDOWORIDIMREALSPACE tt().reshape(ZSIZE(Fin), YSIZE(Fin), XSIZE(Fin)); FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Fin) { DIRECT_MULTIDIM_ELEM(tt(), n) = abs(DIRECT_MULTIDIM_ELEM(Fin, n)); } tt.write("windoworidim_Fresized.spi"); #endif // Shift the map back to its origin RCTIC(OriDimTimer,OriDim6); CenterFFTbySign(Fin); RCTOC(OriDimTimer,OriDim6); // Do the inverse FFT RCTIC(OriDimTimer,OriDim4); transformer.setReal(Mout); RCTOC(OriDimTimer,OriDim4); RCTIC(OriDimTimer,OriDim5); #ifdef TIMING if(printTimes) std::cout << std::endl << "FFTrealDims = (" << transformer.fReal->xdim << " , " << transformer.fReal->ydim << " , " << transformer.fReal->zdim << " ) " << std::endl; #endif transformer.inverseFourierTransform(); RCTOC(OriDimTimer,OriDim5); //transformer.inverseFourierTransform(Fin, Mout); Fin.clear(); transformer.fReal = NULL; // Make sure to re-calculate fftw plan Mout.setXmippOrigin(); #ifdef DEBUG_WINDOWORIDIMREALSPACE tt()=Mout; tt.write("windoworidim_Munwindowed.spi"); #endif // Window in real-space RCTIC(OriDimTimer,OriDim7); if (ref_dim==2) { Mout.window(FIRST_XMIPP_INDEX(ori_size), FIRST_XMIPP_INDEX(ori_size), LAST_XMIPP_INDEX(ori_size), LAST_XMIPP_INDEX(ori_size)); } else { Mout.window(FIRST_XMIPP_INDEX(ori_size), FIRST_XMIPP_INDEX(ori_size), FIRST_XMIPP_INDEX(ori_size), LAST_XMIPP_INDEX(ori_size), LAST_XMIPP_INDEX(ori_size), LAST_XMIPP_INDEX(ori_size)); } Mout.setXmippOrigin(); RCTOC(OriDimTimer,OriDim7); // Normalisation factor of FFTW // The Fourier Transforms are all "normalised" for 2D transforms of size = ori_size x ori_size RCTIC(OriDimTimer,OriDim8); Mout /= normfft; RCTOC(OriDimTimer,OriDim8); #ifdef DEBUG_WINDOWORIDIMREALSPACE tt()=Mout; tt.write("windoworidim_Mwindowed.spi"); #endif // Mask out corners to prevent aliasing artefacts RCTIC(OriDimTimer,OriDim9); softMaskOutsideMap(Mout); RCTOC(OriDimTimer,OriDim9); #ifdef DEBUG_WINDOWORIDIMREALSPACE tt()=Mout; tt.write("windoworidim_Mwindowed_masked.spi"); FourierTransformer ttf; ttf.FourierTransform(Mout, Fin); tt().resize(ZSIZE(Fin), YSIZE(Fin), XSIZE(Fin)); FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Fin) { DIRECT_MULTIDIM_ELEM(tt(), n) = abs(DIRECT_MULTIDIM_ELEM(Fin, n)); } tt.write("windoworidim_Fnew.spi"); #endif #ifdef TIMING if(printTimes) OriDimTimer.printTimes(true); #endif } relion-3.1.3/src/backprojector.h000066400000000000000000000326621411340063500166040ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ /* * backprojector.h * * Created on: 24 Aug 2010 * Author: scheres */ #ifndef BACKPROJECTOR_H_ #define BACKPROJECTOR_H_ #define DEFAULT_EXTERNAL_RECONSTRUCT "relion_external_reconstruct" #include "src/projector.h" #include "src/mask.h" #include "src/tabfuncs.h" #include "src/symmetries.h" #include "src/jaz/complex_io.h" class BackProjector: public Projector { public: // For backward projection: sum of weights MultidimArray weight; // Tabulated blob values TabFtBlob tab_ftblob; // Symmetry object SymList SL; // Helical twist RFLOAT twist; // Helical rise RFLOAT rise; // Helical range int H; // Skip the iterative gridding part of the reconstruction bool skip_gridding; public: BackProjector(){} /** Empty constructor * * A BackProjector is created. * * @code * BackProjector BPref(orisize, 3, "d2"); * @endcode */ BackProjector(int _ori_size, int _ref_dim, FileName fn_sym, int _interpolator = TRILINEAR, float _padding_factor_3d = 2, int _r_min_nn = 10, int _blob_order = 0, RFLOAT _blob_radius = 1.9, RFLOAT _blob_alpha = 15, int _data_dim = 2, bool _skip_gridding = false) { // Store original dimension ori_size = _ori_size; // Set dimensionality of the references ref_dim = _ref_dim; // and of the data data_dim = _data_dim; // Skip gridding skip_gridding = _skip_gridding; // Set the symmetry object SL.read_sym_file(fn_sym); // Padding factor for the map if (_padding_factor_3d < 1.0) REPORT_ERROR("Padding factor cannot be less than 1."); padding_factor = _padding_factor_3d; // Interpolation scheme interpolator = _interpolator; // Minimum radius for NN interpolation r_min_nn = _r_min_nn; // Precalculate tabulated ftblob values //tab_ftblob.initialise(_blob_radius * padding_factor, _blob_alpha, _blob_order, 10000); // Sjors 8aug2017: try to fix problems with pad1 reconstrctions tab_ftblob.initialise(_blob_radius * 2., _blob_alpha, _blob_order, 10000); } /** Copy constructor * * The created BackProjector is a perfect copy of the input array but with a * different memory assignment. * * @code * BackProjector V2(V1); * @endcode */ BackProjector(const BackProjector& op) { clear(); *this = op; } /** Assignment. * * You can build as complex assignment expressions as you like. Multiple * assignment is allowed. */ BackProjector& operator=(const BackProjector& op) { if (&op != this) { // Projector stuff (is this necessary in C++?) 
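// (Yes: because this user-defined operator= replaces the compiler-generated one,
//  the members inherited from Projector are not assigned automatically and have
//  to be copied explicitly here, or via a call to Projector::operator=(op).)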
data = op.data; ori_size = op.ori_size; pad_size = op.pad_size; r_max = op.r_max; r_min_nn = op.r_min_nn; interpolator = op.interpolator; padding_factor = op.padding_factor; ref_dim = op.ref_dim; data_dim = op.data_dim; skip_gridding = op.skip_gridding; // BackProjector stuff weight = op.weight; tab_ftblob = op.tab_ftblob; SL = op.SL; } return *this; } /** Destructor * * Clears everything * * @code * FourierInterpolator fourint; * @endcode */ ~BackProjector() { clear(); } void clear() { skip_gridding = false; weight.clear(); Projector::clear(); } // Initialise data and weight arrays to the given size and set all values to zero void initialiseDataAndWeight(int current_size = -1); // Initialise data and weight arrays to the given size and set all values to zero void initZeros(int current_size = -1); /* * Set a 2D Fourier Transform back into the 2D or 3D data array * Depending on the dimension of the map, this will be a backprojection or a rotation operation */ void set2DFourierTransform(const MultidimArray &img_in, const Matrix2D &A, const MultidimArray *Mweight = NULL, RFLOAT r_ewald_sphere = -1., bool is_positive_curvature = true, Matrix2D* magMatrix = 0) { // Back-rotation of a 3D Fourier Transform if (img_in.getDim() == 3) { if (ref_dim != 3) REPORT_ERROR("Backprojector::set3DFourierTransform%%ERROR: Dimension of the data array should be 3"); backrotate3D(img_in, A, Mweight); } else if (img_in.getDim() == 1) { if (ref_dim != 2) REPORT_ERROR("Backprojector::set1DFourierTransform%%ERROR: Dimension of the data array should be 2"); backproject1Dto2D(img_in, A, Mweight); } else { switch (ref_dim) { case 2: backrotate2D(img_in, A, Mweight, magMatrix); break; case 3: backproject2Dto3D(img_in, A, Mweight, r_ewald_sphere, is_positive_curvature, magMatrix); break; default: REPORT_ERROR("Backprojector::set2DSlice%%ERROR: Dimension of the data array should be 2 or 3"); } } } /* * Set an in-plane rotated version of the 2D map into the data array (mere interpolation) * If a exp_Mweight is given, rather than adding 1 to all relevant pixels in the weight array, we use exp_Mweight */ void backrotate2D(const MultidimArray &img_in, const Matrix2D &A, const MultidimArray *Mweight = NULL, Matrix2D* magMatrix = 0); /* * Set a 3D-rotated version of the 3D map into the data array (mere interpolation) * If a exp_Mweight is given, rather than adding 1 to all relevant pixels in the weight array, we use exp_Mweight */ void backrotate3D(const MultidimArray &img_in, const Matrix2D &A, const MultidimArray *Mweight = NULL); /* * Set a 2D slice in the 3D map (backward projection) * If a exp_Mweight is given, rather than adding 1 to all relevant pixels in the weight array, we use exp_Mweight */ void backproject2Dto3D(const MultidimArray &img_in, const Matrix2D &A, const MultidimArray *Mweight = NULL, RFLOAT r_ewald_sphere = -1., bool is_positive_curvature = true, Matrix2D* magMatrix = 0); /* * Set a 1D slice in the 2D map (backward projection) * If a exp_Mweight is given, rather than adding 1 to all relevant pixels in the weight array, we use exp_Mweight */ void backproject1Dto2D(const MultidimArray &img_in, const Matrix2D &A, const MultidimArray *Mweight = NULL); /* * Get only the lowest resolution components from the data and weight array * (to be joined together for two independent halves in order to force convergence in the same orientation) */ void getLowResDataAndWeight(MultidimArray &lowres_data, MultidimArray &lowres_weight, int lowres_r_max); /* * Set only the lowest resolution components from the data and 
weight array * (to be joined together for two independent halves in order to force convergence in the same orientation) */ void setLowResDataAndWeight(MultidimArray &lowres_data, MultidimArray &lowres_weight, int lowres_r_max); /* * Get complex array at the original size as the straightforward average * padding_factor*padding_factor*padding_factor voxels * This will then be used for FSC calculation between two random halves */ void getDownsampledAverage(MultidimArray& avg, bool divide = true) const; /* * From two of the straightforward downsampled averages, calculate an FSC curve */ void calculateDownSampledFourierShellCorrelation(const MultidimArray& avg1, const MultidimArray& avg2, MultidimArray& fsc) const; void updateSSNRarrays(RFLOAT tau2_fudge, MultidimArray &tau2_io, MultidimArray &sigma2_out, MultidimArray &evidence_vs_prior_out, MultidimArray &fourier_coverage_out, const MultidimArray& fsc, bool update_tau2_with_fsc = false, bool is_whole_instead_of_half = false); /* Get the 3D reconstruction, but perform it through a system call outside relion_refine! */ void externalReconstruct(MultidimArray &vol_out, FileName &fn_out, MultidimArray &fsc_halves_io, MultidimArray &tau2_io, MultidimArray &sigma2_ref, MultidimArray &data_vs_prior, bool is_whole_instead_of_half = false, RFLOAT tau2_fudge = 1., int verb = 0); /* Get the 3D reconstruction * If do_map is true, 1 will be added to all weights * alpha will contain the noise-reduction spectrum */ void reconstruct(MultidimArray &vol_out, int max_iter_preweight, bool do_map, const MultidimArray &tau2, RFLOAT tau2_fudge = 1., RFLOAT normalise = 1., int minres_map = -1, bool printTimes= false, Image* weight_out = 0); /* Enforce Hermitian symmetry, apply helical symmetry as well as point-group symmetry */ void symmetrise(int nr_helical_asu = 1, RFLOAT helical_twist = 0., RFLOAT helical_rise = 0., int threads = 1); /* Enforce hermitian symmetry on data and on weight (all points in the x==0 plane) * Because the interpolations are numerical, hermitian symmetry may be broken. * Repairing it here gives like a 2-fold averaging correction for interpolation errors... */ void enforceHermitianSymmetry(); /* Applies helical symmetry. Note that helical_rise is in PIXELS here, as BackProjector doesn't know angpix */ void applyHelicalSymmetry(int nr_helical_asu = 1, RFLOAT helical_twist = 0., RFLOAT helical_rise = 0.); /* Applies the symmetry from the SymList object to the weight and the data array */ void applyPointGroupSymmetry(int threads = 1); /* Convolute in Fourier-space with the blob by multiplication in real-space * Note the convolution is done on the complex array inside the transformer object!! */ void convoluteBlobRealSpace(FourierTransformer &transformer, bool do_mask = false); /* Calculate the inverse FFT of Fin and windows the result to ori_size * Also pass the transformer, to prevent making and clearing a new one before clearing the one in reconstruct() */ void windowToOridimRealSpace(FourierTransformer &transformer, MultidimArray &Mout, bool printTimes = false); /* * The same, but without the spherical cropping and thus invertible */ template static void decenterWhole(MultidimArray &Min, MultidimArray &Mout) { if (Mout.xdim != Min.xdim || Mout.ydim != Min.ydim || Mout.zdim != Min.zdim) { Mout = MultidimArray(Min.zdim, Min.ydim, Min.xdim); } Mout.initZeros(); const int s = Min.ydim; for (long int z = 0; z < Min.zdim; z++) for (long int y = 0; y < Min.ydim; y++) for (long int x = 0; x < Min.xdim; x++) { long int zz = z < Min.xdim? 
z + s/2 : z - s/2 - 1; long int yy = y < Min.xdim? y + s/2 : y - s/2 - 1; long int xx = x; if (xx >= 0 && xx < Min.xdim && yy >= 0 && yy < Min.ydim && zz >= 0 && zz < Min.zdim) { DIRECT_A3D_ELEM(Mout, z, y, x) = T2(DIRECT_A3D_ELEM(Min, zz, yy, xx)); } } } /* * Inverse of the above */ template static void recenterWhole(MultidimArray &Min, MultidimArray &Mout) { if (Mout.xdim != Min.xdim || Mout.ydim != Min.ydim || Mout.zdim != Min.zdim) { Mout = MultidimArray(Min.zdim, Min.ydim, Min.xdim); } Mout.initZeros(); const int s = Min.ydim; for (long int z = 0; z < Min.zdim; z++) for (long int y = 0; y < Min.ydim; y++) for (long int x = 0; x < Min.xdim; x++) { long int zz = z < Min.xdim? z + s/2 : z - s/2 - 1; long int yy = y < Min.xdim? y + s/2 : y - s/2 - 1; long int xx = x; if (xx >= 0 && xx < Min.xdim && yy >= 0 && yy < Min.ydim && zz >= 0 && zz < Min.zdim) { DIRECT_A3D_ELEM(Mout, zz, yy, xx) = T2(DIRECT_A3D_ELEM(Min, z, y, x)); } } } #ifdef RELION_SINGLE_PRECISION // Fnewweight needs decentering, but has to be in double-precision for correct calculations! template void decenter(MultidimArray &Min, MultidimArray &Mout, int my_rmax2) { // Mout should already have the right size // Initialize to zero Mout.initZeros(); FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM(Mout) { if (kp*kp + ip*ip + jp*jp <= my_rmax2) DIRECT_A3D_ELEM(Mout, k, i, j) = (double)A3D_ELEM(Min, kp, ip, jp); } } #endif }; #endif /* BACKPROJECTOR_H_ */ relion-3.1.3/src/complex.cpp000066400000000000000000000000461411340063500157450ustar00rootroot00000000000000// This file intentionally left blank relion-3.1.3/src/complex.h000066400000000000000000000020541411340063500154130ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef COMPLEX_H_ #define COMPLEX_H_ #include #include typedef tComplex Complex; #endif relion-3.1.3/src/ctf.cpp000066400000000000000000000532111411340063500150540ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. 
Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ /*************************************************************************** * * Authors: Carlos Oscar S. Sorzano (coss@cnb.csic.es) * * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA * 02111-1307 USA * * All comments concerning this program package may be sent to the * e-mail address 'xmipp@cnb.csic.es' ***************************************************************************/ #include "src/ctf.h" #include "src/args.h" #include "src/fftw.h" #include "src/metadata_table.h" #include #include using namespace gravis; /* Read -------------------------------------------------------------------- */ void CTF::readByGroup(const MetaDataTable &partMdt, ObservationModel* obs, long int particle) { opticsGroup = 0; if (obs != 0) { partMdt.getValue(EMDL_IMAGE_OPTICS_GROUP, opticsGroup, particle); } opticsGroup--; readValue(EMDL_CTF_VOLTAGE, kV, 200, particle, opticsGroup, partMdt, obs); readValue(EMDL_CTF_DEFOCUSU, DeltafU, 0, particle, opticsGroup, partMdt, obs); readValue(EMDL_CTF_DEFOCUSV, DeltafV, DeltafU, particle, opticsGroup, partMdt, obs); readValue(EMDL_CTF_DEFOCUS_ANGLE, azimuthal_angle, 0, particle, opticsGroup, partMdt, obs); readValue(EMDL_CTF_CS, Cs, 0, particle, opticsGroup, partMdt, obs); readValue(EMDL_CTF_BFACTOR, Bfac, 0, particle, opticsGroup, partMdt, obs); readValue(EMDL_CTF_SCALEFACTOR, scale, 1, particle, opticsGroup, partMdt, obs); readValue(EMDL_CTF_Q0, Q0, 0, particle, opticsGroup, partMdt, obs); readValue(EMDL_CTF_PHASESHIFT, phase_shift, 0, particle, opticsGroup, partMdt, obs); initialise(); obsModel = obs; } void CTF::readValue(EMDLabel label, RFLOAT& dest, RFLOAT defaultVal, long int particle, int opticsGroup, const MetaDataTable& partMdt, const ObservationModel* obs) { if (!partMdt.getValue(label, dest, particle)) { if (opticsGroup < 0 || !obs->opticsMdt.getValue(label, dest, opticsGroup)) { dest = defaultVal; } } } void CTF::read(const MetaDataTable &MD1, const MetaDataTable &MD2, long int objectID) { if (!MD1.getValue(EMDL_CTF_VOLTAGE, kV, objectID)) if (!MD2.getValue(EMDL_CTF_VOLTAGE, kV, objectID)) kV = 200; if (!MD1.getValue(EMDL_CTF_DEFOCUSU, DeltafU, objectID)) if (!MD2.getValue(EMDL_CTF_DEFOCUSU, DeltafU, objectID)) DeltafU = 0; if (!MD1.getValue(EMDL_CTF_DEFOCUSV, DeltafV, objectID)) if (!MD2.getValue(EMDL_CTF_DEFOCUSV, DeltafV, objectID)) DeltafV = DeltafU; if (!MD1.getValue(EMDL_CTF_DEFOCUS_ANGLE, azimuthal_angle, objectID)) if (!MD2.getValue(EMDL_CTF_DEFOCUS_ANGLE, azimuthal_angle, objectID)) azimuthal_angle = 0; if (!MD1.getValue(EMDL_CTF_CS, Cs, objectID)) if (!MD2.getValue(EMDL_CTF_CS, Cs, objectID)) Cs = 0; if (!MD1.getValue(EMDL_CTF_BFACTOR, Bfac, objectID)) if 
(!MD2.getValue(EMDL_CTF_BFACTOR, Bfac, objectID)) Bfac = 0; if (!MD1.getValue(EMDL_CTF_SCALEFACTOR, scale, objectID)) if (!MD2.getValue(EMDL_CTF_SCALEFACTOR, scale, objectID)) scale = 1; if (!MD1.getValue(EMDL_CTF_Q0, Q0, objectID)) if (!MD2.getValue(EMDL_CTF_Q0, Q0, objectID)) Q0 = 0; if (!MD1.getValue(EMDL_CTF_PHASESHIFT, phase_shift, objectID)) if (!MD2.getValue(EMDL_CTF_PHASESHIFT, phase_shift, objectID)) phase_shift = 0; initialise(); } void CTF::setValues(RFLOAT _defU, RFLOAT _defV, RFLOAT _defAng, RFLOAT _voltage, RFLOAT _Cs, RFLOAT _Q0, RFLOAT _Bfac, RFLOAT _scale, RFLOAT _phase_shift) { kV = _voltage; DeltafU = _defU; DeltafV = _defV; azimuthal_angle = _defAng; Cs = _Cs; Bfac = _Bfac; scale = _scale; Q0 = _Q0; phase_shift = _phase_shift; initialise(); } void CTF::setValuesByGroup(ObservationModel *obs, int _opticsGroup, RFLOAT _defU, RFLOAT _defV, RFLOAT _defAng, RFLOAT _Bfac, RFLOAT _scale, RFLOAT _phase_shift) { opticsGroup = _opticsGroup; DeltafU = _defU; DeltafV = _defV; azimuthal_angle = _defAng; Bfac = _Bfac; scale = _scale; phase_shift = _phase_shift; obs->opticsMdt.getValue(EMDL_CTF_VOLTAGE, kV, opticsGroup); obs->opticsMdt.getValue(EMDL_CTF_CS, Cs, opticsGroup); obs->opticsMdt.getValue(EMDL_CTF_Q0, Q0, opticsGroup); initialise(); obsModel = obs; } /* Read from 1 MetaDataTable ----------------------------------------------- */ void CTF::read(const MetaDataTable &MD) { MetaDataTable MDempty; MDempty.addObject(); // add one empty object read(MD, MDempty); } /** Write to an existing object in a MetaDataTable. */ void CTF::write(MetaDataTable &MD) { // From version-3.1 onwards: store kV, Cs, Q0 in optics table //MD.setValue(EMDL_CTF_VOLTAGE, kV); MD.setValue(EMDL_CTF_DEFOCUSU, DeltafU); MD.setValue(EMDL_CTF_DEFOCUSV, DeltafV); MD.setValue(EMDL_CTF_DEFOCUS_ANGLE, azimuthal_angle); //MD.setValue(EMDL_CTF_CS, Cs); MD.setValue(EMDL_CTF_BFACTOR, Bfac); MD.setValue(EMDL_CTF_SCALEFACTOR, scale); MD.setValue(EMDL_CTF_PHASESHIFT, phase_shift); //MD.setValue(EMDL_CTF_Q0, Q0); } /* Write ------------------------------------------------------------------- */ void CTF::write(std::ostream &out) { MetaDataTable MD; MD.addObject(); write(MD); MD.write(out); } /* Initialise the CTF ------------------------------------------------------ */ void CTF::initialise() { // Change units RFLOAT local_Cs = Cs * 1e7; RFLOAT local_kV = kV * 1e3; rad_azimuth = DEG2RAD(azimuthal_angle); // Average focus and deviation defocus_average = -(DeltafU + DeltafV) * 0.5; defocus_deviation = -(DeltafU - DeltafV) * 0.5; // lambda=h/sqrt(2*m*e*kV) // h: Planck constant // m: electron mass // e: electron charge // lambda=0.387832/sqrt(kV*(1.+0.000978466*kV)); // Hewz: Angstroms // lambda=h/sqrt(2*m*e*kV) lambda=12.2643247 / sqrt(local_kV * (1. + local_kV * 0.978466e-6)); // See http://en.wikipedia.org/wiki/Electron_diffraction // Helpful constants // ICE: X(u)=-PI/2*deltaf(u)*lambda*u^2+PI/2*Cs*lambda^3*u^4 // = K1*deltaf(u)*u^2 +K2*u^4 K1 = PI / 2 * 2 * lambda; K2 = PI / 2 * local_Cs * lambda * lambda * lambda; K3 = atan(Q0/sqrt(1-Q0*Q0)); K4 = -Bfac / 4.; // Phase shift in radian K5 = DEG2RAD(phase_shift); if (Q0 < 0. || Q0 > 1.) REPORT_ERROR("CTF::initialise ERROR: AmplitudeContrast Q0 cannot be smaller than zero or larger than one!"); if (ABS(DeltafU) < 1e-6 && ABS(DeltafV) < 1e-6 && ABS(Q0) < 1e-6 && ABS(Cs) < 1e-6) REPORT_ERROR("CTF::initialise: ERROR: CTF initialises to all-zero values. 
Was a correct STAR file provided?"); // express astigmatism as a bilinear form: const double sin_az = sin(rad_azimuth); const double cos_az = cos(rad_azimuth); d2Matrix Q(cos_az, sin_az, -sin_az, cos_az); d2Matrix Qt(cos_az, -sin_az, sin_az, cos_az); d2Matrix D(-DeltafU, 0.0, 0.0, -DeltafV); d2Matrix A = Qt * D * Q; Axx = A(0,0); Axy = A(0,1); Ayy = A(1,1); } RFLOAT CTF::getGamma(RFLOAT X, RFLOAT Y) const { if (obsModel != 0 && obsModel->hasMagMatrices) { const Matrix2D& M = obsModel->getMagMatrix(opticsGroup); RFLOAT XX = M(0,0) * X + M(0,1) * Y; RFLOAT YY = M(1,0) * X + M(1,1) * Y; X = XX; Y = YY; } RFLOAT u2 = X * X + Y * Y; RFLOAT u4 = u2 * u2; return K1 * (Axx * X * X + 2.0 * Axy * X * Y + Ayy * Y * Y) + K2 * u4 - K5 - K3; } RFLOAT CTF::getCtfFreq(RFLOAT X, RFLOAT Y) { RFLOAT u2 = X * X + Y * Y; RFLOAT u = sqrt(u2); RFLOAT deltaf = getDeltaF(X, Y); return 2.0 * K1 * deltaf * u + 4.0 * K2 * u * u * u; } t2Vector CTF::getGammaGrad(RFLOAT X, RFLOAT Y) const { if (obsModel != 0 && obsModel->hasMagMatrices) { const Matrix2D& M = obsModel->getMagMatrix(opticsGroup); RFLOAT XX = M(0,0) * X + M(0,1) * Y; RFLOAT YY = M(1,0) * X + M(1,1) * Y; X = XX; Y = YY; } RFLOAT u2 = X * X + Y * Y; //RFLOAT u4 = u2 * u2; // u4 = (X² + Y²)² // du4/dx = 2 (X² + Y²) 2 X = 4 (X³ + XY²) = 4 u2 X return t2Vector(2.0 * K1 * Axx * X + 2.0 * K1 * Axy * Y + 4.0 * K2 * u2 * X, 2.0 * K1 * Ayy * Y + 2.0 * K1 * Axy * X + 4.0 * K2 * u2 * Y); } /* Generate a complete CTF Image ------------------------------------------------------ */ void CTF::getFftwImage(MultidimArray &result, int orixdim, int oriydim, RFLOAT angpix, bool do_abs, bool do_only_flip_phases, bool do_intact_until_first_peak, bool do_damping, bool do_ctf_padding, bool do_intact_after_first_peak) const { // Boxing the particle in a small box from the whole micrograph leads to loss of delocalised information (or aliaising in the CTF) // Here, calculate the CTF in a 2x larger box to support finer oscillations, // and then rescale the large CTF to simulate the effect of the windowing operation if (do_ctf_padding) { bool ctf_premultiplied=false; if (obsModel != 0) { ctf_premultiplied = obsModel->getCtfPremultiplied(opticsGroup); } // two-fold padding, increased to 4-fold padding for pre-multiplied CTFs int orixdim_pad = 2 * orixdim; int oriydim_pad = 2 * oriydim; // TODO: Such a big box may not be really necessary..... if (ctf_premultiplied) { orixdim_pad *= 2; oriydim_pad *= 2; } MultidimArray Fctf(oriydim_pad, orixdim_pad/2 + 1); getFftwImage(Fctf, orixdim_pad, oriydim_pad, angpix, do_abs, do_only_flip_phases, do_intact_until_first_peak, do_damping, false, do_intact_after_first_peak); // From half to whole MultidimArray Mctf(oriydim_pad, orixdim_pad); Mctf.setXmippOrigin(); for (int i = 0 ; i 1.) 
DIRECT_A2D_ELEM(result, i, j) = 1.; else DIRECT_A2D_ELEM(result, i, j) = sqrt(A2D_ELEM(Mctf, ip, j)); } else DIRECT_A2D_ELEM(result, i, j) = A2D_ELEM(Mctf, ip, j); } } } } else { RFLOAT xs = (RFLOAT)orixdim * angpix; RFLOAT ys = (RFLOAT)oriydim * angpix; if (obsModel != 0 && obsModel->hasEvenZernike) { if (orixdim != oriydim) { REPORT_ERROR_STR("CTF::getFftwImage: symmetric aberrations are currently only " << "supported for square images.\n"); } if (obsModel->getBoxSize(opticsGroup) != orixdim) { REPORT_ERROR_STR("CTF::getFftwImage: requested output image size " << orixdim << " is not consistent with that in the optics group table " << obsModel->getBoxSize(opticsGroup) << "\n"); } if (fabs(obsModel->getPixelSize(opticsGroup) - angpix) > 1e-4) { REPORT_ERROR_STR("CTF::getFftwImage: requested pixel size " << angpix << " is not consistent with that in the optics group table " << obsModel->getPixelSize(opticsGroup) << "\n"); } const Image& gammaOffset = obsModel->getGammaOffset(opticsGroup, oriydim); for (int y1 = 0; y1 < result.ydim; y1++) for (int x1 = 0; x1 < result.xdim; x1++) { RFLOAT x = x1 / xs; RFLOAT y = y1 <= result.ydim/2? y1 / ys : (y1 - result.ydim) / ys; const int x0 = x1; const int y0 = y1 <= result.ydim/2? y1 : gammaOffset.data.ydim + y1 - result.ydim; DIRECT_A2D_ELEM(result, y1, x1) = getCTF(x, y, do_abs, do_only_flip_phases, do_intact_until_first_peak, do_damping, gammaOffset(y0,x0), do_intact_after_first_peak); } } else { FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM2D(result) { RFLOAT x = (RFLOAT)jp / xs; RFLOAT y = (RFLOAT)ip / ys; DIRECT_A2D_ELEM(result, i, j) = getCTF(x, y, do_abs, do_only_flip_phases, do_intact_until_first_peak, do_damping, 0.0, do_intact_after_first_peak); } } } } /* Generate a complete CTFP (complex) image (with sector along angle) ------------------------------------------------------ */ void CTF::getCTFPImage(MultidimArray &result, int orixdim, int oriydim, RFLOAT angpix, bool is_positive, float angle) { if (angle < 0 || angle >= 360.) { REPORT_ERROR("CTF::getCTFPImage: angle should be in [0,360>"); } // Angles larger than 180, are the inverse of the other half! if (angle >= 180.) { angle -= 180.; is_positive = !is_positive; } float anglerad = DEG2RAD(angle); RFLOAT xs = (RFLOAT)orixdim * angpix; RFLOAT ys = (RFLOAT)oriydim * angpix; if (obsModel != 0 && obsModel->hasEvenZernike) { if (orixdim != oriydim) { REPORT_ERROR_STR("CTF::getFftwImage: symmetric aberrations are currently only " << "supported for square images.\n"); } const Image& gammaOffset = obsModel->getGammaOffset(opticsGroup, oriydim); if ( gammaOffset.data.xdim < result.xdim || gammaOffset.data.ydim < result.ydim) { REPORT_ERROR_STR("CTF::getFftwImage: requested output image is larger than the original: " << gammaOffset.data.xdim << "x" << gammaOffset.data.ydim << " available, " << result.xdim << "x" << result.ydim << " requested\n"); } for (int y1 = 0; y1 < result.ydim; y1++) for (int x1 = 0; x1 < result.xdim; x1++) { RFLOAT x = x1 / xs; RFLOAT y = y1 <= result.ydim/2? y1 / ys : (y1 - result.ydim) / ys; RFLOAT myangle = (x * x + y * y > 0) ? acos(y / sqrt(x * x + y * y)) : 0; // dot-product with Y-axis: (0,1) const int x0 = x1; const int y0 = y1 <= result.ydim/2? 
y1 : gammaOffset.data.ydim + y1 - result.ydim; if (myangle >= anglerad) DIRECT_A2D_ELEM(result, y1, x1) = getCTFP(x, y, is_positive, gammaOffset(y0, x0)); else DIRECT_A2D_ELEM(result, y1, x1) = getCTFP(x, y, !is_positive, gammaOffset(y0, x0)); } } else { FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM2D(result) { RFLOAT x = (RFLOAT)jp / xs; RFLOAT y = (RFLOAT)ip / ys; RFLOAT myangle = (x * x + y * y > 0) ? acos(y / sqrt(x * x + y * y)) : 0; // dot-product with Y-axis: (0,1) if (myangle >= anglerad) { DIRECT_A2D_ELEM(result, i, j) = getCTFP(x, y, is_positive); } else { DIRECT_A2D_ELEM(result, i, j) = getCTFP(x, y, !is_positive); } } } // Special line along the vertical Y-axis, where FFTW stores both Friedel mates and Friedel symmetry needs to remain if (angle == 0.) { int dim = YSIZE(result); int hdim = dim/2; for (int i = hdim + 1; i < dim; i++) { DIRECT_A2D_ELEM(result, i, 0) = conj(DIRECT_A2D_ELEM(result, dim - i, 0)); } } } void CTF::getCenteredImage(MultidimArray &result, RFLOAT Tm, bool do_abs, bool do_only_flip_phases, bool do_intact_until_first_peak, bool do_damping, bool do_intact_after_first_peak) { result.setXmippOrigin(); RFLOAT xs = (RFLOAT)XSIZE(result) * Tm; RFLOAT ys = (RFLOAT)YSIZE(result) * Tm; FOR_ALL_ELEMENTS_IN_ARRAY2D(result) { RFLOAT x = (RFLOAT)j / xs; RFLOAT y = (RFLOAT)i / ys; A2D_ELEM(result, i, j) = getCTF(x, y, do_abs, do_only_flip_phases, do_intact_until_first_peak, do_damping, 0.0, do_intact_after_first_peak); } } void CTF::get1DProfile(MultidimArray < RFLOAT > &result, RFLOAT angle, RFLOAT Tm, bool do_abs, bool do_only_flip_phases, bool do_intact_until_first_peak, bool do_damping, bool do_intact_after_first_peak) { result.setXmippOrigin(); RFLOAT xs = (RFLOAT)XSIZE(result) * Tm; // assuming result is at the image size! FOR_ALL_ELEMENTS_IN_ARRAY1D(result) { RFLOAT x = (COSD(angle) * (RFLOAT)i) / xs; RFLOAT y = (SIND(angle) * (RFLOAT)i) / xs; A1D_ELEM(result, i) = getCTF(x, y, do_abs, do_only_flip_phases, do_intact_until_first_peak, do_damping, 0.0, do_intact_after_first_peak); } } void CTF::applyWeightEwaldSphereCurvature(MultidimArray& result, int orixdim, int oriydim, RFLOAT angpix, RFLOAT particle_diameter) { RFLOAT xs = (RFLOAT)orixdim * angpix; RFLOAT ys = (RFLOAT)oriydim * angpix; Matrix2D M(2,2); if (obsModel != 0 && obsModel->hasMagMatrices) { M = obsModel->getMagMatrix(opticsGroup); } else { M.initIdentity(); } FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM2D(result) { RFLOAT xu = (RFLOAT)jp / xs; RFLOAT yu = (RFLOAT)ip / ys; RFLOAT x = M(0,0) * xu + M(0,1) * yu; RFLOAT y = M(1,0) * xu + M(1,1) * yu; const RFLOAT astigDefocus = Axx * x * x + 2.0 * Axy * x * y + Ayy * y * y; RFLOAT u2 = x * x + y * y; RFLOAT u4 = u2 * u2; RFLOAT gamma = K1 * astigDefocus + K2 * u4 - K5 - K3; RFLOAT deltaf = u2 > 0.0? std::abs(astigDefocus / u2) : 0.0; RFLOAT inv_d = sqrt(u2); RFLOAT aux = 2.0 * deltaf * lambda * inv_d / particle_diameter; RFLOAT A = (aux > 1.0)? 0.0 : (2.0/PI) * (acos(aux) - aux * sin(acos(aux))); DIRECT_A2D_ELEM(result, i, j) = 1.0 + A * (2.0 * fabs(-sin(gamma)) - 1.0); // Keep everything on the same scale inside RELION, where we use sin(chi), not 2sin(chi) DIRECT_A2D_ELEM(result, i, j) *= 0.5; } } void CTF::applyWeightEwaldSphereCurvature_new(MultidimArray& result, int orixdim, int oriydim, RFLOAT angpix, RFLOAT particle_diameter) { const int s = oriydim; const int sh = s/2 + 1; const double as = angpix * s; const double Dpx = particle_diameter / angpix; for (int yi = 0; yi < s; yi++) for (int xi = 0; xi < sh; xi++) { const double x = xi / as; const double y = yi < sh? 
yi / as : (yi - s) / as; // shift of this frequency resulting from CTF: const t2Vector shift2D = RFLOAT(1.0 / (2 * angpix * PI)) * getGammaGrad(x, y); const double shift1D = 2.0 * shift2D.length(); // angle between the intersection points of the two circles and the center const double alpha = shift1D > Dpx? 0.0 : 2.0 * acos(shift1D / Dpx); // area of intersection between the two circles, divided by the area of the circle RFLOAT A = (alpha == 0.0)? 0.0 : (1.0/PI) * (alpha - sin(alpha)); // abs. value of CTFR (no damping): const double ctf_val = getCTF(x, y, true, false, false, false, 0.0); DIRECT_A2D_ELEM(result, yi, xi) = 1.0 + A * (2.0 * ctf_val - 1.0); // Keep everything on the same scale inside RELION, where we use sin(chi), not 2sin(chi) DIRECT_A2D_ELEM(result, yi, xi) *= 0.5; } } void CTF::applyWeightEwaldSphereCurvature_noAniso(MultidimArray &result, int orixdim, int oriydim, RFLOAT angpix, RFLOAT particle_diameter) { RFLOAT xs = (RFLOAT)orixdim * angpix; RFLOAT ys = (RFLOAT)oriydim * angpix; FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM2D(result) { RFLOAT x = (RFLOAT)jp / xs; RFLOAT y = (RFLOAT)ip / ys; RFLOAT deltaf = fabs(getDeltaF(x, y)); RFLOAT inv_d = sqrt(x * x + y * y); RFLOAT aux = (2. * deltaf * lambda * inv_d) / (particle_diameter); RFLOAT A = (aux > 1.) ? 0. : (2. / PI) * (acos(aux) - aux * sin(acos(aux))); DIRECT_A2D_ELEM(result, i, j) = 1. + A * (2.*fabs(getCTF(x, y)) - 1.); // Keep everything on the same scale inside RELION, where we use sin(chi), not 2sin(chi) DIRECT_A2D_ELEM(result, i, j) *= 0.5; } } std::vector CTF::getK() { // offset by one to maintain indices (K[1] = K1) return std::vector{0, K1, K2, K3, K4, K5}; } double CTF::getAxx() { return Axx; } double CTF::getAxy() { return Axy; } double CTF::getAyy() { return Ayy; } relion-3.1.3/src/ctf.h000066400000000000000000000266521411340063500145320ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ /*************************************************************************** * * Authors: Carlos Oscar S. Sorzano (coss@cnb.csic.es) * * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* e You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA * 02111-1307 USA * * All comments concerning this program package may be sent to the * e-mail address 'xmipp@cnb.csic.es' ***************************************************************************/ #ifndef _CTF_HH #define _CTF_HH #include "src/multidim_array.h" #include "src/metadata_table.h" #include "src/jaz/obs_model.h" #include class CTF { protected: // Different constants RFLOAT K1; RFLOAT K2; RFLOAT K3; RFLOAT K4; RFLOAT K5; // Astigmatism stored in symmetrical matrix form RFLOAT Axx, Axy, Ayy; // Azimuthal angle in radians RFLOAT rad_azimuth; // defocus_average = (defocus_u + defocus_v)/2 RFLOAT defocus_average; // defocus_deviation = (defocus_u - defocus_v)/2 RFLOAT defocus_deviation; // Pointer to observation model kept after a call to readByGroup() to enable // caching of symmetric aberrations (CTF instances can be reallocated for each particle, // while the same obs. model lives for the entire duration of the program) ObservationModel* obsModel; int opticsGroup; public: /// Accelerating Voltage (in KiloVolts) RFLOAT kV; /// Defocus in U (in Angstroms). Positive values are underfocused RFLOAT DeltafU; /// Defocus in V (in Angstroms). Postive values are underfocused RFLOAT DeltafV; /// Azimuthal angle (between X and U) in degrees RFLOAT azimuthal_angle; // Electron wavelength (Amstrongs) RFLOAT lambda; // Radius of the aperture (in micras) // RFLOAT aperture; // Spherical aberration (in milimeters). Typical value 5.6 RFLOAT Cs; /// Chromatic aberration (in milimeters). Typical value 2 RFLOAT Ca; /** Mean energy loss (eV) due to interaction with sample. Typical value 1*/ RFLOAT espr; /// Objective lens stability (deltaI/I) (ppm). Typical value 1 RFLOAT ispr; /// Convergence cone semiangle (in mrad). Typical value 0.5 RFLOAT alpha; /// Longitudinal mechanical displacement (Angstrom). Typical value 100 RFLOAT DeltaF; /// Transversal mechanical displacement (Angstrom). Typical value 3 RFLOAT DeltaR; /// Amplitude contrast. Typical values 0.07 for cryo, 0.2 for negative stain RFLOAT Q0; // B-factor fall-off RFLOAT Bfac; // Overall scale-factor of CTF RFLOAT scale; // Phase-shift from a phase-plate (in rad) RFLOAT phase_shift; /** Empty constructor. */ CTF() : kV(200), DeltafU(0), DeltafV(0), azimuthal_angle(0), phase_shift(0), Cs(0), Bfac(0), Q0(0), scale(1), obsModel(0), opticsGroup(0) {} // Read CTF parameters from particle table partMdt and optics table opticsMdt. 
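// --- Editor's aside: a minimal usage sketch (not part of the original sources). ---
// It assumes caller-side names such as partMdt, obsModel, ipart, boxsize and angpix;
// only the CTF methods and the FFTW half-transform layout (ydim x (xdim/2 + 1)) are
// taken from the declarations in this file.
//
//   CTF ctf;
//   ctf.readByGroup(partMdt, &obsModel, ipart);          // defocus etc. for particle ipart
//   MultidimArray<RFLOAT> Fctf(boxsize, boxsize/2 + 1);  // FFTW half-transform layout
//   ctf.getFftwImage(Fctf, boxsize, boxsize, angpix);    // fill with CTF values
// -----------------------------------------------------------------------------------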
void readByGroup(const MetaDataTable &partMdt, ObservationModel* obs, long int particle = -1); void readValue(EMDLabel label, RFLOAT& dest, RFLOAT defaultVal, long int particle, int opticsGroup, const MetaDataTable& partMdt, const ObservationModel* obs); /** Read CTF parameters from MetaDataTables MD1 and MD2 (deprecated) * If a parameter is not found in MD1 it is tried to be read from MD2 * Only if it is also not present in the second then a default value is used * This is useful if micrograph-specific parameters are stored in a separate MD from the image-specific parameters */ void read(const MetaDataTable &MD1, const MetaDataTable &MD2, long int objectID = -1); /** Just set all values explicitly */ void setValues(RFLOAT _defU, RFLOAT _defV, RFLOAT _defAng, RFLOAT _voltage, RFLOAT _Cs, RFLOAT _Q0, RFLOAT _Bfac, RFLOAT _scale = 1., RFLOAT _phase_shift = 0.); /** Set all values explicitly in 3.1 */ void setValuesByGroup(ObservationModel* obs, int opticsGroup, RFLOAT _defU, RFLOAT _defV, RFLOAT _defAng, RFLOAT _Bfac = 0.0, RFLOAT _scale = 1.0, RFLOAT _phase_shift = 0.0); /** Read from a single MetaDataTable */ void read(const MetaDataTable &MD); /** Write to MetaDataTable. */ void write(MetaDataTable &MD); /** Write to output. */ void write(std::ostream &out); /// Set up the CTF object, read parameters from MetaDataTables with micrograph and particle information /// If no MDmic is provided or it does not contain certain parameters, these parameters are tried to be read from MDimg void initialise(); /// Compute CTF at (U,V). Continuous frequencies inline RFLOAT getCTF(RFLOAT X, RFLOAT Y, bool do_abs = false, bool do_only_flip_phases = false, bool do_intact_until_first_peak = false, bool do_damping = true, double gammaOffset = 0.0, bool do_intact_after_first_peak = false) const { if (obsModel != 0 && obsModel->hasMagMatrices) { const Matrix2D& M = obsModel->getMagMatrix(opticsGroup); RFLOAT Xd = M(0,0) * X + M(0,1) * Y; RFLOAT Yd = M(1,0) * X + M(1,1) * Y; X = Xd; Y = Yd; } RFLOAT u2 = X * X + Y * Y; RFLOAT u4 = u2 * u2; // if (u2>=ua2) return 0; //RFLOAT deltaf = getDeltaF(X, Y); //RFLOAT gamma = K1 * deltaf * u2 + K2 * u4 - K5 - K3 + gammaOffset; RFLOAT gamma = K1 * (Axx*X*X + 2.0*Axy*X*Y + Ayy*Y*Y) + K2 * u4 - K5 - K3 + gammaOffset; RFLOAT retval; if ((do_intact_until_first_peak && ABS(gamma) < PI/2.) || (do_intact_after_first_peak && ABS(gamma) > PI/2.)) { retval = 1.; } else { retval = -sin(gamma); } if (do_damping) { RFLOAT E = exp(K4 * u2); // B-factor decay (K4 = -Bfac/4); retval *= E; } if (do_abs) { retval = ABS(retval); } else if (do_only_flip_phases) { retval = (retval < 0.) ? -1. : 1.; } retval *= scale; // SHWS 25-2-2019: testing a new idea to improve code stability // Don't allow very small values of CTF to prevent division by zero in GPU code if (fabs(retval) < 1e-8) { retval = SGN(retval) * 1e-8; } return retval; } RFLOAT getGamma(RFLOAT X, RFLOAT Y) const; // compute the local frequency of the ctf // (i.e. 
the radial slope of 'double gamma' in getCTF()) // -- deprecated, use getGammaGrad().length() RFLOAT getCtfFreq(RFLOAT X, RFLOAT Y); gravis::t2Vector getGammaGrad(RFLOAT X, RFLOAT Y) const; inline Complex getCTFP(RFLOAT X, RFLOAT Y, bool is_positive, double gammaOffset = 0.0) const { if (obsModel != 0 && obsModel->hasMagMatrices) { const Matrix2D& M = obsModel->getMagMatrix(opticsGroup); RFLOAT Xd = M(0,0) * X + M(0,1) * Y; RFLOAT Yd = M(1,0) * X + M(1,1) * Y; X = Xd; Y = Yd; } RFLOAT u2 = X * X + Y * Y; RFLOAT u4 = u2 * u2; RFLOAT gamma = K1 * (Axx*X*X + 2.0*Axy*X*Y + Ayy*Y*Y) + K2 * u4 - K5 - K3 + gammaOffset + PI/2.; RFLOAT sinx, cosx; #ifdef RELION_SINGLE_PRECISION SINCOSF( gamma, &sinx, &cosx ); #else SINCOS( gamma, &sinx, &cosx ); #endif Complex retval; retval.real = cosx; retval.imag = (is_positive) ? sinx : -sinx; return retval; } /// Compute Deltaf at a given direction (no longer used by getCTF) inline RFLOAT getDeltaF(RFLOAT X, RFLOAT Y) const { if (ABS(X) < XMIPP_EQUAL_ACCURACY && ABS(Y) < XMIPP_EQUAL_ACCURACY) return 0; RFLOAT ellipsoid_ang = atan2(Y, X) - rad_azimuth; /* * For a derivation of this formulae confer * Principles of Electron Optics page 1380 * in particular term defocus and twofold axial astigmatism * take into account that a1 and a2 are the coefficient * of the zernike polynomials difference of defocus at 0 * and at 45 degrees. In this case a2=0 */ RFLOAT cos_ellipsoid_ang_2 = cos(2*ellipsoid_ang); return (defocus_average + defocus_deviation*cos_ellipsoid_ang_2); } /// Generate (Fourier-space, i.e. FFTW format) image with all CTF values. /// The dimensions of the result array should have been set correctly already void getFftwImage(MultidimArray < RFLOAT > &result, int orixdim, int oriydim, RFLOAT angpix, bool do_abs = false, bool do_only_flip_phases = false, bool do_intact_until_first_peak = false, bool do_damping = true, bool do_ctf_padding = false, bool do_intact_after_first_peak = false) const; // Get a complex image with the CTFP/Q values, where the angle is in degrees between the Y-axis and the CTFP/Q sector line void getCTFPImage(MultidimArray &result, int orixdim, int oriydim, RFLOAT angpix, bool is_positive, float angle); /// Generate a centered image (with hermitian symmetry) /// The dimensions of the result array should have been set correctly already void getCenteredImage(MultidimArray < RFLOAT > &result, RFLOAT angpix, bool do_abs = false, bool do_only_flip_phases = false, bool do_intact_until_first_peak = false, bool do_damping = true, bool do_intact_after_first_peak = false); /// Generate a 1D profile along defocusAngle /// The dimensions of the result array should have been set correctly already, i.e. at the image size! 
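// --- Editor's aside: a minimal usage sketch (not part of the original sources). ---
// "profile", "boxsize" and "angpix" are assumed caller-side names; the call itself
// follows the declaration below. For instance, to inspect the CTF curve along the
// major defocus axis:
//
//   MultidimArray<RFLOAT> profile(boxsize);                  // sized to the image, as noted above
//   ctf.get1DProfile(profile, ctf.azimuthal_angle, angpix);  // CTF values along that direction
// -----------------------------------------------------------------------------------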
void get1DProfile(MultidimArray < RFLOAT > &result, RFLOAT angle, RFLOAT angpix, bool do_abs = false, bool do_only_flip_phases = false, bool do_intact_until_first_peak = false, bool do_damping = true, bool do_intact_after_first_peak = false); // Calculate weight W for Ewald-sphere curvature correction: apply this to the result from getFftwImage void applyWeightEwaldSphereCurvature(MultidimArray& result, int orixdim, int oriydim, RFLOAT angpix, RFLOAT particle_diameter); void applyWeightEwaldSphereCurvature_new(MultidimArray& result, int orixdim, int oriydim, RFLOAT angpix, RFLOAT particle_diameter); // Calculate weight W for Ewald-sphere curvature correction: apply this to the result from getFftwImage void applyWeightEwaldSphereCurvature_noAniso(MultidimArray < RFLOAT > &result, int orixdim, int oriydim, RFLOAT angpix, RFLOAT particle_diameter); std::vector getK(); double getAxx(); double getAxy(); double getAyy(); }; #endif relion-3.1.3/src/ctffind_runner.cpp000066400000000000000000001077051411340063500173160ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include "src/ctffind_runner.h" #include #ifdef CUDA #include "src/acc/cuda/cuda_mem_utils.h" #endif void CtffindRunner::read(int argc, char **argv, int rank) { parser.setCommandLine(argc, argv); int gen_section = parser.addSection("General options"); int ctf_section = parser.addSection("CTF estimation"); fn_in = parser.getOption("--i", "STAR file with all input micrographs, or a unix wildcard to all micrograph files, e.g. \"mics/*.mrc\""); do_use_without_doseweighting = parser.checkOption("--use_noDW", "Estimate CTFs from rlnMicrographNameNoDW instead of rlnMicrographName (only after MotionCor2)"); fn_out = parser.getOption("--o", "Directory, where all output files will be stored", "CtfEstimate/"); do_only_join_results = parser.checkOption("--only_make_star", "Don't estimate any CTFs, only join all logfile results in a STAR file"); continue_old = parser.checkOption("--only_do_unfinished", "Only estimate CTFs for those micrographs for which there is not yet a logfile with Final values."); do_at_most = textToInteger(parser.getOption("--do_at_most", "Only process up to this number of (unprocessed) micrographs.", "-1")); // Use a smaller squared part of the micrograph to estimate CTF (e.g. to avoid film labels...) 
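// --- Editor's aside: an illustrative invocation (not part of the original sources). ---
// The executable name and the numeric values are assumptions; the option names are the
// ones parsed in this function (the --ctfWin option mentioned above follows directly below).
//
//   relion_run_ctffind --i "Micrographs/*.mrc" --o CtfEstimate/ \
//       --CS 2.7 --HT 300 --AmpCnst 0.1 --angpix 1.0 --ctffind_exe /path/to/ctffind \
//       --Box 512 --ResMin 30 --ResMax 5 --dFMin 5000 --dFMax 50000 --FStep 500 --dAst 100
// ---------------------------------------------------------------------------------------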
ctf_win = textToInteger(parser.getOption("--ctfWin", "Size (in pixels) of a centered, squared window to use for CTF-estimation", "-1")); int mic_section = parser.addSection("Microscopy parameters"); // First parameter line in CTFFIND Cs = textToFloat(parser.getOption("--CS", "Spherical Aberration (mm) ","-1")); Voltage = textToFloat(parser.getOption("--HT", "Voltage (kV)","-1")); AmplitudeConstrast = textToFloat(parser.getOption("--AmpCnst", "Amplitude constrast", "-1")); angpix = textToFloat(parser.getOption("--angpix", "Pixel size in the input micrographs (A)", "-1")); int ctffind_section = parser.addSection("CTFFIND parameters"); // Second parameter line in CTFFIND fn_ctffind_exe = parser.getOption("--ctffind_exe","Location of ctffind executable (or through RELION_CTFFIND_EXECUTABLE environment variable)",""); box_size = textToFloat(parser.getOption("--Box", "Size of the boxes to calculate FFTs", "512")); resol_min = textToFloat(parser.getOption("--ResMin", "Minimum resolution (in A) to include in calculations", "100")); resol_max = textToFloat(parser.getOption("--ResMax", "Maximum resolution (in A) to include in calculations", "7")); min_defocus = textToFloat(parser.getOption("--dFMin", "Minimum defocus value (in A) to search", "10000")); max_defocus = textToFloat(parser.getOption("--dFMax", "Maximum defocus value (in A) to search", "50000")); step_defocus = textToFloat(parser.getOption("--FStep", "defocus step size (in A) for search", "250")); amount_astigmatism = textToFloat(parser.getOption("--dAst", "amount of astigmatism (in A)", "0")); int ctffind4_section = parser.addSection("CTFFIND4 parameters"); is_ctffind4 = parser.checkOption("--is_ctffind4", "The provided CTFFIND executable is CTFFIND4 (version 4.1+)"); use_given_ps = parser.checkOption("--use_given_ps", "Use pre-calculated power spectra?"); do_movie_thon_rings = parser.checkOption("--do_movie_thon_rings", "Calculate Thon rings from movie frames?"); avg_movie_frames = textToInteger(parser.getOption("--avg_movie_frames", "Average over how many movie frames (try to get 4 e-/A2)", "1")); movie_rootname = parser.getOption("--movie_rootname", "Rootname plus extension for movies", "_movie.mrcs"); do_phaseshift = parser.checkOption("--do_phaseshift", "Estimate the phase shift in the images (e.g. 
from a phase-plate)"); phase_min = textToFloat(parser.getOption("--phase_min", "Minimum phase shift (in degrees)", "0.")); phase_max = textToFloat(parser.getOption("--phase_max", "Maximum phase shift (in degrees)", "180.")); phase_step = textToFloat(parser.getOption("--phase_step", "Step in phase shift (in degrees)", "10.")); nr_threads = textToInteger(parser.getOption("--j", "Number of threads (for CTFIND4 only)", "1")); do_fast_search = parser.checkOption("--fast_search", "Disable \"Slower, more exhaustive search\" in CTFFIND4.1 (faster but less accurate)"); int gctf_section = parser.addSection("Gctf parameters"); do_use_gctf = parser.checkOption("--use_gctf", "Use Gctf instead of CTFFIND to estimate the CTF parameters"); fn_gctf_exe = parser.getOption("--gctf_exe","Location of Gctf executable (or through RELION_GCTF_EXECUTABLE environment variable)",""); angpix = textToFloat(parser.getOption("--angpix", "Magnified pixel size in Angstroms", "1.")); do_ignore_ctffind_params = parser.checkOption("--ignore_ctffind_params", "Use Gctf default parameters instead of CTFFIND parameters"); do_EPA = parser.checkOption("--EPA", "Use equi-phase averaging to calculate Thon rinds in Gctf"); do_validation = parser.checkOption("--do_validation", "Use validation inside Gctf to analyse quality of the fit?"); additional_gctf_options = parser.getOption("--extra_gctf_options", "Additional options for Gctf", ""); gpu_ids = parser.getOption("--gpu", "Device ids for each MPI-thread, e.g 0:1:2:3",""); // Initialise verb for non-parallel execution verb = 1; // Check for errors in the command-line option if (parser.checkForErrors()) REPORT_ERROR("Errors encountered on the command line (see above), exiting..."); } void CtffindRunner::usage() { parser.writeUsage(std::cout); } void CtffindRunner::initialise() { // Get the CTFFIND executable if (fn_ctffind_exe == "") { char *penv; penv = getenv("RELION_CTFFIND_EXECUTABLE"); if (penv != NULL) fn_ctffind_exe = (std::string)penv; } // Get the GCTF executable if (do_use_gctf && fn_gctf_exe == "") { char *penv; penv = getenv("RELION_GCTF_EXECUTABLE"); if (penv != NULL) fn_gctf_exe = (std::string)penv; } fn_shell = "csh"; char *shell_name; shell_name = getenv("RELION_SHELL"); if (shell_name != NULL) fn_shell = (std::string)shell_name; if (do_use_gctf && ctf_win>0) REPORT_ERROR("ERROR: Running Gctf together with --ctfWin is not implemented, please use CTFFIND instead."); if (!do_phaseshift && (additional_gctf_options.find("--phase_shift_L") != std::string::npos || additional_gctf_options.find("--phase_shift_H") != std::string::npos || additional_gctf_options.find("--phase_shift_S") != std::string::npos)) REPORT_ERROR("ERROR: Please don't specify --phase_shift_L, H, S in 'Other Gctf options' (--extra_gctf_options). 
Use 'Estimate phase shifts' (--do_phaseshift) and 'Phase shift - Min, Max, Step' (--phase_min, --phase_max, --phase_step) instead."); if (do_use_gctf && use_given_ps) REPORT_ERROR("ERROR: --use_given_ps is available only with CTFFIND 4.1"); if (use_given_ps && do_movie_thon_rings) REPORT_ERROR("ERROR: You cannot enable --use_given_ps and --do_movie_thon_rings simultaneously"); if (use_given_ps) do_use_without_doseweighting = false; // Make sure fn_out ends with a slash if (fn_out[fn_out.length()-1] != '/') fn_out += "/"; // Set up which micrographs to estimate CTFs from if (fn_in.isStarFile()) { MetaDataTable MDin; ObservationModel::loadSafely(fn_in, obsModel, MDin, "micrographs", verb); if (MDin.numberOfObjects() > 0 && !MDin.containsLabel(EMDL_MICROGRAPH_NAME)) REPORT_ERROR("ERROR: There is no rlnMicrographName label in the input micrograph STAR file."); if (do_use_without_doseweighting && MDin.numberOfObjects() > 0 && !MDin.containsLabel(EMDL_MICROGRAPH_NAME_WODOSE)) REPORT_ERROR("ERROR: You are using --use_noDW, but there is no rlnMicrographNameNoDW label in the input micrograph STAR file."); if (use_given_ps && MDin.numberOfObjects() > 0 && !MDin.containsLabel(EMDL_CTF_POWER_SPECTRUM)) REPORT_ERROR("ERROR: You are using --use_given_ps, but there is no rlnCtfPowerSpectrum label in the input micrograph STAR file."); fn_micrographs_all.clear(); optics_group_micrographs_all.clear(); fn_micrographs_ctf_all.clear(); FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDin) { FileName fn_mic; MDin.getValue(EMDL_MICROGRAPH_NAME, fn_mic); fn_micrographs_all.push_back(fn_mic); // Dose weighted image if (do_use_without_doseweighting) MDin.getValue(EMDL_MICROGRAPH_NAME_WODOSE, fn_mic); else if (use_given_ps) MDin.getValue(EMDL_CTF_POWER_SPECTRUM, fn_mic); fn_micrographs_ctf_all.push_back(fn_mic); // Image for CTF estsimation int optics_group; MDin.getValue(EMDL_IMAGE_OPTICS_GROUP, optics_group); optics_group_micrographs_all.push_back(optics_group); } } else { fn_in.globFiles(fn_micrographs_all); optics_group_micrographs_all.resize(fn_in.size(), 1); obsModel.opticsMdt.clear(); obsModel.opticsMdt.addObject(); } // Make sure obsModel.opticsMdt has all the necessary information // If voltage or pixel size were not in the input STAR file, set them from the command line options if (!obsModel.opticsMdt.containsLabel(EMDL_CTF_CS)) { if (Cs < 0.) { REPORT_ERROR("ERROR: the input STAR file does not contain the spherical aberration, and it is not given through --CS."); } FOR_ALL_OBJECTS_IN_METADATA_TABLE(obsModel.opticsMdt) { obsModel.opticsMdt.setValue(EMDL_CTF_CS, Cs); } } if (!obsModel.opticsMdt.containsLabel(EMDL_CTF_VOLTAGE)) { if (Voltage < 0.) { REPORT_ERROR("ERROR: the input STAR file does not contain the acceleration voltage, and it is not given through --HT."); } FOR_ALL_OBJECTS_IN_METADATA_TABLE(obsModel.opticsMdt) { obsModel.opticsMdt.setValue(EMDL_CTF_VOLTAGE, Voltage); } } if (!obsModel.opticsMdt.containsLabel(EMDL_CTF_Q0)) { if (AmplitudeConstrast < 0.) { REPORT_ERROR("ERROR: the input STAR file does not contain the amplitude contrast, and it is not given through --AmpCnst."); } FOR_ALL_OBJECTS_IN_METADATA_TABLE(obsModel.opticsMdt) { obsModel.opticsMdt.setValue(EMDL_CTF_Q0, AmplitudeConstrast); } } if (!obsModel.opticsMdt.containsLabel(EMDL_MICROGRAPH_PIXEL_SIZE)) { if (angpix < 0.) 
{ REPORT_ERROR("ERROR: the input STAR file does not contain the micrograph pixel size, and it is not given through --angpix."); } FOR_ALL_OBJECTS_IN_METADATA_TABLE(obsModel.opticsMdt) { obsModel.opticsMdt.setValue(EMDL_MICROGRAPH_PIXEL_SIZE, angpix); } } // First backup the given list of all micrographs std::vector optics_group_given_all = optics_group_micrographs_all; std::vector fn_mic_given_all = fn_micrographs_all; std::vector fn_mic_ctf_given_all = fn_micrographs_ctf_all; // These lists contain those for the output STAR & PDF files optics_group_micrographs_all.clear(); fn_micrographs_all.clear(); fn_micrographs_ctf_all.clear(); // These are micrographs to be processed optics_group_micrographs.clear(); fn_micrographs.clear(); fn_micrographs_ctf.clear(); bool warned = false; for (long int imic = 0; imic < fn_mic_given_all.size(); imic++) { bool ignore_this = false; bool process_this = true; if (continue_old) { FileName fn_microot = fn_mic_ctf_given_all[imic].withoutExtension(); RFLOAT defU, defV, defAng, CC, HT, CS, AmpCnst, XMAG, DStep, maxres=-1., valscore = -1., phaseshift = 0.; if (getCtffindResults(fn_microot, defU, defV, defAng, CC, HT, CS, AmpCnst, XMAG, DStep, maxres, valscore, phaseshift, false)) // false: dont warn if not found Final values { process_this = false; // already done } } if (do_at_most >= 0 && fn_micrographs.size() >= do_at_most) { if (process_this) { ignore_this = true; process_this = false; if (!warned) { warned = true; std::cout << "NOTE: processing of some micrographs will be skipped as requested by --do_at_most" << std::endl; } } // If this micrograph has already been processed, the result should be included in the output. // So ignore_this remains false. } if (process_this) { optics_group_micrographs.push_back(optics_group_given_all[imic]); fn_micrographs.push_back(fn_mic_given_all[imic]); fn_micrographs_ctf.push_back(fn_mic_ctf_given_all[imic]); } if (!ignore_this) { optics_group_micrographs_all.push_back(optics_group_given_all[imic]); fn_micrographs_all.push_back(fn_mic_given_all[imic]); fn_micrographs_ctf_all.push_back(fn_mic_ctf_given_all[imic]); } } if (false) { std::cout << fn_mic_given_all.size() << " micrographs were given but we process only "; std::cout << do_at_most << " micrographs as specified in --do_at_most." 
<< std::endl; } // Make symbolic links of the input micrographs in the output directory because ctffind and gctf write output files alongside the input micropgraph char temp [180]; char *cwd = getcwd(temp, 180); currdir = std::string(temp); // Make sure fn_out ends with a slash if (currdir[currdir.length()-1] != '/') currdir += "/"; FileName prevdir=""; for (size_t i = 0; i < fn_micrographs.size(); i++) { FileName myname = fn_micrographs_ctf[i]; if (do_movie_thon_rings) myname = myname.withoutExtension() + movie_rootname; // Remove the UNIQDATE part of the filename if present FileName output = getOutputFileWithNewUniqueDate(myname, fn_out); // Create output directory if neccesary FileName newdir = output.beforeLastOf("/"); if (newdir != prevdir) { std::string command = " mkdir -p " + newdir; int res = system(command.c_str()); } int slk = symlink((currdir+myname).c_str(), output.c_str()); } if (do_use_gctf && fn_micrographs.size()>0) { untangleDeviceIDs(gpu_ids, allThreadIDs); if (allThreadIDs[0].size()==0 || (!std::isdigit(*gpu_ids.begin())) ) { #ifdef CUDA if (verb>0) std::cout << "gpu-ids were not specified, so threads will automatically be mapped to devices (incrementally)."<< std::endl; HANDLE_ERROR(cudaGetDeviceCount(&devCount)); #else if (verb>0) REPORT_ERROR("gpu-ids were not specified, but we could not figure out which GPU to use because RELION was not compiled with CUDA support."); #endif } // Find the dimensions of the first micrograph, to later on ensure all micrographs are the same size Image Itmp; Itmp.read(fn_micrographs[0], false); // false means only read header! xdim = XSIZE(Itmp()); ydim = YSIZE(Itmp()); } if (is_ctffind4 && ctf_win > 0 && do_movie_thon_rings) REPORT_ERROR("CtffindRunner::initialise ERROR: You cannot use a --ctfWin operation on movies."); if (verb > 0) { if (do_use_gctf) std::cout << " Using Gctf executable in: " << fn_gctf_exe << std::endl; else std::cout << " Using CTFFIND executable in: " << fn_ctffind_exe << std::endl; std::cout << " to estimate CTF parameters for the following micrographs: " << std::endl; if (continue_old) std::cout << " (skipping all micrographs for which a logfile with Final values already exists " << std::endl; for(unsigned int i = 0; i < fn_micrographs.size(); ++i) std::cout << " * " << fn_micrographs[i] << std::endl; } } void CtffindRunner::run() { if (!do_only_join_results) { int barstep; if (verb > 0) { if (do_use_gctf) std::cout << " Estimating CTF parameters using Kai Zhang's Gctf ..." << std::endl; else { if (is_ctffind4) std::cout << " Estimating CTF parameters using Alexis Rohou's and Niko Grigorieff's CTFFIND4.1 ..." << std::endl; else std::cout << " Estimating CTF parameters using Niko Grigorieff's CTFFIND ..." 
<< std::endl; } init_progress_bar(fn_micrographs.size()); barstep = XMIPP_MAX(1, fn_micrographs.size() / 60); } std::vector allmicnames; for (long int imic = 0; imic < fn_micrographs.size(); imic++) { // Abort through the pipeline_control system if (pipeline_control_check_abort_job()) exit(RELION_EXIT_ABORTED); // Get angpix and voltage from the optics groups: obsModel.opticsMdt.getValue(EMDL_CTF_CS, Cs, optics_group_micrographs[imic]-1); obsModel.opticsMdt.getValue(EMDL_CTF_VOLTAGE, Voltage, optics_group_micrographs[imic]-1); obsModel.opticsMdt.getValue(EMDL_CTF_Q0, AmplitudeConstrast, optics_group_micrographs[imic]-1); obsModel.opticsMdt.getValue(EMDL_MICROGRAPH_PIXEL_SIZE, angpix, optics_group_micrographs[imic]-1); if (do_use_gctf) { executeGctf(imic, allmicnames, imic+1==fn_micrographs.size()); } else if (is_ctffind4) { executeCtffind4(imic); } else { executeCtffind3(imic); } if (verb > 0 && imic % barstep == 0) progress_bar(imic); } if (verb > 0) progress_bar(fn_micrographs.size()); } joinCtffindResults(); } void CtffindRunner::joinCtffindResults() { long int barstep = XMIPP_MAX(1, fn_micrographs_all.size() / 60); if (verb > 0) { std::cout << " Generating logfile.pdf ... " << std::endl; init_progress_bar(fn_micrographs_all.size()); } MetaDataTable MDctf; for (long int imic = 0; imic < fn_micrographs_all.size(); imic++) { FileName fn_microot = fn_micrographs_ctf_all[imic].withoutExtension(); RFLOAT defU, defV, defAng, CC, HT, CS, AmpCnst, XMAG, DStep; RFLOAT maxres = -999., valscore = -999., phaseshift = -999.; bool has_this_ctf = getCtffindResults(fn_microot, defU, defV, defAng, CC, HT, CS, AmpCnst, XMAG, DStep, maxres, valscore, phaseshift); if (!has_this_ctf) { std::cerr << " WARNING: skipping, since cannot get CTF values for " << fn_micrographs_all[imic] < 0.) { // Put an upper limit on maxres, as gCtf may put 999. now max is 25. MDctf.setValue(EMDL_CTF_MAXRES, XMIPP_MIN(25., maxres)); } if (fabs(phaseshift + 999.) > 0.) MDctf.setValue(EMDL_CTF_PHASESHIFT, phaseshift); if (fabs(valscore + 999.) > 0.) MDctf.setValue(EMDL_CTF_VALIDATIONSCORE, valscore); } if (verb > 0 && imic % 60 == 0) progress_bar(imic); } obsModel.save(MDctf, fn_out+"micrographs_ctf.star", "micrographs"); std::vector plot_labels; plot_labels.push_back(EMDL_CTF_DEFOCUSU); plot_labels.push_back(EMDL_CTF_DEFOCUS_ANGLE); plot_labels.push_back(EMDL_CTF_ASTIGMATISM); plot_labels.push_back(EMDL_CTF_MAXRES); plot_labels.push_back(EMDL_CTF_PHASESHIFT); plot_labels.push_back(EMDL_CTF_FOM); plot_labels.push_back(EMDL_CTF_VALIDATIONSCORE); FileName fn_eps, fn_eps_root = fn_out+"micrographs_ctf"; std::vector all_fn_eps; for (int i = 0; i < plot_labels.size(); i++) { EMDLabel label = plot_labels[i]; if (MDctf.containsLabel(label)) { // Values for all micrographs CPlot2D *plot2Db=new CPlot2D(EMDL::label2Str(label) + " for all micrographs"); MDctf.addToCPlot2D(plot2Db, EMDL_UNDEFINED, label, 1.); plot2Db->SetDrawLegend(false); fn_eps = fn_eps_root + "_all_" + EMDL::label2Str(label) + ".eps"; plot2Db->OutputPostScriptPlot(fn_eps); all_fn_eps.push_back(fn_eps); delete plot2Db; if (MDctf.numberOfObjects() > 3) { // Histogram std::vector histX, histY; CPlot2D *plot2D=new CPlot2D(""); MDctf.columnHistogram(label,histX,histY,0, plot2D); fn_eps = fn_eps_root + "_hist_" + EMDL::label2Str(label) + ".eps"; plot2D->OutputPostScriptPlot(fn_eps); all_fn_eps.push_back(fn_eps); delete plot2D; } } } joinMultipleEPSIntoSinglePDF(fn_out + "logfile.pdf", all_fn_eps); if (verb > 0 ) { progress_bar(fn_micrographs_all.size()); std::cout << " Done! 
Written out: " << fn_out << "micrographs_ctf.star and " << fn_out << "logfile.pdf" << std::endl; } if (do_use_gctf) { FileName fn_gctf_junk = "micrographs_all_gctf"; if (exists(fn_gctf_junk)) remove(fn_gctf_junk.c_str()); fn_gctf_junk = "extra_micrographs_all_gctf"; if (exists(fn_gctf_junk)) remove(fn_gctf_junk.c_str()); } } void CtffindRunner::executeGctf(long int imic, std::vector &allmicnames, bool is_last, int rank) { // Always add the new micrograph to the TODO list Image Itmp; FileName outputfile = getOutputFileWithNewUniqueDate(fn_micrographs_ctf[imic], fn_out); Itmp.read(outputfile, false); // false means only read header! if (XSIZE(Itmp()) != xdim || YSIZE(Itmp()) != ydim) REPORT_ERROR("CtffindRunner::executeGctf ERROR: Micrographs do not all have the same size! " + fn_micrographs_ctf[imic] + " is different from the first micrograph!"); if (ZSIZE(Itmp()) > 1 || NSIZE(Itmp()) > 1) REPORT_ERROR("CtffindRunner::executeGctf ERROR: No movies or volumes allowed for " + fn_micrographs_ctf[imic]); allmicnames.push_back(outputfile); // Execute Gctf every 20 images, and always for the last one if ( ((imic+1)%20) == 0 || is_last) { std::string command = fn_gctf_exe; //command += " --ctfstar " + fn_out + "tt_micrographs_ctf.star"; command += " --apix " + floatToString(angpix); command += " --cs " + floatToString(Cs); command += " --kV " + floatToString(Voltage); command += " --ac " + floatToString(AmplitudeConstrast); command += " --astm " + floatToString(amount_astigmatism); command += " --logsuffix _gctf.log"; if (!do_ignore_ctffind_params) { command += " --boxsize " + floatToString(box_size); command += " --resL " + floatToString(resol_min); command += " --resH " + floatToString(resol_max); command += " --defL " + floatToString(min_defocus); command += " --defH " + floatToString(max_defocus); command += " --defS " + floatToString(step_defocus); } if (do_phaseshift) { command += " --phase_shift_L " + floatToString(phase_min); command += " --phase_shift_H " + floatToString(phase_max); command += " --phase_shift_S " + floatToString(phase_step); } if (do_EPA) command += " --do_EPA "; if (do_validation) command += " --do_validation "; for (size_t i = 0; i> " + fn_out + "gctf" + integerToString(rank)+".err"; //std::cerr << " command= " << command << std::endl; int res = system(command.c_str()); // Re-set the allmicnames vector allmicnames.clear(); } } void CtffindRunner::executeCtffind3(long int imic) { FileName fn_mic = getOutputFileWithNewUniqueDate(fn_micrographs_ctf[imic], fn_out); FileName fn_root = fn_mic.withoutExtension(); FileName fn_script = fn_root + "_ctffind3.com"; FileName fn_log = fn_root + "_ctffind3.log"; FileName fn_ctf = fn_root + ".ctf"; FileName fn_mic_win; std::ofstream fh; fh.open((fn_script).c_str(), std::ios::out); if (!fh) REPORT_ERROR( (std::string)"CtffindRunner::execute_ctffind cannot create file: " + fn_script); // If given, then put a square window of ctf_win on the micrograph for CTF estimation if (ctf_win > 0) { // Window micrograph to a smaller, squared sub-micrograph to estimate CTF on fn_mic_win = fn_root + "_win.mrc"; // Read in micrograph, window and write out again Image I; I.read(fn_mic); I().setXmippOrigin(); I().window(FIRST_XMIPP_INDEX(ctf_win), FIRST_XMIPP_INDEX(ctf_win), LAST_XMIPP_INDEX(ctf_win), LAST_XMIPP_INDEX(ctf_win)); // Calculate mean, stddev, min and max RFLOAT avg, stddev, minval, maxval; I().computeStats(avg, stddev, minval, maxval); I.MDMainHeader.setValue(EMDL_IMAGE_STATS_MIN, minval); I.MDMainHeader.setValue(EMDL_IMAGE_STATS_MAX, 
maxval); I.MDMainHeader.setValue(EMDL_IMAGE_STATS_AVG, avg); I.MDMainHeader.setValue(EMDL_IMAGE_STATS_STDDEV, stddev); I.write(fn_mic_win); } else fn_mic_win = fn_mic; std::string ctffind4_options = (is_ctffind4) ? " --omp-num-threads " + integerToString(nr_threads) + " --old-school-input-ctffind4 " : ""; // Write script to run ctffind fh << "#!/usr/bin/env " << fn_shell << std::endl; fh << fn_ctffind_exe << ctffind4_options << " > " << fn_log << " << EOF"< 0) { if( remove( fn_mic_win.c_str() ) != 0 ) std::cerr << "WARNING: there was an error deleting windowed micrograph file " << fn_mic_win << std::endl; } } void CtffindRunner::executeCtffind4(long int imic) { FileName fn_mic = getOutputFileWithNewUniqueDate(fn_micrographs_ctf[imic], fn_out); FileName fn_root = fn_mic.withoutExtension(); FileName fn_script = fn_root + "_ctffind4.com"; FileName fn_log = fn_root + "_ctffind4.log"; FileName fn_ctf = fn_root + ".ctf"; FileName fn_mic_win; std::ofstream fh; fh.open((fn_script).c_str(), std::ios::out); if (!fh) REPORT_ERROR( (std::string)"CtffindRunner::execute_ctffind cannot create file: " + fn_script); // If given, then put a square window of ctf_win on the micrograph for CTF estimation if (ctf_win > 0) { // Window micrograph to a smaller, squared sub-micrograph to estimate CTF on fn_mic_win = fn_root + "_win.mrc"; // Read in micrograph, window and write out again Image I; I.read(fn_mic); I().setXmippOrigin(); I().window(FIRST_XMIPP_INDEX(ctf_win), FIRST_XMIPP_INDEX(ctf_win), LAST_XMIPP_INDEX(ctf_win), LAST_XMIPP_INDEX(ctf_win)); // Calculate mean, stddev, min and max RFLOAT avg, stddev, minval, maxval; I().computeStats(avg, stddev, minval, maxval); I.MDMainHeader.setValue(EMDL_IMAGE_STATS_MIN, minval); I.MDMainHeader.setValue(EMDL_IMAGE_STATS_MAX, maxval); I.MDMainHeader.setValue(EMDL_IMAGE_STATS_AVG, avg); I.MDMainHeader.setValue(EMDL_IMAGE_STATS_STDDEV, stddev); I.write(fn_mic_win); } else fn_mic_win = fn_mic; int ctf_boxsize = box_size; RFLOAT ctf_angpix = angpix; if (use_given_ps) { Image Ihead; Ihead.read(fn_mic_win, false); ctf_boxsize = XSIZE(Ihead()); ctf_angpix = Ihead.samplingRateX(); } //std::string ctffind4_options = " --omp-num-threads " + integerToString(nr_threads); std::string ctffind4_options = ""; if (use_given_ps) ctffind4_options += " --amplitude-spectrum-input"; // Write script to run ctffind fh << "#!/usr/bin/env " << fn_shell << std::endl; fh << fn_ctffind_exe << ctffind4_options << " > " << fn_log << " << EOF"< 0) { if( remove( fn_mic_win.c_str() ) != 0 ) std::cerr << "WARNING: there was an error deleting windowed micrograph file " << fn_mic_win << std::endl; } } bool CtffindRunner::getCtffindResults(FileName fn_microot, RFLOAT &defU, RFLOAT &defV, RFLOAT &defAng, RFLOAT &CC, RFLOAT &HT, RFLOAT &CS, RFLOAT &AmpCnst, RFLOAT &XMAG, RFLOAT &DStep, RFLOAT &maxres, RFLOAT &valscore, RFLOAT &phaseshift, bool do_warn) { if (is_ctffind4) { return getCtffind4Results(fn_microot, defU, defV, defAng, CC, HT, CS, AmpCnst, XMAG, DStep, maxres, phaseshift, do_warn); } else { return getCtffind3Results(fn_microot, defU, defV, defAng, CC, HT, CS, AmpCnst, XMAG, DStep, maxres, phaseshift, valscore, do_warn); } } bool CtffindRunner::getCtffind3Results(FileName fn_microot, RFLOAT &defU, RFLOAT &defV, RFLOAT &defAng, RFLOAT &CC, RFLOAT &HT, RFLOAT &CS, RFLOAT &AmpCnst, RFLOAT &XMAG, RFLOAT &DStep, RFLOAT &maxres, RFLOAT &phaseshift, RFLOAT &valscore, bool do_warn) { FileName fn_root = getOutputFileWithNewUniqueDate(fn_microot, fn_out); FileName fn_log = fn_root + "_ctffind3.log"; if 
(do_use_gctf) fn_log = fn_root + "_gctf.log"; std::ifstream in(fn_log.data(), std::ios_base::in); if (in.fail()) return false; // Start reading the ifstream at the top in.seekg(0); // Proceed until the next "Final values" statement // The loop statement may be necessary for data blocks that have a list AND a table inside them bool Final_is_found = false; bool Cs_is_found = false; std::string line; std::vector words; while (getline(in, line, '\n')) { // Find data_ lines if (line.find("CS[mm], HT[kV], AmpCnst, XMAG, DStep[um]") != std::string::npos || line.find("CS[mm], HT[kV], ac, XMAG, DStep[um]") != std::string::npos) // GCTF 1.18 B1 changed the line... { getline(in, line, '\n'); tokenize(line, words); if (words.size() == 5) { Cs_is_found = true; CS = textToFloat(words[0]); HT = textToFloat(words[1]); AmpCnst = textToFloat(words[2]); XMAG = textToFloat(words[3]); DStep = textToFloat(words[4]); } } int nr_exp_cols = (do_phaseshift) ? 7 : 6; if (line.find("Final Values") != std::string::npos) { tokenize(line, words); if (words.size() == nr_exp_cols) { Final_is_found = true; defU = textToFloat(words[0]); defV = textToFloat(words[1]); defAng = textToFloat(words[2]); if (do_use_gctf && do_phaseshift) { phaseshift = textToFloat(words[3]); CC = textToFloat(words[4]); } else CC = textToFloat(words[3]); } } if (do_use_gctf) { if (line.find("Resolution limit estimated by EPA:") != std::string::npos) { tokenize(line, words); maxres = textToFloat(words[words.size()-1]); } if (line.find("OVERALL_VALIDATION_SCORE:") != std::string::npos) { tokenize(line, words); valscore = textToFloat(words[words.size()-1]); } } } if (!Cs_is_found) { if (do_warn) std::cerr << "WARNING: cannot find line with Cs[mm], HT[kV], etc values in " << fn_log << std::endl; return false; } if (!Final_is_found) { if (do_warn) std::cerr << "WARNING: cannot find line with Final values in " << fn_log << std::endl; return false; } in.close(); return Final_is_found; } bool CtffindRunner::getCtffind4Results(FileName fn_microot, RFLOAT &defU, RFLOAT &defV, RFLOAT &defAng, RFLOAT &CC, RFLOAT &HT, RFLOAT &CS, RFLOAT &AmpCnst, RFLOAT &XMAG, RFLOAT &DStep, RFLOAT &maxres, RFLOAT &phaseshift, bool do_warn) { FileName fn_root = getOutputFileWithNewUniqueDate(fn_microot, fn_out); FileName fn_log = fn_root + "_ctffind4.log"; std::ifstream in(fn_log.data(), std::ios_base::in); if (in.fail()) return false; // Start reading the ifstream at the top in.seekg(0); std::string line; std::vector words; bool found_log = false; while (getline(in, line, '\n')) { // Find the file with the summary of the results if (line.find("Summary of results") != std::string::npos) { tokenize(line, words); fn_log = words[words.size() - 1]; found_log = true; break; } } in.close(); if (!found_log) return false; // Now open the file with the summry of the results std::ifstream in2(fn_log.data(), std::ios_base::in); if (in2.fail()) return false; bool Final_is_found = false; bool Cs_is_found = false; while (getline(in2, line, '\n')) { // Find data_ lines if (line.find("acceleration voltage:") != std::string::npos) { Cs_is_found = true; tokenize(line, words); if (words.size() < 19) REPORT_ERROR("ERROR: Unexpected number of words on data line with acceleration voltage in " + fn_log); CS = textToFloat(words[13]); HT = textToFloat(words[8]); AmpCnst = textToFloat(words[18]); DStep = textToFloat(words[3]); XMAG = 10000.; } else if (line.find("Columns: ") != std::string::npos) { getline(in2, line, '\n'); tokenize(line, words); if (words.size() < 7) REPORT_ERROR("ERROR: Unexpected 
number of words on data line below Columns line in " + fn_log); Final_is_found = true; defU = textToFloat(words[1]); defV = textToFloat(words[2]); defAng = textToFloat(words[3]); if (do_phaseshift) phaseshift = RAD2DEG(textToFloat(words[4])); CC = textToFloat(words[5]); if (words[6] == "inf") maxres= 999.; else maxres = textToFloat(words[6]); } } if (!Cs_is_found) { if (do_warn) std::cerr << " WARNING: cannot find line with acceleration voltage etc in " << fn_log << std::endl; return false; } if (!Final_is_found) { if (do_warn) std::cerr << "WARNING: cannot find line with Final values in " << fn_log << std::endl; return false; } in2.close(); return Final_is_found; } relion-3.1.3/src/ctffind_runner.h000066400000000000000000000132571411340063500167610ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef CTFFIND_RUNNER_H_ #define CTFFIND_RUNNER_H_ #include #include #include #include #include #include #include "src/metadata_table.h" #include "src/image.h" #include #include "src/jaz/obs_model.h" class CtffindRunner { public: // I/O Parser IOParser parser; // Verbosity int verb; // Output rootname FileName fn_in, fn_out; // Estimate CTFs from rlnMicrographNameWithoutDoseWeighting instead of rlnMicrographName? bool do_use_without_doseweighting; // Filenames of all the micrographs to estimate the CTF from std::vector fn_micrographs, fn_micrographs_ctf, fn_micrographs_all, fn_micrographs_ctf_all; // Optics groups for all micrographs std::vector optics_group_micrographs, optics_group_micrographs_all; // Information about the optics groups ObservationModel obsModel; // Dimension of squared area of the micrograph to use for CTF estimation int ctf_win; // CTFFIND and Gctf executables and shell FileName fn_ctffind_exe, fn_gctf_exe, fn_shell; // Is this ctffind4? bool is_ctffind4; // Number of OMP threads for CTFFIND4 int nr_threads; // Use pre-calculated power spectra bool use_given_ps; // Calculate Thon rings from movies? bool do_movie_thon_rings; // Movie rootname FileName movie_rootname; // Number of movie frames to average int avg_movie_frames; // Estimate phaseshift from a phase-plate? bool do_phaseshift; // Min, max and step phase-shift RFLOAT phase_min, phase_max, phase_step; // use Kai Zhang's Gctf instead of CTFFIND? bool do_use_gctf; // When using Gctf, ignore CTFFIND parameters and use Gctf defaults instead? bool do_ignore_ctffind_params; // When using Gctf, use equi-phase averaging? bool do_EPA; // Additional gctf command line options std::string additional_gctf_options; // When using Gctf, do validation test? 
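// --- Editor's aside: a minimal driver sketch (not part of the original sources). ---
// A caller would presumably use the methods declared further down in this class in
// roughly this order (error handling omitted; this ordering is the editor's assumption,
// not verified against the command-line programs):
//
//   CtffindRunner runner;
//   runner.read(argc, argv);
//   runner.initialise();
//   runner.run();   // per-micrograph estimation, then results are joined into a STAR file
// ------------------------------------------------------------------------------------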
bool do_validation; // Continue an old run: only estimate CTF if logfile WITH Final Values line does not yet exist, otherwise skip the micrograph bool continue_old; // Process at most this number of unprocessed micrographs long do_at_most; ////// CTFFIND parameters // Size of the box to calculate FFTw RFLOAT box_size; // Minimum and maximum resolution (in A) to be taken into account RFLOAT resol_min, resol_max; // Defocus search parameters (in A, positive is underfocus) RFLOAT min_defocus, max_defocus, step_defocus; // Amount of astigmatism (in A) RFLOAT amount_astigmatism; // Voltage (kV) RFLOAT Voltage; // Spherical aberration RFLOAT Cs; // Amplitude contrast (e.g. 0.07) RFLOAT AmplitudeConstrast; // Magnification RFLOAT Magnification; // For Gctf: directly provide angpix! RFLOAT angpix; // Flag to only join results into a star file bool do_only_join_results; // Micrograph size (for Gctf check that all are equal size) int xdim, ydim; // Current working directory to make absolute-path symlinks std::string currdir; // Disable "Slower, more exhaustive search?" in CTFFIND 4.1.5- bool do_fast_search; // Which GPU devices to use? std::string gpu_ids; std::vector < std::vector < std::string > > allThreadIDs; int devCount; public: // Read command line arguments void read(int argc, char **argv, int rank = 0); // Print usage instructions void usage(); // Initialise some stuff after reading void initialise(); // Execute all CTFFIND jobs to get CTF parameters void run(); // Harvest all CTFFIND results into a single STAR file void joinCtffindResults(); // Execute CTFFIND for a single micrograph void executeCtffind3(long int imic); // Execute CTFFIND4.1+ for a single micrograph void executeCtffind4(long int imic); // Check micrograph size and add name to the list of micrographs to run Gctf on //void addToGctfJobList(long int imic, std::vector &allmicnames); // Execute Gctf for many micrographs //void executeGctf( std::vector &allmicnames); void executeGctf(long int imic, std::vector &allmicnames, bool is_last, int rank = 0); // Get micrograph metadata bool getCtffindResults(FileName fn_mic, RFLOAT &defU, RFLOAT &defV, RFLOAT &defAng, RFLOAT &CC, RFLOAT &HT, RFLOAT &CS, RFLOAT &AmpCnst, RFLOAT &XMAG, RFLOAT &DStep, RFLOAT &maxres, RFLOAT &valscore, RFLOAT &phaseshift, bool do_warn = true); bool getCtffind3Results(FileName fn_mic, RFLOAT &defU, RFLOAT &defV, RFLOAT &defAng, RFLOAT &CC, RFLOAT &HT, RFLOAT &CS, RFLOAT &AmpCnst, RFLOAT &XMAG, RFLOAT &DStep, RFLOAT &maxres, RFLOAT &phaseshift, RFLOAT &valscore, bool do_warn = true); bool getCtffind4Results(FileName fn_mic, RFLOAT &defU, RFLOAT &defV, RFLOAT &defAng, RFLOAT &CC, RFLOAT &HT, RFLOAT &CS, RFLOAT &AmpCnst, RFLOAT &XMAG, RFLOAT &DStep, RFLOAT &maxres, RFLOAT &phaseshift, bool do_warn = true); }; #endif /* CTFFIND_RUNNER_H_ */ relion-3.1.3/src/ctffind_runner_mpi.cpp000066400000000000000000000064571411340063500201650ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include "src/ctffind_runner_mpi.h" void CtffindRunnerMpi::read(int argc, char **argv) { // Define a new MpiNode node = new MpiNode(argc, argv); // First read in non-parallelisation-dependent variables CtffindRunner::read(argc, argv); // Don't put any output to screen for mpi followers verb = (node->isLeader()) ? 1 : 0; // Possibly also read parallelisation-dependent variables here // Print out MPI info printMpiNodesMachineNames(*node); } void CtffindRunnerMpi::run() { if (!do_only_join_results) { // Each node does part of the work long int my_first_micrograph, my_last_micrograph, my_nr_micrographs; divide_equally(fn_micrographs.size(), node->size, node->rank, my_first_micrograph, my_last_micrograph); my_nr_micrographs = my_last_micrograph - my_first_micrograph + 1; int barstep; if (verb > 0) { if (do_use_gctf) std::cout << " Estimating CTF parameters using Kai Zhang's Gctf ..." << std::endl; else std::cout << " Estimating CTF parameters using Niko Grigorieff's CTFFIND ..." << std::endl; init_progress_bar(my_nr_micrographs); barstep = XMIPP_MAX(1, my_nr_micrographs / 60); } std::vector allmicnames; for (long int imic = my_first_micrograph; imic <= my_last_micrograph; imic++) { // Abort through the pipeline_control system if (pipeline_control_check_abort_job()) MPI_Abort(MPI_COMM_WORLD, RELION_EXIT_ABORTED); // Get angpix and voltage from the optics groups: obsModel.opticsMdt.getValue(EMDL_CTF_CS, Cs, optics_group_micrographs[imic]-1); obsModel.opticsMdt.getValue(EMDL_CTF_VOLTAGE, Voltage, optics_group_micrographs[imic]-1); obsModel.opticsMdt.getValue(EMDL_CTF_Q0, AmplitudeConstrast, optics_group_micrographs[imic]-1); obsModel.opticsMdt.getValue(EMDL_MICROGRAPH_PIXEL_SIZE, angpix, optics_group_micrographs[imic]-1); if (do_use_gctf) { //addToGctfJobList(imic, allmicnames); executeGctf(imic, allmicnames, imic == my_last_micrograph, node->rank); } else if (is_ctffind4) { executeCtffind4(imic); } else { executeCtffind3(imic); } if (verb > 0 && imic % barstep == 0) progress_bar(imic); } //if (do_use_gctf && allmicnames.size() > 0) // executeGctf(allmicnames); if (verb > 0) progress_bar(my_nr_micrographs); } MPI_Barrier(MPI_COMM_WORLD); // Only the leader writes the joined result file if (node->isLeader()) { joinCtffindResults(); } } relion-3.1.3/src/ctffind_runner_mpi.h000066400000000000000000000026771411340063500176320ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #ifndef CTFFIND_RUNNER_MPI_H_ #define CTFFIND_RUNNER_MPI_H_ #include "src/mpi.h" #include "src/ctffind_runner.h" #include "src/parallel.h" class CtffindRunnerMpi: public CtffindRunner { private: MpiNode *node; public: /** Destructor, calls MPI_Finalize */ ~CtffindRunnerMpi() { delete node; } /** Read * This could take care of mpi-parallelisation-dependent variables */ void read(int argc, char **argv); // Parallelized run function void run(); }; #endif /* CTFFIND_RUNNER_MPI_H_ */ relion-3.1.3/src/displayer.cpp000066400000000000000000002712401411340063500163000ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include "src/displayer.h" //#define DEBUG // #ifdef HAVE_PNG #include #endif const Fl_Menu_Item color_choices[] = { // text, shortcut, callback, user_data, flags, type, font, size, color {"Red (1)", 0, (Fl_Callback*)0, (void*)1, 0, 0, 0, 0, FL_RED}, {"Green (2)", 0, (Fl_Callback*)0, (void*)2, 0, 0, 0, 0, FL_GREEN}, {"Blue (3)", 0, (Fl_Callback*)0, (void*)3, 0, 0, 0, 0, FL_BLUE}, {"Cyan (4)", 0, (Fl_Callback*)0, (void*)4, 0, 0, 0, 0, FL_CYAN}, {"Magenta (5)", 0, (Fl_Callback*)0, (void*)5, 0, 0, 0, 0, FL_MAGENTA}, {"Yellow (6)", 0, (Fl_Callback*)0, (void*)6, 0, 0, 0, 0, FL_YELLOW}, {0} // sentinel }; const int NUM_COLORS = 6; /************************************************************************/ void DisplayBox::draw() { if (!img_data) return; short xpos = x() + xoff; short ypos = y() + yoff; /* Ensure that the full window is redrawn */ //fl_push_clip(x(),y(),w(),h()); /* Redraw the whole image */ int depth = (colour_scheme) ? 3 : 1; fl_draw_image((const uchar *)img_data, xpos, ypos, (short)xsize_data, (short)ysize_data, depth); if (img_label != "") { fl_color(FL_WHITE); fl_draw(img_label.c_str(), xpos, ypos + fl_height()); } /* Draw a red rectangle around the particle if it is selected */ if (selected >= 1 && selected <= 6) fl_color(color_choices[selected - 1].labelcolor_); else fl_color(FL_BLACK); fl_line_style(FL_SOLID, 2); int x1 = xpos; int y1 = ypos; int x2 = xpos + xsize_data; int y2 = ypos + ysize_data; fl_line(x1, y1, x1, y2); fl_line(x1, y2, x2, y2); fl_line(x2, y2, x2, y1); fl_line(x2, y1, x1, y1); //fl_pop_clip(); } unsigned char rgbToGrey(const unsigned char red, const unsigned char green, const unsigned char blue) { switch (colour_scheme) { case (BLACKGREYREDSCALE): { if (red == 255) return FLOOR((RFLOAT)(255. - blue/2.)); else return FLOOR((RFLOAT)(red/2.)); break; } case (BLUEGREYWHITESCALE): { if (red == 0) return FLOOR((RFLOAT)(255.-blue)/2.); else return FLOOR((RFLOAT)(red/2. 
+ 128.)); break; } case (BLUEGREYREDSCALE): { unsigned char Y; int X; if (red == 0) { Y = 255-blue; X = 0; } else if (red == 255) { Y = 255-blue; X = 2; } else { Y = blue; X = 1; } return CEIL(85*((RFLOAT)Y/256. + X)); break; } case (RAINBOWSCALE): { unsigned char Y; int X; if (red > 0) { if (red == 255) {Y = green; X = 0;} else {Y = 255-red; X = 1;} } else if (green > 0) { if (green == 255) {Y = blue; X = 2;} else {Y = 255 - green; X = 3;} } else {Y = 255; X = 4;} return 255 - CEIL(64*((RFLOAT)Y/255. + X)); break; } case (CYANBLACKYELLOWSCALE): { if (red >0) { if (red < 255) return (unsigned char)FLOOR((RFLOAT)red / 3. + 128); else return (unsigned char)FLOOR((RFLOAT)green/3. + 42 + 128); } else { if (blue < 255) return (unsigned char)FLOOR((RFLOAT)-blue / 3. + 128); else return (unsigned char)FLOOR(-((RFLOAT)green)/3. - 42 + 128); } break; } } REPORT_ERROR("Logic error: should not happen"); return 0; } void DisplayBox::setData(MultidimArray &img, MetaDataContainer *MDCin, int _ipos, RFLOAT _minval, RFLOAT _maxval, RFLOAT _scale, bool do_relion_scale) { scale = _scale; minval = _minval; maxval = _maxval; ipos = _ipos; selected = NOTSELECTED; // Set its own MetaDataTable MDimg.setIsList(true); MDimg.addObject(MDCin); // For volumes only show the central slice if (ZSIZE(img) > 1) { MultidimArray slice; img.getSlice(ZSIZE(img)/2, slice); img=slice; } // create array for the scaled image data xsize_data = CEIL(XSIZE(img) * scale); ysize_data = CEIL(YSIZE(img) * scale); xoff = (xsize_data < w() ) ? (w() - xsize_data) / 2 : 0; yoff = (ysize_data < h() ) ? (h() - ysize_data) / 2 : 0; if (colour_scheme == GREYSCALE) { img_data = new unsigned char [xsize_data * ysize_data]; } else { img_data = new unsigned char [3 * xsize_data * ysize_data]; } RFLOAT range = maxval - minval; RFLOAT step = range / 255; // 8-bit scaling range from 0 to 255 RFLOAT* old_ptr=NULL; long int n; // For micrographs use relion-scaling to avoid bias in down-sampled positions // For multi-image viewers, do not use this scaling as it is slower... if (do_relion_scale && ABS(scale - 1.0) > 0.01) selfScaleToSize(img, xsize_data, ysize_data); // Use the same nearest-neighbor algorithm as in the copy function of Fl_Image... 
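// Note on the rescaling branch below: it walks over destination pixels and advances a pointer
// into the source image by whole-pixel steps, carrying the remainders in xerr/yerr (an integer
// error accumulator), so no floating-point source coordinates are needed. Each sampled value is
// mapped from [minval, maxval] onto the 8-bit range via 'step', either into a single grey byte
// or into an RGB triplet through greyToRGB().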
if (ABS(scale - 1.0) > 0.01 && !do_relion_scale) { int xmod = XSIZE(img) % xsize_data; int xstep = XSIZE(img) / xsize_data; int ymod = YSIZE(img) % ysize_data; int ystep = YSIZE(img) / ysize_data; int line_d = XSIZE(img); int dx, dy, sy, xerr, yerr; if (colour_scheme == GREYSCALE) { for (dy = ysize_data, sy = 0, yerr = ysize_data, n = 0; dy > 0; dy --) { for (dx = xsize_data, xerr = xsize_data, old_ptr = img.data + sy * line_d; dx > 0; dx --, n++) { img_data[n] = (char)FLOOR((*old_ptr - minval) / step); old_ptr += xstep; xerr -= xmod; if (xerr <= 0) { xerr += xsize_data; old_ptr += 1; } } sy += ystep; yerr -= ymod; if (yerr <= 0) { yerr += ysize_data; sy ++; } } } else { for (dy = ysize_data, sy = 0, yerr = ysize_data, n = 0; dy > 0; dy --) { for (dx = xsize_data, xerr = xsize_data, old_ptr = img.data + sy * line_d; dx > 0; dx --, n++) { unsigned char val = FLOOR((*old_ptr - minval) / step); greyToRGB(colour_scheme, val, img_data[3*n], img_data[3*n+1], img_data[3*n+2]); old_ptr += xstep; xerr -= xmod; if (xerr <= 0) { xerr += xsize_data; old_ptr += 1; } } sy += ystep; yerr -= ymod; if (yerr <= 0) { yerr += ysize_data; sy ++; } } } } else { if (colour_scheme == GREYSCALE) { FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY_ptr(img, n, old_ptr) { img_data[n] = FLOOR((*old_ptr - minval) / step); } } else { FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY_ptr(img, n, old_ptr) { unsigned char val = FLOOR((*old_ptr - minval) / step); greyToRGB(colour_scheme, val, img_data[3*n], img_data[3*n+1], img_data[3*n+2]); } } } } int DisplayBox::toggleSelect(int set_selected) { if (selected > 0) selected = 0; else if (selected == 0) selected = set_selected; redraw(); return selected; } void DisplayBox::setSelect(int value) { selected = value; redraw(); } int DisplayBox::select() { selected = SELECTED; redraw(); return selected; } int DisplayBox::unSelect() { selected = NOTSELECTED; redraw(); return selected; } int basisViewerWindow::fillCanvas(int viewer_type, MetaDataTable &MDin, ObservationModel *obsModel, EMDLabel display_label, EMDLabel text_label, bool _do_read_whole_stacks, bool _do_apply_orient, RFLOAT _minval, RFLOAT _maxval, RFLOAT _sigma_contrast, RFLOAT _scale, RFLOAT _ori_scale, int _ncol, long int max_nr_images, RFLOAT lowpass, RFLOAT highpass, bool _do_class, MetaDataTable *_MDdata, int _nr_regroup, bool _do_recenter, bool _is_data, MetaDataTable *_MDgroups, bool do_allow_save, FileName fn_selected_imgs, FileName fn_selected_parts, int max_nr_parts_per_class) { // Scroll bars Fl_Scroll scroll(0, 0, w(), h()); // Pre-set the canvas to the correct size FileName fn_img; Image img; MDin.firstObject(); MDin.getValue(display_label, fn_img); img.read(fn_img, false); int nimgs = MDin.numberOfObjects(); if (viewer_type == MULTIVIEWER) { int xsize_canvas = _ncol * (CEIL(XSIZE(img())*_scale) + BOX_OFFSET); int nrow = CEIL((RFLOAT)nimgs/_ncol); int ysize_canvas = nrow * (CEIL(YSIZE(img())*_scale) + BOX_OFFSET); multiViewerCanvas canvas(0, 0, xsize_canvas, ysize_canvas); canvas.multi_max_nr_images = max_nr_images; canvas.SetScroll(&scroll); canvas.do_read_whole_stacks = _do_read_whole_stacks; canvas.is_data = _is_data; canvas.ori_scale = _ori_scale; canvas.display_label = display_label; canvas.sigma_contrast = _sigma_contrast; canvas.minval = _minval; canvas.maxval = _maxval; canvas.do_allow_save = do_allow_save; canvas.fn_selected_imgs= fn_selected_imgs; canvas.fn_selected_parts = fn_selected_parts; canvas.max_nr_parts_per_class = max_nr_parts_per_class; canvas.fill(MDin, obsModel, display_label, text_label, 
_do_apply_orient, _minval, _maxval, _sigma_contrast, _scale, _ncol, _do_recenter, max_nr_images, lowpass, highpass); canvas.nr_regroups = _nr_regroup; canvas.do_recenter = _do_recenter; canvas.do_apply_orient = _do_apply_orient; canvas.obsModel = obsModel; canvas.text_label = text_label; canvas.metadata_table_name = MDin.getName(); if (canvas.nr_regroups > 0) canvas.MDgroups = _MDgroups; if (_do_class) { canvas.do_class = true; canvas.MDdata = _MDdata; } else { canvas.do_class = false; } // Pre-load existing backup_selection.star file FileName fn_sel, fn_dir="."; if (fn_selected_imgs != "") fn_dir = fn_selected_imgs.beforeLastOf("/"); else if (fn_selected_parts != "") fn_dir = fn_selected_parts.beforeLastOf("/"); fn_dir += "/backup_selection.star"; if (exists(fn_dir)) canvas.loadBackupSelection(false); // false means dont ask for filename resizable(*this); show(); return Fl::run(); } else if (viewer_type == SINGLEVIEWER) { if (nimgs>1) REPORT_ERROR("ERROR: trying to launch a singleViewerCanvas with multiple images..."); int xsize_canvas = CEIL(XSIZE(img())*_scale); int ysize_canvas = CEIL(YSIZE(img())*_scale); singleViewerCanvas canvas(0, 0, xsize_canvas, ysize_canvas); canvas.SetScroll(&scroll); canvas.fill(MDin, obsModel, display_label, text_label, _do_apply_orient, _minval, _maxval, _sigma_contrast, _scale, 1); canvas.do_read_whole_stacks = false; resizable(*this); show(); return Fl::run(); } REPORT_ERROR("Logic error: should not come here"); return -1; } int basisViewerWindow::fillPickerViewerCanvas(MultidimArray image, RFLOAT _minval, RFLOAT _maxval, RFLOAT _sigma_contrast, RFLOAT _scale, RFLOAT _coord_scale, int _particle_radius, bool _do_startend, FileName _fn_coords, FileName _fn_color, FileName _fn_mic, FileName _color_label, RFLOAT _color_blue_value, RFLOAT _color_red_value) { current_selection_type = 2; // Green // Scroll bars Fl_Scroll scroll(0, 0, w(), h()); int xsize_canvas = CEIL(XSIZE(image)*_scale); int ysize_canvas = CEIL(YSIZE(image)*_scale); pickerViewerCanvas canvas(0, 0, xsize_canvas, ysize_canvas); canvas.particle_radius = _particle_radius; canvas.do_startend = _do_startend; canvas.coord_scale = _coord_scale; canvas.SetScroll(&scroll); canvas.fill(image, _minval, _maxval, _sigma_contrast, _scale); canvas.fn_coords = _fn_coords; canvas.fn_color = _fn_color; canvas.fn_mic = _fn_mic; canvas.color_label = EMDL::str2Label(_color_label); canvas.smallest_color_value = XMIPP_MIN(_color_blue_value, _color_red_value); canvas.biggest_color_value = XMIPP_MAX(_color_blue_value, _color_red_value); canvas.do_blue_to_red = (_color_blue_value < _color_red_value); canvas.do_read_whole_stacks = false; if (_fn_coords != "" && exists(_fn_coords)) { canvas.loadCoordinates(false); canvas.redraw(); } resizable(*this); show(); return Fl::run(); } int basisViewerWindow::fillSingleViewerCanvas(MultidimArray image, RFLOAT _minval, RFLOAT _maxval, RFLOAT _sigma_contrast, RFLOAT _scale) { // Scroll bars Fl_Scroll scroll(0, 0, w(), h()); // Pre-set the canvas to the correct size int xsize_canvas = CEIL(XSIZE(image)*_scale); int ysize_canvas = CEIL(YSIZE(image)*_scale); singleViewerCanvas canvas(0, 0, xsize_canvas, ysize_canvas); canvas.SetScroll(&scroll); canvas.fill(image, _minval, _maxval, _sigma_contrast, _scale); canvas.do_read_whole_stacks = false; resizable(*this); show(); return Fl::run(); } void basisViewerCanvas::fill(MetaDataTable &MDin, ObservationModel *obsModel, EMDLabel display_label, EMDLabel text_label, bool _do_apply_orient, RFLOAT _minval, RFLOAT _maxval, RFLOAT _sigma_contrast, 
RFLOAT _scale, int _ncol, bool _do_recenter, long int max_images, RFLOAT lowpass, RFLOAT highpass) { ncol = _ncol; int nr_imgs = MDin.numberOfObjects(); if (nr_imgs > 1) { xoff = BOX_OFFSET/2; yoff = BOX_OFFSET/2; } else { xoff = 0; yoff = 0; } int barstep; if (nr_imgs > 1) { std::cout << "Reading in all images..." << std::endl; init_progress_bar(nr_imgs); barstep = XMIPP_MAX(1, nr_imgs/ 60); } nrow = 0; long int ipos = 0; int irow = 0; int icol = 0; FileName fn_my_stack, fn_next_stack, fn_img, fn_tmp; long int my_number, my_next_number, my_stack_first_ipos = 0; std::vector numbers_in_stack; long int number_of_images = MDin.numberOfObjects(); if (max_images > 0 && max_images < number_of_images) number_of_images = max_images; boxes.clear(); boxes.resize(number_of_images); FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDin) { // Read in image stacks as a whole, i.e. don't re-open and close stack for every individual image to save speed MDin.getValue(display_label, fn_img, ipos); fn_img.decompose(my_number, fn_my_stack); // See whether the next image has the same stackname.... if (ipos+1 < number_of_images) { MDin.getValue(display_label, fn_tmp, ipos+1); fn_tmp.decompose(my_next_number, fn_next_stack); } else fn_next_stack = ""; numbers_in_stack.push_back(my_number - 1); // start counting at 0! // If next stack is a different one, read the current stack and process all images in it if (fn_next_stack != fn_my_stack) { Image stack, img; fImageHandler hFile; if (do_read_whole_stacks) // Read the entire stack into memory stack.read(fn_my_stack); else // Open the stack file hFile.openFile(fn_my_stack); // 1. Process the current stack for (long int inum = 0; inum < numbers_in_stack.size(); inum++) { // Get the image we want from the stack if (do_read_whole_stacks) stack().getImage(numbers_in_stack[inum], img()); else img.readFromOpenFile(fn_my_stack, hFile, numbers_in_stack[inum]); long int my_ipos = my_stack_first_ipos + inum; bool have_optics_group = false; RFLOAT angpix = 0.; if (_do_apply_orient || lowpass > 0. || highpass > 0.) 
{ if (MDin.containsLabel(EMDL_IMAGE_OPTICS_GROUP)) { int optics_group; MDin.getValue(EMDL_IMAGE_OPTICS_GROUP, optics_group, my_ipos); optics_group--; obsModel->opticsMdt.getValue(EMDL_IMAGE_PIXEL_SIZE, angpix, optics_group); have_optics_group = true; } } if (_do_apply_orient && have_optics_group) { RFLOAT psi,rot,tilt; Matrix1D offset(3); Matrix2D A; MDin.getValue(EMDL_ORIENT_PSI, psi, my_ipos); MDin.getValue(EMDL_ORIENT_ORIGIN_X_ANGSTROM, XX(offset), my_ipos); MDin.getValue(EMDL_ORIENT_ORIGIN_Y_ANGSTROM, YY(offset), my_ipos); if(img().getDim()==2) { offset /= angpix; rotation2DMatrix(psi, A); MAT_ELEM(A, 0, 2) = COSD(psi) * XX(offset) - SIND(psi) * YY(offset); MAT_ELEM(A, 1, 2) = COSD(psi) * YY(offset) + SIND(psi) * XX(offset); selfApplyGeometry(img(), A, IS_NOT_INV, DONT_WRAP); } else { MDin.getValue(EMDL_ORIENT_ROT, rot, my_ipos); MDin.getValue(EMDL_ORIENT_TILT, tilt, my_ipos); MDin.getValue(EMDL_ORIENT_ORIGIN_Z_ANGSTROM, ZZ(offset), my_ipos); offset /= angpix; Euler_rotation3DMatrix(rot,tilt,psi, A); MAT_ELEM(A, 0, 3) = MAT_ELEM(A, 0, 0) * XX(offset) + MAT_ELEM(A, 0, 1) * YY(offset) + MAT_ELEM(A, 0, 2) * ZZ(offset); MAT_ELEM(A, 1, 3) = MAT_ELEM(A, 1, 0) * XX(offset) + MAT_ELEM(A, 1, 1) * YY(offset) + MAT_ELEM(A, 1, 2) * ZZ(offset); MAT_ELEM(A, 2, 3) = MAT_ELEM(A, 2, 0) * XX(offset) + MAT_ELEM(A, 2, 1) * YY(offset) + MAT_ELEM(A, 2, 2) * ZZ(offset); selfApplyGeometry(img(), A, IS_NOT_INV, DONT_WRAP); } } else if(_do_apply_orient && MDin.containsLabel(EMDL_MLMODEL_IS_HELIX) && img().getDim()==3) { RFLOAT psi,rot,tilt; Matrix2D A; Euler_rotation3DMatrix(0,90,0, A); MAT_ELEM(A, 0, 3) = MAT_ELEM(A, 0, 0) + MAT_ELEM(A, 0, 1) + MAT_ELEM(A, 0, 2) ; MAT_ELEM(A, 1, 3) = MAT_ELEM(A, 1, 0) + MAT_ELEM(A, 1, 1) + MAT_ELEM(A, 1, 2) ; MAT_ELEM(A, 2, 3) = MAT_ELEM(A, 2, 0) + MAT_ELEM(A, 2, 1) + MAT_ELEM(A, 2, 2) ; selfApplyGeometry(img(), A, IS_NOT_INV, DONT_WRAP); } if (_do_recenter) { selfTranslateCenterOfMassToCenter(img()); } if (lowpass > 0. && have_optics_group) lowPassFilterMap(img(), lowpass, angpix); if (highpass > 0. && have_optics_group) highPassFilterMap(img(), highpass, angpix); // Dont change the user-provided _minval and _maxval in the getImageContrast routine! RFLOAT myminval = _minval; RFLOAT mymaxval = _maxval; getImageContrast(img(), myminval, mymaxval, _sigma_contrast); long int my_sorted_ipos = my_ipos; if (MDin.containsLabel(EMDL_SORTED_IDX)) { // First get the sorted index MDin.getValue(EMDL_SORTED_IDX, my_sorted_ipos, my_ipos); // Then set the original index in the sorted index, so that particles can be written out in the correct order MDin.setValue(EMDL_SORTED_IDX, my_ipos, my_ipos); } icol = my_sorted_ipos % ncol; irow = my_sorted_ipos / ncol; nrow = XMIPP_MAX(nrow, irow+1); if (my_ipos == 0) { xsize_box = CEIL(_scale * XSIZE(img())) + 2 * xoff; // 2 pixels on each side in between all images ysize_box = CEIL(_scale * YSIZE(img())) + 2 * yoff; } int ycoor = irow * ysize_box; int xcoor = icol * xsize_box; DisplayBox* my_box = new DisplayBox(xcoor, ycoor, xsize_box, ysize_box, ""); my_box->setData(img(), MDin.getObject(my_ipos), my_ipos, myminval, mymaxval, _scale, false); if (MDin.containsLabel(text_label)) { MDin.getValueToString(text_label, my_box->img_label, my_ipos); } my_box->redraw(); boxes[my_sorted_ipos] = my_box;//boxes.push_back(my_box); } // 2. 
Reset numbers_in_stack and my_stack_first_ipos for next stack numbers_in_stack.clear(); my_stack_first_ipos = ipos + 1; } ipos++; if (ipos >= number_of_images) break; if (nr_imgs > 1 && ipos % barstep == 0) progress_bar(ipos); } if (nr_imgs > 1) progress_bar(nr_imgs); } void basisViewerCanvas::fill(MultidimArray &image, RFLOAT _minval, RFLOAT _maxval, RFLOAT _sigma_contrast, RFLOAT _scale) { xoff = yoff = 0; nrow = ncol = 1; getImageContrast(image, _minval, _maxval, _sigma_contrast); xsize_box = CEIL(_scale * XSIZE(image)); ysize_box = CEIL(_scale * YSIZE(image)); DisplayBox* my_box = new DisplayBox(0, 0, xsize_box, ysize_box, "dummy"); MetaDataTable MDtmp; MDtmp.addObject(); //FileName fn_tmp = "dummy"; //MDtmp.setValue(EMDL_IMAGE_NAME, fn_tmp); my_box->setData(image, MDtmp.getObject(), 0, _minval, _maxval, _scale, true); my_box->redraw(); boxes.push_back(my_box); } void basisViewerCanvas::draw() { for (int ipos = 0 ; ipos < boxes.size(); ipos++) boxes[ipos]->redraw(); } int multiViewerCanvas::handle(int ev) { if (ev==FL_PUSH) { int xc = (int)Fl::event_x() - scroll->x() + scroll->hscrollbar.value(); int yc = (int)Fl::event_y() - scroll->y() + scroll->scrollbar.value(); int xpos = xc / xsize_box; int ypos = yc / ysize_box; int ipos = ypos * ncol + xpos; // Check there was no click in the area outside the boxes... if (xpos < ncol && ypos < nrow && ipos < boxes.size()) { if (Fl::event_button() == FL_LEFT_MOUSE) { // Shift-left-click will select a whole range if (Fl::event_state(FL_SHIFT)) { if (has_shift) { int postshift_ipos = ipos; int ipos0 = (postshift_ipos > preshift_ipos) ? preshift_ipos : postshift_ipos; int iposF = (postshift_ipos > preshift_ipos) ? postshift_ipos : preshift_ipos; // Select all images from ipos0 to iposF // TODO!!! Cannot do this here: have to define an event for the multiview window as a whole! // This multiview window should have all the DisplayBoxes inside it.... for (int my_ipos = ipos0; my_ipos <= iposF; my_ipos++) { boxes[my_ipos]->select(); } has_shift = false; } else { preshift_ipos = ipos; has_shift = true; } } else { boxes[ipos]->toggleSelect(current_selection_type); } } else if ( Fl::event_button() == FL_RIGHT_MOUSE ) { Fl_Menu_Item rclick_menu; if (do_class) { Fl_Menu_Item rclick_menu[] = { { "Save backup selection" }, { "Load backup selection" }, { "Clear selection" }, { "Invert selection" }, { "Select all classes below" }, { "Select all classes above" }, { "Show metadata this class" }, { "Show original image" }, { "Save image as PNG" }, { "Show Fourier amplitudes (2x)" }, { "Show Fourier phase angles (2x)" }, { "Show helical layer line profile" }, { "Show particles from selected classes" }, { "Set selection type" }, { "Save selected classes" }, // idx = 14; change below when re-ordered!! 
{ "Quit" }, { 0 } }; if (!do_allow_save) { rclick_menu[14].deactivate(); } const Fl_Menu_Item *m = rclick_menu->popup(Fl::event_x(), Fl::event_y(), 0, 0, 0); if ( !m ) return 0; else if ( strcmp(m->label(), "Save backup selection") == 0 ) saveBackupSelection(); else if ( strcmp(m->label(), "Load backup selection") == 0 ) loadBackupSelection(); else if ( strcmp(m->label(), "Clear selection") == 0 ) clearSelection(); else if ( strcmp(m->label(), "Invert selection") == 0 ) invertSelection(); else if ( strcmp(m->label(), "Select all classes below") == 0 ) selectFromHereBelow(ipos); else if ( strcmp(m->label(), "Select all classes above") == 0 ) selectFromHereAbove(ipos); else if ( strcmp(m->label(), "Show metadata this class") == 0 ) printMetaData(ipos); else if ( strcmp(m->label(), "Show original image") == 0 ) showOriginalImage(ipos); else if ( strcmp(m->label(), "Save image as PNG") == 0 ) saveImage(ipos); else if ( strcmp(m->label(), "Show Fourier amplitudes (2x)") == 0 ) showFourierAmplitudes(ipos); else if ( strcmp(m->label(), "Show Fourier phase angles (2x)") == 0 ) showFourierPhaseAngles(ipos); else if ( strcmp(m->label(), "Show helical layer line profile") == 0 ) showHelicalLayerLineProfile(ipos); else if ( strcmp(m->label(), "Set selection type") == 0 ) setSelectionType(); else if ( strcmp(m->label(), "Show particles from selected classes") == 0 ) showSelectedParticles(current_selection_type); else if ( strcmp(m->label(), "Save selected classes") == 0 ) { saveBackupSelection(); saveSelected(current_selection_type); saveSelectedParticles(current_selection_type); // save the exit_success file after saving already, // as many users close the window through the operating system's cross symbol on the window, instead of a proper exit RELION_EXIT_SUCCESS; } else if ( strcmp(m->label(), "Quit") == 0 ) { //clean exit exit(RELION_EXIT_SUCCESS); } } else { Fl_Menu_Item rclick_menu[] = { { "Save backup selection" }, { "Load backup selection" }, { "Clear selection" }, { "Invert selection" }, { "Select all below" }, { "Select all above" }, { "Show average of selection" }, { "Show stddev of selection" }, { "Show original image" }, { "Save image as PNG" }, { "Show Fourier amplitudes (2x)" }, { "Show Fourier phase angles (2x)" }, { "Show helical layer line profile" }, { "Set selection type" }, { "Show metadata" }, { "Save STAR with selected images" }, // idx = 15; change below when re-ordered!! 
{ "Quit" }, { 0 } }; if (!do_allow_save) { rclick_menu[15].deactivate(); } const Fl_Menu_Item *m = rclick_menu->popup(Fl::event_x(), Fl::event_y(), 0, 0, 0); if ( !m ) return 0; else if ( strcmp(m->label(), "Save backup selection") == 0 ) saveBackupSelection(); else if ( strcmp(m->label(), "Load backup selection") == 0 ) loadBackupSelection(); else if ( strcmp(m->label(), "Clear selection") == 0 ) clearSelection(); else if ( strcmp(m->label(), "Invert selection") == 0 ) invertSelection(); else if ( strcmp(m->label(), "Select all below") == 0 ) selectFromHereBelow(ipos); else if ( strcmp(m->label(), "Select all above") == 0 ) selectFromHereAbove(ipos); else if ( strcmp(m->label(), "Show average of selection") == 0 ) showAverage(SELECTED, false); else if ( strcmp(m->label(), "Show stddev of selection") == 0 ) showAverage(SELECTED, true); else if ( strcmp(m->label(), "Show original image") == 0 ) showOriginalImage(ipos); else if ( strcmp(m->label(), "Save image as PNG") == 0 ) saveImage(ipos); else if ( strcmp(m->label(), "Show Fourier amplitudes (2x)") == 0 ) showFourierAmplitudes(ipos); else if ( strcmp(m->label(), "Show Fourier phase angles (2x)") == 0 ) showFourierPhaseAngles(ipos); else if ( strcmp(m->label(), "Show helical layer line profile") == 0 ) showHelicalLayerLineProfile(ipos); else if ( strcmp(m->label(), "Set selection type") == 0 ) setSelectionType(); else if ( strcmp(m->label(), "Show metadata") == 0 ) printMetaData(ipos); else if ( strcmp(m->label(), "Save STAR with selected images") == 0 ) { saveBackupSelection(); saveSelected(SELECTED); // save the exit_success file after saving already, // as many users close the window through the operating system's cross symbol on the window, instead of a proper exit RELION_EXIT_SUCCESS; } else if ( strcmp(m->label(), "Quit") == 0 ) exit(0); } return(1); // (tells caller we handled this event) } } // endif ipos within valid region } return 0; } void multiViewerCanvas::saveBackupSelection() { std::vector selected(boxes.size()); for (long int ipos = 0; ipos < boxes.size(); ipos++) { long int my_sorted_ipos; if (boxes[ipos]->MDimg.containsLabel(EMDL_SORTED_IDX)) boxes[ipos]->MDimg.getValue(EMDL_SORTED_IDX, my_sorted_ipos); else my_sorted_ipos = ipos; selected[my_sorted_ipos] = boxes[ipos]->selected; } for (long int ipos = 0; ipos < boxes.size(); ipos++) { if (MDbackup.numberOfObjects() < ipos+1) MDbackup.addObject(); // without the bool() cast, clang will interpret the formal template parameter // as a reference to a bit field, which is not the same as a boolean. MDbackup.setValue(EMDL_SELECTED, selected[ipos], ipos); } FileName fn_dir; if (fn_selected_imgs != "") fn_dir = fn_selected_imgs.beforeLastOf("/"); else if (fn_selected_parts != "") fn_dir = fn_selected_parts.beforeLastOf("/"); else fn_dir = "."; fn_dir += "/backup_selection.star"; MDbackup.write(fn_dir); std::cout <<" Written out " << fn_dir << std::endl; } void multiViewerCanvas::loadBackupSelection(bool do_ask) { FileName fn_sel, fn_dir; if (fn_selected_imgs != "") fn_dir = fn_selected_imgs.beforeLastOf("/"); else if (fn_selected_parts != "") fn_dir = fn_selected_parts.beforeLastOf("/"); else fn_dir = "."; fn_dir += "/"; if (do_ask) { Fl_File_Chooser chooser(fn_dir.c_str(), "(backup_selection.star)",Fl_File_Chooser::SINGLE,"Choose selection file to load"); // chooser type chooser.show(); // Block until user picks something. while(chooser.shown()) { Fl::wait(); } // User hit cancel? 
if ( chooser.value() == NULL ) return; FileName fnt(chooser.value()); fn_sel = fnt; } else fn_sel = fn_dir+"backup_selection.star"; MDbackup.clear(); MDbackup.read(fn_sel); if (MDbackup.numberOfObjects() != boxes.size()) { std::cerr << "Warning: ignoring .relion_display_backup_selection.star with unexpected number of entries..." << std::endl; return; } std::vector selected(boxes.size(), false); long int ipos = 0; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDbackup) { MDbackup.getValue(EMDL_SELECTED, selected[ipos]); ipos++; } for (long int ipos = 0; ipos < boxes.size(); ipos++) { long int my_sorted_ipos; if (boxes[ipos]->MDimg.containsLabel(EMDL_SORTED_IDX)) boxes[ipos]->MDimg.getValue(EMDL_SORTED_IDX, my_sorted_ipos); else my_sorted_ipos = ipos; boxes[ipos]->setSelect(selected[my_sorted_ipos]); } } void multiViewerCanvas::clearSelection() { for (long int ipos = 0; ipos < boxes.size(); ipos++) { boxes[ipos]->unSelect(); } } void multiViewerCanvas::invertSelection() { for (long int ipos = 0; ipos < boxes.size(); ipos++) { boxes[ipos]->toggleSelect(current_selection_type); } } void multiViewerCanvas::selectFromHereBelow(int iposp) { for (long int ipos = iposp; ipos < boxes.size(); ipos++) { boxes[ipos]->select(); } } void multiViewerCanvas::selectFromHereAbove(int iposp) { for (long int ipos = 0; ipos <= iposp; ipos++) { boxes[ipos]->select(); } } void multiViewerCanvas::printMetaData(int main_ipos) { std::ostringstream stream; if (do_class) { int myclass, iclass, nselected_classes = 0, nselected_particles = 0; for (long int ipos = 0; ipos < boxes.size(); ipos++) { if (boxes[ipos]->selected == SELECTED) { nselected_classes++; // Get class number (may not be ipos+1 if resorted!) boxes[ipos]->MDimg.getValue(EMDL_PARTICLE_CLASS, myclass); FOR_ALL_OBJECTS_IN_METADATA_TABLE(*MDdata) { MDdata->getValue(EMDL_PARTICLE_CLASS, iclass); if (iclass == myclass) nselected_particles++; } } } stream << "Selected " << nselected_particles << " particles in " << nselected_classes << " classes.\n"; } stream << "Below is the metadata table for the last clicked class/particle.\n"; boxes[main_ipos]->MDimg.write(stream); FileName str = stream.str(); // @ starts special symbol code in FLTK; we must escape it size_t pos = str.find('@', 0); while (pos != std::string::npos) { str.replace(pos, 1, (std::string)"@@" ); pos = str.find('@', pos + 2); } fl_message("%s",str.c_str()); } void multiViewerCanvas::showAverage(bool selected, bool show_stddev) { int xsize = boxes[0]->xsize_data; int ysize = boxes[0]->ysize_data; MultidimArray sum(ysize, xsize); MultidimArray sum2(ysize, xsize); int nn = 0; for (long int ipos = 0; ipos < boxes.size(); ipos++) { if (boxes[ipos]->selected == selected) { FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(sum) { int ival = boxes[ipos]->img_data[n]; if (ival < 0) ival += 256; DIRECT_MULTIDIM_ELEM(sum, n) += ival; DIRECT_MULTIDIM_ELEM(sum2, n) += ival * ival; } nn++; } } sum /= nn; sum2 /= nn; FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(sum) { DIRECT_MULTIDIM_ELEM(sum2, n) -= DIRECT_MULTIDIM_ELEM(sum, n) * DIRECT_MULTIDIM_ELEM(sum, n); } sum2 *= nn / (nn - 1); // Show the average if (show_stddev) { basisViewerWindow stddev(xsize, ysize, "Stddev"); stddev.fillSingleViewerCanvas(sum2, 0., 0., 0., 1.); // scale=1 now means: keep same scale as the one in the boxes!!! } else { basisViewerWindow avg(xsize, ysize, "Average"); avg.fillSingleViewerCanvas(sum, 0., 0., 0., 1.); // scale=1 now means: keep same scale as the one in the boxes!!! 
} } void multiViewerCanvas::showOriginalImage(int ipos) { // Make system call because otherwise the green drawing for distance measurements doesn't work.... FileName fn_img; boxes[ipos]->MDimg.getValue(display_label, fn_img); std::string cl = "relion_display --i " + fn_img + " --scale " + floatToString(ori_scale); cl += " --sigma_contrast " + floatToString(sigma_contrast); cl += " --black " + floatToString(minval); cl += " --white " + floatToString(maxval); switch (colour_scheme) { case (BLACKGREYREDSCALE): { cl += " --colour_fire"; break; } case (BLUEGREYWHITESCALE): { cl += " --colour_ice"; break; } case (BLUEGREYREDSCALE): { cl += " --colour_fire-n-ice"; break; } case (RAINBOWSCALE): { cl += " --colour_rainbow"; break; } case (CYANBLACKYELLOWSCALE): { cl += " --colour_difference"; break; } } // send job in the background cl += " &"; int res = system(cl.c_str()); /* FileName fn_img; boxes[ipos]->MDimg.getValue(display_label, fn_img); Image img; img.read(fn_img); basisViewerWindow win(CEIL(ori_scale*XSIZE(img())), CEIL(ori_scale*YSIZE(img())), fn_img.c_str()); if (sigma_contrast > 0.) { win.fillSingleViewerCanvas(img(), 0., 0., sigma_contrast, ori_scale); } else { win.fillSingleViewerCanvas(img(), boxes[ipos]->minval, boxes[ipos]->maxval, 0., ori_scale); } */ } void basisViewerCanvas::saveImage(int ipos) { #ifndef HAVE_PNG fl_message("Cannot save an image as PNG because libPNG was not linked during compilation."); #else using namespace gravis; Fl_File_Chooser chooser(".", // directory "PNG image (*.png)\tAll Files (*)*", // filter Fl_File_Chooser::CREATE, // chooser type "Save as"); // title chooser.show(); // Block until user picks something. while(chooser.shown()) { Fl::wait(); } // User hit cancel? if ( chooser.value() == NULL ) return; int xsize = boxes[ipos]->xsize_data; int ysize = boxes[ipos]->ysize_data; unsigned char* img_data = boxes[ipos]->img_data; tImage pngOut(xsize, ysize); pngOut.fill(bRGB(0)); for (size_t n = 0, nlim = xsize * ysize; n < nlim; n++) { if (colour_scheme == GREYSCALE) { unsigned char c = img_data[n]; pngOut[n] = bRGB(c, c, c); } else { pngOut[n] = bRGB(img_data[3 * n], img_data[3 * n + 1], img_data[3 * n + 2]); } } pngOut.writePNG(chooser.value()); #endif } void multiViewerCanvas::showFourierAmplitudes(int ipos) { // Make system call because otherwise the green drawing for distance measurements doesn't work.... FileName fn_img; Image img; boxes[ipos]->MDimg.getValue(display_label, fn_img); img.read(fn_img, false); if ( (ZSIZE(img()) > 1) || (NSIZE(img()) > 1) ) { fl_message("Cannot display Fourier transform of STAR files, 3D images or stacks. Please select a 2D image as input."); return; } std::string cl = "relion_display --i " + fn_img + " --scale " + floatToString(ori_scale); if (sigma_contrast > 0.) cl += " --sigma_contrast " + floatToString(sigma_contrast); cl += " --show_fourier_amplitudes"; // send job in the background cl += " &"; int res = system(cl.c_str()); } void multiViewerCanvas::showFourierPhaseAngles(int ipos) { // Make system call because otherwise the green drawing for distance measurements doesn't work.... FileName fn_img; Image img; boxes[ipos]->MDimg.getValue(display_label, fn_img); img.read(fn_img, false); if ( (ZSIZE(img()) > 1) || (NSIZE(img()) > 1) ) { fl_message("Cannot display Fourier transform of STAR files, 3D images or stacks. 
Please select a 2D image as input."); return; } std::string cl = "relion_display --i " + fn_img + " --scale " + floatToString(ori_scale); cl += " --show_fourier_phase_angles"; // send job in the background cl += " &"; int res = system(cl.c_str()); } void multiViewerCanvas::showHelicalLayerLineProfile(int ipos) { const char * default_pdf_viewer = getenv ("RELION_PDFVIEWER_EXECUTABLE"); char hardcoded_pdf_viewer[]=DEFAULTPDFVIEWER; if (default_pdf_viewer == NULL) { default_pdf_viewer=hardcoded_pdf_viewer; } std::string mydefault = std::string(default_pdf_viewer); std::string command; FileName fn_img, fn_out; Image img; boxes[ipos]->MDimg.getValue(display_label, fn_img); img.read(fn_img); fn_out = "layerlineprofile.eps"; if (exists(fn_out)) { command = "rm -rf " + fn_out; int res = system(command.c_str()); } helicalLayerLineProfile(img(), fn_img, fn_out); command = mydefault + " " + fn_out + " &"; int res = system(command.c_str()); } void multiViewerCanvas::makeStarFileSelectedParticles(int selected, MetaDataTable &MDpart) { MDpart.clear(); int myclass, iclass; for (long int ipos = 0; ipos < boxes.size(); ipos++) { if (boxes[ipos]->selected == selected) { // Get class number (may not be ipos+1 if resorted!) boxes[ipos]->MDimg.getValue(EMDL_PARTICLE_CLASS, myclass); FOR_ALL_OBJECTS_IN_METADATA_TABLE(*MDdata) { MDdata->getValue(EMDL_PARTICLE_CLASS, iclass); if (iclass == myclass) MDpart.addObject(MDdata->getObject()); } } } if (max_nr_parts_per_class > 0) { // Randomise the order, to pick random particles from each class // Unfortunately, this leads to random order of particles in the output file! So be it for now... MetaDataTable MDtmp = MDpart; MDpart.clear(); MDtmp.sort(EMDL_UNDEFINED, false, false, true); for (long int ipos = 0; ipos < boxes.size(); ipos++) { if (boxes[ipos]->selected == selected) { int nr_selected = 0; boxes[ipos]->MDimg.getValue(EMDL_PARTICLE_CLASS, myclass); FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDtmp) { MDtmp.getValue(EMDL_PARTICLE_CLASS, iclass); if (iclass == myclass) { MDpart.addObject(MDtmp.getObject()); nr_selected++; if (nr_selected >= max_nr_parts_per_class) break; } } } } } // Maintain the original image ordering if (MDpart.containsLabel(EMDL_SORTED_IDX)) MDpart.sort(EMDL_SORTED_IDX); } void multiViewerCanvas::saveSelectedParticles(int save_selected) { if (fn_selected_parts == "") { std::cout << " Not saving selected particles, as no filename was provided..." << std::endl; return; } //#define RELION_DEVEL_ASKTRAINING #ifdef RELION_DEVEL_ASKTRAINING bool do_training = false; std::string ask = "Is this a selection of good classes, so it can be used for Sjors' training set for automated class selection?\n \ More info here: /public/EM/RELION/training.txt\n"; do_training = fl_choice("%s", "Don't use", "Use for training", NULL, ask.c_str()); if (do_training) saveTrainingSet(); #endif MetaDataTable MDpart; makeStarFileSelectedParticles(save_selected, MDpart); if (nr_regroups > 0) regroupSelectedParticles(MDpart, *MDgroups, nr_regroups); int nparts = MDpart.numberOfObjects(); if (nparts > 0) { obsModel->save(MDpart, fn_selected_parts, "particles"); std::cout << "Saved " << fn_selected_parts << " with " << nparts << " selected particles." << std::endl; } else std::cout <<" No classes selected. Please select one or more classes..." << std::endl; } void regroupSelectedParticles(MetaDataTable &MDdata, MetaDataTable &MDgroups, int nr_regroups) { // This function modify MDgroups, which will not be written anyway. 
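// Outline of what follows: every scale group is first tagged with the optics group of its
// particles and with its particle count within this selection; the groups are then sorted on
// their refined intensity scale factor and greedily merged, one optics group at a time, until a
// merged group holds roughly nr_parts / nr_regroups particles. Merged groups receive sequential
// zero-padded names ("group_...") via EMDL_MLMODEL_GROUP_NAME, and EMDL_MLMODEL_GROUP_NO is
// deactivated afterwards because the old numbering is no longer valid.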
if (nr_regroups <= 0) return; int max_optics_group_id = -1; // Find out which optics group each scale group belongs to // Also initialise rlnGroupNrParticles for this selection FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDgroups) { MDgroups.setValue(EMDL_IMAGE_OPTICS_GROUP, -1); MDgroups.setValue(EMDL_MLMODEL_GROUP_NR_PARTICLES, 0); } FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDdata) { long group_id, part_optics_id, group_optics_id; int nr_parts; MDdata.getValue(EMDL_MLMODEL_GROUP_NO, group_id); // 1-indexed MDdata.getValue(EMDL_IMAGE_OPTICS_GROUP, part_optics_id); MDgroups.getValue(EMDL_IMAGE_OPTICS_GROUP, group_optics_id, group_id - 1); // 0-indexed if (group_optics_id == -1) { MDgroups.setValue(EMDL_IMAGE_OPTICS_GROUP, part_optics_id, group_id - 1); if (max_optics_group_id < part_optics_id) max_optics_group_id = part_optics_id; } else if (group_optics_id != part_optics_id) { std::cerr << "WARNING: group_no " << group_id << " contains particles from multiple optics groups." << std::endl; } MDgroups.getValue(EMDL_MLMODEL_GROUP_NR_PARTICLES, nr_parts, group_id - 1); MDgroups.setValue(EMDL_MLMODEL_GROUP_NR_PARTICLES, nr_parts + 1, group_id - 1); } // First sort the MDgroups based on refined intensity scale factor MDgroups.sort(EMDL_MLMODEL_GROUP_SCALE_CORRECTION); // Store original image order long int nr_parts = MDdata.numberOfObjects(); for (long int j = 0; j < nr_parts; j++) MDdata.setValue(EMDL_SORTED_IDX, j, j); // Average group size long average_group_size = nr_parts / nr_regroups; if (average_group_size < 10) REPORT_ERROR("Each group should have at least 10 particles"); int fillgroupschar = (int)(floor(log(nr_regroups) / log(10))) + 1; std::map new_group_names; std::map::iterator it; // Loop through all existing, sorted groups long new_group_id = 0; // Worst case: O(old_nr_groups ^ 2) = O(mic ^ 2) // We can reduce this by using one more hash but this should be enough. for (long optics_group_id = 1; optics_group_id <= max_optics_group_id; optics_group_id++) { long nr_parts_in_new_group = 0; new_group_id++; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDgroups) { long group_id, group_optics_id; int nr_parts; MDgroups.getValue(EMDL_IMAGE_OPTICS_GROUP, group_optics_id); if (group_optics_id != optics_group_id) continue; MDgroups.getValue(EMDL_MLMODEL_GROUP_NO, group_id); MDgroups.getValue(EMDL_MLMODEL_GROUP_NR_PARTICLES, nr_parts); nr_parts_in_new_group += nr_parts; if (nr_parts_in_new_group > average_group_size) { // This group is now full: start a new one new_group_id++; nr_parts_in_new_group = 0; } new_group_names[group_id] = "group_" + integerToString(new_group_id, fillgroupschar); } } FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDdata) { long group_id; MDdata.getValue(EMDL_MLMODEL_GROUP_NO, group_id); it = new_group_names.find(group_id); if (it != new_group_names.end()) { MDdata.setValue(EMDL_MLMODEL_GROUP_NAME, new_group_names[group_id]); } else { std::cerr << "Logic error: cannot find group_id " << group_id << " during remapping." 
<< std::endl; REPORT_ERROR("Failed in regrouping"); } } MDdata.deactivateLabel(EMDL_MLMODEL_GROUP_NO); // no longer valid std::cout <<" Regrouped particles into " << new_group_id << " groups" << std::endl; } void multiViewerCanvas::showSelectedParticles(int save_selected) { MetaDataTable MDpart; makeStarFileSelectedParticles(save_selected, MDpart); int nparts = MDpart.numberOfObjects(); if (nparts > 0) { basisViewerWindow win(MULTIVIEW_WINDOW_WIDTH, MULTIVIEW_WINDOW_HEIGHT, "Particles in the selected classes"); win.fillCanvas(MULTIVIEWER, MDpart, obsModel, EMDL_IMAGE_NAME, text_label, do_read_whole_stacks, do_apply_orient, 0., 0., 0., boxes[0]->scale, ori_scale, ncol, multi_max_nr_images); } else std::cout <<" No classes selected. First select one or more classes..." << std::endl; } void multiViewerCanvas::saveTrainingSet() { FileName fn_rootdir = "/net/dstore1/teraraid3/scheres/trainingset/"; // Make the output job directory char my_dir[200]; FileName fn_projdir = std::string(getcwd(my_dir, 200)); std::replace( fn_projdir.begin(), fn_projdir.end(), '/', '_'); fn_projdir += "/" + (fn_selected_parts.afterFirstOf("/")).beforeLastOf("/"); FileName fn_odir = fn_rootdir + fn_projdir; std::string command = "mkdir -p " + fn_odir + " ; chmod 777 " + fn_odir; int res = system(command.c_str()); // Now save the selected images in a MetaData file. MetaDataTable MDout; int nsel = 0; for (long int ipos = 0; ipos < boxes.size(); ipos++) { MDout.addObject(boxes[ipos]->MDimg.getObject()); if (boxes[ipos]->selected) MDout.setValue(EMDL_SELECTED, 1); else MDout.setValue(EMDL_SELECTED, 0); } // Maintain the original image ordering if (MDout.containsLabel(EMDL_SORTED_IDX)) MDout.sort(EMDL_SORTED_IDX); // Copy all images long int nr; FileName fn_img, fn_new_img, fn_iroot, fn_old=""; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDout) { MDout.getValue(display_label, fn_img); fn_img.decompose(nr, fn_img); fn_new_img.compose(nr, fn_img.afterLastOf("/")); MDout.setValue(display_label, fn_new_img); if (fn_img != fn_old) // prevent multiple copies of single stack from Class2D copy(fn_img, fn_odir+"/"+fn_img.afterLastOf("/")); fn_old = fn_img; } fn_iroot = fn_img.beforeFirstOf("_class"); // Copy rest of metadata fn_img = fn_iroot + "_model.star"; copy(fn_img, fn_odir+"/"+fn_img.afterLastOf("/")); fn_img = fn_iroot + "_optimiser.star"; copy(fn_img, fn_odir+"/"+fn_img.afterLastOf("/")); fn_img = fn_iroot + "_data.star"; copy(fn_img, fn_odir+"/"+fn_img.afterLastOf("/")); fn_img = fn_iroot + "_sampling.star"; copy(fn_img, fn_odir+"/"+fn_img.afterLastOf("/")); fn_iroot = fn_iroot.beforeLastOf("/"); fn_img = fn_iroot + "/note.txt"; copy(fn_img, fn_odir+"/"+fn_img.afterLastOf("/")); fn_img = fn_iroot + "/run_unmasked_classes.mrcs"; if (exists(fn_img)) { copy(fn_img, fn_odir+"/"+fn_img.afterLastOf("/")); } fn_img = fn_iroot + "/default_pipeline.star"; copy(fn_img, fn_odir+"/"+fn_img.afterLastOf("/")); // Save the actual selection selection MDout.write(fn_odir + "/selected.star"); // Give everyone permissions to this directory and its files //command = " chmod 777 -R " + fn_odir.beforeLastOf("/"); //if (system(command.c_str())) // REPORT_ERROR("ERROR in executing: " + command); std::cout << "Saved selection to Sjors' training directory. Thanks for helping out!" << std::endl; } void multiViewerCanvas::saveSelected(int save_selected) { if (fn_selected_imgs == "") return; // Now save the selected images in a MetaData file. 
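// Outline of what follows: the metadata rows of all boxes whose 'selected' flag matches
// save_selected are copied into MDout and restored to their original order via EMDL_SORTED_IDX.
// If do_recenter is set, the class averages are re-centered and written to a new .mrcs stack so
// that the saved STAR file points at the recentered images. The table is then written through
// the ObservationModel as micrographs, movies or particles (depending on which labels are
// present), or as a plain STAR file when no optics table is available.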
MetaDataTable MDout; int nsel = 0; for (long int ipos = 0; ipos < boxes.size(); ipos++) { if (boxes[ipos]->selected == save_selected) { nsel++; MDout.addObject(boxes[ipos]->MDimg.getObject()); } } if (nsel > 0) { // Maintain the original image ordering if (MDout.containsLabel(EMDL_SORTED_IDX)) MDout.sort(EMDL_SORTED_IDX); // If the images were re-centered to the center-of-mass, then output the recentered images, and change the names of the images in the MDout. if (do_recenter) { FileName fn_stack = fn_selected_imgs.withoutExtension()+".mrcs"; FileName fn_img, fn_out; Image img; long int i = 0; long int nr_images = MDout.numberOfObjects(); FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDout) { i++; MDout.getValue(EMDL_MLMODEL_REF_IMAGE, fn_img); img.read(fn_img); selfTranslateCenterOfMassToCenter(img()); fn_out.compose(i, fn_stack); MDout.setValue(EMDL_MLMODEL_REF_IMAGE, fn_out); if (i == 1) img.write(fn_stack, -1, (nr_images > 1), WRITE_OVERWRITE); else img.write(fn_stack, -1, false, WRITE_APPEND); } } if (obsModel->opticsMdt.numberOfObjects() > 0 && !do_class) { if (metadata_table_name == "micrographs" || (!MDout.containsLabel(EMDL_IMAGE_NAME) && !MDout.containsLabel(EMDL_MICROGRAPH_MOVIE_NAME))) { obsModel->save(MDout, fn_selected_imgs, "micrographs"); std::cout << "Saved "<< fn_selected_imgs << " with " << nsel << " selected micrographs." << std::endl; } else if (metadata_table_name == "movies" || (!MDout.containsLabel(EMDL_IMAGE_NAME) && MDout.containsLabel(EMDL_MICROGRAPH_MOVIE_NAME))) { obsModel->save(MDout, fn_selected_imgs, "movies"); std::cout << "Saved "<< fn_selected_imgs << " with " << nsel << " selected movies." << std::endl; } else { obsModel->save(MDout, fn_selected_imgs, "particles"); std::cout << "Saved "<< fn_selected_imgs << " with " << nsel << " selected particles." << std::endl; } } else { MDout.write(fn_selected_imgs); std::cout << "Saved "<< fn_selected_imgs << " with " << nsel << " selected images." << std::endl; } } else std::cout <<" No images to save...." 
<< std::endl; } void basisViewerCanvas::setSelectionType() { popupSelectionTypeWindow win(250, 50, "Set selection type"); win.fill(); } int popupSelectionTypeWindow::fill() { color(GUI_BACKGROUND_COLOR); choice = new Fl_Choice(50, 10, 130, 30, "type: ") ; choice->menu(color_choices); choice->color(GUI_INPUT_COLOR); choice->value(current_selection_type - 1); choice->callback(cb_set, this); Fl_Button * closebutton = new Fl_Button(190, 10, 50, 30, "Close"); closebutton->color(GUI_RUNBUTTON_COLOR); closebutton->callback(cb_close, this); show(); return Fl::run(); } int singleViewerCanvas::handle(int ev) { if (ev==FL_PUSH && Fl::event_button() == FL_LEFT_MOUSE) { int rx = (int)Fl::event_x() - scroll->x() + scroll->hscrollbar.value(); int ry = (int)Fl::event_y() - scroll->y() + scroll->scrollbar.value(); // Left mouse click writes value and coordinates to screen if (rx < boxes[0]->xsize_data && ry < boxes[0]->ysize_data && rx >= 0 && ry >=0) { unsigned char ival; int n = ry*boxes[0]->xsize_data + rx; if (colour_scheme == GREYSCALE) ival = boxes[0]->img_data[n]; else ival = rgbToGrey(boxes[0]->img_data[3*n], boxes[0]->img_data[3*n+1], boxes[0]->img_data[3*n+2]); RFLOAT step = (boxes[0]->maxval - boxes[0]->minval) / 255.; RFLOAT dval = ival * step + boxes[0]->minval; int ysc = ROUND(ry/boxes[0]->scale); int xsc = ROUND(rx/boxes[0]->scale); int yscp = ysc - ROUND((boxes[0]->ysize_data/(2.* boxes[0]->scale))); int xscp = xsc - ROUND((boxes[0]->xsize_data/(2.* boxes[0]->scale))); std::cout <<" Image value at (" << xsc << "," << ysc << ") or (" << xscp << "," << yscp << ")~= " << dval << std::endl; } return(1); } else if (ev==FL_PUSH && Fl::event_button() == FL_RIGHT_MOUSE) { Fl_Menu_Item rclick_menu[] = { { "Show metadata" }, { "Save image as PNG" }, { "Help" }, { "Quit" }, { 0 } }; const Fl_Menu_Item *m = rclick_menu->popup(Fl::event_x(), Fl::event_y(), 0, 0, 0); if ( !m ) return 0; if ( strcmp(m->label(), "Show metadata") == 0 ) printMetaData(); else if ( strcmp(m->label(), "Save image as PNG") == 0 ) saveImage(); else if ( strcmp(m->label(), "Help") == 0 ) printHelp(); else if ( strcmp(m->label(), "Quit") == 0 ) exit(0); return(1); // (tells caller we handled this event) } else if (ev==FL_PUSH && Fl::event_button() == FL_MIDDLE_MOUSE) { // Middle-mouse dragging for measuring distances if (!has_dragged) { redraw(); predrag_xc = (int)Fl::event_x(); predrag_yc = (int)Fl::event_y(); has_dragged = true; fl_color(FL_RED); fl_circle(predrag_xc, predrag_yc, 3); } return(1); } else if (ev==FL_DRAG && Fl::event_button() == FL_MIDDLE_MOUSE) { fl_color(FL_RED); fl_circle(predrag_xc, predrag_yc, 3); } else if (ev==FL_RELEASE && Fl::event_button() == FL_MIDDLE_MOUSE) { int postdrag_xc = (int)Fl::event_x(); int postdrag_yc = (int)Fl::event_y(); if (has_dragged) { fl_color(FL_RED); fl_circle(predrag_xc, predrag_yc, 3); fl_line(predrag_xc, predrag_yc, postdrag_xc, postdrag_yc); fl_circle(postdrag_xc, postdrag_yc, 3); int dx = postdrag_xc - predrag_xc; int dy = postdrag_yc - predrag_yc; RFLOAT dist = sqrt((RFLOAT)(dx*dx + dy*dy)); std::string text = floatToString(dist/boxes[0]->scale) + " pixels"; fl_draw(text.c_str(), (postdrag_xc + predrag_xc)/2, (postdrag_yc + predrag_yc)/2); // Also write to the screen, in case the text falls outside the screen std::cout << "distance= " << dist/boxes[0]->scale << " pixels" << std::endl; has_dragged = false; } return(1); } return 0; } void singleViewerCanvas::printHelp() { std::cout <<" + Left-mouse click: print coordinates and intensity value to screen " << std::endl; std::cout <<" + Middle-mouse drag: measure distances " << std::endl; std::cout <<" + Right-mouse click: pop-up menu" << std::endl; } /* int popupSetContrastWindow::fill() { color(GUI_BACKGROUND_COLOR); int width = 435; int x=150, y=15, ystep = 27,
height = 25, inputwidth = 50; int x2 = width - inputwidth - 50; // Always display these: scale = new Fl_Input(x, y, inputwidth, height, "Scale:"); scale->color(GUI_INPUT_COLOR); scale->value("1"); minval = new Fl_Input(x2, y, inputwidth, height, "Black value:"); minval->value("0"); minval->color(GUI_INPUT_COLOR); y += ystep; sigma_contrast = new Fl_Input(x, y, inputwidth, height, "Sigma contrast:"); sigma_contrast->value("0"); sigma_contrast->color(GUI_INPUT_COLOR); maxval = new Fl_Input(x2, y, inputwidth, height, "White value:"); maxval->value("0"); maxval->color(GUI_INPUT_COLOR); y += ROUND(ystep); Fl_Button * applybutton = new Fl_Button(width-120, y, 70, 30, "Apply!"); applybutton->color(GUI_RUNBUTTON_COLOR); applybutton->callback( cb_set, this); Fl_Button * closebutton = new Fl_Button(width -200, y, 70, 30, "Close"); closebutton->color(GUI_RUNBUTTON_COLOR); closebutton->callback( cb_close, this); show(); return Fl::run(); } */ void pickerViewerCanvas::draw() { RFLOAT scale = boxes[0]->scale; long int icoord = 0; int xcoori_start, ycoori_start; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDcoords) { icoord++; RFLOAT xcoor, ycoor; MDcoords.getValue(EMDL_IMAGE_COORD_X, xcoor); MDcoords.getValue(EMDL_IMAGE_COORD_Y, ycoor); if (color_label != EMDL_UNDEFINED) { RFLOAT colval; if (EMDL::isInt(color_label)) { int ival; if (!MDcoords.getValue(color_label, ival)) { ival = 2; // populate as green if absent MDcoords.setValue(color_label, ival); } colval = (RFLOAT)ival; if (ival >= 1 && ival <= NUM_COLORS) fl_color(color_choices[ival - 1].labelcolor_); else fl_color(FL_GREEN); } else { MDcoords.getValue(color_label, colval); // Assume undefined values are set to -999.... if ((colval + 999.) < XMIPP_EQUAL_ACCURACY) { fl_color(FL_GREEN); } else { colval = XMIPP_MAX(colval, smallest_color_value); colval = XMIPP_MIN(colval, biggest_color_value); unsigned char red, blue; if (do_blue_to_red) { red = ROUND(255. * (colval - smallest_color_value) / (biggest_color_value - smallest_color_value)); blue = ROUND(255. * (biggest_color_value - colval) / (biggest_color_value - smallest_color_value)); } else { blue = ROUND(255. * (colval - smallest_color_value) / (biggest_color_value - smallest_color_value)); red = ROUND(255. 
* (biggest_color_value - colval) / (biggest_color_value - smallest_color_value)); } fl_color(red, 0, blue); } } } else { fl_color(FL_GREEN); } int xcoori, ycoori; xcoori = ROUND(xcoor * coord_scale * scale) + scroll->x() - scroll->hscrollbar.value(); ycoori = ROUND(ycoor * coord_scale * scale) + scroll->y() - scroll->scrollbar.value(); fl_circle(xcoori, ycoori, particle_radius); if (do_startend) { if (icoord % 2 == 1) { xcoori_start = xcoori; ycoori_start = ycoori; } else { fl_line(xcoori_start, ycoori_start, xcoori, ycoori); } } } } int pickerViewerCanvas::handle(int ev) { const int button = Fl::event_button() ; const bool with_shift = (Fl::event_shift() != 0); const bool with_control = (Fl::event_ctrl() != 0); const int key = Fl::event_key(); if (ev==FL_PUSH || (ev==FL_DRAG && (button == FL_MIDDLE_MOUSE || (button == FL_LEFT_MOUSE && with_shift)))) { RFLOAT scale = boxes[0]->scale; int xc = (int)Fl::event_x() - scroll->x() + scroll->hscrollbar.value(); int yc = (int)Fl::event_y() - scroll->y() + scroll->scrollbar.value(); RFLOAT xcoor = (RFLOAT)ROUND(xc / (coord_scale * scale)); RFLOAT ycoor = (RFLOAT)ROUND(yc / (coord_scale * scale)); RFLOAT rad2 = particle_radius * particle_radius / (coord_scale * coord_scale * scale * scale); if (button == FL_LEFT_MOUSE && !with_shift && !with_control) { // Left mouse for picking // Check the pick is not inside an existing circle RFLOAT xcoor_p, ycoor_p; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDcoords) { MDcoords.getValue(EMDL_IMAGE_COORD_X, xcoor_p); MDcoords.getValue(EMDL_IMAGE_COORD_Y, ycoor_p); xcoor_p -= xcoor; ycoor_p -= ycoor; if (xcoor_p*xcoor_p + ycoor_p*ycoor_p < rad2) return 0; } RFLOAT aux = -999., zero = 0.; int iaux = current_selection_type; // Else store new coordinate if (!MDcoords.isEmpty()) { // If there were already entries in MDcoords, then copy the last one. 
// This will take care of re-picking in coordinate files from previous refinements long int last_idx = MDcoords.numberOfObjects() - 1; MDcoords.addObject(MDcoords.getObject(last_idx)); RFLOAT aux2; if (MDcoords.getValue(EMDL_ORIENT_ROT, aux2)) MDcoords.setValue(EMDL_ORIENT_ROT, aux); if (MDcoords.getValue(EMDL_ORIENT_TILT, aux2)) MDcoords.setValue(EMDL_ORIENT_TILT, aux); if (MDcoords.getValue(EMDL_ORIENT_ORIGIN_X_ANGSTROM, aux2)) MDcoords.setValue(EMDL_ORIENT_ORIGIN_X_ANGSTROM, zero); if (MDcoords.getValue(EMDL_ORIENT_ORIGIN_Y_ANGSTROM, aux2)) MDcoords.setValue(EMDL_ORIENT_ORIGIN_Y_ANGSTROM, zero); } else MDcoords.addObject(); MDcoords.setValue(EMDL_IMAGE_COORD_X, xcoor); MDcoords.setValue(EMDL_IMAGE_COORD_Y, ycoor); // No autopicking, but still always fill in the parameters for autopicking with dummy values (to prevent problems in joining autopicked and manually picked coordinates) MDcoords.setValue(EMDL_PARTICLE_CLASS, iaux); MDcoords.setValue(EMDL_ORIENT_PSI, aux); MDcoords.setValue(EMDL_PARTICLE_AUTOPICK_FOM, aux); redraw(); return 1; } else if ((button == FL_MIDDLE_MOUSE) || (button == FL_LEFT_MOUSE && with_shift)) { boxes[0]->redraw(); // Middle mouse for deleting RFLOAT xcoor_p, ycoor_p; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDcoords) { MDcoords.getValue(EMDL_IMAGE_COORD_X, xcoor_p); MDcoords.getValue(EMDL_IMAGE_COORD_Y, ycoor_p); xcoor_p -= xcoor; ycoor_p -= ycoor; if (xcoor_p*xcoor_p + ycoor_p*ycoor_p < rad2) { MDcoords.removeObject(); break; } } redraw(); return 1; } else if ((button == FL_RIGHT_MOUSE) || (button == FL_LEFT_MOUSE && with_control)) { redraw(); Fl_Menu_Item rclick_menu[] = { { "Save STAR with coordinates (CTRL-s)" }, // { "Save_as STAR with coordinates" }, { "Load coordinates" }, { "Reload coordinates" }, { "Clear coordinates" }, { "Set selection type" }, { "Help" }, { "Quit (CTRL-q)" }, { 0 } }; const Fl_Menu_Item *m = rclick_menu->popup(Fl::event_x(), Fl::event_y(), 0, 0, 0); if ( !m ) return 0; else if ( strcmp(m->label(), "Save STAR with coordinates (CTRL-s)") == 0 ) saveCoordinates(false); // else if ( strcmp(m->label(), "Save_as STAR with coordinates") == 0 ) // saveCoordinates(true); else if ( strcmp(m->label(), "Load coordinates") == 0 ) loadCoordinates(true); else if ( strcmp(m->label(), "Reload coordinates") == 0 ) loadCoordinates(false); else if ( strcmp(m->label(), "Clear coordinates") == 0 ) clearCoordinates(); else if ( strcmp(m->label(), "Set selection type") == 0) setSelectionType(); else if ( strcmp(m->label(), "Help") == 0 ) printHelp(); else if ( strcmp(m->label(), "Quit (CTRL-q)") == 0 ) exit(0); redraw(); return 1; // (tells caller we handled this event) } return 0; } // Update the drawing every time something happens .... else if (ev==FL_RELEASE || ev==FL_LEAVE || ev==FL_ENTER || ev==FL_MOVE || ev == FL_FOCUS || ev == FL_UNFOCUS) { redraw(); return 1; } // CTRL-s will save the coordinates in a picker window else if (with_control) { if (key == 's') { saveCoordinates(false); sleep(1); // to prevent multiple saves... dirty but don't know how to do this otherwise... 
return 1; // (tells caller we handled this event) } else if (key == 'q') { sleep(1); exit(0); return 1; // (tells caller we handled this event) } else if (key >= '1' && key <= '6') { std::cout << "debug key = " << key << std::endl; current_selection_type = key - '0'; return 1; } } return 0; } void pickerViewerCanvas::saveCoordinates(bool ask_filename) { FileName fn_out; if (ask_filename) { char *newfile; newfile = fl_file_chooser("Save File As?", "*.star", ""); if (newfile == NULL) return; FileName fn_tmp(newfile); fn_out = fn_tmp; } else { fn_out = (fn_coords=="") ? "picked.star" : fn_coords; } FileName fn_dirs = fn_coords.beforeLastOf("/"); if (!(exists(fn_dirs))) { std::string command = "mkdir -p " + fn_dirs; int res = system(command.c_str()); } // Never write out columns that come from the fn_color file.... if (fn_color != "" && color_label != EMDL_UNDEFINED) { MetaDataTable MDnew = MDcoords; MDnew.deactivateLabel(color_label); MDnew.write(fn_out); // write out a copy of the MDcoord to maintain the Z-score label active... } else { MDcoords.write(fn_out); } std::cout << "Saved "< XMIPP_EQUAL_ACCURACY) { std::cerr << " _fn_img= " << _fn_img << " iimg= " << iimg << " _fn_mic= " << _fn_mic << std::endl; std::cerr << " x= " << x << " my_xpos= " << my_xpos << std::endl; std::cerr << " y= " << y << " my_ypos= " << my_ypos << std::endl; REPORT_ERROR("The image in the --color star file does not have the same coordinates as the ones in the --coord file!"); } else { if (EMDL::isInt(color_label)) { int ival; MDcolor.getValue(color_label, ival); MDcoords.setValue(color_label, ival, iimg); } else { RFLOAT val; MDcolor.getValue(color_label, val); MDcoords.setValue(color_label, val, iimg); } } } } } void pickerViewerCanvas::clearCoordinates() { boxes[0]->redraw(); MDcoords.clear(); } void pickerViewerCanvas::printHelp() { std::cout <<" + Left-mouse click: pick particles " << std::endl; std::cout <<" + Middle-mouse click: delete particles " << std::endl; std::cout <<" + Right-mouse click: pop-up menu" << std::endl; std::cout <<" + Refresh picked particles by moving out of the window and back in again ..." 
<< std::endl; } void singleViewerCanvas::printMetaData() { boxes[0]->MDimg.write(std::cout); } /* void singleViewerCanvas::setContrast() { popupSetContrastWindow win(400, 100, "Set contrast"); win.fill(); } */ // Fill GUI window int displayerGuiWindow::fill(FileName &_fn_in) { // display image name at the top fn_in = _fn_in; FileName fn_short = _fn_in.removeDirectories(); color(GUI_BACKGROUND_COLOR); int width = 485; Fl_Text_Display *mydisp = new Fl_Text_Display(15, 15, width-15, 25,""); Fl_Text_Buffer *textbuff = new Fl_Text_Buffer(); textbuff->text(fn_short.c_str()); mydisp->buffer(textbuff); int x=170, y=15, ystep = 27, height = 25, inputwidth = 50, inputwidth2=30; int x2 = width - inputwidth - 50; y += ROUND(1.5*ystep); // General box Fl_Box *box1 = new Fl_Box(15, y-ROUND(0.25*ystep), width - 15, ROUND(2.5*ystep), ""); box1->color(GUI_BACKGROUND_COLOR); box1->box(FL_DOWN_BOX); // Always display these: scale_input = new Fl_Input(x, y, inputwidth, height, "Scale:"); scale_input->color(GUI_INPUT_COLOR); scale_input->value("1"); black_input = new Fl_Input(x2-110, y, inputwidth, height, "Min:"); black_input->value("0"); black_input->color(GUI_INPUT_COLOR); white_input = new Fl_Input(x2, y, inputwidth, height, "Max:"); white_input->value("0"); white_input->color(GUI_INPUT_COLOR); y += ystep; sigma_contrast_input = new Fl_Input(x, y, inputwidth, height, "Sigma contrast:"); sigma_contrast_input->value("0"); sigma_contrast_input->color(GUI_INPUT_COLOR); colour_scheme_choice = new Fl_Choice(x2-110, y, inputwidth+110, height, "Color:"); colour_scheme_choice->add("greyscale", 0, 0,0, FL_MENU_VALUE); colour_scheme_choice->add("fire", 0, 0,0, FL_MENU_VALUE); colour_scheme_choice->add("ice", 0, 0,0, FL_MENU_VALUE); colour_scheme_choice->add("fire-n-ice", 0, 0,0, FL_MENU_VALUE); colour_scheme_choice->add("rainbow", 0, 0,0, FL_MENU_VALUE); colour_scheme_choice->add("difference", 0, 0,0, FL_MENU_VALUE); colour_scheme_choice->picked(colour_scheme_choice->menu()); colour_scheme_choice->color(GUI_INPUT_COLOR); y += ROUND(1.75*ystep); if (is_star) { // STAR box Fl_Box *box2 = new Fl_Box(15, y-ROUND(0.25*ystep), width - 15, ROUND(3.5*ystep), ""); box2->color(GUI_BACKGROUND_COLOR); box2->box(FL_DOWN_BOX); display_choice = new Fl_Choice(x, y, width-x, height, "Display:"); for (int i = 0; i < display_labels.size(); i++) display_choice->add(display_labels[i].c_str(), 0, 0,0, FL_MENU_VALUE); display_choice->picked(display_choice->menu()); display_choice->color(GUI_INPUT_COLOR); y += ystep; sort_button = new Fl_Check_Button(35,y,height,height, "Sort images "); sort_choice = new Fl_Choice(x, y, width-x, height, "on:"); for (int i = 0; i < sort_labels.size(); i++) sort_choice->add(sort_labels[i].c_str(), 0, 0,0, FL_MENU_VALUE); sort_choice->add("RANDOMLY", 0, 0,0, FL_MENU_VALUE); sort_choice->picked(sort_choice->menu()); sort_choice->color(GUI_INPUT_COLOR); y += ystep; reverse_sort_button = new Fl_Check_Button(35, y, inputwidth, height, "Reverse sort?"); reverse_sort_button->color(GUI_INPUT_COLOR); apply_orient_button = new Fl_Check_Button(x, y, inputwidth, height, "Apply orientations?"); apply_orient_button->color(GUI_INPUT_COLOR); read_whole_stack_button = new Fl_Check_Button(x+160, y, inputwidth, height, "Read whole stacks?"); read_whole_stack_button->color(GUI_INPUT_COLOR); y += ROUND(1.75*ystep); } // Optional display options if (is_multi) { // Multiview box Fl_Box *box3; if (do_allow_save && fn_parts != "") box3 = new Fl_Box(15, y-ROUND(0.25*ystep), width - 15, ROUND(2.5*ystep), ""); else box3 = new Fl_Box(15, 
y-ROUND(0.25*ystep), width - 15, ROUND(1.5*ystep), ""); box3->color(GUI_BACKGROUND_COLOR); box3->box(FL_DOWN_BOX); int x1p = 125; int x2p = x1p + 130; int x3p = x2p + 170; col_input = new Fl_Input(x1p, y, 40, height, "Nr. columns:"); col_input->value("5"); col_input->color(GUI_INPUT_COLOR); ori_scale_input = new Fl_Input(x2p, y, 40, height, "Ori scale:"); ori_scale_input->value("1"); ori_scale_input->color(GUI_INPUT_COLOR); max_nr_images_input = new Fl_Input(x3p, y, 40, height, "Max. nr. images:"); max_nr_images_input->value("1000"); max_nr_images_input->color(GUI_INPUT_COLOR); if (do_allow_save && fn_parts != "") { y += ystep; max_parts_per_class_input = new Fl_Input(x2p, y, 40, height, "Max nr selected parts per class:"); max_parts_per_class_input->value("-1"); max_parts_per_class_input->color(GUI_INPUT_COLOR); y += ROUND(1.75*ystep); } else y += ROUND(1.75*ystep); } else // is_single { // singleview box Fl_Box *box3 = new Fl_Box(15, y-ROUND(0.25*ystep), width - 15, ROUND(1.5*ystep), ""); box3->color(GUI_BACKGROUND_COLOR); box3->box(FL_DOWN_BOX); lowpass_input = new Fl_Input(x, y, inputwidth2, height, "Lowpass filter (A):"); lowpass_input->color(GUI_INPUT_COLOR); lowpass_input->value("0"); highpass_input = new Fl_Input(275, y, inputwidth2, height, "Highpass:"); highpass_input->color(GUI_INPUT_COLOR); highpass_input->value("0"); angpix_input = new Fl_Input(x2+inputwidth-inputwidth2, y, inputwidth2, height, "Pixel size (A):"); angpix_input->color(GUI_INPUT_COLOR); angpix_input->value("1"); y += ROUND(1.75*ystep); } // Display button Fl_Button *display_button = new Fl_Button(width-100, y, 100, 30, "Display!"); display_button->color(GUI_RUNBUTTON_COLOR); display_button->callback( cb_display, this); // Read last settings file if it is present readLastSettings(); show(); return Fl::run(); } void displayerGuiWindow::readLastSettings() { FileName fn = ".relion_display_gui_settings"; if (!exists(fn)) return; std::ifstream in(fn.c_str(), std::ios_base::in); if (in.fail()) { std::cerr << "Error reading last settings from file: "<< fn<label()) scale_input->value(value.c_str()); else if (label == black_input->label()) black_input->value(value.c_str()); else if (label == white_input->label()) white_input->value(value.c_str()); else if (label == colour_scheme_choice->label()) colour_scheme_choice->value(textToInteger(value)); else if (label == sigma_contrast_input->label()) sigma_contrast_input->value(value.c_str()); else if (is_multi && label == col_input->label()) col_input->value(value.c_str()); else if (is_multi && label == ori_scale_input->label()) ori_scale_input->value(value.c_str()); else if (is_multi && label == max_nr_images_input->label()) max_nr_images_input->value(value.c_str()); else if (!is_multi && label == lowpass_input->label()) lowpass_input->value(value.c_str()); else if (!is_multi && label == highpass_input->label()) highpass_input->value(value.c_str()); else if (!is_multi && label == angpix_input->label()) angpix_input->value(value.c_str()); } in.close(); } void displayerGuiWindow::writeLastSettings() { std::ofstream fh; FileName fn = ".relion_display_gui_settings"; fh.open(fn.c_str(), std::ios::out); if (!fh) { //std::cerr << "Cannot write last settings to file: "<label() << " = " << scale_input->value() << std::endl; fh << black_input->label() << " = " << black_input->value() << std::endl; fh << white_input->label() << " = " << white_input->value() << std::endl; fh << colour_scheme_choice->label() << " = " << colour_scheme_choice->value() << std::endl; fh << 
sigma_contrast_input->label() << " = " << sigma_contrast_input->value() << std::endl; if (is_multi) { fh << col_input->label() << " = " << col_input->value() << std::endl; fh << ori_scale_input->label() << " = " << ori_scale_input->value() << std::endl; fh << max_nr_images_input->label() << " = " << max_nr_images_input->value() << std::endl; } else { fh << lowpass_input->label() << " = " << lowpass_input->value() << std::endl; fh << highpass_input->label() << " = " << highpass_input->value() << std::endl; fh << angpix_input->label() << " = " << angpix_input->value() << std::endl; } fh.close(); } // Display button call-back functions void displayerGuiWindow::cb_display(Fl_Widget* o, void* v) { displayerGuiWindow* T=(displayerGuiWindow*)v; T->cb_display_i(); } void displayerGuiWindow::cb_display_i() { // Save last settings, so we don't need to change settings every time... writeLastSettings(); // This is a rather ugly system call to the relion_display program again, // but I do not know how to get back to the original Displayer class from here... std::string cl = "relion_display "; cl += " --i " + fn_in; // Always cl += " --scale " + (std::string)scale_input->value(); cl += " --black " + (std::string)black_input->value(); cl += " --white " + (std::string)white_input->value(); cl += " --sigma_contrast " + (std::string)sigma_contrast_input->value(); // Get the colour scheme const Fl_Menu_Item* m3 = colour_scheme_choice->mvalue(); if ((std::string)m3->label() == "fire") cl += " --colour_fire "; else if ((std::string)m3->label() == "ice") cl += " --colour_ice "; else if ((std::string)m3->label() == "fire-n-ice") cl += " --colour_fire-n-ice "; else if ((std::string)m3->label() == "rainbow") cl += " --colour_rainbow "; else if ((std::string)m3->label() == "difference") cl += " --colour_difference "; if (is_star) { const Fl_Menu_Item* m = display_choice->mvalue(); cl += " --display " + (std::string)m->label(); if (getValue(sort_button)) { const Fl_Menu_Item* m2 = sort_choice->mvalue(); if ((std::string)m2->label() == "RANDOMLY") { cl += " --random_sort "; } else { cl += " --sort " + (std::string)m2->label(); if (getValue(reverse_sort_button)) cl += " --reverse "; } } if (getValue(read_whole_stack_button)) cl += " --read_whole_stack "; if (getValue(apply_orient_button)) cl += " --apply_orient "; } if (is_multi) { cl += " --col " + (std::string)col_input->value(); cl += " --ori_scale " + (std::string)ori_scale_input->value(); if (textToInteger(max_nr_images_input->value()) > 0) { if (getValue(sort_button)) std::cerr << " WARNING: you cannot sort particles and use a maximum number of images. Ignoring the latter..." 
<< std::endl; else cl += " --max_nr_images " + (std::string)max_nr_images_input->value(); } } else { //check for individual images cl += " --lowpass " + (std::string)lowpass_input->value(); cl += " --highpass " + (std::string)highpass_input->value(); cl += " --angpix " + (std::string)angpix_input->value(); } if (is_class) { cl += " --class "; } if (do_allow_save) { cl += " --allow_save "; if (fn_parts != "") { cl += " --fn_parts " + fn_parts; if ( textToInteger(max_parts_per_class_input->value()) > 0) cl += " --max_nr_parts_per_class " + (std::string)max_parts_per_class_input->value(); } if (fn_imgs != "") cl += " --fn_imgs " + fn_imgs; } if (nr_regroups > 0) { cl += " --regroup " + integerToString(nr_regroups); } if (do_recenter) { cl += " --recenter"; } if (pipeline_control != "") { cl += " --pipeline_control " + pipeline_control; } // send job in the background cl += " &"; //std::cout << "Executing: " << cl << std::endl; int res = system(cl.c_str()); } void Displayer::read(int argc, char **argv) { parser.setCommandLine(argc, argv); int gen_section = parser.addSection("General options"); fn_in = parser.getOption("--i", "Input STAR file, image or stack",""); do_gui = parser.checkOption("--gui", "Use this to provide all other parameters through a GUI"); display_label = EMDL::str2Label(parser.getOption("--display", "Metadata label to display", "rlnImageName")); text_label = EMDL::str2Label(parser.getOption("--text_label", "Metadata label to display text", "EMDL_UNDEFINED")); table_name = parser.getOption("--table", "Name of the table to read from in the input STAR file", ""); scale = textToFloat(parser.getOption("--scale", "Relative scale", "1")); minval = textToFloat(parser.getOption("--black", "Pixel value for black (default is auto-contrast)", "0")); maxval = textToFloat(parser.getOption("--white", "Pixel value for white (default is auto-contrast)", "0")); sigma_contrast = textToFloat(parser.getOption("--sigma_contrast", "Set white and black pixel values this many times the image stddev from the mean", "0")); do_read_whole_stacks = parser.checkOption("--read_whole_stack", "Read entire stacks at once (to speed up when many images of each stack are displayed)"); show_fourier_amplitudes = parser.checkOption("--show_fourier_amplitudes", "Show amplitudes of 2D Fourier transform?"); show_fourier_phase_angles = parser.checkOption("--show_fourier_phase_angles", "Show phase angles of 2D Fourier transforms?"); if (parser.checkOption("--colour_fire", "Show images in black-grey-white-red colour scheme (highlight high signal)?")) colour_scheme = BLACKGREYREDSCALE; else if (parser.checkOption("--colour_ice", "Show images in blue-black-grey-white colour scheme (highlight low signal)?")) colour_scheme = BLUEGREYWHITESCALE; else if (parser.checkOption("--colour_fire-n-ice", "Show images in blue-grey-red colour scheme (highlight high&low signal)?")) colour_scheme = BLUEGREYREDSCALE; else if (parser.checkOption("--colour_rainbow", "Show images in cyan-blue-black-red-yellow colour scheme?")) colour_scheme = RAINBOWSCALE; else if (parser.checkOption("--colour_difference", "Show images in cyan-blue-black-red-yellow colour scheme (for difference images)?")) colour_scheme = CYANBLACKYELLOWSCALE; else colour_scheme = GREYSCALE; do_colourbar = parser.checkOption("--colour_bar", "Show colourbar image?"); do_ignore_optics = parser.checkOption("--ignore_optics", "Ignore information about optics groups in input STAR file?"); int disp_section = parser.addSection("Multiviewer options"); ncol = 
textToInteger(parser.getOption("--col", "Number of columns", "5")); do_apply_orient = parser.checkOption("--apply_orient","Apply the orientation as stored in the input STAR file angles and offsets"); angpix = textToFloat(parser.getOption("--angpix", "Pixel size (in A) to calculate lowpass filter and/or translational offsets ", "-1")); ori_scale = textToFloat(parser.getOption("--ori_scale", "Relative scale for viewing individual images in multiviewer", "1")); sort_label = EMDL::str2Label(parser.getOption("--sort", "Metadata label to sort images on", "EMDL_UNDEFINED")); random_sort = parser.checkOption("--random_sort", "Use random order in the sorting"); reverse_sort = parser.checkOption("--reverse", "Use reverse order (from high to low) in the sorting"); do_class = parser.checkOption("--class", "Use this to analyse classes in input model.star file"); nr_regroups = textToInteger(parser.getOption("--regroup", "Number of groups to regroup saved particles from selected classes in (default is no regrouping)", "-1")); do_allow_save = parser.checkOption("--allow_save", "Allow saving of selected particles or class averages"); fn_selected_imgs = parser.getOption("--fn_imgs", "Name of the STAR file in which to save selected images.", ""); fn_selected_parts = parser.getOption("--fn_parts", "Name of the STAR file in which to save particles from selected classes.", ""); max_nr_parts_per_class = textToInteger(parser.getOption("--max_nr_parts_per_class", "Select maximum this number of particles from each selected classes.", "-1")); do_recenter = parser.checkOption("--recenter", "Recenter the selected images to the center-of-mass of all positive pixel values. "); max_nr_images = textToInteger(parser.getOption("--max_nr_images", "Only show this many images (default is show all)", "-1")); int pick_section = parser.addSection("Picking options"); do_pick = parser.checkOption("--pick", "Pick coordinates in input image"); do_pick_startend = parser.checkOption("--pick_start_end", "Pick start-end coordinates in input image"); fn_coords = parser.getOption("--coords", "STAR file with picked particle coordinates", ""); coord_scale = textToFloat(parser.getOption("--coord_scale", "Scale particle coordinates before display", "1.0")); particle_radius = textToFloat(parser.getOption("--particle_radius", "Particle radius in pixels", "100")); particle_radius *= coord_scale; lowpass = textToFloat(parser.getOption("--lowpass", "Lowpass filter (in A) to filter micrograph before displaying", "0")); highpass = textToFloat(parser.getOption("--highpass", "Highpass filter (in A) to filter micrograph before displaying", "0")); fn_color = parser.getOption("--color_star", "STAR file with a column for red-blue coloring (a subset of) the particles", ""); color_label = parser.getOption("--color_label", "MetaDataLabel to color particles on (e.g. 
rlnParticleSelectZScore)", ""); color_blue_value = textToFloat(parser.getOption("--blue", "Value of the blue color", "1.")); color_red_value = textToFloat(parser.getOption("--red", "Value of the red color", "0.")); verb = textToInteger(parser.getOption("--verb", "Verbosity", "1")); // Check for errors in the command-line option if (parser.checkForErrors()) REPORT_ERROR("Errors encountered on the command line (see above), exiting..."); } void Displayer::usage() { parser.writeUsage(std::cout); } void Displayer::initialise() { if (!do_gui && fn_in=="") REPORT_ERROR("Displayer::initialise ERROR: either provide --i or --gui"); Fl::visual(FL_RGB); // initialise some static variables has_dragged = false; has_shift = false; if (do_class) { display_label = EMDL_MLMODEL_REF_IMAGE; table_name = "model_classes"; FileName fn_data; if (fn_in.contains("_half1_model.star")) fn_data = fn_in.without("_half1_model.star") + "_data.star"; else if (fn_in.contains("_half2_model.star")) fn_data = fn_in.without("_half2_model.star") + "_data.star"; else fn_data = fn_in.without("_model.star") + "_data.star"; if (do_ignore_optics) MDdata.read(fn_data); else ObservationModel::loadSafely(fn_data, obsModel, MDdata); // If regrouping, also read the model_groups table into memory if (nr_regroups > 0) MDgroups.read(fn_in, "model_groups"); } // Also allow regrouping on data.star if (fn_in.contains("_data.star") && nr_regroups > 0) { FileName fn_model; fn_model = fn_in.without("_data.star") + "_model.star"; bool has_model = false; if (exists(fn_model)) { MDgroups.read(fn_model, "model_groups"); has_model = true; } else { fn_model = fn_in.without("_data.star") + "_half1_model.star"; if (exists(fn_model)) { MDgroups.read(fn_model, "model_groups"); has_model = true; } } if (!has_model) std::cout <<" Warning: cannot find model.star file for " << fn_in << " needed for regrouping..." << std::endl; } // Check if input STAR file contains pixel-size information if (!do_class && (do_apply_orient || lowpass > 0 || highpass > 0)) { if (fn_in.isStarFile()) { // As of v3.1 the input STAR files should always store the pixel size, no more check necessary... if (do_ignore_optics) { if (angpix > 0.) { obsModel.opticsMdt.setValue(EMDL_IMAGE_PIXEL_SIZE, angpix); } } } else { // if not a STAR file: always need command-line input for pixel obsModel.opticsMdt.addObject(); if (angpix > 0.) { std::cout << " Using pixel size from command-line input of " << angpix << " Angstroms" << std::endl; obsModel.opticsMdt.setValue(EMDL_IMAGE_PIXEL_SIZE, angpix); } else { REPORT_ERROR("Displayer::initialise ERROR: you provided a low- or highpass filter in Angstroms, so please also provide --angpix."); } } } if (show_fourier_amplitudes && show_fourier_phase_angles) REPORT_ERROR("Displayer::initialise ERROR: cannot display Fourier amplitudes and phase angles at the same time!"); if (show_fourier_amplitudes || show_fourier_phase_angles) { if (do_pick || do_pick_startend) REPORT_ERROR("Displayer::initialise ERROR: cannot pick particles from Fourier maps!"); if (fn_in.isStarFile()) REPORT_ERROR("Displayer::initialise ERROR: use single 2D image files as input!"); Image img; img.read(fn_in, false); // dont read data yet: only header to get size if ( (ZSIZE(img()) > 1) || (NSIZE(img()) > 1) ) REPORT_ERROR("Displayer::initialise ERROR: cannot display Fourier maps for 3D images or stacks!"); } } int Displayer::runGui() { Fl::scheme("gtk+"); if (fn_in == "") { // Shall I make a browser window in this GUI or in the general relion GUI? 
// Perhaps here is better..., then there will be no fn_in yet.... // Update entire window each time the entry of the browser changes... Fl_File_Chooser chooser(".", // directory "All recognised formats (*.{star,mrc,mrcs})\tSTAR Files (*.star)\tMRC stack (*.mrcs)\tMRC image (*.mrc)\tAll Files (*)*", // filter Fl_File_Chooser::SINGLE, // chooser type "Choose file to display"); // title chooser.show(); // Block until user picks something. while(chooser.shown()) { Fl::wait(); } // User hit cancel? if ( chooser.value() == NULL ) exit(0); FileName _fn_in(chooser.value()); fn_in = _fn_in; } // make a bigger window for STAR files... int windowheight = fn_in.isStarFile() ? 350 : 300; displayerGuiWindow win(500, windowheight, "Relion display GUI"); win.is_class = false; win.is_data = false; win.is_star = false; win.is_multi = false; win.do_allow_save = do_allow_save; win.nr_regroups = nr_regroups; win.do_recenter = do_recenter; win.fn_imgs = fn_selected_imgs; win.fn_parts = fn_selected_parts; win.pipeline_control = pipeline_control_outputname; // this GUI can never create the output itself, so disable pipeline_control pipeline_control_outputname = ""; // If this is a STAR file, decide what to do if (fn_in.isStarFile()) { MetaDataTable MD; win.is_star = true; win.is_multi = true; win.is_data = fn_in.contains("_data.star"); if (fn_in.contains("_model.star")) { win.fn_data = fn_in.without("_model.star") + "_data.star"; win.is_class = true; MD.read(fn_in, "model_classes"); } else { if (do_ignore_optics) MD.read(fn_in); else ObservationModel::loadSafely(fn_in, obsModel, MD, "discover", 0, false); //false means dont die upon error // Check the MD was loaded successfully with obsModel, otherwise read as ignore_optics if (obsModel.opticsMdt.numberOfObjects() == 0) MD.read(fn_in); } // Get which labels are stored in this metadatatable and generate choice menus for display and sorting std::vector activeLabels = MD.getActiveLabels(); for (int ilab = 0; ilab < activeLabels.size(); ilab++) { if (EMDL::isNumber(activeLabels[ilab])) win.sort_labels.push_back(EMDL::label2Str(activeLabels[ilab])); } // Preferred order of defaults! // If EMDL_IMAGE_NAME is among the labels: make that the default choice!) 
if (MD.containsLabel(EMDL_IMAGE_NAME)) win.display_labels.push_back(EMDL::label2Str(EMDL_IMAGE_NAME)); if (MD.containsLabel(EMDL_IMAGE_ORI_NAME)) win.display_labels.push_back(EMDL::label2Str(EMDL_IMAGE_ORI_NAME)); if (MD.containsLabel(EMDL_MLMODEL_REF_IMAGE)) win.display_labels.push_back(EMDL::label2Str(EMDL_MLMODEL_REF_IMAGE)); if (MD.containsLabel(EMDL_CTF_IMAGE)) win.display_labels.push_back(EMDL::label2Str(EMDL_CTF_IMAGE)); if (MD.containsLabel(EMDL_MICROGRAPH_NAME)) win.display_labels.push_back(EMDL::label2Str(EMDL_MICROGRAPH_NAME)); if (MD.containsLabel(EMDL_CTF_POWER_SPECTRUM)) win.display_labels.push_back(EMDL::label2Str(EMDL_CTF_POWER_SPECTRUM)); if (MD.containsLabel(EMDL_MICROGRAPH_MOVIE_NAME)) win.display_labels.push_back(EMDL::label2Str(EMDL_MICROGRAPH_MOVIE_NAME)); } else { // Try reading as an image/stack header Image img; img.read(fn_in, false); win.is_multi = (ZSIZE(img()) * NSIZE(img()) > 1); } return win.fill(fn_in); } void Displayer::run() { if (do_gui) { } else if (do_colourbar) { Image img(256, 10); FOR_ALL_ELEMENTS_IN_ARRAY2D(img()) { A2D_ELEM(img(), i, j) = (RFLOAT)j; } FileName fnt="colour scheme"; basisViewerWindow win(CEIL(scale*XSIZE(img())), CEIL(scale*YSIZE(img())), fnt.c_str()); win.fillSingleViewerCanvas(img(), 0., 255., 0., scale); } else if (do_pick || do_pick_startend) { Image img; img.read(fn_in); // dont read data yet: only header to get size if (lowpass > 0.) lowPassFilterMap(img(), lowpass, angpix); if (highpass > 0.) highPassFilterMap(img(), highpass, angpix, 25); // use a rather soft high-pass edge of 25 pixels wide basisViewerWindow win(CEIL(scale*XSIZE(img())), CEIL(scale*YSIZE(img())), fn_in.c_str()); if (fn_coords=="") fn_coords = fn_in.withoutExtension()+"_coords.star"; win.fillPickerViewerCanvas(img(), minval, maxval, sigma_contrast, scale, coord_scale, ROUND(scale*particle_radius), do_pick_startend, fn_coords, fn_color, fn_in, color_label, color_blue_value, color_red_value); } else if (fn_in.isStarFile()) { if (fn_in.contains("_model.star")) { MDin.read(fn_in, "model_classes"); MetaDataTable MD2; MD2.read(fn_in, "model_general"); if(MD2.containsLabel(EMDL_MLMODEL_IS_HELIX)) MDin.addLabel(EMDL_MLMODEL_IS_HELIX); } else { if (do_ignore_optics) MDin.read(fn_in); else ObservationModel::loadSafely(fn_in, obsModel, MDin, "discover", 0, false); //false means dont die upon error // Check the MD was loaded successfully with obsModel, otherwise read as ignore_optics if (obsModel.opticsMdt.numberOfObjects() == 0) MDin.read(fn_in); } // Check that label to display is present in the table if (!MDin.containsLabel(display_label)) REPORT_ERROR("Cannot find metadata label in input STAR file"); // Store class number in metadata table if (do_class) { int iclass = 0; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDin) { iclass++; // start counting at 1 MDin.setValue(EMDL_PARTICLE_CLASS, iclass); } } if (sort_label != EMDL_UNDEFINED || random_sort) { MDin.sort(sort_label, reverse_sort, true, random_sort); // true means only store sorted_idx! // When sorting: never read in the whole stacks! 
do_read_whole_stacks = false; } basisViewerWindow win(MULTIVIEW_WINDOW_WIDTH, MULTIVIEW_WINDOW_HEIGHT, fn_in.c_str()); win.fillCanvas(MULTIVIEWER, MDin, &obsModel, display_label, text_label, do_read_whole_stacks, do_apply_orient, minval, maxval, sigma_contrast, scale, ori_scale, ncol, max_nr_images, lowpass, highpass, do_class, &MDdata, nr_regroups, do_recenter, fn_in.contains("_data.star"), &MDgroups, do_allow_save, fn_selected_imgs, fn_selected_parts, max_nr_parts_per_class); } else { // Attempt to read a single-file image Image img; img.read(fn_in, false); // dont read data yet: only header to get size MDin.clear(); // display stacks if (NSIZE(img()) > 1) { for (int n = 0; n < NSIZE(img()); n++) { FileName fn_tmp; fn_tmp.compose(n+1,fn_in); MDin.addObject(); MDin.setValue(EMDL_IMAGE_NAME, fn_tmp); MDin.setValue(EMDL_IMAGE_OPTICS_GROUP, 1); } basisViewerWindow win(MULTIVIEW_WINDOW_WIDTH, MULTIVIEW_WINDOW_HEIGHT, fn_in.c_str()); win.fillCanvas(MULTIVIEWER, MDin, &obsModel, EMDL_IMAGE_NAME, text_label, true, false, minval, maxval, sigma_contrast, scale, ori_scale, ncol, max_nr_images, lowpass, highpass); } else if (ZSIZE(img()) > 1) { // Read volume slices from .mrc as if it were a .mrcs stack and then use normal slice viewer // This will not work for Spider volumes... if (fn_in.getFileFormat() != "mrc") REPORT_ERROR("Displayer::run() ERROR: only MRC maps are allowed..."); // Use a single minval and maxval for all slice if (minval == maxval) { Image It; It.read(fn_in); It().computeDoubleMinMax(minval, maxval); } // Trick MD with :mrcs extension.... for (int n = 0; n < ZSIZE(img()); n++) { FileName fn_tmp; fn_tmp.compose(n+1,fn_in); fn_tmp += ":mrcs"; MDin.addObject(); MDin.setValue(EMDL_IMAGE_NAME, fn_tmp); MDin.setValue(EMDL_IMAGE_OPTICS_GROUP, 1); } basisViewerWindow win(MULTIVIEW_WINDOW_WIDTH, MULTIVIEW_WINDOW_HEIGHT, fn_in.c_str()); win.fillCanvas(MULTIVIEWER, MDin, &obsModel, EMDL_IMAGE_NAME, text_label, true, false, minval, maxval, sigma_contrast, scale, ori_scale, ncol, max_nr_images, lowpass, highpass); } else { img.read(fn_in); // now read image data as well (not only header) if (lowpass > 0.) lowPassFilterMap(img(), lowpass, angpix); if (highpass > 0.) highPassFilterMap(img(), highpass, angpix); MDin.addObject(); MDin.setValue(EMDL_IMAGE_NAME, fn_in); MDin.setValue(EMDL_IMAGE_OPTICS_GROUP, 1); RFLOAT new_scale = scale; if (show_fourier_amplitudes || show_fourier_phase_angles) new_scale *= 2.; basisViewerWindow win(CEIL(new_scale*XSIZE(img())), CEIL(new_scale*YSIZE(img())), fn_in.c_str()); if (show_fourier_amplitudes) { amplitudeOrPhaseMap(img(), img(), AMPLITUDE_MAP); win.fillSingleViewerCanvas(img(), minval, maxval, sigma_contrast, scale); } else if (show_fourier_phase_angles) { amplitudeOrPhaseMap(img(), img(), PHASE_MAP); win.fillSingleViewerCanvas(img(), -180., 180., 0., scale); } else { win.fillSingleViewerCanvas(img(), minval, maxval, sigma_contrast, scale); } } } } relion-3.1.3/src/displayer.h000066400000000000000000000421561411340063500157470ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef DISPLAYER_H_ #define DISPLAYER_H_ // this define, and the undef below the FL includes, protects against another Complex definition in fltk #define Complex tmpComplex #include #include #include #include #include #include #include #include #include #include #include #include #undef Complex #include "src/image.h" #include "src/metadata_label.h" #include "src/metadata_table.h" #include "src/jaz/obs_model.h" #include #include #include #include #define GUI_BACKGROUND_COLOR (fl_rgb_color(240,240,240)) #define GUI_INPUT_COLOR (fl_rgb_color(255,255,230)) #define GUI_RUNBUTTON_COLOR (fl_rgb_color(238,130,238)) #define SELECTED 1 #define NOTSELECTED 0 #define MULTIVIEW_WINDOW_WIDTH 720 #define MULTIVIEW_WINDOW_HEIGHT 486 #define BOX_OFFSET 4 #define MULTIVIEWER 0 #define SINGLEVIEWER 1 #define BLACK 0 #define WHITE 1 #define DEFAULTPDFVIEWER "evince" static bool has_dragged; static int predrag_xc; static int predrag_yc; static bool has_shift; static int preshift_ipos; static int current_selection_type; static int colour_scheme; class DisplayBox : public Fl_Box { protected: // Draw the actual box on the screen (this function is used by redraw()) void draw(); public: int xsize_data; int ysize_data; int xoff; int yoff; // The box's selection status int selected; // The box's original position in the input MetaDataTable int ipos; // The metadata fir this image MetaDataTable MDimg; // The actual image data array unsigned char *img_data; std::string img_label; // For getting back close the original image values from the uchar ones... 
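// ----------------------------------------------------------------------------
// Editor's illustrative sketch (NOT part of the original RELION source):
// assuming the uchar img_data was produced by linearly mapping the original
// pixel values from [minval, maxval] onto [0, 255], an approximate inverse
// mapping could look like this hypothetical helper. The exact forward
// conversion actually used lives in DisplayBox::setData(), so treat this only
// as a sketch of how minval/maxval allow getting "close" to the original data.
RFLOAT approxOriginalValue(unsigned char display_value) const
{
	// Linear back-projection from the 8-bit display range onto [minval, maxval]
	return minval + (maxval - minval) * (RFLOAT)display_value / 255.;
}
// ----------------------------------------------------------------------------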
RFLOAT minval; RFLOAT maxval; RFLOAT scale; // Constructor with an image and its metadata DisplayBox(int X, int Y, int W, int H, const char *L=0) : Fl_Box(X,Y,W,H,L) { img_data = NULL; img_label = ""; MDimg.clear(); } void setData(MultidimArray &img, MetaDataContainer *MDCin, int ipos, RFLOAT minval, RFLOAT maxval, RFLOAT _scale, bool do_relion_scale = false); // Destructor ~DisplayBox() { MDimg.clear(); if (img_data) delete [] img_data; }; // Change selected status, redraw and return new status int toggleSelect(int set_selected); // Set a specific value in selected for this box void setSelect(int value); // Select, redraw and return new selected status int select(); // unSelect, redraw and return new selected status int unSelect(); }; // This class only puts scrollbars around the resizable canvas class basisViewerWindow : public Fl_Window { public: // Constructor with w x h size of the window and a title basisViewerWindow(int W, int H, const char* title=0): Fl_Window(W, H, title) { current_selection_type = 1; } int fillCanvas(int viewer_type, MetaDataTable &MDin, ObservationModel *obsModel, EMDLabel display_label, EMDLabel text_label, bool _do_read_whole_stacks, bool _do_apply_orient, RFLOAT _minval, RFLOAT _maxval, RFLOAT _sigma_contrast, RFLOAT _scale, RFLOAT _ori_scale, int _ncol, long int max_nr_images = -1, RFLOAT lowpass = -1.0 , RFLOAT highpass = -1.0, bool do_class = false, MetaDataTable *MDdata = NULL, int _nr_regroup = -1, bool do_recenter = false, bool _is_data = false, MetaDataTable *MDgroups = NULL, bool do_allow_save = false, FileName fn_selected_imgs="", FileName fn_selected_parts="", int max_nr_parts_per_class = -1); int fillSingleViewerCanvas(MultidimArray image, RFLOAT _minval, RFLOAT _maxval, RFLOAT _sigma_contrast, RFLOAT _scale); int fillPickerViewerCanvas(MultidimArray image, RFLOAT _minval, RFLOAT _maxval, RFLOAT _sigma_contrast, RFLOAT _scale, RFLOAT _coord_scale, int _particle_radius, bool do_startend = false, FileName _fn_coords = "", FileName _fn_color = "", FileName _fn_mic= "", FileName _color_label = "", RFLOAT _color_blue_value = 0., RFLOAT _color_red_value = 1.); }; class basisViewerCanvas : public Fl_Widget { protected: void draw(); void saveImage(int ipos=0); public: int ncol; int nrow; int xsize_box; int ysize_box; int xoff; int yoff; // To get positions in scrolled canvas... Fl_Scroll *scroll; // All the individual image display boxes std::vector boxes; // Read stacks at once to speed up? bool do_read_whole_stacks; // Constructor with w x h size of the window and a title basisViewerCanvas(int X,int Y, int W, int H, const char* title=0) : Fl_Widget(X,Y,W, H, title) { } void SetScroll(Fl_Scroll *val) { scroll = val; } void fill(MetaDataTable &MDin, ObservationModel *obsModel, EMDLabel display_label, EMDLabel text_label, bool _do_apply_orient, RFLOAT _minval, RFLOAT _maxval, RFLOAT _sigma_contrast, RFLOAT _scale, int _ncol, bool do_recenter = false, long int max_images = -1, RFLOAT lowpass = -1.0, RFLOAT highpass = -1.0); void fill(MultidimArray &image, RFLOAT _minval, RFLOAT _maxval, RFLOAT _sigma_contrast, RFLOAT _scale = 1.); void setSelectionType(); }; class multiViewerCanvas : public basisViewerCanvas { protected: int handle(int ev); public: // Flag to indicate whether this is a viewer for class averages from 2D/3D relion_refine classification runs bool do_class; // Allow saving of selection files? 
bool do_allow_save; // Filenames with the selected class averages and the particles from the selected classes FileName fn_selected_imgs, fn_selected_parts; // Maximum number of selected particles per class int max_nr_parts_per_class; // Flag to indicate whether this is a viewer for a data.star (to also allow regrouping) bool is_data; // Number of groups for regrouping the selected particles (for model.star) int nr_regroups; // Apply the positioning parameters in the particle metadata? bool do_apply_orient; // Save selected images recentered to their center-of-mass? bool do_recenter; // pointer to the MetaDataTable for the individually aligned particles when do_class (the data.star file) MetaDataTable *MDdata; // pointer to the MetaDataTable for the optics groups ObservationModel *obsModel; // pointer to the MetaDataTable for the groups when do_class and do_regroup (the data.star file) MetaDataTable *MDgroups; // Sjors 12mar18: read/write information-containing backup_selection for Liyi's project MetaDataTable MDbackup; // Scale for showing the original image RFLOAT ori_scale; // To know which original image to display EMDLabel display_label; // Label for text display EMDLabel text_label; // To know which contrast to apply to original image display RFLOAT sigma_contrast, minval, maxval; // Limit number of images to be shown long int multi_max_nr_images; // Name of the metadata table std::string metadata_table_name; // Constructor with w x h size of the window and a title multiViewerCanvas(int X,int Y, int W, int H, const char* title=0): basisViewerCanvas(X,Y,W, H, title) { } private: // Functionalities for popup menu void clearSelection(); void invertSelection(); void selectFromHereBelow(int ipos); void selectFromHereAbove(int ipos); void printMetaData(int ipos); void showAverage(bool selected, bool show_stddev=false); void showOriginalImage(int ipos); void showFourierAmplitudes(int ipos); void showFourierPhaseAngles(int ipos); void showHelicalLayerLineProfile(int ipos); void makeStarFileSelectedParticles(int save_selected, MetaDataTable &MDpart); void saveSelectedParticles(int save_selected); void showSelectedParticles(int save_selected); void saveTrainingSet(); void saveSelected(int save_selected); void saveBackupSelection(); // Allow re-loading of existing backup selection public: void loadBackupSelection(bool do_ask = true); }; // Generally accessible function void regroupSelectedParticles(MetaDataTable &MDdata, MetaDataTable &MDgroups, int nr_regroups); class popupSelectionTypeWindow : Fl_Window { Fl_Choice * choice; public: int result; // Constructor with w x h size of the window and a title popupSelectionTypeWindow(int W, int H, const char* title=0): Fl_Window(W, H, title){} int fill(); static void cb_set(Fl_Widget* o, void* v) { popupSelectionTypeWindow* T=(popupSelectionTypeWindow*)v; T->cb_set_i(); } inline void cb_set_i() { // void* to (small) int current_selection_type = static_cast(reinterpret_cast(choice->mvalue()->user_data())); } static void cb_close(Fl_Widget* o, void* v) { popupSelectionTypeWindow* T=(popupSelectionTypeWindow*)v; T->hide(); } }; class singleViewerCanvas : public basisViewerCanvas { protected: int handle(int ev); public: // Constructor with w x h size of the window and a title singleViewerCanvas(int X, int Y, int W, int H, const char* title=0): basisViewerCanvas(X,Y,W, H, title) { } private: // Functionalities for popup menu void printMetaData(); // void setContrast(); // explain functionality of clicks void printHelp(); }; /* class popupSetContrastWindow : 
Fl_Window { Fl_Input *minval, *maxval, *scale, *sigma_contrast; public: int result; // Constructor with w x h size of the window and a title popupSetContrastWindow(int W, int H, const char* title=0): Fl_Window(W, H, title){} int fill(); static void cb_set(Fl_Widget* o, void* v) { popupSelectionTypeWindow* T=(popupSelectionTypeWindow*)v; T->cb_set_i(); } inline void cb_set_i() { // Careful with setting the right value! Look at handle function of multiviewerCanvas current_minval = textToFloat(minval->value()); current_maxval = textToFloat(minval->value()); current_scale = textToFloat(minval->value()); current_sigma_contrast = textToFloat(sigma_contrast->value()); } static void cb_close(Fl_Widget* o, void* v) { popupSetContrastWindow* T=(popupSetContrastWindow*)v; T->hide(); } }; */ class pickerViewerCanvas : public basisViewerCanvas { protected: int handle(int ev); void draw(); public: // MetaDataTable with all picked coordinates MetaDataTable MDcoords; int particle_radius; // Scale for rlnCoordinateX/Y RFLOAT coord_scale; // Filename of the picked coordinate files FileName fn_coords; // FileName of the STAR file that contains the color-based column FileName fn_color; // Label to base coloring on EMDLabel color_label; // Blue value for coloring RFLOAT smallest_color_value; // Red value for coloring RFLOAT biggest_color_value; // Red->Blue is true; blue->red is false bool do_blue_to_red; // Draw lines between start-end coordinates? bool do_startend; // Micrograph name (useful to search relevant particles in fn_color) FileName fn_mic; // Constructor with w x h size of the window and a title pickerViewerCanvas(int X, int Y, int W, int H, const char* title=0): basisViewerCanvas(X,Y,W, H, title) { } void loadCoordinates(bool ask_filename = false); // if a fn_zscore is given, then match the coordinates to the Zscores in the corresponding MDtable void findColorColumnForCoordinates(); private: // Functionalities for popup menu void saveCoordinates(bool ask_filename = false); void clearCoordinates(); void printHelp(); void viewExtractedParticles(); }; // This class only puts scrollbars around the resizable canvas class displayerGuiWindow : public Fl_Window { public: FileName fn_in, fn_data; // Some general settings for different types bool is_class; bool is_multi; bool is_star; // Allow regrouping from _data.star bool is_data; // Allow saving of the selection? bool do_allow_save; // remove duplicate-coord particles in fn_data STAR file? bool do_remove_duplicates; RFLOAT duplicate_threshold; // Angstrom radius // Number of regroups int nr_regroups; // Recenter images? 
bool do_recenter; // Pipeline control std::string pipeline_control; // Maximum number of images to show long int max_nr_images; // Display image in color int colour_scheme; // FileName for selected class average images and particles FileName fn_imgs, fn_parts; // Label option to display or to sort on std::vector display_labels; std::vector sort_labels; std::vector colour_schemes; // Input for the display parameters Fl_Input *black_input, *white_input, *sigma_contrast_input, *scale_input, *lowpass_input, *highpass_input, *angpix_input; Fl_Input *col_input, *ori_scale_input, *max_nr_images_input, *max_parts_per_class_input; Fl_Check_Button *sort_button, *reverse_sort_button, *apply_orient_button, *read_whole_stack_button; Fl_Choice *display_choice, *sort_choice, *colour_scheme_choice; // Constructor with w x h size of the window and a title displayerGuiWindow(int W, int H, const char* title=0): Fl_Window(W, H, title), sort_button(NULL), reverse_sort_button(NULL), apply_orient_button(NULL), read_whole_stack_button(NULL) {} // Fill all except for the browser int fill(FileName &fn_in); // NUll-check value-fetch bool getValue(Fl_Check_Button * button) { if(button != NULL) return(button->value()); else return(false); } private: static void cb_display(Fl_Widget*, void*); inline void cb_display_i(); void readLastSettings(); void writeLastSettings(); }; class Displayer { public: // I/O Parser IOParser parser; // Launch the GUI for parameter input bool do_gui; // Verbosity int verb; // Which metadatalabel to display EMDLabel display_label, sort_label, text_label; // Use random sort bool random_sort; // use reverse order for sorting? bool reverse_sort; // Scale factor for displaying RFLOAT scale; // Number of rows for tiled view int nrow, ncol; // Apply orientations stored in metadatatable bool do_apply_orient; // Scale for showing the original image RFLOAT ori_scale; // Black and white values RFLOAT minval, maxval; // For setting black and white contrast to a specified times the image standard deviation from the mean RFLOAT sigma_contrast; // Particle diameter int particle_radius; // Scale for rlnCoordinateX/Y RFLOAT coord_scale; // Input & Output rootname FileName fn_in; // Ignore optics groups bool do_ignore_optics; // Filename for coordinates star file FileName fn_coords; // FileName of the STAR file that contains the color label FileName fn_color; // Which column to color on? FileName color_label; // Values for blue and red coloring RFLOAT color_blue_value, color_red_value; // Tablename to read from in the input STAR file FileName table_name; // Flag to pick bool do_pick; // Flag to pick start-end bool do_pick_startend; // Flag for looking at classes bool do_class; // Allow saving of selected particles or images? bool do_allow_save; // remove duplicate-coord particles in fn_data STAR file? bool do_remove_duplicates; RFLOAT duplicate_threshold; // Angstrom radius // Filenames for selected particles and selected images FileName fn_selected_imgs, fn_selected_parts; // Select maximum this number of particles from each selected classes int max_nr_parts_per_class; // Number of groups for regrouping (negative number is no regrouping) int nr_regroups; // Re-center class averages to their center-of-mass? 
bool do_recenter; // Flag for reading whole stacks instead of individual images bool do_read_whole_stacks; // Flag to show colour scalebar image bool do_colourbar; // data.star metadata (for do_class) MetaDataTable MDdata; // Observation model ObservationModel obsModel; // model_groups metadata (for do_class and regrouping) MetaDataTable MDgroups; // Input metadata MetaDataTable MDin; // For the multiviewer std::vector boxes; // Lowpass filter for picker images RFLOAT lowpass; // Highpass filter for picker images RFLOAT highpass; // Pixel size to calculate lowpass filter in Angstroms and translations in apply_orient RFLOAT angpix; // Show Fourier amplitudes? bool show_fourier_amplitudes; // Show Fourier phase angles? bool show_fourier_phase_angles; // Only show a limited number of images long int max_nr_images; public: // Read command line arguments void read(int argc, char **argv); // Print usage instructions void usage(); // Initialise some general stuff after reading void initialise(); // Decide what to do void run(); // run the GUI int runGui(); }; #endif /* DISPLAYER_H_ */ relion-3.1.3/src/error.cpp000066400000000000000000000057011411340063500154320ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ /*************************************************************************** * * Authors: Carlos Oscar S. Sorzano (coss@cnb.csic.es) * * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA * 02111-1307 USA * * All comments concerning this program package may be sent to the * e-mail address 'xmipp@cnb.csic.es' ***************************************************************************/ #include "src/error.h" #ifdef __GNUC__ #include #endif // Object Constructor RelionError::RelionError(const std::string &what, const std::string &fileArg, const long lineArg) { #ifdef __GNUC__ const int SZ_BUF = 100; backtrace_buffer = new void*[SZ_BUF]; size = backtrace(backtrace_buffer, SZ_BUF); #endif msg = "ERROR: \n" + what; file= fileArg; line=lineArg; std::cerr << "in: " << file << ", line " << line << "\n"; std::cerr << msg << std::endl; } // Show message std::ostream& operator << (std::ostream& o, RelionError& XE) { #ifdef __GNUC__ o << "=== Backtrace ===" << std::endl; char **bt_symbols = backtrace_symbols(XE.backtrace_buffer, XE.size); for (int i = 0; i < XE.size; i++) { o << bt_symbols[i] << std::endl; } o << "==================" << std::endl; delete[] XE.backtrace_buffer; free(bt_symbols); #endif o << XE.msg << std::endl; return o; } relion-3.1.3/src/error.h000066400000000000000000000361551411340063500151060ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ /*************************************************************************** * * Authors: Carlos Oscar S. Sorzano (coss@cnb.csic.es) * * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA * 02111-1307 USA * * All comments concerning this program package may be sent to the * e-mail address 'xmipp@cnb.csic.es' ***************************************************************************/ #ifndef ERROR_H #define ERROR_H //ROB #include #include #include #include #include "src/macros.h" /** Show message and throw exception * @ingroup ErrorHandling * * This macro shows the given message and exits with the error code. * * @code * if (...) * REPORT_ERROR("Error 1"); * @endcode */ #define REPORT_ERROR(ErrormMsg) throw RelionError((ErrormMsg), __FILE__, __LINE__) /** Show message and throw exception * @ingroup ErrorHandling * * same as REPORT_ERROR, but with the ability to stream in other data types (e.g. numbers) * @code * if (...) * REPORT_ERROR_STR("Requested " << num1 << " objects, only " << num2 << " available!"); * @endcode */ #define REPORT_ERROR_STR(m) std::stringstream sts; sts << m; throw RelionError(sts.str(), __FILE__, __LINE__) /** Exception class * @ingroup ErrorHandling * * This is the class type for the errors thrown by the exceptions */ class RelionError { public: /** Error code */ int __errno; /** Message shown */ std::string msg; /** File produstd::cing the error */ std::string file; /** Line number */ long line; #ifdef __GNUC__ /** Backtrace To get a line number from something like this: /lmb/home/tnakane/prog/relion-devel-lmb/build-single/lib/librelion_lib.so(_ZN13MetaDataTable4readERK8FileNameRKSsPSt6vectorI8EMDLabelSaIS6_EESsb+0x384) [0x7fb676e8c2a4] First get the start address of the function: $ nm lib/librelion_lib.so |grep _ZN13MetaDataTable4readERK8FileNameRKSsPSt6vectorI8EMDLabelSaIS6_EESsb 0000000000186f20 T _ZN13MetaDataTable4readERK8FileNameRKSsPSt6vectorI8EMDLabelSaIS6_EESsb Add the offset (in hexadecimal): $ echo 'obase=16;ibase=16;186F20+384' | bc 1872A4 Use addr2line: $ addr2line -Cif -e lib/librelion_lib.so 1872A4 MetaDataTable::read(FileName const&, std::string const&, std::vector >*, std::string, bool) /usr/include/c++/4.8.2/bits/basic_string.h:539 MetaDataTable::read(FileName const&, std::string const&, std::vector >*, std::string, bool) /lmb/home/tnakane/prog/relion-devel-lmb/src/metadata_table.cpp:978 Happy debugging! **/ void **backtrace_buffer; size_t size; #endif RelionError(const std::string& what, const std::string &fileArg, const long lineArg); friend std::ostream& operator<<(std::ostream& o, RelionError& XE); }; #define RAMERR "\n\ There was an issue allocating CPU memory (RAM). \n\ Likely maximum memory size was exceeded." #define DEVERR "\n\ This is a developer error message which you cannot fix \n\ through changing the run config. Either your data is broken or\n\ an unforseen combination of options was encountered. Please report\n\ this error, the command used and a brief description to\n\ the relion developers at \n\n github.com/3dem/relion/issues \n\n" #define ADVERR "\n\ This error is normally only displayed when using advanced \n\ features or build-utilities for code development or benchmarking.\n\ You can ask the relion developers for assistance at \n\n github.com/3dem/relion/issues" #define ERR_GPUID ("\ There was an issue with the GPU-ids. Either \n \t\ - you have specified a GPU index following the --gpu flag which is too high \n \t\ - relion has detected more GPUs than there is one or more node(s). 
\n\ Try running without ids following the --gpu flag, or specify different such indices.\n\ Remember that the numbering of GPUs starts with 0!\n") #define ERRGPUKERN ("\n\ A GPU-function failed to execute.\n\n \ If this occurred at the start of a run, you might have GPUs which\n\ are incompatible with either the data or your installation of relion.\n\ If you \n\n\ \t-> INSTALLED RELION YOURSELF: if you e.g. specified -DCUDA_ARCH=50\n\ \t and are trying to run on a compute 3.5 GPU (-DCUDA_ARCH=3.5), \n\ \t this may happen.\n\n\ \t-> HAVE MULTIPLE GPUS OF DIFFERENT VERSIONS: relion needs GPUs with\n\ \t at least compute 3.5. You may be trying to use a GPU older than\n\ \t this. If you have multiple generations, try specifying --gpu \n\ \t with X=0. Then try X=1 in a new run, and so on. The numbering of\n\ \t GPUs may not be obvious from the driver or intuition. For a list\n\ \t of GPU compute generations, see \n\n\ \t en.wikipedia.org/wiki/CUDA#Version_features_and_specifications\n\n\ \t-> ARE USING DOUBLE-PRECISION GPU CODE: relion has been written so\n\ \t as to not require this, and may thus have unforeseen requirements\n\ \t when run in this mode. If you think it is nonetheless necessary,\n\ \t please consult the developers with this error.\n\n\ If this occurred at the middle or end of a run, it might be that\n\n\ \t-> YOUR DATA OR PARAMETERS WERE UNEXPECTED: execution on GPUs is \n\ \t subject to many restrictions, and relion is written to work within\n\ \t common restraints. If you have exotic data or settings, unexpected\n\ \t configurations may occur. See also above point regarding \n\ \t double precision.\n\ If none of the above applies, please report the error to the relion\n\ developers at github.com/3dem/relion/issues\n\n") #define ERRCUDACAOOM ("\n\ You ran out of memory on the GPU(s).\n\n\ Each MPI-rank running on a GPU increases the use of GPU-memory. Relion\n\ tries to distribute load over multiple GPUs to increase performance,\n\ but doing this in a general and memory-efficient way is difficult.\n\n\ 1. Check the device-mapping presented at the beginning of each run,\n\ and be particularly wary of 'device X is split between N followers', which \n\ will result in a higher memory cost on GPU X. In classifications, GPU-\n\ sharing between MPI-ranks is typically fine, whereas it will usually \n\ cause out-of-memory during the last iteration of high-resolution refinement.\n\n\ 2. If you are not GPU-sharing across MPI-follower ranks, then you might be using a\n\ too-big box-size for the GPU memory. Currently, N-pixel particle images\n\ will require *roughly* \n\n\ \t\t (1.1e-8)*(N*2)^3 GB \n\n\ of memory (per rank) during the final iteration of refinement (using\n\ single-precision GPU code, which is default). 450-pixel images can therefore\n\ just about fit into a GPU with 8GB of memory, since 1.1e-8*(450*2)^3 ~= 8.02\n\ During classifications, resolution is typically lower and N is suitably\n\ reduced, which means that memory use is much lower.\n\n\ 3. If the above estimation fits onto (all of) your GPU(s), you may have \n\ a very large number of orientations which are found as possible during\n\ the expectation step, which results in large arrays being needed on the \n\ GPU. If this is the case, you should find large (>10'000) values of \n\ '_rlnNrOfSignificantSamples' in your _data.star output files. You can try\n\ adding the --maxsig <P>
, flag, where P is an integer limit, but you \n\ should probably also consult expertise or re-evaluate your data and/or \n\ input reference. Seeing large such values means relion is finding nothing\n\ to align.\n\n\ If none of the above applies, please report the error to the relion\n\ developers at github.com/3dem/relion/issues\n\n") #define ERR_CANZ ("There is an allocation on the GPU left between iterations." DEVERR) #define ERR_CAMUX ("A mutex could not be created for a GPU memory allocation." DEVERR) #define ERR_STAGEMEM ("A zero-size array was attempted to be made, which should not happen." DEVERR) #define ERR_MDLDIM ("The model dimension was not set properly." DEVERR) #define ERR_MDLSET ("The model was set twice." DEVERR) #define ERRCTIC ("You are trying to benchmark a (CPU) section, but started timing it twice." ADVERR) #define ERRCTOC ("You are trying to benchmark a (CPU) section, but this section has not begun." ADVERR) #define ERRGTIC ("You are trying to benchmark a (GPU) section, but started timing it twice." ADVERR) #define ERRGTOC ("You are trying to benchmark a (GPU) section, but this section has not begun." ADVERR) #define ERRTPC ("You are trying to benchmark a (GPU) section, but there is nothing to print." ADVERR) #define ERRCUFFTDIM ("You are changing the dimension of a CUFFT-transform (plan)" DEVERR) #define ERRCUFFTDIR ("You are setting the direction of a CUFFT-transform to something other than forward/inverse" DEVERR) #define ERRCUFFTDIRF ("You are trying to run a forward CUFFT-transform for an inverse transform" DEVERR) #define ERRCUFFTDIRR ("You are trying to run an inverse CUFFT-transform for a forward transform" DEVERR) #define ERRFFTMEMLIM ("\n\ When trying to plan one or more Fourier transforms, it was found that the available\n\ GPU memory was insufficient. Relion attempts to reduce the memory by segmenting\n\ the required number of transformations, but in this case not even a single\n\ transform could fit into memory. Either you are (1) performing very large transforms,\n\ or (2) the GPU had very little available memory.\n\n\ (1) may occur during autopicking if the 'shrink' parameter was set to 1. The \n\ recommended value is 0 (--shrink 0), which is argued in the RELION-2 paper (eLife).\n\ This reduces memory requirements proportionally to the low-pass used. \n\n\ (2) may occur if multiple processes were using the same GPU without being aware\n\ of each other, or if there were too many such processes. Parallel execution of \n\ relion binaries ending with _mpi ARE aware, but you may need to reduce the number\n\ of mpi-ranks to equal the total number of GPUs. If you are running other instances \n\ of GPU-accelerated programs (relion or other), these may be competing for space.\n\ Relion currently reserves all available space during initialization and distributes\n\ this space across all sub-processes using the available resources. This behaviour \n\ can be escaped by the auxiliary flag --free_gpu_memory X [MB]. You can also go \n\ further and force use of full dynamic runtime memory allocation, relion can be \n\ built with the cmake -DCachedAlloc=OFF\n") #define ERR_TRANSLIM ("You have an unexpectedly large number of translations as a result\n\ of your offset search parameters, going outside the scope of relions optimized \n\ GPU functions. Please let the relion developers know if you require a translational \n\ search this extensive (at github.com/3dem/relion/issues). 
If you are running with\n\ double precision on the GPUs, you can reduce to single precision, otherwise the\n\ only way to currently overcome this is to reduce the translational search.\n") #define ERRFILTEREDZERO ("No orientation was found as better than any other.\n\n\ A particle image was compared to the reference and resulted in all-zero\n\ weights (for all orientations). This should not happen, unless your data\n\ has very special characteristics. This has historically happened for some \n\ lower-precision calculations, but multiple fallbacks have since been \n\ implemented. Please report this error to the relion developers at \n\n\ github.com/3dem/relion/issues \n ") #define ERRNOSIGNIFS ("The number of contributing orientations for an image\n\ was found to be zero. This should not happen, unless your data\n\ has very special characteristics. Please report this error to \n\ the relion developers at \n\n\ github.com/3dem/relion/issues ") #define ERRSUMWEIGHTZERO ("The sum of weights for all orientations was\n\ found to be zero for an image. This should not happen, unless your data\n\ has very special characteristics. Please report this error to \n\ the relion developers at \n\n\ github.com/3dem/relion/issues ") #define ERRNUMFAILSAFE ("Relion had to use extra-precision fallbacks too many times.\n\n\ In some cases relion find it difficult to reconcile the data with the\n\ assumptions and axioms for the refinement procedure. If you e.g. have very\n\ strong preferred orientations, or very noisy data or small molecules, \n\ alignment can become sensitive to numerical accuracies, and if relion \n\ detects this, it uses higher-precision fallback functions. This error \n\ indicates that this had to be used more times than indicated by the threshold,\n\ which can be manually adjusted by the user with the flag --failsafe_threshold. \n\\n\ Before doing anything else, however, we recommend going back and re-extracting\n\ your particle images with the re-centering option enabled and\n\ picking up refinement anew with that dataset.\n\\n\ Assess your data and if in doubt, report this error to \n\ the relion developers at \n\n\ github.com/3dem/relion/issues ") #define ERRNEGLENGTH ("Parameter space for coarse sampling is invalid (negative)" DEVERR) #define ERRHIGHSCALE ("rlnMicrographScaleCorrection is very high. Did you normalize your data?") #define ERR_GAUSSBLOBSIZE ("You tried to use gaussian blobs but did not specify a blob-size.\n\n\ - through the GUI, use the setting for Mask diameter [A] \n\ - through the command-line, add --particle_diameter [A] \n\ both methods specify a diameter in Angstroms \n\n") #define ERRUNSAFEOBJECTREUSE ("An unsafe combination of pointers was found as input to a \n\ function. You probably supplied the same object as both input \n\ and output, which is not always safe, depending on the function design.") #endif relion-3.1.3/src/euler.cpp000066400000000000000000000227561411340063500154260ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ /*************************************************************************** * * Authors: Carlos Oscar S. Sorzano (coss@cnb.csic.es) * * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA * 02111-1307 USA * * All comments concerning this program package may be sent to the * e-mail address 'xmipp@cnb.csic.es' ***************************************************************************/ #include #include #include "src/euler.h" #include "src/funcs.h" /* Euler angles --> matrix ------------------------------------------------- */ void Euler_angles2matrix(RFLOAT alpha, RFLOAT beta, RFLOAT gamma, Matrix2D &A, bool homogeneous) { RFLOAT ca, sa, cb, sb, cg, sg; RFLOAT cc, cs, sc, ss; if (homogeneous) { A.initZeros(4,4); MAT_ELEM(A,3,3)=1; } else if (MAT_XSIZE(A) != 3 || MAT_YSIZE(A) != 3) A.resize(3, 3); alpha = DEG2RAD(alpha); beta = DEG2RAD(beta); gamma = DEG2RAD(gamma); ca = cos(alpha); cb = cos(beta); cg = cos(gamma); sa = sin(alpha); sb = sin(beta); sg = sin(gamma); cc = cb * ca; cs = cb * sa; sc = sb * ca; ss = sb * sa; A(0, 0) = cg * cc - sg * sa; A(0, 1) = cg * cs + sg * ca; A(0, 2) = -cg * sb; A(1, 0) = -sg * cc - cg * sa; A(1, 1) = -sg * cs + cg * ca; A(1, 2) = sg * sb; A(2, 0) = sc; A(2, 1) = ss; A(2, 2) = cb; } /* Euler direction --------------------------------------------------------- */ void Euler_angles2direction(RFLOAT alpha, RFLOAT beta, Matrix1D &v) { RFLOAT ca, sa, cb, sb; RFLOAT sc, ss; v.resize(3); alpha = DEG2RAD(alpha); beta = DEG2RAD(beta); ca = cos(alpha); cb = cos(beta); sa = sin(alpha); sb = sin(beta); sc = sb * ca; ss = sb * sa; v(0) = sc; v(1) = ss; v(2) = cb; } /* Euler direction2angles ------------------------------- */ //gamma is useless but I keep it for simmetry //with Euler_direction void Euler_direction2angles(Matrix1D &v0, RFLOAT &alpha, RFLOAT &beta) { // Aug25,2015 - Shaoda // This function can recover tilt (b) as small as 0.0001 degrees // It replaces a more complicated version in the code before Aug2015 Matrix1D v; // Make sure the vector is normalised v.resize(3); v = v0; v.selfNormalize(); // Tilt (b) should be [0, +180] degrees. 
Rot (a) should be [-180, +180] degrees alpha = RAD2DEG(atan2(v(1), v(0))); // 'atan2' returns an angle within [-pi, +pi] radians for rot beta = RAD2DEG(acos(v(2))); // 'acos' returns an angle within [0, +pi] radians for tilt // The following is done to keep in line with the results from old codes // If tilt (b) = 0 or 180 degrees, sin(b) = 0, rot (a) cannot be calculated from the direction if ( (fabs(beta) < 0.001) || (fabs(beta - 180.) < 0.001) ) alpha = 0.; return; } /* Matrix --> Euler angles ------------------------------------------------- */ #define CHECK //#define DEBUG_EULER void Euler_matrix2angles(const Matrix2D &A, RFLOAT &alpha, RFLOAT &beta, RFLOAT &gamma) { RFLOAT abs_sb, sign_sb; if (MAT_XSIZE(A) != 3 || MAT_YSIZE(A) != 3) REPORT_ERROR( "Euler_matrix2angles: The Euler matrix is not 3x3"); abs_sb = sqrt(A(0, 2) * A(0, 2) + A(1, 2) * A(1, 2)); if (abs_sb > 16*FLT_EPSILON) { gamma = atan2(A(1, 2), -A(0, 2)); alpha = atan2(A(2, 1), A(2, 0)); if (ABS(sin(gamma)) < FLT_EPSILON) sign_sb = SGN(-A(0, 2) / cos(gamma)); // if (sin(alpha)0) ? SGN(A(2,1)):-SGN(A(2,1)); else sign_sb = (sin(gamma) > 0) ? SGN(A(1, 2)) : -SGN(A(1, 2)); beta = atan2(sign_sb * abs_sb, A(2, 2)); } else { if (SGN(A(2, 2)) > 0) { // Let's consider the matrix as a rotation around Z alpha = 0; beta = 0; gamma = atan2(-A(1, 0), A(0, 0)); } else { alpha = 0; beta = PI; gamma = atan2(A(1, 0), -A(0, 0)); } } gamma = RAD2DEG(gamma); beta = RAD2DEG(beta); alpha = RAD2DEG(alpha); #ifdef DEBUG_EULER std::cout << "abs_sb " << abs_sb << std::endl; std::cout << "A(1,2) " << A(1, 2) << " A(0,2) " << A(0, 2) << " gamma " << gamma << std::endl; std::cout << "A(2,1) " << A(2, 1) << " A(2,0) " << A(2, 0) << " alpha " << alpha << std::endl; std::cout << "sign sb " << sign_sb << " A(2,2) " << A(2, 2) << " beta " << beta << std::endl; #endif } #undef CHECK #ifdef NEVERDEFINED // Michael's method void Euler_matrix2angles(Matrix2D A, RFLOAT *alpha, RFLOAT *beta, RFLOAT *gamma) { RFLOAT abs_sb; if (ABS(A(1, 1)) > FLT_EPSILON) { abs_sb = sqrt((-A(2, 2) * A(1, 2) * A(2, 1) - A(0, 2) * A(2, 0)) / A(1, 1)); } else if (ABS(A(0, 1)) > FLT_EPSILON) { abs_sb = sqrt((-A(2, 1) * A(2, 2) * A(0, 2) + A(2, 0) * A(1, 2)) / A(0, 1)); } else if (ABS(A(0, 0)) > FLT_EPSILON) { abs_sb = sqrt((-A(2, 0) * A(2, 2) * A(0, 2) - A(2, 1) * A(1, 2)) / A(0, 0)); } else EXIT_ERROR(1, "Don't know how to extract angles"); if (abs_sb > FLT_EPSILON) { *beta = atan2(abs_sb, A(2, 2)); *alpha = atan2(A(2, 1) / abs_sb, A(2, 0) / abs_sb); *gamma = atan2(A(1, 2) / abs_sb, -A(0, 2) / abs_sb); } else { *alpha = 0; *beta = 0; *gamma = atan2(A(1, 0), A(0, 0)); } *gamma = rad2deg(*gamma); *beta = rad2deg(*beta); *alpha = rad2deg(*alpha); } #endif /* Euler up-down correction ------------------------------------------------ */ void Euler_up_down(RFLOAT rot, RFLOAT tilt, RFLOAT psi, RFLOAT &newrot, RFLOAT &newtilt, RFLOAT &newpsi) { newrot = rot; newtilt = tilt + 180; newpsi = -(180 + psi); } /* Same view, differently expressed ---------------------------------------- */ void Euler_another_set(RFLOAT rot, RFLOAT tilt, RFLOAT psi, RFLOAT &newrot, RFLOAT &newtilt, RFLOAT &newpsi) { newrot = rot + 180; newtilt = -tilt; newpsi = -180 + psi; } /* Euler mirror Y ---------------------------------------------------------- */ void Euler_mirrorY(RFLOAT rot, RFLOAT tilt, RFLOAT psi, RFLOAT &newrot, RFLOAT &newtilt, RFLOAT &newpsi) { newrot = rot; newtilt = tilt + 180; newpsi = -psi; } /* Euler mirror X ---------------------------------------------------------- */ void Euler_mirrorX(RFLOAT rot, 
RFLOAT tilt, RFLOAT psi, RFLOAT &newrot, RFLOAT &newtilt, RFLOAT &newpsi) { newrot = rot; newtilt = tilt + 180; newpsi = 180 - psi; } /* Euler mirror XY --------------------------------------------------------- */ void Euler_mirrorXY(RFLOAT rot, RFLOAT tilt, RFLOAT psi, RFLOAT &newrot, RFLOAT &newtilt, RFLOAT &newpsi) { newrot = rot; newtilt = tilt; newpsi = 180 + psi; } /* Apply a transformation matrix to Euler angles --------------------------- */ void Euler_apply_transf(const Matrix2D &L, const Matrix2D &R, RFLOAT rot, RFLOAT tilt, RFLOAT psi, RFLOAT &newrot, RFLOAT &newtilt, RFLOAT &newpsi) { Matrix2D euler(3, 3), temp; Euler_angles2matrix(rot, tilt, psi, euler); temp = L * euler * R; Euler_matrix2angles(temp, newrot, newtilt, newpsi); } /* Rotate (3D) MultidimArray with 3 Euler angles ------------------------------------- */ void Euler_rotation3DMatrix(RFLOAT rot, RFLOAT tilt, RFLOAT psi, Matrix2D &result) { Euler_angles2matrix(rot, tilt, psi, result, true); } relion-3.1.3/src/euler.h000066400000000000000000000234671411340063500150730ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ /*************************************************************************** * * Authors: Carlos Oscar S. Sorzano (coss@cnb.csic.es) * * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA * 02111-1307 USA * * All comments concerning this program package may be sent to the * e-mail address 'xmipp@cnb.csic.es' ***************************************************************************/ #ifndef GEOMETRY_H #define GEOMETRY_H #include "src/multidim_array.h" #include "src/transformations.h" #ifndef FLT_EPSILON #define FLT_EPSILON 1.19209e-07 #endif /// @name Euler operations /// @{ /** Euler angles --> "Euler" matrix * * This function returns the transformation matrix associated to the 3 given * Euler angles (in degrees). 
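 *
 * A minimal illustrative call (a hedged sketch only; the names rot, tilt, psi and A
 * are placeholders, following the declaration below):
 * @code
 * Matrix2D< RFLOAT > A;
 * Euler_angles2matrix(rot, tilt, psi, A); // A is (re)sized to 3x3 and filled with the rotation
 * @endcode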
* * As an implementation note you might like to know that this function always * calls Matrix2D::resize * * See http://xmipp.cnb.csic.es/twiki/bin/view/Xmipp/EulerAngles for a * description of the Euler angles. */ void Euler_angles2matrix(RFLOAT a, RFLOAT b, RFLOAT g, Matrix2D< RFLOAT >& A, bool homogeneous=false); /** Euler angles2direction * * This function returns a vector parallel to the projection direction. * Resizes v if needed */ void Euler_angles2direction(RFLOAT alpha, RFLOAT beta, Matrix1D< RFLOAT >& v); /** Euler direction2angles * * This function returns the 2 Euler angles (rot&tilt) associated with the direction given by * the vector v. */ void Euler_direction2angles(Matrix1D< RFLOAT >& v, RFLOAT& alpha, RFLOAT& beta); /** "Euler" matrix --> angles * * This function computes a set of Euler angles which result in an "Euler" matrix * such as the one given. See \ref Euler_angles2matrix to know more about how this * matrix is computed and what each row means. The resulting angles are in degrees. * Alpha, beta and gamma are respectively the first, second and third rotation * angles. If the input matrix is not 3x3 then an exception is thrown; the * function doesn't check that the Euler matrix truly represents a * coordinate system. * * @code * Euler_matrix2angles(Euler, alpha, beta, gamma); * @endcode */ void Euler_matrix2angles(const Matrix2D< RFLOAT >& A, RFLOAT& alpha, RFLOAT& beta, RFLOAT& gamma); /** Up-Down projection equivalence * * As you know, a projection view from a point has a homologous view from the * diametrically opposed point on the projection sphere. This function takes a projection * defined by its 3 Euler angles and computes an equivalent set of Euler angles * from which the view is exactly the same but in the other part of the sphere * (if the projection is taken from the bottom then the new projection is from the * top, and vice versa). The two projections are exactly the same except for * a flip over the X axis, i.e. an up-down inversion. The exact correction * performed is: * * @code * newrot = rot; * newtilt = tilt + 180; * newpsi = -(180 + psi); * @endcode * * @code * Euler_up_down(rot, tilt, psi, newrot, newtilt, newpsi); * @endcode */ void Euler_up_down(RFLOAT rot, RFLOAT tilt, RFLOAT psi, RFLOAT& newrot, RFLOAT& newtilt, RFLOAT& newpsi); /** The same view but differently expressed * * As you know, a projection view from a point can be expressed with different * sets of Euler angles. This function gives you another expression of the Euler * angles for this point of view. The exact operation performed is: * * @code * newrot = rot + 180; * newtilt = -tilt; * newpsi = -180 + psi; * @endcode * * @code * Euler_another_set(rot, tilt, psi, newrot, newtilt, newpsi); * @endcode */ void Euler_another_set(RFLOAT rot, RFLOAT tilt, RFLOAT psi, RFLOAT& newrot, RFLOAT& newtilt, RFLOAT& newpsi); /** Mirror over Y axis * * Given a set of Euler angles, this function returns a new set which defines a * mirrored (over the Y axis) version of the former projection. * * @code * -----> X X<------ * | | * | | * | ======> | * v v * Y Y * @endcode * * The operation performed is * * @code * newrot = rot; * newtilt = tilt + 180; * newpsi = -psi; * @endcode * * @code * Euler_mirrorY(rot, tilt, psi, newrot, newtilt, newpsi); * @endcode */ void Euler_mirrorY(RFLOAT rot, RFLOAT tilt, RFLOAT psi, RFLOAT& newrot, RFLOAT& newtilt, RFLOAT& newpsi); /** Mirror over X axis * * Given a set of Euler angles, this function returns a new set which defines a * mirrored (over the X axis) version of the former projection.
* * @code * -----> X Y * | ^ * | | * | ======> | * v | * Y -----> X * @endcode * * The operation performed is * * @code * newrot = rot; * newtilt = tilt + 180; * newpsi = 180 - psi; * @endcode * * @code * Euler_mirrorX(rot, tilt, psi, newrot, newtilt, newpsi); * @endcode */ void Euler_mirrorX(RFLOAT rot, RFLOAT tilt, RFLOAT psi, RFLOAT& newrot, RFLOAT& newtilt, RFLOAT& newpsi); /** Mirror over X and Y axes * * Given a set of Euler angles, this function returns a new set which defines a * mirrored (over the X and Y axes at the same time) version of the former * projection. * * @code * -----> X Y * | ^ * | | * | ======> | * v | * Y X<----- * @endcode * * The operation performed is * * @code * newrot = rot; * newtilt = tilt; * newpsi = 180 + psi; * @endcode * * @code * Euler_mirrorXY(rot, tilt, psi, newrot, newtilt, newpsi); * @endcode */ void Euler_mirrorXY(RFLOAT rot, RFLOAT tilt, RFLOAT psi, RFLOAT& newrot, RFLOAT& newtilt, RFLOAT& newpsi); /** Apply a geometrical transformation * * The idea behind this function is the following. 3 Euler angles define a point * of view for a projection, but also a coordinate system. You might apply a * geometrical transformation to this system, and then compute back what the * Euler angles for the new system are. This could be used to "mirror" points of * view, rotate them, and so on. The transformation matrix must be 3x3 * but it must transform R3 vectors into R3 vectors (that is a normal 3D * transformation matrix when vector coordinates are not homogeneous) and it * will be applied in the sense: * * @code * New Euler matrix = L * Old Euler matrix * R * @endcode * * where you know that the Euler matrix rows represent the different system * axes. See Euler_angles2matrix for more information about the Euler coordinate * system. * * @code * Matrix2D< RFLOAT > R60 = rotation3DMatrix(60, 'Z'); * R60.resize(3, 3); // Get rid of homogeneous part * Matrix2D< RFLOAT > I(3, 3); * I.initIdentity(); * Euler_apply_transf(I, R60, rot, tilt, psi, newrot, newtilt, newpsi); * @endcode */ void Euler_apply_transf(const Matrix2D< RFLOAT >& L, const Matrix2D< RFLOAT >& R, RFLOAT rot, RFLOAT tilt, RFLOAT psi, RFLOAT& newrot, RFLOAT& newtilt, RFLOAT& newpsi); /** 3D Rotation matrix after 3 Euler angles * * Creates a rotation matrix (4x4) for volumes from the combination of the 3 * rotations around ZYZ. All angles are in degrees. You must use it with * IS_NOT_INV in applyGeometry. * * @code * Matrix2D< RFLOAT > euler; * Euler_rotation3DMatrix(60, 30, 60, euler); * @endcode */ void Euler_rotation3DMatrix(RFLOAT rot, RFLOAT tilt, RFLOAT psi, Matrix2D &result); //@} #endif relion-3.1.3/src/exp_model.cpp000066400000000000000000001117271411340063500162610ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code.
Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include "src/exp_model.h" #include long int Experiment::numberOfParticles(int random_subset) { if (random_subset == 0) return particles.size(); else if (random_subset == 1) return nr_particles_subset1; else if (random_subset == 2) return nr_particles_subset2; else REPORT_ERROR("ERROR: Experiment::numberOfParticles invalid random_subset: " + integerToString(random_subset)); } // Get the total number of images in a given particle long int Experiment::numberOfImagesInParticle(long int part_id) { return particles[part_id].images.size(); } long int Experiment::numberOfMicrographs() { return micrographs.size(); } long int Experiment::numberOfGroups() { return groups.size(); } int Experiment::numberOfOpticsGroups() { return obsModel.numberOfOpticsGroups(); } bool Experiment::hasCtfPremultiplied() { for (int og = 0; og < numberOfOpticsGroups(); og++) if (obsModel.getCtfPremultiplied(og)) return true; return false; } RFLOAT Experiment::getOpticsPixelSize(int optics_group) { return obsModel.getPixelSize(optics_group); } int Experiment::getOpticsImageSize(int optics_group) { return obsModel.getBoxSize(optics_group); } long int Experiment::getMicrographId(long int part_id, int img_id) { return (particles[part_id].images[img_id]).micrograph_id; } long int Experiment::getGroupId(long int part_id, int img_id) { return (particles[part_id].images[img_id]).group_id; } int Experiment::getOpticsGroup(long part_id, int img_id) { return particles[part_id].images[img_id].optics_group; } int Experiment::getRandomSubset(long int part_id) { return particles[part_id].random_subset; } int Experiment::getOriginalImageId(long part_id, int img_id) { return particles[part_id].images[img_id].id; } RFLOAT Experiment::getImagePixelSize(long int part_id, int img_id) { int optics_group = particles[part_id].images[img_id].optics_group; return obsModel.getPixelSize(optics_group); } void Experiment::getNumberOfImagesPerGroup(std::vector &nr_particles_per_group) { nr_particles_per_group.resize(groups.size()); for (long int part_id = 0; part_id < particles.size(); part_id++) for (int img_id = 0; img_id < particles[part_id].images.size(); img_id++) nr_particles_per_group[particles[part_id].images[img_id].group_id] += 1; } MetaDataTable Experiment::getMetaDataImage(long int part_id, int img_id) { MetaDataTable result; result.addObject(MDimg.getObject(getOriginalImageId(part_id, img_id))); return result; } long int Experiment::addParticle(std::string part_name, int random_subset) { ExpParticle particle; particle.name = part_name; particle.random_subset = random_subset; // Push back this particle in the particles vector and its sorted index in sorted_idx sorted_idx.push_back(particles.size()); particles.push_back(particle); // Return the current part_id in the particles vector return particles.size() - 1; } int Experiment::addImageToParticle(long int part_id, std::string img_name, long int ori_img_id, long int group_id, long int micrograph_id, int optics_group, bool unique) { if (group_id >= groups.size()) REPORT_ERROR("Experiment::addImageToParticle: group_id out of range"); if (micrograph_id >= micrographs.size()) REPORT_ERROR("Experiment::addImageToParticle: micrograph_id out of range"); if (optics_group >= obsModel.numberOfOpticsGroups()) REPORT_ERROR("Experiment::addImageToParticle: optics_group out of range"); ExpImage img; img.name = img_name; img.id = 
ori_img_id; img.particle_id = part_id; img.group_id = group_id; img.micrograph_id = micrograph_id; img.optics_group = optics_group; if (unique) nr_images_per_optics_group[optics_group]++; img.optics_group_id = nr_images_per_optics_group[optics_group] - 1; if (img.optics_group_id < 0) REPORT_ERROR("Logic error in Experiment::addImageToParticle."); // Push back this particle in the particles vector particles[part_id].images.push_back(img); (micrographs[micrograph_id].image_ids).push_back(img.id); return particles[part_id].images.size() - 1; } long int Experiment::addGroup(std::string group_name, int _optics_group) { // Add new group to this Experiment ExpGroup group; group.id = groups.size(); // start counting groups at 0! group.optics_group = _optics_group; group.name = group_name; // Push back this micrograph groups.push_back(group); // Return the id in the micrographs vector return group.id; } long int Experiment::addMicrograph(std::string mic_name) { // Add new micrograph to this Experiment ExpMicrograph micrograph; micrograph.id = micrographs.size(); micrograph.name = mic_name; // Push back this micrograph micrographs.push_back(micrograph); // Return the id in the micrographs vector return micrograph.id; } void Experiment::divideParticlesInRandomHalves(int seed, bool do_helical_refine) { // Only do this if the random_subset of all original_particles is zero bool all_are_zero = true; bool some_are_zero = false; nr_particles_subset1 = 0; nr_particles_subset2 = 0; for (long int i = 0; i < particles.size(); i++) { int random_subset = particles[i].random_subset; if (random_subset != 0) { all_are_zero = false; // Keep track of how many particles there are in each subset if (random_subset == 1) nr_particles_subset1++; else if (random_subset == 2) nr_particles_subset2++; else REPORT_ERROR("ERROR Experiment::divideParticlesInRandomHalves: invalid number for random subset (i.e. not 1 or 2): " + integerToString(random_subset)); } else some_are_zero = true; if (!all_are_zero && some_are_zero) REPORT_ERROR("ERROR Experiment::divideParticlesInRandomHalves: some random subset values are zero and others are not. 
They should all be zero, or all bigger than zero!"); } if (all_are_zero) { // Only randomise them if the random_subset values were not read in from the STAR file srand(seed); if (do_helical_refine) { std::string mic_name, img_name; int nr_swaps, nr_segments_subset1, nr_segments_subset2, helical_tube_id; std::map map_mics; std::map::const_iterator ii_map; std::vector > vec_mics; bool divide_according_to_helical_tube_id = false; if (MDimg.containsLabel(EMDL_PARTICLE_HELICAL_TUBE_ID)) divide_according_to_helical_tube_id = true; // Count micrograph names map_mics.clear(); for (long int part_id = 0; part_id < particles.size(); part_id++) { // Get name of micrograph of the first image in this particle long int mic_id = particles[part_id].images[0].micrograph_id; mic_name = micrographs[mic_id].name; if (divide_according_to_helical_tube_id) { long int ori_img_id = getOriginalImageId(part_id, 0); MDimg.getValue(EMDL_PARTICLE_HELICAL_TUBE_ID, helical_tube_id, ori_img_id); if (helical_tube_id < 1) REPORT_ERROR("ERROR Experiment::divideParticlesInRandomHalves: Helical tube ID should be positive integer!"); mic_name += std::string("_TUBEID_"); mic_name += std::string(integerToString(helical_tube_id)); } if ((map_mics.insert(std::make_pair(mic_name, 1))).second == false) map_mics[mic_name]++; } vec_mics.clear(); for (ii_map = map_mics.begin(); ii_map != map_mics.end(); ii_map++) vec_mics.push_back(*ii_map); // NEW RANDOMISATION (better than the old one) nr_swaps = 0; for (int ptr_a = 0; ptr_a < (vec_mics.size() - 1); ptr_a++) { std::pair tmp; int ptr_b = ROUND(rnd_unif(ptr_a, vec_mics.size() - 1)); if ( (ptr_b <= ptr_a) || (ptr_b >= vec_mics.size()) ) continue; nr_swaps++; tmp = vec_mics[ptr_a]; vec_mics[ptr_a] = vec_mics[ptr_b]; vec_mics[ptr_b] = tmp; } // Divide micrographs into halves map_mics.clear(); nr_segments_subset1 = nr_segments_subset2 = 0; for (int ii = 0; ii < vec_mics.size(); ii++) { if (nr_segments_subset1 < nr_segments_subset2) { nr_segments_subset1 += vec_mics[ii].second; vec_mics[ii].second = 1; } else { nr_segments_subset2 += vec_mics[ii].second; vec_mics[ii].second = 2; } map_mics.insert(vec_mics[ii]); } for (long int part_id = 0; part_id < particles.size(); part_id++) { // Get name of micrograph of the first image in this particle long int mic_id = particles[part_id].images[0].micrograph_id; mic_name = micrographs[mic_id].name; if (divide_according_to_helical_tube_id) { long int ori_img_id = getOriginalImageId(part_id, 0); MDimg.getValue(EMDL_PARTICLE_HELICAL_TUBE_ID, helical_tube_id, ori_img_id); if (helical_tube_id < 1) REPORT_ERROR("ERROR Experiment::divideParticlesInRandomHalves: Helical tube ID should be positive integer!"); mic_name += std::string("_TUBEID_"); mic_name += std::string(integerToString(helical_tube_id)); } particles[part_id].random_subset = map_mics[mic_name]; } } else { for (long int part_id = 0; part_id < particles.size(); part_id++) { int random_subset = rand() % 2 + 1; particles[part_id].random_subset = random_subset; // randomly 1 or 2 } } // Now that random subsets have been assigned, count the number of particles in each subset and set new labels in entire MDimg for (long int part_id = 0; part_id < particles.size(); part_id++) { int random_subset = getRandomSubset(part_id); if (random_subset == 1) nr_particles_subset1++; else if (random_subset == 2) nr_particles_subset2++; else REPORT_ERROR("ERROR Experiment::divideParticlesInRandomHalves: invalid number for random subset (i.e. 
not 1 or 2): " + integerToString(random_subset)); for (int img_id = 0; img_id < numberOfImagesInParticle(part_id); img_id++) { long int ori_img_id = getOriginalImageId(part_id, img_id); MDimg.setValue(EMDL_PARTICLE_RANDOM_SUBSET, random_subset, ori_img_id); } } } if (nr_particles_subset2 == 0 || nr_particles_subset1 == 0) REPORT_ERROR("ERROR: one of your half sets has no segments. Is rlnRandomSubset set to 1 or 2 in your particles STAR file? Or in case you're doing helical, half-sets are always per-filament, so provide at least 2 filaments."); std::stable_sort(sorted_idx.begin(), sorted_idx.end(), compareRandomSubsetParticles(particles)); } void Experiment::randomiseParticlesOrder(int seed, bool do_split_random_halves, bool do_subsets) { //This static flag is for only randomize once static bool randomised = false; if (!randomised || do_subsets) { srand(seed); if (do_split_random_halves) { std::stable_sort(sorted_idx.begin(), sorted_idx.end(), compareRandomSubsetParticles(particles)); // sanity check long int nr_half1 = 0, nr_half2 = 0; for (long int i = 0; i < particles.size(); i++) { const int random_subset = particles[i].random_subset; if (random_subset == 1) nr_half1++; else if (random_subset == 2) nr_half2++; else REPORT_ERROR("ERROR Experiment::randomiseParticlesOrder: invalid number for random subset (i.e. not 1 or 2): " + integerToString(random_subset)); } if (nr_half1 != nr_particles_subset1) REPORT_ERROR("ERROR Experiment::randomiseParticlesOrder: invalid half1 size:" + integerToString(nr_half1) + " != " + integerToString(nr_particles_subset1)); if (nr_half2 != nr_particles_subset2) REPORT_ERROR("ERROR Experiment::randomiseParticlesOrder: invalid half2 size:" + integerToString(nr_half2) + " != " + integerToString(nr_particles_subset2)); // Randomise the two particle lists std::random_shuffle(sorted_idx.begin(), sorted_idx.begin() + nr_half1); std::random_shuffle(sorted_idx.begin() + nr_half1, sorted_idx.end()); // Make sure the particles are sorted on their optics_group. // Otherwise CudaFFT re-calculation of plans every time image size changes slows down things a lot! std::stable_sort(sorted_idx.begin(), sorted_idx.begin() + nr_half1, compareOpticsGroupsParticles(particles)); std::stable_sort(sorted_idx.begin() + nr_half1, sorted_idx.end(), compareOpticsGroupsParticles(particles)); } else { // Just randomise the entire vector std::random_shuffle(sorted_idx.begin(), sorted_idx.end()); // Make sure the particles are sorted on their optics_group. // Otherwise CudaFFT re-calculation of plans every time image size changes slows down things a lot! 
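// Note: std::stable_sort is stable, so the shuffled (random) order produced above is preserved
// within each optics group, while particles of the same optics group become contiguous.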
std::stable_sort(sorted_idx.begin(), sorted_idx.end(), compareOpticsGroupsParticles(particles)); } randomised = true; } } void Experiment::initialiseBodies(int _nr_bodies) { if (_nr_bodies < 2) { return; } else { nr_bodies = _nr_bodies; MetaDataTable MDbody; MDbody.setIsList(false); bool is_3d = (MDimg.containsLabel(EMDL_ORIENT_ORIGIN_Z)); FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDimg) { MDbody.addObject(); RFLOAT norm, zero=0., ninety=90.; MDimg.getValue(EMDL_IMAGE_NORM_CORRECTION, norm); MDbody.setValue(EMDL_ORIENT_ORIGIN_X_ANGSTROM, zero); MDbody.setValue(EMDL_ORIENT_ORIGIN_Y_ANGSTROM, zero); MDbody.setValue(EMDL_ORIENT_ROT, zero); MDbody.setValue(EMDL_ORIENT_TILT, ninety); MDbody.setValue(EMDL_ORIENT_PSI, zero); MDbody.setValue(EMDL_IMAGE_NORM_CORRECTION, norm); if (is_3d) { MDbody.setValue(EMDL_ORIENT_ORIGIN_Z_ANGSTROM, zero); } } // Now just fill all bodies with that MDbody MDbodies.resize(nr_bodies, MDbody); for (int ibody = 0; ibody < nr_bodies; ibody++) { std::string tablename = "images_body_" + integerToString(ibody+1); MDbodies[ibody].setName(tablename); } } } bool Experiment::getImageNameOnScratch(long int part_id, int img_id, FileName &fn_img, bool is_ctf_image) { int optics_group = getOpticsGroup(part_id, img_id); long int my_id = particles[part_id].images[img_id].optics_group_id; #ifdef DEBUG_SCRATCH std::cerr << "part_id = " << part_id << " img_id = " << img_id << " my_id = " << my_id << " nr_parts_on_scratch[" << optics_group << "] = " << nr_parts_on_scratch[optics_group] << std::endl; #endif if (fn_scratch != "" && my_id < nr_parts_on_scratch[optics_group]) { if (is_3D) { if (is_ctf_image) fn_img = fn_scratch + "opticsgroup" + integerToString(optics_group+1) + "_particle_ctf" + integerToString(my_id+1)+".mrc"; else fn_img = fn_scratch + "opticsgroup" + integerToString(optics_group+1) + "_particle" + integerToString(my_id+1)+".mrc"; } else { // Write different optics groups into different stacks, as sizes might be different FileName fn_tmp = fn_scratch + "opticsgroup" + integerToString(optics_group+1) + "_particles.mrcs"; fn_img.compose(my_id+1, fn_tmp); } #ifdef DEBUG_SCRATCH std::cerr << "getImageNameOnScratch: " << particles[part_id].name << " is cached at " << fn_img << std::endl; #endif return true; } else { return false; } } void Experiment::setScratchDirectory(FileName _fn_scratch, bool do_reuse_scratch, int verb) { // Make sure fn_scratch ends with a slash if (_fn_scratch[_fn_scratch.length()-1] != '/') _fn_scratch += '/'; fn_scratch = _fn_scratch + "relion_volatile/"; if (do_reuse_scratch) { nr_parts_on_scratch.resize(numberOfOpticsGroups(), 0); for (int optics_group = 0; optics_group < numberOfOpticsGroups(); optics_group++) { if (is_3D) { FileName fn_tmp = fn_scratch + "opticsgroup" + integerToString(optics_group+1) + "_particle*.mrc"; std::vector fn_all; fn_tmp.globFiles(fn_all, true); nr_parts_on_scratch[optics_group] = fn_all.size(); } else { FileName fn_tmp = fn_scratch + "opticsgroup" + integerToString(optics_group+1) + "_particles.mrcs"; if (exists(fn_tmp)) { Image Itmp; Itmp.read(fn_tmp, false); nr_parts_on_scratch[optics_group] = NSIZE(Itmp()); } #ifdef DEBUG_SCRATCH if (verb > 0) std::cerr << " optics_group= " << (optics_group + 1) << " nr_parts_on_scratch[optics_group]= " << nr_parts_on_scratch[optics_group] << std::endl; #endif } } } } FileName Experiment::initialiseScratchLock(FileName _fn_scratch, FileName _fn_out) { // Get a unique lockname for this run int uniqnr = rand() % 100000; FileName fn_uniq = _fn_out; fn_uniq.replaceAllSubstrings("/", "_"); 
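// Purely illustrative (hypothetical names): with _fn_out "Refine3D/job019/run" and uniqnr 12345,
// the lock file assembled below becomes fn_scratch + "Refine3D_job019_run_lock12345"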
fn_uniq += "_lock" + integerToString(uniqnr); FileName fn_lock = fn_scratch + fn_uniq; if (exists(fn_lock)) remove(fn_lock.c_str()); return fn_lock; } bool Experiment::prepareScratchDirectory(FileName _fn_scratch, FileName fn_lock) { if (fn_lock != "" && exists(fn_lock)) { // Still measure how much free space there is struct statvfs vfs; statvfs(_fn_scratch.c_str(), &vfs); char nodename[64] = "undefined"; gethostname(nodename,sizeof(nodename)); std::string myhost(nodename); free_space_Gb = (RFLOAT)vfs.f_bsize * vfs.f_bfree / (1024 * 1024 * 1024); return false; } else { // Wipe the directory clean and make a new one std::string command; deleteDataOnScratch(); // Make the scratch directory with write permissions command = "install -d -m 0777 " + fn_scratch; if (system(command.c_str())) REPORT_ERROR("ERROR: cannot execute: " + command); // Touch the lock file if(fn_lock != "") { touch(fn_lock); command = "chmod 0777 " + fn_lock; if (system(command.c_str())) REPORT_ERROR("ERROR: cannot execute: " + command); } // Measure how much free space there is struct statvfs vfs; statvfs(_fn_scratch.c_str(), &vfs); char nodename[64] = "undefined"; gethostname(nodename,sizeof(nodename)); std::string myhost(nodename); free_space_Gb = (RFLOAT)vfs.f_bsize * vfs.f_bfree / (1024 * 1024 * 1024); std::cout << " + On host " << myhost << ": free scratch space = " << free_space_Gb << " Gb." << std::endl; return true; } } void Experiment::deleteDataOnScratch() { // Wipe the scratch directory if (fn_scratch != "" && exists(fn_scratch)) { std::string command = " rm -rf " + fn_scratch; if (system(command.c_str())) REPORT_ERROR("ERROR: cannot execute: " + command); } } void Experiment::copyParticlesToScratch(int verb, bool do_copy, bool also_do_ctf_image, RFLOAT keep_free_scratch_Gb) { // This function relies on prepareScratchDirectory() being called before! long int nr_part = MDimg.numberOfObjects(); int barstep; if (verb > 0 && do_copy) { std::cout << " Copying particles to scratch directory: " << fn_scratch << std::endl; init_progress_bar(nr_part); barstep = XMIPP_MAX(1, nr_part / 60); } long int one_part_space, used_space = 0.; long int max_space = (free_space_Gb - keep_free_scratch_Gb) * 1024 * 1024 * 1024; // in bytes #ifdef DEBUG_SCRATCH std::cerr << " free_space_Gb = " << free_space_Gb << " GB, keep_free_scratch_Gb = " << keep_free_scratch_Gb << " GB.\n"; std::cerr << " Max space RELION can use = " << max_space << " bytes" << std::endl; #endif // Loop over all particles and copy them one-by-one FileName fn_open_stack = ""; fImageHandler hFile; long int total_nr_parts_on_scratch = 0; nr_parts_on_scratch.resize(numberOfOpticsGroups(), 0); const int check_abort_frequency=100; FileName prev_img_name = "/Unlikely$filename$?*!"; int prev_optics_group = -999; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDimg) { // TODO: think about MPI_Abort here.... if (current_object % check_abort_frequency == 0 && pipeline_control_check_abort_job()) exit(RELION_EXIT_ABORTED); long int imgno; FileName fn_img, fn_ctf, fn_stack, fn_new; Image img; MDimg.getValue(EMDL_IMAGE_NAME, fn_img); int optics_group = 0; if (MDimg.getValue(EMDL_IMAGE_OPTICS_GROUP, optics_group)) { optics_group--; } // Get the size of the first particle if (nr_parts_on_scratch[optics_group] == 0) { Image tmp; tmp.read(fn_img, false); // false means: only read the header! one_part_space = ZYXSIZE(tmp())*sizeof(float); // MRC images are stored in floats! 
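// Illustrative arithmetic (hypothetical 256x256 2D particle): 256*256*4 bytes = 262144 bytes (~0.25 MB) of scratch space per image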
bool myis3D = (ZSIZE(tmp()) > 1); if (myis3D != is_3D) REPORT_ERROR("BUG: inconsistent is_3D values!"); // add MRC header size for subtomograms, which are stored as 1 MRC file each if (is_3D) { one_part_space += 1024; also_do_ctf_image = MDimg.containsLabel(EMDL_CTF_IMAGE); if (also_do_ctf_image) one_part_space *= 2; } #ifdef DEBUG_SCRATCH std::cerr << "one_part_space[" << optics_group << "] = " << one_part_space << std::endl; #endif } bool is_duplicate = (prev_img_name == fn_img && prev_optics_group == optics_group); // Read in the particle image, and write out on scratch if (do_copy && !is_duplicate) { #ifdef DEBUG_SCRATCH std::cerr << "used_space = " << used_space << std::endl; #endif // Now we have the particle in memory // See how much space it occupies used_space += one_part_space; // If there is no more space, exit the loop over all objects to stop copying files and change filenames in MDimg if (used_space > max_space) { char nodename[64] = "undefined"; gethostname(nodename,sizeof(nodename)); std::string myhost(nodename); std::cerr << " Warning: scratch space full on " << myhost << ". Remaining " << nr_part - total_nr_parts_on_scratch << " particles will be read from where they were."<< std::endl; break; } if (is_3D) { // For subtomograms, write individual .mrc files,possibly also CTF images img.read(fn_img); fn_new = fn_scratch + "opticsgroup" + integerToString(optics_group+1) + "_particle" + integerToString(nr_parts_on_scratch[optics_group]+1)+".mrc"; img.write(fn_new); if (also_do_ctf_image) { FileName fn_ctf; MDimg.getValue(EMDL_CTF_IMAGE, fn_ctf); img.read(fn_ctf); fn_new = fn_scratch + "opticsgroup" + integerToString(optics_group+1) + "_particle_ctf" + integerToString(nr_parts_on_scratch[optics_group]+1)+".mrc"; img.write(fn_new); } } else { // Only open/close new stacks, so check if this is a new stack fn_img.decompose(imgno, fn_stack); if (fn_stack != fn_open_stack) { // Manual closing isn't necessary: if still open, then openFile will first close the filehandler // Also closing the last one isn't necessary, as destructor will do this. //if (fn_open_stack != "") // hFile.closeFile(); hFile.openFile(fn_stack, WRITE_READONLY); fn_open_stack = fn_stack; } img.readFromOpenFile(fn_img, hFile, -1, false); fn_new.compose(nr_parts_on_scratch[optics_group]+1, fn_scratch + "opticsgroup" + integerToString(optics_group+1) + "_particles.mrcs"); if (nr_parts_on_scratch[optics_group] == 0) img.write(fn_new, -1, false, WRITE_OVERWRITE); else img.write(fn_new, -1, true, WRITE_APPEND); #ifdef DEBUG_SCRATCH std::cerr << "Cached " << fn_img << " to " << fn_new << std::endl; #endif } } // Update the counter and progress bar if (!is_duplicate) nr_parts_on_scratch[optics_group]++; total_nr_parts_on_scratch++; prev_img_name = fn_img; prev_optics_group = optics_group; if (verb > 0 && total_nr_parts_on_scratch % barstep == 0) progress_bar(total_nr_parts_on_scratch); } if (verb) { progress_bar(nr_part); for (int i = 0; i < nr_parts_on_scratch.size(); i++) { std::cout << " For optics_group " << (i + 1) << ", there are " << nr_parts_on_scratch[i] << " particles on the scratch disk." 
<< std::endl; } } if (do_copy && total_nr_parts_on_scratch>1) { std::string command = " chmod -R 777 " + fn_scratch + "/"; if (system(command.c_str())) REPORT_ERROR("ERROR in executing: " + command); } } // Read from file void Experiment::read(FileName fn_exp, bool do_ignore_particle_name, bool do_ignore_group_name, bool do_preread_images, bool need_tiltpsipriors_for_helical_refine, int verb) { //#define DEBUG_READ #ifdef DEBUG_READ std::cerr << "Entering Experiment::read" << std::endl; Timer timer; int tall = timer.setNew("ALL"); int tread = timer.setNew("read"); int tsort = timer.setNew("sort"); int tfill = timer.setNew("fill"); int tgroup = timer.setNew("find group"); int tdef = timer.setNew("set defaults"); int tend = timer.setNew("ending"); char c; timer.tic(tall); timer.tic(tread); #endif // Only open stacks once and then read multiple images fImageHandler hFile; long int dump; FileName fn_stack, fn_open_stack=""; // Initialize by emptying everything clear(); long int group_id = 0, mic_id = 0, part_id = 0; if (!fn_exp.isStarFile()) { // Read images from stack. Ignore all metadata, just use filenames // Add a single Micrograph group_id = addGroup("group", 0); mic_id = addMicrograph("micrograph"); // Check that an MRC stack ends in .mrcs, not .mrc (which will be read as an MRC 3D map!) if (fn_exp.contains(".mrc") && !fn_exp.contains(".mrcs")) REPORT_ERROR("Experiment::read: ERROR: MRC stacks of 2D images should have the extension .mrcs, not .mrc!"); // Read in header-only information to get the NSIZE of the stack Image img; img.read(fn_exp, false); // false means skip data, only read header // allocate 1 block of memory particles.reserve(NSIZE(img())); nr_images_per_optics_group.resize(1, 0); for (long int n = 0; n < NSIZE(img()); n++) { FileName fn_img; fn_img.compose(n+1, fn_exp); // fn_img = integerToString(n) + "@" + fn_exp; // Add the particle to my_area = 0 part_id = addParticle(fn_img, 0); // Just add a single image per particle addImageToParticle(part_id, fn_img, n, 0, 0, 0, true); MDimg.addObject(); if (do_preread_images) { Image img; fn_img.decompose(dump, fn_stack); if (fn_stack != fn_open_stack) { hFile.openFile(fn_stack, WRITE_READONLY); fn_open_stack = fn_stack; } img.readFromOpenFile(fn_img, hFile, -1, false); img().setXmippOrigin(); particles[part_id].images[0].img = img(); } // Set the filename and other metadata parameters MDimg.setValue(EMDL_IMAGE_NAME, fn_img, part_id); MDimg.setValue(EMDL_IMAGE_OPTICS_GROUP, 1, part_id); } } else { // MDimg and MDopt have to be read at the same time, so that the optics groups can be // renamed in case they are non-contiguous or not sorted ObservationModel::loadSafely(fn_exp, obsModel, MDimg, "particles", verb); nr_images_per_optics_group.resize(obsModel.numberOfOpticsGroups(), 0); #ifdef DEBUG_READ std::cerr << "Done reading MDimg" << std::endl; timer.toc(tread); timer.tic(tsort); //std::cerr << "Press any key to continue..." << std::endl; //std::cin >> c; #endif // Sort input particles on micrograph name bool is_mic_a_movie=false, star_contains_micname; star_contains_micname = MDimg.containsLabel(EMDL_MICROGRAPH_NAME); if (star_contains_micname) { // See if the micrograph names contain an "@", i.e. whether they are movies and we are inside polishing or so.
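// Illustrative (hypothetical) names: a plain micrograph entry is e.g. "Movies/mic001.mrc", whereas a
// movie entry such as "0017@Movies/mic001.mrcs" carries a leading slice index; the part after the "@"
// identifies the micrograph, which is what the substr() call below keeps.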
FileName fn_mic; MDimg.getValue(EMDL_MICROGRAPH_NAME, fn_mic); if (fn_mic.contains("@")) { is_mic_a_movie = true; MDimg.newSort(EMDL_MICROGRAPH_NAME, false, true); // sort on part AFTER "@" } else { is_mic_a_movie = false; MDimg.newSort(EMDL_MICROGRAPH_NAME); // just sort on fn_mic } if (do_ignore_group_name) group_id = addGroup("group", 0); } else { // If there is no EMDL_MICROGRAPH_NAME, then just use a single group and micrograph group_id = addGroup("group", 0); mic_id = addMicrograph("micrograph"); } #ifdef DEBUG_READ std::cerr << "Done sorting MDimg" << std::endl; std::cerr << " MDimg.numberOfObjects()= " << MDimg.numberOfObjects() << std::endl; timer.toc(tsort); timer.tic(tfill); long nr_read = 0; #endif // allocate 1 block of memory particles.reserve(MDimg.numberOfObjects()); // Now Loop over all objects in the metadata file and fill the logical tree of the experiment long int last_part_id = -1; FileName prev_img_name = "/Unlikely$filename$?*!"; int prev_optics_group = -999; //FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDimg) for (long int ori_img_id = 0; ori_img_id < MDimg.numberOfObjects(); ori_img_id++) { // Get the optics group of this particle int optics_group = obsModel.getOpticsGroup(MDimg, ori_img_id); // Add new micrographs or get mic_id for existing micrograph FileName mic_name=""; // Filename instead of string because will decompose below if (star_contains_micname) { long int idx = micrographs.size(); std::string last_mic_name = (idx > 0) ? micrographs[idx-1].name : ""; MDimg.getValue(EMDL_MICROGRAPH_NAME, mic_name, ori_img_id); // All frames of a movie belong to the same micrograph if (is_mic_a_movie) mic_name = mic_name.substr(mic_name.find("@")+1); mic_id = -1; if (last_mic_name == mic_name) { // This particle belongs to the previous micrograph mic_id = micrographs[idx - 1].id; } else { // A new micrograph last_part_id = particles.size(); } // Make a new micrograph if (mic_id < 0) mic_id = addMicrograph(mic_name); #ifdef DEBUG_READ timer.tic(tgroup); #endif // For example in particle_polishing the groups are not needed... 
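// Illustrative (hypothetical) example for the fallback below: if no rlnGroupName label is present,
// a micrograph called "MotionCorr/job002/Movies/mic001.mrc" would yield the group name
// "Movies/mic001.mrc" via decomposePipelineFileName().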
if (!do_ignore_group_name) { std::string group_name; // Check whether there is a group label, if not use a group for each micrograph if (MDimg.containsLabel(EMDL_MLMODEL_GROUP_NAME)) { MDimg.getValue(EMDL_MLMODEL_GROUP_NAME, group_name, ori_img_id); } else { FileName fn_pre, fn_jobnr, fn_post; decomposePipelineFileName(mic_name, fn_pre, fn_jobnr, fn_post); group_name = fn_post; } // If this group did not exist yet, add it to the experiment group_id = -1; for (long int i = groups.size() - 1; i >= 0; i--) // search backwards to find match faster { if (groups[i].name == group_name) { group_id = groups[i].id; break; } } if (group_id < 0) { group_id = addGroup(group_name, optics_group); } } #ifdef DEBUG_READ timer.toc(tgroup); #endif } else { // All images belong to the same micrograph and group mic_id = 0; group_id = 0; } // If there is an EMDL_PARTICLE_RANDOM_SUBSET entry in the input STAR-file, then set the random_subset, otherwise use default (0) int my_random_subset; if (!MDimg.getValue(EMDL_PARTICLE_RANDOM_SUBSET, my_random_subset, ori_img_id)) { my_random_subset = 0; } // Add this image to an existing particle, or create a new particle std::string part_name; long int part_id = -1; if (MDimg.containsLabel(EMDL_PARTICLE_NAME)) MDimg.getValue(EMDL_PARTICLE_NAME, part_name, ori_img_id); else MDimg.getValue(EMDL_IMAGE_NAME, part_name, ori_img_id); if (MDimg.containsLabel(EMDL_PARTICLE_NAME) && !do_ignore_particle_name) { // Only search ori_particles for the last (original) micrograph for (long int i = last_part_id; i < particles.size(); i++) { if (particles[i].name == part_name) { part_id = i; break; } } } // If no particles with this name was found, // or if no EMDL_PARTICLE_NAME in the input file, or if do_ignore_original_particle_name // then add a new particle if (part_id < 0) { part_id = addParticle(part_name, my_random_subset); } // Create a new image in this particle FileName img_name; MDimg.getValue(EMDL_IMAGE_NAME, img_name, ori_img_id); bool do_cache = (prev_img_name != img_name || prev_optics_group != optics_group); #ifdef DEBUG_SCRATCH std::cerr << "prev_img_name = " << prev_img_name << " img_name = " << img_name << " prev_optics_group = " << prev_optics_group << " optics_group = " << optics_group << " do_cache = " << do_cache << std::endl; #endif prev_img_name = img_name; prev_optics_group = optics_group; int img_id = addImageToParticle(part_id, img_name, ori_img_id, group_id, mic_id, optics_group, do_cache); // The group number is only set upon reading: it is not read from the STAR file itself, // there the only thing that matters is the order of the micrograph_names // Write igroup+1, to start numbering at one instead of at zero MDimg.setValue(EMDL_MLMODEL_GROUP_NO, group_id + 1, ori_img_id); #ifdef DEBUG_READ timer.tic(tori); #endif if (do_preread_images) { Image img; img_name.decompose(dump, fn_stack); if (fn_stack != fn_open_stack) { hFile.openFile(fn_stack, WRITE_READONLY); fn_open_stack = fn_stack; } img.readFromOpenFile(img_name, hFile, -1, false); img().setXmippOrigin(); particles[part_id].images[img_id].img = img(); } #ifdef DEBUG_READ timer.toc(tori); #endif #ifdef DEBUG_READ nr_read++; #endif } // end loop over all objects in MDimg (ori_part_id) #ifdef DEBUG_READ timer.toc(tfill); timer.tic(tdef); std::cerr << " nr_read= " << nr_read << " particles.size()= " << particles.size() << " micrographs.size()= " << micrographs.size() << " groups.size()= " << groups.size() << std::endl; #endif // Check for the presence of multiple bodies (for multi-body refinement) bool 
is_done = false; nr_bodies = 0; while (!is_done) { std::string tablename = "images_body_" + integerToString(nr_bodies+1); MetaDataTable MDimgin; if (MDimgin.read(fn_exp, tablename) > 0) { nr_bodies++; MDbodies.push_back(MDimgin); } else { is_done = true; } } // Even if we don't do multi-body refinement, then nr_bodies is still 1 nr_bodies = XMIPP_MAX(nr_bodies, 1); } #ifdef DEBUG_READ std::cerr << "Done filling MDimg" << std::endl; //std::cerr << "Press any key to continue..." << std::endl; //std::cin >> c; #endif // Make sure some things are always set in the MDimg bool have_rot = MDimg.containsLabel(EMDL_ORIENT_ROT); bool have_tilt = MDimg.containsLabel(EMDL_ORIENT_TILT); bool have_psi = MDimg.containsLabel(EMDL_ORIENT_PSI); bool have_xoff = MDimg.containsLabel(EMDL_ORIENT_ORIGIN_X_ANGSTROM); bool have_yoff = MDimg.containsLabel(EMDL_ORIENT_ORIGIN_Y_ANGSTROM); bool have_zoff = MDimg.containsLabel(EMDL_ORIENT_ORIGIN_Z_ANGSTROM); bool have_zcoord = MDimg.containsLabel(EMDL_IMAGE_COORD_Z); bool have_clas = MDimg.containsLabel(EMDL_PARTICLE_CLASS); bool have_norm = MDimg.containsLabel(EMDL_IMAGE_NORM_CORRECTION); // Jan20,2016 - Helical reconstruction bool have_tilt_prior = MDimg.containsLabel(EMDL_ORIENT_TILT_PRIOR); bool have_psi_prior = MDimg.containsLabel(EMDL_ORIENT_PSI_PRIOR); bool have_tiltpsi = (have_tilt) && (have_psi); bool have_tiltpsi_prior = (have_tilt_prior) && (have_psi_prior); if (need_tiltpsipriors_for_helical_refine) { if (!have_tiltpsi_prior) { if (!have_tiltpsi) REPORT_ERROR("exp_model.cpp: Experiment::read(): Tilt and psi priors of helical segments are missing!"); } } FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDimg) { RFLOAT dzero=0., done=1.; int izero = 0; if (!have_rot) MDimg.setValue(EMDL_ORIENT_ROT, dzero); if (!have_tilt) MDimg.setValue(EMDL_ORIENT_TILT, dzero); if (!have_psi) MDimg.setValue(EMDL_ORIENT_PSI, dzero); if (!have_xoff) MDimg.setValue(EMDL_ORIENT_ORIGIN_X_ANGSTROM, dzero); if (!have_yoff) MDimg.setValue(EMDL_ORIENT_ORIGIN_Y_ANGSTROM, dzero); if ( (!have_zoff) && (have_zcoord) ) MDimg.setValue(EMDL_ORIENT_ORIGIN_Z_ANGSTROM, dzero); if (!have_clas) MDimg.setValue(EMDL_PARTICLE_CLASS, izero); if (!have_norm) MDimg.setValue(EMDL_IMAGE_NORM_CORRECTION, done); if (need_tiltpsipriors_for_helical_refine && have_tiltpsi_prior) // If doing 3D helical reconstruction and PRIORs exist { RFLOAT tilt = 0., psi = 0.; if (have_tiltpsi) MDimg.getValue(EMDL_ORIENT_TILT, tilt); // If ANGLEs do not exist or they are all set to 0 (from a Class2D job), copy values of PRIORs to ANGLEs if ( (!have_tiltpsi) || ((have_tiltpsi) && (ABS(tilt) < 0.001)) ) { MDimg.getValue(EMDL_ORIENT_TILT_PRIOR, tilt); MDimg.getValue(EMDL_ORIENT_PSI_PRIOR, psi); MDimg.setValue(EMDL_ORIENT_TILT, tilt); MDimg.setValue(EMDL_ORIENT_PSI, psi); } } } // Set is_3D from MDopt int mydim; obsModel.opticsMdt.getValue(EMDL_IMAGE_DIMENSIONALITY, mydim, 0); is_3D = (mydim == 3); #ifdef DEBUG_READ timer.toc(tdef); std::cerr << "Done setting defaults MDimg" << std::endl; timer.toc(tall); timer.printTimes(false); //std::cerr << "Writing out debug_data.star" << std::endl; //write("debug"); //exit(0); #endif } // Write to file void Experiment::write(FileName fn_root) { std::ofstream fh; FileName fn_tmp = fn_root+"_data.star"; fh.open((fn_tmp).c_str(), std::ios::out); if (!fh) REPORT_ERROR( (std::string)"Experiment::write: Cannot write file: " + fn_tmp); obsModel.opticsMdt.setName("optics"); obsModel.opticsMdt.write(fh); // Always write MDimg MDimg.setName("particles"); MDimg.write(fh); if (nr_bodies > 1) { for (int ibody = 0; 
ibody < nr_bodies; ibody++) { MDbodies[ibody].write(fh); } } fh.close(); } relion-3.1.3/src/exp_model.h000066400000000000000000000274501411340063500157270ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef EXP_MODEL_H_ #define EXP_MODEL_H_ #include #include "src/matrix2d.h" #include "src/image.h" #include "src/multidim_array.h" #include "src/metadata_table.h" #include "src/time.h" #include "src/ctf.h" #include "src/jaz/obs_model.h" /// Reserve large vectors with some reasonable estimate // Larger numbers will still be OK, but memory management might suffer #define MAX_NR_PARTICLES_PER_MICROGRAPH 1000 #define MAX_NR_MICROGRAPHS 2000 #define MAX_NR_FRAMES_PER_MOVIE 100 ////////////// Hierarchical metadata model class ExpImage { public: // Position of the image in the original input STAR file long int id; // To which particle does this image belong long int particle_id; // This is the Nth image in this optics_group, for writing to scratch disk: filenames long int optics_group_id; // Name of this image (by this name it will be recognised upon reading) std::string name; // ID of the micrograph that this image comes from long int micrograph_id; // ID of the group that this image comes from long int group_id; // The optics group for this image int optics_group; // Pre-read array of the image in RAM MultidimArray img; // Empty Constructor ExpImage() {} // Destructor needed for work with vectors ~ExpImage() {} // Copy constructor needed for work with vectors ExpImage(ExpImage const& copy) { id = copy.id; particle_id = copy.particle_id; optics_group_id = copy.optics_group_id; name = copy.name; micrograph_id = copy.micrograph_id; group_id = copy.group_id; optics_group = copy.optics_group; img = copy.img; } // Define assignment operator in terms of the copy constructor ExpImage& operator=(ExpImage const& copy) { id = copy.id; particle_id = copy.particle_id; optics_group_id = copy.optics_group_id; name = copy.name; micrograph_id = copy.micrograph_id; group_id = copy.group_id; optics_group = copy.optics_group; img = copy.img; return *this; } }; class ExpParticle { public: // Name of this particle (by this name all the images inside it will be grouped) std::string name; // Random subset this particle belongs to int random_subset; // Vector of all the images for this particle std::vector images; // Empty Constructor ExpParticle() {} // Destructor needed for work with vectors ~ExpParticle() {} // Copy constructor needed for work with vectors ExpParticle(ExpParticle const& copy) { name = copy.name; random_subset = copy.random_subset; images = copy.images; } // Define assignment operator in terms of the copy constructor 
ExpParticle& operator=(ExpParticle const& copy) { name = copy.name; random_subset = copy.random_subset; images = copy.images; return *this; } int numberOfImages() { return images.size(); } }; class ExpMicrograph { public: // ID of this micrograph, i.e. which number in the MDmic am I? long int id; // Name of this micrograph (by this name it will be recognised upon reading) std::string name; // All the original images that were recorded on this micrograph std::vector image_ids; // Empty Constructor ExpMicrograph() {} // Destructor needed for work with vectors ~ExpMicrograph() {} // Copy constructor needed for work with vectors ExpMicrograph(ExpMicrograph const& copy) { id = copy.id; name = copy.name; image_ids = copy.image_ids; } // Define assignment operator in terms of the copy constructor ExpMicrograph& operator=(ExpMicrograph const& copy) { id = copy.id; name = copy.name; image_ids = copy.image_ids; return *this; } }; class ExpGroup { public: // ID of this group long int id; // The optics_group for this group int optics_group; // Name of this group (by this name it will be recognised upon reading) std::string name; // Empty Constructor ExpGroup() {} // Destructor needed for work with vectors ~ExpGroup() {} // Copy constructor needed for work with vectors ExpGroup(ExpGroup const& copy) { id = copy.id; optics_group = copy.optics_group; name = copy.name; } // Define assignment operator in terms of the copy constructor ExpGroup& operator=(ExpGroup const& copy) { id = copy.id; optics_group = copy.optics_group; name = copy.name; return *this; } }; class Experiment { public: // All groups in the experiment std::vector groups; // All micrographs in the experiment std::vector micrographs; // All particles in the experiment std::vector particles; // Indices of the sorted particles std::vector sorted_idx; // Number of particles in random subsets 1 and 2; long int nr_particles_subset1, nr_particles_subset2; // Number of images per optics group std::vector nr_images_per_optics_group; // One large MetaDataTable for all images MetaDataTable MDimg; // Number of bodies in multi-body refinement int nr_bodies; // Vector with MetaDataTables for orientations of different bodies in the multi-body refinement std::vector MDbodies; // Removed: One large MetaDataTable for all micrographs // MetaDataTable MDmic; // Observation model holding the data for all optics groups ObservationModel obsModel; // Directory on scratch disk to copy particles to FileName fn_scratch; // Number of particles saved on the scratchdir, one for each optics_group std::vector nr_parts_on_scratch; // Number of Gb on scratch disk before copying particles RFLOAT free_space_Gb; // Is this sub-tomograms? 
bool is_3D; // Empty Constructor Experiment() { clear(); } ~Experiment() { clear(); } void clear() { groups.clear(); groups.reserve(MAX_NR_MICROGRAPHS); micrographs.clear(); micrographs.reserve(MAX_NR_MICROGRAPHS); particles.clear(); // reserve upon reading sorted_idx.clear(); nr_particles_subset1 = nr_particles_subset2 = 0; nr_bodies = 1; fn_scratch = ""; nr_parts_on_scratch.clear(); free_space_Gb = 10; is_3D = false; MDimg.clear(); MDimg.setIsList(false); MDbodies.clear(); MDimg.setName("images"); } // Calculate the total number of particles in this experiment long int numberOfParticles(int random_subset = 0); // Get the total number of images in a given particle long int numberOfImagesInParticle(long int part_id); // Calculate the total number of micrographs in this experiment long int numberOfMicrographs(); // Calculate the total number of groups in this experiment long int numberOfGroups(); // Calculate the total number of optics groups in this experiment int numberOfOpticsGroups(); // Is any of the optics groups CTF-premultiplied? bool hasCtfPremultiplied(); // Get the pixel size for this optics group RFLOAT getOpticsPixelSize(int optics_group); // Get the original image size for this optics group int getOpticsImageSize(int optics_group); // Get the random_subset for this particle int getRandomSubset(long int part_id); // Get the micrograph_id for the N'th image for this particle long int getMicrographId(long int part_id, int img_id); // Get the group_id for the N'th image for this particle long int getGroupId(long int part_id, int img_id); // Get the optics group to which the N'th image for this particle belongs int getOpticsGroup(long int part_id, int img_id); // Get the original position in the input STAR file for the N'th image for this particle int getOriginalImageId(long int part_id, int img_id); // Get the pixel size for the N-th image of this particle RFLOAT getImagePixelSize(long int part_id, int img_id); // Get the vector of number of images per group_id void getNumberOfImagesPerGroup(std::vector &nr_particles_per_group); // Get the metadata-row for this image in a separate MetaDataTable MetaDataTable getMetaDataImage(long int part_id, int img_id); // Add a particle long int addParticle(std::string part_name, int random_subset = 0); // Add an image to the given particle int addImageToParticle(long int part_id, std::string img_name, long int ori_img_id, long int group_id, long int micrograph_id, int optics_group, bool unique); // Add a group long int addGroup(std::string mic_name, int optics_group); // Add a micrograph long int addMicrograph(std::string mic_name); // for separate refinement of random halves of the data void divideParticlesInRandomHalves(int seed, bool do_helical_refine = false); // Randomise the order of the particles void randomiseParticlesOrder(int seed, bool do_split_random_halves = false, bool do_subsets = false); // Make sure the images inside each particle are in the right order void orderImagesInParticles(); // Add a given number of new bodies (for multi-body refinement) to the Experiment, // by copying the relevant entries from MDimg into MDbodies void initialiseBodies(int _nr_bodies); // Get the image name for a given part_id bool getImageNameOnScratch(long int part_id, int img_id, FileName &fn_img, bool is_ctf_image = false); // For parallel executions, lock the scratch directory with a unique code, so we won't copy the same data many times to the same position // This determines the lockname and removes the lock if it exists FileName 
initialiseScratchLock(FileName _fn_scratch, FileName _fn_out); // Returns true if particles need to be copied, and creates a lock file. // Returns false if the particles do not need to be copied. In that case, only the number of particles on the scratch disk needs to be counted // Also checks how much free space there is on the scratch dir bool prepareScratchDirectory(FileName _fn_scratch, FileName fn_lock = ""); void setScratchDirectory(FileName _fn_scratch, bool do_reuse_scratch, int verb=0); // Wipe the generic scratch directory clean void deleteDataOnScratch(); // Copy particles from their original position to a scratch directory // Monitor when the scratch disk gets to have fewer than free_scratch_Gb space, // in that case, stop copying, and keep reading particles from where they were... void copyParticlesToScratch(int verb, bool do_copy = true, bool also_do_ctf_image = false, RFLOAT free_scratch_Gb = 10); // Read from file void read( FileName fn_in, bool do_ignore_particle_name = false, bool do_ignore_group_name = false, bool do_preread_images = false, bool need_tiltpsipriors_for_helical_refine = false, int verb = 0); // Write void write(FileName fn_root); private: struct compareOpticsGroupsParticles { const std::vector& particles; compareOpticsGroupsParticles(const std::vector& particles) : particles(particles) { } bool operator()(const long int i, const long int j) { return particles[i].images[0].optics_group < particles[j].images[0].optics_group;} }; struct compareRandomSubsetParticles { const std::vector& particles; compareRandomSubsetParticles(const std::vector& particles) : particles(particles) { } bool operator()(const long int i, const long int j) { return particles[i].random_subset < particles[j].random_subset;} }; }; #endif /* METADATA_MODEL_H_ */ relion-3.1.3/src/fftw.cpp000066400000000000000000001440311411340063500152470ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ /*************************************************************************** * * Authors: Roberto Marabini (roberto@cnb.csic.es) * Carlos Oscar S. Sorzano (coss@cnb.csic.es) * * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA * 02111-1307 USA * * All comments concerning this program package may be sent to the * e-mail address 'xmipp@cnb.csic.es' ***************************************************************************/ #include "src/macros.h" #include "src/fftw.h" #include "src/args.h" #include #include static pthread_mutex_t fftw_plan_mutex = PTHREAD_MUTEX_INITIALIZER; //#define TIMING_FFTW #ifdef TIMING_FFTW #define RCTIC(label) (timer_fftw.tic(label)) #define RCTOC(label) (timer_fftw.toc(label)) Timer timer_fftw; int TIMING_FFTW_PLAN = timer_fftw.setNew("fftw - plan"); int TIMING_FFTW_EXECUTE = timer_fftw.setNew("fftw - exec"); int TIMING_FFTW_NORMALISE = timer_fftw.setNew("fftw - normalise"); int TIMING_FFTW_COPY = timer_fftw.setNew("fftw - copy"); #else #define RCTIC(label) #define RCTOC(label) #endif //#define DEBUG_PLANS // Constructors and destructors -------------------------------------------- FourierTransformer::FourierTransformer(): plans_are_set(false) { init(); #ifdef DEBUG_PLANS std::cerr << "INIT this= "< &inputFourier) { RCTIC(TIMING_FFTW_COPY); if (!fFourier.sameShape(inputFourier)) { std::cerr << " fFourier= "; fFourier.printShape(std::cerr); std::cerr << " inputFourier= "; inputFourier.printShape(std::cerr); REPORT_ERROR("BUG: incompatible shaped in setFourier part of FFTW transformer"); } memcpy(MULTIDIM_ARRAY(fFourier),MULTIDIM_ARRAY(inputFourier), MULTIDIM_SIZE(inputFourier)*2*sizeof(RFLOAT)); RCTOC(TIMING_FFTW_COPY); } // Transform --------------------------------------------------------------- void FourierTransformer::Transform(int sign) { if (sign == FFTW_FORWARD) { RCTIC(TIMING_FFTW_EXECUTE); #ifdef RELION_SINGLE_PRECISION fftwf_execute_dft_r2c(fPlanForward,MULTIDIM_ARRAY(*fReal), (fftwf_complex*) MULTIDIM_ARRAY(fFourier)); #else fftw_execute_dft_r2c(fPlanForward,MULTIDIM_ARRAY(*fReal), (fftw_complex*) MULTIDIM_ARRAY(fFourier)); #endif RCTOC(TIMING_FFTW_EXECUTE); // Normalisation of the transform unsigned long int size=0; if(fReal!=NULL) size = MULTIDIM_SIZE(*fReal); else if (fComplex!= NULL) size = MULTIDIM_SIZE(*fComplex); else REPORT_ERROR("No complex nor real data defined"); RCTIC(TIMING_FFTW_NORMALISE); FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(fFourier) DIRECT_MULTIDIM_ELEM(fFourier,n) /= size; RCTOC(TIMING_FFTW_NORMALISE); } else if (sign == FFTW_BACKWARD) { RCTIC(TIMING_FFTW_EXECUTE); #ifdef RELION_SINGLE_PRECISION fftwf_execute_dft_c2r(fPlanBackward, (fftwf_complex*) MULTIDIM_ARRAY(fFourier), MULTIDIM_ARRAY(*fReal)); #else fftw_execute_dft_c2r(fPlanBackward, (fftw_complex*) MULTIDIM_ARRAY(fFourier), MULTIDIM_ARRAY(*fReal)); #endif RCTOC(TIMING_FFTW_EXECUTE); } } void FourierTransformer::FourierTransform() { Transform(FFTW_FORWARD); } void FourierTransformer::inverseFourierTransform() { Transform(FFTW_BACKWARD); } // Inforce Hermitian symmetry --------------------------------------------- void FourierTransformer::enforceHermitianSymmetry() { int ndim = 3; if (ZSIZE(*fReal) == 1) { ndim = 2; if (YSIZE(*fReal) == 1) ndim = 1; } long int yHalf = YSIZE(*fReal) / 2; if (YSIZE(*fReal) % 2 == 0) yHalf--; long int zHalf = ZSIZE(*fReal) / 2; if (ZSIZE(*fReal) % 2 == 0) zHalf--; switch (ndim) { case 2: for (long int i = 1; i <= yHalf; i++) { long int isym = intWRAP(-i, 0, YSIZE(*fReal) - 1); Complex mean = 0.5 * (DIRECT_A2D_ELEM(fFourier, i, 0) + conj(DIRECT_A2D_ELEM(fFourier, isym, 0))); 
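			// For a real-valued input the r2c transform only stores the x >= 0 half plane, so the
			// Friedel relation F(-y, 0) == conj(F(y, 0)) must hold explicitly on the x == 0 column.
			// The two conjugate-related elements were averaged into 'mean' above; writing back the
			// mean and its conjugate below enforces the symmetry exactly, even if the transform has
			// been edited in place.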
DIRECT_A2D_ELEM(fFourier, i, 0) = mean; DIRECT_A2D_ELEM(fFourier, isym, 0) = conj(mean); } break; case 3: for (long int k =0; k < ZSIZE(*fReal); k++) { long int ksym = intWRAP(-k, 0, ZSIZE(*fReal) - 1); for (long int i = 1; i <= yHalf; i++) { long int isym = intWRAP(-i, 0, YSIZE(*fReal) - 1); Complex mean = 0.5 * (DIRECT_A3D_ELEM(fFourier, k, i, 0) + conj(DIRECT_A3D_ELEM(fFourier, ksym, isym, 0))); DIRECT_A3D_ELEM(fFourier, k, i, 0) = mean; DIRECT_A3D_ELEM(fFourier, ksym, isym, 0) = conj(mean); } } for (long int k = 1; k <= zHalf; k++) { long int ksym = intWRAP(-k, 0, ZSIZE(*fReal) - 1); Complex mean = 0.5*(DIRECT_A3D_ELEM(fFourier, k, 0, 0) + conj(DIRECT_A3D_ELEM(fFourier, ksym, 0, 0))); DIRECT_A3D_ELEM(fFourier, k, 0, 0) = mean; DIRECT_A3D_ELEM(fFourier, ksym, 0, 0) = conj(mean); } break; } } void randomizePhasesBeyond(MultidimArray &v, int index) { MultidimArray< Complex > FT; FourierTransformer transformer; transformer.FourierTransform(v, FT, false); int index2 = index*index; FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM(FT) { if (kp*kp + ip*ip + jp*jp >= index2) { RFLOAT mag = abs(DIRECT_A3D_ELEM(FT, k, i, j)); RFLOAT phas = rnd_unif(0., 2.*PI); RFLOAT realval = mag * cos(phas); RFLOAT imagval = mag * sin(phas); DIRECT_A3D_ELEM(FT, k, i, j) = Complex(realval, imagval); } } // Inverse transform transformer.inverseFourierTransform(); } /* void randomizePhasesBeyond(MultidimArray &v, int index) { int index2 = index*index; FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM(v) { if (kp*kp + ip*ip + jp*jp >= index2) { RFLOAT mag = abs(DIRECT_A3D_ELEM(v, k, i, j)); RFLOAT phas = rnd_unif(0., 2.*PI); RFLOAT realval = mag * cos(phas); RFLOAT imagval = mag * sin(phas); DIRECT_A3D_ELEM(v, k, i, j) = Complex(realval, imagval); } } } */ // Fourier ring correlation ----------------------------------------------- // from precalculated Fourier Transforms, and without sampling rate etc. 
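// getFSC() below bins the two transforms into integer-radius shells and computes, per shell r:
//   FSC(r) = Re{ sum conj(F1)*F2 } / sqrt( sum|F1|^2 * sum|F2|^2 ),
// i.e. a normalised cross-correlation of the complex Fourier components in that shell.
//
// Usage sketch (illustrative variable names; the RFLOAT overload further below transforms the
// two real-space maps itself):
//   MultidimArray<RFLOAT> fsc;
//   getFSC(map_half1, map_half2, fsc);
//   // fsc(ires) is the correlation in shell ires (in Fourier pixels); the corresponding
//   // spatial frequency is ires / (box_size * pixel_size) in 1/Angstrom.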
void getFSC(MultidimArray< Complex > &FT1, MultidimArray< Complex > &FT2, MultidimArray< RFLOAT > &fsc) { if (!FT1.sameShape(FT2)) REPORT_ERROR("fourierShellCorrelation ERROR: MultidimArrays have different shapes!"); MultidimArray num(XSIZE(FT1)), den1(XSIZE(FT1)), den2(XSIZE(FT1)); Matrix1D f(3); num.initZeros(); den1.initZeros(); den2.initZeros(); fsc.initZeros(num); FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM(FT1) { int idx = ROUND(sqrt(kp*kp + ip*ip + jp*jp)); if (idx >= XSIZE(FT1)) continue; Complex z1=DIRECT_A3D_ELEM(FT1, k, i, j); Complex z2=DIRECT_A3D_ELEM(FT2, k, i, j); RFLOAT absz1=abs(z1); RFLOAT absz2=abs(z2); num(idx)+= (conj(z1) * z2).real; den1(idx)+= absz1*absz1; den2(idx)+= absz2*absz2; } FOR_ALL_ELEMENTS_IN_ARRAY1D(fsc) { fsc(i) = num(i)/sqrt(den1(i)*den2(i)); } } void getFSC(MultidimArray< RFLOAT > &m1, MultidimArray< RFLOAT > &m2, MultidimArray< RFLOAT > &fsc) { MultidimArray< Complex > FT1, FT2; FourierTransformer transformer; transformer.FourierTransform(m1, FT1); transformer.FourierTransform(m2, FT2); getFSC(FT1, FT2, fsc); } void getAmplitudeCorrelationAndDifferentialPhaseResidual(MultidimArray< Complex > &FT1, MultidimArray< Complex > &FT2, MultidimArray< RFLOAT > &acorr, MultidimArray< RFLOAT > &dpr) { MultidimArray< int > radial_count(XSIZE(FT1)); MultidimArray num, mu1, mu2, sig1, sig2; Matrix1D f(3); mu1.initZeros(radial_count); mu2.initZeros(radial_count); sig1.initZeros(radial_count); sig2.initZeros(radial_count); acorr.initZeros(radial_count); dpr.initZeros(radial_count); num.initZeros(radial_count); FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM(FT1) { // Amplitudes int idx = ROUND(sqrt(kp*kp + ip*ip + jp*jp)); if (idx >= XSIZE(FT1)) continue; RFLOAT abs1 = abs(DIRECT_A3D_ELEM(FT1, k, i, j)); RFLOAT abs2 = abs(DIRECT_A3D_ELEM(FT2, k, i, j)); mu1(idx)+= abs1; mu2(idx)+= abs2; radial_count(idx)++; //phases RFLOAT phas1 = RAD2DEG(atan2((DIRECT_A3D_ELEM(FT1, k, i, j)).imag, (DIRECT_A3D_ELEM(FT1, k, i, j)).real)); RFLOAT phas2 = RAD2DEG(atan2((DIRECT_A3D_ELEM(FT2, k, i, j)).imag, (DIRECT_A3D_ELEM(FT2, k, i, j)).real)); RFLOAT delta_phas = phas1 - phas2; if (delta_phas > 180.) delta_phas -= 360.; else if (delta_phas < -180.) delta_phas += 360.; dpr(idx) += delta_phas*delta_phas*(abs1+abs2); num(idx) += (abs1+abs2); } // Get average amplitudes in each shell for both maps FOR_ALL_ELEMENTS_IN_ARRAY1D(mu1) { if (radial_count(i) > 0) { mu1(i) /= radial_count(i); mu2(i) /= radial_count(i); dpr(i) = sqrt(dpr(i)/num(i)); } } // Now calculate Pearson's correlation coefficient FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM(FT1) { int idx = ROUND(sqrt(kp*kp + ip*ip + jp*jp)); if (idx >= XSIZE(FT1)) continue; RFLOAT z1=abs(DIRECT_A3D_ELEM(FT1, k, i, j)) - mu1(idx); RFLOAT z2=abs(DIRECT_A3D_ELEM(FT2, k, i, j)) - mu2(idx); acorr(idx) += z1*z2; sig1(idx) += z1*z1; sig2(idx) += z2*z2; } FOR_ALL_ELEMENTS_IN_ARRAY1D(acorr) { RFLOAT aux = sqrt(sig1(i))*sqrt(sig2(i)); if (aux > 0.) 
acorr(i) /= sqrt(sig1(i))*sqrt(sig2(i)); else acorr(i) = 1.; } } void getCosDeltaPhase(MultidimArray< Complex > &FT1, MultidimArray< Complex > &FT2, MultidimArray< RFLOAT > &cosPhi) { MultidimArray< int > radial_count(XSIZE(FT1)); cosPhi.initZeros(XSIZE(FT1)); FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM(FT1) { int idx = ROUND(sqrt(kp*kp + ip*ip + jp*jp)); if (idx >= XSIZE(FT1)) continue; RFLOAT phas1 = RAD2DEG(atan2((DIRECT_A3D_ELEM(FT1, k, i, j)).imag, (DIRECT_A3D_ELEM(FT1, k, i, j)).real)); RFLOAT phas2 = RAD2DEG(atan2((DIRECT_A3D_ELEM(FT2, k, i, j)).imag, (DIRECT_A3D_ELEM(FT2, k, i, j)).real)); cosPhi(idx) += cos(phas1 - phas2); radial_count(idx) ++; } FOR_ALL_ELEMENTS_IN_ARRAY1D(cosPhi) { if (radial_count(i) > 0) cosPhi(i) /= radial_count(i); } } void getAmplitudeCorrelationAndDifferentialPhaseResidual(MultidimArray< RFLOAT > &m1, MultidimArray< RFLOAT > &m2, MultidimArray< RFLOAT > &acorr, MultidimArray< RFLOAT > &dpr) { MultidimArray< Complex > FT1, FT2; FourierTransformer transformer; transformer.FourierTransform(m1, FT1); transformer.FourierTransform(m2, FT2); getAmplitudeCorrelationAndDifferentialPhaseResidual(FT1, FT2, acorr, dpr); } /* void selfScaleToSizeFourier(long int Ydim, long int Xdim, MultidimArray& Mpmem, int nThreads) { //Mmem = *this //memory for fourier transform output MultidimArray MmemFourier; // Perform the Fourier transform FourierTransformer transformerM; transformerM.setThreadsNumber(nThreads); transformerM.FourierTransform(Mpmem, MmemFourier, true); // Create space for the downsampled image and its Fourier transform Mpmem.resize(Ydim, Xdim); MultidimArray MpmemFourier; FourierTransformer transformerMp; transformerMp.setReal(Mpmem); transformerMp.getFourierAlias(MpmemFourier); long int ihalf = XMIPP_MIN((YSIZE(MpmemFourier)/2+1),(YSIZE(MmemFourier)/2+1)); long int xsize = XMIPP_MIN((XSIZE(MmemFourier)),(XSIZE(MpmemFourier))); //Init with zero MpmemFourier.initZeros(); for (long int i=0; i &in, MultidimArray &out, RFLOAT oridim, RFLOAT xshift, RFLOAT yshift, RFLOAT zshift) { out.resize(in); RFLOAT dotp, a, b, x, y, z; switch (in.getDim()) { case 1: xshift /= -oridim; for (long int j = 0; j < XSIZE(in); j++) { x = j; dotp = 2 * PI * (x * xshift); #ifdef RELION_SINGLE_PRECISION SINCOSF(dotp, &b, &a); #else SINCOS(dotp, &b, &a); #endif DIRECT_A1D_ELEM(out, j) = Complex(a, b); } break; case 2: xshift /= -oridim; yshift /= -oridim; for (long int i=0; i=XSIZE(in); i--) { y = i - YSIZE(in); for (long int j=0; j &in, MultidimArray &out, RFLOAT oridim, long int newdim, TabSine& tabsin, TabCosine& tabcos, RFLOAT xshift, RFLOAT yshift, RFLOAT zshift) { RFLOAT a = 0., b = 0., c = 0., d = 0., ac = 0., bd = 0., ab_cd = 0., dotp = 0., x = 0., y = 0., z = 0.; RFLOAT twopi = 2. 
* PI; if (&in == &out) REPORT_ERROR("shiftImageInFourierTransformWithTabSincos ERROR: Input and output images should be different!"); // Check size of the input array if ( (YSIZE(in) > 1) && ( (YSIZE(in)/2 + 1) != XSIZE(in) ) ) REPORT_ERROR("shiftImageInFourierTransformWithTabSincos ERROR: the Fourier transform should be of an image with equal sizes in all dimensions!"); long int newhdim = newdim/2 + 1; if (newhdim > XSIZE(in)) REPORT_ERROR("shiftImageInFourierTransformWithTabSincos ERROR: 'newdim' should be equal or smaller than the size of the original array!"); // Initialise output array out.clear(); switch (in.getDim()) { case 2: out.initZeros(newdim, newhdim); break; case 3: out.initZeros(newdim, newdim, newhdim); break; default: REPORT_ERROR("shiftImageInFourierTransformWithTabSincos ERROR: dimension should be 2 or 3!"); } if (in.getDim() == 2) { xshift /= -oridim; yshift /= -oridim; if ( (ABS(xshift) < XMIPP_EQUAL_ACCURACY) && (ABS(yshift) < XMIPP_EQUAL_ACCURACY) ) { windowFourierTransform(in, out, newdim); return; } FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM2D(out) { dotp = twopi * (jp * xshift + ip * yshift); a = tabcos(dotp); b = tabsin(dotp); c = DIRECT_A2D_ELEM(in, i, j).real; d = DIRECT_A2D_ELEM(in, i, j).imag; ac = a * c; bd = b * d; ab_cd = (a + b) * (c + d); DIRECT_A2D_ELEM(out, i, j) = Complex(ac - bd, ab_cd - ac - bd); } } else if (in.getDim() == 3) { xshift /= -oridim; yshift /= -oridim; zshift /= -oridim; if ( (ABS(xshift) < XMIPP_EQUAL_ACCURACY) && (ABS(yshift) < XMIPP_EQUAL_ACCURACY) && (ABS(zshift) < XMIPP_EQUAL_ACCURACY) ) { windowFourierTransform(in, out, newdim); return; } FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM(out) { dotp = twopi * (jp * xshift + ip * yshift + kp * zshift); a = tabcos(dotp); b = tabsin(dotp); c = DIRECT_A3D_ELEM(in, k, i, j).real; d = DIRECT_A3D_ELEM(in, k, i, j).imag; ac = a * c; bd = b * d; ab_cd = (a + b) * (c + d); DIRECT_A3D_ELEM(out, k, i, j) = Complex(ac - bd, ab_cd - ac - bd); } } } // Shift an image through phase-shifts in its Fourier Transform (without pretabulated sine and cosine) void shiftImageInFourierTransform(MultidimArray &in, MultidimArray &out, RFLOAT oridim, RFLOAT xshift, RFLOAT yshift, RFLOAT zshift) { out.resize(in); RFLOAT dotp, a, b, c, d, ac, bd, ab_cd, x, y, z; switch (in.getDim()) { case 1: xshift /= -oridim; if (ABS(xshift) < XMIPP_EQUAL_ACCURACY) { out = in; return; } for (long int j = 0; j < XSIZE(in); j++) { x = j; dotp = 2 * PI * (x * xshift); #ifdef RELION_SINGLE_PRECISION SINCOSF(dotp, &b, &a); #else SINCOS(dotp, &b, &a); #endif c = DIRECT_A1D_ELEM(in, j).real; d = DIRECT_A1D_ELEM(in, j).imag; ac = a * c; bd = b * d; ab_cd = (a + b) * (c + d); // (ab_cd-ac-bd = ad+bc : but needs 4 multiplications) DIRECT_A1D_ELEM(out, j) = Complex(ac - bd, ab_cd - ac - bd); } break; case 2: xshift /= -oridim; yshift /= -oridim; if (ABS(xshift) < XMIPP_EQUAL_ACCURACY && ABS(yshift) < XMIPP_EQUAL_ACCURACY) { out = in; return; } for (long int i=0; i=XSIZE(in); i--) { y = i - YSIZE(in); for (long int j=0; j &Min, MultidimArray &spectrum, int spectrum_type) { MultidimArray Faux; int xsize = XSIZE(Min); // Takanori: The above line should be XSIZE(Min) / 2 + 1 but for compatibility reasons, I keep this as it is. 
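	// What follows is a rotational average: every Fourier component is assigned to the integer
	// shell ROUND(sqrt(kp*kp + ip*ip + jp*jp)) and either its amplitude |F| (AMPLITUDE_SPECTRUM)
	// or its power |F|^2 (POWER_SPECTRUM) is accumulated, then divided by the number of
	// components in that shell.
	//
	// Usage sketch (illustrative names):
	//   MultidimArray<RFLOAT> spec;
	//   getSpectrum(vol, spec, POWER_SPECTRUM);   // spec(r) = mean |F|^2 in shell r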
Matrix1D f(3); MultidimArray count(xsize); FourierTransformer transformer; spectrum.initZeros(xsize); count.initZeros(); transformer.FourierTransform(Min, Faux, false); FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM(Faux) { long int idx = ROUND(sqrt(kp*kp + ip*ip + jp*jp)); if (spectrum_type == AMPLITUDE_SPECTRUM) spectrum(idx) += abs(dAkij(Faux, k, i, j)); else spectrum(idx) += norm(dAkij(Faux, k, i, j)); count(idx) += 1.; } for (long int i = 0; i < xsize; i++) if (count(i) > 0.) spectrum(i) /= count(i); } void divideBySpectrum(MultidimArray &Min, MultidimArray &spectrum, bool leave_origin_intact) { MultidimArray div_spec(spectrum); FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY1D(spectrum) { if (ABS(dAi(spectrum,i)) > 0.) dAi(div_spec,i) = 1./dAi(spectrum,i); else dAi(div_spec,i) = 1.; } multiplyBySpectrum(Min,div_spec,leave_origin_intact); } void multiplyBySpectrum(MultidimArray &Min, MultidimArray &spectrum, bool leave_origin_intact) { MultidimArray Faux; Matrix1D f(3); MultidimArray lspectrum; FourierTransformer transformer; //RFLOAT dim3 = XSIZE(Min)*YSIZE(Min)*ZSIZE(Min); transformer.FourierTransform(Min, Faux, false); lspectrum=spectrum; if (leave_origin_intact) lspectrum(0)=1.; FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM(Faux) { long int idx = ROUND(sqrt(kp*kp + ip*ip + jp*jp)); dAkij(Faux, k, i, j) *= lspectrum(idx);// * dim3; } transformer.inverseFourierTransform(); } void whitenSpectrum(MultidimArray &Min, MultidimArray &Mout, int spectrum_type, bool leave_origin_intact) { MultidimArray spectrum; getSpectrum(Min,spectrum,spectrum_type); Mout=Min; divideBySpectrum(Mout,spectrum,leave_origin_intact); } void adaptSpectrum(MultidimArray &Min, MultidimArray &Mout, const MultidimArray &spectrum_ref, int spectrum_type, bool leave_origin_intact) { MultidimArray spectrum; getSpectrum(Min,spectrum,spectrum_type); FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY1D(spectrum) { dAi(spectrum, i) = (dAi(spectrum, i) > 0.) ? dAi(spectrum_ref,i)/ dAi(spectrum, i) : 1.; } Mout=Min; multiplyBySpectrum(Mout,spectrum,leave_origin_intact); } /** Kullback-Leibler divergence */ RFLOAT getKullbackLeiblerDivergence(MultidimArray &Fimg, MultidimArray &Fref, MultidimArray &sigma2, MultidimArray &p_i, MultidimArray &q_i, int highshell, int lowshell ) { // First check dimensions are OK if (!Fimg.sameShape(Fref)) REPORT_ERROR("getKullbackLeiblerDivergence ERROR: Fimg and Fref are not of the same shape."); if (highshell < 0) highshell = XSIZE(Fimg) - 1; if (lowshell < 0) lowshell = 0; if (highshell > XSIZE(sigma2)) REPORT_ERROR("getKullbackLeiblerDivergence ERROR: highshell is larger than size of sigma2 array."); if (highshell < lowshell) REPORT_ERROR("getKullbackLeiblerDivergence ERROR: highshell is smaller than lowshell."); // Initialize the histogram MultidimArray histogram; int histogram_size = 101; int histogram_origin = histogram_size / 2; RFLOAT sigma_max = 10.; RFLOAT histogram_factor = histogram_origin / sigma_max; histogram.initZeros(histogram_size); // This way this will work in both 2D and 3D FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM(Fimg) { int ires = ROUND(sqrt(kp*kp + ip*ip + jp*jp)); if (ires >= lowshell && ires <= highshell) { // Use FT of masked image for noise estimation! 
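			// The residual (Fref - Fimg) is divided below by the expected standard deviation
			// sqrt(sigma2(ires)) of its shell, so under the assumed noise model the normalised
			// real and imaginary parts should follow a standard Gaussian. They are histogrammed
			// over [-10 sigma, +10 sigma], and the function returns the (bin-averaged)
			// Kullback-Leibler divergence sum_i p_i * log(p_i / q_i) between this histogram (p)
			// and the discretised unit Gaussian (q), as a measure of how far the data deviate
			// from the assumed noise spectrum.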
RFLOAT diff_real = (DIRECT_A3D_ELEM(Fref, k, i, j)).real - (DIRECT_A3D_ELEM(Fimg, k, i, j)).real; RFLOAT diff_imag = (DIRECT_A3D_ELEM(Fref, k, i, j)).imag - (DIRECT_A3D_ELEM(Fimg, k, i, j)).imag; RFLOAT sigma = sqrt(DIRECT_A1D_ELEM(sigma2, ires)); // Divide by standard deviation to normalise all the difference diff_real /= sigma; diff_imag /= sigma; // Histogram runs from -10 sigma to +10 sigma diff_real += sigma_max; diff_imag += sigma_max; // Make histogram on-the-fly; // Real part int ihis = ROUND(diff_real * histogram_factor); if (ihis < 0) ihis = 0; else if (ihis >= histogram_size) ihis = histogram_size - 1; histogram(ihis)++; // Imaginary part ihis = ROUND(diff_imag * histogram_factor); if (ihis < 0) ihis = 0; else if (ihis > histogram_size) ihis = histogram_size; histogram(ihis)++; } } // Normalise the histogram and the discretised analytical Gaussian RFLOAT norm = (RFLOAT)histogram.sum(); RFLOAT gaussnorm = 0.; for (int i = 0; i < histogram_size; i++) { RFLOAT x = (RFLOAT)i / histogram_factor; gaussnorm += gaussian1D(x - sigma_max, 1. , 0.); } // Now calculate the actual Kullback-Leibler divergence RFLOAT kl_divergence = 0.; p_i.resize(histogram_size); q_i.resize(histogram_size); for (int i = 0; i < histogram_size; i++) { // Data distribution p_i(i) = (RFLOAT)histogram(i) / norm; // Theoretical distribution RFLOAT x = (RFLOAT)i / histogram_factor; q_i(i) = gaussian1D(x - sigma_max, 1. , 0.) / gaussnorm; if (p_i(i) > 0.) kl_divergence += p_i(i) * log (p_i(i) / q_i(i)); } kl_divergence /= (RFLOAT)histogram_size; return kl_divergence; } void resizeMap(MultidimArray &img, int newsize) { FourierTransformer transformer; MultidimArray FT, FT2; transformer.FourierTransform(img, FT, false); windowFourierTransform(FT, FT2, newsize); if (img.getDim() == 2) img.resize(newsize, newsize); else if (img.getDim() == 3) img.resize(newsize, newsize, newsize); transformer.inverseFourierTransform(FT2, img); } void applyBFactorToMap(MultidimArray &FT, int ori_size, RFLOAT bfactor, RFLOAT angpix) { FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM(FT) { int r2 = kp * kp + ip * ip + jp * jp; RFLOAT res = sqrt((RFLOAT)r2)/(ori_size * angpix); // get resolution in 1/Angstrom if (res <= 1. / (angpix * 2.) ) // Apply B-factor sharpening until Nyquist, then low-pass filter later on (with a soft edge) { DIRECT_A3D_ELEM(FT, k, i, j) *= exp( -(bfactor / 4.) 
* res * res); } else { DIRECT_A3D_ELEM(FT, k, i, j) = 0.; } } } void applyBFactorToMap(MultidimArray &img, RFLOAT bfactor, RFLOAT angpix) { FourierTransformer transformer; MultidimArray FT; transformer.FourierTransform(img, FT, false); applyBFactorToMap(FT, XSIZE(img), bfactor, angpix); transformer.inverseFourierTransform(); } void LoGFilterMap(MultidimArray &FT, int ori_size, RFLOAT sigma, RFLOAT angpix) { // Calculate sigma in reciprocal pixels (input is in Angstroms) and pre-calculate its square // Factor of 1/2 because input is diameter, and filter uses radius RFLOAT isigma2 = (0.5 * ori_size * angpix) / sigma; isigma2 *= isigma2; // Gunn Pattern Recognition 32 (1999) 1463-1472 // The Laplacian filter is: 1/(PI*sigma2)*(r^2/2*sigma2 - 1) * exp(-r^2/(2*sigma2)) // and its Fourier Transform is: r^2 * exp(-0.5*r2/isigma2); // Then to normalise for different scales: divide by isigma2; FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM(FT) { RFLOAT r2 = (RFLOAT)kp * (RFLOAT)kp + (RFLOAT)ip * (RFLOAT)ip + (RFLOAT)jp * (RFLOAT)jp; DIRECT_A3D_ELEM(FT, k, i, j) *= r2 * exp(-0.5*r2/isigma2) / isigma2; } } void LoGFilterMap(MultidimArray &img, RFLOAT sigma, RFLOAT angpix) { FourierTransformer transformer; MultidimArray FT; // Make this work for maps (or more likely 2D images) that have unequal X and Y dimensions img.setXmippOrigin(); int my_xsize = XSIZE(img); int my_ysize = YSIZE(img); int my_size = (my_xsize != my_ysize) ? XMIPP_MAX(my_xsize, my_ysize) : my_xsize; if (my_xsize != my_ysize) { if (img.getDim() == 2) { int my_small_size = XMIPP_MIN(my_xsize, my_ysize); RFLOAT avg,stddev,minn,maxx; img.computeStats(avg,stddev,minn,maxx); img.window(FIRST_XMIPP_INDEX(my_size), FIRST_XMIPP_INDEX(my_size), LAST_XMIPP_INDEX(my_size), LAST_XMIPP_INDEX(my_size)); if (my_small_size == my_xsize) { FOR_ALL_ELEMENTS_IN_ARRAY2D(img) { if (j < FIRST_XMIPP_INDEX(my_small_size) || j > LAST_XMIPP_INDEX(my_small_size)) A2D_ELEM(img, i, j) = rnd_gaus(avg, stddev); } } else { FOR_ALL_ELEMENTS_IN_ARRAY2D(img) { if (i < FIRST_XMIPP_INDEX(my_small_size) || i > LAST_XMIPP_INDEX(my_small_size)) A2D_ELEM(img, i, j) = rnd_gaus(avg, stddev); } } } else { REPORT_ERROR("lowPassFilterMap: filtering of non-cube maps is not implemented..."); } } transformer.FourierTransform(img, FT, false); LoGFilterMap(FT, XSIZE(img), sigma, angpix); transformer.inverseFourierTransform(); img.setXmippOrigin(); if (my_xsize != my_ysize) { if (img.getDim() == 2) { img.window(FIRST_XMIPP_INDEX(my_ysize), FIRST_XMIPP_INDEX(my_xsize), LAST_XMIPP_INDEX(my_ysize), LAST_XMIPP_INDEX(my_xsize)); } else { REPORT_ERROR("lowPassFilterMap: filtering of non-cube maps is not implemented..."); } } } void lowPassFilterMap(MultidimArray &FT, int ori_size, RFLOAT low_pass, RFLOAT angpix, int filter_edge_width, bool do_highpass_instead) { // Which resolution shell is the filter? 
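	// The cutoff is converted to a shell index, ires_filter = ROUND(ori_size * angpix / low_pass),
	// and a raised-cosine edge of filter_edge_width shells is centred on it: below edge_low the
	// transform is left untouched (or zeroed for the high-pass variant), above edge_high it is
	// zeroed (or left untouched), and in between it is multiplied by 0.5 +/- 0.5*cos(...).
	//
	// Illustrative numbers (not from the code): ori_size = 256 px, angpix = 1.0 A/px,
	// low_pass = 10 A, filter_edge_width = 2  ->  ires_filter = 26, with the soft edge running
	// from shell 25 to shell 27 (i.e. 25/256 to 27/256 in 1/pixel).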
int ires_filter = ROUND((ori_size * angpix)/low_pass); int filter_edge_halfwidth = filter_edge_width / 2; // Soft-edge: from 1 shell less to one shell more: RFLOAT edge_low = XMIPP_MAX(0., (ires_filter - filter_edge_halfwidth) / (RFLOAT)ori_size); // in 1/pix RFLOAT edge_high = XMIPP_MIN(XSIZE(FT), (ires_filter + filter_edge_halfwidth) / (RFLOAT)ori_size); // in 1/pix RFLOAT edge_width = edge_high - edge_low; // Put a raised cosine from edge_low to edge_high FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM(FT) { RFLOAT r2 = (RFLOAT)kp * (RFLOAT)kp + (RFLOAT)ip * (RFLOAT)ip + (RFLOAT)jp * (RFLOAT)jp; RFLOAT res = sqrt((RFLOAT)r2)/ori_size; // get resolution in 1/pixel if (do_highpass_instead) { if (res < edge_low) DIRECT_A3D_ELEM(FT, k, i, j) = 0.; else if (res > edge_high) continue; else DIRECT_A3D_ELEM(FT, k, i, j) *= 0.5 - 0.5 * cos( PI * (res-edge_low)/edge_width); } else { if (res < edge_low) continue; else if (res > edge_high) DIRECT_A3D_ELEM(FT, k, i, j) = 0.; else DIRECT_A3D_ELEM(FT, k, i, j) *= 0.5 + 0.5 * cos( PI * (res-edge_low)/edge_width); } } } void lowPassFilterMap(MultidimArray &img, RFLOAT low_pass, RFLOAT angpix, int filter_edge_width) { FourierTransformer transformer; MultidimArray FT; // Make this work for maps (or more likely 2D images) that have unequal X and Y dimensions img.setXmippOrigin(); int my_xsize = XSIZE(img); int my_ysize = YSIZE(img); int my_size = (my_xsize != my_ysize) ? XMIPP_MAX(my_xsize, my_ysize) : my_xsize; if (my_xsize != my_ysize) { if (img.getDim() == 2) { int my_small_size = XMIPP_MIN(my_xsize, my_ysize); RFLOAT avg,stddev,minn,maxx; img.computeStats(avg,stddev,minn,maxx); img.window(FIRST_XMIPP_INDEX(my_size), FIRST_XMIPP_INDEX(my_size), LAST_XMIPP_INDEX(my_size), LAST_XMIPP_INDEX(my_size)); if (my_small_size == my_xsize) { FOR_ALL_ELEMENTS_IN_ARRAY2D(img) { if (j < FIRST_XMIPP_INDEX(my_small_size) || j > LAST_XMIPP_INDEX(my_small_size)) A2D_ELEM(img, i, j) = rnd_gaus(avg, stddev); } } else { FOR_ALL_ELEMENTS_IN_ARRAY2D(img) { if (i < FIRST_XMIPP_INDEX(my_small_size) || i > LAST_XMIPP_INDEX(my_small_size)) A2D_ELEM(img, i, j) = rnd_gaus(avg, stddev); } } } else { REPORT_ERROR("lowPassFilterMap: filtering of non-cube maps is not implemented..."); } } transformer.FourierTransform(img, FT, false); lowPassFilterMap(FT, XSIZE(img), low_pass, angpix, filter_edge_width, false); transformer.inverseFourierTransform(); img.setXmippOrigin(); if (my_xsize != my_ysize) { if (img.getDim() == 2) { img.window(FIRST_XMIPP_INDEX(my_ysize), FIRST_XMIPP_INDEX(my_xsize), LAST_XMIPP_INDEX(my_ysize), LAST_XMIPP_INDEX(my_xsize)); } else { REPORT_ERROR("lowPassFilterMap: filtering of non-cube maps is not implemented..."); } } } void highPassFilterMap(MultidimArray &img, RFLOAT low_pass, RFLOAT angpix, int filter_edge_width) { FourierTransformer transformer; MultidimArray FT; transformer.FourierTransform(img, FT, false); lowPassFilterMap(FT, XSIZE(img), low_pass, angpix, filter_edge_width, true); transformer.inverseFourierTransform(); } void directionalFilterMap(MultidimArray &FT, int ori_size, RFLOAT low_pass, RFLOAT angpix, std::string axis, int filter_edge_width) { // Which resolution shell is the filter? 
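	// Same raised-cosine edge as lowPassFilterMap() above, except that the "resolution" of each
	// Fourier component is measured along a single axis only (jp for X, ip for Y, kp for Z), so
	// frequencies are attenuated only in the requested direction and left untouched in the two
	// perpendicular directions.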
int ires_filter = ROUND((ori_size * angpix)/low_pass); int filter_edge_halfwidth = filter_edge_width / 2; // Soft-edge: from 1 shell less to one shell more: RFLOAT edge_low = XMIPP_MAX(0., (ires_filter - filter_edge_halfwidth) / (RFLOAT)ori_size); // in 1/pix RFLOAT edge_high = XMIPP_MIN(XSIZE(FT), (ires_filter + filter_edge_halfwidth) / (RFLOAT)ori_size); // in 1/pix RFLOAT edge_width = edge_high - edge_low; if (axis == "x" || axis == "X") { FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM(FT) { RFLOAT r2 = (RFLOAT)jp * (RFLOAT)jp; RFLOAT res = sqrt((RFLOAT)r2)/ori_size; // get resolution in 1/pixel if (res < edge_low) continue; else if (res > edge_high) DIRECT_A3D_ELEM(FT, k, i, j) = 0.; else DIRECT_A3D_ELEM(FT, k, i, j) *= 0.5 + 0.5 * cos( PI * (res-edge_low)/edge_width); } } else if (axis == "y" || axis == "Y") { FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM(FT) { RFLOAT r2 = (RFLOAT)ip * (RFLOAT)ip; RFLOAT res = sqrt((RFLOAT)r2)/ori_size; // get resolution in 1/pixel if (res < edge_low) continue; else if (res > edge_high) DIRECT_A3D_ELEM(FT, k, i, j) = 0.; else DIRECT_A3D_ELEM(FT, k, i, j) *= 0.5 + 0.5 * cos( PI * (res-edge_low)/edge_width); } } else if (axis == "z" || axis == "Z") { FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM(FT) { RFLOAT r2 = (RFLOAT)kp * (RFLOAT)kp; RFLOAT res = sqrt((RFLOAT)r2)/ori_size; // get resolution in 1/pixel if (res < edge_low) continue; else if (res > edge_high) DIRECT_A3D_ELEM(FT, k, i, j) = 0.; else DIRECT_A3D_ELEM(FT, k, i, j) *= 0.5 + 0.5 * cos( PI * (res-edge_low)/edge_width); } } } void directionalFilterMap(MultidimArray &img, RFLOAT low_pass, RFLOAT angpix, std::string axis, int filter_edge_width) { FourierTransformer transformer; MultidimArray FT; // Make this work for maps (or more likely 2D images) that have unequal X and Y dimensions img.setXmippOrigin(); int my_xsize = XSIZE(img); int my_ysize = YSIZE(img); int my_size = (my_xsize != my_ysize) ? 
XMIPP_MAX(my_xsize, my_ysize) : my_xsize; if (my_xsize != my_ysize) { if (img.getDim() == 2) { int my_small_size = XMIPP_MIN(my_xsize, my_ysize); RFLOAT avg,stddev,minn,maxx; img.computeStats(avg,stddev,minn,maxx); img.window(FIRST_XMIPP_INDEX(my_size), FIRST_XMIPP_INDEX(my_size), LAST_XMIPP_INDEX(my_size), LAST_XMIPP_INDEX(my_size)); if (my_small_size == my_xsize) { FOR_ALL_ELEMENTS_IN_ARRAY2D(img) { if (j < FIRST_XMIPP_INDEX(my_small_size) || j > LAST_XMIPP_INDEX(my_small_size)) A2D_ELEM(img, i, j) = rnd_gaus(avg, stddev); } } else { FOR_ALL_ELEMENTS_IN_ARRAY2D(img) { if (i < FIRST_XMIPP_INDEX(my_small_size) || i > LAST_XMIPP_INDEX(my_small_size)) A2D_ELEM(img, i, j) = rnd_gaus(avg, stddev); } } } else { REPORT_ERROR("lowPassFilterMap: filtering of non-cube maps is not implemented..."); } } transformer.FourierTransform(img, FT, false); directionalFilterMap(FT, XSIZE(img), low_pass, angpix, axis, filter_edge_width); transformer.inverseFourierTransform(); img.setXmippOrigin(); if (my_xsize != my_ysize) { if (img.getDim() == 2) { img.window(FIRST_XMIPP_INDEX(my_ysize), FIRST_XMIPP_INDEX(my_xsize), LAST_XMIPP_INDEX(my_ysize), LAST_XMIPP_INDEX(my_xsize)); } else { REPORT_ERROR("lowPassFilterMap: filtering of non-cube maps is not implemented..."); } } } void applyBeamTilt(const MultidimArray &Fin, MultidimArray &Fout, RFLOAT beamtilt_x, RFLOAT beamtilt_y, RFLOAT wavelength, RFLOAT Cs, RFLOAT angpix, int ori_size) { Fout = Fin; selfApplyBeamTilt(Fout, beamtilt_x, beamtilt_y, wavelength, Cs, angpix, ori_size); } void selfApplyBeamTilt(MultidimArray &Fimg, RFLOAT beamtilt_x, RFLOAT beamtilt_y, RFLOAT wavelength, RFLOAT Cs, RFLOAT angpix, int ori_size) { if (Fimg.getDim() != 2) REPORT_ERROR("applyBeamTilt can only be done on 2D Fourier Transforms!"); RFLOAT boxsize = angpix * ori_size; RFLOAT factor = 0.360 * Cs * 10000000 * wavelength * wavelength / (boxsize * boxsize * boxsize); FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM2D(Fimg) { RFLOAT delta_phase = factor * (ip * ip + jp * jp) * (ip * beamtilt_y + jp * beamtilt_x); RFLOAT realval = DIRECT_A2D_ELEM(Fimg, i, j).real; RFLOAT imagval = DIRECT_A2D_ELEM(Fimg, i, j).imag; RFLOAT mag = sqrt(realval * realval + imagval * imagval); RFLOAT phas = atan2(imagval, realval) + DEG2RAD(delta_phase); // apply phase shift! realval = mag * cos(phas); imagval = mag * sin(phas); DIRECT_A2D_ELEM(Fimg, i, j) = Complex(realval, imagval); } } void selfApplyBeamTilt(MultidimArray &Fimg, RFLOAT beamtilt_x, RFLOAT beamtilt_y, RFLOAT beamtilt_xx, RFLOAT beamtilt_xy, RFLOAT beamtilt_yy, RFLOAT wavelength, RFLOAT Cs, RFLOAT angpix, int ori_size) { if (Fimg.getDim() != 2) REPORT_ERROR("applyBeamTilt can only be done on 2D Fourier Transforms!"); RFLOAT boxsize = angpix * ori_size; RFLOAT factor = 0.360 * Cs * 10000000 * wavelength * wavelength / (boxsize * boxsize * boxsize); FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM2D(Fimg) { RFLOAT q = beamtilt_xx * jp * jp + 2.0 * beamtilt_xy * ip * jp + beamtilt_yy * ip * ip; RFLOAT delta_phase = factor * q * (ip * beamtilt_y + jp * beamtilt_x); RFLOAT realval = DIRECT_A2D_ELEM(Fimg, i, j).real; RFLOAT imagval = DIRECT_A2D_ELEM(Fimg, i, j).imag; RFLOAT mag = sqrt(realval * realval + imagval * imagval); RFLOAT phas = atan2(imagval, realval) + DEG2RAD(delta_phase); // apply phase shift! 
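		// The tilt-induced aberration is applied as a pure phase shift: each Fourier pixel is
		// decomposed into magnitude and phase, delta_phase (in degrees) is added, and the complex
		// value is rebuilt below. In this anisotropic overload the isotropic (ip*ip + jp*jp) term
		// of the model is replaced by the quadratic form
		//   q = beamtilt_xx*jp*jp + 2*beamtilt_xy*ip*jp + beamtilt_yy*ip*ip,
		// so the phase error still grows with the third power of spatial frequency but may differ
		// between directions.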
realval = mag * cos(phas); imagval = mag * sin(phas); DIRECT_A2D_ELEM(Fimg, i, j) = Complex(realval, imagval); } } void padAndFloat2DMap(const MultidimArray &v, MultidimArray &out, int factor) { long int Xdim, Ydim, Zdim, Ndim, XYdim; RFLOAT bg_val, bg_pix, bd_val, bd_pix; out.clear(); // Check dimensions v.getDimensions(Xdim, Ydim, Zdim, Ndim); if ( (Zdim > 1) || (Ndim > 1) ) REPORT_ERROR("fftw.cpp::padAndFloat2DMap(): ERROR MultidimArray should be 2D."); if (Xdim * Ydim <= 16) REPORT_ERROR("fftw.cpp::padAndFloat2DMap(): ERROR MultidimArray is too small."); if (factor <= 1) REPORT_ERROR("fftw.cpp::padAndFloat2DMap(): ERROR Padding factor should be larger than 1."); // Calculate background and border values bg_val = bg_pix = bd_val = bd_pix = 0.; FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY2D(v) { bg_val += DIRECT_A2D_ELEM(v, i, j); bg_pix += 1.; if ( (i == 0) || (j == 0) || (i == (YSIZE(v) - 1)) || (j == (XSIZE(v) - 1)) ) { bd_val += DIRECT_A2D_ELEM(v, i, j); bd_pix += 1.; } } if ( (bg_pix < 1.) || (bd_pix < 1.) ) REPORT_ERROR("fftw.cpp::padAndFloat2DMap(): ERROR MultidimArray is too small."); bg_val /= bg_pix; bd_val /= bd_pix; // DEBUG //std::cout << "bg_val = " << bg_val << ", bg_pix = " << bg_pix << std::endl; //std::cout << "bd_val = " << bd_val << ", bd_pix = " << bd_pix << std::endl; // Pad and float output MultidimArray (2x original size by default) XYdim = (Xdim > Ydim) ? (Xdim * factor) : (Ydim * factor); out.resize(XYdim, XYdim); out.initConstant(bd_val - bg_val); out.setXmippOrigin(); FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY2D(v) { A2D_ELEM(out, i + FIRST_XMIPP_INDEX(YSIZE(v)), j + FIRST_XMIPP_INDEX(XSIZE(v))) = DIRECT_A2D_ELEM(v, i, j) - bg_val; } } void amplitudeOrPhaseMap(const MultidimArray &v, MultidimArray &, int output_map_type) { long int XYdim, maxr2; RFLOAT val; FourierTransformer transformer; MultidimArray Faux; MultidimArray out; transformer.clear(); Faux.clear(); out.clear(); // Pad and float padAndFloat2DMap(v, out); if ( (XSIZE(out) != YSIZE(out)) || (ZSIZE(out) > 1) || (NSIZE(out) > 1) ) REPORT_ERROR("fftw.cpp::amplitudeOrPhaseMap(): ERROR MultidimArray should be 2D square."); XYdim = XSIZE(out); // Fourier Transform transformer.FourierTransform(out, Faux, false); // TODO: false??? CenterFFTbySign(Faux); // Write to output files out.setXmippOrigin(); out.initZeros(XYdim, XYdim); maxr2 = (XYdim - 1) * (XYdim - 1) / 4; FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM2D(Faux) { if ( (ip > STARTINGY(out)) && (ip < FINISHINGY(out)) && (jp > STARTINGX(out)) && (jp < FINISHINGX(out)) && ((ip * ip + jp * jp) < maxr2) ) { if (output_map_type == AMPLITUDE_MAP) val = FFTW2D_ELEM(Faux, ip, jp).abs(); else if (output_map_type == PHASE_MAP) val = (180.) * (FFTW2D_ELEM(Faux, ip, jp).arg()) / PI; else REPORT_ERROR("fftw.cpp::amplitudeOrPhaseMap(): ERROR Unknown type of output map."); A2D_ELEM(out, -ip, -jp) = A2D_ELEM(out, ip, jp) = val; } } A2D_ELEM(out, 0, 0) = 0.; amp.clear(); amp = out; } void helicalLayerLineProfile(const MultidimArray &v, std::string title, std::string fn_eps) { long int XYdim, maxr2; FourierTransformer transformer; MultidimArray Faux; MultidimArray out; std::vector ampl_list, ampr_list, nr_pix_list; transformer.clear(); Faux.clear(); out.clear(); // TODO: DO I NEED TO ROTATE THE ORIGINAL MULTIDINARRAY BY 90 DEGREES ? 
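	// Outline of this function: the 2D image is padded and floated, Fourier transformed, and for
	// every layer line (column jp of the half transform) the amplitudes on either side of the
	// equator are averaged separately (ampl_list / ampr_list, labelled "left" and "right" in the
	// plot). The two ln(amplitude) profiles are then written to an EPS file via CPlot2D, which is
	// useful for inspecting the layer-line pattern of a helical segment average.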
// Pad and float padAndFloat2DMap(v, out); if ( (XSIZE(out) != YSIZE(out)) || (ZSIZE(out) > 1) || (NSIZE(out) > 1) ) REPORT_ERROR("fftw.cpp::helicalLayerLineProfile(): ERROR MultidimArray should be 2D square."); XYdim = XSIZE(out); // Fourier Transform transformer.FourierTransform(out, Faux, false); // TODO: false??? CenterFFTbySign(Faux); // Statistics out.setXmippOrigin(); maxr2 = (XYdim - 1) * (XYdim - 1) / 4; ampl_list.resize(XSIZE(Faux) + 2); ampr_list.resize(XSIZE(Faux) + 2); nr_pix_list.resize(XSIZE(Faux) + 2); for (int ii = 0; ii < ampl_list.size(); ii++) ampl_list[ii] = ampr_list[ii] = nr_pix_list[ii] = 0.; FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM2D(Faux) { if ( ((ip * ip + jp * jp) < maxr2) && (ip > 0) ) { nr_pix_list[jp] += 1.; ampl_list[jp] += FFTW2D_ELEM(Faux, ip, jp).abs(); ampr_list[jp] += FFTW2D_ELEM(Faux, -ip, jp).abs(); } } CDataSet dataSetAmpl, dataSetAmpr; RFLOAT linewidth = 1.0; std::string figTitle = "Helical Layer Line Profile - " + title; std::string yTitle = "Reciprocal pixels (padded box size = " + integerToString(XYdim) + ")"; for (int ii = 0; ii < (3 * ampl_list.size() / 4 + 1); ii++) { if (nr_pix_list[ii] < 1.) break; // TODO: IS THIS CORRECT? dataSetAmpl.AddDataPoint(CDataPoint(ii, log(ampl_list[ii] / nr_pix_list[ii]))); dataSetAmpr.AddDataPoint(CDataPoint(ii, log(ampr_list[ii] / nr_pix_list[ii]))); } dataSetAmpl.SetDrawMarker(false); dataSetAmpl.SetLineWidth(linewidth); dataSetAmpl.SetDatasetColor(1., 0., 0.); dataSetAmpl.SetDatasetTitle("ln(amplitudes) (left)"); dataSetAmpr.SetDrawMarker(false); dataSetAmpr.SetLineWidth(linewidth); dataSetAmpr.SetDatasetColor(0., 1., 0.); dataSetAmpr.SetDatasetTitle("ln(amplitudes) (right)"); CPlot2D *plot2D = new CPlot2D(figTitle); plot2D->SetXAxisSize(600); plot2D->SetYAxisSize(400); plot2D->SetXAxisTitle(yTitle); plot2D->SetYAxisTitle("ln(amplitudes)"); plot2D->AddDataSet(dataSetAmpl); plot2D->AddDataSet(dataSetAmpr); plot2D->OutputPostScriptPlot(fn_eps); delete plot2D; } void generateBinaryHelicalFourierMask(MultidimArray &mask, std::vector exclude_begin, std::vector exclude_end, RFLOAT angpix) { if (exclude_begin.size() != exclude_end.size()) REPORT_ERROR("BUG: generateHelicalFourierMask: provide start-end resolutions for each shell."); mask.initConstant(1.); bool is_2d = (mask.getDim() == 2); FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM(mask) { RFLOAT res; if (is_2d) res = (jp == 0) ? 999. : YSIZE(mask) * angpix / fabs(jp); // helical axis along X-axis, so only jp matters! else res = (kp == 0) ? 999. : ZSIZE(mask) * angpix / fabs(kp); // helical axis along Z-axis, so only kp matters! for (int ishell = 0; ishell < exclude_begin.size(); ishell++) { if (res <= exclude_begin[ishell] && res >= exclude_end[ishell]) DIRECT_A3D_ELEM(mask, k, i, j) = 0.; } } } relion-3.1.3/src/fftw.h000066400000000000000000000764671411340063500147350ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ /*************************************************************************** * * Authors: Roberto Marabini (roberto@cnb.csic.es) * Carlos Oscar S. Sorzano (coss@cnb.csic.es) * * Unidad de Bioinformatica of Centro Nacional de Biotecnologia, CSIC * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA * 02111-1307 USA * * All comments concerning this program package may be sent to the * e-mail address 'xmipp@cnb.csic.es' ***************************************************************************/ #ifndef __RELIONFFTW_H #define __RELIONFFTW_H #include #include "src/multidim_array.h" #include "src/funcs.h" #include "src/tabfuncs.h" #include "src/complex.h" #include "src/CPlot2D.h" //#define TIMING_FFTW #ifdef TIMING_FFTW #include "src/time.h" extern Timer timer_fftw; #endif #ifdef FAST_CENTERFFT // defined if ALTCPU=on *AND* Intel Compiler used #include "src/acc/cpu/cuda_stubs.h" #include "src/acc/settings.h" #include "src/acc/cpu/cpu_settings.h" #include "src/acc/cpu/cpu_kernels/helper.h" #include #endif /** @defgroup FourierW FFTW Fourier transforms * @ingroup DataLibrary */ /** For all direct elements in the complex array in FFTW format. * * This macro is used to generate loops for the volume in an easy way. It * defines internal indexes 'k','i' and 'j' which ranges the volume using its * physical definition. 
It also defines 'kp', 'ip' and 'jp', which are the logical coordinates * It also works for 1D or 2D FFTW transforms * * @code * FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM(V) * { * int r2 = jp*jp + ip*ip + kp*kp; * * std::cout << "element at physical coords: "<< i<<" "< aux; int l, shift; l = XSIZE(v); aux.reshape(l); shift = (int)(l / 2); if (!forward) shift = -shift; // Shift the input in an auxiliary vector for (int i = 0; i < l; i++) { int ip = i + shift; if (ip < 0) ip += l; else if (ip >= l) ip -= l; aux(ip) = DIRECT_A1D_ELEM(v, i); } // Copy the vector for (int i = 0; i < l; i++) DIRECT_A1D_ELEM(v, i) = DIRECT_A1D_ELEM(aux, i); } else if ( v.getDim() == 2 ) { int batchSize = 1; int xSize = XSIZE(v); int ySize = YSIZE(v); int xshift = (xSize / 2); int yshift = (ySize / 2); if (!forward) { xshift = -xshift; yshift = -yshift; } size_t image_size = xSize*ySize; size_t isize2 = image_size/2; int blocks = ceilf((float)(image_size/(float)(2*CFTT_BLOCK_SIZE))); // for(int i=0; i isize2) pixel_end = isize2; CpuKernels::centerFFT_2D(batchSize, pixel_start, pixel_end, MULTIDIM_ARRAY(v), (size_t)xSize*ySize, xSize, ySize, xshift, yshift); } ); } else if ( v.getDim() == 3 ) { int batchSize = 1; int xSize = XSIZE(v); int ySize = YSIZE(v); int zSize = ZSIZE(v); if(zSize>1) { int xshift = (xSize / 2); int yshift = (ySize / 2); int zshift = (zSize / 2); if (!forward) { xshift = -xshift; yshift = -yshift; zshift = -zshift; } size_t image_size = xSize*ySize*zSize; size_t isize2 = image_size/2; int block =ceilf((float)(image_size/(float)(2*CFTT_BLOCK_SIZE))); // for(int i=0; i isize2) pixel_end = isize2; CpuKernels::centerFFT_3D(batchSize, pixel_start, pixel_end, MULTIDIM_ARRAY(v), (size_t)xSize*ySize*zSize, xSize, ySize, zSize, xshift, yshift, zshift); } ); } else { int xshift = (xSize / 2); int yshift = (ySize / 2); if (!forward) { xshift = -xshift; yshift = -yshift; } size_t image_size = xSize*ySize; size_t isize2 = image_size/2; int blocks = ceilf((float)(image_size/(float)(2*CFTT_BLOCK_SIZE))); // for(int i=0; i isize2) pixel_end = isize2; CpuKernels::centerFFT_2D(batchSize, pixel_start, pixel_end, MULTIDIM_ARRAY(v), (size_t)xSize*ySize, xSize, ySize, xshift, yshift); } ); } } else { v.printShape(); REPORT_ERROR("CenterFFT ERROR: Dimension should be 1, 2 or 3"); } #endif // FAST_CENTERFFT } // Window an FFTW-centered Fourier-transform to a given size template void windowFourierTransform(MultidimArray &in, MultidimArray &out, long int newdim) { // Check size of the input array if (YSIZE(in) > 1 && YSIZE(in)/2 + 1 != XSIZE(in)) REPORT_ERROR("windowFourierTransform ERROR: the Fourier transform should be of an image with equal sizes in all dimensions!"); long int newhdim = newdim/2 + 1; // If same size, just return input // Sjors 5dec2017: only check for xdim is not enough, even/off ydim leaves ambiguity for dim>1 if ( newdim == YSIZE(in) && newhdim == XSIZE(in) ) { out = in; return; } // Otherwise apply a windowing operation // Initialise output array switch (in.getDim()) { case 1: out.initZeros(newhdim); break; case 2: out.initZeros(newdim, newhdim); break; case 3: out.initZeros(newdim, newdim, newhdim); break; default: REPORT_ERROR("windowFourierTransform ERROR: dimension should be 1, 2 or 3!"); } if (newhdim > XSIZE(in)) { long int max_r2 = (XSIZE(in) -1) * (XSIZE(in) - 1); FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM(in) { // Make sure windowed FT has nothing in the corners, otherwise we end up with an asymmetric FT! 
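			// (Upsizing branch: the output transform is larger than the input, so the input is
			// copied into the low-frequency centre of the output; components outside the sphere of
			// radius XSIZE(in) - 1 are skipped to keep the result Friedel-symmetric. The branch
			// below handles downsizing by simply cropping to the smaller output grid.)
			//
			// Usage sketch (illustrative sizes): for the transform of a 256-pixel 2D image
			// (256 x 129 complex values), windowFourierTransform(Fbig, Fsmall, 128) yields a
			// 128 x 65 transform corresponding to a 2x downsampled image.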
if (kp*kp + ip*ip + jp*jp <= max_r2) FFTW_ELEM(out, kp, ip, jp) = FFTW_ELEM(in, kp, ip, jp); } } else { FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM(out) { FFTW_ELEM(out, kp, ip, jp) = FFTW_ELEM(in, kp, ip, jp); } } } // Same as above, acts on the input array directly template void windowFourierTransform(MultidimArray &V, long int newdim) { // Check size of the input array if (YSIZE(V) > 1 && YSIZE(V)/2 + 1 != XSIZE(V)) REPORT_ERROR("windowFourierTransform ERROR: the Fourier transform should be of an image with equal sizes in all dimensions!"); long int newhdim = newdim/2 + 1; // If same size, just return input // Sjors 5dec2017: only check for xdim is not enough, even/off ydim leaves ambiguity for dim>1 if ( newdim == YSIZE(V) && newhdim == XSIZE(V) ) { return; } MultidimArray tmp; windowFourierTransform(V, tmp, newdim); V.moveFrom(tmp); } // A resize operation in Fourier-space (i.e. changing the sampling of the Fourier Transform) by windowing in real-space // If recenter=true, the real-space array will be recentered to have its origin at the origin of the FT template void resizeFourierTransform(MultidimArray &in, MultidimArray &out, long int newdim, bool do_recenter=true) { // Check size of the input array if (YSIZE(in) > 1 && YSIZE(in)/2 + 1 != XSIZE(in)) REPORT_ERROR("windowFourierTransform ERROR: the Fourier transform should be of an image with equal sizes in all dimensions!"); long int newhdim = newdim/2 + 1; long int olddim = 2* (XSIZE(in) - 1); // If same size, just return input if (newhdim == XSIZE(in)) { out = in; return; } // Otherwise apply a windowing operation MultidimArray Fin; MultidimArray Min; FourierTransformer transformer; long int x0, y0, z0, xF, yF, zF; x0 = y0 = z0 = FIRST_XMIPP_INDEX(newdim); xF = yF = zF = LAST_XMIPP_INDEX(newdim); // Initialise output array switch (in.getDim()) { case 1: Min.reshape(olddim); y0=yF=z0=zF=0; break; case 2: Min.reshape(olddim, olddim); z0=zF=0; break; case 3: Min.reshape(olddim, olddim, olddim); break; default: REPORT_ERROR("resizeFourierTransform ERROR: dimension should be 1, 2 or 3!"); } // This is to handle RFLOAT-valued input arrays Fin.reshape(ZSIZE(in), YSIZE(in), XSIZE(in)); FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(in) { DIRECT_MULTIDIM_ELEM(Fin, n) = DIRECT_MULTIDIM_ELEM(in, n); } transformer.inverseFourierTransform(Fin, Min); Min.setXmippOrigin(); if (do_recenter) CenterFFT(Min, false); // Now do the actual windowing in real-space Min.window(z0, y0, x0, zF, yF, xF); Min.setXmippOrigin(); // If upsizing: mask the corners to prevent aliasing artefacts if (newdim > olddim) { FOR_ALL_ELEMENTS_IN_ARRAY3D(Min) { if (k*k + i*i + j*j > olddim*olddim/4) { A3D_ELEM(Min, k, i, j) = 0.; } } } // Recenter FFT back again if (do_recenter) CenterFFT(Min, true); // And do the inverse Fourier transform transformer.clear(); transformer.FourierTransform(Min, out); } /** Fourier-Ring-Correlation between two multidimArrays using FFT * From precalculated Fourier Transforms * Simpler I/O than above. */ void getFSC(MultidimArray &FT1, MultidimArray &FT2, MultidimArray &fsc); /** Fourier-Ring-Correlation between two multidimArrays using FFT * @ingroup FourierOperations * Simpler I/O than above. 
*/ void getFSC(MultidimArray & m1, MultidimArray & m2, MultidimArray &fsc); void getAmplitudeCorrelationAndDifferentialPhaseResidual(MultidimArray &FT1, MultidimArray &FT2, MultidimArray &acorr, MultidimArray &dpr); void getAmplitudeCorrelationAndDifferentialPhaseResidual(MultidimArray &m1, MultidimArray &m2, MultidimArray &acorr, MultidimArray &dpr); void getCosDeltaPhase(MultidimArray &FT1, MultidimArray &FT2, MultidimArray &cosPhi); // Get precalculated AB-matrices for on-the-fly shift calculations (without tabulated sine and cosine) void getAbMatricesForShiftImageInFourierTransform(MultidimArray &in, MultidimArray &out, RFLOAT oridim, RFLOAT shift_x, RFLOAT shift_y, RFLOAT shift_z = 0.); void shiftImageInFourierTransformWithTabSincos(MultidimArray &in, MultidimArray &out, RFLOAT oridim, long int newdim, TabSine& tabsin, TabCosine& tabcos, RFLOAT xshift, RFLOAT yshift, RFLOAT zshift = 0.); // Shift an image through phase-shifts in its Fourier Transform (without tabulated sine and cosine) // Note that in and out may be the same array, in that case in is overwritten with the result // if oridim is in pixels, xshift, yshift and zshift should be in pixels as well! // or both can be in Angstroms void shiftImageInFourierTransform(MultidimArray &in, MultidimArray &out, RFLOAT oridim, RFLOAT shift_x, RFLOAT shift_y, RFLOAT shift_z = 0.); #define POWER_SPECTRUM 0 #define AMPLITUDE_SPECTRUM 1 #define AMPLITUDE_MAP 0 #define PHASE_MAP 1 /** Get the amplitude or power_class spectrum of the map in Fourier space. * @ingroup FourierOperations i.e. the radial average of the (squared) amplitudes of all Fourier components */ void getSpectrum(MultidimArray &Min, MultidimArray &spectrum, int spectrum_type=POWER_SPECTRUM); /** Divide the input map in Fourier-space by the spectrum provided. * @ingroup FourierOperations If leave_origin_intact==true, the origin pixel will remain untouched */ void divideBySpectrum(MultidimArray &Min, MultidimArray &spectrum, bool leave_origin_intact=false); /** Multiply the input map in Fourier-space by the spectrum provided. 
* @ingroup FourierOperations If leave_origin_intact==true, the origin pixel will remain untouched */ void multiplyBySpectrum(MultidimArray &Min, MultidimArray &spectrum, bool leave_origin_intact=false); /** Perform a whitening of the amplitude/power_class spectrum of a 3D map * @ingroup FourierOperations If leave_origin_intact==true, the origin pixel will remain untouched */ void whitenSpectrum(MultidimArray &Min, MultidimArray &Mout, int spectrum_type=AMPLITUDE_SPECTRUM, bool leave_origin_intact=false); /** Adapts Min to have the same spectrum as spectrum_ref * @ingroup FourierOperations If only_amplitudes==true, the amplitude rather than the power_class spectrum will be equalized */ void adaptSpectrum(MultidimArray &Min, MultidimArray &Mout, const MultidimArray &spectrum_ref, int spectrum_type=AMPLITUDE_SPECTRUM, bool leave_origin_intact=false); /** Kullback-Leibler divergence */ RFLOAT getKullbackLeiblerDivergence(MultidimArray &Fimg, MultidimArray &Fref, MultidimArray &sigma2, MultidimArray &p_i, MultidimArray &q_i, int highshell = -1, int lowshell = -1); // Resize a map by windowing it's Fourier Transform void resizeMap(MultidimArray &img, int newsize); // Apply a B-factor to a map (given it's Fourier transform) void applyBFactorToMap(MultidimArray &FT, int ori_size, RFLOAT bfactor, RFLOAT angpix); // Apply a B-factor to a map (given it's real-space array) void applyBFactorToMap(MultidimArray &img, RFLOAT bfactor, RFLOAT angpix); // Apply a Laplacian-of-Gaussian filter to a map (given it's Fourier transform) void LoGFilterMap(MultidimArray &FT, int ori_size, RFLOAT sigma, RFLOAT angpix); // Apply a Laplacian-of-Gaussian filter to a map (given it's real-space array) void LoGFilterMap(MultidimArray &img, RFLOAT sigma, RFLOAT angpix); // Low-pass filter a map (given it's Fourier transform) void lowPassFilterMap(MultidimArray &FT, int ori_size, RFLOAT low_pass, RFLOAT angpix, int filter_edge_width = 2, bool do_highpass_instead = false); // Low-pass and high-pass filter a map (given it's real-space array) void lowPassFilterMap(MultidimArray &img, RFLOAT low_pass, RFLOAT angpix, int filter_edge_width = 2); void highPassFilterMap(MultidimArray &img, RFLOAT low_pass, RFLOAT angpix, int filter_edge_width = 2); // Directional filter a map (given it's Fourier transform) void directionalFilterMap(MultidimArray &FT, int ori_size, RFLOAT low_pass, RFLOAT angpix, std::string axis = "x", int filter_edge_width = 2); void directionalFilterMap(MultidimArray &img, RFLOAT low_pass, RFLOAT angpix, std::string axis = "x", int filter_edge_width = 2); /* * Beamtilt x and y are given in mradians * Wavelength in Angstrom, Cs in mm * Phase shifts caused by the beamtilt will be calculated and applied to Fimg */ void selfApplyBeamTilt(MultidimArray &Fimg, RFLOAT beamtilt_x, RFLOAT beamtilt_y, RFLOAT wavelength, RFLOAT Cs, RFLOAT angpix, int ori_size); /* same as above, but for the anisotropic coma model*/ void selfApplyBeamTilt(MultidimArray &Fimg, RFLOAT beamtilt_x, RFLOAT beamtilt_y, RFLOAT beamtilt_xx, RFLOAT beamtilt_xy, RFLOAT beamtilt_yy, RFLOAT wavelength, RFLOAT Cs, RFLOAT angpix, int ori_size); void applyBeamTilt(const MultidimArray &Fin, MultidimArray &Fout, RFLOAT beamtilt_x, RFLOAT beamtilt_y, RFLOAT wavelength, RFLOAT Cs, RFLOAT angpix, int ori_size); void padAndFloat2DMap(const MultidimArray &v, MultidimArray &out, int factor = 2); void amplitudeOrPhaseMap(const MultidimArray &v, MultidimArray &, int output_map_type); void helicalLayerLineProfile(const MultidimArray &v, std::string title, std::string 
fn_eps); void generateBinaryHelicalFourierMask(MultidimArray &mask, std::vector exclude_begin, std::vector exclude_end, RFLOAT angpix); template void cropInFourierSpace(MultidimArray &Fref, MultidimArray &Fbinned) { const int nfx = XSIZE(Fref), nfy = YSIZE(Fref); const int new_nfx = XSIZE(Fbinned), new_nfy = YSIZE(Fbinned); const int half_new_nfy = new_nfy / 2; if (new_nfx > nfx || new_nfy > nfy) REPORT_ERROR("Invalid size given to cropInFourierSpace"); for (int y = 0; y < half_new_nfy; y++) { for (int x = 0; x < new_nfx; x++) { DIRECT_A2D_ELEM(Fbinned, y, x) = DIRECT_A2D_ELEM(Fref, y, x); } } for (int y = half_new_nfy; y < new_nfy; y++) { for (int x = 0; x < new_nfx; x++) { DIRECT_A2D_ELEM(Fbinned, y, x) = DIRECT_A2D_ELEM(Fref, nfy - new_nfy + y, x); } } } #endif // __RELIONFFTW_H relion-3.1.3/src/filename.cpp000066400000000000000000000375271411340063500160740ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ /*************************************************************************** * Authors: Carlos Oscar S. Sorzano (coss@cnb.csic.es) * * * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA * 02111-1307 USA * * All comments concerning this program package may be sent to the * e-mail address 'xmipp@cnb.csic.es' ***************************************************************************/ #include "src/filename.h" #include "src/funcs.h" #include // Constructor with root, number and extension ............................. void FileName::compose(const std::string &str, long int no, const std::string &ext, int numberlength) { *this = (FileName) str; if (no != -1) { char aux_str[numberlength+1]; std::string tmp_fileformat; tmp_fileformat = (std::string) "%0" + integerToString(numberlength)+ (std::string)"d"; sprintf(aux_str, tmp_fileformat.c_str(), no); *this += aux_str; } if (ext != "") *this += (std::string)"." + ext; } // Constructor: prefix number and filename, mainly for selfiles.. 
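// Illustrative sketch (hypothetical usage, not part of the original RELION sources):
// this overload builds the "NNNNNN@stack" convention RELION uses to address one
// slice inside an image stack, and decompose() below reverses it. Assuming the
// default number length of 6 digits:
//
//   FileName fn;
//   fn.compose(1, "particles.mrcs");   // fn == "000001@particles.mrcs"
//   long int no; std::string stk;
//   fn.decompose(no, stk);             // no == 1, stk == "particles.mrcs"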
void FileName::compose(long int no , const std::string &str, int numberlength) { *this = (FileName) str; if (no != -1) { char aux_str[numberlength+2]; std::string tmp_fileformat; tmp_fileformat = (std::string) "%0" + integerToString(numberlength)+ (std::string)"d@"; sprintf(aux_str, tmp_fileformat.c_str(), no); *this = aux_str + str; } else *this = str; } // Is in stack ............................................................ bool FileName::isInStack() const { return find("@") != std::string::npos; } // Decompose .............................................................. void FileName::decompose(long int &no, std::string &str) const { size_t idx = find('@'); if (idx != std::string::npos) { bool ok = true; for (int i = 0; i < idx; i++) { if ((*this)[i] < '0' || (*this)[i] > '9') { ok = false; break; } } if (ok) { no = textToInteger(substr(0, idx)); str = substr(idx + 1,length() - idx); return; } } no = -1; str = *this; } // Convert to lower case characters ......................................... FileName FileName::toLowercase() const { FileName result = *this; for(unsigned int i=0;i -1) return true; else return false; } // Get substring before first instance of str FileName FileName::beforeFirstOf(const std::string& str) const { int point = find(str); if (point > -1) return substr(0, point); else return *this; } // Get substring before last instance of str FileName FileName::beforeLastOf(const std::string& str) const { int point = rfind(str); if (point > -1) return substr(0, point); else return *this; } // Get substring after first instance of str FileName FileName::afterFirstOf(const std::string& str) const { int point = find(str); if (point > -1) return substr(point + str.length()); else return *this; } // Get substring after last instance of str FileName FileName::afterLastOf(const std::string& str) const { int point = rfind(str); if (point > -1) return substr(point + str.length()); else return *this; } // Get the base name of a filename ......................................... std::string FileName::getBaseName() const { std::string basename = ""; std::string myname = *this; int myindex = 0; for (int p = myname.size() - 1; p >= 0; p--) { if (myname[p] == '/') { myindex = p + 1; break; } } for (int p = myindex; p < myname.size(); p++) { if (myname[p] != '.') basename += myname[p]; else break; } return basename; } // Get the extension of a filename ......................................... std::string FileName::getExtension() const { int last_point = find_last_of("."); if (last_point == -1) return ""; else return substr(last_point + 1); } // Init random ............................................................. void FileName::initRandom(int length) { randomize_random_generator(); *this = ""; for (int i = 0; i < length; i++) *this += 'a' + FLOOR(rnd_unif(0, 26)); } // Add at beginning ........................................................ FileName FileName::addPrefix(const std::string &prefix) const { FileName retval = *this; int skip_directories = find_last_of("/") + 1; return retval.insert(skip_directories, prefix); } // Add at the end .......................................................... FileName FileName::addExtension(const std::string &ext) const { if (ext == "") return *this; else { FileName retval = *this; retval = retval.append((std::string)"." + ext); return retval; } } // Remove last extension ................................................... 
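// Illustrative sketch (hypothetical usage, not part of the original RELION sources):
// how the path helpers above, together with withoutExtension() below, take a
// typical pipeline filename apart. Expected values are shown in the comments:
//
//   FileName fn("Refine3D/job019/run_data.star");
//   fn.getExtension();       // "star"
//   fn.getBaseName();        // "run_data"
//   fn.withoutExtension();   // "Refine3D/job019/run_data"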
FileName FileName::withoutExtension() const { FileName retval = *this; return retval.substr(0, rfind(".")); } // Insert before extension ................................................. FileName FileName::insertBeforeExtension(const std::string &str) const { return withoutExtension() + str + "." + getExtension(); } // Remove an extension wherever it is ...................................... FileName FileName::removeExtension(const std::string &ext) const { int first = find((std::string)"." + ext); if (first == -1) return *this; else { FileName retval = *this; return retval.erase(first, 1 + ext.length()); } } // Remove all extensions.................................................... FileName FileName::removeAllExtensions() const { int first = rfind("/"); first = find(".", first + 1); if (first == -1) return *this; else return substr(0, first); } // Replace all substrings void FileName::replaceAllSubstrings(std::string from, std::string to) { FileName result; size_t start_pos = 0; while((start_pos = (*this).find(from, start_pos)) != std::string::npos) { (*this).replace(start_pos, from.length(), to); start_pos += to.length(); // Handles case where 'to' is a substring of 'from' } } FileName FileName::getFileFormat() const { int first; FileName result; if (find("#") != std::string::npos) return "raw"; else if ( (first = rfind(":"))!=std::string::npos) result = substr(first + 1) ; else if ( (first = rfind("."))!=std::string::npos) result = substr(first + 1); else result=""; return result.toLowercase(); } FileName FileName::removeFileFormat() const { if ( find("#", 0) > -1 ) REPORT_ERROR("Not implemented for raw data"); size_t found=rfind(":"); if (found!=std::string::npos) return substr(0, found); return *this; } bool FileName::isStarFile() const { //file names containing @, : or % are not metadatas size_t found=this->find('@'); if (found!=std::string::npos) return false; // Allow :star to indicate that file really is a STAR file! //found=this->find(':'); //if (found!=std::string::npos) // return false; found=this->find('#'); if (found!=std::string::npos) return false; FileName ext = getFileFormat(); if (ext=="star") { return true; } else { return false; } } // Substitute one extension by other ....................................... FileName FileName::substituteExtension(const std::string &ext1, const std::string &ext2) const { int first = find((std::string)"." + ext1); if (first == -1) return *this; else { FileName retval = *this; return retval.replace(first, 1 + ext1.length(), (std::string)"." + ext2); } } // Remove a substring ...................................................... FileName FileName::without(const std::string &str) const { if (str.length() == 0) return *this; int pos = find(str); if (pos == -1) return *this; else { FileName retval = *this; return retval.erase(pos, str.length()); } } // Remove until prefix ..................................................... FileName FileName::removeUntilPrefix(const std::string &str) const { if (str.length() == 0) return *this; int pos = find(str); if (pos == -1) return *this; else { FileName retval = *this; return retval.erase(0, pos + str.length()); } } // Remove directories ...................................................... 
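// Illustrative sketch (hypothetical usage, not part of the original RELION sources):
// getFileFormat() and isStarFile() above decide how a file will be parsed; a
// trailing ":fmt" overrides the extension, and '@' or '#' in the name
// disqualify it as a STAR file. Expected values are shown in the comments:
//
//   FileName("run_data.star").isStarFile();            // true
//   FileName("000001@run_data.star").isStarFile();     // false (stack slice)
//   FileName("map.ccp4:mrc").getFileFormat();          // "mrc"
//   FileName("input.file#d=f#h=1024").getFileFormat(); // "raw"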
FileName FileName::removeDirectories(int keep) const { int last_slash = rfind("/"); int tokeep = keep; while (tokeep > 0) { last_slash = rfind("/", last_slash - 1); tokeep--; } if (last_slash == -1) return *this; else return substr(last_slash + 1, length() - last_slash); } size_t FileName::getFileSize() const { struct stat filestatus; stat( this->c_str(), &filestatus ); return filestatus.st_size; } int FileName::globFiles(std::vector &files, bool do_clear) const { if (do_clear) files.clear(); glob_t glob_result; glob((*this).c_str(), GLOB_TILDE, NULL, &glob_result); for(unsigned long int i = 0; i < glob_result.gl_pathc; ++i) { files.push_back(std::string(glob_result.gl_pathv[i])); } globfree(&glob_result); return files.size(); } bool FileName::getTheOtherHalf(FileName &fn_out) const { FileName ret = this->afterLastOf("/"); if (ret.contains("half1")) ret.replaceAllSubstrings("half1", "half2"); else if (ret.contains("half2")) ret.replaceAllSubstrings("half2", "half1"); else return false; if (this->contains("/")) ret = this->beforeLastOf("/") + "/" + ret; fn_out = ret; return true; } bool FileName::validateCharactersStrict(bool do_allow_double_dollar) const { FileName myname = *this; if (do_allow_double_dollar) { myname.replaceAllSubstrings("$$",""); } for (int p = 0, len = myname.size(); p < len; p++) { char c = myname[p]; if (!(c >= '0' && c <= '9') && // 0-9 !(c >= 'A' && c <= 'Z') && // A-Z !(c >= 'a' && c <= 'z') && // a-z c != '-') return false; } return true; } bool exists(const FileName &fn) { struct stat buffer; return (stat (fn.c_str(), &buffer) == 0); } void touch(const FileName &fn) { std::ofstream fh; fh.open(fn.c_str(), std::ios::out); if (!fh) REPORT_ERROR( (std::string)"Filename::touch ERROR: Cannot open file: " + fn); fh.close(); } void copy(const FileName &fn_src, const FileName &fn_dest) { std::ifstream srce( fn_src.c_str(), std::ios::binary ) ; std::ofstream dest( fn_dest.c_str(), std::ios::binary ) ; dest << srce.rdbuf() ; } void move(const FileName &fn_src, const FileName &fn_dest) { copy(fn_src, fn_dest); remove(fn_src.c_str()); } int mktree(const FileName &fn_dir, mode_t mode) { std::string s = fn_dir; size_t pre=0,pos; std::string dir; int mdret; // force trailing / so we can handle everything in loop if(s[s.size()-1]!='/') s+='/'; while((pos=s.find_first_of('/',pre))!=std::string::npos) { dir=s.substr(0,pos++); pre=pos; // if leading / first time is 0 length if (dir.size() == 0) continue; if ((mdret = mkdir(dir.c_str(), mode)) && errno != EEXIST) { return mdret; } } return mdret; } bool decomposePipelineFileName(FileName fn_in, FileName &fn_pre, FileName &fn_jobnr, FileName &fn_post) { size_t slashpos = 0; int i = 0; while (slashpos < fn_in.length()) { i++; slashpos = fn_in.find("/", slashpos+1); if (fn_in[slashpos+1]=='j' && fn_in[slashpos+2]=='o' && fn_in[slashpos+3]=='b' && std::isdigit(fn_in[slashpos+4]) && std::isdigit(fn_in[slashpos+5]) && std::isdigit(fn_in[slashpos+6])) { // find the second slash size_t slashpos2 = fn_in.find("/", slashpos+6); if (slashpos2 == std::string::npos) slashpos2 = fn_in.length() - 1; fn_pre = fn_in.substr(0, slashpos+1); // this has the first slash fn_jobnr = fn_in.substr(slashpos+1, slashpos2-slashpos); // this has the second slash fn_post = fn_in.substr(slashpos2+1); // this has the rest return true; } if (i>20) REPORT_ERROR("decomposePipelineFileName: BUG or found more than 20 directories deep structure for pipeline filename: " + fn_in); } // This was not a pipeline filename fn_pre=""; fn_jobnr=""; fn_post=fn_in; return false; } 
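// Illustrative sketch (hypothetical usage, not part of the original RELION sources):
// decomposePipelineFileName() above splits a RELION pipeline path into the job
// type, the jobNNN directory and the remainder. For example:
//
//   FileName fn_pre, fn_jobnr, fn_post;
//   decomposePipelineFileName("Refine3D/job019/run_data.star",
//                             fn_pre, fn_jobnr, fn_post);
//   // fn_pre == "Refine3D/", fn_jobnr == "job019/", fn_post == "run_data.star"
//
// decomposePipelineSymlinkName() below does the same, but first tries to resolve
// an alias symlink in the second directory of the path.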
bool decomposePipelineSymlinkName(FileName fn_in, FileName &fn_pre, FileName &fn_jobnr, FileName &fn_post) { bool dont_expand = false; // Symlinks are always in the second directory. (e.g. Refine3D/JOB_ALIAS_AS_LINK/...) size_t slashpos = 0; int i = 0; while (slashpos < fn_in.length()) { i++; slashpos = fn_in.find("/", slashpos+1); if (i==2) break; } // We ignore links in the first directory (link/XXX) if (i != 2) dont_expand = true; FileName second_dir = fn_in.substr(0, slashpos); // std::cout << second_dir << std::endl; // We also ignore jobXXX even if it is a symbolic link. // e.g. MotionCorr/job003 ==> /path/to/online_processing/MotionCorr/job003 // This is safe because we don't allow an alias name to start from 'job'. if (second_dir.afterLastOf("/").substr(0, 3) == "job") dont_expand = true; // Check whether this is a symbol link char linkname[4096]; ssize_t len_max = sizeof(linkname) - 1; ssize_t len = ::readlink(second_dir.c_str(), linkname, len_max); if (len == len_max) REPORT_ERROR("Too long path in decomposePipelineSymlinkName."); if (!dont_expand && len != -1) { // This is a symbolic link! if (linkname[len - 1] == '/') linkname[len - 1] = '\0'; // remove trailing '/' linkname[len] = '\0'; FileName fn_link = std::string(linkname); // TODO: FIXME: This condition is still not perfect. For example, // Micrograph/mic001.mrc -> ../../../storage/mic001.mrc breaks the code. // Meanwhile one can circumvent this case by using an absolute path in the symlink. if (fn_link.substr(0, 3) == "../") { fn_link = fn_link.substr(3) + fn_in.substr(slashpos); // std::cout << "fn_link=" << fn_link << std::endl; return decomposePipelineFileName(fn_link, fn_pre, fn_jobnr, fn_post); } else { fn_pre = fn_jobnr = ""; fn_post = fn_in; return false; } } // If it is not a symlink, just decompose the filename return decomposePipelineFileName(fn_in, fn_pre, fn_jobnr, fn_post); } FileName getOutputFileWithNewUniqueDate(FileName fn_input, FileName fn_new_outputdir) { FileName fn_pre, fn_jobnr, fn_post; decomposePipelineFileName(fn_input, fn_pre, fn_jobnr, fn_post); return fn_new_outputdir + fn_post; } relion-3.1.3/src/filename.h000066400000000000000000000400731411340063500155270ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ /*************************************************************************** * Authors: Carlos Oscar S. 
Sorzano (coss@cnb.csic.es) * * * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA * 02111-1307 USA * * All comments concerning this program package may be sent to the * e-mail address 'xmipp@cnb.csic.es' ***************************************************************************/ #ifndef FILENAME_H_ #define FILENAME_H_ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "src/numerical_recipes.h" #include "src/macros.h" #include "src/error.h" #include "src/strings.h" #define FILENAMENUMBERLENGTH 6 //@{ /** Filenames. * * This class allows you a lot of usual and common manipulations with filenames. * See filename conventions for a detailed explanation of the Filenames dealed * here, although most of the functions work with the more general model * "name.extension" */ class FileName: public std::string { public: /// @name Filename constructors /// @{ /** Empty constructor * * The empty constructor is inherited from the string class, so an empty * FileName is equal to "". * * @code * FileName fn_blobs; * @endcode */ FileName(): std::string("") {} /* Destructor */ ~FileName() {} /** Constructor from string * * The constructor from a string allows building complex expressions based * on the string class. Notice that in the following example the type * casting to string is very important, if not, the operation is just a * pointer movement instead of a string concatenation. * * @code * FileName fn_blobs((std::string) "art00001" + ".blobs"); * @endcode */ FileName(const std::string& str): std::string(str) {} /** Constructor from char* */ FileName(const char* str): std::string(str) {} /** Copy constructor */ FileName(const FileName& fn): std::string(fn) {} /** Assignment constructor */ FileName& operator=(const FileName& op) { return (FileName&) std::string::operator=(op); } /** Constructor from root, number and extension * * The number and extension are optional. 
* * @code * FileName fn_proj("g1ta000001.xmp"); // fn_proj = "g1ta000001.xmp" * FileName fn_proj("g1ta",1,"xmp"); // fn_proj = "g1ta000001.xmp" * FileName fn_proj("g1ta",1); // fn_proj = "g1ta000001" * @endcode */ FileName(const char* str, long int no, const std::string& ext = "") { compose(str, no, ext); } /** Constructor from root and extension * * None of the parameters is optional * * @code * FileName fn_proj("g1ta00001", "xmp"); // fn_proj = "g1ta00001.xmp" * @endcode */ FileName(const char* str, const std::string& ext): std::string(str + ext) {} //@} /// @name Composing/Decomposing the filename /// @{ /** Compose from root, number and extension * * @code * fn_proj.compose("g1ta", 1, "xmp"); // fn_proj = "g1ta000001.xmp" * @endcode */ void compose(const std::string& str, long int no, const std::string& ext, int numberlength = FILENAMENUMBERLENGTH); /** Prefix with number @. Mainly for selfiles * * @code * fn_proj.compose(1,"g1ta.xmp"); // fn_proj = "000001@g1ta.xmp" * @endcode */ void compose(long int no, const std::string& str, int numberlength = FILENAMENUMBERLENGTH); /** True if this filename belongs to a stack */ bool isInStack() const; /** Decompose filenames with @. Mainly from selfiles * * @code * fn_proj.decompose(no,filename); // fn_proj = "000001@g1ta000001.xmp" * // no=1 * // filename = "g1ta000001.xmp" * @endcode */ void decompose(long int &no, std::string& str) const; /** Get the base name from a filename */ std::string getBaseName() const; /** Get the last extension from filename * * The extension is returned without the dot. If there is no extension "" is * returned. * * @code * std::string ext = fn_proj.get_extension(); * @endcode */ std::string getExtension() const; /** Get image format identifier (as in Bsoft) * * @code * fn_proj = "g1ta00001.xmp"; * fn_proj = fn_proj.get_file_format(); // fn_proj == "xmp" * fn_proj = "g1ta00001.nor:spi"; * fn_proj = fn_proj.get_file_format(); // fn_proj == "spi" * fn_proj = "input.file#d=f#x=120,120,55#h=1024"; * fn_proj = fn_proj.get_file_format(); // fn_proj == "raw" * @endcode */ FileName getFileFormat() const; /** Random name * * Generate a random name of the desired length. 
*/ void initRandom(int length); //@} ///@name Filename utilities //@{ /** Change all characters for lowercases * * @code * FileName fn_proj("g1tA00001"); * fn_proj = fn_proj.to_lowercase(); // fn_proj = "g1ta00001" * @endcode */ FileName toLowercase() const; /** Change all characters for uppercases * * @code * FileName fn_proj("g1tA00001"); * fn_proj = fn_proj.to_uppercase(); // fn_proj = "G1Ta00001" * @endcode */ FileName toUppercase() const; /** Check whether the filename contains the argument substring * * @code * FileName fn_proj("g1ta00001.raw#d=f"); * if (fn_proj.contains("raw) ) // true * @endcode */ bool contains(const std::string& str) const; /** Return substring before first instance of argument (as in Bsoft) * * @code * FileName fn_proj("g1ta00001.raw#d=f"); * fn_proj = fn_proj.before_first_of("#"); // fn_proj = "g1ta00001.raw" * @endcode */ FileName beforeFirstOf(const std::string& str) const; /** Return substring before last instance of argument (as in Bsoft) * * @code * FileName fn_proj("g1ta00001.raw#d=f"); * fn_proj = fn_proj.before_last_of("#"); // fn_proj = "g1ta00001.raw" * @endcode */ FileName beforeLastOf(const std::string& str) const; /** Return substring after first instance of argument (as in Bsoft) * * @code * FileName fn_proj("g1ta00001.raw#d=f"); * fn_proj = fn_proj.after_first_of("#"); // fn_proj = "d=f" * @endcode */ FileName afterFirstOf(const std::string& str) const; /** Return substring after last instance of argument (as in Bsoft) * * @code * FileName fn_proj("g1ta00001.raw#d=f"); * fn_proj = fn_proj.after_last_of("#"); // fn_proj = "d=f" * @endcode */ FileName afterLastOf(const std::string& str) const; /** Add string at the beginning * * If there is a path then the prefix is added after the path. * * @code * fn_proj = "imgs/g1ta00001"; * fn_proj.add_prefix("h"); // fn_proj == "imgs/hg1ta00001" * * fn_proj = "g1ta00001"; * fn_proj.add_prefix("h"); // fn_proj == "hg1ta00001" * @endcode */ FileName addPrefix(const std::string& prefix) const; /** Add extension at the end. * * The "." is added. If teh input extension is "" then the same name is * returned, with nothing added. * * @code * fn_proj = "g1ta00001"; * fn_proj.add_extension("xmp"); // fn_proj == "g1ta00001.xmp" * @endcode */ FileName addExtension(const std::string& ext) const; /** Remove last extension, if any * * @code * fn_proj = "g1ta00001.xmp"; * fn_proj = fn_proj.without_extension(); // fn_proj == "g1ta00001" * * fn_proj = "g1ta00001"; * fn_proj = fn_proj.without_extension(); // fn_proj == "g1ta00001" * @endcode */ FileName withoutExtension() const; /** Insert before first extension * * If there is no extension, the insertion is performed at the end. * * @code * fn_proj = "g1ta00001.xmp"; * fn_proj = fn_proj.insert_before_extension("pp"); * // fn_proj == "g1ta00001pp.xmp" * * fn_proj = "g1ta00001"; * fn_proj = fn_proj.insert_before_extension("pp"); * // fn_proj=="g1ta00001pp" * @endcode */ FileName insertBeforeExtension(const std::string& str) const; /** Remove a certain extension * * It doesn't matter if there are several extensions and the one to be * removed is in the middle. If the given extension is not present in the * filename nothing is done. 
* * @code * fn_proj = "g1ta00001.xmp.bak"; * fn_proj = fn_proj.remove_extension("xmp"); * // fn_proj == "g1ta00001.bak" * @endcode */ FileName removeExtension(const std::string& ext) const; /** Remove all extensions */ FileName removeAllExtensions() const; /** * Replace all substrings */ void replaceAllSubstrings(std::string from, std::string to); /** Remove file format * @code * fn_proj = "g1ta00001.xmp"; * fn_proj = fn_proj.get_file_format(); // fn_proj == "xmp" * fn_proj = "g1ta00001.nor:spi"; * fn_proj = fn_proj.get_file_format(); // fn_proj == "spi" * fn_proj = "input.file#d=f#x=120,120,55#h=1024"; * fn_proj = fn_proj.get_file_format(); // fn_proj == "raw" * @endcode */ FileName removeFileFormat() const; /** Is this file a MetaData file? * Returns false if the filename contains "@", ":" or "#" * Returns true if the get_file_format extension == "star" */ bool isStarFile() const; /** Clean image FileName (as in Bsoft) * * @code * fn_proj = "g1ta00001.xmp"; * fn_proj = fn_proj.get_file_format(); // fn_proj == "g1ta00001.xmp" * fn_proj = "g1ta00001.nor:spi"; * fn_proj = fn_proj.clean_image_name(); // fn_proj == "g1ta00001.nor" * fn_proj = "input.file#d=f#x=120,120,55#h=1024"; * fn_proj = fn_proj.clean_image_name(); // fn_proj == "input.file" * @endcode */ //FileName clean_image_name() const; /** Substitute ext1 by ext2 * * It doesn't matter if ext1 is in the middle of several extensions. If ext1 * is not present in the filename nothing is done. * * @code * fn_proj = "g1ta00001.xmp.bak"; * fn_proj = fn_proj.substitute_extension("xmp", "bor"); * // fn_proj == "g1ta00001.bor.bak" * * fn_proj = "g1ta00001.xmp.bak"; * fn_proj = fn_proj.substitute_extension("tob", "bor"); * // fn_proj=="g1ta00001.xmp.bak" * @endcode */ FileName substituteExtension(const std::string& ext1, const std::string& ext2) const; /** Without a substring * * If the substring is not present the same FileName is returned, if it is * there the substring is removed. */ FileName without(const std::string& str) const; /** Remove until prefix * * Remove the starting string until the given prefix, inclusively. For * instance /usr/local/data/ctf-image00001.fft with ctf- yields * image00001.fft. If the prefix is not found nothing is done. */ FileName removeUntilPrefix(const std::string& str) const; /** Remove all directories * * Or if keep>0, then keep the lowest keep directories */ FileName removeDirectories(int keep = 0) const; /* * Gets the filesize (in bytes) */ size_t getFileSize() const; // Get the other half map by swapping half1 and half2 bool getTheOtherHalf(FileName &fn_out) const; bool validateCharactersStrict(bool do_allow_double_dollar = false) const; /** From a wild-card containing filename get a vector with all existing filenames, * return number of existing filenames * If do_clear, the output vector will be clear when starting, when false, files will just be added to the vector * */ int globFiles(std::vector &files, bool do_clear = true) const; //@} }; /** This class is used for comparing filenames. * * Example: "g0ta00001.xmp" is less than "g0ta00002.xmp" * * This class is needed to define a std::map as * map myMap; * * This function is not ported to Python. */ class FileNameComparison { public: inline bool operator ()(const FileName &fn1, const FileName &fn2) { return fn1 &files); /** This function raised an ERROR if the filename if not empty and if * the corresponding file does not exist. 
* This may be useful to have a better (killing) control on (mpi-regulated) jobs * * @code * exit_if_not_exists("control_file.txt"); * @endcode * * This function is not ported to Python. */ void exit_if_not_exists(const FileName &fn); /** Waits until the given filename has a stable size * * The stable size is defined as having the same size within two samples * separated by time_step (microsecs). * * An exception is throw if the file exists but its size cannot be stated. */ void wait_until_stable_size(const FileName& fn, unsigned long time_step = 250000); /** Write a zero filled file with the desired size. * * The file is written by blocks to speed up, you can modify the block size. * An exception is thrown if any error happens */ void create_empty_file(const FileName& fn, unsigned long long size, unsigned long long block_size = 102400); /** Returns the base directory of the Xmipp installation */ FileName xmippBaseDir(); //@} #endif /* FILENAME_H_ */ relion-3.1.3/src/flex_analyser.cpp000066400000000000000000000720001411340063500171310ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #include "flex_analyser.h" void FlexAnalyser::read(int argc, char **argv) { parser.setCommandLine(argc, argv); int gen_section = parser.addSection("General options"); fn_data = parser.getOption("--data", "The _data.star file with the orientations to be analysed", ""); fn_model = parser.getOption("--model", " The corresponding _model.star file with the refined model", ""); fn_bodies = parser.getOption("--bodies", "The corresponding star file with the definition of the bodies", ""); fn_out = parser.getOption("--o", "Output rootname", "analyse"); int model_section = parser.addSection("3D model options"); do_3dmodels = parser.checkOption("--3dmodels", "Generate a 3D model for each experimental particles"); size_3dmodels = textToInteger(parser.getOption("--size_3dmodels", "Output size of the 3D models (default is same as input particles)", "-1")); int pca_section = parser.addSection("PCA options"); do_PCA_orient = parser.checkOption("--PCA_orient", "Perform a principal components analysis on the multibody orientations"); do_generate_maps = parser.checkOption("--do_maps", "Generate maps along the principal components"); nr_components = textToInteger(parser.getOption("--k", "Number of principal components to generate maps for", "-1")); explain_variance = textToFloat(parser.getOption("--v", "Or use as many principal components to explain this fraction of variance (<0,1])", "0.75")); nr_maps_per_component = textToInteger(parser.getOption("--maps_per_movie", "Number of maps to use for the movie of each principal component", "10")); nr_bins = textToInteger(parser.getOption("--bins", "Number of bins in histograms of the eigenvalues for each principal component", "100")); select_eigenvalue = textToInteger(parser.getOption("--select_eigenvalue", "Output a selection particle.star file based on eigenvalues along this eigenvector", "-1")); select_eigenvalue_min = textToFloat(parser.getOption("--select_eigenvalue_min", "Minimum for eigenvalue to include particles in selection output star file", "-99999.")); select_eigenvalue_max = textToFloat(parser.getOption("--select_eigenvalue_max", "Maximum for eigenvalue to include particles in selection output star file", "99999.")); do_write_all_pca_projections = parser.checkOption("--write_pca_projections", "Write out a text file with all PCA projections for all particles"); // Initialise verb for non-parallel execution verb = textToInteger(parser.getOption("--verb", "Verbosity", "1")); // Check for errors in the command-line option if (parser.checkForErrors()) REPORT_ERROR("Errors encountered on the command line (see above), exiting..."); } void FlexAnalyser::initialise() { rescale_3dmodels = 1.0; if (verb > 0) std::cout << " Reading in data.star file ..." << std::endl; if (fn_data == "") REPORT_ERROR("ERROR: please provide the --data argument!"); else data.read(fn_data); if (verb > 0) std::cout << " Reading in model.star file ..." << std::endl; if (fn_model == "") REPORT_ERROR("ERROR: please provide the --model argument!"); else model.read(fn_model); if (fn_bodies != "") { if (verb > 0) std::cout << " Initialising bodies ..." 
<< std::endl; model.initialiseBodies(fn_bodies, fn_out); } else { REPORT_ERROR("ERRPR: please specify the --bodies argument!"); } if (model.nr_bodies != data.nr_bodies) REPORT_ERROR("ERROR: Unequal number of bodies in bodies.star and data.star files!"); if (do_3dmodels && model.nr_bodies == 1) REPORT_ERROR("ERROR: --3dmodels option is only valid for multibody refinements."); // This creates a rotation matrix for (rot,tilt,psi) = (0,90,0) // It will be used to make all Abody orientation matrices relative to (0,90,0) instead of the more logical (0,0,0) // This is useful, as psi-priors are ill-defined around tilt=0, as rot becomes the same as -psi!! rotation3DMatrix(-90., 'Y', A_rot90, false); A_rot90T = A_rot90.transpose(); if (do_PCA_orient) { if (model.nr_bodies * 6 > data.numberOfParticles()) REPORT_ERROR("ERROR: there are not enough particles to perform PCA!"); if (do_generate_maps) { if (explain_variance > 1.) REPORT_ERROR("ERROR: --v should be expressed as a fraction, i.e. between 0 and 1."); if (explain_variance < 0. && nr_components < 0) REPORT_ERROR("ERROR: --v or --k should be larger than zero."); } // Calculate effect of 1 degree rotations and 1 pixel translations on the bodies, in order to normalise vectors for PCA norm_pca.clear(); FileName fn_weights = fn_out + "_pca_weights.dat"; std::ofstream f_weights(fn_weights); std::cout << " Normalisation weights for PCA columns are written to " << fn_weights << std::endl; f_weights << "body rot tilt psi offset" << std::endl; f_weights << std::scientific; for (int ibody = 0; ibody < model.nr_bodies; ibody++) { MultidimArray Mbody, Irefp; Irefp = model.Iref[ibody] * model.masks_bodies[ibody]; // Place each body with its center-of-mass in the center of the box selfTranslate(Irefp, -model.com_bodies[ibody], DONT_WRAP); f_weights << ibody + 1; Matrix2D Aresi, Abody; // rot Euler_angles2matrix(1., 90., 0., Aresi); Abody = (model.orient_bodies[ibody]).transpose() * A_rot90 * Aresi * model.orient_bodies[ibody]; Abody.resize(4,4); MAT_ELEM(Abody, 3, 3) = 1.; applyGeometry(Irefp, Mbody, Abody, IS_NOT_INV, DONT_WRAP); Mbody -= Irefp; norm_pca.push_back(sqrt(Mbody.sum2())); f_weights << " " << sqrt(Mbody.sum2()); // tilt Euler_angles2matrix(0., 91., 0., Aresi); Abody = (model.orient_bodies[ibody]).transpose() * A_rot90 * Aresi * model.orient_bodies[ibody]; Abody.resize(4,4); MAT_ELEM(Abody, 3, 3) = 1.; applyGeometry(Irefp, Mbody, Abody, IS_NOT_INV, DONT_WRAP); Mbody -= Irefp; norm_pca.push_back(sqrt(Mbody.sum2())); f_weights << " " << sqrt(Mbody.sum2()); // psi Euler_angles2matrix(0., 90., 1., Aresi); Abody = (model.orient_bodies[ibody]).transpose() * A_rot90 * Aresi * model.orient_bodies[ibody]; Abody.resize(4,4); MAT_ELEM(Abody, 3, 3) = 1.; applyGeometry(Irefp, Mbody, Abody, IS_NOT_INV, DONT_WRAP); Mbody -= Irefp; norm_pca.push_back(sqrt(Mbody.sum2())); f_weights << " " << sqrt(Mbody.sum2()); // translation x & y (considered the same) Euler_angles2matrix(0., 90., 0., Aresi); Abody = (model.orient_bodies[ibody]).transpose() * A_rot90 * Aresi * model.orient_bodies[ibody]; Abody.resize(4,4); MAT_ELEM(Abody, 0, 3) = 1.; MAT_ELEM(Abody, 3, 3) = 1.; applyGeometry(Irefp, Mbody, Abody, IS_NOT_INV, DONT_WRAP); Mbody -= Irefp; norm_pca.push_back(sqrt(Mbody.sum2())); f_weights << " " << sqrt(Mbody.sum2()) << std::endl; } f_weights.close(); } } void FlexAnalyser::run(int rank, int size) { if (size > 1 && do_PCA_orient) REPORT_ERROR("PCA analysis (--PCA_orient) must be performed in the non-MPI version."); if (do_3dmodels) setup3DModels(); // Loop through all 
particles loopThroughParticles(rank, size); if (size > 1) { MPI_Barrier(MPI_COMM_WORLD); } } void FlexAnalyser::setup3DModels() { for (int ibody = 0; ibody < model.nr_bodies; ibody++) { // Premultiply the map with the mask (otherwise need to do this again for every particle model.Iref[ibody] *= model.masks_bodies[ibody]; // Place each body with its center-of-mass in the center of the box, as that's where the rotations are around selfTranslate(model.Iref[ibody], -model.com_bodies[ibody], DONT_WRAP); // And do the same for the masks selfTranslate(model.masks_bodies[ibody], -model.com_bodies[ibody], DONT_WRAP); if (size_3dmodels < XSIZE(model.Iref[ibody])) { rescale_3dmodels = (RFLOAT)(size_3dmodels)/(RFLOAT)(XSIZE(model.Iref[ibody])); std::cerr << " rescale_3dmodels= " << rescale_3dmodels << std::endl; selfScaleToSize(model.Iref[ibody], size_3dmodels, size_3dmodels, size_3dmodels); selfScaleToSize(model.masks_bodies[ibody], size_3dmodels, size_3dmodels, size_3dmodels); model.Iref[ibody].setXmippOrigin(); model.masks_bodies[ibody].setXmippOrigin(); } } } void FlexAnalyser::loopThroughParticles(int rank, int size) { long int total_nr_particles = data.numberOfParticles(); // Allow parallelisation long int my_first_particle = 0, my_last_particle = total_nr_particles-1; if (size > 1) divide_equally(total_nr_particles, size, rank, my_first_particle, my_last_particle); long int todo_particles = my_last_particle-my_first_particle+1; long int update_interval = XMIPP_MAX(1, todo_particles / 60); if (verb > 0) { std::cout << " Processing all particles ... " << std::endl; init_progress_bar(todo_particles); } DFo.clear(); DFo.setIsList(false); std::vector< std::vector > inputdata; long int imgno = 0; for (long int part_id = my_first_particle; part_id <= my_last_particle; part_id++) { std::vector datarow; if (do_3dmodels || do_PCA_orient) { make3DModelOneParticle(part_id, imgno, datarow, rank, size); if (do_PCA_orient) inputdata.push_back(datarow); } if (imgno%update_interval==0 && verb > 0) progress_bar(imgno); imgno++; } if (verb > 0) progress_bar(todo_particles); if (do_3dmodels) { FileName fn_star; if (size > 1) { fn_star.compose(fn_out + "_", rank + 1, ""); fn_star = fn_star + "_3dmodels.star"; } else { fn_star = fn_out + "_3dmodels.star"; } DFo.write(fn_star); } if (do_PCA_orient) { std::vector< std::vector > eigenvectors, projected_data; std::vector eigenvalues, means; // Do the PCA and make histograms principalComponentsAnalysis(inputdata, eigenvectors, eigenvalues, means, projected_data); FileName fn_evec = fn_out + "_eigenvectors.dat"; std::ofstream f_evec(fn_evec); std::cout << " Eigenvectors (rotations only):" << std::endl; for (int j = 0; j < eigenvectors[0].size(); j++) { std::string stro = ""; if (j % 6 == 0) stro = "rot"; else if (j % 6 == 1) stro = "tilt"; else if (j % 6 == 2) stro = "psi"; else if (j % 6 == 3) stro = "x"; else if (j % 6 == 4) stro = "y"; else if (j % 6 == 5) stro = "z"; if (stro != "") { stro += "-body-" + integerToString(1 + (j / 6)); f_evec << stro << " "; if (j % 6 < 3) { std::cout << std::setw(12) << std::right << std::fixed; std::cout << stro; } } } std::cout << std::endl; f_evec << std::endl; std::cout << " Full eigenvectors including translations are written to " << fn_evec << std::endl; f_evec << std::scientific; for (int k = 0; k < eigenvectors.size(); k++) { for (int j =0; j < eigenvectors[0].size(); j++) { if (j > 0) f_evec << " "; f_evec << eigenvectors[k][j]; } f_evec << std::endl; if (k % 6 < 3) { for (int j =0; j < eigenvectors[0].size(); j++) { if (j % 6 
< 3) { std::cout << std::setw(12) << std::fixed; std::cout << eigenvectors[k][j]; } } std::cout << std::endl; } } f_evec.close(); makePCAhistograms(projected_data, eigenvalues, means); // Make movies for the most significant eigenvectors if (do_generate_maps) make3DModelsAlongPrincipalComponents(projected_data, eigenvectors, means); if (do_write_all_pca_projections) { writeAllPCAProjections(projected_data); } // Output a particle selection, if requested if (select_eigenvalue > 0) { outputSelectedParticles(projected_data); } } } void FlexAnalyser::make3DModelOneParticle(long int part_id, long int imgno, std::vector &datarow, int rank, int size) { // Get the consensus class, orientational parameters and norm (if present) Matrix2D Aori; RFLOAT rot, tilt, psi, xoff, yoff, zoff; data.MDimg.getValue(EMDL_ORIENT_ROT, rot, part_id); data.MDimg.getValue(EMDL_ORIENT_TILT, tilt, part_id); data.MDimg.getValue(EMDL_ORIENT_PSI, psi, part_id); Euler_angles2matrix(rot, tilt, psi, Aori, false); RFLOAT my_pixel_size = data.getImagePixelSize(part_id, 0); Image img; MultidimArray sumw; if (do_3dmodels) { img().initZeros(model.Iref[0]); sumw.initZeros(model.Iref[0]); } datarow.clear(); for (int ibody = 0; ibody < model.nr_bodies; ibody++) { MultidimArray Mbody, Mmask; Matrix1D body_offset(3), body_offset_3d(3); RFLOAT body_rot, body_tilt, body_psi; data.MDbodies[ibody].getValue(EMDL_ORIENT_ROT, body_rot, part_id); data.MDbodies[ibody].getValue(EMDL_ORIENT_TILT, body_tilt, part_id); data.MDbodies[ibody].getValue(EMDL_ORIENT_PSI, body_psi, part_id); data.MDbodies[ibody].getValue(EMDL_ORIENT_ORIGIN_X_ANGSTROM, XX(body_offset), part_id); data.MDbodies[ibody].getValue(EMDL_ORIENT_ORIGIN_Y_ANGSTROM, YY(body_offset), part_id); if (model.data_dim == 3) data.MDbodies[ibody].getValue(EMDL_ORIENT_ORIGIN_Z_ANGSTROM, ZZ(body_offset), part_id); // As of v3.1, offsets are in Angstrom: convert back to pixels! body_offset /= my_pixel_size; // Keep rescaling into account! body_offset *= rescale_3dmodels; Matrix2D Aresi, Abody, Anew; // Aresi is the residual orientation for this ibody Euler_angles2matrix(body_rot, body_tilt, body_psi, Aresi); // Only apply the residual orientation now!!! Abody = (model.orient_bodies[ibody]).transpose() * A_rot90 * Aresi * model.orient_bodies[ibody]; // Now we have to get back from the 2D refined body_offset to some 3D translation of the body (with one direction non-defined) // We will need the original projection direction, Aori for that!! // Because one direction is ill-defined, this may not be such a good idea? // But anyway, this should bring it closer to truth than not doing anything at all... 
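// Illustrative note (added sketch, not part of the original RELION sources):
// in matrix form, the step below composes the consensus particle orientation
// with the residual body orientation and lifts the refined in-plane shift back
// to a 3D body translation:
//
//   A_new = A_ori * A_body
//   t_3d  = A_new^{-1} * (-t_2d)    (t_2d in pixels, after the division by the pixel size above)
//
// The component of t_3d along the projection direction remains ill-determined,
// as the comment above already notes.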
Anew = Aori * Abody; body_offset_3d = Anew.inv() * (-body_offset); if (do_PCA_orient) { datarow.push_back(norm_pca[ibody*4+0] * body_rot); datarow.push_back(norm_pca[ibody*4+1] * body_tilt); datarow.push_back(norm_pca[ibody*4+2] * body_psi); datarow.push_back(norm_pca[ibody*4+3] * XX(body_offset_3d)); datarow.push_back(norm_pca[ibody*4+3] * YY(body_offset_3d)); datarow.push_back(norm_pca[ibody*4+3] * ZZ(body_offset_3d)); } if (do_3dmodels) { // Also put back at the centre-of-mass of this body body_offset_3d += rescale_3dmodels * model.com_bodies[ibody]; Abody.resize(4,4); MAT_ELEM(Abody, 0, 3) = XX(body_offset_3d); MAT_ELEM(Abody, 1, 3) = YY(body_offset_3d); MAT_ELEM(Abody, 2, 3) = ZZ(body_offset_3d); MAT_ELEM(Abody, 3, 3) = 1.; Mbody.resize(model.Iref[ibody]); Mmask.resize(model.masks_bodies[ibody]); applyGeometry(model.Iref[ibody], Mbody, Abody, IS_NOT_INV, DONT_WRAP); applyGeometry(model.masks_bodies[ibody], Mmask, Abody, IS_NOT_INV, DONT_WRAP); img() += Mbody; sumw += Mmask; } } // end for ibody if (do_3dmodels) { // Divide the img by sumw to deal with overlapping bodies: just take average FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(img()) { if (DIRECT_MULTIDIM_ELEM(sumw, n) > 1.) DIRECT_MULTIDIM_ELEM(img(), n) /= DIRECT_MULTIDIM_ELEM(sumw, n); } // Write the image to disk FileName fn_img; fn_img.compose(fn_out+"_part", imgno+1,"mrc"); img.setSamplingRateInHeader(model.pixel_size); img.write(fn_img); DFo.addObject(); DFo.setValue(EMDL_MLMODEL_REF_IMAGE, fn_img); data.MDimg.getValue(EMDL_IMAGE_NAME, fn_img, part_id); DFo.setValue(EMDL_IMAGE_NAME, fn_img); } } void FlexAnalyser::makePCAhistograms(std::vector< std::vector > &projected_input, std::vector &eigenvalues, std::vector &means) { std::vector all_fn_eps; FileName fn_eps = fn_out + "_eigenvalues.eps"; all_fn_eps.push_back(fn_eps); CPlot2D *plot2D=new CPlot2D(fn_eps); CDataSet dataSet; dataSet.SetDrawMarker(false); dataSet.SetDatasetColor(1.0,0.0,0.0); // Percentage of variance double sum=0.; for (int i = 0; i < eigenvalues.size(); i++) sum += eigenvalues[i]; for (int i = 0; i < eigenvalues.size(); i++) { std::cout << " + Component " << i+1 << " explains " << eigenvalues[i]*100./sum << "% of variance." << std::endl; CDataPoint point1((double)i+0.5, (double)0.); CDataPoint point2((double)i+0.5, (double)eigenvalues[i]*100./sum); CDataPoint point3((double)i+1.5, (double)eigenvalues[i]*100./sum); CDataPoint point4((double)i+1.5, (double)0.); dataSet.AddDataPoint(point1); dataSet.AddDataPoint(point2); dataSet.AddDataPoint(point3); dataSet.AddDataPoint(point4); } plot2D->AddDataSet(dataSet); plot2D->SetXAxisTitle("Eigenvalue"); plot2D->SetYAxisTitle("Variance explained [%]"); plot2D->OutputPostScriptPlot(fn_eps); delete plot2D; // Determine how much variance the requested number of components explains if (nr_components < 0) { double cum = 0.; for (int i = 0; i < eigenvalues.size(); i++) { cum += eigenvalues[i]/sum; if (cum >= explain_variance) { nr_components = i + 1; break; } } } explain_variance = 0.; for (int i = 0; i < nr_components; i++) explain_variance += eigenvalues[i]*100./sum; std::cout << " The first " << nr_components << " eigenvectors explain " << explain_variance << " % of the variance in the data." 
<< std::endl; // Output histograms of all eigenvalues for (int k = 0; k < eigenvalues.size(); k++) { // Sort vector of all projected values for this component: divide in nr_maps_per_component bins and take average value std::vector project; for (long int ipart = 0; ipart < projected_input.size(); ipart++) project.push_back(projected_input[ipart][k]); // Sort the vector to calculate average of nr_maps_per_component equi-populated bins std::sort (project.begin(), project.end()); // Write the movement plot as well FileName fn_eps = fn_out + "_component" + integerToString(k+1, 3) + "_histogram.eps"; all_fn_eps.push_back(fn_eps); CPlot2D *plot2D=new CPlot2D(fn_eps); CDataSet dataSet; dataSet.SetDrawMarker(false); dataSet.SetDatasetColor(1.0,0.0,0.0); double minhis = project[0]; double maxhis = project[project.size()-1]; double widthhis = (maxhis - minhis) / nr_bins; double stophis = minhis + widthhis; long int n = 0; for (long int ipart = 0; ipart < project.size(); ipart++) { if (project[ipart] >= stophis) { CDataPoint point1(stophis-widthhis, (double)0.); CDataPoint point2(stophis-widthhis, (double)n); CDataPoint point3(stophis, (double)n); CDataPoint point4(stophis, (double)0.); dataSet.AddDataPoint(point1); dataSet.AddDataPoint(point2); dataSet.AddDataPoint(point3); dataSet.AddDataPoint(point4); n = 0; stophis += widthhis; } n++; } plot2D->AddDataSet(dataSet); plot2D->SetXAxisTitle("Eigenvalue"); plot2D->SetYAxisTitle("Nr particles"); plot2D->OutputPostScriptPlot(fn_eps); delete plot2D; } joinMultipleEPSIntoSinglePDF(fn_out + "_logfile.pdf", all_fn_eps); } void FlexAnalyser::make3DModelsAlongPrincipalComponents(std::vector< std::vector > &projected_input, std::vector< std::vector > &eigenvectors, std::vector &means) { // Loop over the principal components for (int k = 0; k < nr_components; k++) { // Sort vector of all projected values for this component: divide in nr_maps_per_component bins and take average value std::vector project; for (long int ipart = 0; ipart < projected_input.size(); ipart++) project.push_back(projected_input[ipart][k]); // Sort the vector to calculate average of "nr_maps_per_component" equi-populated bins std::sort (project.begin(), project.end()); long int binwidth = ROUND((double)project.size() / (double)nr_maps_per_component); std::cout << " Calculating 3D models for principal component " << k+1 << " ... " << std::endl; for (int ibin = 0; ibin < nr_maps_per_component; ibin++) { long int istart = ibin * binwidth; long int istop = (ibin+1) * binwidth - 1; if (ibin == nr_maps_per_component - 1) istop = project.size() - 1; double avg = 0., nn = 0.; for (long int ipart = istart; ipart <= istop; ipart++) { avg += project[ipart]; nn += 1.; } if (nn > 0.) avg /= nn; // Now we have the average value for the PCA values for this bin: make the 3D model... 
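// Illustrative note (added sketch, not part of the original RELION sources):
// each bin is turned back into a set of body orientations by stepping along
// eigenvector k away from the mean by the bin-averaged projection value:
//
//   orients[j] = avg * eigenvectors[k][j] + means[j]
//
// The per-body weights in norm_pca are then divided out again below to recover
// angles in degrees and offsets in pixels before the 3D model is rendered.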
std::vector orients; for (int j = 0; j < means.size(); j++) { orients.push_back(avg * eigenvectors[k][j] + means[j]); //std::cerr << "j= "< img; MultidimArray sumw; img().initZeros(model.Iref[0]); sumw.initZeros(model.Iref[0]); for (int ibody = 0; ibody < model.nr_bodies; ibody++) { MultidimArray Mbody, Mmask; Matrix1D body_offset_3d(3); RFLOAT body_rot, body_tilt, body_psi; body_rot = orients[ibody * 6 + 0] / norm_pca[ibody*4+0]; body_tilt = orients[ibody * 6 + 1] / norm_pca[ibody*4+1]; body_psi = orients[ibody * 6 + 2] / norm_pca[ibody*4+2]; XX(body_offset_3d) = orients[ibody * 6 + 3] / norm_pca[ibody*4+3]; YY(body_offset_3d) = orients[ibody * 6 + 4] / norm_pca[ibody*4+3]; ZZ(body_offset_3d) = orients[ibody * 6 + 5] / norm_pca[ibody*4+3]; //std::cerr << " norm_pca[ibody*4+0]= " << norm_pca[ibody*4+0] << " norm_pca[ibody*4+1]= " << norm_pca[ibody*4+1] << " norm_pca[ibody*4+2]= " << norm_pca[ibody*4+2] << " norm_pca[ibody*4+3]= " << norm_pca[ibody*4+3] << std::endl; //std::cerr << " body_rot= " << body_rot << " body_tilt= " << body_tilt << " body_psi= " << body_psi << std::endl; //std::cerr << " XX(body_offset_3d)= " << XX(body_offset_3d) << " YY(body_offset_3d)= " << YY(body_offset_3d) << " ZZ(body_offset_3d)= " << ZZ(body_offset_3d) << std::endl; Matrix2D Aresi, Abody; // Aresi is the residual orientation for this ibody Euler_angles2matrix(body_rot, body_tilt, body_psi, Aresi); // Only apply the residual orientation now!!! Abody = (model.orient_bodies[ibody]).transpose() * A_rot90 * Aresi * model.orient_bodies[ibody]; // Also put back at the centre-of-mass of this body Abody.resize(4,4); MAT_ELEM(Abody, 0, 3) = XX(body_offset_3d); MAT_ELEM(Abody, 1, 3) = YY(body_offset_3d); MAT_ELEM(Abody, 2, 3) = ZZ(body_offset_3d); MAT_ELEM(Abody, 3, 3) = 1.; Mbody.resize(model.Iref[ibody]); Mmask.resize(model.masks_bodies[ibody]); applyGeometry(model.Iref[ibody], Mbody, Abody, IS_NOT_INV, DONT_WRAP); applyGeometry(model.masks_bodies[ibody], Mmask, Abody, IS_NOT_INV, DONT_WRAP); img() += Mbody * Mmask; sumw += Mmask; } // Divide the img by sumw to deal with overlapping bodies: just take average FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(img()) { if (DIRECT_MULTIDIM_ELEM(sumw, n) > 1.) 
DIRECT_MULTIDIM_ELEM(img(), n) /= DIRECT_MULTIDIM_ELEM(sumw, n); } // Write the image to disk FileName fn_img = fn_out + "_component" + integerToString(k+1, 3) + "_bin" + integerToString(ibin+1, 3) + ".mrc"; img.setSamplingRateInHeader(model.pixel_size); img.write(fn_img); } // end loop ibin } // end loop components } void FlexAnalyser::writeAllPCAProjections(std::vector< std::vector > &projected_input) { FileName fnt = fn_out+"_projections_along_eigenvectors_all_particles.txt"; std::ofstream fh; fh.open((fnt).c_str(), std::ios::out); if (!fh) REPORT_ERROR( (std::string)" FlexAnalyser::writeAllPCAProjections: cannot write to file: " + fnt); for (long int ipart = 0; ipart < projected_input.size(); ipart++) { data.MDimg.getValue(EMDL_IMAGE_NAME, fnt, ipart); fh << fnt << " "; for (int ival = 0; ival < projected_input[ipart].size(); ival++) { fh.width(15); fh << projected_input[ipart][ival]; } fh << " \n"; } fh.close(); } void FlexAnalyser::outputSelectedParticles(std::vector< std::vector > &projected_input) { if (select_eigenvalue <= 0) return; MetaDataTable MDo; for (long int ipart = 0; ipart < projected_input.size(); ipart++) { if (projected_input[ipart][select_eigenvalue-1] > select_eigenvalue_min && projected_input[ipart][select_eigenvalue-1] < select_eigenvalue_max) MDo.addObject(data.MDimg.getObject(ipart)); } int min = ROUND(select_eigenvalue_min); int max = ROUND(select_eigenvalue_max); FileName fnt = fn_out+"_eval"+integerToString(select_eigenvalue,3)+"_select"; if (min > -99998) fnt += "_min"+integerToString(min); if (max < 99998) fnt += "_max"+integerToString(max); fnt += ".star"; data.obsModel.save(MDo, fnt, "particles"); std::cout << " Written out " << MDo.numberOfObjects() << " selected particles in " << fnt << std::endl; } void principalComponentsAnalysis(const std::vector< std::vector > &input, std::vector< std::vector > &eigenvec, std::vector &eigenval, std::vector &means, std::vector< std::vector > &projected_input) { std:: cout << "Calculating PCA ..." << std::endl; std::vector > a; long int datasize = input.size(); if (datasize == 0) REPORT_ERROR("ERROR: empty input vector for PCA!"); // The dimension (n) long int n = input[0].size(); a.resize(n); //Get the mean and variance of the given cluster of vectors for (int k = 0; k < n; k++) { a[k].resize(n); double sum = 0.0; double nn = 0.; for (long int i = 0; i < datasize; i++) { sum += input[i][k]; nn += 1.0; } means.push_back(sum / nn); } for (int i = 0; i < n;i++) { for (int j = 0;j <= i; j++) { double sum = 0.0; double nn = 0.; for (long int k = 0; k < datasize; k++) { double d1 = input[k][i] - means[i]; double d2 = input[k][j] - means[j]; sum += d1 * d2; nn += 1.0; } if (nn > 0.) a[i][j] = a[j][i] = sum / nn; else a[i][j] = a[j][i] = 0; } } eigenval.resize(n); eigenvec.resize(n); std::vector b; b.resize(n); std::vector z; z.resize(n); std::vector &d = eigenval; std::vector< std::vector > &v = eigenvec; for (int i = 0; i < n; i++) { v[i].resize(n); v[i][i] = 1.0; b[i] = d[i] = a[i][i]; } int nrot = 0; // Jacobi method (it=iteration number) for (int it = 1; it <= 50; it++) { double threshold; double sm = 0.0; for (int ip = 0; ip < n - 1; ip++) { for (int iq = ip + 1; iq < n; iq++) sm += fabs(a[iq][ip]); } if (sm == 0.0) {//Done. Sort vectors for (int i = 0; i < n - 1; i++) { int k = i; double p = d[i]; for (int j = i + 1; j < n; j++) if (d[j] >= p) p = d[k = j]; if (k != i) {//Swap i<->k d[k] = d[i]; d[i] = p; std::vector t = v[i]; v[i] = v[k]; v[k] = t; } } // Done with PCA now! 
// Just project all data onto the PCA now and exit projected_input = input; for (int i = 0; i < n; i++) { for (long int z = 0; z < datasize; z++) { double cum = 0; for (int j = 0; j < n; j++) cum += v[i][j] * (input[z][j] - means[j]); projected_input[z][i] = cum; } // z } // i return; } if (it < 4) threshold = 0.2 * sm / (n * n); else threshold = 0; for (int ip = 0; ip < n - 1; ip++) { for (int iq = ip + 1; iq < n; iq++) { double g = 100.0 * fabs(a[iq][ip]); if (it > 4 && fabs(d[ip]) + g == fabs(d[ip]) && fabs(d[iq]) + g == fabs(d[iq])) a[iq][ip] = 0.0; else if (fabs(a[iq][ip]) > threshold) { double tau, t, s, c; double h = d[iq] - d[ip]; if (fabs(h) + g == fabs(h)) t = a[iq][ip] / h; else { double theta = 0.5 * h / a[iq][ip]; t = 1.0 / (fabs(theta) + sqrt(1.0 + theta * theta)); if (theta < 0.0) t = -t; } c = 1.0 / sqrt(1 + t * t); s = t * c; tau = s / (1.0 + c); h = t * a[iq][ip]; z[ip] -= h; z[iq] += h; d[ip] -= h; d[iq] += h; a[iq][ip] = 0.0; #define rotate(a,i,j,k,l) \ g = a[i][j]; \ h = a[k][l]; \ a[i][j] = g - s *(h + g*tau); \ a[k][l] = h + s*(g - h*tau); for (int j = 0; j < ip; j++) { rotate(a, ip, j, iq, j) } for (int j = ip + 1; j < iq; j++) { rotate(a, j, ip, iq, j) } for (int j = iq + 1; j < n; j++) { rotate(a, j, ip, j, iq) } for (int j = 0; j < n; j++) { rotate(v, ip, j, iq, j) } nrot += 1; }//if }//for iq }//for ip for (int ip = 0; ip < n; ip++) { b[ip] += z[ip]; d[ip] = b[ip]; z[ip] = 0.0; } }//for it REPORT_ERROR("ERROR: too many Jacobi iterations in PCA calculation..."); } relion-3.1.3/src/flex_analyser.h000066400000000000000000000103261411340063500166010ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef SRC_FLEX_ANALYSER_H_ #define SRC_FLEX_ANALYSER_H_ #include "src/exp_model.h" #include "src/ml_model.h" #include "src/ctf.h" #include "src/time.h" #include "src/parallel.h" #include "src/mpi.h" class FlexAnalyser { public: // I/O Parser IOParser parser; //verbosity int verb; // Output rootname FileName fn_out; // The model and the data from the refinement to be analysed FileName fn_model, fn_data; MlModel model; Experiment data; // The body STAR file FileName fn_bodies; // Write out 3D models bool do_3dmodels; // Box size of the output 3D models int size_3dmodels; // Rescale factor for output 3D models RFLOAT rescale_3dmodels; // Perform a PCA on the multibody orientations bool do_PCA_orient; // Normalisation of the rotations and translations for PCA normalisation std::vector norm_pca; // Generate maps for movies along principal components bool do_generate_maps; // How many components to make movies from? int nr_components; // How much variance to explain with the movies? 
double explain_variance; // How many maps to use for the movie of each principal component? int nr_maps_per_component; // How many bins in a histogram int nr_bins; // Select particles based on this eigenvalue int select_eigenvalue; // Select particles based on this eigenvalue minimim float select_eigenvalue_min; // Select particles based on this eigenvalue minimim float select_eigenvalue_max; // Write out text file with eigenvalues for all particles bool do_write_all_pca_projections; // center of mass of the above Matrix1D com_mask; // Pre-calculated rotation matrix for (0,90,0) rotation, and its transpose Matrix2D A_rot90, A_rot90T; MetaDataTable DFo; void read(int argc, char **argv); void initialise(); void run(int rank = 0, int size = 1); void setupSubtractionMasksAndProjectors(); void setup3DModels(); void loopThroughParticles(int rank = 0, int size = 1); void subtractOneParticle(long int part_id, long int imgno, int rank = 0, int size = 1); void make3DModelOneParticle(long int part_id, long int imgno, std::vector &datarow, int rank = 0, int size = 1); // Output logfile.pdf with histograms of all eigenvalues void makePCAhistograms(std::vector< std::vector > &projected_input, std::vector &eigenvalues, std::vector &means); // Generate maps to make movies of the variance along the most significant eigenvectors void make3DModelsAlongPrincipalComponents(std::vector< std::vector > &projected_input, std::vector< std::vector > &eigenvectors, std::vector &means); // Dump all projections to a text file void writeAllPCAProjections(std::vector< std::vector > &projected_input); // Output a particle.star file with a selection based on eigenvalues void outputSelectedParticles(std::vector< std::vector > &projected_input); }; void principalComponentsAnalysis(const std::vector< std::vector > &input, std::vector< std::vector > &eigenvectors, std::vector &eigenvalues, std::vector &means, std::vector< std::vector > &projected_input); #endif /* SRC_FLEX_ANALYSER_H_ */ relion-3.1.3/src/funcs.cpp000066400000000000000000000527371411340063500154320ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ /*************************************************************************** * * Authors: Carlos Oscar S. Sorzano (coss@cnb.csic.es) * * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA * 02111-1307 USA * * All comments concerning this program package may be sent to the * e-mail address 'xmipp@cnb.csic.es' ***************************************************************************/ #include "src/funcs.h" #include "src/args.h" #include #include #include #include #include #include #include #include #include #include void fitStraightLine(const std::vector &points, RFLOAT &slope, RFLOAT &intercept, RFLOAT &corr_coeff) { // From: http://mathworld.wolfram.com/LeastSquaresFitting.html // ss_xx = Sum_i x_i^2 - n ave_x^2 // ss_yy = Sum_i y_i^2 - n ave_y^2 // ss_xy = Sum_i x_i * y_i - n ave_x n_ave_y // slope = xx_xy / ss_xx // intercept = ave_y - slope * ave_x // corr_coeff = ss_xy^2 / (ss_xx * ss_yy) RFLOAT ss_xy = 0.; RFLOAT ss_xx = 0.; RFLOAT ss_yy = 0.; RFLOAT ave_x = 0.; RFLOAT ave_y = 0.; RFLOAT sum_w = 0.; for (int i = 0; i < points.size(); i++) { ave_x += points[i].w * points[i].x; ave_y += points[i].w * points[i].y; sum_w += points[i].w; ss_xx += points[i].w * points[i].x * points[i].x; ss_yy += points[i].w * points[i].y * points[i].y; ss_xy += points[i].w * points[i].x * points[i].y; } ave_x /= sum_w; ave_y /= sum_w; ss_xx -= sum_w * ave_x * ave_x; ss_yy -= sum_w * ave_y * ave_y; ss_xy -= sum_w * ave_x * ave_y; //std::cerr << " ss_xx= " << ss_xx << " ss_yy= " << ss_yy << " ss_xy= " << ss_xy << std::endl; //std::cerr << " sum_w= " << sum_w << " ave_x= " << ave_x << " ave_y= " << ave_y << std::endl; if (ss_xx > 0.) 
{ slope = ss_xy / ss_xx; intercept = ave_y - slope * ave_x; corr_coeff = ss_xy * ss_xy / (ss_xx * ss_yy); } else { intercept = slope = corr_coeff = 0.; } } void fitLeastSquaresPlane(const std::vector & points, RFLOAT &plane_a, RFLOAT &plane_b, RFLOAT &plane_c) { RFLOAT D = 0; RFLOAT E = 0; RFLOAT F = 0; RFLOAT G = 0; RFLOAT H = 0; RFLOAT I = 0; RFLOAT J = 0; RFLOAT K = 0; RFLOAT L = 0; RFLOAT W2 = 0; RFLOAT error = 0; RFLOAT denom = 0; for (int i = 0; i < points.size(); i++) { W2 = points[i].w * points[i].w; D += points[i].x * points[i].x * W2 ; E += points[i].x * points[i].y * W2 ; F += points[i].x * W2 ; G += points[i].y * points[i].y * W2 ; H += points[i].y * W2 ; I += 1 * W2 ; J += points[i].x * points[i].z * W2 ; K += points[i].y * points[i].z * W2 ; L += points[i].z * W2 ; } denom = F * F * G - 2 * E * F * H + D * H * H + E * E * I - D * G * I; // X axis slope plane_a = (H * H * J - G * I * J + E * I * K + F * G * L - H * (F * K + E * L)) / denom; // Y axis slope plane_b = (E * I * J + F * F * K - D * I * K + D * H * L - F * (H * J + E * L)) / denom; // Z axis intercept plane_c = (F * G * J - E * H * J - E * F * K + D * H * K + E * E * L - D * G * L) / denom; } /* Value of a blob --------------------------------------------------------- */ RFLOAT kaiser_value(RFLOAT r, RFLOAT a, RFLOAT alpha, int m) { RFLOAT rda, rdas, arg, w; rda = r / a; rdas = rda * rda; if (rdas <= 1.0) { arg = alpha * sqrt(1.0 - rdas); if (m == 0) { w = bessi0(arg) / bessi0(alpha); } else if (m == 1) { w = sqrt (1.0 - rdas); if (alpha != 0.0) w *= bessi1(arg) / bessi1(alpha); } else if (m == 2) { w = sqrt (1.0 - rdas); w = w * w; if (alpha != 0.0) w *= bessi2(arg) / bessi2(alpha); } else if (m == 3) { w = sqrt (1.0 - rdas); w = w * w * w; if (alpha != 0.0) w *= bessi3(arg) / bessi3(alpha); } else if (m == 4) { w = sqrt (1.0 - rdas); w = w * w * w *w; if (alpha != 0.0) w *= bessi4(arg) / bessi4(alpha); } else REPORT_ERROR("m out of range in kaiser_value()"); } else w = 0.0; return w; } /* Line integral through a blob -------------------------------------------- */ /* Value of line integral through Kaiser-Bessel radial function (n >=2 dimensions) at distance s from center of function. Parameter m = 0, 1, or 2. */ RFLOAT kaiser_proj(RFLOAT s, RFLOAT a, RFLOAT alpha, int m) { RFLOAT sda, sdas, w, arg, p; sda = s / a; sdas = sda * sda; w = 1.0 - sdas; if (w > 1.0e-10) { arg = alpha * sqrt(w); if (m == 0) { if (alpha == 0.0) p = 2.0 * a * sqrt(w); else p = (2.0 * a / alpha) * sinh(arg) / bessi0(alpha); } else if (m == 1) { if (alpha == 0.0) p = 2.0 * a * w * sqrt(w) * (2.0 / 3.0); else p = (2.0 * a / alpha) * sqrt(w) * (cosh(arg) - sinh(arg) / arg) / bessi1(alpha); } else if (m == 2) { if (alpha == 0.0) p = 2.0 * a * w * w * sqrt(w) * (8.0 / 15.0); else p = (2.0 * a / alpha) * w * ((3.0 / (arg * arg) + 1.0) * sinh(arg) - (3.0 / arg) * cosh(arg)) / bessi2(alpha); } else REPORT_ERROR("m out of range in kaiser_proj()"); } else p = 0.0; return p; } /* Fourier value of a blob ------------------------------------------------- */ RFLOAT kaiser_Fourier_value(RFLOAT w, RFLOAT a, RFLOAT alpha, int m) { RFLOAT sigma = sqrt(ABS(alpha * alpha - (2. * PI * a * w) * (2. * PI * a * w))); if (m == 2) { if (2.*PI*a*w > alpha) return pow(2.*PI, 3. / 2.)*pow(a, 3.)*pow(alpha, 2.)*bessj3_5(sigma) / (bessi0(alpha)*pow(sigma, 3.5)); else return pow(2.*PI, 3. / 2.)*pow(a, 3.)*pow(alpha, 2.)*bessi3_5(sigma) / (bessi0(alpha)*pow(sigma, 3.5)); } else if (m == 0) { if (2*PI*a*w > alpha) return pow(2.*PI, 3. 
/ 2.)*pow(a, 3)*bessj1_5(sigma) / (bessi0(alpha)*pow(sigma, 1.5)); else return pow(2.*PI, 3. / 2.)*pow(a, 3)*bessi1_5(sigma) / (bessi0(alpha)*pow(sigma, 1.5)); } else REPORT_ERROR("m out of range in kaiser_Fourier_value()"); } /* Volume integral of a blob ----------------------------------------------- */ RFLOAT basvolume(RFLOAT a, RFLOAT alpha, int m, int n) { RFLOAT hn, tpi, v; hn = 0.5 * n; tpi = 2.0 * PI; if (alpha == 0.0) { if ((n / 2)*2 == n) /* n even */ v = pow(tpi, hn) * in_zeroarg(n / 2 + m) / in_zeroarg(m); else /* n odd */ v = pow(tpi, hn) * inph_zeroarg(n / 2 + m) / in_zeroarg(m); } else { /* alpha > 0.0 */ if ((n / 2)*2 == n) /* n even */ v = pow(tpi / alpha, hn) * i_n(n / 2 + m, alpha) / i_n(m, alpha); else /* n odd */ v = pow(tpi / alpha, hn) * i_nph(n / 2 + m, alpha) / i_n(m, alpha); } return v * pow(a, (RFLOAT)n); } /* Bessel function I_n (x), n = 0, 1, 2, ... Use ONLY for small values of n */ RFLOAT i_n(int n, RFLOAT x) { int i; RFLOAT i_ns1, i_n, i_np1; if (n == 0) return bessi0(x); if (n == 1) return bessi1(x); if (x == 0.0) return 0.0; i_ns1 = bessi0(x); i_n = bessi1(x); for (i = 1; i < n; i++) { i_np1 = i_ns1 - (2 * i) / x * i_n; i_ns1 = i_n; i_n = i_np1; } return i_n; } /*.....Bessel function I_(n+1/2) (x), n = 0, 1, 2, ..........................*/ RFLOAT i_nph(int n, RFLOAT x) { int i; RFLOAT r2dpix; RFLOAT i_ns1, i_n, i_np1; if (x == 0.0) return 0.0; r2dpix = sqrt(2.0 / (PI * x)); i_ns1 = r2dpix * cosh(x); i_n = r2dpix * sinh(x); for (i = 1; i <= n; i++) { i_np1 = i_ns1 - (2 * i - 1) / x * i_n; i_ns1 = i_n; i_n = i_np1; } return i_n; } /*....Limit (z->0) of (1/z)^n I_n(z)..........................................*/ RFLOAT in_zeroarg(int n) { int i; RFLOAT fact; fact = 1.0; for (i = 1; i <= n; i++) { fact *= 0.5 / i; } return fact; } /*.......Limit (z->0) of (1/z)^(n+1/2) I_(n+1/2) (z)..........................*/ RFLOAT inph_zeroarg(int n) { int i; RFLOAT fact; fact = 1.0; for (i = 1; i <= n; i++) { fact *= 1.0 / (2 * i + 1.0); } return fact*sqrt(2.0 / PI); } /* Zero freq --------------------------------------------------------------- */ RFLOAT blob_freq_zero(struct blobtype b) { return sqrt(b.alpha*b.alpha + 6.9879*6.9879) / (2*PI*b.radius); } /* Attenuation ------------------------------------------------------------- */ RFLOAT blob_att(RFLOAT w, struct blobtype b) { return blob_Fourier_val(w, b) / blob_Fourier_val(0, b); } /* Number of operations ---------------------------------------------------- */ RFLOAT blob_ops(RFLOAT w, struct blobtype b) { return pow(b.alpha*b.alpha + 6.9879*6.9879, 1.5) / b.radius; } /* Gaussian value ---------------------------------------------------------- */ RFLOAT gaussian1D(RFLOAT x, RFLOAT sigma, RFLOAT mu) { x -= mu; return 1 / sqrt(2*PI*sigma*sigma)*exp(-0.5*((x / sigma)*(x / sigma))); } /* t-student value -------------------------------------------------------- */ RFLOAT tstudent1D(RFLOAT x, RFLOAT df, RFLOAT sigma, RFLOAT mu) { x -= mu; RFLOAT norm = exp(gammln((df+1.)/2.)) / exp(gammln(df/2.)); norm /= sqrt(df*PI*sigma*sigma); return norm * pow((1 + (x/sigma)*(x/sigma)/df),-((df+1.)/2.)); } RFLOAT gaussian2D(RFLOAT x, RFLOAT y, RFLOAT sigmaX, RFLOAT sigmaY, RFLOAT ang, RFLOAT muX, RFLOAT muY) { // Express x,y in the gaussian internal coordinates x -= muX; y -= muY; RFLOAT xp = cos(ang) * x + sin(ang) * y; RFLOAT yp = -sin(ang) * x + cos(ang) * y; // Now evaluate return 1 / sqrt(2*PI*sigmaX*sigmaY)*exp(-0.5*((xp / sigmaX)*(xp / sigmaX) + (yp / sigmaY)*(yp / sigmaY))); } /* ICDF Gaussian 
----------------------------------------------------------- */ RFLOAT icdf_gauss(RFLOAT p) { const RFLOAT c[] = { 2.515517, 0.802853, 0.010328 }; const RFLOAT d[] = { 1.432788, 0.189269, 0.001308 }; if (p < 0.5) { // F^-1(p) = - G^-1(p) RFLOAT t=sqrt(-2.0*log(p)); RFLOAT z=t - ((c[2]*t + c[1])*t + c[0]) / (((d[2]*t + d[1])*t + d[0])*t + 1.0); return -z; } else { // F^-1(p) = G^-1(1-p) RFLOAT t=sqrt(-2.0*log(1-p)); RFLOAT z=t - ((c[2]*t + c[1])*t + c[0]) / (((d[2]*t + d[1])*t + d[0])*t + 1.0); return z; } } /* CDF Gaussian ------------------------------------------------------------ */ RFLOAT cdf_gauss(RFLOAT x) { return 0.5 * (1. + erf(x/sqrt(2.))); } /************************************************************************* Student's t distribution Computes the integral from minus infinity to t of the Student t distribution with integer k > 0 degrees of freedom: t - | | - | 2 -(k+1)/2 | ( (k+1)/2 ) | ( x ) ---------------------- | ( 1 + --- ) dx - | ( k ) sqrt( k pi ) | ( k/2 ) | | | - -inf. Relation to incomplete beta integral: 1 - stdtr(k,t) = 0.5 * incbet( k/2, 1/2, z ) where z = k/(k + t**2). For t < -2, this is the method of computation. For higher t, a direct method is derived from integration by parts. Since the function is symmetric about t=0, the area under the right tail of the density is found by calling the function with -t instead of t. ACCURACY: Tested at random 1 <= k <= 25. The "domain" refers to t. Relative error: arithmetic domain # trials peak rms IEEE -100,-2 50000 5.9e-15 1.4e-15 IEEE -2,100 500000 2.7e-15 4.9e-17 Cephes Math Library Release 2.8: June, 2000 Copyright 1984, 1987, 1995, 2000 by Stephen L. Moshier *************************************************************************/ RFLOAT cdf_tstudent(int k, RFLOAT t) { RFLOAT EPS=5E-16; RFLOAT result; RFLOAT x; RFLOAT rk; RFLOAT z; RFLOAT f; RFLOAT tz; RFLOAT p; RFLOAT xsqk; int j; if ( t==0 ) { result = 0.5; return result; } if ( t<-2.0 ) { rk = k; z = rk/(rk+t*t); result = 0.5*betai(0.5*rk, 0.5, z); return result; } if ( t<0 ) { x = -t; } else { x = t; } rk = k; z = 1.0+x*x/rk; if ( k%2 != 0 ) { xsqk = x/sqrt(rk); p = atan(xsqk); if ( k > 1 ) { f = 1.0; tz = 1.0; j = 3; while ( j <= k-2 && tz/f > EPS ) { tz = tz*((j-1)/(z*j)); f = f+tz; j = j+2; } p = p+f*xsqk/z; } p = p*2.0/PI; } else { f = 1.0; tz = 1.0; j = 2; while ( j<= k-2 && tz/f > EPS) { tz = tz*((j-1)/(z*j)); f = f+tz; j = j+2; } p = f*x/sqrt(z*rk); } if ( t<0 ) { p = -p; } result = 0.5+0.5*p; return result; } /* Snedecor's F ------------------------------------------------------------ */ // http://en.wikipedia.org/wiki/F-distribution RFLOAT cdf_FSnedecor(int d1, int d2, RFLOAT x) { return betai(0.5*d1,0.5*d2,(d1*x)/(d1*x+d2)); } RFLOAT icdf_FSnedecor(int d1, int d2, RFLOAT p) { RFLOAT xl=0, xr=1e6; RFLOAT pl=cdf_FSnedecor(d1,d2,xl); RFLOAT pr=cdf_FSnedecor(d1,d2,xr); RFLOAT xm, pm; do { xm=(xl+xr)*0.5; pm=cdf_FSnedecor(d1,d2,xm); if (pm>p) { xr=xm; pr=pm; } else { xl=xm; pl=pm; } } while (ABS(pm-p)/p>0.001); return xm; } // Uniform distribution .................................................... void init_random_generator(int seed) { if (seed < 0) randomize_random_generator(); else srand(static_cast (seed) ); } void randomize_random_generator() { srand(static_cast (time(NULL)) ); } float rnd_unif(float a, float b) { if (a == b) return a; else return a + static_cast (rand()) /( static_cast (RAND_MAX/(b-a))); } // Gaussian distribution ................................................... 
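// The sampler below generates normal deviates with the Marsaglia polar method:
// two uniforms U1, U2 on [-1, 1] are rejected unless W = U1*U1 + U2*U2 lies in
// (0, 1); then X1 = U1*sqrt(-2*log(W)/W) and X2 = U2*sqrt(-2*log(W)/W) are two
// independent N(0,1) deviates, and the spare X2 is cached in a static variable
// for the next call. A minimal usage sketch (variable names illustrative only):
//
//   init_random_generator(42);           // fixed seed for a reproducible run
//   float noise = rnd_gaus(0.f, 1.f);    // standard normal sample
//   float value = rnd_gaus(128.f, 5.f);  // sample from N(mu = 128, sigma = 5)
//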
float rnd_gaus(float mu, float sigma) { float U1, U2, W, mult; static float X1, X2; static int call = 0; if (sigma == 0) return mu; if (call == 1) { call = !call; return (mu + sigma * (float) X2); } do { U1 = -1 + ((float) rand () / RAND_MAX) * 2; U2 = -1 + ((float) rand () / RAND_MAX) * 2; W = pow (U1, 2) + pow (U2, 2); } while (W >= 1 || W == 0); mult = sqrt ((-2 * log (W)) / W); X1 = U1 * mult; X2 = U2 * mult; call = !call; return (mu + sigma * (float) X1); } float rnd_student_t(RFLOAT nu, float mu, float sigma) { REPORT_ERROR("rnd_student_t currently not implemented!"); } float gaus_within_x0(float x0, float mean, float stddev) { float z0 = (x0 - mean) / stddev; return erf(ABS(z0) / sqrt(2.0)); } float gaus_outside_x0(float x0, float mean, float stddev) { float z0 = (x0 - mean) / stddev; return erfc(ABS(z0) / sqrt(2.0)); } float gaus_up_to_x0(float x0, float mean, float stddev) { if (x0 > mean) return 1.0 -gaus_outside_x0(x0, mean, stddev) / 2; else if (x0 == mean) return 0.5; else return gaus_outside_x0(x0, mean, stddev) / 2; } float gaus_from_x0(float x0, float mean, float stddev) { if (x0 > mean) return gaus_outside_x0(x0, mean, stddev) / 2; else if (x0 == mean) return 0.5; else return 1.0 -gaus_outside_x0(x0, mean, stddev) / 2; } float gaus_outside_probb(float p, float mean, float stddev) { // Make a Bolzano search for the right value float p1, p2, pm, x1, x2, xm; x1 = mean; x2 = mean + 5 * stddev; do { xm = (x1 + x2) / 2; p1 = gaus_outside_x0(x1, mean, stddev); p2 = gaus_outside_x0(x2, mean, stddev); pm = gaus_outside_x0(xm, mean, stddev); if (pm > p) x1 = xm; else x2 = xm; } while (ABS(pm - p) / p > 0.005); return xm; } // See Numerical Recipes, Chap. 6.3 float student_within_t0(float t0, float degrees_of_freedom) { return 1 -betai(degrees_of_freedom / 2, 0.5, degrees_of_freedom / (degrees_of_freedom + t0*t0)); } float student_outside_t0(float t0, float degrees_of_freedom) { return 1 -student_within_t0(t0, degrees_of_freedom); } float student_up_to_t0(float t0, float degrees_of_freedom) { if (t0 >= 0) return 1.0 -student_outside_t0(t0, degrees_of_freedom) / 2; else return student_outside_t0(t0, degrees_of_freedom) / 2; } float student_from_t0(float t0, float degrees_of_freedom) { return 1 -student_up_to_t0(t0, degrees_of_freedom); } float student_outside_probb(float p, float degrees_of_freedom) { // Make a Bolzano search for the right value float p1, p2, pm, t1, t2, tm; t1 = 0; t2 = 100; do { tm = (t1 + t2) / 2; p1 = student_outside_t0(t1, degrees_of_freedom); p2 = student_outside_t0(t2, degrees_of_freedom); pm = student_outside_t0(tm, degrees_of_freedom); if (pm > p) t1 = tm; else t2 = tm; } while (ABS(pm - p) / p > 0.005); return tm; } float chi2_up_to_t0(float t0, float degrees_of_freedom) { return gammp(degrees_of_freedom / 2, t0 / 2); } float chi2_from_t0(float t0, float degrees_of_freedom) { return 1 -chi2_up_to_t0(t0, degrees_of_freedom); } // Log uniform distribution ................................................ 
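// rnd_log() below draws a log-uniform deviate: it samples uniformly in log
// space and exponentiates, i.e. x = exp(U) with U ~ Uniform(log(a), log(b)),
// so equal ratios (for instance every decade between a and b) are equally
// probable; both bounds must therefore be positive. A minimal usage sketch
// (variable name illustrative only):
//
//   float scale = rnd_log(1.0e-3f, 1.0e3f); // six decades, uniform in log space
//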
float rnd_log(float a, float b) { if (a == b) return a; else return exp(rnd_unif(log(a), log(b))); } // Bsoft function void swapbytes(char* v, unsigned long n) { char t; for ( int i=0; i #include #include #include #include #include #include #include #include #include #include "src/numerical_recipes.h" #include "src/macros.h" #include "src/error.h" /** Structure of the points to do least-squares straight-line fitting */ struct fit_point2D { /// x coordinate RFLOAT x; /// y coordinate (assumed to be a function of x) RFLOAT y; /// Weight of the point in the Least-Squares problem RFLOAT w; }; void fitStraightLine(const std::vector &points, RFLOAT &slope, RFLOAT &intercept, RFLOAT &corr_coeff); /** Structure of the points to do least-squares plane fitting */ struct fit_point3D { /// x coordinate RFLOAT x; /// y coordinate RFLOAT y; /// z coordinate (assumed to be a function of x,y) RFLOAT z; /// Weight of the point in the Least-Squares problem RFLOAT w; }; void fitLeastSquaresPlane(const std::vector & points, RFLOAT &plane_a, RFLOAT &plane_b, RFLOAT &plane_c); /* ========================================================================= */ /* BLOBS */ /* ========================================================================= */ /**@defgroup Blobs Blobs @ingroup BasisFunction */ //@{ // Blob structure ---------------------------------------------------------- /** Blob definition. The blob is a space limited function (click here for a theoretical explanation) which is used as basis function for the ART reconstructions. There are several parameters which define the shape of the blob. The following structure holds all needed information for a blob, a variable can be of this type and it is passed to the different functions containing all we need to know about the blob. As a type definition, we can work with several kind of blobs in the same program at the same time. The common way of defining a blob is as follows: @code struct blobtype blob; // Definition of the blob blob.radius = 2; // Blob radius in voxels blob.order = 2; // Order of the Bessel function blob.alpha = 3.6; // Smoothness parameter @endcode Sometimes it is useful to plot any quantity related to the blobs. In the following example you have how to plot their Fourier transform in the continuous frequency space. @code int main(int argc, char **argv) { struct blobtype blob; // Definition of the blob blob.radius = 2; // Blob radius in voxels blob.order = 2; // Order of the Bessel function blob.alpha = textToFloat(argv[1]); // Smoothness parameter RFLOAT M=blob_Fourier_val (0, blob); for (RFLOAT w=0; w<=2; w += 0.05) std::cout << w << " " << blob_Fourier_val (w, blob)/M << std::endl; return 0; } @endcode */ struct blobtype { /// Spatial radius in Universal System units RFLOAT radius; /// Derivation order and Bessel function order int order; /// Smoothness parameter RFLOAT alpha; }; // Blob value -------------------------------------------------------------- /** Blob value. This function returns the value of a blob at a given distance from its center (in Universal System units). The distance must be always positive. Remember that a blob is spherically symmetrycal so the only parameter to know the blob value at a point is its distance to the center of the blob. It doesn't matter if this distance is larger than the real blob spatial extension, in this case the function returns 0 as blob value. 
\\ Ex: @code struct blobtype blob; blob.radius = 2; blob.order = 2; blob.alpha = 3.6; Matrix1D v=vectorR3(1,1,1); std::cout << "Blob value at (1,1,1) = " << blob_val(v.mod(),blob) << std::endl; @endcode */ #define blob_val(r, blob) kaiser_value(r, blob.radius, blob.alpha, blob.order) /** Function actually computing the blob value. */ RFLOAT kaiser_value(RFLOAT r, RFLOAT a, RFLOAT alpha, int m); // Blob projection --------------------------------------------------------- /** Blob projection. This function returns the value of the blob line integral through a straight line which passes at a distance 'r' (in Universal System units) from the center of the blob. Remember that a blob is spherically symmetrycal so the only parameter to know this blob line integral is its distance to the center of the blob. It doesn't matter if this distance is larger than the real blob spatial extension, in this case the function returns 0. \\ Ex: @code struct blobtype blob; blob.radius = 2; blob.order = 2; blob.alpha = 3.6; Matrix1D v=vectorR3(1,1,1); std::cout << "Blob line integral through (1,1,1) = " << blob_proj(v.mod(),blob) << std::endl; @endcode */ #define blob_proj(r, blob) kaiser_proj(r, blob.radius, blob.alpha, blob.order) /** Function actually computing the blob projection. */ RFLOAT kaiser_proj(RFLOAT r, RFLOAT a, RFLOAT alpha, int m); /** Fourier transform of a blob. This function returns the value of the Fourier transform of the blob at a given frequency (w). This frequency must be normalized by the sampling rate. For instance, for computing the Fourier Transform of a blob at 1/Ts (Ts in Amstrongs) you must provide the frequency Tm/Ts, where Tm is the sampling rate. The Fourier Transform can be computed only for blobs with m=2 or m=0. */ #define blob_Fourier_val(w, blob) \ kaiser_Fourier_value(w, blob.radius, blob.alpha, blob.order) /** Function actually computing the blob Fourier transform. */ RFLOAT kaiser_Fourier_value(RFLOAT w, RFLOAT a, RFLOAT alpha, int m); /** Formula for a volume integral of a blob (n is the blob dimension) */ #define blob_mass(blob) \ basvolume(blob.radius, blob.alpha, blob.order,3) /** Function actually computing the blob integral */ RFLOAT basvolume(RFLOAT a, RFLOAT alpha, int m, int n); /** Limit (z->0) of (1/z)^n I_n(z) (needed by basvolume)*/ RFLOAT in_zeroarg(int n); /** Limit (z->0) of (1/z)^(n+1/2) I_(n+1/2) (z) (needed by basvolume)*/ RFLOAT inph_zeroarg(int n); /** Bessel function I_(n+1/2) (x), n = 0, 1, 2, ... */ RFLOAT i_nph(int n, RFLOAT x); /** Bessel function I_n (x), n = 0, 1, 2, ... Use ONLY for small values of n */ RFLOAT i_n(int n, RFLOAT x); /** Blob pole. This is the normalized frequency at which the blob goes to 0. */ RFLOAT blob_freq_zero(struct blobtype b); /** Attenuation of a blob. The Fourier transform of the blob at w is the Fourier transform at w=0 multiplied by the attenuation. This is the value returned. Remind that the frequency must be normalized by the sampling rate. Ie, Tm*w(cont) */ RFLOAT blob_att(RFLOAT w, struct blobtype b); /** Number of operations for a blob. This is a number proportional to the number of operations that ART would need to make a reconstruction with this blob. */ RFLOAT blob_ops(RFLOAT w, struct blobtype b); /** 1D gaussian value * * This function returns the value of a univariate gaussian function at the * point x. 
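 *
 * The value returned is exp(-0.5*((x - mu)/sigma)^2) / sqrt(2*PI*sigma*sigma).
 * For illustration:
 *
 * @code
 * RFLOAT peak = gaussian1D(0., 1.); // 1/sqrt(2*PI), roughly 0.3989
 * RFLOAT tail = gaussian1D(3., 1.); // roughly 0.0044
 * @endcode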
*/ RFLOAT gaussian1D(RFLOAT x, RFLOAT sigma, RFLOAT mu = 0); /** 1D t-student value * * This function returns the value of a univariate t-student function at the * point x, and with df degrees of freedom */ RFLOAT tstudent1D(RFLOAT x, RFLOAT df, RFLOAT sigma, RFLOAT mu = 0); /** Inverse Cumulative distribution function for a Gaussian * * This function returns the z of a N(0,1) such that the probability below z is p * * The function employs an fast approximation to z which is valid up to 1e-4. * See http://www.johndcook.com/normal_cdf_inverse.html */ RFLOAT icdf_gauss(RFLOAT p); /** Cumulative distribution function for a Gaussian * * This function returns the value of the CDF of a univariate gaussian function at the * point x. */ RFLOAT cdf_gauss(RFLOAT x); /** Cumulative distribution function for a t-distribution * * This function returns the value of the CDF of a univariate t-distribution * with k degrees of freedom at the point t. * Adapted by Sjors from: http://www.alglib.net/specialfunctions/distributions/student.php */ RFLOAT cdf_tstudent(int k, RFLOAT t); /** Cumulative distribution function for a Snedecor's F-distribution. * * This function returns the value of the CDF of a univariate Snedecor's * F-distribution * with d1, d2 degrees of freedom at the point x. */ RFLOAT cdf_FSnedecor(int d1, int d2, RFLOAT x); /** Inverse Cumulative distribution function for a Snedecor's F-distribution. * * This function returns the value of the ICDF of a univariate Snedecor's * F-distribution * with d1, d2 degrees of freedom with probability p, i.e., it returns * x such that CDF(d1,d2,x)=p */ RFLOAT icdf_FSnedecor(int d1, int d2, RFLOAT p); /** 2D gaussian value * * This function returns the value of a multivariate (2D) gaussian function at * the point (x,y) when the X axis of the gaussian is rotated ang * (counter-clockwise) radians (the angle is positive when measured from the * universal X to the gaussian X). X and Y are supposed to be independent. */ RFLOAT gaussian2D(RFLOAT x, RFLOAT y, RFLOAT sigmaX, RFLOAT sigmaY, RFLOAT ang, RFLOAT muX = 0, RFLOAT muY = 0); //@} /** @name Random functions * * These functions allow you to work in an easier way with the random functions * of the Numerical Recipes. Only an uniform and a gaussian random number * generators have been implemented. In fact only a uniform generator exists and * the gaussian one is based on a call to it. For this reason, if you initialize * the gaussian random generator, you are also initialising the uniform one. * * Here goes an example for uniform random numbers to show how to use this set * of functions. * * @code * // Initialise according to the clock * randomize_random_generator(); * * // Show 10 random numbers between -1 and 1 * for (int i=0; i<10; i++) * std::cout << rnd_unif(-1,1) << std::endl; * @endcode */ //@{ /** Reset uniform random generator to a known point * * If you initialize the random generator with this function each time, then the * same random sequence will be generated * * @code * init_rnd_unif(); * init_rnd_unif(17891) * @endcode */ void init_random_generator(int seed = -1); /** Reset random generator according to the clock. * * This time the initialisation itself assures a random sequence different each * time the program is run. Be careful not to run the program twice within the * same second as the initialisation will be the same for both runs. 
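 *
 * When a reproducible sequence is needed instead, seed the generator
 * explicitly with init_random_generator(), for example:
 *
 * @code
 * init_random_generator(12345); // same sequence on every run
 * @endcode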
*/ void randomize_random_generator(); /** Produce a uniform random number between a and b * * @code * std::cout << "This random number should be between 0 and 10: " << rnd_unif(0,10) * << std::endl; * @endcode */ float rnd_unif(float a = 0., float b = 1.); /** Produce a gaussian random number with mean a and standard deviation b * * @code * std::cout << "This random number should follow N(1,4): " << rnd_gaus(1,2) * << std::endl; * @endcode */ float rnd_gaus(float mu = 0., float sigma = 1.); /** Produce a gaussian random number with mean mu and standard deviation sigma and nu degrees of freedom * * @code * std::cout << "This random number should follow t(1,4) with 3 d.o.f.: " << rnd_gaus(3,1,2) * << std::endl; * @endcode */ float rnd_student_t(RFLOAT nu, float mu = 0., float sigma = 1.); /** Gaussian area from -x0 to x0 * * By default the gaussian mean is 0 and the gaussian standard deviation is 1. * x0 must be positive */ float gaus_within_x0(float x0, float mean = 0, float stddev = 1); /** Gaussian area outisde -x0 to x0 * * By default the gaussian mean is 0 and the gaussian standard deviation is 1. * x0 must be positive */ float gaus_outside_x0(float x0, float mean = 0, float stddev = 1); /** Gaussian area from -inf to x0 * * By default the gaussian mean is 0 and the gaussian standard deviation is 1. * There is no restriction over the sign of x0 */ float gaus_up_to_x0(float x0, float mean = 0, float stddev = 1); /** Gaussian area from x0 to inf * * By default the gaussian mean is 0 and the gaussian standard deviation is 1. * There is no restriction over the sign of x0 */ float gaus_from_x0(float x0, float mean = 0, float stddev = 1); /** t0 for a given two-sided probability * * This function returns t0 such that the student probability outside t0 is * equal to p */ float student_outside_probb(float p, float degrees_of_freedom); /** student area from -t0 to t0 * * By default the student mean is 0 and the student standard deviation is 1. * t0 must be positive */ float student_within_t0(float t0, float degrees_of_freedom); /** student area outisde -t0 to t0 * * By default the student mean is 0 and the student standard deviation is 1. * t0 must be positive */ float student_outside_t0(float t0, float degrees_of_freedom); /** student area from -inf to t0 * * By default the student mean is 0 and the student standard deviation is 1. * There is no restriction over the sign of t0 */ float student_up_to_t0(float t0, float degrees_of_freedom); /** student area from t0 to inf * * By default the student mean is 0 and the student standard deviation is 1. * There is no restriction over the sign of t0 */ float student_from_t0(float t0, float degrees_of_freedom); /** chi2 area from -inf to t0 * * By default the chi2 mean is 0 and the chi2 standard deviation is 1. * There is no restriction over the sign of t0 */ float chi2_up_to_t0(float t0, float degrees_of_freedom); /** chi2 area from t0 to inf * * By default the chi2 mean is 0 and the chi2 standard deviation is 1. 
* There is no restriction over the sign of t0 */ float chi2_from_t0(float t0, float degrees_of_freedom); /** Produce a log uniform random number between a and b * * Watch out that the following inequation must hold 0 #else #include #endif #endif relion-3.1.3/src/gpu_utils/000077500000000000000000000000001411340063500156055ustar00rootroot00000000000000relion-3.1.3/src/gpu_utils/cub/000077500000000000000000000000001411340063500163565ustar00rootroot00000000000000relion-3.1.3/src/gpu_utils/cub/agent/000077500000000000000000000000001411340063500174545ustar00rootroot00000000000000relion-3.1.3/src/gpu_utils/cub/agent/agent_histogram.cuh000066400000000000000000001010631411340063500233310ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::AgentHistogram implements a stateful abstraction of CUDA thread blocks for participating in device-wide histogram . 
*/ #pragma once #include #include "../util_type.cuh" #include "../block/block_load.cuh" #include "../grid/grid_queue.cuh" #include "../iterator/cache_modified_input_iterator.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * Tuning policy ******************************************************************************/ /** * */ enum BlockHistogramMemoryPreference { GMEM, SMEM, BLEND }; /** * Parameterizable tuning policy type for AgentHistogram */ template < int _BLOCK_THREADS, ///< Threads per thread block int _PIXELS_PER_THREAD, ///< Pixels per thread (per tile of input) BlockLoadAlgorithm _LOAD_ALGORITHM, ///< The BlockLoad algorithm to use CacheLoadModifier _LOAD_MODIFIER, ///< Cache load modifier for reading input elements bool _RLE_COMPRESS, ///< Whether to perform localized RLE to compress samples before histogramming BlockHistogramMemoryPreference _MEM_PREFERENCE, ///< Whether to prefer privatized shared-memory bins (versus privatized global-memory bins) bool _WORK_STEALING> ///< Whether to dequeue tiles from a global work queue struct AgentHistogramPolicy { enum { BLOCK_THREADS = _BLOCK_THREADS, ///< Threads per thread block PIXELS_PER_THREAD = _PIXELS_PER_THREAD, ///< Pixels per thread (per tile of input) IS_RLE_COMPRESS = _RLE_COMPRESS, ///< Whether to perform localized RLE to compress samples before histogramming MEM_PREFERENCE = _MEM_PREFERENCE, ///< Whether to prefer privatized shared-memory bins (versus privatized global-memory bins) IS_WORK_STEALING = _WORK_STEALING, ///< Whether to dequeue tiles from a global work queue }; static const BlockLoadAlgorithm LOAD_ALGORITHM = _LOAD_ALGORITHM; ///< The BlockLoad algorithm to use static const CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER; ///< Cache load modifier for reading input elements }; /****************************************************************************** * Thread block abstractions ******************************************************************************/ /** * \brief AgentHistogram implements a stateful abstraction of CUDA thread blocks for participating in device-wide histogram . */ template < typename AgentHistogramPolicyT, ///< Parameterized AgentHistogramPolicy tuning policy type int PRIVATIZED_SMEM_BINS, ///< Number of privatized shared-memory histogram bins of any channel. Zero indicates privatized counters to be maintained in device-accessible memory. int NUM_CHANNELS, ///< Number of channels interleaved in the input data. Supports up to four channels. 
int NUM_ACTIVE_CHANNELS, ///< Number of channels actively being histogrammed typename SampleIteratorT, ///< Random-access input iterator type for reading samples typename CounterT, ///< Integer type for counting sample occurrences per histogram bin typename PrivatizedDecodeOpT, ///< The transform operator type for determining privatized counter indices from samples, one for each channel typename OutputDecodeOpT, ///< The transform operator type for determining output bin-ids from privatized counter indices, one for each channel typename OffsetT, ///< Signed integer type for global offsets int PTX_ARCH = CUB_PTX_ARCH> ///< PTX compute capability struct AgentHistogram { //--------------------------------------------------------------------- // Types and constants //--------------------------------------------------------------------- /// The sample type of the input iterator typedef typename std::iterator_traits::value_type SampleT; /// The pixel type of SampleT typedef typename CubVector::Type PixelT; /// The quad type of SampleT typedef typename CubVector::Type QuadT; /// Constants enum { BLOCK_THREADS = AgentHistogramPolicyT::BLOCK_THREADS, PIXELS_PER_THREAD = AgentHistogramPolicyT::PIXELS_PER_THREAD, SAMPLES_PER_THREAD = PIXELS_PER_THREAD * NUM_CHANNELS, QUADS_PER_THREAD = SAMPLES_PER_THREAD / 4, TILE_PIXELS = PIXELS_PER_THREAD * BLOCK_THREADS, TILE_SAMPLES = SAMPLES_PER_THREAD * BLOCK_THREADS, IS_RLE_COMPRESS = AgentHistogramPolicyT::IS_RLE_COMPRESS, MEM_PREFERENCE = (PRIVATIZED_SMEM_BINS > 0) ? AgentHistogramPolicyT::MEM_PREFERENCE : GMEM, IS_WORK_STEALING = AgentHistogramPolicyT::IS_WORK_STEALING, }; /// Cache load modifier for reading input elements static const CacheLoadModifier LOAD_MODIFIER = AgentHistogramPolicyT::LOAD_MODIFIER; /// Input iterator wrapper type (for applying cache modifier) typedef typename If::VALUE, CacheModifiedInputIterator, // Wrap the native input pointer with CacheModifiedInputIterator SampleIteratorT>::Type // Directly use the supplied input iterator type WrappedSampleIteratorT; /// Pixel input iterator type (for applying cache modifier) typedef CacheModifiedInputIterator WrappedPixelIteratorT; /// Qaud input iterator type (for applying cache modifier) typedef CacheModifiedInputIterator WrappedQuadIteratorT; /// Parameterized BlockLoad type for samples typedef BlockLoad< SampleT, BLOCK_THREADS, SAMPLES_PER_THREAD, AgentHistogramPolicyT::LOAD_ALGORITHM> BlockLoadSampleT; /// Parameterized BlockLoad type for pixels typedef BlockLoad< PixelT, BLOCK_THREADS, PIXELS_PER_THREAD, AgentHistogramPolicyT::LOAD_ALGORITHM> BlockLoadPixelT; /// Parameterized BlockLoad type for quads typedef BlockLoad< QuadT, BLOCK_THREADS, QUADS_PER_THREAD, AgentHistogramPolicyT::LOAD_ALGORITHM> BlockLoadQuadT; /// Shared memory type required by this thread block struct _TempStorage { CounterT histograms[NUM_ACTIVE_CHANNELS][PRIVATIZED_SMEM_BINS + 1]; // Smem needed for block-privatized smem histogram (with 1 word of padding) int tile_idx; // Aliasable storage layout union Aliasable { typename BlockLoadSampleT::TempStorage sample_load; // Smem needed for loading a tile of samples typename BlockLoadPixelT::TempStorage pixel_load; // Smem needed for loading a tile of pixels typename BlockLoadQuadT::TempStorage quad_load; // Smem needed for loading a tile of quads } aliasable; }; /// Temporary storage type (unionable) struct TempStorage : Uninitialized<_TempStorage> {}; //--------------------------------------------------------------------- // Per-thread fields 
//--------------------------------------------------------------------- /// Reference to temp_storage _TempStorage &temp_storage; /// Sample input iterator (with cache modifier applied, if possible) WrappedSampleIteratorT d_wrapped_samples; /// Native pointer for input samples (possibly NULL if unavailable) SampleT* d_native_samples; /// The number of output bins for each channel int (&num_output_bins)[NUM_ACTIVE_CHANNELS]; /// The number of privatized bins for each channel int (&num_privatized_bins)[NUM_ACTIVE_CHANNELS]; /// Reference to gmem privatized histograms for each channel CounterT* d_privatized_histograms[NUM_ACTIVE_CHANNELS]; /// Reference to final output histograms (gmem) CounterT* (&d_output_histograms)[NUM_ACTIVE_CHANNELS]; /// The transform operator for determining output bin-ids from privatized counter indices, one for each channel OutputDecodeOpT (&output_decode_op)[NUM_ACTIVE_CHANNELS]; /// The transform operator for determining privatized counter indices from samples, one for each channel PrivatizedDecodeOpT (&privatized_decode_op)[NUM_ACTIVE_CHANNELS]; /// Whether to prefer privatized smem counters vs privatized global counters bool prefer_smem; //--------------------------------------------------------------------- // Initialize privatized bin counters //--------------------------------------------------------------------- // Initialize privatized bin counters __device__ __forceinline__ void InitBinCounters(CounterT* privatized_histograms[NUM_ACTIVE_CHANNELS]) { // Initialize histogram bin counts to zeros #pragma unroll for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) { for (int privatized_bin = threadIdx.x; privatized_bin < num_privatized_bins[CHANNEL]; privatized_bin += BLOCK_THREADS) { privatized_histograms[CHANNEL][privatized_bin] = 0; } } // Barrier to make sure all threads are done updating counters CTA_SYNC(); } // Initialize privatized bin counters. Specialized for privatized shared-memory counters __device__ __forceinline__ void InitSmemBinCounters() { CounterT* privatized_histograms[NUM_ACTIVE_CHANNELS]; for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) privatized_histograms[CHANNEL] = temp_storage.histograms[CHANNEL]; InitBinCounters(privatized_histograms); } // Initialize privatized bin counters. Specialized for privatized global-memory counters __device__ __forceinline__ void InitGmemBinCounters() { InitBinCounters(d_privatized_histograms); } //--------------------------------------------------------------------- // Update final output histograms //--------------------------------------------------------------------- // Update final output histograms from privatized histograms __device__ __forceinline__ void StoreOutput(CounterT* privatized_histograms[NUM_ACTIVE_CHANNELS]) { // Barrier to make sure all threads are done updating counters CTA_SYNC(); // Apply privatized bin counts to output bin counts #pragma unroll for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) { int channel_bins = num_privatized_bins[CHANNEL]; for (int privatized_bin = threadIdx.x; privatized_bin < channel_bins; privatized_bin += BLOCK_THREADS) { int output_bin = -1; CounterT count = privatized_histograms[CHANNEL][privatized_bin]; bool is_valid = count > 0; output_decode_op[CHANNEL].template BinSelect((SampleT) privatized_bin, output_bin, is_valid); if (output_bin >= 0) { atomicAdd(&d_output_histograms[CHANNEL][output_bin], count); } } } } // Update final output histograms from privatized histograms. 
Specialized for privatized shared-memory counters __device__ __forceinline__ void StoreSmemOutput() { CounterT* privatized_histograms[NUM_ACTIVE_CHANNELS]; for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) privatized_histograms[CHANNEL] = temp_storage.histograms[CHANNEL]; StoreOutput(privatized_histograms); } // Update final output histograms from privatized histograms. Specialized for privatized global-memory counters __device__ __forceinline__ void StoreGmemOutput() { StoreOutput(d_privatized_histograms); } //--------------------------------------------------------------------- // Tile accumulation //--------------------------------------------------------------------- // Accumulate pixels. Specialized for RLE compression. __device__ __forceinline__ void AccumulatePixels( SampleT samples[PIXELS_PER_THREAD][NUM_CHANNELS], bool is_valid[PIXELS_PER_THREAD], CounterT* privatized_histograms[NUM_ACTIVE_CHANNELS], Int2Type is_rle_compress) { #pragma unroll for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) { // Bin pixels int bins[PIXELS_PER_THREAD]; #pragma unroll for (int PIXEL = 0; PIXEL < PIXELS_PER_THREAD; ++PIXEL) { bins[PIXEL] = -1; privatized_decode_op[CHANNEL].template BinSelect(samples[PIXEL][CHANNEL], bins[PIXEL], is_valid[PIXEL]); } CounterT accumulator = 1; #pragma unroll for (int PIXEL = 0; PIXEL < PIXELS_PER_THREAD - 1; ++PIXEL) { if (bins[PIXEL] != bins[PIXEL + 1]) { if (bins[PIXEL] >= 0) atomicAdd(privatized_histograms[CHANNEL] + bins[PIXEL], accumulator); accumulator = 0; } accumulator++; } // Last pixel if (bins[PIXELS_PER_THREAD - 1] >= 0) atomicAdd(privatized_histograms[CHANNEL] + bins[PIXELS_PER_THREAD - 1], accumulator); } } // Accumulate pixels. Specialized for individual accumulation of each pixel. __device__ __forceinline__ void AccumulatePixels( SampleT samples[PIXELS_PER_THREAD][NUM_CHANNELS], bool is_valid[PIXELS_PER_THREAD], CounterT* privatized_histograms[NUM_ACTIVE_CHANNELS], Int2Type is_rle_compress) { #pragma unroll for (int PIXEL = 0; PIXEL < PIXELS_PER_THREAD; ++PIXEL) { #pragma unroll for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) { int bin = -1; privatized_decode_op[CHANNEL].template BinSelect(samples[PIXEL][CHANNEL], bin, is_valid[PIXEL]); if (bin >= 0) atomicAdd(privatized_histograms[CHANNEL] + bin, 1); } } } /** * Accumulate pixel, specialized for smem privatized histogram */ __device__ __forceinline__ void AccumulateSmemPixels( SampleT samples[PIXELS_PER_THREAD][NUM_CHANNELS], bool is_valid[PIXELS_PER_THREAD]) { CounterT* privatized_histograms[NUM_ACTIVE_CHANNELS]; for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) privatized_histograms[CHANNEL] = temp_storage.histograms[CHANNEL]; AccumulatePixels(samples, is_valid, privatized_histograms, Int2Type()); } /** * Accumulate pixel, specialized for gmem privatized histogram */ __device__ __forceinline__ void AccumulateGmemPixels( SampleT samples[PIXELS_PER_THREAD][NUM_CHANNELS], bool is_valid[PIXELS_PER_THREAD]) { AccumulatePixels(samples, is_valid, d_privatized_histograms, Int2Type()); } //--------------------------------------------------------------------- // Tile loading //--------------------------------------------------------------------- // Load full, aligned tile using pixel iterator (multi-channel) template __device__ __forceinline__ void LoadFullAlignedTile( OffsetT block_offset, int valid_samples, SampleT (&samples)[PIXELS_PER_THREAD][NUM_CHANNELS], Int2Type<_NUM_ACTIVE_CHANNELS> num_active_channels) { typedef PixelT 
AliasedPixels[PIXELS_PER_THREAD]; WrappedPixelIteratorT d_wrapped_pixels((PixelT*) (d_native_samples + block_offset)); // Load using a wrapped pixel iterator BlockLoadPixelT(temp_storage.aliasable.pixel_load).Load( d_wrapped_pixels, reinterpret_cast(samples)); } // Load full, aligned tile using quad iterator (single-channel) __device__ __forceinline__ void LoadFullAlignedTile( OffsetT block_offset, int valid_samples, SampleT (&samples)[PIXELS_PER_THREAD][NUM_CHANNELS], Int2Type<1> num_active_channels) { typedef QuadT AliasedQuads[QUADS_PER_THREAD]; WrappedQuadIteratorT d_wrapped_quads((QuadT*) (d_native_samples + block_offset)); // Load using a wrapped quad iterator BlockLoadQuadT(temp_storage.aliasable.quad_load).Load( d_wrapped_quads, reinterpret_cast(samples)); } // Load full, aligned tile __device__ __forceinline__ void LoadTile( OffsetT block_offset, int valid_samples, SampleT (&samples)[PIXELS_PER_THREAD][NUM_CHANNELS], Int2Type is_full_tile, Int2Type is_aligned) { LoadFullAlignedTile(block_offset, valid_samples, samples, Int2Type()); } // Load full, mis-aligned tile using sample iterator __device__ __forceinline__ void LoadTile( OffsetT block_offset, int valid_samples, SampleT (&samples)[PIXELS_PER_THREAD][NUM_CHANNELS], Int2Type is_full_tile, Int2Type is_aligned) { typedef SampleT AliasedSamples[SAMPLES_PER_THREAD]; // Load using sample iterator BlockLoadSampleT(temp_storage.aliasable.sample_load).Load( d_wrapped_samples + block_offset, reinterpret_cast(samples)); } // Load partially-full, aligned tile using the pixel iterator __device__ __forceinline__ void LoadTile( OffsetT block_offset, int valid_samples, SampleT (&samples)[PIXELS_PER_THREAD][NUM_CHANNELS], Int2Type is_full_tile, Int2Type is_aligned) { typedef PixelT AliasedPixels[PIXELS_PER_THREAD]; WrappedPixelIteratorT d_wrapped_pixels((PixelT*) (d_native_samples + block_offset)); int valid_pixels = valid_samples / NUM_CHANNELS; // Load using a wrapped pixel iterator BlockLoadPixelT(temp_storage.aliasable.pixel_load).Load( d_wrapped_pixels, reinterpret_cast(samples), valid_pixels); } // Load partially-full, mis-aligned tile using sample iterator __device__ __forceinline__ void LoadTile( OffsetT block_offset, int valid_samples, SampleT (&samples)[PIXELS_PER_THREAD][NUM_CHANNELS], Int2Type is_full_tile, Int2Type is_aligned) { typedef SampleT AliasedSamples[SAMPLES_PER_THREAD]; BlockLoadSampleT(temp_storage.aliasable.sample_load).Load( d_wrapped_samples + block_offset, reinterpret_cast(samples), valid_samples); } //--------------------------------------------------------------------- // Tile processing //--------------------------------------------------------------------- // Consume a tile of data samples template < bool IS_ALIGNED, // Whether the tile offset is aligned (quad-aligned for single-channel, pixel-aligned for multi-channel) bool IS_FULL_TILE> // Whether the tile is full __device__ __forceinline__ void ConsumeTile(OffsetT block_offset, int valid_samples) { SampleT samples[PIXELS_PER_THREAD][NUM_CHANNELS]; bool is_valid[PIXELS_PER_THREAD]; // Load tile LoadTile( block_offset, valid_samples, samples, Int2Type(), Int2Type()); // Set valid flags #pragma unroll for (int PIXEL = 0; PIXEL < PIXELS_PER_THREAD; ++PIXEL) is_valid[PIXEL] = IS_FULL_TILE || (((threadIdx.x * PIXELS_PER_THREAD + PIXEL) * NUM_CHANNELS) < valid_samples); // Accumulate samples #if CUB_PTX_ARCH >= 120 if (prefer_smem) AccumulateSmemPixels(samples, is_valid); else AccumulateGmemPixels(samples, is_valid); #else AccumulateGmemPixels(samples, is_valid); 
#endif } // Consume row tiles. Specialized for work-stealing from queue template __device__ __forceinline__ void ConsumeTiles( OffsetT num_row_pixels, ///< The number of multi-channel pixels per row in the region of interest OffsetT num_rows, ///< The number of rows in the region of interest OffsetT row_stride_samples, ///< The number of samples between starts of consecutive rows in the region of interest int tiles_per_row, ///< Number of image tiles per row GridQueue tile_queue, Int2Type is_work_stealing) { int num_tiles = num_rows * tiles_per_row; int tile_idx = (blockIdx.y * gridDim.x) + blockIdx.x; OffsetT num_even_share_tiles = gridDim.x * gridDim.y; while (tile_idx < num_tiles) { int row = tile_idx / tiles_per_row; int col = tile_idx - (row * tiles_per_row); OffsetT row_offset = row * row_stride_samples; OffsetT col_offset = (col * TILE_SAMPLES); OffsetT tile_offset = row_offset + col_offset; if (col == tiles_per_row - 1) { // Consume a partially-full tile at the end of the row OffsetT num_remaining = (num_row_pixels * NUM_CHANNELS) - col_offset; ConsumeTile(tile_offset, num_remaining); } else { // Consume full tile ConsumeTile(tile_offset, TILE_SAMPLES); } CTA_SYNC(); // Get next tile if (threadIdx.x == 0) temp_storage.tile_idx = tile_queue.Drain(1) + num_even_share_tiles; CTA_SYNC(); tile_idx = temp_storage.tile_idx; } } // Consume row tiles. Specialized for even-share (striped across thread blocks) template __device__ __forceinline__ void ConsumeTiles( OffsetT num_row_pixels, ///< The number of multi-channel pixels per row in the region of interest OffsetT num_rows, ///< The number of rows in the region of interest OffsetT row_stride_samples, ///< The number of samples between starts of consecutive rows in the region of interest int tiles_per_row, ///< Number of image tiles per row GridQueue tile_queue, Int2Type is_work_stealing) { for (int row = blockIdx.y; row < num_rows; row += gridDim.y) { OffsetT row_begin = row * row_stride_samples; OffsetT row_end = row_begin + (num_row_pixels * NUM_CHANNELS); OffsetT tile_offset = row_begin + (blockIdx.x * TILE_SAMPLES); while (tile_offset < row_end) { OffsetT num_remaining = row_end - tile_offset; if (num_remaining < TILE_SAMPLES) { // Consume partial tile ConsumeTile(tile_offset, num_remaining); break; } // Consume full tile ConsumeTile(tile_offset, TILE_SAMPLES); tile_offset += gridDim.x * TILE_SAMPLES; } } } //--------------------------------------------------------------------- // Parameter extraction //--------------------------------------------------------------------- // Return a native pixel pointer (specialized for CacheModifiedInputIterator types) template < CacheLoadModifier _MODIFIER, typename _ValueT, typename _OffsetT> __device__ __forceinline__ SampleT* NativePointer(CacheModifiedInputIterator<_MODIFIER, _ValueT, _OffsetT> itr) { return itr.ptr; } // Return a native pixel pointer (specialized for other types) template __device__ __forceinline__ SampleT* NativePointer(IteratorT itr) { return NULL; } //--------------------------------------------------------------------- // Interface //--------------------------------------------------------------------- /** * Constructor */ __device__ __forceinline__ AgentHistogram( TempStorage &temp_storage, ///< Reference to temp_storage SampleIteratorT d_samples, ///< Input data to reduce int (&num_output_bins)[NUM_ACTIVE_CHANNELS], ///< The number bins per final output histogram int (&num_privatized_bins)[NUM_ACTIVE_CHANNELS], ///< The number bins per privatized histogram CounterT* 
(&d_output_histograms)[NUM_ACTIVE_CHANNELS], ///< Reference to final output histograms CounterT* (&d_privatized_histograms)[NUM_ACTIVE_CHANNELS], ///< Reference to privatized histograms OutputDecodeOpT (&output_decode_op)[NUM_ACTIVE_CHANNELS], ///< The transform operator for determining output bin-ids from privatized counter indices, one for each channel PrivatizedDecodeOpT (&privatized_decode_op)[NUM_ACTIVE_CHANNELS]) ///< The transform operator for determining privatized counter indices from samples, one for each channel : temp_storage(temp_storage.Alias()), d_wrapped_samples(d_samples), num_output_bins(num_output_bins), num_privatized_bins(num_privatized_bins), d_output_histograms(d_output_histograms), privatized_decode_op(privatized_decode_op), output_decode_op(output_decode_op), d_native_samples(NativePointer(d_wrapped_samples)), prefer_smem((MEM_PREFERENCE == SMEM) ? true : // prefer smem privatized histograms (MEM_PREFERENCE == GMEM) ? false : // prefer gmem privatized histograms blockIdx.x & 1) // prefer blended privatized histograms { int blockId = (blockIdx.y * gridDim.x) + blockIdx.x; // Initialize the locations of this block's privatized histograms for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) this->d_privatized_histograms[CHANNEL] = d_privatized_histograms[CHANNEL] + (blockId * num_privatized_bins[CHANNEL]); } /** * Consume image */ __device__ __forceinline__ void ConsumeTiles( OffsetT num_row_pixels, ///< The number of multi-channel pixels per row in the region of interest OffsetT num_rows, ///< The number of rows in the region of interest OffsetT row_stride_samples, ///< The number of samples between starts of consecutive rows in the region of interest int tiles_per_row, ///< Number of image tiles per row GridQueue tile_queue) ///< Queue descriptor for assigning tiles of work to thread blocks { // Check whether all row starting offsets are quad-aligned (in single-channel) or pixel-aligned (in multi-channel) int quad_mask = AlignBytes::ALIGN_BYTES - 1; int pixel_mask = AlignBytes::ALIGN_BYTES - 1; size_t row_bytes = sizeof(SampleT) * row_stride_samples; bool quad_aligned_rows = (NUM_CHANNELS == 1) && (SAMPLES_PER_THREAD % 4 == 0) && // Single channel ((size_t(d_native_samples) & quad_mask) == 0) && // ptr is quad-aligned ((num_rows == 1) || ((row_bytes & quad_mask) == 0)); // number of row-samples is a multiple of the alignment of the quad bool pixel_aligned_rows = (NUM_CHANNELS > 1) && // Multi channel ((size_t(d_native_samples) & pixel_mask) == 0) && // ptr is pixel-aligned ((row_bytes & pixel_mask) == 0); // number of row-samples is a multiple of the alignment of the pixel // Whether rows are aligned and can be vectorized if ((d_native_samples != NULL) && (quad_aligned_rows || pixel_aligned_rows)) ConsumeTiles(num_row_pixels, num_rows, row_stride_samples, tiles_per_row, tile_queue, Int2Type()); else ConsumeTiles(num_row_pixels, num_rows, row_stride_samples, tiles_per_row, tile_queue, Int2Type()); } /** * Initialize privatized bin counters. Specialized for privatized shared-memory counters */ __device__ __forceinline__ void InitBinCounters() { if (prefer_smem) InitSmemBinCounters(); else InitGmemBinCounters(); } /** * Store privatized histogram to device-accessible memory. 
Specialized for privatized shared-memory counters */ __device__ __forceinline__ void StoreOutput() { if (prefer_smem) StoreSmemOutput(); else StoreGmemOutput(); } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/agent/agent_radix_sort_downsweep.cuh000066400000000000000000000654201411340063500256130ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * AgentRadixSortDownsweep implements a stateful abstraction of CUDA thread blocks for participating in device-wide radix sort downsweep . 
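 *
 * \par
 * A minimal usage sketch (illustrative only, not part of this header) of the
 * device-level front-end that is built on top of the upsweep/downsweep agents.
 * The device pointers d_keys_in, d_keys_out, d_values_in, d_values_out and the
 * item count num_items are assumed to exist already:
 * \code
 * #include <cub/device/device_radix_sort.cuh>
 *
 * // First call: query how much temporary storage the sort needs
 * void   *d_temp_storage     = NULL;
 * size_t  temp_storage_bytes = 0;
 * cub::DeviceRadixSort::SortPairs(d_temp_storage, temp_storage_bytes,
 *     d_keys_in, d_keys_out, d_values_in, d_values_out, num_items);
 *
 * // Allocate the temporary storage, then run the actual sort
 * cudaMalloc(&d_temp_storage, temp_storage_bytes);
 * cub::DeviceRadixSort::SortPairs(d_temp_storage, temp_storage_bytes,
 *     d_keys_in, d_keys_out, d_values_in, d_values_out, num_items);
 * \endcode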
*/ #pragma once #include #include "../thread/thread_load.cuh" #include "../block/block_load.cuh" #include "../block/block_store.cuh" #include "../block/block_radix_rank.cuh" #include "../block/block_exchange.cuh" #include "../util_type.cuh" #include "../iterator/cache_modified_input_iterator.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * Tuning policy types ******************************************************************************/ /** * Radix ranking algorithm */ enum RadixRankAlgorithm { RADIX_RANK_BASIC, RADIX_RANK_MEMOIZE, RADIX_RANK_MATCH }; /** * Parameterizable tuning policy type for AgentRadixSortDownsweep */ template < int _BLOCK_THREADS, ///< Threads per thread block int _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) BlockLoadAlgorithm _LOAD_ALGORITHM, ///< The BlockLoad algorithm to use CacheLoadModifier _LOAD_MODIFIER, ///< Cache load modifier for reading keys (and values) RadixRankAlgorithm _RANK_ALGORITHM, ///< The radix ranking algorithm to use BlockScanAlgorithm _SCAN_ALGORITHM, ///< The block scan algorithm to use int _RADIX_BITS> ///< The number of radix bits, i.e., log2(bins) struct AgentRadixSortDownsweepPolicy { enum { BLOCK_THREADS = _BLOCK_THREADS, ///< Threads per thread block ITEMS_PER_THREAD = _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) RADIX_BITS = _RADIX_BITS, ///< The number of radix bits, i.e., log2(bins) }; static const BlockLoadAlgorithm LOAD_ALGORITHM = _LOAD_ALGORITHM; ///< The BlockLoad algorithm to use static const CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER; ///< Cache load modifier for reading keys (and values) static const RadixRankAlgorithm RANK_ALGORITHM = _RANK_ALGORITHM; ///< The radix ranking algorithm to use static const BlockScanAlgorithm SCAN_ALGORITHM = _SCAN_ALGORITHM; ///< The BlockScan algorithm to use }; /****************************************************************************** * Thread block abstractions ******************************************************************************/ /** * \brief AgentRadixSortDownsweep implements a stateful abstraction of CUDA thread blocks for participating in device-wide radix sort downsweep . 
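 *
 * \par
 * A rough sketch (illustrative only) of how a downsweep kernel body typically
 * drives this agent once it has been specialized for a concrete tuning policy
 * (called AgentRadixSortDownsweepT below). How block_offset and block_end are
 * derived, e.g. from an even-share descriptor, is omitted here:
 * \code
 * // Block-wide shared memory for the agent
 * __shared__ typename AgentRadixSortDownsweepT::TempStorage temp_storage;
 *
 * // Construct from the digit-count spine and sort this block's range of tiles
 * AgentRadixSortDownsweepT agent(temp_storage, num_items, d_spine,
 *     d_keys_in, d_keys_out, d_values_in, d_values_out, current_bit, num_bits);
 * agent.ProcessRegion(block_offset, block_end);
 * \endcode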
*/ template < typename AgentRadixSortDownsweepPolicy, ///< Parameterized AgentRadixSortDownsweepPolicy tuning policy type bool IS_DESCENDING, ///< Whether or not the sorted-order is high-to-low typename KeyT, ///< KeyT type typename ValueT, ///< ValueT type typename OffsetT> ///< Signed integer type for global offsets struct AgentRadixSortDownsweep { //--------------------------------------------------------------------- // Type definitions and constants //--------------------------------------------------------------------- // Appropriate unsigned-bits representation of KeyT typedef typename Traits::UnsignedBits UnsignedBits; static const UnsignedBits LOWEST_KEY = Traits::LOWEST_KEY; static const UnsignedBits MAX_KEY = Traits::MAX_KEY; static const BlockLoadAlgorithm LOAD_ALGORITHM = AgentRadixSortDownsweepPolicy::LOAD_ALGORITHM; static const CacheLoadModifier LOAD_MODIFIER = AgentRadixSortDownsweepPolicy::LOAD_MODIFIER; static const RadixRankAlgorithm RANK_ALGORITHM = AgentRadixSortDownsweepPolicy::RANK_ALGORITHM; static const BlockScanAlgorithm SCAN_ALGORITHM = AgentRadixSortDownsweepPolicy::SCAN_ALGORITHM; enum { BLOCK_THREADS = AgentRadixSortDownsweepPolicy::BLOCK_THREADS, ITEMS_PER_THREAD = AgentRadixSortDownsweepPolicy::ITEMS_PER_THREAD, RADIX_BITS = AgentRadixSortDownsweepPolicy::RADIX_BITS, TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, RADIX_DIGITS = 1 << RADIX_BITS, KEYS_ONLY = Equals::VALUE, }; // Input iterator wrapper type (for applying cache modifier)s typedef CacheModifiedInputIterator KeysItr; typedef CacheModifiedInputIterator ValuesItr; // Radix ranking type to use typedef typename If<(RANK_ALGORITHM == RADIX_RANK_BASIC), BlockRadixRank, typename If<(RANK_ALGORITHM == RADIX_RANK_MEMOIZE), BlockRadixRank, BlockRadixRankMatch >::Type >::Type BlockRadixRankT; enum { /// Number of bin-starting offsets tracked per thread BINS_TRACKED_PER_THREAD = BlockRadixRankT::BINS_TRACKED_PER_THREAD }; // BlockLoad type (keys) typedef BlockLoad< UnsignedBits, BLOCK_THREADS, ITEMS_PER_THREAD, LOAD_ALGORITHM> BlockLoadKeysT; // BlockLoad type (values) typedef BlockLoad< ValueT, BLOCK_THREADS, ITEMS_PER_THREAD, LOAD_ALGORITHM> BlockLoadValuesT; /** * Shared memory storage layout */ union __align__(16) _TempStorage { typename BlockLoadKeysT::TempStorage load_keys; typename BlockLoadValuesT::TempStorage load_values; typename BlockRadixRankT::TempStorage radix_rank; struct { UnsignedBits exchange_keys[TILE_ITEMS]; OffsetT relative_bin_offsets[RADIX_DIGITS]; }; ValueT exchange_values[TILE_ITEMS]; OffsetT exclusive_digit_prefix[RADIX_DIGITS]; }; /// Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; //--------------------------------------------------------------------- // Thread fields //--------------------------------------------------------------------- // Shared storage for this CTA _TempStorage &temp_storage; // Input and output device pointers KeysItr d_keys_in; ValuesItr d_values_in; UnsignedBits *d_keys_out; ValueT *d_values_out; // The global scatter base offset for each digit (valid in the first RADIX_DIGITS threads) OffsetT bin_offset[BINS_TRACKED_PER_THREAD]; // The least-significant bit position of the current digit to extract int current_bit; // Number of bits in current digit int num_bits; // Whether to short-cirucit int short_circuit; //--------------------------------------------------------------------- // Utility methods //--------------------------------------------------------------------- /** * Scatter ranked keys through 
shared memory, then to device-accessible memory */ template __device__ __forceinline__ void ScatterKeys( UnsignedBits (&twiddled_keys)[ITEMS_PER_THREAD], OffsetT (&relative_bin_offsets)[ITEMS_PER_THREAD], int (&ranks)[ITEMS_PER_THREAD], OffsetT valid_items) { #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { temp_storage.exchange_keys[ranks[ITEM]] = twiddled_keys[ITEM]; } CTA_SYNC(); #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { UnsignedBits key = temp_storage.exchange_keys[threadIdx.x + (ITEM * BLOCK_THREADS)]; UnsignedBits digit = BFE(key, current_bit, num_bits); relative_bin_offsets[ITEM] = temp_storage.relative_bin_offsets[digit]; // Un-twiddle key = Traits::TwiddleOut(key); if (FULL_TILE || (static_cast(threadIdx.x + (ITEM * BLOCK_THREADS)) < valid_items)) { d_keys_out[relative_bin_offsets[ITEM] + threadIdx.x + (ITEM * BLOCK_THREADS)] = key; } } } /** * Scatter ranked values through shared memory, then to device-accessible memory */ template __device__ __forceinline__ void ScatterValues( ValueT (&values)[ITEMS_PER_THREAD], OffsetT (&relative_bin_offsets)[ITEMS_PER_THREAD], int (&ranks)[ITEMS_PER_THREAD], OffsetT valid_items) { CTA_SYNC(); #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { temp_storage.exchange_values[ranks[ITEM]] = values[ITEM]; } CTA_SYNC(); #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { ValueT value = temp_storage.exchange_values[threadIdx.x + (ITEM * BLOCK_THREADS)]; if (FULL_TILE || (static_cast(threadIdx.x + (ITEM * BLOCK_THREADS)) < valid_items)) { d_values_out[relative_bin_offsets[ITEM] + threadIdx.x + (ITEM * BLOCK_THREADS)] = value; } } } /** * Load a tile of keys (specialized for full tile, any ranking algorithm) */ template __device__ __forceinline__ void LoadKeys( UnsignedBits (&keys)[ITEMS_PER_THREAD], OffsetT block_offset, OffsetT valid_items, UnsignedBits oob_item, Int2Type is_full_tile, Int2Type<_RANK_ALGORITHM> rank_algorithm) { BlockLoadKeysT(temp_storage.load_keys).Load( d_keys_in + block_offset, keys); CTA_SYNC(); } /** * Load a tile of keys (specialized for partial tile, any ranking algorithm) */ template __device__ __forceinline__ void LoadKeys( UnsignedBits (&keys)[ITEMS_PER_THREAD], OffsetT block_offset, OffsetT valid_items, UnsignedBits oob_item, Int2Type is_full_tile, Int2Type<_RANK_ALGORITHM> rank_algorithm) { BlockLoadKeysT(temp_storage.load_keys).Load( d_keys_in + block_offset, keys, valid_items, oob_item); CTA_SYNC(); } /** * Load a tile of keys (specialized for full tile, match ranking algorithm) */ __device__ __forceinline__ void LoadKeys( UnsignedBits (&keys)[ITEMS_PER_THREAD], OffsetT block_offset, OffsetT valid_items, UnsignedBits oob_item, Int2Type is_full_tile, Int2Type rank_algorithm) { LoadDirectWarpStriped(threadIdx.x, d_keys_in + block_offset, keys); } /** * Load a tile of keys (specialized for partial tile, match ranking algorithm) */ __device__ __forceinline__ void LoadKeys( UnsignedBits (&keys)[ITEMS_PER_THREAD], OffsetT block_offset, OffsetT valid_items, UnsignedBits oob_item, Int2Type is_full_tile, Int2Type rank_algorithm) { LoadDirectWarpStriped(threadIdx.x, d_keys_in + block_offset, keys, valid_items, oob_item); } /** * Load a tile of values (specialized for full tile, any ranking algorithm) */ template __device__ __forceinline__ void LoadValues( ValueT (&values)[ITEMS_PER_THREAD], OffsetT block_offset, OffsetT valid_items, Int2Type is_full_tile, Int2Type<_RANK_ALGORITHM> rank_algorithm) { BlockLoadValuesT(temp_storage.load_values).Load( 
d_values_in + block_offset, values); CTA_SYNC(); } /** * Load a tile of values (specialized for partial tile, any ranking algorithm) */ template __device__ __forceinline__ void LoadValues( ValueT (&values)[ITEMS_PER_THREAD], OffsetT block_offset, OffsetT valid_items, Int2Type is_full_tile, Int2Type<_RANK_ALGORITHM> rank_algorithm) { BlockLoadValuesT(temp_storage.load_values).Load( d_values_in + block_offset, values, valid_items); CTA_SYNC(); } /** * Load a tile of items (specialized for full tile, match ranking algorithm) */ __device__ __forceinline__ void LoadValues( ValueT (&values)[ITEMS_PER_THREAD], OffsetT block_offset, volatile OffsetT valid_items, Int2Type is_full_tile, Int2Type rank_algorithm) { LoadDirectWarpStriped(threadIdx.x, d_values_in + block_offset, values); } /** * Load a tile of items (specialized for partial tile, match ranking algorithm) */ __device__ __forceinline__ void LoadValues( ValueT (&values)[ITEMS_PER_THREAD], OffsetT block_offset, volatile OffsetT valid_items, Int2Type is_full_tile, Int2Type rank_algorithm) { LoadDirectWarpStriped(threadIdx.x, d_values_in + block_offset, values, valid_items); } /** * Truck along associated values */ template __device__ __forceinline__ void GatherScatterValues( OffsetT (&relative_bin_offsets)[ITEMS_PER_THREAD], int (&ranks)[ITEMS_PER_THREAD], OffsetT block_offset, OffsetT valid_items, Int2Type /*is_keys_only*/) { CTA_SYNC(); ValueT values[ITEMS_PER_THREAD]; LoadValues( values, block_offset, valid_items, Int2Type(), Int2Type()); ScatterValues( values, relative_bin_offsets, ranks, valid_items); } /** * Truck along associated values (specialized for key-only sorting) */ template __device__ __forceinline__ void GatherScatterValues( OffsetT (&/*relative_bin_offsets*/)[ITEMS_PER_THREAD], int (&/*ranks*/)[ITEMS_PER_THREAD], OffsetT /*block_offset*/, OffsetT /*valid_items*/, Int2Type /*is_keys_only*/) {} /** * Process tile */ template __device__ __forceinline__ void ProcessTile( OffsetT block_offset, const OffsetT &valid_items = TILE_ITEMS) { UnsignedBits keys[ITEMS_PER_THREAD]; int ranks[ITEMS_PER_THREAD]; OffsetT relative_bin_offsets[ITEMS_PER_THREAD]; // Assign default (min/max) value to all keys UnsignedBits default_key = (IS_DESCENDING) ? LOWEST_KEY : MAX_KEY; // Load tile of keys LoadKeys( keys, block_offset, valid_items, default_key, Int2Type(), Int2Type()); // Twiddle key bits if necessary #pragma unroll for (int KEY = 0; KEY < ITEMS_PER_THREAD; KEY++) { keys[KEY] = Traits::TwiddleIn(keys[KEY]); } // Rank the twiddled keys int exclusive_digit_prefix[BINS_TRACKED_PER_THREAD]; BlockRadixRankT(temp_storage.radix_rank).RankKeys( keys, ranks, current_bit, num_bits, exclusive_digit_prefix); CTA_SYNC(); // Share exclusive digit prefix #pragma unroll for (int track = 0; track < BINS_TRACKED_PER_THREAD; ++track) { int bin_idx = (threadIdx.x * BINS_TRACKED_PER_THREAD) + track; if ((BLOCK_THREADS == RADIX_DIGITS) || (bin_idx < RADIX_DIGITS)) { // Store exclusive prefix temp_storage.exclusive_digit_prefix[bin_idx] = exclusive_digit_prefix[track]; } } CTA_SYNC(); // Get inclusive digit prefix int inclusive_digit_prefix[BINS_TRACKED_PER_THREAD]; #pragma unroll for (int track = 0; track < BINS_TRACKED_PER_THREAD; ++track) { int bin_idx = (threadIdx.x * BINS_TRACKED_PER_THREAD) + track; if ((BLOCK_THREADS == RADIX_DIGITS) || (bin_idx < RADIX_DIGITS)) { if (IS_DESCENDING) { // Get inclusive digit prefix from exclusive prefix (higher bins come first) inclusive_digit_prefix[track] = (bin_idx == 0) ? 
(BLOCK_THREADS * ITEMS_PER_THREAD) : temp_storage.exclusive_digit_prefix[bin_idx - 1]; } else { // Get inclusive digit prefix from exclusive prefix (lower bins come first) inclusive_digit_prefix[track] = (bin_idx == RADIX_DIGITS - 1) ? (BLOCK_THREADS * ITEMS_PER_THREAD) : temp_storage.exclusive_digit_prefix[bin_idx + 1]; } } } CTA_SYNC(); // Update global scatter base offsets for each digit #pragma unroll for (int track = 0; track < BINS_TRACKED_PER_THREAD; ++track) { int bin_idx = (threadIdx.x * BINS_TRACKED_PER_THREAD) + track; if ((BLOCK_THREADS == RADIX_DIGITS) || (bin_idx < RADIX_DIGITS)) { bin_offset[track] -= exclusive_digit_prefix[track]; temp_storage.relative_bin_offsets[bin_idx] = bin_offset[track]; bin_offset[track] += inclusive_digit_prefix[track]; } } CTA_SYNC(); // Scatter keys ScatterKeys(keys, relative_bin_offsets, ranks, valid_items); // Gather/scatter values GatherScatterValues(relative_bin_offsets , ranks, block_offset, valid_items, Int2Type()); } //--------------------------------------------------------------------- // Copy shortcut //--------------------------------------------------------------------- /** * Copy tiles within the range of input */ template < typename InputIteratorT, typename T> __device__ __forceinline__ void Copy( InputIteratorT d_in, T *d_out, OffsetT block_offset, OffsetT block_end) { // Simply copy the input while (block_offset + TILE_ITEMS <= block_end) { T items[ITEMS_PER_THREAD]; LoadDirectStriped(threadIdx.x, d_in + block_offset, items); CTA_SYNC(); StoreDirectStriped(threadIdx.x, d_out + block_offset, items); block_offset += TILE_ITEMS; } // Clean up last partial tile with guarded-I/O if (block_offset < block_end) { OffsetT valid_items = block_end - block_offset; T items[ITEMS_PER_THREAD]; LoadDirectStriped(threadIdx.x, d_in + block_offset, items, valid_items); CTA_SYNC(); StoreDirectStriped(threadIdx.x, d_out + block_offset, items, valid_items); } } /** * Copy tiles within the range of input (specialized for NullType) */ template __device__ __forceinline__ void Copy( InputIteratorT /*d_in*/, NullType * /*d_out*/, OffsetT /*block_offset*/, OffsetT /*block_end*/) {} //--------------------------------------------------------------------- // Interface //--------------------------------------------------------------------- /** * Constructor */ __device__ __forceinline__ AgentRadixSortDownsweep( TempStorage &temp_storage, OffsetT (&bin_offset)[BINS_TRACKED_PER_THREAD], OffsetT num_items, const KeyT *d_keys_in, KeyT *d_keys_out, const ValueT *d_values_in, ValueT *d_values_out, int current_bit, int num_bits) : temp_storage(temp_storage.Alias()), d_keys_in(reinterpret_cast(d_keys_in)), d_values_in(d_values_in), d_keys_out(reinterpret_cast(d_keys_out)), d_values_out(d_values_out), current_bit(current_bit), num_bits(num_bits), short_circuit(1) { #pragma unroll for (int track = 0; track < BINS_TRACKED_PER_THREAD; ++track) { this->bin_offset[track] = bin_offset[track]; int bin_idx = (threadIdx.x * BINS_TRACKED_PER_THREAD) + track; if ((BLOCK_THREADS == RADIX_DIGITS) || (bin_idx < RADIX_DIGITS)) { // Short circuit if the histogram has only bin counts of only zeros or problem-size short_circuit = short_circuit && ((bin_offset[track] == 0) || (bin_offset[track] == num_items)); } } short_circuit = CTA_SYNC_AND(short_circuit); } /** * Constructor */ __device__ __forceinline__ AgentRadixSortDownsweep( TempStorage &temp_storage, OffsetT num_items, OffsetT *d_spine, const KeyT *d_keys_in, KeyT *d_keys_out, const ValueT *d_values_in, ValueT *d_values_out, int 
current_bit, int num_bits) : temp_storage(temp_storage.Alias()), d_keys_in(reinterpret_cast(d_keys_in)), d_values_in(d_values_in), d_keys_out(reinterpret_cast(d_keys_out)), d_values_out(d_values_out), current_bit(current_bit), num_bits(num_bits), short_circuit(1) { #pragma unroll for (int track = 0; track < BINS_TRACKED_PER_THREAD; ++track) { int bin_idx = (threadIdx.x * BINS_TRACKED_PER_THREAD) + track; // Load digit bin offsets (each of the first RADIX_DIGITS threads will load an offset for that digit) if ((BLOCK_THREADS == RADIX_DIGITS) || (bin_idx < RADIX_DIGITS)) { if (IS_DESCENDING) bin_idx = RADIX_DIGITS - bin_idx - 1; // Short circuit if the first block's histogram has only bin counts of only zeros or problem-size OffsetT first_block_bin_offset = d_spine[gridDim.x * bin_idx]; short_circuit = short_circuit && ((first_block_bin_offset == 0) || (first_block_bin_offset == num_items)); // Load my block's bin offset for my bin bin_offset[track] = d_spine[(gridDim.x * bin_idx) + blockIdx.x]; } } short_circuit = CTA_SYNC_AND(short_circuit); } /** * Distribute keys from a segment of input tiles. */ __device__ __forceinline__ void ProcessRegion( OffsetT block_offset, OffsetT block_end) { if (short_circuit) { // Copy keys Copy(d_keys_in, d_keys_out, block_offset, block_end); // Copy values Copy(d_values_in, d_values_out, block_offset, block_end); } else { // Process full tiles of tile_items while (block_offset + TILE_ITEMS <= block_end) { ProcessTile(block_offset); block_offset += TILE_ITEMS; CTA_SYNC(); } // Clean up last partial tile with guarded-I/O if (block_offset < block_end) { ProcessTile(block_offset, block_end - block_offset); } } } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/agent/agent_radix_sort_upsweep.cuh000066400000000000000000000427751411340063500253000ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ******************************************************************************/ /** * \file * AgentRadixSortUpsweep implements a stateful abstraction of CUDA thread blocks for participating in device-wide radix sort upsweep . */ #pragma once #include "../thread/thread_reduce.cuh" #include "../thread/thread_load.cuh" #include "../warp/warp_reduce.cuh" #include "../block/block_load.cuh" #include "../util_type.cuh" #include "../iterator/cache_modified_input_iterator.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * Tuning policy types ******************************************************************************/ /** * Parameterizable tuning policy type for AgentRadixSortUpsweep */ template < int _BLOCK_THREADS, ///< Threads per thread block int _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) CacheLoadModifier _LOAD_MODIFIER, ///< Cache load modifier for reading keys int _RADIX_BITS> ///< The number of radix bits, i.e., log2(bins) struct AgentRadixSortUpsweepPolicy { enum { BLOCK_THREADS = _BLOCK_THREADS, ///< Threads per thread block ITEMS_PER_THREAD = _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) RADIX_BITS = _RADIX_BITS, ///< The number of radix bits, i.e., log2(bins) }; static const CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER; ///< Cache load modifier for reading keys }; /****************************************************************************** * Thread block abstractions ******************************************************************************/ /** * \brief AgentRadixSortUpsweep implements a stateful abstraction of CUDA thread blocks for participating in device-wide radix sort upsweep . */ template < typename AgentRadixSortUpsweepPolicy, ///< Parameterized AgentRadixSortUpsweepPolicy tuning policy type typename KeyT, ///< KeyT type typename OffsetT> ///< Signed integer type for global offsets struct AgentRadixSortUpsweep { //--------------------------------------------------------------------- // Type definitions and constants //--------------------------------------------------------------------- typedef typename Traits::UnsignedBits UnsignedBits; // Integer type for digit counters (to be packed into words of PackedCounters) typedef unsigned char DigitCounter; // Integer type for packing DigitCounters into columns of shared memory banks typedef unsigned int PackedCounter; static const CacheLoadModifier LOAD_MODIFIER = AgentRadixSortUpsweepPolicy::LOAD_MODIFIER; enum { RADIX_BITS = AgentRadixSortUpsweepPolicy::RADIX_BITS, BLOCK_THREADS = AgentRadixSortUpsweepPolicy::BLOCK_THREADS, KEYS_PER_THREAD = AgentRadixSortUpsweepPolicy::ITEMS_PER_THREAD, RADIX_DIGITS = 1 << RADIX_BITS, LOG_WARP_THREADS = CUB_PTX_LOG_WARP_THREADS, WARP_THREADS = 1 << LOG_WARP_THREADS, WARPS = (BLOCK_THREADS + WARP_THREADS - 1) / WARP_THREADS, TILE_ITEMS = BLOCK_THREADS * KEYS_PER_THREAD, BYTES_PER_COUNTER = sizeof(DigitCounter), LOG_BYTES_PER_COUNTER = Log2::VALUE, PACKING_RATIO = sizeof(PackedCounter) / sizeof(DigitCounter), LOG_PACKING_RATIO = Log2::VALUE, LOG_COUNTER_LANES = CUB_MAX(0, RADIX_BITS - LOG_PACKING_RATIO), COUNTER_LANES = 1 << LOG_COUNTER_LANES, // To prevent counter overflow, we must periodically unpack and aggregate the // digit counters back into registers. Each counter lane is assigned to a // warp for aggregation. 
LANES_PER_WARP = CUB_MAX(1, (COUNTER_LANES + WARPS - 1) / WARPS), // Unroll tiles in batches without risk of counter overflow UNROLL_COUNT = CUB_MIN(64, 255 / KEYS_PER_THREAD), UNROLLED_ELEMENTS = UNROLL_COUNT * TILE_ITEMS, }; // Input iterator wrapper type (for applying cache modifier)s typedef CacheModifiedInputIterator KeysItr; /** * Shared memory storage layout */ union __align__(16) _TempStorage { DigitCounter thread_counters[COUNTER_LANES][BLOCK_THREADS][PACKING_RATIO]; PackedCounter packed_thread_counters[COUNTER_LANES][BLOCK_THREADS]; OffsetT block_counters[WARP_THREADS][RADIX_DIGITS]; }; /// Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; //--------------------------------------------------------------------- // Thread fields (aggregate state bundle) //--------------------------------------------------------------------- // Shared storage for this CTA _TempStorage &temp_storage; // Thread-local counters for periodically aggregating composite-counter lanes OffsetT local_counts[LANES_PER_WARP][PACKING_RATIO]; // Input and output device pointers KeysItr d_keys_in; // The least-significant bit position of the current digit to extract int current_bit; // Number of bits in current digit int num_bits; //--------------------------------------------------------------------- // Helper structure for templated iteration //--------------------------------------------------------------------- // Iterate template struct Iterate { // BucketKeys static __device__ __forceinline__ void BucketKeys( AgentRadixSortUpsweep &cta, UnsignedBits keys[KEYS_PER_THREAD]) { cta.Bucket(keys[COUNT]); // Next Iterate::BucketKeys(cta, keys); } }; // Terminate template struct Iterate { // BucketKeys static __device__ __forceinline__ void BucketKeys(AgentRadixSortUpsweep &/*cta*/, UnsignedBits /*keys*/[KEYS_PER_THREAD]) {} }; //--------------------------------------------------------------------- // Utility methods //--------------------------------------------------------------------- /** * Decode a key and increment corresponding smem digit counter */ __device__ __forceinline__ void Bucket(UnsignedBits key) { // Perform transform op UnsignedBits converted_key = Traits::TwiddleIn(key); // Extract current digit bits UnsignedBits digit = BFE(converted_key, current_bit, num_bits); // Get sub-counter offset UnsignedBits sub_counter = digit & (PACKING_RATIO - 1); // Get row offset UnsignedBits row_offset = digit >> LOG_PACKING_RATIO; // Increment counter temp_storage.thread_counters[row_offset][threadIdx.x][sub_counter]++; } /** * Reset composite counters */ __device__ __forceinline__ void ResetDigitCounters() { #pragma unroll for (int LANE = 0; LANE < COUNTER_LANES; LANE++) { temp_storage.packed_thread_counters[LANE][threadIdx.x] = 0; } } /** * Reset the unpacked counters in each thread */ __device__ __forceinline__ void ResetUnpackedCounters() { #pragma unroll for (int LANE = 0; LANE < LANES_PER_WARP; LANE++) { #pragma unroll for (int UNPACKED_COUNTER = 0; UNPACKED_COUNTER < PACKING_RATIO; UNPACKED_COUNTER++) { local_counts[LANE][UNPACKED_COUNTER] = 0; } } } /** * Extracts and aggregates the digit counters for each counter lane * owned by this warp */ __device__ __forceinline__ void UnpackDigitCounts() { unsigned int warp_id = threadIdx.x >> LOG_WARP_THREADS; unsigned int warp_tid = LaneId(); #pragma unroll for (int LANE = 0; LANE < LANES_PER_WARP; LANE++) { const int counter_lane = (LANE * WARPS) + warp_id; if (counter_lane < COUNTER_LANES) { #pragma unroll for (int 
PACKED_COUNTER = 0; PACKED_COUNTER < BLOCK_THREADS; PACKED_COUNTER += WARP_THREADS) { #pragma unroll for (int UNPACKED_COUNTER = 0; UNPACKED_COUNTER < PACKING_RATIO; UNPACKED_COUNTER++) { OffsetT counter = temp_storage.thread_counters[counter_lane][warp_tid + PACKED_COUNTER][UNPACKED_COUNTER]; local_counts[LANE][UNPACKED_COUNTER] += counter; } } } } } /** * Processes a single, full tile */ __device__ __forceinline__ void ProcessFullTile(OffsetT block_offset) { // Tile of keys UnsignedBits keys[KEYS_PER_THREAD]; LoadDirectStriped(threadIdx.x, d_keys_in + block_offset, keys); // Prevent hoisting CTA_SYNC(); // Bucket tile of keys Iterate<0, KEYS_PER_THREAD>::BucketKeys(*this, keys); } /** * Processes a single load (may have some threads masked off) */ __device__ __forceinline__ void ProcessPartialTile( OffsetT block_offset, const OffsetT &block_end) { // Process partial tile if necessary using single loads block_offset += threadIdx.x; while (block_offset < block_end) { // Load and bucket key UnsignedBits key = d_keys_in[block_offset]; Bucket(key); block_offset += BLOCK_THREADS; } } //--------------------------------------------------------------------- // Interface //--------------------------------------------------------------------- /** * Constructor */ __device__ __forceinline__ AgentRadixSortUpsweep( TempStorage &temp_storage, const KeyT *d_keys_in, int current_bit, int num_bits) : temp_storage(temp_storage.Alias()), d_keys_in(reinterpret_cast(d_keys_in)), current_bit(current_bit), num_bits(num_bits) {} /** * Compute radix digit histograms from a segment of input tiles. */ __device__ __forceinline__ void ProcessRegion( OffsetT block_offset, const OffsetT &block_end) { // Reset digit counters in smem and unpacked counters in registers ResetDigitCounters(); ResetUnpackedCounters(); // Unroll batches of full tiles while (block_offset + UNROLLED_ELEMENTS <= block_end) { for (int i = 0; i < UNROLL_COUNT; ++i) { ProcessFullTile(block_offset); block_offset += TILE_ITEMS; } CTA_SYNC(); // Aggregate back into local_count registers to prevent overflow UnpackDigitCounts(); CTA_SYNC(); // Reset composite counters in lanes ResetDigitCounters(); } // Unroll single full tiles while (block_offset + TILE_ITEMS <= block_end) { ProcessFullTile(block_offset); block_offset += TILE_ITEMS; } // Process partial tile if necessary ProcessPartialTile( block_offset, block_end); CTA_SYNC(); // Aggregate back into local_count registers UnpackDigitCounts(); } /** * Extract counts (saving them to the external array) */ template __device__ __forceinline__ void ExtractCounts( OffsetT *counters, int bin_stride = 1, int bin_offset = 0) { unsigned int warp_id = threadIdx.x >> LOG_WARP_THREADS; unsigned int warp_tid = LaneId(); // Place unpacked digit counters in shared memory #pragma unroll for (int LANE = 0; LANE < LANES_PER_WARP; LANE++) { int counter_lane = (LANE * WARPS) + warp_id; if (counter_lane < COUNTER_LANES) { int digit_row = counter_lane << LOG_PACKING_RATIO; #pragma unroll for (int UNPACKED_COUNTER = 0; UNPACKED_COUNTER < PACKING_RATIO; UNPACKED_COUNTER++) { int bin_idx = digit_row + UNPACKED_COUNTER; temp_storage.block_counters[warp_tid][bin_idx] = local_counts[LANE][UNPACKED_COUNTER]; } } } CTA_SYNC(); // Rake-reduce bin_count reductions // Whole blocks #pragma unroll for (int BIN_BASE = RADIX_DIGITS % BLOCK_THREADS; (BIN_BASE + BLOCK_THREADS) <= RADIX_DIGITS; BIN_BASE += BLOCK_THREADS) { int bin_idx = BIN_BASE + threadIdx.x; OffsetT bin_count = 0; #pragma unroll for (int i = 0; i < WARP_THREADS; ++i) 
bin_count += temp_storage.block_counters[i][bin_idx]; if (IS_DESCENDING) bin_idx = RADIX_DIGITS - bin_idx - 1; counters[(bin_stride * bin_idx) + bin_offset] = bin_count; } // Remainder if ((RADIX_DIGITS % BLOCK_THREADS != 0) && (threadIdx.x < RADIX_DIGITS)) { int bin_idx = threadIdx.x; OffsetT bin_count = 0; #pragma unroll for (int i = 0; i < WARP_THREADS; ++i) bin_count += temp_storage.block_counters[i][bin_idx]; if (IS_DESCENDING) bin_idx = RADIX_DIGITS - bin_idx - 1; counters[(bin_stride * bin_idx) + bin_offset] = bin_count; } } /** * Extract counts */ template __device__ __forceinline__ void ExtractCounts( OffsetT (&bin_count)[BINS_TRACKED_PER_THREAD]) ///< [out] The exclusive prefix sum for the digits [(threadIdx.x * BINS_TRACKED_PER_THREAD) ... (threadIdx.x * BINS_TRACKED_PER_THREAD) + BINS_TRACKED_PER_THREAD - 1] { unsigned int warp_id = threadIdx.x >> LOG_WARP_THREADS; unsigned int warp_tid = LaneId(); // Place unpacked digit counters in shared memory #pragma unroll for (int LANE = 0; LANE < LANES_PER_WARP; LANE++) { int counter_lane = (LANE * WARPS) + warp_id; if (counter_lane < COUNTER_LANES) { int digit_row = counter_lane << LOG_PACKING_RATIO; #pragma unroll for (int UNPACKED_COUNTER = 0; UNPACKED_COUNTER < PACKING_RATIO; UNPACKED_COUNTER++) { int bin_idx = digit_row + UNPACKED_COUNTER; temp_storage.block_counters[warp_tid][bin_idx] = local_counts[LANE][UNPACKED_COUNTER]; } } } CTA_SYNC(); // Rake-reduce bin_count reductions #pragma unroll for (int track = 0; track < BINS_TRACKED_PER_THREAD; ++track) { int bin_idx = (threadIdx.x * BINS_TRACKED_PER_THREAD) + track; if ((BLOCK_THREADS == RADIX_DIGITS) || (bin_idx < RADIX_DIGITS)) { bin_count[track] = 0; #pragma unroll for (int i = 0; i < WARP_THREADS; ++i) bin_count[track] += temp_storage.block_counters[i][bin_idx]; } } } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/agent/agent_reduce.cuh000066400000000000000000000410241411340063500226030ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::AgentReduce implements a stateful abstraction of CUDA thread blocks for participating in device-wide reduction . */ #pragma once #include #include "../block/block_load.cuh" #include "../block/block_reduce.cuh" #include "../grid/grid_mapping.cuh" #include "../grid/grid_even_share.cuh" #include "../util_type.cuh" #include "../iterator/cache_modified_input_iterator.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * Tuning policy types ******************************************************************************/ /** * Parameterizable tuning policy type for AgentReduce */ template < int _BLOCK_THREADS, ///< Threads per thread block int _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) int _VECTOR_LOAD_LENGTH, ///< Number of items per vectorized load BlockReduceAlgorithm _BLOCK_ALGORITHM, ///< Cooperative block-wide reduction algorithm to use CacheLoadModifier _LOAD_MODIFIER> ///< Cache load modifier for reading input elements struct AgentReducePolicy { enum { BLOCK_THREADS = _BLOCK_THREADS, ///< Threads per thread block ITEMS_PER_THREAD = _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) VECTOR_LOAD_LENGTH = _VECTOR_LOAD_LENGTH, ///< Number of items per vectorized load }; static const BlockReduceAlgorithm BLOCK_ALGORITHM = _BLOCK_ALGORITHM; ///< Cooperative block-wide reduction algorithm to use static const CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER; ///< Cache load modifier for reading input elements }; /****************************************************************************** * Thread block abstractions ******************************************************************************/ /** * \brief AgentReduce implements a stateful abstraction of CUDA thread blocks for participating in device-wide reduction . * * Each thread reduces only the values it loads. If \p FIRST_TILE, this * partial reduction is stored into \p thread_aggregate. Otherwise it is * accumulated into \p thread_aggregate. */ template < typename AgentReducePolicy, ///< Parameterized AgentReducePolicy tuning policy type typename InputIteratorT, ///< Random-access iterator type for input typename OutputIteratorT, ///< Random-access iterator type for output typename OffsetT, ///< Signed integer type for global offsets typename ReductionOp> ///< Binary reduction operator type having member T operator()(const T &a, const T &b) struct AgentReduce { //--------------------------------------------------------------------- // Types and constants //--------------------------------------------------------------------- /// The input value type typedef typename std::iterator_traits::value_type InputT; /// The output value type typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? 
typename std::iterator_traits::value_type, // ... then the input iterator's value type, typename std::iterator_traits::value_type>::Type OutputT; // ... else the output iterator's value type /// Vector type of InputT for data movement typedef typename CubVector::Type VectorT; /// Input iterator wrapper type (for applying cache modifier) typedef typename If::VALUE, CacheModifiedInputIterator, // Wrap the native input pointer with CacheModifiedInputIterator InputIteratorT>::Type // Directly use the supplied input iterator type WrappedInputIteratorT; /// Constants enum { BLOCK_THREADS = AgentReducePolicy::BLOCK_THREADS, ITEMS_PER_THREAD = AgentReducePolicy::ITEMS_PER_THREAD, VECTOR_LOAD_LENGTH = CUB_MIN(ITEMS_PER_THREAD, AgentReducePolicy::VECTOR_LOAD_LENGTH), TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, // Can vectorize according to the policy if the input iterator is a native pointer to a primitive type ATTEMPT_VECTORIZATION = (VECTOR_LOAD_LENGTH > 1) && (ITEMS_PER_THREAD % VECTOR_LOAD_LENGTH == 0) && (IsPointer::VALUE) && Traits::PRIMITIVE, }; static const CacheLoadModifier LOAD_MODIFIER = AgentReducePolicy::LOAD_MODIFIER; static const BlockReduceAlgorithm BLOCK_ALGORITHM = AgentReducePolicy::BLOCK_ALGORITHM; /// Parameterized BlockReduce primitive typedef BlockReduce BlockReduceT; /// Shared memory type required by this thread block struct _TempStorage { typename BlockReduceT::TempStorage reduce; }; /// Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; //--------------------------------------------------------------------- // Per-thread fields //--------------------------------------------------------------------- _TempStorage& temp_storage; ///< Reference to temp_storage InputIteratorT d_in; ///< Input data to reduce WrappedInputIteratorT d_wrapped_in; ///< Wrapped input data to reduce ReductionOp reduction_op; ///< Binary reduction operator //--------------------------------------------------------------------- // Utility //--------------------------------------------------------------------- // Whether or not the input is aligned with the vector type (specialized for types we can vectorize) template static __device__ __forceinline__ bool IsAligned( Iterator d_in, Int2Type /*can_vectorize*/) { return (size_t(d_in) & (sizeof(VectorT) - 1)) == 0; } // Whether or not the input is aligned with the vector type (specialized for types we cannot vectorize) template static __device__ __forceinline__ bool IsAligned( Iterator /*d_in*/, Int2Type /*can_vectorize*/) { return false; } //--------------------------------------------------------------------- // Constructor //--------------------------------------------------------------------- /** * Constructor */ __device__ __forceinline__ AgentReduce( TempStorage& temp_storage, ///< Reference to temp_storage InputIteratorT d_in, ///< Input data to reduce ReductionOp reduction_op) ///< Binary reduction operator : temp_storage(temp_storage.Alias()), d_in(d_in), d_wrapped_in(d_in), reduction_op(reduction_op) {} //--------------------------------------------------------------------- // Tile consumption //--------------------------------------------------------------------- /** * Consume a full tile of input (non-vectorized) */ template __device__ __forceinline__ void ConsumeTile( OutputT &thread_aggregate, OffsetT block_offset, ///< The offset the tile to consume int /*valid_items*/, ///< The number of valid items in the tile Int2Type /*is_full_tile*/, ///< Whether or not this is a full tile Int2Type 
/*can_vectorize*/) ///< Whether or not we can vectorize loads { OutputT items[ITEMS_PER_THREAD]; // Load items in striped fashion LoadDirectStriped(threadIdx.x, d_wrapped_in + block_offset, items); // Reduce items within each thread stripe thread_aggregate = (IS_FIRST_TILE) ? internal::ThreadReduce(items, reduction_op) : internal::ThreadReduce(items, reduction_op, thread_aggregate); } /** * Consume a full tile of input (vectorized) */ template __device__ __forceinline__ void ConsumeTile( OutputT &thread_aggregate, OffsetT block_offset, ///< The offset the tile to consume int /*valid_items*/, ///< The number of valid items in the tile Int2Type /*is_full_tile*/, ///< Whether or not this is a full tile Int2Type /*can_vectorize*/) ///< Whether or not we can vectorize loads { // Alias items as an array of VectorT and load it in striped fashion enum { WORDS = ITEMS_PER_THREAD / VECTOR_LOAD_LENGTH }; // Fabricate a vectorized input iterator InputT *d_in_unqualified = const_cast(d_in) + block_offset + (threadIdx.x * VECTOR_LOAD_LENGTH); CacheModifiedInputIterator d_vec_in( reinterpret_cast(d_in_unqualified)); // Load items as vector items InputT input_items[ITEMS_PER_THREAD]; VectorT *vec_items = reinterpret_cast(input_items); #pragma unroll for (int i = 0; i < WORDS; ++i) vec_items[i] = d_vec_in[BLOCK_THREADS * i]; // Convert from input type to output type OutputT items[ITEMS_PER_THREAD]; #pragma unroll for (int i = 0; i < ITEMS_PER_THREAD; ++i) items[i] = input_items[i]; // Reduce items within each thread stripe thread_aggregate = (IS_FIRST_TILE) ? internal::ThreadReduce(items, reduction_op) : internal::ThreadReduce(items, reduction_op, thread_aggregate); } /** * Consume a partial tile of input */ template __device__ __forceinline__ void ConsumeTile( OutputT &thread_aggregate, OffsetT block_offset, ///< The offset the tile to consume int valid_items, ///< The number of valid items in the tile Int2Type /*is_full_tile*/, ///< Whether or not this is a full tile Int2Type /*can_vectorize*/) ///< Whether or not we can vectorize loads { // Partial tile int thread_offset = threadIdx.x; // Read first item if ((IS_FIRST_TILE) && (thread_offset < valid_items)) { thread_aggregate = d_wrapped_in[block_offset + thread_offset]; thread_offset += BLOCK_THREADS; } // Continue reading items (block-striped) while (thread_offset < valid_items) { OutputT item = d_wrapped_in[block_offset + thread_offset]; thread_aggregate = reduction_op(thread_aggregate, item); thread_offset += BLOCK_THREADS; } } //--------------------------------------------------------------- // Consume a contiguous segment of tiles //--------------------------------------------------------------------- /** * \brief Reduce a contiguous segment of input tiles */ template __device__ __forceinline__ OutputT ConsumeRange( GridEvenShare &even_share, ///< GridEvenShare descriptor Int2Type can_vectorize) ///< Whether or not we can vectorize loads { OutputT thread_aggregate; if (even_share.block_offset + TILE_ITEMS > even_share.block_end) { // First tile isn't full (not all threads have valid items) int valid_items = even_share.block_end - even_share.block_offset; ConsumeTile(thread_aggregate, even_share.block_offset, valid_items, Int2Type(), can_vectorize); return BlockReduceT(temp_storage.reduce).Reduce(thread_aggregate, reduction_op, valid_items); } // At least one full block ConsumeTile(thread_aggregate, even_share.block_offset, TILE_ITEMS, Int2Type(), can_vectorize); even_share.block_offset += even_share.block_stride; // Consume subsequent full tiles 
of input while (even_share.block_offset + TILE_ITEMS <= even_share.block_end) { ConsumeTile(thread_aggregate, even_share.block_offset, TILE_ITEMS, Int2Type(), can_vectorize); even_share.block_offset += even_share.block_stride; } // Consume a partially-full tile if (even_share.block_offset < even_share.block_end) { int valid_items = even_share.block_end - even_share.block_offset; ConsumeTile(thread_aggregate, even_share.block_offset, valid_items, Int2Type(), can_vectorize); } // Compute block-wide reduction (all threads have valid items) return BlockReduceT(temp_storage.reduce).Reduce(thread_aggregate, reduction_op); } /** * \brief Reduce a contiguous segment of input tiles */ __device__ __forceinline__ OutputT ConsumeRange( OffsetT block_offset, ///< [in] Threadblock begin offset (inclusive) OffsetT block_end) ///< [in] Threadblock end offset (exclusive) { GridEvenShare even_share; even_share.template BlockInit(block_offset, block_end); return (IsAligned(d_in + block_offset, Int2Type())) ? ConsumeRange(even_share, Int2Type()) : ConsumeRange(even_share, Int2Type()); } /** * Reduce a contiguous segment of input tiles */ __device__ __forceinline__ OutputT ConsumeTiles( GridEvenShare &even_share) ///< [in] GridEvenShare descriptor { // Initialize GRID_MAPPING_STRIP_MINE even-share descriptor for this thread block even_share.template BlockInit(); return (IsAligned(d_in, Int2Type())) ? ConsumeRange(even_share, Int2Type()) : ConsumeRange(even_share, Int2Type()); } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/agent/agent_reduce_by_key.cuh000066400000000000000000000605231411340063500241520ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ******************************************************************************/ /** * \file * cub::AgentReduceByKey implements a stateful abstraction of CUDA thread blocks for participating in device-wide reduce-value-by-key. */ #pragma once #include #include "single_pass_scan_operators.cuh" #include "../block/block_load.cuh" #include "../block/block_store.cuh" #include "../block/block_scan.cuh" #include "../block/block_discontinuity.cuh" #include "../iterator/cache_modified_input_iterator.cuh" #include "../iterator/constant_input_iterator.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * Tuning policy types ******************************************************************************/ /** * Parameterizable tuning policy type for AgentReduceByKey */ template < int _BLOCK_THREADS, ///< Threads per thread block int _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) BlockLoadAlgorithm _LOAD_ALGORITHM, ///< The BlockLoad algorithm to use CacheLoadModifier _LOAD_MODIFIER, ///< Cache load modifier for reading input elements BlockScanAlgorithm _SCAN_ALGORITHM> ///< The BlockScan algorithm to use struct AgentReduceByKeyPolicy { enum { BLOCK_THREADS = _BLOCK_THREADS, ///< Threads per thread block ITEMS_PER_THREAD = _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) }; static const BlockLoadAlgorithm LOAD_ALGORITHM = _LOAD_ALGORITHM; ///< The BlockLoad algorithm to use static const CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER; ///< Cache load modifier for reading input elements static const BlockScanAlgorithm SCAN_ALGORITHM = _SCAN_ALGORITHM; ///< The BlockScan algorithm to use }; /****************************************************************************** * Thread block abstractions ******************************************************************************/ /** * \brief AgentReduceByKey implements a stateful abstraction of CUDA thread blocks for participating in device-wide reduce-value-by-key */ template < typename AgentReduceByKeyPolicyT, ///< Parameterized AgentReduceByKeyPolicy tuning policy type typename KeysInputIteratorT, ///< Random-access input iterator type for keys typename UniqueOutputIteratorT, ///< Random-access output iterator type for keys typename ValuesInputIteratorT, ///< Random-access input iterator type for values typename AggregatesOutputIteratorT, ///< Random-access output iterator type for values typename NumRunsOutputIteratorT, ///< Output iterator type for recording number of items selected typename EqualityOpT, ///< KeyT equality operator type typename ReductionOpT, ///< ValueT reduction operator type typename OffsetT> ///< Signed integer type for global offsets struct AgentReduceByKey { //--------------------------------------------------------------------- // Types and constants //--------------------------------------------------------------------- // The input keys type typedef typename std::iterator_traits::value_type KeyInputT; // The output keys type typedef typename If<(Equals::value_type, void>::VALUE), // KeyOutputT = (if output iterator's value type is void) ? typename std::iterator_traits::value_type, // ... then the input iterator's value type, typename std::iterator_traits::value_type>::Type KeyOutputT; // ... 
else the output iterator's value type // The input values type typedef typename std::iterator_traits::value_type ValueInputT; // The output values type typedef typename If<(Equals::value_type, void>::VALUE), // ValueOutputT = (if output iterator's value type is void) ? typename std::iterator_traits::value_type, // ... then the input iterator's value type, typename std::iterator_traits::value_type>::Type ValueOutputT; // ... else the output iterator's value type // Tuple type for scanning (pairs accumulated segment-value with segment-index) typedef KeyValuePair OffsetValuePairT; // Tuple type for pairing keys and values typedef KeyValuePair KeyValuePairT; // Tile status descriptor interface type typedef ReduceByKeyScanTileState ScanTileStateT; // Guarded inequality functor template struct GuardedInequalityWrapper { _EqualityOpT op; ///< Wrapped equality operator int num_remaining; ///< Items remaining /// Constructor __host__ __device__ __forceinline__ GuardedInequalityWrapper(_EqualityOpT op, int num_remaining) : op(op), num_remaining(num_remaining) {} /// Boolean inequality operator, returns (a != b) template __host__ __device__ __forceinline__ bool operator()(const T &a, const T &b, int idx) const { if (idx < num_remaining) return !op(a, b); // In bounds // Return true if first out-of-bounds item, false otherwise return (idx == num_remaining); } }; // Constants enum { BLOCK_THREADS = AgentReduceByKeyPolicyT::BLOCK_THREADS, ITEMS_PER_THREAD = AgentReduceByKeyPolicyT::ITEMS_PER_THREAD, TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, TWO_PHASE_SCATTER = (ITEMS_PER_THREAD > 1), // Whether or not the scan operation has a zero-valued identity value (true if we're performing addition on a primitive type) HAS_IDENTITY_ZERO = (Equals::VALUE) && (Traits::PRIMITIVE), }; // Cache-modified Input iterator wrapper type (for applying cache modifier) for keys typedef typename If::VALUE, CacheModifiedInputIterator, // Wrap the native input pointer with CacheModifiedValuesInputIterator KeysInputIteratorT>::Type // Directly use the supplied input iterator type WrappedKeysInputIteratorT; // Cache-modified Input iterator wrapper type (for applying cache modifier) for values typedef typename If::VALUE, CacheModifiedInputIterator, // Wrap the native input pointer with CacheModifiedValuesInputIterator ValuesInputIteratorT>::Type // Directly use the supplied input iterator type WrappedValuesInputIteratorT; // Cache-modified Input iterator wrapper type (for applying cache modifier) for fixup values typedef typename If::VALUE, CacheModifiedInputIterator, // Wrap the native input pointer with CacheModifiedValuesInputIterator AggregatesOutputIteratorT>::Type // Directly use the supplied input iterator type WrappedFixupInputIteratorT; // Reduce-value-by-segment scan operator typedef ReduceBySegmentOp ReduceBySegmentOpT; // Parameterized BlockLoad type for keys typedef BlockLoad< KeyOutputT, BLOCK_THREADS, ITEMS_PER_THREAD, AgentReduceByKeyPolicyT::LOAD_ALGORITHM> BlockLoadKeysT; // Parameterized BlockLoad type for values typedef BlockLoad< ValueOutputT, BLOCK_THREADS, ITEMS_PER_THREAD, AgentReduceByKeyPolicyT::LOAD_ALGORITHM> BlockLoadValuesT; // Parameterized BlockDiscontinuity type for keys typedef BlockDiscontinuity< KeyOutputT, BLOCK_THREADS> BlockDiscontinuityKeys; // Parameterized BlockScan type typedef BlockScan< OffsetValuePairT, BLOCK_THREADS, AgentReduceByKeyPolicyT::SCAN_ALGORITHM> BlockScanT; // Callback type for obtaining tile prefix during block scan typedef TilePrefixCallbackOp< OffsetValuePairT, 
ReduceBySegmentOpT, ScanTileStateT> TilePrefixCallbackOpT; // Key and value exchange types typedef KeyOutputT KeyExchangeT[TILE_ITEMS + 1]; typedef ValueOutputT ValueExchangeT[TILE_ITEMS + 1]; // Shared memory type for this thread block union _TempStorage { struct { typename BlockScanT::TempStorage scan; // Smem needed for tile scanning typename TilePrefixCallbackOpT::TempStorage prefix; // Smem needed for cooperative prefix callback typename BlockDiscontinuityKeys::TempStorage discontinuity; // Smem needed for discontinuity detection }; // Smem needed for loading keys typename BlockLoadKeysT::TempStorage load_keys; // Smem needed for loading values typename BlockLoadValuesT::TempStorage load_values; // Smem needed for compacting key value pairs(allows non POD items in this union) Uninitialized raw_exchange; }; // Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; //--------------------------------------------------------------------- // Per-thread fields //--------------------------------------------------------------------- _TempStorage& temp_storage; ///< Reference to temp_storage WrappedKeysInputIteratorT d_keys_in; ///< Input keys UniqueOutputIteratorT d_unique_out; ///< Unique output keys WrappedValuesInputIteratorT d_values_in; ///< Input values AggregatesOutputIteratorT d_aggregates_out; ///< Output value aggregates NumRunsOutputIteratorT d_num_runs_out; ///< Output pointer for total number of segments identified EqualityOpT equality_op; ///< KeyT equality operator ReductionOpT reduction_op; ///< Reduction operator ReduceBySegmentOpT scan_op; ///< Reduce-by-segment scan operator //--------------------------------------------------------------------- // Constructor //--------------------------------------------------------------------- // Constructor __device__ __forceinline__ AgentReduceByKey( TempStorage& temp_storage, ///< Reference to temp_storage KeysInputIteratorT d_keys_in, ///< Input keys UniqueOutputIteratorT d_unique_out, ///< Unique output keys ValuesInputIteratorT d_values_in, ///< Input values AggregatesOutputIteratorT d_aggregates_out, ///< Output value aggregates NumRunsOutputIteratorT d_num_runs_out, ///< Output pointer for total number of segments identified EqualityOpT equality_op, ///< KeyT equality operator ReductionOpT reduction_op) ///< ValueT reduction operator : temp_storage(temp_storage.Alias()), d_keys_in(d_keys_in), d_unique_out(d_unique_out), d_values_in(d_values_in), d_aggregates_out(d_aggregates_out), d_num_runs_out(d_num_runs_out), equality_op(equality_op), reduction_op(reduction_op), scan_op(reduction_op) {} //--------------------------------------------------------------------- // Scatter utility methods //--------------------------------------------------------------------- /** * Directly scatter flagged items to output offsets */ __device__ __forceinline__ void ScatterDirect( KeyValuePairT (&scatter_items)[ITEMS_PER_THREAD], OffsetT (&segment_flags)[ITEMS_PER_THREAD], OffsetT (&segment_indices)[ITEMS_PER_THREAD]) { // Scatter flagged keys and values #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { if (segment_flags[ITEM]) { d_unique_out[segment_indices[ITEM]] = scatter_items[ITEM].key; d_aggregates_out[segment_indices[ITEM]] = scatter_items[ITEM].value; } } } /** * 2-phase scatter flagged items to output offsets * * The exclusive scan causes each head flag to be paired with the previous * value aggregate: the scatter offsets must be decremented for value aggregates */ __device__ 
__forceinline__ void ScatterTwoPhase( KeyValuePairT (&scatter_items)[ITEMS_PER_THREAD], OffsetT (&segment_flags)[ITEMS_PER_THREAD], OffsetT (&segment_indices)[ITEMS_PER_THREAD], OffsetT num_tile_segments, OffsetT num_tile_segments_prefix) { CTA_SYNC(); // Compact and scatter pairs #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { if (segment_flags[ITEM]) { temp_storage.raw_exchange.Alias()[segment_indices[ITEM] - num_tile_segments_prefix] = scatter_items[ITEM]; } } CTA_SYNC(); for (int item = threadIdx.x; item < num_tile_segments; item += BLOCK_THREADS) { KeyValuePairT pair = temp_storage.raw_exchange.Alias()[item]; d_unique_out[num_tile_segments_prefix + item] = pair.key; d_aggregates_out[num_tile_segments_prefix + item] = pair.value; } } /** * Scatter flagged items */ __device__ __forceinline__ void Scatter( KeyValuePairT (&scatter_items)[ITEMS_PER_THREAD], OffsetT (&segment_flags)[ITEMS_PER_THREAD], OffsetT (&segment_indices)[ITEMS_PER_THREAD], OffsetT num_tile_segments, OffsetT num_tile_segments_prefix) { // Do a one-phase scatter if (a) two-phase is disabled or (b) the average number of selected items per thread is less than one if (TWO_PHASE_SCATTER && (num_tile_segments > BLOCK_THREADS)) { ScatterTwoPhase( scatter_items, segment_flags, segment_indices, num_tile_segments, num_tile_segments_prefix); } else { ScatterDirect( scatter_items, segment_flags, segment_indices); } } //--------------------------------------------------------------------- // Cooperatively scan a device-wide sequence of tiles with other CTAs //--------------------------------------------------------------------- /** * Process a tile of input (dynamic chained scan) */ template ///< Whether the current tile is the last tile __device__ __forceinline__ void ConsumeTile( OffsetT num_remaining, ///< Number of global input items remaining (including this tile) int tile_idx, ///< Tile index OffsetT tile_offset, ///< Tile offset ScanTileStateT& tile_state) ///< Global tile state descriptor { KeyOutputT keys[ITEMS_PER_THREAD]; // Tile keys KeyOutputT prev_keys[ITEMS_PER_THREAD]; // Tile keys shuffled up ValueOutputT values[ITEMS_PER_THREAD]; // Tile values OffsetT head_flags[ITEMS_PER_THREAD]; // Segment head flags OffsetT segment_indices[ITEMS_PER_THREAD]; // Segment indices OffsetValuePairT scan_items[ITEMS_PER_THREAD]; // Zipped values and segment flags|indices KeyValuePairT scatter_items[ITEMS_PER_THREAD]; // Zipped key value pairs for scattering // Load keys if (IS_LAST_TILE) BlockLoadKeysT(temp_storage.load_keys).Load(d_keys_in + tile_offset, keys, num_remaining); else BlockLoadKeysT(temp_storage.load_keys).Load(d_keys_in + tile_offset, keys); // Load tile predecessor key in first thread KeyOutputT tile_predecessor; if (threadIdx.x == 0) { tile_predecessor = (tile_idx == 0) ? 
keys[0] : // First tile gets repeat of first item (thus first item will not be flagged as a head) d_keys_in[tile_offset - 1]; // Subsequent tiles get last key from previous tile } CTA_SYNC(); // Load values if (IS_LAST_TILE) BlockLoadValuesT(temp_storage.load_values).Load(d_values_in + tile_offset, values, num_remaining); else BlockLoadValuesT(temp_storage.load_values).Load(d_values_in + tile_offset, values); CTA_SYNC(); // Initialize head-flags and shuffle up the previous keys if (IS_LAST_TILE) { // Use custom flag operator to additionally flag the first out-of-bounds item GuardedInequalityWrapper flag_op(equality_op, num_remaining); BlockDiscontinuityKeys(temp_storage.discontinuity).FlagHeads( head_flags, keys, prev_keys, flag_op, tile_predecessor); } else { InequalityWrapper flag_op(equality_op); BlockDiscontinuityKeys(temp_storage.discontinuity).FlagHeads( head_flags, keys, prev_keys, flag_op, tile_predecessor); } // Zip values and head flags #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { scan_items[ITEM].value = values[ITEM]; scan_items[ITEM].key = head_flags[ITEM]; } // Perform exclusive tile scan OffsetValuePairT block_aggregate; // Inclusive block-wide scan aggregate OffsetT num_segments_prefix; // Number of segments prior to this tile ValueOutputT total_aggregate; // The tile prefix folded with block_aggregate if (tile_idx == 0) { // Scan first tile BlockScanT(temp_storage.scan).ExclusiveScan(scan_items, scan_items, scan_op, block_aggregate); num_segments_prefix = 0; total_aggregate = block_aggregate.value; // Update tile status if there are successor tiles if ((!IS_LAST_TILE) && (threadIdx.x == 0)) tile_state.SetInclusive(0, block_aggregate); } else { // Scan non-first tile TilePrefixCallbackOpT prefix_op(tile_state, temp_storage.prefix, scan_op, tile_idx); BlockScanT(temp_storage.scan).ExclusiveScan(scan_items, scan_items, scan_op, prefix_op); block_aggregate = prefix_op.GetBlockAggregate(); num_segments_prefix = prefix_op.GetExclusivePrefix().key; total_aggregate = reduction_op( prefix_op.GetExclusivePrefix().value, block_aggregate.value); } // Rezip scatter items and segment indices #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { scatter_items[ITEM].key = prev_keys[ITEM]; scatter_items[ITEM].value = scan_items[ITEM].value; segment_indices[ITEM] = scan_items[ITEM].key; } // At this point, each flagged segment head has: // - The key for the previous segment // - The reduced value from the previous segment // - The segment index for the reduced value // Scatter flagged keys and values OffsetT num_tile_segments = block_aggregate.key; Scatter(scatter_items, head_flags, segment_indices, num_tile_segments, num_segments_prefix); // Last thread in last tile will output final count (and last pair, if necessary) if ((IS_LAST_TILE) && (threadIdx.x == BLOCK_THREADS - 1)) { OffsetT num_segments = num_segments_prefix + num_tile_segments; // If the last tile is a whole tile, output the final_value if (num_remaining == TILE_ITEMS) { d_unique_out[num_segments] = keys[ITEMS_PER_THREAD - 1]; d_aggregates_out[num_segments] = total_aggregate; num_segments++; } // Output the total number of items selected *d_num_runs_out = num_segments; } } /** * Scan tiles of items as part of a dynamic chained scan */ __device__ __forceinline__ void ConsumeRange( int num_items, ///< Total number of input items ScanTileStateT& tile_state, ///< Global tile state descriptor int start_tile) ///< The starting tile for the current grid { // Blocks are launched in increasing 
order, so just assign one tile per block int tile_idx = start_tile + blockIdx.x; // Current tile index OffsetT tile_offset = OffsetT(TILE_ITEMS) * tile_idx; // Global offset for the current tile OffsetT num_remaining = num_items - tile_offset; // Remaining items (including this tile) if (num_remaining > TILE_ITEMS) { // Not last tile ConsumeTile(num_remaining, tile_idx, tile_offset, tile_state); } else if (num_remaining > 0) { // Last tile ConsumeTile(num_remaining, tile_idx, tile_offset, tile_state); } } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/agent/agent_rle.cuh000066400000000000000000001055171411340063500221260ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::AgentRle implements a stateful abstraction of CUDA thread blocks for participating in device-wide run-length-encode. 
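 *
 * Usage sketch (added for clarity; not part of the original NVIDIA header):
 * AgentRle is the per-thread-block worker behind the device-level entry point
 * cub::DeviceRunLengthEncode::NonTrivialRuns. Assuming the standard CUB 1.x
 * two-phase calling convention (first call sizes the workspace, second call
 * launches the kernels), a minimal host-side use looks like:
 *
 * \code
 * #include <cub/cub.cuh>      // bundled in RELION under src/gpu_utils/cub
 *
 * int  num_items      = 7;
 * int *d_in           = ...;  // e.g. [0, 0, 1, 5, 5, 5, 8]
 * int *d_offsets_out  = ...;  // start offset of each non-trivial run
 * int *d_lengths_out  = ...;  // length of each non-trivial run
 * int *d_num_runs_out = ...;  // single int: number of non-trivial runs
 *
 * // First call with a NULL workspace only computes temp_storage_bytes
 * void  *d_temp_storage     = NULL;
 * size_t temp_storage_bytes = 0;
 * cub::DeviceRunLengthEncode::NonTrivialRuns(d_temp_storage, temp_storage_bytes,
 *     d_in, d_offsets_out, d_lengths_out, d_num_runs_out, num_items);
 * cudaMalloc(&d_temp_storage, temp_storage_bytes);
 *
 * // Second call performs the encode; for the input above it yields
 * // offsets [0, 3], lengths [2, 3], and *d_num_runs_out == 2
 * // (runs of length 1 are "trivial" and are skipped)
 * cub::DeviceRunLengthEncode::NonTrivialRuns(d_temp_storage, temp_storage_bytes,
 *     d_in, d_offsets_out, d_lengths_out, d_num_runs_out, num_items);
 * \endcode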
*/ #pragma once #include #include "single_pass_scan_operators.cuh" #include "../block/block_load.cuh" #include "../block/block_store.cuh" #include "../block/block_scan.cuh" #include "../block/block_exchange.cuh" #include "../block/block_discontinuity.cuh" #include "../grid/grid_queue.cuh" #include "../iterator/cache_modified_input_iterator.cuh" #include "../iterator/constant_input_iterator.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * Tuning policy types ******************************************************************************/ /** * Parameterizable tuning policy type for AgentRle */ template < int _BLOCK_THREADS, ///< Threads per thread block int _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) BlockLoadAlgorithm _LOAD_ALGORITHM, ///< The BlockLoad algorithm to use CacheLoadModifier _LOAD_MODIFIER, ///< Cache load modifier for reading input elements bool _STORE_WARP_TIME_SLICING, ///< Whether or not only one warp's worth of shared memory should be allocated and time-sliced among block-warps during any store-related data transpositions (versus each warp having its own storage) BlockScanAlgorithm _SCAN_ALGORITHM> ///< The BlockScan algorithm to use struct AgentRlePolicy { enum { BLOCK_THREADS = _BLOCK_THREADS, ///< Threads per thread block ITEMS_PER_THREAD = _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) STORE_WARP_TIME_SLICING = _STORE_WARP_TIME_SLICING, ///< Whether or not only one warp's worth of shared memory should be allocated and time-sliced among block-warps during any store-related data transpositions (versus each warp having its own storage) }; static const BlockLoadAlgorithm LOAD_ALGORITHM = _LOAD_ALGORITHM; ///< The BlockLoad algorithm to use static const CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER; ///< Cache load modifier for reading input elements static const BlockScanAlgorithm SCAN_ALGORITHM = _SCAN_ALGORITHM; ///< The BlockScan algorithm to use }; /****************************************************************************** * Thread block abstractions ******************************************************************************/ /** * \brief AgentRle implements a stateful abstraction of CUDA thread blocks for participating in device-wide run-length-encode */ template < typename AgentRlePolicyT, ///< Parameterized AgentRlePolicyT tuning policy type typename InputIteratorT, ///< Random-access input iterator type for data typename OffsetsOutputIteratorT, ///< Random-access output iterator type for offset values typename LengthsOutputIteratorT, ///< Random-access output iterator type for length values typename EqualityOpT, ///< T equality operator type typename OffsetT> ///< Signed integer type for global offsets struct AgentRle { //--------------------------------------------------------------------- // Types and constants //--------------------------------------------------------------------- /// The input value type typedef typename std::iterator_traits::value_type T; /// The lengths output value type typedef typename If<(Equals::value_type, void>::VALUE), // LengthT = (if output iterator's value type is void) ? OffsetT, // ... then the OffsetT type, typename std::iterator_traits::value_type>::Type LengthT; // ... 
else the output iterator's value type /// Tuple type for scanning (pairs run-length and run-index) typedef KeyValuePair LengthOffsetPair; /// Tile status descriptor interface type typedef ReduceByKeyScanTileState ScanTileStateT; // Constants enum { WARP_THREADS = CUB_WARP_THREADS(PTX_ARCH), BLOCK_THREADS = AgentRlePolicyT::BLOCK_THREADS, ITEMS_PER_THREAD = AgentRlePolicyT::ITEMS_PER_THREAD, WARP_ITEMS = WARP_THREADS * ITEMS_PER_THREAD, TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, WARPS = (BLOCK_THREADS + WARP_THREADS - 1) / WARP_THREADS, /// Whether or not to sync after loading data SYNC_AFTER_LOAD = (AgentRlePolicyT::LOAD_ALGORITHM != BLOCK_LOAD_DIRECT), /// Whether or not only one warp's worth of shared memory should be allocated and time-sliced among block-warps during any store-related data transpositions (versus each warp having its own storage) STORE_WARP_TIME_SLICING = AgentRlePolicyT::STORE_WARP_TIME_SLICING, ACTIVE_EXCHANGE_WARPS = (STORE_WARP_TIME_SLICING) ? 1 : WARPS, }; /** * Special operator that signals all out-of-bounds items are not equal to everything else, * forcing both (1) the last item to be tail-flagged and (2) all oob items to be marked * trivial. */ template struct OobInequalityOp { OffsetT num_remaining; EqualityOpT equality_op; __device__ __forceinline__ OobInequalityOp( OffsetT num_remaining, EqualityOpT equality_op) : num_remaining(num_remaining), equality_op(equality_op) {} template __host__ __device__ __forceinline__ bool operator()(T first, T second, Index idx) { if (!LAST_TILE || (idx < num_remaining)) return !equality_op(first, second); else return true; } }; // Cache-modified Input iterator wrapper type (for applying cache modifier) for data typedef typename If::VALUE, CacheModifiedInputIterator, // Wrap the native input pointer with CacheModifiedVLengthnputIterator InputIteratorT>::Type // Directly use the supplied input iterator type WrappedInputIteratorT; // Parameterized BlockLoad type for data typedef BlockLoad< T, AgentRlePolicyT::BLOCK_THREADS, AgentRlePolicyT::ITEMS_PER_THREAD, AgentRlePolicyT::LOAD_ALGORITHM> BlockLoadT; // Parameterized BlockDiscontinuity type for data typedef BlockDiscontinuity BlockDiscontinuityT; // Parameterized WarpScan type typedef WarpScan WarpScanPairs; // Reduce-length-by-run scan operator typedef ReduceBySegmentOp ReduceBySegmentOpT; // Callback type for obtaining tile prefix during block scan typedef TilePrefixCallbackOp< LengthOffsetPair, ReduceBySegmentOpT, ScanTileStateT> TilePrefixCallbackOpT; // Warp exchange types typedef WarpExchange WarpExchangePairs; typedef typename If::Type WarpExchangePairsStorage; typedef WarpExchange WarpExchangeOffsets; typedef WarpExchange WarpExchangeLengths; typedef LengthOffsetPair WarpAggregates[WARPS]; // Shared memory type for this thread block struct _TempStorage { // Aliasable storage layout union Aliasable { struct { typename BlockDiscontinuityT::TempStorage discontinuity; // Smem needed for discontinuity detection typename WarpScanPairs::TempStorage warp_scan[WARPS]; // Smem needed for warp-synchronous scans Uninitialized warp_aggregates; // Smem needed for sharing warp-wide aggregates typename TilePrefixCallbackOpT::TempStorage prefix; // Smem needed for cooperative prefix callback }; // Smem needed for input loading typename BlockLoadT::TempStorage load; // Aliasable layout needed for two-phase scatter union ScatterAliasable { unsigned long long align; WarpExchangePairsStorage exchange_pairs[ACTIVE_EXCHANGE_WARPS]; typename WarpExchangeOffsets::TempStorage 
exchange_offsets[ACTIVE_EXCHANGE_WARPS]; typename WarpExchangeLengths::TempStorage exchange_lengths[ACTIVE_EXCHANGE_WARPS]; } scatter_aliasable; } aliasable; OffsetT tile_idx; // Shared tile index LengthOffsetPair tile_inclusive; // Inclusive tile prefix LengthOffsetPair tile_exclusive; // Exclusive tile prefix }; // Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; //--------------------------------------------------------------------- // Per-thread fields //--------------------------------------------------------------------- _TempStorage& temp_storage; ///< Reference to temp_storage WrappedInputIteratorT d_in; ///< Pointer to input sequence of data items OffsetsOutputIteratorT d_offsets_out; ///< Input run offsets LengthsOutputIteratorT d_lengths_out; ///< Output run lengths EqualityOpT equality_op; ///< T equality operator ReduceBySegmentOpT scan_op; ///< Reduce-length-by-flag scan operator OffsetT num_items; ///< Total number of input items //--------------------------------------------------------------------- // Constructor //--------------------------------------------------------------------- // Constructor __device__ __forceinline__ AgentRle( TempStorage &temp_storage, ///< [in] Reference to temp_storage InputIteratorT d_in, ///< [in] Pointer to input sequence of data items OffsetsOutputIteratorT d_offsets_out, ///< [out] Pointer to output sequence of run offsets LengthsOutputIteratorT d_lengths_out, ///< [out] Pointer to output sequence of run lengths EqualityOpT equality_op, ///< [in] T equality operator OffsetT num_items) ///< [in] Total number of input items : temp_storage(temp_storage.Alias()), d_in(d_in), d_offsets_out(d_offsets_out), d_lengths_out(d_lengths_out), equality_op(equality_op), scan_op(cub::Sum()), num_items(num_items) {} //--------------------------------------------------------------------- // Utility methods for initializing the selections //--------------------------------------------------------------------- template __device__ __forceinline__ void InitializeSelections( OffsetT tile_offset, OffsetT num_remaining, T (&items)[ITEMS_PER_THREAD], LengthOffsetPair (&lengths_and_num_runs)[ITEMS_PER_THREAD]) { bool head_flags[ITEMS_PER_THREAD]; bool tail_flags[ITEMS_PER_THREAD]; OobInequalityOp inequality_op(num_remaining, equality_op); if (FIRST_TILE && LAST_TILE) { // First-and-last-tile always head-flags the first item and tail-flags the last item BlockDiscontinuityT(temp_storage.aliasable.discontinuity).FlagHeadsAndTails( head_flags, tail_flags, items, inequality_op); } else if (FIRST_TILE) { // First-tile always head-flags the first item // Get the first item from the next tile T tile_successor_item; if (threadIdx.x == BLOCK_THREADS - 1) tile_successor_item = d_in[tile_offset + TILE_ITEMS]; BlockDiscontinuityT(temp_storage.aliasable.discontinuity).FlagHeadsAndTails( head_flags, tail_flags, tile_successor_item, items, inequality_op); } else if (LAST_TILE) { // Last-tile always flags the last item // Get the last item from the previous tile T tile_predecessor_item; if (threadIdx.x == 0) tile_predecessor_item = d_in[tile_offset - 1]; BlockDiscontinuityT(temp_storage.aliasable.discontinuity).FlagHeadsAndTails( head_flags, tile_predecessor_item, tail_flags, items, inequality_op); } else { // Get the first item from the next tile T tile_successor_item; if (threadIdx.x == BLOCK_THREADS - 1) tile_successor_item = d_in[tile_offset + TILE_ITEMS]; // Get the last item from the previous tile T tile_predecessor_item; if 
(threadIdx.x == 0) tile_predecessor_item = d_in[tile_offset - 1]; BlockDiscontinuityT(temp_storage.aliasable.discontinuity).FlagHeadsAndTails( head_flags, tile_predecessor_item, tail_flags, tile_successor_item, items, inequality_op); } // Zip counts and runs #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { lengths_and_num_runs[ITEM].key = head_flags[ITEM] && (!tail_flags[ITEM]); lengths_and_num_runs[ITEM].value = ((!head_flags[ITEM]) || (!tail_flags[ITEM])); } } //--------------------------------------------------------------------- // Scan utility methods //--------------------------------------------------------------------- /** * Scan of allocations */ __device__ __forceinline__ void WarpScanAllocations( LengthOffsetPair &tile_aggregate, LengthOffsetPair &warp_aggregate, LengthOffsetPair &warp_exclusive_in_tile, LengthOffsetPair &thread_exclusive_in_warp, LengthOffsetPair (&lengths_and_num_runs)[ITEMS_PER_THREAD]) { // Perform warpscans unsigned int warp_id = ((WARPS == 1) ? 0 : threadIdx.x / WARP_THREADS); int lane_id = LaneId(); LengthOffsetPair identity; identity.key = 0; identity.value = 0; LengthOffsetPair thread_inclusive; LengthOffsetPair thread_aggregate = internal::ThreadReduce(lengths_and_num_runs, scan_op); WarpScanPairs(temp_storage.aliasable.warp_scan[warp_id]).Scan( thread_aggregate, thread_inclusive, thread_exclusive_in_warp, identity, scan_op); // Last lane in each warp shares its warp-aggregate if (lane_id == WARP_THREADS - 1) temp_storage.aliasable.warp_aggregates.Alias()[warp_id] = thread_inclusive; CTA_SYNC(); // Accumulate total selected and the warp-wide prefix warp_exclusive_in_tile = identity; warp_aggregate = temp_storage.aliasable.warp_aggregates.Alias()[warp_id]; tile_aggregate = temp_storage.aliasable.warp_aggregates.Alias()[0]; #pragma unroll for (int WARP = 1; WARP < WARPS; ++WARP) { if (warp_id == WARP) warp_exclusive_in_tile = tile_aggregate; tile_aggregate = scan_op(tile_aggregate, temp_storage.aliasable.warp_aggregates.Alias()[WARP]); } } //--------------------------------------------------------------------- // Utility methods for scattering selections //--------------------------------------------------------------------- /** * Two-phase scatter, specialized for warp time-slicing */ template __device__ __forceinline__ void ScatterTwoPhase( OffsetT tile_num_runs_exclusive_in_global, OffsetT warp_num_runs_aggregate, OffsetT warp_num_runs_exclusive_in_tile, OffsetT (&thread_num_runs_exclusive_in_warp)[ITEMS_PER_THREAD], LengthOffsetPair (&lengths_and_offsets)[ITEMS_PER_THREAD], Int2Type is_warp_time_slice) { unsigned int warp_id = ((WARPS == 1) ? 
0 : threadIdx.x / WARP_THREADS); int lane_id = LaneId(); // Locally compact items within the warp (first warp) if (warp_id == 0) { WarpExchangePairs(temp_storage.aliasable.scatter_aliasable.exchange_pairs[0]).ScatterToStriped( lengths_and_offsets, thread_num_runs_exclusive_in_warp); } // Locally compact items within the warp (remaining warps) #pragma unroll for (int SLICE = 1; SLICE < WARPS; ++SLICE) { CTA_SYNC(); if (warp_id == SLICE) { WarpExchangePairs(temp_storage.aliasable.scatter_aliasable.exchange_pairs[0]).ScatterToStriped( lengths_and_offsets, thread_num_runs_exclusive_in_warp); } } // Global scatter #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { if ((ITEM * WARP_THREADS) < warp_num_runs_aggregate - lane_id) { OffsetT item_offset = tile_num_runs_exclusive_in_global + warp_num_runs_exclusive_in_tile + (ITEM * WARP_THREADS) + lane_id; // Scatter offset d_offsets_out[item_offset] = lengths_and_offsets[ITEM].key; // Scatter length if not the first (global) length if ((!FIRST_TILE) || (ITEM != 0) || (threadIdx.x > 0)) { d_lengths_out[item_offset - 1] = lengths_and_offsets[ITEM].value; } } } } /** * Two-phase scatter */ template __device__ __forceinline__ void ScatterTwoPhase( OffsetT tile_num_runs_exclusive_in_global, OffsetT warp_num_runs_aggregate, OffsetT warp_num_runs_exclusive_in_tile, OffsetT (&thread_num_runs_exclusive_in_warp)[ITEMS_PER_THREAD], LengthOffsetPair (&lengths_and_offsets)[ITEMS_PER_THREAD], Int2Type is_warp_time_slice) { unsigned int warp_id = ((WARPS == 1) ? 0 : threadIdx.x / WARP_THREADS); int lane_id = LaneId(); // Unzip OffsetT run_offsets[ITEMS_PER_THREAD]; LengthT run_lengths[ITEMS_PER_THREAD]; #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { run_offsets[ITEM] = lengths_and_offsets[ITEM].key; run_lengths[ITEM] = lengths_and_offsets[ITEM].value; } WarpExchangeOffsets(temp_storage.aliasable.scatter_aliasable.exchange_offsets[warp_id]).ScatterToStriped( run_offsets, thread_num_runs_exclusive_in_warp); WARP_SYNC(0xffffffff); WarpExchangeLengths(temp_storage.aliasable.scatter_aliasable.exchange_lengths[warp_id]).ScatterToStriped( run_lengths, thread_num_runs_exclusive_in_warp); // Global scatter #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { if ((ITEM * WARP_THREADS) + lane_id < warp_num_runs_aggregate) { OffsetT item_offset = tile_num_runs_exclusive_in_global + warp_num_runs_exclusive_in_tile + (ITEM * WARP_THREADS) + lane_id; // Scatter offset d_offsets_out[item_offset] = run_offsets[ITEM]; // Scatter length if not the first (global) length if ((!FIRST_TILE) || (ITEM != 0) || (threadIdx.x > 0)) { d_lengths_out[item_offset - 1] = run_lengths[ITEM]; } } } } /** * Direct scatter */ template __device__ __forceinline__ void ScatterDirect( OffsetT tile_num_runs_exclusive_in_global, OffsetT warp_num_runs_aggregate, OffsetT warp_num_runs_exclusive_in_tile, OffsetT (&thread_num_runs_exclusive_in_warp)[ITEMS_PER_THREAD], LengthOffsetPair (&lengths_and_offsets)[ITEMS_PER_THREAD]) { #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { if (thread_num_runs_exclusive_in_warp[ITEM] < warp_num_runs_aggregate) { OffsetT item_offset = tile_num_runs_exclusive_in_global + warp_num_runs_exclusive_in_tile + thread_num_runs_exclusive_in_warp[ITEM]; // Scatter offset d_offsets_out[item_offset] = lengths_and_offsets[ITEM].key; // Scatter length if not the first (global) length if (item_offset >= 1) { d_lengths_out[item_offset - 1] = lengths_and_offsets[ITEM].value; } } } } /** * Scatter */ template __device__ 
__forceinline__ void Scatter( OffsetT tile_num_runs_aggregate, OffsetT tile_num_runs_exclusive_in_global, OffsetT warp_num_runs_aggregate, OffsetT warp_num_runs_exclusive_in_tile, OffsetT (&thread_num_runs_exclusive_in_warp)[ITEMS_PER_THREAD], LengthOffsetPair (&lengths_and_offsets)[ITEMS_PER_THREAD]) { if ((ITEMS_PER_THREAD == 1) || (tile_num_runs_aggregate < BLOCK_THREADS)) { // Direct scatter if the warp has any items if (warp_num_runs_aggregate) { ScatterDirect( tile_num_runs_exclusive_in_global, warp_num_runs_aggregate, warp_num_runs_exclusive_in_tile, thread_num_runs_exclusive_in_warp, lengths_and_offsets); } } else { // Scatter two phase ScatterTwoPhase( tile_num_runs_exclusive_in_global, warp_num_runs_aggregate, warp_num_runs_exclusive_in_tile, thread_num_runs_exclusive_in_warp, lengths_and_offsets, Int2Type()); } } //--------------------------------------------------------------------- // Cooperatively scan a device-wide sequence of tiles with other CTAs //--------------------------------------------------------------------- /** * Process a tile of input (dynamic chained scan) */ template < bool LAST_TILE> __device__ __forceinline__ LengthOffsetPair ConsumeTile( OffsetT num_items, ///< Total number of global input items OffsetT num_remaining, ///< Number of global input items remaining (including this tile) int tile_idx, ///< Tile index OffsetT tile_offset, ///< Tile offset ScanTileStateT &tile_status) ///< Global list of tile status { if (tile_idx == 0) { // First tile // Load items T items[ITEMS_PER_THREAD]; if (LAST_TILE) BlockLoadT(temp_storage.aliasable.load).Load(d_in + tile_offset, items, num_remaining, T()); else BlockLoadT(temp_storage.aliasable.load).Load(d_in + tile_offset, items); if (SYNC_AFTER_LOAD) CTA_SYNC(); // Set flags LengthOffsetPair lengths_and_num_runs[ITEMS_PER_THREAD]; InitializeSelections( tile_offset, num_remaining, items, lengths_and_num_runs); // Exclusive scan of lengths and runs LengthOffsetPair tile_aggregate; LengthOffsetPair warp_aggregate; LengthOffsetPair warp_exclusive_in_tile; LengthOffsetPair thread_exclusive_in_warp; WarpScanAllocations( tile_aggregate, warp_aggregate, warp_exclusive_in_tile, thread_exclusive_in_warp, lengths_and_num_runs); // Update tile status if this is not the last tile if (!LAST_TILE && (threadIdx.x == 0)) tile_status.SetInclusive(0, tile_aggregate); // Update thread_exclusive_in_warp to fold in warp run-length if (thread_exclusive_in_warp.key == 0) thread_exclusive_in_warp.value += warp_exclusive_in_tile.value; LengthOffsetPair lengths_and_offsets[ITEMS_PER_THREAD]; OffsetT thread_num_runs_exclusive_in_warp[ITEMS_PER_THREAD]; LengthOffsetPair lengths_and_num_runs2[ITEMS_PER_THREAD]; // Downsweep scan through lengths_and_num_runs internal::ThreadScanExclusive(lengths_and_num_runs, lengths_and_num_runs2, scan_op, thread_exclusive_in_warp); // Zip #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { lengths_and_offsets[ITEM].value = lengths_and_num_runs2[ITEM].value; lengths_and_offsets[ITEM].key = tile_offset + (threadIdx.x * ITEMS_PER_THREAD) + ITEM; thread_num_runs_exclusive_in_warp[ITEM] = (lengths_and_num_runs[ITEM].key) ? 
lengths_and_num_runs2[ITEM].key : // keep WARP_THREADS * ITEMS_PER_THREAD; // discard } OffsetT tile_num_runs_aggregate = tile_aggregate.key; OffsetT tile_num_runs_exclusive_in_global = 0; OffsetT warp_num_runs_aggregate = warp_aggregate.key; OffsetT warp_num_runs_exclusive_in_tile = warp_exclusive_in_tile.key; // Scatter Scatter( tile_num_runs_aggregate, tile_num_runs_exclusive_in_global, warp_num_runs_aggregate, warp_num_runs_exclusive_in_tile, thread_num_runs_exclusive_in_warp, lengths_and_offsets); // Return running total (inclusive of this tile) return tile_aggregate; } else { // Not first tile // Load items T items[ITEMS_PER_THREAD]; if (LAST_TILE) BlockLoadT(temp_storage.aliasable.load).Load(d_in + tile_offset, items, num_remaining, T()); else BlockLoadT(temp_storage.aliasable.load).Load(d_in + tile_offset, items); if (SYNC_AFTER_LOAD) CTA_SYNC(); // Set flags LengthOffsetPair lengths_and_num_runs[ITEMS_PER_THREAD]; InitializeSelections( tile_offset, num_remaining, items, lengths_and_num_runs); // Exclusive scan of lengths and runs LengthOffsetPair tile_aggregate; LengthOffsetPair warp_aggregate; LengthOffsetPair warp_exclusive_in_tile; LengthOffsetPair thread_exclusive_in_warp; WarpScanAllocations( tile_aggregate, warp_aggregate, warp_exclusive_in_tile, thread_exclusive_in_warp, lengths_and_num_runs); // First warp computes tile prefix in lane 0 TilePrefixCallbackOpT prefix_op(tile_status, temp_storage.aliasable.prefix, Sum(), tile_idx); unsigned int warp_id = ((WARPS == 1) ? 0 : threadIdx.x / WARP_THREADS); if (warp_id == 0) { prefix_op(tile_aggregate); if (threadIdx.x == 0) temp_storage.tile_exclusive = prefix_op.exclusive_prefix; } CTA_SYNC(); LengthOffsetPair tile_exclusive_in_global = temp_storage.tile_exclusive; // Update thread_exclusive_in_warp to fold in warp and tile run-lengths LengthOffsetPair thread_exclusive = scan_op(tile_exclusive_in_global, warp_exclusive_in_tile); if (thread_exclusive_in_warp.key == 0) thread_exclusive_in_warp.value += thread_exclusive.value; // Downsweep scan through lengths_and_num_runs LengthOffsetPair lengths_and_num_runs2[ITEMS_PER_THREAD]; LengthOffsetPair lengths_and_offsets[ITEMS_PER_THREAD]; OffsetT thread_num_runs_exclusive_in_warp[ITEMS_PER_THREAD]; internal::ThreadScanExclusive(lengths_and_num_runs, lengths_and_num_runs2, scan_op, thread_exclusive_in_warp); // Zip #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { lengths_and_offsets[ITEM].value = lengths_and_num_runs2[ITEM].value; lengths_and_offsets[ITEM].key = tile_offset + (threadIdx.x * ITEMS_PER_THREAD) + ITEM; thread_num_runs_exclusive_in_warp[ITEM] = (lengths_and_num_runs[ITEM].key) ? 
lengths_and_num_runs2[ITEM].key : // keep WARP_THREADS * ITEMS_PER_THREAD; // discard } OffsetT tile_num_runs_aggregate = tile_aggregate.key; OffsetT tile_num_runs_exclusive_in_global = tile_exclusive_in_global.key; OffsetT warp_num_runs_aggregate = warp_aggregate.key; OffsetT warp_num_runs_exclusive_in_tile = warp_exclusive_in_tile.key; // Scatter Scatter( tile_num_runs_aggregate, tile_num_runs_exclusive_in_global, warp_num_runs_aggregate, warp_num_runs_exclusive_in_tile, thread_num_runs_exclusive_in_warp, lengths_and_offsets); // Return running total (inclusive of this tile) return prefix_op.inclusive_prefix; } } /** * Scan tiles of items as part of a dynamic chained scan */ template ///< Output iterator type for recording number of items selected __device__ __forceinline__ void ConsumeRange( int num_tiles, ///< Total number of input tiles ScanTileStateT& tile_status, ///< Global list of tile status NumRunsIteratorT d_num_runs_out) ///< Output pointer for total number of runs identified { // Blocks are launched in increasing order, so just assign one tile per block int tile_idx = (blockIdx.x * gridDim.y) + blockIdx.y; // Current tile index OffsetT tile_offset = tile_idx * TILE_ITEMS; // Global offset for the current tile OffsetT num_remaining = num_items - tile_offset; // Remaining items (including this tile) if (tile_idx < num_tiles - 1) { // Not the last tile (full) ConsumeTile(num_items, num_remaining, tile_idx, tile_offset, tile_status); } else if (num_remaining > 0) { // The last tile (possibly partially-full) LengthOffsetPair running_total = ConsumeTile(num_items, num_remaining, tile_idx, tile_offset, tile_status); if (threadIdx.x == 0) { // Output the total number of items selected *d_num_runs_out = running_total.key; // The inclusive prefix contains accumulated length reduction for the last run if (running_total.key > 0) d_lengths_out[running_total.key - 1] = running_total.value; } } } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/agent/agent_scan.cuh000066400000000000000000000444511411340063500222670ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::AgentScan implements a stateful abstraction of CUDA thread blocks for participating in device-wide prefix scan . */ #pragma once #include #include "single_pass_scan_operators.cuh" #include "../block/block_load.cuh" #include "../block/block_store.cuh" #include "../block/block_scan.cuh" #include "../grid/grid_queue.cuh" #include "../iterator/cache_modified_input_iterator.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * Tuning policy types ******************************************************************************/ /** * Parameterizable tuning policy type for AgentScan */ template < int _BLOCK_THREADS, ///< Threads per thread block int _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) BlockLoadAlgorithm _LOAD_ALGORITHM, ///< The BlockLoad algorithm to use CacheLoadModifier _LOAD_MODIFIER, ///< Cache load modifier for reading input elements BlockStoreAlgorithm _STORE_ALGORITHM, ///< The BlockStore algorithm to use BlockScanAlgorithm _SCAN_ALGORITHM> ///< The BlockScan algorithm to use struct AgentScanPolicy { enum { BLOCK_THREADS = _BLOCK_THREADS, ///< Threads per thread block ITEMS_PER_THREAD = _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) }; static const BlockLoadAlgorithm LOAD_ALGORITHM = _LOAD_ALGORITHM; ///< The BlockLoad algorithm to use static const CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER; ///< Cache load modifier for reading input elements static const BlockStoreAlgorithm STORE_ALGORITHM = _STORE_ALGORITHM; ///< The BlockStore algorithm to use static const BlockScanAlgorithm SCAN_ALGORITHM = _SCAN_ALGORITHM; ///< The BlockScan algorithm to use }; /****************************************************************************** * Thread block abstractions ******************************************************************************/ /** * \brief AgentScan implements a stateful abstraction of CUDA thread blocks for participating in device-wide prefix scan . */ template < typename AgentScanPolicyT, ///< Parameterized AgentScanPolicyT tuning policy type typename InputIteratorT, ///< Random-access input iterator type typename OutputIteratorT, ///< Random-access output iterator type typename ScanOpT, ///< Scan functor type typename InitValueT, ///< The init_value element for ScanOpT type (cub::NullType for inclusive scan) typename OffsetT> ///< Signed integer type for global offsets struct AgentScan { //--------------------------------------------------------------------- // Types and constants //--------------------------------------------------------------------- // The input value type typedef typename std::iterator_traits::value_type InputT; // The output value type typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? 
typename std::iterator_traits::value_type, // ... then the input iterator's value type, typename std::iterator_traits::value_type>::Type OutputT; // ... else the output iterator's value type // Tile status descriptor interface type typedef ScanTileState ScanTileStateT; // Input iterator wrapper type (for applying cache modifier) typedef typename If::VALUE, CacheModifiedInputIterator, // Wrap the native input pointer with CacheModifiedInputIterator InputIteratorT>::Type // Directly use the supplied input iterator type WrappedInputIteratorT; // Constants enum { IS_INCLUSIVE = Equals::VALUE, // Inclusive scan if no init_value type is provided BLOCK_THREADS = AgentScanPolicyT::BLOCK_THREADS, ITEMS_PER_THREAD = AgentScanPolicyT::ITEMS_PER_THREAD, TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, }; // Parameterized BlockLoad type typedef BlockLoad< OutputT, AgentScanPolicyT::BLOCK_THREADS, AgentScanPolicyT::ITEMS_PER_THREAD, AgentScanPolicyT::LOAD_ALGORITHM> BlockLoadT; // Parameterized BlockStore type typedef BlockStore< OutputT, AgentScanPolicyT::BLOCK_THREADS, AgentScanPolicyT::ITEMS_PER_THREAD, AgentScanPolicyT::STORE_ALGORITHM> BlockStoreT; // Parameterized BlockScan type typedef BlockScan< OutputT, AgentScanPolicyT::BLOCK_THREADS, AgentScanPolicyT::SCAN_ALGORITHM> BlockScanT; // Callback type for obtaining tile prefix during block scan typedef TilePrefixCallbackOp< OutputT, ScanOpT, ScanTileStateT> TilePrefixCallbackOpT; // Stateful BlockScan prefix callback type for managing a running total while scanning consecutive tiles typedef BlockScanRunningPrefixOp< OutputT, ScanOpT> RunningPrefixCallbackOp; // Shared memory type for this thread block union _TempStorage { typename BlockLoadT::TempStorage load; // Smem needed for tile loading typename BlockStoreT::TempStorage store; // Smem needed for tile storing struct { typename TilePrefixCallbackOpT::TempStorage prefix; // Smem needed for cooperative prefix callback typename BlockScanT::TempStorage scan; // Smem needed for tile scanning }; }; // Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; //--------------------------------------------------------------------- // Per-thread fields //--------------------------------------------------------------------- _TempStorage& temp_storage; ///< Reference to temp_storage WrappedInputIteratorT d_in; ///< Input data OutputIteratorT d_out; ///< Output data ScanOpT scan_op; ///< Binary scan operator InitValueT init_value; ///< The init_value element for ScanOpT //--------------------------------------------------------------------- // Block scan utility methods //--------------------------------------------------------------------- /** * Exclusive scan specialization (first tile) */ __device__ __forceinline__ void ScanTile( OutputT (&items)[ITEMS_PER_THREAD], OutputT init_value, ScanOpT scan_op, OutputT &block_aggregate, Int2Type /*is_inclusive*/) { BlockScanT(temp_storage.scan).ExclusiveScan(items, items, init_value, scan_op, block_aggregate); block_aggregate = scan_op(init_value, block_aggregate); } /** * Inclusive scan specialization (first tile) */ __device__ __forceinline__ void ScanTile( OutputT (&items)[ITEMS_PER_THREAD], InitValueT /*init_value*/, ScanOpT scan_op, OutputT &block_aggregate, Int2Type /*is_inclusive*/) { BlockScanT(temp_storage.scan).InclusiveScan(items, items, scan_op, block_aggregate); } /** * Exclusive scan specialization (subsequent tiles) */ template __device__ __forceinline__ void ScanTile( OutputT (&items)[ITEMS_PER_THREAD], ScanOpT 
scan_op, PrefixCallback &prefix_op, Int2Type /*is_inclusive*/) { BlockScanT(temp_storage.scan).ExclusiveScan(items, items, scan_op, prefix_op); } /** * Inclusive scan specialization (subsequent tiles) */ template __device__ __forceinline__ void ScanTile( OutputT (&items)[ITEMS_PER_THREAD], ScanOpT scan_op, PrefixCallback &prefix_op, Int2Type /*is_inclusive*/) { BlockScanT(temp_storage.scan).InclusiveScan(items, items, scan_op, prefix_op); } //--------------------------------------------------------------------- // Constructor //--------------------------------------------------------------------- // Constructor __device__ __forceinline__ AgentScan( TempStorage& temp_storage, ///< Reference to temp_storage InputIteratorT d_in, ///< Input data OutputIteratorT d_out, ///< Output data ScanOpT scan_op, ///< Binary scan operator InitValueT init_value) ///< Initial value to seed the exclusive scan : temp_storage(temp_storage.Alias()), d_in(d_in), d_out(d_out), scan_op(scan_op), init_value(init_value) {} //--------------------------------------------------------------------- // Cooperatively scan a device-wide sequence of tiles with other CTAs //--------------------------------------------------------------------- /** * Process a tile of input (dynamic chained scan) */ template ///< Whether the current tile is the last tile __device__ __forceinline__ void ConsumeTile( OffsetT num_remaining, ///< Number of global input items remaining (including this tile) int tile_idx, ///< Tile index OffsetT tile_offset, ///< Tile offset ScanTileStateT& tile_state) ///< Global tile state descriptor { // Load items OutputT items[ITEMS_PER_THREAD]; if (IS_LAST_TILE) BlockLoadT(temp_storage.load).Load(d_in + tile_offset, items, num_remaining); else BlockLoadT(temp_storage.load).Load(d_in + tile_offset, items); CTA_SYNC(); // Perform tile scan if (tile_idx == 0) { // Scan first tile OutputT block_aggregate; ScanTile(items, init_value, scan_op, block_aggregate, Int2Type()); if ((!IS_LAST_TILE) && (threadIdx.x == 0)) tile_state.SetInclusive(0, block_aggregate); } else { // Scan non-first tile TilePrefixCallbackOpT prefix_op(tile_state, temp_storage.prefix, scan_op, tile_idx); ScanTile(items, scan_op, prefix_op, Int2Type()); } CTA_SYNC(); // Store items if (IS_LAST_TILE) BlockStoreT(temp_storage.store).Store(d_out + tile_offset, items, num_remaining); else BlockStoreT(temp_storage.store).Store(d_out + tile_offset, items); } /** * Scan tiles of items as part of a dynamic chained scan */ __device__ __forceinline__ void ConsumeRange( int num_items, ///< Total number of input items ScanTileStateT& tile_state, ///< Global tile state descriptor int start_tile) ///< The starting tile for the current grid { // Blocks are launched in increasing order, so just assign one tile per block int tile_idx = start_tile + blockIdx.x; // Current tile index OffsetT tile_offset = OffsetT(TILE_ITEMS) * tile_idx; // Global offset for the current tile OffsetT num_remaining = num_items - tile_offset; // Remaining items (including this tile) if (num_remaining > TILE_ITEMS) { // Not last tile ConsumeTile(num_remaining, tile_idx, tile_offset, tile_state); } else if (num_remaining > 0) { // Last tile ConsumeTile(num_remaining, tile_idx, tile_offset, tile_state); } } //--------------------------------------------------------------------- // Scan an sequence of consecutive tiles (independent of other thread blocks) //--------------------------------------------------------------------- /** * Process a tile of input */ template < bool IS_FIRST_TILE, 
bool IS_LAST_TILE> __device__ __forceinline__ void ConsumeTile( OffsetT tile_offset, ///< Tile offset RunningPrefixCallbackOp& prefix_op, ///< Running prefix operator int valid_items = TILE_ITEMS) ///< Number of valid items in the tile { // Load items OutputT items[ITEMS_PER_THREAD]; if (IS_LAST_TILE) BlockLoadT(temp_storage.load).Load(d_in + tile_offset, items, valid_items); else BlockLoadT(temp_storage.load).Load(d_in + tile_offset, items); CTA_SYNC(); // Block scan if (IS_FIRST_TILE) { OutputT block_aggregate; ScanTile(items, init_value, scan_op, block_aggregate, Int2Type()); prefix_op.running_total = block_aggregate; } else { ScanTile(items, scan_op, prefix_op, Int2Type()); } CTA_SYNC(); // Store items if (IS_LAST_TILE) BlockStoreT(temp_storage.store).Store(d_out + tile_offset, items, valid_items); else BlockStoreT(temp_storage.store).Store(d_out + tile_offset, items); } /** * Scan a consecutive share of input tiles */ __device__ __forceinline__ void ConsumeRange( OffsetT range_offset, ///< [in] Threadblock begin offset (inclusive) OffsetT range_end) ///< [in] Threadblock end offset (exclusive) { BlockScanRunningPrefixOp prefix_op(scan_op); if (range_offset + TILE_ITEMS <= range_end) { // Consume first tile of input (full) ConsumeTile(range_offset, prefix_op); range_offset += TILE_ITEMS; // Consume subsequent full tiles of input while (range_offset + TILE_ITEMS <= range_end) { ConsumeTile(range_offset, prefix_op); range_offset += TILE_ITEMS; } // Consume a partially-full tile if (range_offset < range_end) { int valid_items = range_end - range_offset; ConsumeTile(range_offset, prefix_op, valid_items); } } else { // Consume the first tile of input (partially-full) int valid_items = range_end - range_offset; ConsumeTile(range_offset, prefix_op, valid_items); } } /** * Scan a consecutive share of input tiles, seeded with the specified prefix value */ __device__ __forceinline__ void ConsumeRange( OffsetT range_offset, ///< [in] Threadblock begin offset (inclusive) OffsetT range_end, ///< [in] Threadblock end offset (exclusive) OutputT prefix) ///< [in] The prefix to apply to the scan segment { BlockScanRunningPrefixOp prefix_op(prefix, scan_op); // Consume full tiles of input while (range_offset + TILE_ITEMS <= range_end) { ConsumeTile(range_offset, prefix_op); range_offset += TILE_ITEMS; } // Consume a partially-full tile if (range_offset < range_end) { int valid_items = range_end - range_offset; ConsumeTile(range_offset, prefix_op, valid_items); } } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/agent/agent_segment_fixup.cuh000066400000000000000000000404171411340063500242160ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::AgentSegmentFixup implements a stateful abstraction of CUDA thread blocks for participating in device-wide reduce-value-by-key. */ #pragma once #include #include "single_pass_scan_operators.cuh" #include "../block/block_load.cuh" #include "../block/block_store.cuh" #include "../block/block_scan.cuh" #include "../block/block_discontinuity.cuh" #include "../iterator/cache_modified_input_iterator.cuh" #include "../iterator/constant_input_iterator.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * Tuning policy types ******************************************************************************/ /** * Parameterizable tuning policy type for AgentSegmentFixup */ template < int _BLOCK_THREADS, ///< Threads per thread block int _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) BlockLoadAlgorithm _LOAD_ALGORITHM, ///< The BlockLoad algorithm to use CacheLoadModifier _LOAD_MODIFIER, ///< Cache load modifier for reading input elements BlockScanAlgorithm _SCAN_ALGORITHM> ///< The BlockScan algorithm to use struct AgentSegmentFixupPolicy { enum { BLOCK_THREADS = _BLOCK_THREADS, ///< Threads per thread block ITEMS_PER_THREAD = _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) }; static const BlockLoadAlgorithm LOAD_ALGORITHM = _LOAD_ALGORITHM; ///< The BlockLoad algorithm to use static const CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER; ///< Cache load modifier for reading input elements static const BlockScanAlgorithm SCAN_ALGORITHM = _SCAN_ALGORITHM; ///< The BlockScan algorithm to use }; /****************************************************************************** * Thread block abstractions ******************************************************************************/ /** * \brief AgentSegmentFixup implements a stateful abstraction of CUDA thread blocks for participating in device-wide reduce-value-by-key */ template < typename AgentSegmentFixupPolicyT, ///< Parameterized AgentSegmentFixupPolicy tuning policy type typename PairsInputIteratorT, ///< Random-access input iterator type for keys typename AggregatesOutputIteratorT, ///< Random-access output iterator type for values typename EqualityOpT, ///< KeyT equality operator type typename ReductionOpT, ///< ValueT reduction operator type typename OffsetT> ///< Signed integer type for global offsets struct AgentSegmentFixup { 
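    //---------------------------------------------------------------------
    // Added explanatory note (not part of the original NVIDIA header)
    //
    // AgentSegmentFixup is a second, "fixup" pass: an earlier multi-block
    // pass (e.g. the CUB SpMV / reduce-by-key kernels) can emit several
    // partial (segment-id, partial-aggregate) pairs for the same segment,
    // one per thread block that touched it.  This agent folds those
    // partials into d_aggregates_out; conceptually, for each input pair:
    //
    //     // pair.key   = segment id
    //     // pair.value = partial aggregate from one thread block
    //     d_aggregates_out[pair.key] =
    //         reduction_op(d_aggregates_out[pair.key], pair.value);
    //
    // On sm_35+ and for value types with hardware atomicAdd support,
    // USE_ATOMIC_FIXUP (below) selects an atomicAdd-based run-length path;
    // otherwise the partials are combined with a reduce-by-key prefix scan.
    //---------------------------------------------------------------------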
//--------------------------------------------------------------------- // Types and constants //--------------------------------------------------------------------- // Data type of key-value input iterator typedef typename std::iterator_traits::value_type KeyValuePairT; // Value type typedef typename KeyValuePairT::Value ValueT; // Tile status descriptor interface type typedef ReduceByKeyScanTileState ScanTileStateT; // Constants enum { BLOCK_THREADS = AgentSegmentFixupPolicyT::BLOCK_THREADS, ITEMS_PER_THREAD = AgentSegmentFixupPolicyT::ITEMS_PER_THREAD, TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, // Whether or not do fixup using RLE + global atomics USE_ATOMIC_FIXUP = (CUB_PTX_ARCH >= 350) && (Equals::VALUE || Equals::VALUE || Equals::VALUE || Equals::VALUE), // Whether or not the scan operation has a zero-valued identity value (true if we're performing addition on a primitive type) HAS_IDENTITY_ZERO = (Equals::VALUE) && (Traits::PRIMITIVE), }; // Cache-modified Input iterator wrapper type (for applying cache modifier) for keys typedef typename If::VALUE, CacheModifiedInputIterator, // Wrap the native input pointer with CacheModifiedValuesInputIterator PairsInputIteratorT>::Type // Directly use the supplied input iterator type WrappedPairsInputIteratorT; // Cache-modified Input iterator wrapper type (for applying cache modifier) for fixup values typedef typename If::VALUE, CacheModifiedInputIterator, // Wrap the native input pointer with CacheModifiedValuesInputIterator AggregatesOutputIteratorT>::Type // Directly use the supplied input iterator type WrappedFixupInputIteratorT; // Reduce-value-by-segment scan operator typedef ReduceByKeyOp ReduceBySegmentOpT; // Parameterized BlockLoad type for pairs typedef BlockLoad< KeyValuePairT, BLOCK_THREADS, ITEMS_PER_THREAD, AgentSegmentFixupPolicyT::LOAD_ALGORITHM> BlockLoadPairs; // Parameterized BlockScan type typedef BlockScan< KeyValuePairT, BLOCK_THREADS, AgentSegmentFixupPolicyT::SCAN_ALGORITHM> BlockScanT; // Callback type for obtaining tile prefix during block scan typedef TilePrefixCallbackOp< KeyValuePairT, ReduceBySegmentOpT, ScanTileStateT> TilePrefixCallbackOpT; // Shared memory type for this thread block union _TempStorage { struct { typename BlockScanT::TempStorage scan; // Smem needed for tile scanning typename TilePrefixCallbackOpT::TempStorage prefix; // Smem needed for cooperative prefix callback }; // Smem needed for loading keys typename BlockLoadPairs::TempStorage load_pairs; }; // Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; //--------------------------------------------------------------------- // Per-thread fields //--------------------------------------------------------------------- _TempStorage& temp_storage; ///< Reference to temp_storage WrappedPairsInputIteratorT d_pairs_in; ///< Input keys AggregatesOutputIteratorT d_aggregates_out; ///< Output value aggregates WrappedFixupInputIteratorT d_fixup_in; ///< Fixup input values InequalityWrapper inequality_op; ///< KeyT inequality operator ReductionOpT reduction_op; ///< Reduction operator ReduceBySegmentOpT scan_op; ///< Reduce-by-segment scan operator //--------------------------------------------------------------------- // Constructor //--------------------------------------------------------------------- // Constructor __device__ __forceinline__ AgentSegmentFixup( TempStorage& temp_storage, ///< Reference to temp_storage PairsInputIteratorT d_pairs_in, ///< Input keys AggregatesOutputIteratorT 
d_aggregates_out, ///< Output value aggregates EqualityOpT equality_op, ///< KeyT equality operator ReductionOpT reduction_op) ///< ValueT reduction operator : temp_storage(temp_storage.Alias()), d_pairs_in(d_pairs_in), d_aggregates_out(d_aggregates_out), d_fixup_in(d_aggregates_out), inequality_op(equality_op), reduction_op(reduction_op), scan_op(reduction_op) {} //--------------------------------------------------------------------- // Cooperatively scan a device-wide sequence of tiles with other CTAs //--------------------------------------------------------------------- /** * Process input tile. Specialized for atomic-fixup */ template __device__ __forceinline__ void ConsumeTile( OffsetT num_remaining, ///< Number of global input items remaining (including this tile) int tile_idx, ///< Tile index OffsetT tile_offset, ///< Tile offset ScanTileStateT& tile_state, ///< Global tile state descriptor Int2Type use_atomic_fixup) ///< Marker whether to use atomicAdd (instead of reduce-by-key) { KeyValuePairT pairs[ITEMS_PER_THREAD]; // Load pairs KeyValuePairT oob_pair; oob_pair.key = -1; if (IS_LAST_TILE) BlockLoadPairs(temp_storage.load_pairs).Load(d_pairs_in + tile_offset, pairs, num_remaining, oob_pair); else BlockLoadPairs(temp_storage.load_pairs).Load(d_pairs_in + tile_offset, pairs); // RLE #pragma unroll for (int ITEM = 1; ITEM < ITEMS_PER_THREAD; ++ITEM) { ValueT* d_scatter = d_aggregates_out + pairs[ITEM - 1].key; if (pairs[ITEM].key != pairs[ITEM - 1].key) atomicAdd(d_scatter, pairs[ITEM - 1].value); else pairs[ITEM].value = reduction_op(pairs[ITEM - 1].value, pairs[ITEM].value); } // Flush last item if valid ValueT* d_scatter = d_aggregates_out + pairs[ITEMS_PER_THREAD - 1].key; if ((!IS_LAST_TILE) || (pairs[ITEMS_PER_THREAD - 1].key >= 0)) atomicAdd(d_scatter, pairs[ITEMS_PER_THREAD - 1].value); } /** * Process input tile. 
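
/*
 * A self-contained sketch (simplified, not CUB's implementation) of the idea behind the
 * atomic-fixup path above: each thread collapses runs of equal keys among its own pairs and
 * issues a single atomicAdd per run, instead of one atomic per pair.  Keys of -1 mark padding.
 */
__global__ void rle_atomic_fixup_sketch(const int*   keys,        // segment id per pair (-1 = padding)
                                        const float* values,      // partial value per pair
                                        float*       out,         // out[key] accumulates totals
                                        int          num_pairs,
                                        int          items_per_thread)
{
    int base = (blockIdx.x * blockDim.x + threadIdx.x) * items_per_thread;

    int   run_key = -1;       // key of the run currently being accumulated
    float run_sum = 0.0f;

    for (int i = 0; i < items_per_thread; ++i)
    {
        int idx = base + i;
        if (idx >= num_pairs) break;

        if (keys[idx] == run_key)
        {
            run_sum += values[idx];                     // extend the current run
        }
        else
        {
            if (run_key >= 0)
                atomicAdd(out + run_key, run_sum);      // flush the previous run
            run_key = keys[idx];
            run_sum = values[idx];
        }
    }

    if (run_key >= 0)
        atomicAdd(out + run_key, run_sum);              // flush the last run
}
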
Specialized for reduce-by-key fixup */ template __device__ __forceinline__ void ConsumeTile( OffsetT num_remaining, ///< Number of global input items remaining (including this tile) int tile_idx, ///< Tile index OffsetT tile_offset, ///< Tile offset ScanTileStateT& tile_state, ///< Global tile state descriptor Int2Type use_atomic_fixup) ///< Marker whether to use atomicAdd (instead of reduce-by-key) { KeyValuePairT pairs[ITEMS_PER_THREAD]; KeyValuePairT scatter_pairs[ITEMS_PER_THREAD]; // Load pairs KeyValuePairT oob_pair; oob_pair.key = -1; if (IS_LAST_TILE) BlockLoadPairs(temp_storage.load_pairs).Load(d_pairs_in + tile_offset, pairs, num_remaining, oob_pair); else BlockLoadPairs(temp_storage.load_pairs).Load(d_pairs_in + tile_offset, pairs); CTA_SYNC(); KeyValuePairT tile_aggregate; if (tile_idx == 0) { // Exclusive scan of values and segment_flags BlockScanT(temp_storage.scan).ExclusiveScan(pairs, scatter_pairs, scan_op, tile_aggregate); // Update tile status if this is not the last tile if (threadIdx.x == 0) { // Set first segment id to not trigger a flush (invalid from exclusive scan) scatter_pairs[0].key = pairs[0].key; if (!IS_LAST_TILE) tile_state.SetInclusive(0, tile_aggregate); } } else { // Exclusive scan of values and segment_flags TilePrefixCallbackOpT prefix_op(tile_state, temp_storage.prefix, scan_op, tile_idx); BlockScanT(temp_storage.scan).ExclusiveScan(pairs, scatter_pairs, scan_op, prefix_op); tile_aggregate = prefix_op.GetBlockAggregate(); } // Scatter updated values #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { if (scatter_pairs[ITEM].key != pairs[ITEM].key) { // Update the value at the key location ValueT value = d_fixup_in[scatter_pairs[ITEM].key]; value = reduction_op(value, scatter_pairs[ITEM].value); d_aggregates_out[scatter_pairs[ITEM].key] = value; } } // Finalize the last item if (IS_LAST_TILE) { // Last thread will output final count and last item, if necessary if (threadIdx.x == BLOCK_THREADS - 1) { // If the last tile is a whole tile, the inclusive prefix contains accumulated value reduction for the last segment if (num_remaining == TILE_ITEMS) { // Update the value at the key location OffsetT last_key = pairs[ITEMS_PER_THREAD - 1].key; d_aggregates_out[last_key] = reduction_op(tile_aggregate.value, d_fixup_in[last_key]); } } } } /** * Scan tiles of items as part of a dynamic chained scan */ __device__ __forceinline__ void ConsumeRange( int num_items, ///< Total number of input items int num_tiles, ///< Total number of input tiles ScanTileStateT& tile_state) ///< Global tile state descriptor { // Blocks are launched in increasing order, so just assign one tile per block int tile_idx = (blockIdx.x * gridDim.y) + blockIdx.y; // Current tile index OffsetT tile_offset = tile_idx * TILE_ITEMS; // Global offset for the current tile OffsetT num_remaining = num_items - tile_offset; // Remaining items (including this tile) if (num_remaining > TILE_ITEMS) { // Not the last tile (full) ConsumeTile(num_remaining, tile_idx, tile_offset, tile_state, Int2Type()); } else if (num_remaining > 0) { // The last tile (possibly partially-full) ConsumeTile(num_remaining, tile_idx, tile_offset, tile_state, Int2Type()); } } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/agent/agent_select_if.cuh000066400000000000000000000716321411340063500233010ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. 
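
/*
 * A minimal host-side sketch (simplified to addition) of the reduce-by-segment scan operator
 * used by the non-atomic path above: scanning {key, value} pairs with it yields, at every
 * position where the key changes, the completed aggregate of the previous segment, which is
 * exactly what ConsumeTile scatters back into d_aggregates_out.
 */
struct PairSketch { int key; float value; };

inline PairSketch reduce_by_key_sketch(const PairSketch &a, const PairSketch &b)
{
    PairSketch out;
    out.key   = b.key;
    out.value = (a.key == b.key) ? a.value + b.value    // same segment: keep accumulating
                                 : b.value;             // new segment: restart the sum
    return out;
}
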
* Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::AgentSelectIf implements a stateful abstraction of CUDA thread blocks for participating in device-wide select. */ #pragma once #include #include "single_pass_scan_operators.cuh" #include "../block/block_load.cuh" #include "../block/block_store.cuh" #include "../block/block_scan.cuh" #include "../block/block_exchange.cuh" #include "../block/block_discontinuity.cuh" #include "../grid/grid_queue.cuh" #include "../iterator/cache_modified_input_iterator.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * Tuning policy types ******************************************************************************/ /** * Parameterizable tuning policy type for AgentSelectIf */ template < int _BLOCK_THREADS, ///< Threads per thread block int _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) BlockLoadAlgorithm _LOAD_ALGORITHM, ///< The BlockLoad algorithm to use CacheLoadModifier _LOAD_MODIFIER, ///< Cache load modifier for reading input elements BlockScanAlgorithm _SCAN_ALGORITHM> ///< The BlockScan algorithm to use struct AgentSelectIfPolicy { enum { BLOCK_THREADS = _BLOCK_THREADS, ///< Threads per thread block ITEMS_PER_THREAD = _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) }; static const BlockLoadAlgorithm LOAD_ALGORITHM = _LOAD_ALGORITHM; ///< The BlockLoad algorithm to use static const CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER; ///< Cache load modifier for reading input elements static const BlockScanAlgorithm SCAN_ALGORITHM = _SCAN_ALGORITHM; ///< The BlockScan algorithm to use }; /****************************************************************************** * Thread block abstractions ******************************************************************************/ /** * \brief AgentSelectIf implements a stateful abstraction of CUDA 
thread blocks for participating in device-wide selection * * Performs functor-based selection if SelectOpT functor type != NullType * Otherwise performs flag-based selection if FlagsInputIterator's value type != NullType * Otherwise performs discontinuity selection (keep unique) */ template < typename AgentSelectIfPolicyT, ///< Parameterized AgentSelectIfPolicy tuning policy type typename InputIteratorT, ///< Random-access input iterator type for selection items typename FlagsInputIteratorT, ///< Random-access input iterator type for selections (NullType* if a selection functor or discontinuity flagging is to be used for selection) typename SelectedOutputIteratorT, ///< Random-access input iterator type for selection_flags items typename SelectOpT, ///< Selection operator type (NullType if selections or discontinuity flagging is to be used for selection) typename EqualityOpT, ///< Equality operator type (NullType if selection functor or selections is to be used for selection) typename OffsetT, ///< Signed integer type for global offsets bool KEEP_REJECTS> ///< Whether or not we push rejected items to the back of the output struct AgentSelectIf { //--------------------------------------------------------------------- // Types and constants //--------------------------------------------------------------------- // The input value type typedef typename std::iterator_traits::value_type InputT; // The output value type typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? typename std::iterator_traits::value_type, // ... then the input iterator's value type, typename std::iterator_traits::value_type>::Type OutputT; // ... else the output iterator's value type // The flag value type typedef typename std::iterator_traits::value_type FlagT; // Tile status descriptor interface type typedef ScanTileState ScanTileStateT; // Constants enum { USE_SELECT_OP, USE_SELECT_FLAGS, USE_DISCONTINUITY, BLOCK_THREADS = AgentSelectIfPolicyT::BLOCK_THREADS, ITEMS_PER_THREAD = AgentSelectIfPolicyT::ITEMS_PER_THREAD, TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, TWO_PHASE_SCATTER = (ITEMS_PER_THREAD > 1), SELECT_METHOD = (!Equals::VALUE) ? USE_SELECT_OP : (!Equals::VALUE) ? 
USE_SELECT_FLAGS : USE_DISCONTINUITY }; // Cache-modified Input iterator wrapper type (for applying cache modifier) for items typedef typename If::VALUE, CacheModifiedInputIterator, // Wrap the native input pointer with CacheModifiedValuesInputIterator InputIteratorT>::Type // Directly use the supplied input iterator type WrappedInputIteratorT; // Cache-modified Input iterator wrapper type (for applying cache modifier) for values typedef typename If::VALUE, CacheModifiedInputIterator, // Wrap the native input pointer with CacheModifiedValuesInputIterator FlagsInputIteratorT>::Type // Directly use the supplied input iterator type WrappedFlagsInputIteratorT; // Parameterized BlockLoad type for input data typedef BlockLoad< OutputT, BLOCK_THREADS, ITEMS_PER_THREAD, AgentSelectIfPolicyT::LOAD_ALGORITHM> BlockLoadT; // Parameterized BlockLoad type for flags typedef BlockLoad< FlagT, BLOCK_THREADS, ITEMS_PER_THREAD, AgentSelectIfPolicyT::LOAD_ALGORITHM> BlockLoadFlags; // Parameterized BlockDiscontinuity type for items typedef BlockDiscontinuity< OutputT, BLOCK_THREADS> BlockDiscontinuityT; // Parameterized BlockScan type typedef BlockScan< OffsetT, BLOCK_THREADS, AgentSelectIfPolicyT::SCAN_ALGORITHM> BlockScanT; // Callback type for obtaining tile prefix during block scan typedef TilePrefixCallbackOp< OffsetT, cub::Sum, ScanTileStateT> TilePrefixCallbackOpT; // Item exchange type typedef OutputT ItemExchangeT[TILE_ITEMS]; // Shared memory type for this thread block union _TempStorage { struct { typename BlockScanT::TempStorage scan; // Smem needed for tile scanning typename TilePrefixCallbackOpT::TempStorage prefix; // Smem needed for cooperative prefix callback typename BlockDiscontinuityT::TempStorage discontinuity; // Smem needed for discontinuity detection }; // Smem needed for loading items typename BlockLoadT::TempStorage load_items; // Smem needed for loading values typename BlockLoadFlags::TempStorage load_flags; // Smem needed for compacting items (allows non POD items in this union) Uninitialized raw_exchange; }; // Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; //--------------------------------------------------------------------- // Per-thread fields //--------------------------------------------------------------------- _TempStorage& temp_storage; ///< Reference to temp_storage WrappedInputIteratorT d_in; ///< Input items SelectedOutputIteratorT d_selected_out; ///< Unique output items WrappedFlagsInputIteratorT d_flags_in; ///< Input selection flags (if applicable) InequalityWrapper inequality_op; ///< T inequality operator SelectOpT select_op; ///< Selection operator OffsetT num_items; ///< Total number of input items //--------------------------------------------------------------------- // Constructor //--------------------------------------------------------------------- // Constructor __device__ __forceinline__ AgentSelectIf( TempStorage &temp_storage, ///< Reference to temp_storage InputIteratorT d_in, ///< Input data FlagsInputIteratorT d_flags_in, ///< Input selection flags (if applicable) SelectedOutputIteratorT d_selected_out, ///< Output data SelectOpT select_op, ///< Selection operator EqualityOpT equality_op, ///< Equality operator OffsetT num_items) ///< Total number of input items : temp_storage(temp_storage.Alias()), d_in(d_in), d_flags_in(d_flags_in), d_selected_out(d_selected_out), select_op(select_op), inequality_op(equality_op), num_items(num_items) {} 
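
/*
 * A small sketch (illustrative names, not CUB's code) of the Int2Type dispatch used in this
 * file: SELECT_METHOD is computed at compile time from the template arguments, and
 * Int2Type<SELECT_METHOD> selects the matching InitializeSelections overload by ordinary
 * overload resolution, with no run-time branching.
 */
template <int VALUE> struct Int2TypeSketch { enum { V = VALUE }; };

__device__ __forceinline__ void init_sketch(Int2TypeSketch<0>) { /* functor-based selection */ }
__device__ __forceinline__ void init_sketch(Int2TypeSketch<1>) { /* flag-based selection    */ }
__device__ __forceinline__ void init_sketch(Int2TypeSketch<2>) { /* keep-unique selection   */ }

// Usage: init_sketch(Int2TypeSketch<SELECT_METHOD>());  resolved entirely at compile time.
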
//--------------------------------------------------------------------- // Utility methods for initializing the selections //--------------------------------------------------------------------- /** * Initialize selections (specialized for selection operator) */ template __device__ __forceinline__ void InitializeSelections( OffsetT /*tile_offset*/, OffsetT num_tile_items, OutputT (&items)[ITEMS_PER_THREAD], OffsetT (&selection_flags)[ITEMS_PER_THREAD], Int2Type /*select_method*/) { #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { // Out-of-bounds items are selection_flags selection_flags[ITEM] = 1; if (!IS_LAST_TILE || (OffsetT(threadIdx.x * ITEMS_PER_THREAD) + ITEM < num_tile_items)) selection_flags[ITEM] = select_op(items[ITEM]); } } /** * Initialize selections (specialized for valid flags) */ template __device__ __forceinline__ void InitializeSelections( OffsetT tile_offset, OffsetT num_tile_items, OutputT (&/*items*/)[ITEMS_PER_THREAD], OffsetT (&selection_flags)[ITEMS_PER_THREAD], Int2Type /*select_method*/) { CTA_SYNC(); FlagT flags[ITEMS_PER_THREAD]; if (IS_LAST_TILE) { // Out-of-bounds items are selection_flags BlockLoadFlags(temp_storage.load_flags).Load(d_flags_in + tile_offset, flags, num_tile_items, 1); } else { BlockLoadFlags(temp_storage.load_flags).Load(d_flags_in + tile_offset, flags); } // Convert flag type to selection_flags type #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { selection_flags[ITEM] = flags[ITEM]; } } /** * Initialize selections (specialized for discontinuity detection) */ template __device__ __forceinline__ void InitializeSelections( OffsetT tile_offset, OffsetT num_tile_items, OutputT (&items)[ITEMS_PER_THREAD], OffsetT (&selection_flags)[ITEMS_PER_THREAD], Int2Type /*select_method*/) { if (IS_FIRST_TILE) { CTA_SYNC(); // Set head selection_flags. 
First tile sets the first flag for the first item BlockDiscontinuityT(temp_storage.discontinuity).FlagHeads(selection_flags, items, inequality_op); } else { OutputT tile_predecessor; if (threadIdx.x == 0) tile_predecessor = d_in[tile_offset - 1]; CTA_SYNC(); BlockDiscontinuityT(temp_storage.discontinuity).FlagHeads(selection_flags, items, inequality_op, tile_predecessor); } // Set selection flags for out-of-bounds items #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { // Set selection_flags for out-of-bounds items if ((IS_LAST_TILE) && (OffsetT(threadIdx.x * ITEMS_PER_THREAD) + ITEM >= num_tile_items)) selection_flags[ITEM] = 1; } } //--------------------------------------------------------------------- // Scatter utility methods //--------------------------------------------------------------------- /** * Scatter flagged items to output offsets (specialized for direct scattering) */ template __device__ __forceinline__ void ScatterDirect( OutputT (&items)[ITEMS_PER_THREAD], OffsetT (&selection_flags)[ITEMS_PER_THREAD], OffsetT (&selection_indices)[ITEMS_PER_THREAD], OffsetT num_selections) { // Scatter flagged items #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { if (selection_flags[ITEM]) { if ((!IS_LAST_TILE) || selection_indices[ITEM] < num_selections) { d_selected_out[selection_indices[ITEM]] = items[ITEM]; } } } } /** * Scatter flagged items to output offsets (specialized for two-phase scattering) */ template __device__ __forceinline__ void ScatterTwoPhase( OutputT (&items)[ITEMS_PER_THREAD], OffsetT (&selection_flags)[ITEMS_PER_THREAD], OffsetT (&selection_indices)[ITEMS_PER_THREAD], int /*num_tile_items*/, ///< Number of valid items in this tile int num_tile_selections, ///< Number of selections in this tile OffsetT num_selections_prefix, ///< Total number of selections prior to this tile OffsetT /*num_rejected_prefix*/, ///< Total number of rejections prior to this tile Int2Type /*is_keep_rejects*/) ///< Marker type indicating whether to keep rejected items in the second partition { CTA_SYNC(); // Compact and scatter items #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { int local_scatter_offset = selection_indices[ITEM] - num_selections_prefix; if (selection_flags[ITEM]) { temp_storage.raw_exchange.Alias()[local_scatter_offset] = items[ITEM]; } } CTA_SYNC(); for (int item = threadIdx.x; item < num_tile_selections; item += BLOCK_THREADS) { d_selected_out[num_selections_prefix + item] = temp_storage.raw_exchange.Alias()[item]; } } /** * Scatter flagged items to output offsets (specialized for two-phase scattering) */ template __device__ __forceinline__ void ScatterTwoPhase( OutputT (&items)[ITEMS_PER_THREAD], OffsetT (&selection_flags)[ITEMS_PER_THREAD], OffsetT (&selection_indices)[ITEMS_PER_THREAD], int num_tile_items, ///< Number of valid items in this tile int num_tile_selections, ///< Number of selections in this tile OffsetT num_selections_prefix, ///< Total number of selections prior to this tile OffsetT num_rejected_prefix, ///< Total number of rejections prior to this tile Int2Type /*is_keep_rejects*/) ///< Marker type indicating whether to keep rejected items in the second partition { CTA_SYNC(); int tile_num_rejections = num_tile_items - num_tile_selections; // Scatter items to shared memory (rejections first) #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { int item_idx = (threadIdx.x * ITEMS_PER_THREAD) + ITEM; int local_selection_idx = selection_indices[ITEM] - num_selections_prefix; int 
local_rejection_idx = item_idx - local_selection_idx; int local_scatter_offset = (selection_flags[ITEM]) ? tile_num_rejections + local_selection_idx : local_rejection_idx; temp_storage.raw_exchange.Alias()[local_scatter_offset] = items[ITEM]; } CTA_SYNC(); // Gather items from shared memory and scatter to global #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { int item_idx = (ITEM * BLOCK_THREADS) + threadIdx.x; int rejection_idx = item_idx; int selection_idx = item_idx - tile_num_rejections; OffsetT scatter_offset = (item_idx < tile_num_rejections) ? num_items - num_rejected_prefix - rejection_idx - 1 : num_selections_prefix + selection_idx; OutputT item = temp_storage.raw_exchange.Alias()[item_idx]; if (!IS_LAST_TILE || (item_idx < num_tile_items)) { d_selected_out[scatter_offset] = item; } } } /** * Scatter flagged items */ template __device__ __forceinline__ void Scatter( OutputT (&items)[ITEMS_PER_THREAD], OffsetT (&selection_flags)[ITEMS_PER_THREAD], OffsetT (&selection_indices)[ITEMS_PER_THREAD], int num_tile_items, ///< Number of valid items in this tile int num_tile_selections, ///< Number of selections in this tile OffsetT num_selections_prefix, ///< Total number of selections prior to this tile OffsetT num_rejected_prefix, ///< Total number of rejections prior to this tile OffsetT num_selections) ///< Total number of selections including this tile { // Do a two-phase scatter if (a) keeping both partitions or (b) two-phase is enabled and the average number of selection_flags items per thread is greater than one if (KEEP_REJECTS || (TWO_PHASE_SCATTER && (num_tile_selections > BLOCK_THREADS))) { ScatterTwoPhase( items, selection_flags, selection_indices, num_tile_items, num_tile_selections, num_selections_prefix, num_rejected_prefix, Int2Type()); } else { ScatterDirect( items, selection_flags, selection_indices, num_selections); } } //--------------------------------------------------------------------- // Cooperatively scan a device-wide sequence of tiles with other CTAs //--------------------------------------------------------------------- /** * Process first tile of input (dynamic chained scan). 
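
/*
 * A host-side sketch of the partitioning arithmetic used by the keep-rejects scatter above
 * (illustrative only): selected items are packed to the front of the output in order, while
 * rejected items are written from the back of the output in reverse order, so together they
 * fill the output exactly once.
 */
#include <cstddef>

inline size_t partition_offset_sketch(bool   selected,
                                      size_t selection_idx,    // running count of prior selections
                                      size_t rejection_idx,    // running count of prior rejections
                                      size_t num_items)        // total number of items
{
    return selected ? selection_idx                            // front of output, ascending
                    : num_items - rejection_idx - 1;           // back of output, descending
}
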
Returns the running count of selections (including this tile) */ template __device__ __forceinline__ OffsetT ConsumeFirstTile( int num_tile_items, ///< Number of input items comprising this tile OffsetT tile_offset, ///< Tile offset ScanTileStateT& tile_state) ///< Global tile state descriptor { OutputT items[ITEMS_PER_THREAD]; OffsetT selection_flags[ITEMS_PER_THREAD]; OffsetT selection_indices[ITEMS_PER_THREAD]; // Load items if (IS_LAST_TILE) BlockLoadT(temp_storage.load_items).Load(d_in + tile_offset, items, num_tile_items); else BlockLoadT(temp_storage.load_items).Load(d_in + tile_offset, items); // Initialize selection_flags InitializeSelections( tile_offset, num_tile_items, items, selection_flags, Int2Type()); CTA_SYNC(); // Exclusive scan of selection_flags OffsetT num_tile_selections; BlockScanT(temp_storage.scan).ExclusiveSum(selection_flags, selection_indices, num_tile_selections); if (threadIdx.x == 0) { // Update tile status if this is not the last tile if (!IS_LAST_TILE) tile_state.SetInclusive(0, num_tile_selections); } // Discount any out-of-bounds selections if (IS_LAST_TILE) num_tile_selections -= (TILE_ITEMS - num_tile_items); // Scatter flagged items Scatter( items, selection_flags, selection_indices, num_tile_items, num_tile_selections, 0, 0, num_tile_selections); return num_tile_selections; } /** * Process subsequent tile of input (dynamic chained scan). Returns the running count of selections (including this tile) */ template __device__ __forceinline__ OffsetT ConsumeSubsequentTile( int num_tile_items, ///< Number of input items comprising this tile int tile_idx, ///< Tile index OffsetT tile_offset, ///< Tile offset ScanTileStateT& tile_state) ///< Global tile state descriptor { OutputT items[ITEMS_PER_THREAD]; OffsetT selection_flags[ITEMS_PER_THREAD]; OffsetT selection_indices[ITEMS_PER_THREAD]; // Load items if (IS_LAST_TILE) BlockLoadT(temp_storage.load_items).Load(d_in + tile_offset, items, num_tile_items); else BlockLoadT(temp_storage.load_items).Load(d_in + tile_offset, items); // Initialize selection_flags InitializeSelections( tile_offset, num_tile_items, items, selection_flags, Int2Type()); CTA_SYNC(); // Exclusive scan of values and selection_flags TilePrefixCallbackOpT prefix_op(tile_state, temp_storage.prefix, cub::Sum(), tile_idx); BlockScanT(temp_storage.scan).ExclusiveSum(selection_flags, selection_indices, prefix_op); OffsetT num_tile_selections = prefix_op.GetBlockAggregate(); OffsetT num_selections = prefix_op.GetInclusivePrefix(); OffsetT num_selections_prefix = prefix_op.GetExclusivePrefix(); OffsetT num_rejected_prefix = (tile_idx * TILE_ITEMS) - num_selections_prefix; // Discount any out-of-bounds selections if (IS_LAST_TILE) { int num_discount = TILE_ITEMS - num_tile_items; num_selections -= num_discount; num_tile_selections -= num_discount; } // Scatter flagged items Scatter( items, selection_flags, selection_indices, num_tile_items, num_tile_selections, num_selections_prefix, num_rejected_prefix, num_selections); return num_selections; } /** * Process a tile of input */ template __device__ __forceinline__ OffsetT ConsumeTile( int num_tile_items, ///< Number of input items comprising this tile int tile_idx, ///< Tile index OffsetT tile_offset, ///< Tile offset ScanTileStateT& tile_state) ///< Global tile state descriptor { OffsetT num_selections; if (tile_idx == 0) { num_selections = ConsumeFirstTile(num_tile_items, tile_offset, tile_state); } else { num_selections = ConsumeSubsequentTile(num_tile_items, tile_idx, tile_offset, tile_state); } 
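
/*
 * A short host-side sketch of the prefix bookkeeping in ConsumeSubsequentTile (names are
 * illustrative): because every preceding tile is a full tile, the number of previously
 * rejected items follows directly from the number of previously selected ones.
 */
inline int num_rejected_prefix_sketch(int tile_idx, int tile_items, int num_selections_prefix)
{
    int items_before_tile = tile_idx * tile_items;      // all preceding tiles are full
    return items_before_tile - num_selections_prefix;   // everything not selected was rejected
}
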
return num_selections; } /** * Scan tiles of items as part of a dynamic chained scan */ template ///< Output iterator type for recording number of items selection_flags __device__ __forceinline__ void ConsumeRange( int num_tiles, ///< Total number of input tiles ScanTileStateT& tile_state, ///< Global tile state descriptor NumSelectedIteratorT d_num_selected_out) ///< Output total number selection_flags { // Blocks are launched in increasing order, so just assign one tile per block int tile_idx = (blockIdx.x * gridDim.y) + blockIdx.y; // Current tile index OffsetT tile_offset = tile_idx * TILE_ITEMS; // Global offset for the current tile if (tile_idx < num_tiles - 1) { // Not the last tile (full) ConsumeTile(TILE_ITEMS, tile_idx, tile_offset, tile_state); } else { // The last tile (possibly partially-full) OffsetT num_remaining = num_items - tile_offset; OffsetT num_selections = ConsumeTile(num_remaining, tile_idx, tile_offset, tile_state); if (threadIdx.x == 0) { // Output the total number of items selection_flags *d_num_selected_out = num_selections; } } } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/agent/agent_spmv_orig.cuh000066400000000000000000001070131411340063500233420ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::AgentSpmv implements a stateful abstraction of CUDA thread blocks for participating in device-wide SpMV. 
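
/*
 * For reference, a naive one-thread-per-row CSR SpMV kernel computing the same product the
 * merge-path agent below produces (y = A*x; alpha and beta omitted).  This is NOT the CUB
 * algorithm: its work per thread is proportional to the row length, which is exactly the load
 * imbalance the merge-path decomposition is designed to remove.  The sketch uses the standard
 * CSR row_offsets array of length num_rows + 1.
 */
__global__ void csr_spmv_row_per_thread_sketch(const int*   row_offsets,
                                               const int*   column_indices,
                                               const float* values,
                                               const float* x,
                                               float*       y,
                                               int          num_rows)
{
    int row = blockIdx.x * blockDim.x + threadIdx.x;
    if (row >= num_rows) return;

    float sum = 0.0f;
    for (int nz = row_offsets[row]; nz < row_offsets[row + 1]; ++nz)
        sum += values[nz] * x[column_indices[nz]];      // dot product of this row with x

    y[row] = sum;
}
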
*/ #pragma once #include #include "../util_type.cuh" #include "../block/block_reduce.cuh" #include "../block/block_scan.cuh" #include "../block/block_exchange.cuh" #include "../thread/thread_search.cuh" #include "../thread/thread_operators.cuh" #include "../iterator/cache_modified_input_iterator.cuh" #include "../iterator/counting_input_iterator.cuh" #include "../iterator/tex_ref_input_iterator.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * Tuning policy ******************************************************************************/ /** * Parameterizable tuning policy type for AgentSpmv */ template < int _BLOCK_THREADS, ///< Threads per thread block int _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) CacheLoadModifier _ROW_OFFSETS_SEARCH_LOAD_MODIFIER, ///< Cache load modifier for reading CSR row-offsets during search CacheLoadModifier _ROW_OFFSETS_LOAD_MODIFIER, ///< Cache load modifier for reading CSR row-offsets CacheLoadModifier _COLUMN_INDICES_LOAD_MODIFIER, ///< Cache load modifier for reading CSR column-indices CacheLoadModifier _VALUES_LOAD_MODIFIER, ///< Cache load modifier for reading CSR values CacheLoadModifier _VECTOR_VALUES_LOAD_MODIFIER, ///< Cache load modifier for reading vector values bool _DIRECT_LOAD_NONZEROS, ///< Whether to load nonzeros directly from global during sequential merging (vs. pre-staged through shared memory) BlockScanAlgorithm _SCAN_ALGORITHM> ///< The BlockScan algorithm to use struct AgentSpmvPolicy { enum { BLOCK_THREADS = _BLOCK_THREADS, ///< Threads per thread block ITEMS_PER_THREAD = _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) DIRECT_LOAD_NONZEROS = _DIRECT_LOAD_NONZEROS, ///< Whether to load nonzeros directly from global during sequential merging (pre-staged through shared memory) }; static const CacheLoadModifier ROW_OFFSETS_SEARCH_LOAD_MODIFIER = _ROW_OFFSETS_SEARCH_LOAD_MODIFIER; ///< Cache load modifier for reading CSR row-offsets static const CacheLoadModifier ROW_OFFSETS_LOAD_MODIFIER = _ROW_OFFSETS_LOAD_MODIFIER; ///< Cache load modifier for reading CSR row-offsets static const CacheLoadModifier COLUMN_INDICES_LOAD_MODIFIER = _COLUMN_INDICES_LOAD_MODIFIER; ///< Cache load modifier for reading CSR column-indices static const CacheLoadModifier VALUES_LOAD_MODIFIER = _VALUES_LOAD_MODIFIER; ///< Cache load modifier for reading CSR values static const CacheLoadModifier VECTOR_VALUES_LOAD_MODIFIER = _VECTOR_VALUES_LOAD_MODIFIER; ///< Cache load modifier for reading vector values static const BlockScanAlgorithm SCAN_ALGORITHM = _SCAN_ALGORITHM; ///< The BlockScan algorithm to use }; /****************************************************************************** * Thread block abstractions ******************************************************************************/ template < typename ValueT, ///< Matrix and vector value type typename OffsetT> ///< Signed integer type for sequence offsets struct SpmvParams { ValueT* d_values; ///< Pointer to the array of \p num_nonzeros values of the corresponding nonzero elements of matrix A. OffsetT* d_row_end_offsets; ///< Pointer to the array of \p m offsets demarcating the end of every row in \p d_column_indices and \p d_values OffsetT* d_column_indices; ///< Pointer to the array of \p num_nonzeros column-indices of the corresponding nonzero elements of matrix A. (Indices are zero-valued.) 
ValueT* d_vector_x; ///< Pointer to the array of \p num_cols values corresponding to the dense input vector x ValueT* d_vector_y; ///< Pointer to the array of \p num_rows values corresponding to the dense output vector y int num_rows; ///< Number of rows of matrix A. int num_cols; ///< Number of columns of matrix A. int num_nonzeros; ///< Number of nonzero elements of matrix A. ValueT alpha; ///< Alpha multiplicand ValueT beta; ///< Beta addend-multiplicand TexRefInputIterator t_vector_x; }; /** * \brief AgentSpmv implements a stateful abstraction of CUDA thread blocks for participating in device-wide SpMV. */ template < typename AgentSpmvPolicyT, ///< Parameterized AgentSpmvPolicy tuning policy type typename ValueT, ///< Matrix and vector value type typename OffsetT, ///< Signed integer type for sequence offsets bool HAS_ALPHA, ///< Whether the input parameter \p alpha is 1 bool HAS_BETA, ///< Whether the input parameter \p beta is 0 int PTX_ARCH = CUB_PTX_ARCH> ///< PTX compute capability struct AgentSpmv { //--------------------------------------------------------------------- // Types and constants //--------------------------------------------------------------------- /// Constants enum { BLOCK_THREADS = AgentSpmvPolicyT::BLOCK_THREADS, ITEMS_PER_THREAD = AgentSpmvPolicyT::ITEMS_PER_THREAD, TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, }; /// 2D merge path coordinate type typedef typename CubVector::Type CoordinateT; /// Input iterator wrapper types (for applying cache modifiers) typedef CacheModifiedInputIterator< AgentSpmvPolicyT::ROW_OFFSETS_SEARCH_LOAD_MODIFIER, OffsetT, OffsetT> RowOffsetsSearchIteratorT; typedef CacheModifiedInputIterator< AgentSpmvPolicyT::ROW_OFFSETS_LOAD_MODIFIER, OffsetT, OffsetT> RowOffsetsIteratorT; typedef CacheModifiedInputIterator< AgentSpmvPolicyT::COLUMN_INDICES_LOAD_MODIFIER, OffsetT, OffsetT> ColumnIndicesIteratorT; typedef CacheModifiedInputIterator< AgentSpmvPolicyT::VALUES_LOAD_MODIFIER, ValueT, OffsetT> ValueIteratorT; typedef CacheModifiedInputIterator< AgentSpmvPolicyT::VECTOR_VALUES_LOAD_MODIFIER, ValueT, OffsetT> VectorValueIteratorT; // Tuple type for scanning (pairs accumulated segment-value with segment-index) typedef KeyValuePair KeyValuePairT; // Reduce-value-by-segment scan operator typedef ReduceByKeyOp ReduceBySegmentOpT; // BlockReduce specialization typedef BlockReduce< ValueT, BLOCK_THREADS, BLOCK_REDUCE_WARP_REDUCTIONS> BlockReduceT; // BlockScan specialization typedef BlockScan< KeyValuePairT, BLOCK_THREADS, AgentSpmvPolicyT::SCAN_ALGORITHM> BlockScanT; // BlockScan specialization typedef BlockScan< ValueT, BLOCK_THREADS, AgentSpmvPolicyT::SCAN_ALGORITHM> BlockPrefixSumT; // BlockExchange specialization typedef BlockExchange< ValueT, BLOCK_THREADS, ITEMS_PER_THREAD> BlockExchangeT; /// Merge item type (either a non-zero value or a row-end offset) union MergeItem { // Value type to pair with index type OffsetT (NullType if loading values directly during merge) typedef typename If::Type MergeValueT; OffsetT row_end_offset; MergeValueT nonzero; }; /// Shared memory type required by this thread block struct _TempStorage { CoordinateT tile_coords[2]; union Aliasable { // Smem needed for tile of merge items MergeItem merge_items[ITEMS_PER_THREAD + TILE_ITEMS + 1]; // Smem needed for block exchange typename BlockExchangeT::TempStorage exchange; // Smem needed for block-wide reduction typename BlockReduceT::TempStorage reduce; // Smem needed for tile scanning typename BlockScanT::TempStorage scan; // Smem needed for tile prefix sum 
typename BlockPrefixSumT::TempStorage prefix_sum; } aliasable; }; /// Temporary storage type (unionable) struct TempStorage : Uninitialized<_TempStorage> {}; //--------------------------------------------------------------------- // Per-thread fields //--------------------------------------------------------------------- _TempStorage& temp_storage; /// Reference to temp_storage SpmvParams& spmv_params; ValueIteratorT wd_values; ///< Wrapped pointer to the array of \p num_nonzeros values of the corresponding nonzero elements of matrix A. RowOffsetsIteratorT wd_row_end_offsets; ///< Wrapped Pointer to the array of \p m offsets demarcating the end of every row in \p d_column_indices and \p d_values ColumnIndicesIteratorT wd_column_indices; ///< Wrapped Pointer to the array of \p num_nonzeros column-indices of the corresponding nonzero elements of matrix A. (Indices are zero-valued.) VectorValueIteratorT wd_vector_x; ///< Wrapped Pointer to the array of \p num_cols values corresponding to the dense input vector x VectorValueIteratorT wd_vector_y; ///< Wrapped Pointer to the array of \p num_cols values corresponding to the dense input vector x //--------------------------------------------------------------------- // Interface //--------------------------------------------------------------------- /** * Constructor */ __device__ __forceinline__ AgentSpmv( TempStorage& temp_storage, ///< Reference to temp_storage SpmvParams& spmv_params) ///< SpMV input parameter bundle : temp_storage(temp_storage.Alias()), spmv_params(spmv_params), wd_values(spmv_params.d_values), wd_row_end_offsets(spmv_params.d_row_end_offsets), wd_column_indices(spmv_params.d_column_indices), wd_vector_x(spmv_params.d_vector_x), wd_vector_y(spmv_params.d_vector_y) {} /** * Consume a merge tile, specialized for direct-load of nonzeros */ __device__ __forceinline__ KeyValuePairT ConsumeTile( int tile_idx, CoordinateT tile_start_coord, CoordinateT tile_end_coord, Int2Type is_direct_load) ///< Marker type indicating whether to load nonzeros directly during path-discovery or beforehand in batch { int tile_num_rows = tile_end_coord.x - tile_start_coord.x; int tile_num_nonzeros = tile_end_coord.y - tile_start_coord.y; OffsetT* s_tile_row_end_offsets = &temp_storage.aliasable.merge_items[0].row_end_offset; // Gather the row end-offsets for the merge tile into shared memory for (int item = threadIdx.x; item <= tile_num_rows; item += BLOCK_THREADS) { s_tile_row_end_offsets[item] = wd_row_end_offsets[tile_start_coord.x + item]; } CTA_SYNC(); // Search for the thread's starting coordinate within the merge tile CountingInputIterator tile_nonzero_indices(tile_start_coord.y); CoordinateT thread_start_coord; MergePathSearch( OffsetT(threadIdx.x * ITEMS_PER_THREAD), // Diagonal s_tile_row_end_offsets, // List A tile_nonzero_indices, // List B tile_num_rows, tile_num_nonzeros, thread_start_coord); CTA_SYNC(); // Perf-sync // Compute the thread's merge path segment CoordinateT thread_current_coord = thread_start_coord; KeyValuePairT scan_segment[ITEMS_PER_THREAD]; ValueT running_total = 0.0; #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { OffsetT nonzero_idx = CUB_MIN(tile_nonzero_indices[thread_current_coord.y], spmv_params.num_nonzeros - 1); OffsetT column_idx = wd_column_indices[nonzero_idx]; ValueT value = wd_values[nonzero_idx]; ValueT vector_value = spmv_params.t_vector_x[column_idx]; #if (CUB_PTX_ARCH >= 350) vector_value = wd_vector_x[column_idx]; #endif ValueT nonzero = value * vector_value; OffsetT row_end_offset 
= s_tile_row_end_offsets[thread_current_coord.x]; if (tile_nonzero_indices[thread_current_coord.y] < row_end_offset) { // Move down (accumulate) running_total += nonzero; scan_segment[ITEM].value = running_total; scan_segment[ITEM].key = tile_num_rows; ++thread_current_coord.y; } else { // Move right (reset) scan_segment[ITEM].value = running_total; scan_segment[ITEM].key = thread_current_coord.x; running_total = 0.0; ++thread_current_coord.x; } } CTA_SYNC(); // Block-wide reduce-value-by-segment KeyValuePairT tile_carry; ReduceBySegmentOpT scan_op; KeyValuePairT scan_item; scan_item.value = running_total; scan_item.key = thread_current_coord.x; BlockScanT(temp_storage.aliasable.scan).ExclusiveScan(scan_item, scan_item, scan_op, tile_carry); if (tile_num_rows > 0) { if (threadIdx.x == 0) scan_item.key = -1; // Direct scatter #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { if (scan_segment[ITEM].key < tile_num_rows) { if (scan_item.key == scan_segment[ITEM].key) scan_segment[ITEM].value = scan_item.value + scan_segment[ITEM].value; if (HAS_ALPHA) { scan_segment[ITEM].value *= spmv_params.alpha; } if (HAS_BETA) { // Update the output vector element ValueT addend = spmv_params.beta * wd_vector_y[tile_start_coord.x + scan_segment[ITEM].key]; scan_segment[ITEM].value += addend; } // Set the output vector element spmv_params.d_vector_y[tile_start_coord.x + scan_segment[ITEM].key] = scan_segment[ITEM].value; } } } // Return the tile's running carry-out return tile_carry; } /** * Consume a merge tile, specialized for indirect load of nonzeros */ __device__ __forceinline__ KeyValuePairT ConsumeTile( int tile_idx, CoordinateT tile_start_coord, CoordinateT tile_end_coord, Int2Type is_direct_load) ///< Marker type indicating whether to load nonzeros directly during path-discovery or beforehand in batch { int tile_num_rows = tile_end_coord.x - tile_start_coord.x; int tile_num_nonzeros = tile_end_coord.y - tile_start_coord.y; #if (CUB_PTX_ARCH >= 520) /* OffsetT* s_tile_row_end_offsets = &temp_storage.merge_items[tile_num_nonzeros].row_end_offset; ValueT* s_tile_nonzeros = &temp_storage.merge_items[0].nonzero; OffsetT col_indices[ITEMS_PER_THREAD]; ValueT mat_values[ITEMS_PER_THREAD]; int nonzero_indices[ITEMS_PER_THREAD]; // Gather the nonzeros for the merge tile into shared memory #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { nonzero_indices[ITEM] = threadIdx.x + (ITEM * BLOCK_THREADS); ValueIteratorT a = wd_values + tile_start_coord.y + nonzero_indices[ITEM]; ColumnIndicesIteratorT ci = wd_column_indices + tile_start_coord.y + nonzero_indices[ITEM]; col_indices[ITEM] = (nonzero_indices[ITEM] < tile_num_nonzeros) ? *ci : 0; mat_values[ITEM] = (nonzero_indices[ITEM] < tile_num_nonzeros) ? 
*a : 0.0; } CTA_SYNC(); #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { VectorValueIteratorT x = wd_vector_x + col_indices[ITEM]; mat_values[ITEM] *= *x; } CTA_SYNC(); #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { ValueT *s = s_tile_nonzeros + nonzero_indices[ITEM]; *s = mat_values[ITEM]; } CTA_SYNC(); */ OffsetT* s_tile_row_end_offsets = &temp_storage.merge_items[0].row_end_offset; ValueT* s_tile_nonzeros = &temp_storage.merge_items[tile_num_rows + ITEMS_PER_THREAD].nonzero; // Gather the nonzeros for the merge tile into shared memory #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { int nonzero_idx = threadIdx.x + (ITEM * BLOCK_THREADS); ValueIteratorT a = wd_values + tile_start_coord.y + nonzero_idx; ColumnIndicesIteratorT ci = wd_column_indices + tile_start_coord.y + nonzero_idx; ValueT* s = s_tile_nonzeros + nonzero_idx; if (nonzero_idx < tile_num_nonzeros) { OffsetT column_idx = *ci; ValueT value = *a; ValueT vector_value = spmv_params.t_vector_x[column_idx]; vector_value = wd_vector_x[column_idx]; ValueT nonzero = value * vector_value; *s = nonzero; } } #else OffsetT* s_tile_row_end_offsets = &temp_storage.aliasable.merge_items[0].row_end_offset; ValueT* s_tile_nonzeros = &temp_storage.aliasable.merge_items[tile_num_rows + ITEMS_PER_THREAD].nonzero; // Gather the nonzeros for the merge tile into shared memory if (tile_num_nonzeros > 0) { #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { int nonzero_idx = threadIdx.x + (ITEM * BLOCK_THREADS); nonzero_idx = CUB_MIN(nonzero_idx, tile_num_nonzeros - 1); OffsetT column_idx = wd_column_indices[tile_start_coord.y + nonzero_idx]; ValueT value = wd_values[tile_start_coord.y + nonzero_idx]; ValueT vector_value = spmv_params.t_vector_x[column_idx]; #if (CUB_PTX_ARCH >= 350) vector_value = wd_vector_x[column_idx]; #endif ValueT nonzero = value * vector_value; s_tile_nonzeros[nonzero_idx] = nonzero; } } #endif // Gather the row end-offsets for the merge tile into shared memory #pragma unroll 1 for (int item = threadIdx.x; item <= tile_num_rows; item += BLOCK_THREADS) { s_tile_row_end_offsets[item] = wd_row_end_offsets[tile_start_coord.x + item]; } CTA_SYNC(); // Search for the thread's starting coordinate within the merge tile CountingInputIterator tile_nonzero_indices(tile_start_coord.y); CoordinateT thread_start_coord; MergePathSearch( OffsetT(threadIdx.x * ITEMS_PER_THREAD), // Diagonal s_tile_row_end_offsets, // List A tile_nonzero_indices, // List B tile_num_rows, tile_num_nonzeros, thread_start_coord); CTA_SYNC(); // Perf-sync // Compute the thread's merge path segment CoordinateT thread_current_coord = thread_start_coord; KeyValuePairT scan_segment[ITEMS_PER_THREAD]; ValueT running_total = 0.0; OffsetT row_end_offset = s_tile_row_end_offsets[thread_current_coord.x]; ValueT nonzero = s_tile_nonzeros[thread_current_coord.y]; #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { if (tile_nonzero_indices[thread_current_coord.y] < row_end_offset) { // Move down (accumulate) scan_segment[ITEM].value = nonzero; running_total += nonzero; ++thread_current_coord.y; nonzero = s_tile_nonzeros[thread_current_coord.y]; } else { // Move right (reset) scan_segment[ITEM].value = 0.0; running_total = 0.0; ++thread_current_coord.x; row_end_offset = s_tile_row_end_offsets[thread_current_coord.x]; } scan_segment[ITEM].key = thread_current_coord.x; } CTA_SYNC(); // Block-wide reduce-value-by-segment KeyValuePairT tile_carry; ReduceBySegmentOpT scan_op; 
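
/*
 * A host-side sketch of the merge-path search used above (simplified; CUB's MergePathSearch is
 * more general).  For a diagonal d it finds the split (x, y) with x + y == d such that the
 * first x row-end offsets and the first y nonzero indices would already have been consumed in
 * a merge of the two sorted lists, giving every thread an equal share of (rows + nonzeros).
 */
#include <algorithm>

struct CoordSketch { int x; int y; };

inline CoordSketch merge_path_search_sketch(int        diagonal,
                                            const int* row_end_offsets,   // list A, ascending
                                            int        num_rows,
                                            int        num_nonzeros)      // list B is 0, 1, 2, ...
{
    int lo = std::max(diagonal - num_nonzeros, 0);
    int hi = std::min(diagonal, num_rows);

    while (lo < hi)                                     // binary search along the diagonal
    {
        int mid = (lo + hi) / 2;
        if (row_end_offsets[mid] <= diagonal - mid - 1)
            lo = mid + 1;                               // split point lies further down list A
        else
            hi = mid;
    }
    CoordSketch c = { lo, diagonal - lo };
    return c;
}
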
KeyValuePairT scan_item; scan_item.value = running_total; scan_item.key = thread_current_coord.x; BlockScanT(temp_storage.aliasable.scan).ExclusiveScan(scan_item, scan_item, scan_op, tile_carry); if (threadIdx.x == 0) { scan_item.key = thread_start_coord.x; scan_item.value = 0.0; } if (tile_num_rows > 0) { CTA_SYNC(); // Scan downsweep and scatter ValueT* s_partials = &temp_storage.aliasable.merge_items[0].nonzero; if (scan_item.key != scan_segment[0].key) { s_partials[scan_item.key] = scan_item.value; } else { scan_segment[0].value += scan_item.value; } #pragma unroll for (int ITEM = 1; ITEM < ITEMS_PER_THREAD; ++ITEM) { if (scan_segment[ITEM - 1].key != scan_segment[ITEM].key) { s_partials[scan_segment[ITEM - 1].key] = scan_segment[ITEM - 1].value; } else { scan_segment[ITEM].value += scan_segment[ITEM - 1].value; } } CTA_SYNC(); #pragma unroll 1 for (int item = threadIdx.x; item < tile_num_rows; item += BLOCK_THREADS) { spmv_params.d_vector_y[tile_start_coord.x + item] = s_partials[item]; } } // Return the tile's running carry-out return tile_carry; } /** * Consume a merge tile, specialized for indirect load of nonzeros * / template __device__ __forceinline__ KeyValuePairT ConsumeTile1( int tile_idx, CoordinateT tile_start_coord, CoordinateT tile_end_coord, IsDirectLoadT is_direct_load) ///< Marker type indicating whether to load nonzeros directly during path-discovery or beforehand in batch { int tile_num_rows = tile_end_coord.x - tile_start_coord.x; int tile_num_nonzeros = tile_end_coord.y - tile_start_coord.y; OffsetT* s_tile_row_end_offsets = &temp_storage.merge_items[0].row_end_offset; int warp_idx = threadIdx.x / WARP_THREADS; int lane_idx = LaneId(); // Gather the row end-offsets for the merge tile into shared memory #pragma unroll 1 for (int item = threadIdx.x; item <= tile_num_rows; item += BLOCK_THREADS) { s_tile_row_end_offsets[item] = wd_row_end_offsets[tile_start_coord.x + item]; } CTA_SYNC(); // Search for warp start/end coords if (lane_idx == 0) { MergePathSearch( OffsetT(warp_idx * ITEMS_PER_WARP), // Diagonal s_tile_row_end_offsets, // List A CountingInputIterator(tile_start_coord.y), // List B tile_num_rows, tile_num_nonzeros, temp_storage.warp_coords[warp_idx]); CoordinateT last = {tile_num_rows, tile_num_nonzeros}; temp_storage.warp_coords[WARPS] = last; } CTA_SYNC(); CoordinateT warp_coord = temp_storage.warp_coords[warp_idx]; CoordinateT warp_end_coord = temp_storage.warp_coords[warp_idx + 1]; OffsetT warp_nonzero_idx = tile_start_coord.y + warp_coord.y; // Consume whole rows #pragma unroll 1 for (; warp_coord.x < warp_end_coord.x; ++warp_coord.x) { ValueT row_total = 0.0; OffsetT row_end_offset = s_tile_row_end_offsets[warp_coord.x]; #pragma unroll 1 for (OffsetT nonzero_idx = warp_nonzero_idx + lane_idx; nonzero_idx < row_end_offset; nonzero_idx += WARP_THREADS) { OffsetT column_idx = wd_column_indices[nonzero_idx]; ValueT value = wd_values[nonzero_idx]; ValueT vector_value = wd_vector_x[column_idx]; row_total += value * vector_value; } // Warp reduce row_total = WarpReduceT(temp_storage.warp_reduce[warp_idx]).Sum(row_total); // Output if (lane_idx == 0) { spmv_params.d_vector_y[tile_start_coord.x + warp_coord.x] = row_total; } warp_nonzero_idx = row_end_offset; } // Consume partial portion of thread's last row if (warp_nonzero_idx < tile_start_coord.y + warp_end_coord.y) { ValueT row_total = 0.0; for (OffsetT nonzero_idx = warp_nonzero_idx + lane_idx; nonzero_idx < tile_start_coord.y + warp_end_coord.y; nonzero_idx += WARP_THREADS) { OffsetT column_idx = 
wd_column_indices[nonzero_idx]; ValueT value = wd_values[nonzero_idx]; ValueT vector_value = wd_vector_x[column_idx]; row_total += value * vector_value; } // Warp reduce row_total = WarpReduceT(temp_storage.warp_reduce[warp_idx]).Sum(row_total); // Output if (lane_idx == 0) { spmv_params.d_vector_y[tile_start_coord.x + warp_coord.x] = row_total; } } // Return the tile's running carry-out KeyValuePairT tile_carry(tile_num_rows, 0.0); return tile_carry; } */ /** * Consume a merge tile, specialized for indirect load of nonzeros * / __device__ __forceinline__ KeyValuePairT ConsumeTile2( int tile_idx, CoordinateT tile_start_coord, CoordinateT tile_end_coord, Int2Type is_direct_load) ///< Marker type indicating whether to load nonzeros directly during path-discovery or beforehand in batch { int tile_num_rows = tile_end_coord.x - tile_start_coord.x; int tile_num_nonzeros = tile_end_coord.y - tile_start_coord.y; ValueT* s_tile_nonzeros = &temp_storage.merge_items[0].nonzero; ValueT nonzeros[ITEMS_PER_THREAD]; // Gather the nonzeros for the merge tile into shared memory #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { int nonzero_idx = threadIdx.x + (ITEM * BLOCK_THREADS); nonzero_idx = CUB_MIN(nonzero_idx, tile_num_nonzeros - 1); OffsetT column_idx = wd_column_indices[tile_start_coord.y + nonzero_idx]; ValueT value = wd_values[tile_start_coord.y + nonzero_idx]; ValueT vector_value = spmv_params.t_vector_x[column_idx]; #if (CUB_PTX_ARCH >= 350) vector_value = wd_vector_x[column_idx]; #endif nonzeros[ITEM] = value * vector_value; } // Exchange striped->blocked BlockExchangeT(temp_storage.exchange).StripedToBlocked(nonzeros); CTA_SYNC(); // Compute an inclusive prefix sum BlockPrefixSumT(temp_storage.prefix_sum).InclusiveSum(nonzeros, nonzeros); CTA_SYNC(); if (threadIdx.x == 0) s_tile_nonzeros[0] = 0.0; // Scatter back to smem #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { int item_idx = (threadIdx.x * ITEMS_PER_THREAD) + ITEM + 1; s_tile_nonzeros[item_idx] = nonzeros[ITEM]; } CTA_SYNC(); // Gather the row end-offsets for the merge tile into shared memory #pragma unroll 1 for (int item = threadIdx.x; item < tile_num_rows; item += BLOCK_THREADS) { OffsetT start = CUB_MAX(wd_row_end_offsets[tile_start_coord.x + item - 1], tile_start_coord.y); OffsetT end = wd_row_end_offsets[tile_start_coord.x + item]; start -= tile_start_coord.y; end -= tile_start_coord.y; ValueT row_partial = s_tile_nonzeros[end] - s_tile_nonzeros[start]; spmv_params.d_vector_y[tile_start_coord.x + item] = row_partial; } // Get the tile's carry-out KeyValuePairT tile_carry; if (threadIdx.x == 0) { tile_carry.key = tile_num_rows; OffsetT start = CUB_MAX(wd_row_end_offsets[tile_end_coord.x - 1], tile_start_coord.y); start -= tile_start_coord.y; OffsetT end = tile_num_nonzeros; tile_carry.value = s_tile_nonzeros[end] - s_tile_nonzeros[start]; } // Return the tile's running carry-out return tile_carry; } */ /** * Consume input tile */ __device__ __forceinline__ void ConsumeTile( CoordinateT* d_tile_coordinates, ///< [in] Pointer to the temporary array of tile starting coordinates KeyValuePairT* d_tile_carry_pairs, ///< [out] Pointer to the temporary array carry-out dot product row-ids, one per block int num_merge_tiles) ///< [in] Number of merge tiles { int tile_idx = (blockIdx.x * gridDim.y) + blockIdx.y; // Current tile index if (tile_idx >= num_merge_tiles) return; // Read our starting coordinates if (threadIdx.x < 2) { if (d_tile_coordinates == NULL) { // Search our starting coordinates 
OffsetT diagonal = (tile_idx + threadIdx.x) * TILE_ITEMS; CoordinateT tile_coord; CountingInputIterator nonzero_indices(0); // Search the merge path MergePathSearch( diagonal, RowOffsetsSearchIteratorT(spmv_params.d_row_end_offsets), nonzero_indices, spmv_params.num_rows, spmv_params.num_nonzeros, tile_coord); temp_storage.tile_coords[threadIdx.x] = tile_coord; } else { temp_storage.tile_coords[threadIdx.x] = d_tile_coordinates[tile_idx + threadIdx.x]; } } CTA_SYNC(); CoordinateT tile_start_coord = temp_storage.tile_coords[0]; CoordinateT tile_end_coord = temp_storage.tile_coords[1]; // Consume multi-segment tile KeyValuePairT tile_carry = ConsumeTile( tile_idx, tile_start_coord, tile_end_coord, Int2Type()); // Output the tile's carry-out if (threadIdx.x == 0) { if (HAS_ALPHA) tile_carry.value *= spmv_params.alpha; tile_carry.key += tile_start_coord.x; d_tile_carry_pairs[tile_idx] = tile_carry; } } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/agent/single_pass_scan_operators.cuh000066400000000000000000000654641411340063500256050ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
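
/*
 * A host-side sketch (illustrative, not the device code) of what the later fixup pass does
 * with the per-tile carry-out pairs written above: each pair holds the matrix row a tile
 * ended in and the partial dot product accumulated for that row; the partials only need to be
 * added into y, keyed by row index, which is what the segment-fixup kernel does on the device.
 */
struct CarrySketch { int row; float partial; };

inline void apply_tile_carries_sketch(const CarrySketch* carries, int num_tiles,
                                      float* y, int num_rows)
{
    for (int t = 0; t < num_tiles; ++t)
        if (carries[t].row >= 0 && carries[t].row < num_rows)   // skip padding / out-of-range keys
            y[carries[t].row] += carries[t].partial;            // fold the straddling row's partial into y
}
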
* ******************************************************************************/ /** * \file * Callback operator types for supplying BlockScan prefixes */ #pragma once #include #include "../thread/thread_load.cuh" #include "../thread/thread_store.cuh" #include "../warp/warp_reduce.cuh" #include "../util_arch.cuh" #include "../util_device.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * Prefix functor type for maintaining a running prefix while scanning a * region independent of other thread blocks ******************************************************************************/ /** * Stateful callback operator type for supplying BlockScan prefixes. * Maintains a running prefix that can be applied to consecutive * BlockScan operations. */ template < typename T, ///< BlockScan value type typename ScanOpT> ///< Wrapped scan operator type struct BlockScanRunningPrefixOp { ScanOpT op; ///< Wrapped scan operator T running_total; ///< Running block-wide prefix /// Constructor __device__ __forceinline__ BlockScanRunningPrefixOp(ScanOpT op) : op(op) {} /// Constructor __device__ __forceinline__ BlockScanRunningPrefixOp( T starting_prefix, ScanOpT op) : op(op), running_total(starting_prefix) {} /** * Prefix callback operator. Returns the block-wide running_total in thread-0. */ __device__ __forceinline__ T operator()( const T &block_aggregate) ///< The aggregate sum of the BlockScan inputs { T retval = running_total; running_total = op(running_total, block_aggregate); return retval; } }; /****************************************************************************** * Generic tile status interface types for block-cooperative scans ******************************************************************************/ /** * Enumerations of tile status */ enum ScanTileStatus { SCAN_TILE_OOB, // Out-of-bounds (e.g., padding) SCAN_TILE_INVALID = 99, // Not yet processed SCAN_TILE_PARTIAL, // Tile aggregate is available SCAN_TILE_INCLUSIVE, // Inclusive tile prefix is available }; /** * Tile status interface. */ template < typename T, bool SINGLE_WORD = Traits::PRIMITIVE> struct ScanTileState; /** * Tile status interface specialized for scan status and value types * that can be combined into one machine word that can be * read/written coherently in a single access. */ template struct ScanTileState { // Status word type typedef typename If<(sizeof(T) == 8), long long, typename If<(sizeof(T) == 4), int, typename If<(sizeof(T) == 2), short, char>::Type>::Type>::Type StatusWord; // Unit word type typedef typename If<(sizeof(T) == 8), longlong2, typename If<(sizeof(T) == 4), int2, typename If<(sizeof(T) == 2), int, uchar2>::Type>::Type>::Type TxnWord; // Device word type struct TileDescriptor { StatusWord status; T value; }; // Constants enum { TILE_STATUS_PADDING = CUB_PTX_WARP_THREADS, }; // Device storage TxnWord *d_tile_descriptors; /// Constructor __host__ __device__ __forceinline__ ScanTileState() : d_tile_descriptors(NULL) {} /// Initializer __host__ __device__ __forceinline__ cudaError_t Init( int /*num_tiles*/, ///< [in] Number of tiles void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. 
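
/*
 * A sketch of how a running-prefix functor such as BlockScanRunningPrefixOp is meant to be
 * used (following the pattern in CUB's BlockScan documentation; buffer layout and types here
 * are illustrative): one thread block scans consecutive tiles of a buffer, and the functor
 * carries the running total from one tile into the next.
 */
template <int BLOCK_THREADS>
__global__ void multi_tile_scan_sketch(int *d_data, int num_tiles)
{
    typedef BlockScan<int, BLOCK_THREADS> BlockScanT;
    __shared__ typename BlockScanT::TempStorage temp_storage;

    BlockScanRunningPrefixOp<int, Sum> prefix_op(0, Sum());     // running total starts at 0

    for (int tile = 0; tile < num_tiles; ++tile)
    {
        int idx  = tile * BLOCK_THREADS + threadIdx.x;
        int item = d_data[idx];

        BlockScanT(temp_storage).ExclusiveSum(item, item, prefix_op);   // thread0's callback supplies the tile prefix
        d_data[idx] = item;

        CTA_SYNC();                                             // temp_storage is reused by the next tile
    }
}
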
size_t /*temp_storage_bytes*/) ///< [in] Size in bytes of \t d_temp_storage allocation { d_tile_descriptors = reinterpret_cast(d_temp_storage); return cudaSuccess; } /** * Compute device memory needed for tile status */ __host__ __device__ __forceinline__ static cudaError_t AllocationSize( int num_tiles, ///< [in] Number of tiles size_t &temp_storage_bytes) ///< [out] Size in bytes of \t d_temp_storage allocation { temp_storage_bytes = (num_tiles + TILE_STATUS_PADDING) * sizeof(TileDescriptor); // bytes needed for tile status descriptors return cudaSuccess; } /** * Initialize (from device) */ __device__ __forceinline__ void InitializeStatus(int num_tiles) { int tile_idx = (blockIdx.x * blockDim.x) + threadIdx.x; TxnWord val = TxnWord(); TileDescriptor *descriptor = reinterpret_cast(&val); if (tile_idx < num_tiles) { // Not-yet-set descriptor->status = StatusWord(SCAN_TILE_INVALID); d_tile_descriptors[TILE_STATUS_PADDING + tile_idx] = val; } if ((blockIdx.x == 0) && (threadIdx.x < TILE_STATUS_PADDING)) { // Padding descriptor->status = StatusWord(SCAN_TILE_OOB); d_tile_descriptors[threadIdx.x] = val; } } /** * Update the specified tile's inclusive value and corresponding status */ __device__ __forceinline__ void SetInclusive(int tile_idx, T tile_inclusive) { TileDescriptor tile_descriptor; tile_descriptor.status = SCAN_TILE_INCLUSIVE; tile_descriptor.value = tile_inclusive; TxnWord alias; *reinterpret_cast(&alias) = tile_descriptor; ThreadStore(d_tile_descriptors + TILE_STATUS_PADDING + tile_idx, alias); } /** * Update the specified tile's partial value and corresponding status */ __device__ __forceinline__ void SetPartial(int tile_idx, T tile_partial) { TileDescriptor tile_descriptor; tile_descriptor.status = SCAN_TILE_PARTIAL; tile_descriptor.value = tile_partial; TxnWord alias; *reinterpret_cast(&alias) = tile_descriptor; ThreadStore(d_tile_descriptors + TILE_STATUS_PADDING + tile_idx, alias); } /** * Wait for the corresponding tile to become non-invalid */ __device__ __forceinline__ void WaitForValid( int tile_idx, StatusWord &status, T &value) { TileDescriptor tile_descriptor; do { __threadfence_block(); // prevent hoisting loads from loop TxnWord alias = ThreadLoad(d_tile_descriptors + TILE_STATUS_PADDING + tile_idx); tile_descriptor = reinterpret_cast(alias); } while (WARP_ANY((tile_descriptor.status == SCAN_TILE_INVALID), 0xffffffff)); status = tile_descriptor.status; value = tile_descriptor.value; } }; /** * Tile status interface specialized for scan status and value types that * cannot be combined into one machine word. */ template struct ScanTileState { // Status word type typedef char StatusWord; // Constants enum { TILE_STATUS_PADDING = CUB_PTX_WARP_THREADS, }; // Device storage StatusWord *d_tile_status; T *d_tile_partial; T *d_tile_inclusive; /// Constructor __host__ __device__ __forceinline__ ScanTileState() : d_tile_status(NULL), d_tile_partial(NULL), d_tile_inclusive(NULL) {} /// Initializer __host__ __device__ __forceinline__ cudaError_t Init( int num_tiles, ///< [in] Number of tiles void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. 
size_t temp_storage_bytes) ///< [in] Size in bytes of \t d_temp_storage allocation { cudaError_t error = cudaSuccess; do { void* allocations[3]; size_t allocation_sizes[3]; allocation_sizes[0] = (num_tiles + TILE_STATUS_PADDING) * sizeof(StatusWord); // bytes needed for tile status descriptors allocation_sizes[1] = (num_tiles + TILE_STATUS_PADDING) * sizeof(Uninitialized); // bytes needed for partials allocation_sizes[2] = (num_tiles + TILE_STATUS_PADDING) * sizeof(Uninitialized); // bytes needed for inclusives // Compute allocation pointers into the single storage blob if (CubDebug(error = AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes))) break; // Alias the offsets d_tile_status = reinterpret_cast(allocations[0]); d_tile_partial = reinterpret_cast(allocations[1]); d_tile_inclusive = reinterpret_cast(allocations[2]); } while (0); return error; } /** * Compute device memory needed for tile status */ __host__ __device__ __forceinline__ static cudaError_t AllocationSize( int num_tiles, ///< [in] Number of tiles size_t &temp_storage_bytes) ///< [out] Size in bytes of \t d_temp_storage allocation { // Specify storage allocation requirements size_t allocation_sizes[3]; allocation_sizes[0] = (num_tiles + TILE_STATUS_PADDING) * sizeof(StatusWord); // bytes needed for tile status descriptors allocation_sizes[1] = (num_tiles + TILE_STATUS_PADDING) * sizeof(Uninitialized); // bytes needed for partials allocation_sizes[2] = (num_tiles + TILE_STATUS_PADDING) * sizeof(Uninitialized); // bytes needed for inclusives // Set the necessary size of the blob void* allocations[3]; return CubDebug(AliasTemporaries(NULL, temp_storage_bytes, allocations, allocation_sizes)); } /** * Initialize (from device) */ __device__ __forceinline__ void InitializeStatus(int num_tiles) { int tile_idx = (blockIdx.x * blockDim.x) + threadIdx.x; if (tile_idx < num_tiles) { // Not-yet-set d_tile_status[TILE_STATUS_PADDING + tile_idx] = StatusWord(SCAN_TILE_INVALID); } if ((blockIdx.x == 0) && (threadIdx.x < TILE_STATUS_PADDING)) { // Padding d_tile_status[threadIdx.x] = StatusWord(SCAN_TILE_OOB); } } /** * Update the specified tile's inclusive value and corresponding status */ __device__ __forceinline__ void SetInclusive(int tile_idx, T tile_inclusive) { // Update tile inclusive value ThreadStore(d_tile_inclusive + TILE_STATUS_PADDING + tile_idx, tile_inclusive); // Fence __threadfence(); // Update tile status ThreadStore(d_tile_status + TILE_STATUS_PADDING + tile_idx, StatusWord(SCAN_TILE_INCLUSIVE)); } /** * Update the specified tile's partial value and corresponding status */ __device__ __forceinline__ void SetPartial(int tile_idx, T tile_partial) { // Update tile partial value ThreadStore(d_tile_partial + TILE_STATUS_PADDING + tile_idx, tile_partial); // Fence __threadfence(); // Update tile status ThreadStore(d_tile_status + TILE_STATUS_PADDING + tile_idx, StatusWord(SCAN_TILE_PARTIAL)); } /** * Wait for the corresponding tile to become non-invalid */ __device__ __forceinline__ void WaitForValid( int tile_idx, StatusWord &status, T &value) { do { status = ThreadLoad(d_tile_status + TILE_STATUS_PADDING + tile_idx); __threadfence(); // prevent hoisting loads from loop or loads below above this one } while (status == SCAN_TILE_INVALID); if (status == StatusWord(SCAN_TILE_PARTIAL)) value = ThreadLoad(d_tile_partial + TILE_STATUS_PADDING + tile_idx); else value = ThreadLoad(d_tile_inclusive + TILE_STATUS_PADDING + tile_idx); } }; 
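/*
 * A minimal sketch (not part of the original CUB sources) of the host-side
 * protocol implied by the ScanTileState interface above.  The kernel
 * `ExampleInitTileStatusKernel` and the helper `ExamplePrepareTileState` are
 * hypothetical names; only AllocationSize(), Init() and InitializeStatus()
 * come from the interface itself.
 *
 *     // Hypothetical kernel: one thread per tile descriptor
 *     template <typename ScanTileStateT>
 *     __global__ void ExampleInitTileStatusKernel(ScanTileStateT tile_state, int num_tiles)
 *     {
 *         tile_state.InitializeStatus(num_tiles);
 *     }
 *
 *     // Host side (error checking omitted)
 *     cudaError_t ExamplePrepareTileState(int num_tiles)
 *     {
 *         typedef cub::ScanTileState<float> ScanTileStateT;
 *
 *         // 1. Query the temporary-storage requirement for num_tiles tiles
 *         size_t temp_storage_bytes = 0;
 *         ScanTileStateT::AllocationSize(num_tiles, temp_storage_bytes);
 *
 *         // 2. Allocate device storage and bind it to a tile-status object
 *         void *d_temp_storage = NULL;
 *         cudaMalloc(&d_temp_storage, temp_storage_bytes);
 *         ScanTileStateT tile_state;
 *         tile_state.Init(num_tiles, d_temp_storage, temp_storage_bytes);
 *
 *         // 3. Mark every tile SCAN_TILE_INVALID (and the padding SCAN_TILE_OOB)
 *         //    before any scan kernel inspects the descriptors
 *         int init_threads = 128;
 *         int init_blocks  = (num_tiles + init_threads - 1) / init_threads;
 *         ExampleInitTileStatusKernel<<<init_blocks, init_threads>>>(tile_state, num_tiles);
 *
 *         // A subsequent chained-scan kernel publishes per-tile results with
 *         // SetPartial()/SetInclusive() and polls predecessors with WaitForValid().
 *         return cudaSuccess;
 *     }
 */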
/******************************************************************************
 * ReduceByKey tile status interface types for block-cooperative scans
 ******************************************************************************/

/**
 * Tile status interface for reduction by key.
 *
 */
template <
    typename    ValueT,
    typename    KeyT,
    bool        SINGLE_WORD = (Traits<ValueT>::PRIMITIVE) && (sizeof(ValueT) + sizeof(KeyT) < 16)>
struct ReduceByKeyScanTileState;


/**
 * Tile status interface for reduction by key, specialized for scan status and value types that
 * cannot be combined into one machine word.
 */
template <
    typename    ValueT,
    typename    KeyT>
struct ReduceByKeyScanTileState<ValueT, KeyT, false> :
    ScanTileState<KeyValuePair<KeyT, ValueT> >
{
    typedef ScanTileState<KeyValuePair<KeyT, ValueT> > SuperClass;

    /// Constructor
    __host__ __device__ __forceinline__
    ReduceByKeyScanTileState() : SuperClass() {}
};


/**
 * Tile status interface for reduction by key, specialized for scan status and value types that
 * can be combined into one machine word that can be read/written coherently in a single access.
 */
template <
    typename    ValueT,
    typename    KeyT>
struct ReduceByKeyScanTileState<ValueT, KeyT, true>
{
    typedef KeyValuePair<KeyT, ValueT> KeyValuePairT;

    // Constants
    enum
    {
        PAIR_SIZE           = sizeof(ValueT) + sizeof(KeyT),
        TXN_WORD_SIZE       = 1 << Log2<PAIR_SIZE + 1>::VALUE,
        STATUS_WORD_SIZE    = TXN_WORD_SIZE - PAIR_SIZE,

        TILE_STATUS_PADDING = CUB_PTX_WARP_THREADS,
    };

    // Status word type
    typedef typename If<(STATUS_WORD_SIZE == 8),
        long long,
        typename If<(STATUS_WORD_SIZE == 4),
            int,
            typename If<(STATUS_WORD_SIZE == 2),
                short,
                char>::Type>::Type>::Type StatusWord;

    // Txn word type
    typedef typename If<(TXN_WORD_SIZE == 16),
        longlong2,
        typename If<(TXN_WORD_SIZE == 8),
            long long,
            int>::Type>::Type TxnWord;

    // Device word type (for when sizeof(ValueT) == sizeof(KeyT))
    struct TileDescriptorBigStatus
    {
        KeyT        key;
        ValueT      value;
        StatusWord  status;
    };

    // Device word type (for when sizeof(ValueT) != sizeof(KeyT))
    struct TileDescriptorLittleStatus
    {
        ValueT      value;
        StatusWord  status;
        KeyT        key;
    };

    // Device word type
    typedef typename If<
            (sizeof(ValueT) == sizeof(KeyT)),
            TileDescriptorBigStatus,
            TileDescriptorLittleStatus>::Type
        TileDescriptor;

    // Device storage
    TxnWord *d_tile_descriptors;

    /// Constructor
    __host__ __device__ __forceinline__
    ReduceByKeyScanTileState()
    :
        d_tile_descriptors(NULL)
    {}


    /// Initializer
    __host__ __device__ __forceinline__
    cudaError_t Init(
        int     /*num_tiles*/,                      ///< [in] Number of tiles
        void    *d_temp_storage,                    ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done.
size_t /*temp_storage_bytes*/) ///< [in] Size in bytes of \t d_temp_storage allocation { d_tile_descriptors = reinterpret_cast(d_temp_storage); return cudaSuccess; } /** * Compute device memory needed for tile status */ __host__ __device__ __forceinline__ static cudaError_t AllocationSize( int num_tiles, ///< [in] Number of tiles size_t &temp_storage_bytes) ///< [out] Size in bytes of \t d_temp_storage allocation { temp_storage_bytes = (num_tiles + TILE_STATUS_PADDING) * sizeof(TileDescriptor); // bytes needed for tile status descriptors return cudaSuccess; } /** * Initialize (from device) */ __device__ __forceinline__ void InitializeStatus(int num_tiles) { int tile_idx = (blockIdx.x * blockDim.x) + threadIdx.x; TxnWord val = TxnWord(); TileDescriptor *descriptor = reinterpret_cast(&val); if (tile_idx < num_tiles) { // Not-yet-set descriptor->status = StatusWord(SCAN_TILE_INVALID); d_tile_descriptors[TILE_STATUS_PADDING + tile_idx] = val; } if ((blockIdx.x == 0) && (threadIdx.x < TILE_STATUS_PADDING)) { // Padding descriptor->status = StatusWord(SCAN_TILE_OOB); d_tile_descriptors[threadIdx.x] = val; } } /** * Update the specified tile's inclusive value and corresponding status */ __device__ __forceinline__ void SetInclusive(int tile_idx, KeyValuePairT tile_inclusive) { TileDescriptor tile_descriptor; tile_descriptor.status = SCAN_TILE_INCLUSIVE; tile_descriptor.value = tile_inclusive.value; tile_descriptor.key = tile_inclusive.key; TxnWord alias; *reinterpret_cast(&alias) = tile_descriptor; ThreadStore(d_tile_descriptors + TILE_STATUS_PADDING + tile_idx, alias); } /** * Update the specified tile's partial value and corresponding status */ __device__ __forceinline__ void SetPartial(int tile_idx, KeyValuePairT tile_partial) { TileDescriptor tile_descriptor; tile_descriptor.status = SCAN_TILE_PARTIAL; tile_descriptor.value = tile_partial.value; tile_descriptor.key = tile_partial.key; TxnWord alias; *reinterpret_cast(&alias) = tile_descriptor; ThreadStore(d_tile_descriptors + TILE_STATUS_PADDING + tile_idx, alias); } /** * Wait for the corresponding tile to become non-invalid */ __device__ __forceinline__ void WaitForValid( int tile_idx, StatusWord &status, KeyValuePairT &value) { // TxnWord alias = ThreadLoad(d_tile_descriptors + TILE_STATUS_PADDING + tile_idx); // TileDescriptor tile_descriptor = reinterpret_cast(alias); // // while (tile_descriptor.status == SCAN_TILE_INVALID) // { // __threadfence_block(); // prevent hoisting loads from loop // // alias = ThreadLoad(d_tile_descriptors + TILE_STATUS_PADDING + tile_idx); // tile_descriptor = reinterpret_cast(alias); // } // // status = tile_descriptor.status; // value.value = tile_descriptor.value; // value.key = tile_descriptor.key; TileDescriptor tile_descriptor; do { __threadfence_block(); // prevent hoisting loads from loop TxnWord alias = ThreadLoad(d_tile_descriptors + TILE_STATUS_PADDING + tile_idx); tile_descriptor = reinterpret_cast(alias); } while (WARP_ANY((tile_descriptor.status == SCAN_TILE_INVALID), 0xffffffff)); status = tile_descriptor.status; value.value = tile_descriptor.value; value.key = tile_descriptor.key; } }; /****************************************************************************** * Prefix call-back operator for coupling local block scan within a * block-cooperative scan ******************************************************************************/ /** * Stateful block-scan prefix functor. 
 * Provides the running prefix for the current tile by using the call-back warp
 * to wait on aggregates/prefixes from predecessor tiles to become available.
 */
template <
    typename    T,
    typename    ScanOpT,
    typename    ScanTileStateT,
    int         PTX_ARCH = CUB_PTX_ARCH>
struct TilePrefixCallbackOp
{
    // Parameterized warp reduce
    typedef WarpReduce<T, CUB_PTX_WARP_THREADS, PTX_ARCH> WarpReduceT;

    // Temporary storage type
    struct _TempStorage
    {
        typename WarpReduceT::TempStorage   warp_reduce;
        T                                   exclusive_prefix;
        T                                   inclusive_prefix;
        T                                   block_aggregate;
    };

    // Alias wrapper allowing temporary storage to be unioned
    struct TempStorage : Uninitialized<_TempStorage> {};

    // Type of status word
    typedef typename ScanTileStateT::StatusWord StatusWord;

    // Fields
    _TempStorage&               temp_storage;       ///< Reference to a warp-reduction instance
    ScanTileStateT&             tile_status;        ///< Interface to tile status
    ScanOpT                     scan_op;            ///< Binary scan operator
    int                         tile_idx;           ///< The current tile index
    T                           exclusive_prefix;   ///< Exclusive prefix for the tile
    T                           inclusive_prefix;   ///< Inclusive prefix for the tile

    // Constructor
    __device__ __forceinline__
    TilePrefixCallbackOp(
        ScanTileStateT  &tile_status,
        TempStorage     &temp_storage,
        ScanOpT         scan_op,
        int             tile_idx)
    :
        temp_storage(temp_storage.Alias()),
        tile_status(tile_status),
        scan_op(scan_op),
        tile_idx(tile_idx) {}

    // Block until all predecessors within the warp-wide window have non-invalid status
    __device__ __forceinline__
    void ProcessWindow(
        int         predecessor_idx,        ///< Preceding tile index to inspect
        StatusWord  &predecessor_status,    ///< [out] Preceding tile status
        T           &window_aggregate)      ///< [out] Relevant partial reduction from this window of preceding tiles
    {
        T value;
        tile_status.WaitForValid(predecessor_idx, predecessor_status, value);

        // Perform a segmented reduction to get the prefix for the current window.
        // Use the swizzled scan operator because we are now scanning *down* towards thread0.
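        // Each lane of the calling warp has just loaded the descriptor of one
        // predecessor tile: lane 0 holds tile_idx-1, lane k holds tile_idx-1-k.
        // Lanes whose tile has already published an inclusive prefix act as
        // segment tails, so the tail-segmented reduction below folds together
        // the values of the closest predecessor tiles, stopping at (and
        // including) the nearest tile whose inclusive prefix is already known.
        // SwizzleScanOp swaps the operand order so that, although the reduction
        // proceeds from higher lanes toward lane 0, the binary scan operator is
        // still applied in ascending tile order.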
int tail_flag = (predecessor_status == StatusWord(SCAN_TILE_INCLUSIVE)); window_aggregate = WarpReduceT(temp_storage.warp_reduce).TailSegmentedReduce( value, tail_flag, SwizzleScanOp(scan_op)); } // BlockScan prefix callback functor (called by the first warp) __device__ __forceinline__ T operator()(T block_aggregate) { // Update our status with our tile-aggregate if (threadIdx.x == 0) { temp_storage.block_aggregate = block_aggregate; tile_status.SetPartial(tile_idx, block_aggregate); } int predecessor_idx = tile_idx - threadIdx.x - 1; StatusWord predecessor_status; T window_aggregate; // Wait for the warp-wide window of predecessor tiles to become valid ProcessWindow(predecessor_idx, predecessor_status, window_aggregate); // The exclusive tile prefix starts out as the current window aggregate exclusive_prefix = window_aggregate; // Keep sliding the window back until we come across a tile whose inclusive prefix is known while (WARP_ALL((predecessor_status != StatusWord(SCAN_TILE_INCLUSIVE)), 0xffffffff)) { predecessor_idx -= CUB_PTX_WARP_THREADS; // Update exclusive tile prefix with the window prefix ProcessWindow(predecessor_idx, predecessor_status, window_aggregate); exclusive_prefix = scan_op(window_aggregate, exclusive_prefix); } // Compute the inclusive tile prefix and update the status for this tile if (threadIdx.x == 0) { inclusive_prefix = scan_op(exclusive_prefix, block_aggregate); tile_status.SetInclusive(tile_idx, inclusive_prefix); temp_storage.exclusive_prefix = exclusive_prefix; temp_storage.inclusive_prefix = inclusive_prefix; } // Return exclusive_prefix return exclusive_prefix; } // Get the exclusive prefix stored in temporary storage __device__ __forceinline__ T GetExclusivePrefix() { return temp_storage.exclusive_prefix; } // Get the inclusive prefix stored in temporary storage __device__ __forceinline__ T GetInclusivePrefix() { return temp_storage.inclusive_prefix; } // Get the block aggregate stored in temporary storage __device__ __forceinline__ T GetBlockAggregate() { return temp_storage.block_aggregate; } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/block/000077500000000000000000000000001411340063500174505ustar00rootroot00000000000000relion-3.1.3/src/gpu_utils/cub/block/block_adjacent_difference.cuh000066400000000000000000000606631411340063500252610ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * The cub::BlockDiscontinuity class provides [collective](index.html#sec0) methods for flagging discontinuities within an ordered set of items partitioned across a CUDA thread block. */ #pragma once #include "../util_type.cuh" #include "../util_ptx.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { template < typename T, int BLOCK_DIM_X, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH> class BlockAdjacentDifference { private: /****************************************************************************** * Constants and type definitions ******************************************************************************/ /// Constants enum { /// The thread block size in threads BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, }; /// Shared memory storage layout type (last element from each thread's input) struct _TempStorage { T first_items[BLOCK_THREADS]; T last_items[BLOCK_THREADS]; }; /****************************************************************************** * Utility methods ******************************************************************************/ /// Internal storage allocator __device__ __forceinline__ _TempStorage& PrivateStorage() { __shared__ _TempStorage private_storage; return private_storage; } /// Specialization for when FlagOp has third index param template ::HAS_PARAM> struct ApplyOp { // Apply flag operator static __device__ __forceinline__ T FlagT(FlagOp flag_op, const T &a, const T &b, int idx) { return flag_op(b, a, idx); } }; /// Specialization for when FlagOp does not have a third index param template struct ApplyOp { // Apply flag operator static __device__ __forceinline__ T FlagT(FlagOp flag_op, const T &a, const T &b, int /*idx*/) { return flag_op(b, a); } }; /// Templated unrolling of item comparison (inductive case) template struct Iterate { // Head flags template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> static __device__ __forceinline__ void FlagHeads( int linear_tid, FlagT (&flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items T (&preds)[ITEMS_PER_THREAD], ///< [out] Calling thread's predecessor items FlagOp flag_op) ///< [in] Binary boolean flag predicate { preds[ITERATION] = input[ITERATION - 1]; flags[ITERATION] = ApplyOp::FlagT( flag_op, preds[ITERATION], input[ITERATION], (linear_tid * ITEMS_PER_THREAD) + ITERATION); Iterate::FlagHeads(linear_tid, flags, input, preds, flag_op); } // Tail flags template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> static __device__ __forceinline__ void FlagTails( int linear_tid, FlagT (&flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items FlagOp flag_op) ///< [in] Binary boolean flag predicate { flags[ITERATION] = 
ApplyOp::FlagT( flag_op, input[ITERATION], input[ITERATION + 1], (linear_tid * ITEMS_PER_THREAD) + ITERATION + 1); Iterate::FlagTails(linear_tid, flags, input, flag_op); } }; /// Templated unrolling of item comparison (termination case) template struct Iterate { // Head flags template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> static __device__ __forceinline__ void FlagHeads( int /*linear_tid*/, FlagT (&/*flags*/)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags T (&/*input*/)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items T (&/*preds*/)[ITEMS_PER_THREAD], ///< [out] Calling thread's predecessor items FlagOp /*flag_op*/) ///< [in] Binary boolean flag predicate {} // Tail flags template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> static __device__ __forceinline__ void FlagTails( int /*linear_tid*/, FlagT (&/*flags*/)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags T (&/*input*/)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items FlagOp /*flag_op*/) ///< [in] Binary boolean flag predicate {} }; /****************************************************************************** * Thread fields ******************************************************************************/ /// Shared storage reference _TempStorage &temp_storage; /// Linear thread-id unsigned int linear_tid; public: /// \smemstorage{BlockDiscontinuity} struct TempStorage : Uninitialized<_TempStorage> {}; /******************************************************************//** * \name Collective constructors *********************************************************************/ //@{ /** * \brief Collective constructor using a private static allocation of shared memory as temporary storage. */ __device__ __forceinline__ BlockAdjacentDifference() : temp_storage(PrivateStorage()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} /** * \brief Collective constructor using the specified memory allocation as temporary storage. 
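     *
     * \par
     * A minimal usage sketch (not taken from the original documentation; the
     * block size of 128 threads and 4 items per thread are arbitrary choices):
     * \code
     * __global__ void ExampleKernel(...)
     * {
     *     // Specialize for a 1D block of 128 threads on type int
     *     typedef cub::BlockAdjacentDifference<int, 128> BlockAdjacentDifferenceT;
     *
     *     // Explicitly allocate the temporary storage and construct the abstraction around it
     *     __shared__ typename BlockAdjacentDifferenceT::TempStorage temp_storage;
     *     BlockAdjacentDifferenceT adj_diff(temp_storage);
     *
     *     // Obtain a segment of consecutive items that are blocked across threads
     *     int thread_data[4];
     *     ...
     *
     *     // Flag each item that differs from its predecessor (the very first item is always flagged)
     *     int head_flags[4];
     *     adj_diff.FlagHeads(head_flags, thread_data, cub::Inequality());
     * }
     * \endcode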
*/ __device__ __forceinline__ BlockAdjacentDifference( TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage : temp_storage(temp_storage.Alias()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} //@} end member group /******************************************************************//** * \name Head flag operations *********************************************************************/ //@{ #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> __device__ __forceinline__ void FlagHeads( FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items T (&preds)[ITEMS_PER_THREAD], ///< [out] Calling thread's predecessor items FlagOp flag_op) ///< [in] Binary boolean flag predicate { // Share last item temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1]; CTA_SYNC(); if (linear_tid == 0) { // Set flag for first thread-item (preds[0] is undefined) head_flags[0] = 1; } else { preds[0] = temp_storage.last_items[linear_tid - 1]; head_flags[0] = ApplyOp::FlagT(flag_op, preds[0], input[0], linear_tid * ITEMS_PER_THREAD); } // Set head_flags for remaining items Iterate<1, ITEMS_PER_THREAD>::FlagHeads(linear_tid, head_flags, input, preds, flag_op); } template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> __device__ __forceinline__ void FlagHeads( FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items T (&preds)[ITEMS_PER_THREAD], ///< [out] Calling thread's predecessor items FlagOp flag_op, ///< [in] Binary boolean flag predicate T tile_predecessor_item) ///< [in] [thread0 only] Item with which to compare the first tile item (input0 from thread0). { // Share last item temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1]; CTA_SYNC(); // Set flag for first thread-item preds[0] = (linear_tid == 0) ? tile_predecessor_item : // First thread temp_storage.last_items[linear_tid - 1]; head_flags[0] = ApplyOp::FlagT(flag_op, preds[0], input[0], linear_tid * ITEMS_PER_THREAD); // Set head_flags for remaining items Iterate<1, ITEMS_PER_THREAD>::FlagHeads(linear_tid, head_flags, input, preds, flag_op); } #endif // DOXYGEN_SHOULD_SKIP_THIS template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> __device__ __forceinline__ void FlagHeads( FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items FlagOp flag_op) ///< [in] Binary boolean flag predicate { T preds[ITEMS_PER_THREAD]; FlagHeads(head_flags, input, preds, flag_op); } template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> __device__ __forceinline__ void FlagHeads( FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items FlagOp flag_op, ///< [in] Binary boolean flag predicate T tile_predecessor_item) ///< [in] [thread0 only] Item with which to compare the first tile item (input0 from thread0). 
{ T preds[ITEMS_PER_THREAD]; FlagHeads(head_flags, input, preds, flag_op, tile_predecessor_item); } template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> __device__ __forceinline__ void FlagTails( FlagT (&tail_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity tail_flags T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items FlagOp flag_op) ///< [in] Binary boolean flag predicate { // Share first item temp_storage.first_items[linear_tid] = input[0]; CTA_SYNC(); // Set flag for last thread-item tail_flags[ITEMS_PER_THREAD - 1] = (linear_tid == BLOCK_THREADS - 1) ? 1 : // Last thread ApplyOp::FlagT( flag_op, input[ITEMS_PER_THREAD - 1], temp_storage.first_items[linear_tid + 1], (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD); // Set tail_flags for remaining items Iterate<0, ITEMS_PER_THREAD - 1>::FlagTails(linear_tid, tail_flags, input, flag_op); } template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> __device__ __forceinline__ void FlagTails( FlagT (&tail_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity tail_flags T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items FlagOp flag_op, ///< [in] Binary boolean flag predicate T tile_successor_item) ///< [in] [threadBLOCK_THREADS-1 only] Item with which to compare the last tile item (inputITEMS_PER_THREAD-1 from threadBLOCK_THREADS-1). { // Share first item temp_storage.first_items[linear_tid] = input[0]; CTA_SYNC(); // Set flag for last thread-item T successor_item = (linear_tid == BLOCK_THREADS - 1) ? tile_successor_item : // Last thread temp_storage.first_items[linear_tid + 1]; tail_flags[ITEMS_PER_THREAD - 1] = ApplyOp::FlagT( flag_op, input[ITEMS_PER_THREAD - 1], successor_item, (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD); // Set tail_flags for remaining items Iterate<0, ITEMS_PER_THREAD - 1>::FlagTails(linear_tid, tail_flags, input, flag_op); } template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> __device__ __forceinline__ void FlagHeadsAndTails( FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags FlagT (&tail_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity tail_flags T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items FlagOp flag_op) ///< [in] Binary boolean flag predicate { // Share first and last items temp_storage.first_items[linear_tid] = input[0]; temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1]; CTA_SYNC(); T preds[ITEMS_PER_THREAD]; // Set flag for first thread-item preds[0] = temp_storage.last_items[linear_tid - 1]; if (linear_tid == 0) { head_flags[0] = 1; } else { head_flags[0] = ApplyOp::FlagT( flag_op, preds[0], input[0], linear_tid * ITEMS_PER_THREAD); } // Set flag for last thread-item tail_flags[ITEMS_PER_THREAD - 1] = (linear_tid == BLOCK_THREADS - 1) ? 
1 : // Last thread ApplyOp::FlagT( flag_op, input[ITEMS_PER_THREAD - 1], temp_storage.first_items[linear_tid + 1], (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD); // Set head_flags for remaining items Iterate<1, ITEMS_PER_THREAD>::FlagHeads(linear_tid, head_flags, input, preds, flag_op); // Set tail_flags for remaining items Iterate<0, ITEMS_PER_THREAD - 1>::FlagTails(linear_tid, tail_flags, input, flag_op); } template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> __device__ __forceinline__ void FlagHeadsAndTails( FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags FlagT (&tail_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity tail_flags T tile_successor_item, ///< [in] [threadBLOCK_THREADS-1 only] Item with which to compare the last tile item (inputITEMS_PER_THREAD-1 from threadBLOCK_THREADS-1). T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items FlagOp flag_op) ///< [in] Binary boolean flag predicate { // Share first and last items temp_storage.first_items[linear_tid] = input[0]; temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1]; CTA_SYNC(); T preds[ITEMS_PER_THREAD]; // Set flag for first thread-item if (linear_tid == 0) { head_flags[0] = 1; } else { preds[0] = temp_storage.last_items[linear_tid - 1]; head_flags[0] = ApplyOp::FlagT( flag_op, preds[0], input[0], linear_tid * ITEMS_PER_THREAD); } // Set flag for last thread-item T successor_item = (linear_tid == BLOCK_THREADS - 1) ? tile_successor_item : // Last thread temp_storage.first_items[linear_tid + 1]; tail_flags[ITEMS_PER_THREAD - 1] = ApplyOp::FlagT( flag_op, input[ITEMS_PER_THREAD - 1], successor_item, (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD); // Set head_flags for remaining items Iterate<1, ITEMS_PER_THREAD>::FlagHeads(linear_tid, head_flags, input, preds, flag_op); // Set tail_flags for remaining items Iterate<0, ITEMS_PER_THREAD - 1>::FlagTails(linear_tid, tail_flags, input, flag_op); } template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> __device__ __forceinline__ void FlagHeadsAndTails( FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags T tile_predecessor_item, ///< [in] [thread0 only] Item with which to compare the first tile item (input0 from thread0). FlagT (&tail_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity tail_flags T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items FlagOp flag_op) ///< [in] Binary boolean flag predicate { // Share first and last items temp_storage.first_items[linear_tid] = input[0]; temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1]; CTA_SYNC(); T preds[ITEMS_PER_THREAD]; // Set flag for first thread-item preds[0] = (linear_tid == 0) ? tile_predecessor_item : // First thread temp_storage.last_items[linear_tid - 1]; head_flags[0] = ApplyOp::FlagT( flag_op, preds[0], input[0], linear_tid * ITEMS_PER_THREAD); // Set flag for last thread-item tail_flags[ITEMS_PER_THREAD - 1] = (linear_tid == BLOCK_THREADS - 1) ? 
1 : // Last thread ApplyOp::FlagT( flag_op, input[ITEMS_PER_THREAD - 1], temp_storage.first_items[linear_tid + 1], (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD); // Set head_flags for remaining items Iterate<1, ITEMS_PER_THREAD>::FlagHeads(linear_tid, head_flags, input, preds, flag_op); // Set tail_flags for remaining items Iterate<0, ITEMS_PER_THREAD - 1>::FlagTails(linear_tid, tail_flags, input, flag_op); } template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> __device__ __forceinline__ void FlagHeadsAndTails( FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags T tile_predecessor_item, ///< [in] [thread0 only] Item with which to compare the first tile item (input0 from thread0). FlagT (&tail_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity tail_flags T tile_successor_item, ///< [in] [threadBLOCK_THREADS-1 only] Item with which to compare the last tile item (inputITEMS_PER_THREAD-1 from threadBLOCK_THREADS-1). T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items FlagOp flag_op) ///< [in] Binary boolean flag predicate { // Share first and last items temp_storage.first_items[linear_tid] = input[0]; temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1]; CTA_SYNC(); T preds[ITEMS_PER_THREAD]; // Set flag for first thread-item preds[0] = (linear_tid == 0) ? tile_predecessor_item : // First thread temp_storage.last_items[linear_tid - 1]; head_flags[0] = ApplyOp::FlagT( flag_op, preds[0], input[0], linear_tid * ITEMS_PER_THREAD); // Set flag for last thread-item T successor_item = (linear_tid == BLOCK_THREADS - 1) ? tile_successor_item : // Last thread temp_storage.first_items[linear_tid + 1]; tail_flags[ITEMS_PER_THREAD - 1] = ApplyOp::FlagT( flag_op, input[ITEMS_PER_THREAD - 1], successor_item, (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD); // Set head_flags for remaining items Iterate<1, ITEMS_PER_THREAD>::FlagHeads(linear_tid, head_flags, input, preds, flag_op); // Set tail_flags for remaining items Iterate<0, ITEMS_PER_THREAD - 1>::FlagTails(linear_tid, tail_flags, input, flag_op); } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/block/block_discontinuity.cuh000066400000000000000000001524261411340063500242420ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * The cub::BlockDiscontinuity class provides [collective](index.html#sec0) methods for flagging discontinuities within an ordered set of items partitioned across a CUDA thread block. */ #pragma once #include "../util_type.cuh" #include "../util_ptx.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief The BlockDiscontinuity class provides [collective](index.html#sec0) methods for flagging discontinuities within an ordered set of items partitioned across a CUDA thread block. ![](discont_logo.png) * \ingroup BlockModule * * \tparam T The data type to be flagged. * \tparam BLOCK_DIM_X The thread block length in threads along the X dimension * \tparam BLOCK_DIM_Y [optional] The thread block length in threads along the Y dimension (default: 1) * \tparam BLOCK_DIM_Z [optional] The thread block length in threads along the Z dimension (default: 1) * \tparam PTX_ARCH [optional] \ptxversion * * \par Overview * - A set of "head flags" (or "tail flags") is often used to indicate corresponding items * that differ from their predecessors (or successors). For example, head flags are convenient * for demarcating disjoint data segments as part of a segmented scan or reduction. * - \blocked * * \par Performance Considerations * - \granularity * * \par A Simple Example * \blockcollective{BlockDiscontinuity} * \par * The code snippet below illustrates the head flagging of 512 integer items that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockDiscontinuity for a 1D block of 128 threads on type int * typedef cub::BlockDiscontinuity BlockDiscontinuity; * * // Allocate shared memory for BlockDiscontinuity * __shared__ typename BlockDiscontinuity::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... * * // Collectively compute head flags for discontinuities in the segment * int head_flags[4]; * BlockDiscontinuity(temp_storage).FlagHeads(head_flags, thread_data, cub::Inequality()); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is * { [0,0,1,1], [1,1,1,1], [2,3,3,3], [3,4,4,4], ... }. * The corresponding output \p head_flags in those threads will be * { [1,0,1,0], [0,0,0,0], [1,1,0,0], [0,1,0,0], ... }. 
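 *
 * \par
 * The flag functor need not be one of the predefined operators such as
 * cub::Inequality().  A sketch (not taken from the original documentation) of a
 * user-defined functor using the optional three-argument form, where \p b_index
 * is the rank of \p b within the aggregate tile of data:
 * \code
 * struct ExampleNewSegmentOp
 * {
 *     __device__ __forceinline__ bool operator()(const int &a, const int &b, unsigned int b_index)
 *     {
 *         // Flag a discontinuity when the value changes, or at every 64-item boundary of the tile
 *         return (a != b) || (b_index % 64 == 0);
 *     }
 * };
 * \endcode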
* * \par Performance Considerations * - Incurs zero bank conflicts for most types * */ template < typename T, int BLOCK_DIM_X, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH> class BlockDiscontinuity { private: /****************************************************************************** * Constants and type definitions ******************************************************************************/ /// Constants enum { /// The thread block size in threads BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, }; /// Shared memory storage layout type (last element from each thread's input) struct _TempStorage { T first_items[BLOCK_THREADS]; T last_items[BLOCK_THREADS]; }; /****************************************************************************** * Utility methods ******************************************************************************/ /// Internal storage allocator __device__ __forceinline__ _TempStorage& PrivateStorage() { __shared__ _TempStorage private_storage; return private_storage; } /// Specialization for when FlagOp has third index param template ::HAS_PARAM> struct ApplyOp { // Apply flag operator static __device__ __forceinline__ bool FlagT(FlagOp flag_op, const T &a, const T &b, int idx) { return flag_op(a, b, idx); } }; /// Specialization for when FlagOp does not have a third index param template struct ApplyOp { // Apply flag operator static __device__ __forceinline__ bool FlagT(FlagOp flag_op, const T &a, const T &b, int /*idx*/) { return flag_op(a, b); } }; /// Templated unrolling of item comparison (inductive case) template struct Iterate { // Head flags template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> static __device__ __forceinline__ void FlagHeads( int linear_tid, FlagT (&flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items T (&preds)[ITEMS_PER_THREAD], ///< [out] Calling thread's predecessor items FlagOp flag_op) ///< [in] Binary boolean flag predicate { preds[ITERATION] = input[ITERATION - 1]; flags[ITERATION] = ApplyOp::FlagT( flag_op, preds[ITERATION], input[ITERATION], (linear_tid * ITEMS_PER_THREAD) + ITERATION); Iterate::FlagHeads(linear_tid, flags, input, preds, flag_op); } // Tail flags template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> static __device__ __forceinline__ void FlagTails( int linear_tid, FlagT (&flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items FlagOp flag_op) ///< [in] Binary boolean flag predicate { flags[ITERATION] = ApplyOp::FlagT( flag_op, input[ITERATION], input[ITERATION + 1], (linear_tid * ITEMS_PER_THREAD) + ITERATION + 1); Iterate::FlagTails(linear_tid, flags, input, flag_op); } }; /// Templated unrolling of item comparison (termination case) template struct Iterate { // Head flags template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> static __device__ __forceinline__ void FlagHeads( int /*linear_tid*/, FlagT (&/*flags*/)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags T (&/*input*/)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items T (&/*preds*/)[ITEMS_PER_THREAD], ///< [out] Calling thread's predecessor items FlagOp /*flag_op*/) ///< [in] Binary boolean flag predicate {} // Tail flags template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> static __device__ __forceinline__ void FlagTails( int /*linear_tid*/, FlagT 
(&/*flags*/)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags T (&/*input*/)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items FlagOp /*flag_op*/) ///< [in] Binary boolean flag predicate {} }; /****************************************************************************** * Thread fields ******************************************************************************/ /// Shared storage reference _TempStorage &temp_storage; /// Linear thread-id unsigned int linear_tid; public: /// \smemstorage{BlockDiscontinuity} struct TempStorage : Uninitialized<_TempStorage> {}; /******************************************************************//** * \name Collective constructors *********************************************************************/ //@{ /** * \brief Collective constructor using a private static allocation of shared memory as temporary storage. */ __device__ __forceinline__ BlockDiscontinuity() : temp_storage(PrivateStorage()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} /** * \brief Collective constructor using the specified memory allocation as temporary storage. */ __device__ __forceinline__ BlockDiscontinuity( TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage : temp_storage(temp_storage.Alias()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} //@} end member group /******************************************************************//** * \name Head flag operations *********************************************************************/ //@{ #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> __device__ __forceinline__ void FlagHeads( FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items T (&preds)[ITEMS_PER_THREAD], ///< [out] Calling thread's predecessor items FlagOp flag_op) ///< [in] Binary boolean flag predicate { // Share last item temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1]; CTA_SYNC(); if (linear_tid == 0) { // Set flag for first thread-item (preds[0] is undefined) head_flags[0] = 1; } else { preds[0] = temp_storage.last_items[linear_tid - 1]; head_flags[0] = ApplyOp::FlagT(flag_op, preds[0], input[0], linear_tid * ITEMS_PER_THREAD); } // Set head_flags for remaining items Iterate<1, ITEMS_PER_THREAD>::FlagHeads(linear_tid, head_flags, input, preds, flag_op); } template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> __device__ __forceinline__ void FlagHeads( FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items T (&preds)[ITEMS_PER_THREAD], ///< [out] Calling thread's predecessor items FlagOp flag_op, ///< [in] Binary boolean flag predicate T tile_predecessor_item) ///< [in] [thread0 only] Item with which to compare the first tile item (input0 from thread0). { // Share last item temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1]; CTA_SYNC(); // Set flag for first thread-item preds[0] = (linear_tid == 0) ? 
tile_predecessor_item : // First thread temp_storage.last_items[linear_tid - 1]; head_flags[0] = ApplyOp::FlagT(flag_op, preds[0], input[0], linear_tid * ITEMS_PER_THREAD); // Set head_flags for remaining items Iterate<1, ITEMS_PER_THREAD>::FlagHeads(linear_tid, head_flags, input, preds, flag_op); } #endif // DOXYGEN_SHOULD_SKIP_THIS /** * \brief Sets head flags indicating discontinuities between items partitioned across the thread block, for which the first item has no reference and is always flagged. * * \par * - The flag head_flagsi is set for item * inputi when * flag_op(previous-item, inputi) * returns \p true (where previous-item is either the preceding item * in the same thread or the last item in the previous thread). * - For thread0, item input0 is always flagged. * - \blocked * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates the head-flagging of 512 integer items that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockDiscontinuity for a 1D block of 128 threads on type int * typedef cub::BlockDiscontinuity BlockDiscontinuity; * * // Allocate shared memory for BlockDiscontinuity * __shared__ typename BlockDiscontinuity::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... * * // Collectively compute head flags for discontinuities in the segment * int head_flags[4]; * BlockDiscontinuity(temp_storage).FlagHeads(head_flags, thread_data, cub::Inequality()); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is * { [0,0,1,1], [1,1,1,1], [2,3,3,3], [3,4,4,4], ... }. * The corresponding output \p head_flags in those threads will be * { [1,0,1,0], [0,0,0,0], [1,1,0,0], [0,1,0,0], ... }. * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam FlagT [inferred] The flag type (must be an integer type) * \tparam FlagOp [inferred] Binary predicate functor type having member T operator()(const T &a, const T &b) or member T operator()(const T &a, const T &b, unsigned int b_index), and returning \p true if a discontinuity exists between \p a and \p b, otherwise \p false. \p b_index is the rank of b in the aggregate tile of data. */ template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> __device__ __forceinline__ void FlagHeads( FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items FlagOp flag_op) ///< [in] Binary boolean flag predicate { T preds[ITEMS_PER_THREAD]; FlagHeads(head_flags, input, preds, flag_op); } /** * \brief Sets head flags indicating discontinuities between items partitioned across the thread block. * * \par * - The flag head_flagsi is set for item * inputi when * flag_op(previous-item, inputi) * returns \p true (where previous-item is either the preceding item * in the same thread or the last item in the previous thread). * - For thread0, item input0 is compared * against \p tile_predecessor_item. * - \blocked * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates the head-flagging of 512 integer items that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive items. 
* \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockDiscontinuity for a 1D block of 128 threads on type int * typedef cub::BlockDiscontinuity BlockDiscontinuity; * * // Allocate shared memory for BlockDiscontinuity * __shared__ typename BlockDiscontinuity::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... * * // Have thread0 obtain the predecessor item for the entire tile * int tile_predecessor_item; * if (threadIdx.x == 0) tile_predecessor_item == ... * * // Collectively compute head flags for discontinuities in the segment * int head_flags[4]; * BlockDiscontinuity(temp_storage).FlagHeads( * head_flags, thread_data, cub::Inequality(), tile_predecessor_item); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is * { [0,0,1,1], [1,1,1,1], [2,3,3,3], [3,4,4,4], ... }, * and that \p tile_predecessor_item is \p 0. The corresponding output \p head_flags in those threads will be * { [0,0,1,0], [0,0,0,0], [1,1,0,0], [0,1,0,0], ... }. * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam FlagT [inferred] The flag type (must be an integer type) * \tparam FlagOp [inferred] Binary predicate functor type having member T operator()(const T &a, const T &b) or member T operator()(const T &a, const T &b, unsigned int b_index), and returning \p true if a discontinuity exists between \p a and \p b, otherwise \p false. \p b_index is the rank of b in the aggregate tile of data. */ template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> __device__ __forceinline__ void FlagHeads( FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items FlagOp flag_op, ///< [in] Binary boolean flag predicate T tile_predecessor_item) ///< [in] [thread0 only] Item with which to compare the first tile item (input0 from thread0). { T preds[ITEMS_PER_THREAD]; FlagHeads(head_flags, input, preds, flag_op, tile_predecessor_item); } //@} end member group /******************************************************************//** * \name Tail flag operations *********************************************************************/ //@{ /** * \brief Sets tail flags indicating discontinuities between items partitioned across the thread block, for which the last item has no reference and is always flagged. * * \par * - The flag tail_flagsi is set for item * inputi when * flag_op(inputi, next-item) * returns \p true (where next-item is either the next item * in the same thread or the first item in the next thread). * - For threadBLOCK_THREADS-1, item * inputITEMS_PER_THREAD-1 is always flagged. * - \blocked * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates the tail-flagging of 512 integer items that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockDiscontinuity for a 1D block of 128 threads on type int * typedef cub::BlockDiscontinuity BlockDiscontinuity; * * // Allocate shared memory for BlockDiscontinuity * __shared__ typename BlockDiscontinuity::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... 
* * // Collectively compute tail flags for discontinuities in the segment * int tail_flags[4]; * BlockDiscontinuity(temp_storage).FlagTails(tail_flags, thread_data, cub::Inequality()); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is * { [0,0,1,1], [1,1,1,1], [2,3,3,3], ..., [124,125,125,125] }. * The corresponding output \p tail_flags in those threads will be * { [0,1,0,0], [0,0,0,1], [1,0,0,...], ..., [1,0,0,1] }. * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam FlagT [inferred] The flag type (must be an integer type) * \tparam FlagOp [inferred] Binary predicate functor type having member T operator()(const T &a, const T &b) or member T operator()(const T &a, const T &b, unsigned int b_index), and returning \p true if a discontinuity exists between \p a and \p b, otherwise \p false. \p b_index is the rank of b in the aggregate tile of data. */ template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> __device__ __forceinline__ void FlagTails( FlagT (&tail_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity tail_flags T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items FlagOp flag_op) ///< [in] Binary boolean flag predicate { // Share first item temp_storage.first_items[linear_tid] = input[0]; CTA_SYNC(); // Set flag for last thread-item tail_flags[ITEMS_PER_THREAD - 1] = (linear_tid == BLOCK_THREADS - 1) ? 1 : // Last thread ApplyOp::FlagT( flag_op, input[ITEMS_PER_THREAD - 1], temp_storage.first_items[linear_tid + 1], (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD); // Set tail_flags for remaining items Iterate<0, ITEMS_PER_THREAD - 1>::FlagTails(linear_tid, tail_flags, input, flag_op); } /** * \brief Sets tail flags indicating discontinuities between items partitioned across the thread block. * * \par * - The flag tail_flagsi is set for item * inputi when * flag_op(inputi, next-item) * returns \p true (where next-item is either the next item * in the same thread or the first item in the next thread). * - For threadBLOCK_THREADS-1, item * inputITEMS_PER_THREAD-1 is compared * against \p tile_successor_item. * - \blocked * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates the tail-flagging of 512 integer items that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockDiscontinuity for a 1D block of 128 threads on type int * typedef cub::BlockDiscontinuity BlockDiscontinuity; * * // Allocate shared memory for BlockDiscontinuity * __shared__ typename BlockDiscontinuity::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... * * // Have thread127 obtain the successor item for the entire tile * int tile_successor_item; * if (threadIdx.x == 127) tile_successor_item == ... * * // Collectively compute tail flags for discontinuities in the segment * int tail_flags[4]; * BlockDiscontinuity(temp_storage).FlagTails( * tail_flags, thread_data, cub::Inequality(), tile_successor_item); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is * { [0,0,1,1], [1,1,1,1], [2,3,3,3], ..., [124,125,125,125] } * and that \p tile_successor_item is \p 125. 
The corresponding output \p tail_flags in those threads will be * { [0,1,0,0], [0,0,0,1], [1,0,0,...], ..., [1,0,0,0] }. * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam FlagT [inferred] The flag type (must be an integer type) * \tparam FlagOp [inferred] Binary predicate functor type having member T operator()(const T &a, const T &b) or member T operator()(const T &a, const T &b, unsigned int b_index), and returning \p true if a discontinuity exists between \p a and \p b, otherwise \p false. \p b_index is the rank of b in the aggregate tile of data. */ template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> __device__ __forceinline__ void FlagTails( FlagT (&tail_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity tail_flags T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items FlagOp flag_op, ///< [in] Binary boolean flag predicate T tile_successor_item) ///< [in] [threadBLOCK_THREADS-1 only] Item with which to compare the last tile item (inputITEMS_PER_THREAD-1 from threadBLOCK_THREADS-1). { // Share first item temp_storage.first_items[linear_tid] = input[0]; CTA_SYNC(); // Set flag for last thread-item T successor_item = (linear_tid == BLOCK_THREADS - 1) ? tile_successor_item : // Last thread temp_storage.first_items[linear_tid + 1]; tail_flags[ITEMS_PER_THREAD - 1] = ApplyOp::FlagT( flag_op, input[ITEMS_PER_THREAD - 1], successor_item, (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD); // Set tail_flags for remaining items Iterate<0, ITEMS_PER_THREAD - 1>::FlagTails(linear_tid, tail_flags, input, flag_op); } //@} end member group /******************************************************************//** * \name Head & tail flag operations *********************************************************************/ //@{ /** * \brief Sets both head and tail flags indicating discontinuities between items partitioned across the thread block. * * \par * - The flag head_flagsi is set for item * inputi when * flag_op(previous-item, inputi) * returns \p true (where previous-item is either the preceding item * in the same thread or the last item in the previous thread). * - For thread0, item input0 is always flagged. * - The flag tail_flagsi is set for item * inputi when * flag_op(inputi, next-item) * returns \p true (where next-item is either the next item * in the same thread or the first item in the next thread). * - For threadBLOCK_THREADS-1, item * inputITEMS_PER_THREAD-1 is always flagged. * - \blocked * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates the head- and tail-flagging of 512 integer items that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockDiscontinuity for a 1D block of 128 threads on type int * typedef cub::BlockDiscontinuity BlockDiscontinuity; * * // Allocate shared memory for BlockDiscontinuity * __shared__ typename BlockDiscontinuity::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... 
* * // Collectively compute head and flags for discontinuities in the segment * int head_flags[4]; * int tail_flags[4]; * BlockDiscontinuity(temp_storage).FlagTails( * head_flags, tail_flags, thread_data, cub::Inequality()); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is * { [0,0,1,1], [1,1,1,1], [2,3,3,3], ..., [124,125,125,125] } * and that the tile_successor_item is \p 125. The corresponding output \p head_flags * in those threads will be { [1,0,1,0], [0,0,0,0], [1,1,0,0], [0,1,0,0], ... }. * and the corresponding output \p tail_flags in those threads will be * { [0,1,0,0], [0,0,0,1], [1,0,0,...], ..., [1,0,0,1] }. * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam FlagT [inferred] The flag type (must be an integer type) * \tparam FlagOp [inferred] Binary predicate functor type having member T operator()(const T &a, const T &b) or member T operator()(const T &a, const T &b, unsigned int b_index), and returning \p true if a discontinuity exists between \p a and \p b, otherwise \p false. \p b_index is the rank of b in the aggregate tile of data. */ template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> __device__ __forceinline__ void FlagHeadsAndTails( FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags FlagT (&tail_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity tail_flags T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items FlagOp flag_op) ///< [in] Binary boolean flag predicate { // Share first and last items temp_storage.first_items[linear_tid] = input[0]; temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1]; CTA_SYNC(); T preds[ITEMS_PER_THREAD]; // Set flag for first thread-item preds[0] = temp_storage.last_items[linear_tid - 1]; if (linear_tid == 0) { head_flags[0] = 1; } else { head_flags[0] = ApplyOp::FlagT( flag_op, preds[0], input[0], linear_tid * ITEMS_PER_THREAD); } // Set flag for last thread-item tail_flags[ITEMS_PER_THREAD - 1] = (linear_tid == BLOCK_THREADS - 1) ? 1 : // Last thread ApplyOp::FlagT( flag_op, input[ITEMS_PER_THREAD - 1], temp_storage.first_items[linear_tid + 1], (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD); // Set head_flags for remaining items Iterate<1, ITEMS_PER_THREAD>::FlagHeads(linear_tid, head_flags, input, preds, flag_op); // Set tail_flags for remaining items Iterate<0, ITEMS_PER_THREAD - 1>::FlagTails(linear_tid, tail_flags, input, flag_op); } /** * \brief Sets both head and tail flags indicating discontinuities between items partitioned across the thread block. * * \par * - The flag head_flagsi is set for item * inputi when * flag_op(previous-item, inputi) * returns \p true (where previous-item is either the preceding item * in the same thread or the last item in the previous thread). * - For thread0, item input0 is always flagged. * - The flag tail_flagsi is set for item * inputi when * flag_op(inputi, next-item) * returns \p true (where next-item is either the next item * in the same thread or the first item in the next thread). * - For threadBLOCK_THREADS-1, item * inputITEMS_PER_THREAD-1 is compared * against \p tile_predecessor_item. * - \blocked * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates the head- and tail-flagging of 512 integer items that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive items. 
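/* -------------------------------------------------------------------------
 * Illustrative sketch (not part of the CUB sources above): one way the flags
 * produced by the basic FlagHeadsAndTails overload might be consumed.  Every
 * head flag marks the first item of a run of equal keys inside the tile, so
 * summing the head flags with a BlockReduce yields the number of runs per
 * tile.  The kernel name, tile size and output layout are hypothetical.
 * ------------------------------------------------------------------------- */
#include <cub/cub.cuh>

__global__ void SketchCountRuns(const int *d_keys, int *d_num_runs)
{
    const int BLOCK_THREADS    = 128;
    const int ITEMS_PER_THREAD = 4;

    typedef cub::BlockDiscontinuity<int, BLOCK_THREADS> BlockDiscontinuityT;
    typedef cub::BlockReduce<int, BLOCK_THREADS>        BlockReduceT;

    // The two collectives are used one after the other, so their temporary
    // storage can share the same shared-memory allocation.
    union TempStorage
    {
        typename BlockDiscontinuityT::TempStorage discontinuity;
        typename BlockReduceT::TempStorage        reduce;
    };
    __shared__ TempStorage temp_storage;

    // Blocked arrangement: thread t owns items 4t .. 4t+3 of this block's tile
    int keys[ITEMS_PER_THREAD];
    cub::LoadDirectBlocked(threadIdx.x,
                           d_keys + blockIdx.x * BLOCK_THREADS * ITEMS_PER_THREAD,
                           keys);

    // A head flag marks the first item of every run of equal keys in the tile
    int head_flags[ITEMS_PER_THREAD];
    int tail_flags[ITEMS_PER_THREAD];
    BlockDiscontinuityT(temp_storage.discontinuity).FlagHeadsAndTails(
        head_flags, tail_flags, keys, cub::Inequality());
    __syncthreads();    // the shared-memory union is reused below

    // The number of runs in the tile is the sum of the head flags
    int num_runs = BlockReduceT(temp_storage.reduce).Sum(head_flags);
    if (threadIdx.x == 0)
        d_num_runs[blockIdx.x] = num_runs;
}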
* \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockDiscontinuity for a 1D block of 128 threads on type int * typedef cub::BlockDiscontinuity BlockDiscontinuity; * * // Allocate shared memory for BlockDiscontinuity * __shared__ typename BlockDiscontinuity::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... * * // Have thread127 obtain the successor item for the entire tile * int tile_successor_item; * if (threadIdx.x == 127) tile_successor_item == ... * * // Collectively compute head and flags for discontinuities in the segment * int head_flags[4]; * int tail_flags[4]; * BlockDiscontinuity(temp_storage).FlagTails( * head_flags, tail_flags, tile_successor_item, thread_data, cub::Inequality()); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is * { [0,0,1,1], [1,1,1,1], [2,3,3,3], ..., [124,125,125,125] } * and that the tile_successor_item is \p 125. The corresponding output \p head_flags * in those threads will be { [1,0,1,0], [0,0,0,0], [1,1,0,0], [0,1,0,0], ... }. * and the corresponding output \p tail_flags in those threads will be * { [0,1,0,0], [0,0,0,1], [1,0,0,...], ..., [1,0,0,0] }. * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam FlagT [inferred] The flag type (must be an integer type) * \tparam FlagOp [inferred] Binary predicate functor type having member T operator()(const T &a, const T &b) or member T operator()(const T &a, const T &b, unsigned int b_index), and returning \p true if a discontinuity exists between \p a and \p b, otherwise \p false. \p b_index is the rank of b in the aggregate tile of data. */ template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> __device__ __forceinline__ void FlagHeadsAndTails( FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags FlagT (&tail_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity tail_flags T tile_successor_item, ///< [in] [threadBLOCK_THREADS-1 only] Item with which to compare the last tile item (inputITEMS_PER_THREAD-1 from threadBLOCK_THREADS-1). T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items FlagOp flag_op) ///< [in] Binary boolean flag predicate { // Share first and last items temp_storage.first_items[linear_tid] = input[0]; temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1]; CTA_SYNC(); T preds[ITEMS_PER_THREAD]; // Set flag for first thread-item if (linear_tid == 0) { head_flags[0] = 1; } else { preds[0] = temp_storage.last_items[linear_tid - 1]; head_flags[0] = ApplyOp::FlagT( flag_op, preds[0], input[0], linear_tid * ITEMS_PER_THREAD); } // Set flag for last thread-item T successor_item = (linear_tid == BLOCK_THREADS - 1) ? tile_successor_item : // Last thread temp_storage.first_items[linear_tid + 1]; tail_flags[ITEMS_PER_THREAD - 1] = ApplyOp::FlagT( flag_op, input[ITEMS_PER_THREAD - 1], successor_item, (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD); // Set head_flags for remaining items Iterate<1, ITEMS_PER_THREAD>::FlagHeads(linear_tid, head_flags, input, preds, flag_op); // Set tail_flags for remaining items Iterate<0, ITEMS_PER_THREAD - 1>::FlagTails(linear_tid, tail_flags, input, flag_op); } /** * \brief Sets both head and tail flags indicating discontinuities between items partitioned across the thread block. 
* * \par * - The flag head_flagsi is set for item * inputi when * flag_op(previous-item, inputi) * returns \p true (where previous-item is either the preceding item * in the same thread or the last item in the previous thread). * - For thread0, item input0 is compared * against \p tile_predecessor_item. * - The flag tail_flagsi is set for item * inputi when * flag_op(inputi, next-item) * returns \p true (where next-item is either the next item * in the same thread or the first item in the next thread). * - For threadBLOCK_THREADS-1, item * inputITEMS_PER_THREAD-1 is always flagged. * - \blocked * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates the head- and tail-flagging of 512 integer items that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockDiscontinuity for a 1D block of 128 threads on type int * typedef cub::BlockDiscontinuity BlockDiscontinuity; * * // Allocate shared memory for BlockDiscontinuity * __shared__ typename BlockDiscontinuity::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... * * // Have thread0 obtain the predecessor item for the entire tile * int tile_predecessor_item; * if (threadIdx.x == 0) tile_predecessor_item == ... * * // Have thread127 obtain the successor item for the entire tile * int tile_successor_item; * if (threadIdx.x == 127) tile_successor_item == ... * * // Collectively compute head and flags for discontinuities in the segment * int head_flags[4]; * int tail_flags[4]; * BlockDiscontinuity(temp_storage).FlagTails( * head_flags, tile_predecessor_item, tail_flags, tile_successor_item, * thread_data, cub::Inequality()); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is * { [0,0,1,1], [1,1,1,1], [2,3,3,3], ..., [124,125,125,125] }, * that the \p tile_predecessor_item is \p 0, and that the * \p tile_successor_item is \p 125. The corresponding output \p head_flags * in those threads will be { [0,0,1,0], [0,0,0,0], [1,1,0,0], [0,1,0,0], ... }. * and the corresponding output \p tail_flags in those threads will be * { [0,1,0,0], [0,0,0,1], [1,0,0,...], ..., [1,0,0,1] }. * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam FlagT [inferred] The flag type (must be an integer type) * \tparam FlagOp [inferred] Binary predicate functor type having member T operator()(const T &a, const T &b) or member T operator()(const T &a, const T &b, unsigned int b_index), and returning \p true if a discontinuity exists between \p a and \p b, otherwise \p false. \p b_index is the rank of b in the aggregate tile of data. */ template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> __device__ __forceinline__ void FlagHeadsAndTails( FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags T tile_predecessor_item, ///< [in] [thread0 only] Item with which to compare the first tile item (input0 from thread0). 
FlagT (&tail_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity tail_flags T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items FlagOp flag_op) ///< [in] Binary boolean flag predicate { // Share first and last items temp_storage.first_items[linear_tid] = input[0]; temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1]; CTA_SYNC(); T preds[ITEMS_PER_THREAD]; // Set flag for first thread-item preds[0] = (linear_tid == 0) ? tile_predecessor_item : // First thread temp_storage.last_items[linear_tid - 1]; head_flags[0] = ApplyOp::FlagT( flag_op, preds[0], input[0], linear_tid * ITEMS_PER_THREAD); // Set flag for last thread-item tail_flags[ITEMS_PER_THREAD - 1] = (linear_tid == BLOCK_THREADS - 1) ? 1 : // Last thread ApplyOp::FlagT( flag_op, input[ITEMS_PER_THREAD - 1], temp_storage.first_items[linear_tid + 1], (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD); // Set head_flags for remaining items Iterate<1, ITEMS_PER_THREAD>::FlagHeads(linear_tid, head_flags, input, preds, flag_op); // Set tail_flags for remaining items Iterate<0, ITEMS_PER_THREAD - 1>::FlagTails(linear_tid, tail_flags, input, flag_op); } /** * \brief Sets both head and tail flags indicating discontinuities between items partitioned across the thread block. * * \par * - The flag head_flagsi is set for item * inputi when * flag_op(previous-item, inputi) * returns \p true (where previous-item is either the preceding item * in the same thread or the last item in the previous thread). * - For thread0, item input0 is compared * against \p tile_predecessor_item. * - The flag tail_flagsi is set for item * inputi when * flag_op(inputi, next-item) * returns \p true (where next-item is either the next item * in the same thread or the first item in the next thread). * - For threadBLOCK_THREADS-1, item * inputITEMS_PER_THREAD-1 is compared * against \p tile_successor_item. * - \blocked * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates the head- and tail-flagging of 512 integer items that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockDiscontinuity for a 1D block of 128 threads on type int * typedef cub::BlockDiscontinuity BlockDiscontinuity; * * // Allocate shared memory for BlockDiscontinuity * __shared__ typename BlockDiscontinuity::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... * * // Have thread0 obtain the predecessor item for the entire tile * int tile_predecessor_item; * if (threadIdx.x == 0) tile_predecessor_item == ... * * // Have thread127 obtain the successor item for the entire tile * int tile_successor_item; * if (threadIdx.x == 127) tile_successor_item == ... * * // Collectively compute head and flags for discontinuities in the segment * int head_flags[4]; * int tail_flags[4]; * BlockDiscontinuity(temp_storage).FlagTails( * head_flags, tile_predecessor_item, tail_flags, tile_successor_item, * thread_data, cub::Inequality()); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is * { [0,0,1,1], [1,1,1,1], [2,3,3,3], ..., [124,125,125,125] }, * that the \p tile_predecessor_item is \p 0, and that the * \p tile_successor_item is \p 125. 
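/* -------------------------------------------------------------------------
 * Illustrative sketch (not from the CUB sources): tile_predecessor_item and
 * tile_successor_item are what let a long sequence be processed one tile per
 * block without spurious flags at tile borders.  Thread 0 peeks at the key
 * just before the tile, the last thread peeks at the key just after it, and
 * the two ends of the sequence use sentinel values that force a flag.  The
 * kernel name, the assumption of full tiles and the sentinel choice are
 * hypothetical.
 * ------------------------------------------------------------------------- */
#include <cub/cub.cuh>

__global__ void SketchSegmentFlags(const int *d_keys, int num_items, int *d_head_flags)
{
    const int BLOCK_THREADS    = 128;
    const int ITEMS_PER_THREAD = 4;
    const int TILE_ITEMS       = BLOCK_THREADS * ITEMS_PER_THREAD;

    typedef cub::BlockDiscontinuity<int, BLOCK_THREADS> BlockDiscontinuityT;
    __shared__ typename BlockDiscontinuityT::TempStorage temp_storage;

    int tile_offset = blockIdx.x * TILE_ITEMS;      // assume full tiles for brevity

    // Blocked load of this block's tile
    int keys[ITEMS_PER_THREAD];
    cub::LoadDirectBlocked(threadIdx.x, d_keys + tile_offset, keys);

    // Thread 0 fetches the key preceding this tile; for the very first tile
    // any value different from keys[0] serves as a sentinel so item 0 is flagged.
    int tile_predecessor_item = 0;
    if (threadIdx.x == 0)
        tile_predecessor_item = (tile_offset == 0) ?
            keys[0] - 1 :
            d_keys[tile_offset - 1];

    // The last thread fetches the key following this tile; for the final tile
    // a sentinel different from its last key forces the closing tail flag.
    int tile_successor_item = 0;
    if (threadIdx.x == BLOCK_THREADS - 1)
        tile_successor_item = (tile_offset + TILE_ITEMS >= num_items) ?
            keys[ITEMS_PER_THREAD - 1] + 1 :
            d_keys[tile_offset + TILE_ITEMS];

    int head_flags[ITEMS_PER_THREAD];
    int tail_flags[ITEMS_PER_THREAD];
    BlockDiscontinuityT(temp_storage).FlagHeadsAndTails(
        head_flags, tile_predecessor_item,
        tail_flags, tile_successor_item,
        keys, cub::Inequality());

    // Persist the head flags; tail flags could be stored the same way
    cub::StoreDirectBlocked(threadIdx.x, d_head_flags + tile_offset, head_flags);
}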
The corresponding output \p head_flags * in those threads will be { [0,0,1,0], [0,0,0,0], [1,1,0,0], [0,1,0,0], ... }. * and the corresponding output \p tail_flags in those threads will be * { [0,1,0,0], [0,0,0,1], [1,0,0,...], ..., [1,0,0,0] }. * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam FlagT [inferred] The flag type (must be an integer type) * \tparam FlagOp [inferred] Binary predicate functor type having member T operator()(const T &a, const T &b) or member T operator()(const T &a, const T &b, unsigned int b_index), and returning \p true if a discontinuity exists between \p a and \p b, otherwise \p false. \p b_index is the rank of b in the aggregate tile of data. */ template < int ITEMS_PER_THREAD, typename FlagT, typename FlagOp> __device__ __forceinline__ void FlagHeadsAndTails( FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags T tile_predecessor_item, ///< [in] [thread0 only] Item with which to compare the first tile item (input0 from thread0). FlagT (&tail_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity tail_flags T tile_successor_item, ///< [in] [threadBLOCK_THREADS-1 only] Item with which to compare the last tile item (inputITEMS_PER_THREAD-1 from threadBLOCK_THREADS-1). T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items FlagOp flag_op) ///< [in] Binary boolean flag predicate { // Share first and last items temp_storage.first_items[linear_tid] = input[0]; temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1]; CTA_SYNC(); T preds[ITEMS_PER_THREAD]; // Set flag for first thread-item preds[0] = (linear_tid == 0) ? tile_predecessor_item : // First thread temp_storage.last_items[linear_tid - 1]; head_flags[0] = ApplyOp::FlagT( flag_op, preds[0], input[0], linear_tid * ITEMS_PER_THREAD); // Set flag for last thread-item T successor_item = (linear_tid == BLOCK_THREADS - 1) ? tile_successor_item : // Last thread temp_storage.first_items[linear_tid + 1]; tail_flags[ITEMS_PER_THREAD - 1] = ApplyOp::FlagT( flag_op, input[ITEMS_PER_THREAD - 1], successor_item, (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD); // Set head_flags for remaining items Iterate<1, ITEMS_PER_THREAD>::FlagHeads(linear_tid, head_flags, input, preds, flag_op); // Set tail_flags for remaining items Iterate<0, ITEMS_PER_THREAD - 1>::FlagTails(linear_tid, tail_flags, input, flag_op); } //@} end member group }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/block/block_exchange.cuh000066400000000000000000001461211411340063500231120ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * The cub::BlockExchange class provides [collective](index.html#sec0) methods for rearranging data partitioned across a CUDA thread block. */ #pragma once #include "../util_ptx.cuh" #include "../util_arch.cuh" #include "../util_macro.cuh" #include "../util_type.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief The BlockExchange class provides [collective](index.html#sec0) methods for rearranging data partitioned across a CUDA thread block. ![](transpose_logo.png) * \ingroup BlockModule * * \tparam T The data type to be exchanged. * \tparam BLOCK_DIM_X The thread block length in threads along the X dimension * \tparam ITEMS_PER_THREAD The number of items partitioned onto each thread. * \tparam WARP_TIME_SLICING [optional] When \p true, only use enough shared memory for a single warp's worth of tile data, time-slicing the block-wide exchange over multiple synchronized rounds. Yields a smaller memory footprint at the expense of decreased parallelism. (Default: false) * \tparam BLOCK_DIM_Y [optional] The thread block length in threads along the Y dimension (default: 1) * \tparam BLOCK_DIM_Z [optional] The thread block length in threads along the Z dimension (default: 1) * \tparam PTX_ARCH [optional] \ptxversion * * \par Overview * - It is commonplace for blocks of threads to rearrange data items between * threads. For example, the device-accessible memory subsystem prefers access patterns * where data items are "striped" across threads (where consecutive threads access consecutive items), * yet most block-wide operations prefer a "blocked" partitioning of items across threads * (where consecutive items belong to a single thread). * - BlockExchange supports the following types of data exchanges: * - Transposing between [blocked](index.html#sec5sec3) and [striped](index.html#sec5sec3) arrangements * - Transposing between [blocked](index.html#sec5sec3) and [warp-striped](index.html#sec5sec3) arrangements * - Scattering ranked items to a [blocked arrangement](index.html#sec5sec3) * - Scattering ranked items to a [striped arrangement](index.html#sec5sec3) * - \rowmajor * * \par A Simple Example * \blockcollective{BlockExchange} * \par * The code snippet below illustrates the conversion from a "blocked" to a "striped" arrangement * of 512 integer items partitioned across 128 threads where each thread owns 4 items. 
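/* -------------------------------------------------------------------------
 * Illustrative sketch (not part of the CUB sources): a full round trip
 * through BlockExchange.  A tile is read with striped (coalesced) accesses,
 * transposed to a blocked arrangement so each thread can work on consecutive
 * items, then transposed back and written out with striped accesses again.
 * The kernel name and the per-item operation are hypothetical.
 * ------------------------------------------------------------------------- */
#include <cub/cub.cuh>

__global__ void SketchStripedRoundTrip(int *d_data)
{
    const int BLOCK_THREADS    = 128;
    const int ITEMS_PER_THREAD = 4;

    typedef cub::BlockExchange<int, BLOCK_THREADS, ITEMS_PER_THREAD> BlockExchangeT;
    __shared__ typename BlockExchangeT::TempStorage temp_storage;

    int *tile = d_data + blockIdx.x * BLOCK_THREADS * ITEMS_PER_THREAD;

    // Striped load: thread t reads tile elements t, t+128, t+256, t+384
    int items[ITEMS_PER_THREAD];
    cub::LoadDirectStriped<BLOCK_THREADS>(threadIdx.x, tile, items);

    // Rearrange so thread t now holds consecutive elements 4t .. 4t+3
    BlockExchangeT(temp_storage).StripedToBlocked(items, items);

    // Work on consecutive items (hypothetical per-item operation)
    for (int i = 0; i < ITEMS_PER_THREAD; ++i)
        items[i] += 1;

    // temp_storage is reused for the second exchange; synchronize first
    __syncthreads();

    // Back to the striped arrangement for a coalesced store
    BlockExchangeT(temp_storage).BlockedToStriped(items, items);
    cub::StoreDirectStriped<BLOCK_THREADS>(threadIdx.x, tile, items);
}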
* \par * \code * #include // or equivalently * * __global__ void ExampleKernel(int *d_data, ...) * { * // Specialize BlockExchange for a 1D block of 128 threads owning 4 integer items each * typedef cub::BlockExchange BlockExchange; * * // Allocate shared memory for BlockExchange * __shared__ typename BlockExchange::TempStorage temp_storage; * * // Load a tile of data striped across threads * int thread_data[4]; * cub::LoadDirectStriped<128>(threadIdx.x, d_data, thread_data); * * // Collectively exchange data into a blocked arrangement across threads * BlockExchange(temp_storage).StripedToBlocked(thread_data); * * \endcode * \par * Suppose the set of striped input \p thread_data across the block of threads is * { [0,128,256,384], [1,129,257,385], ..., [127,255,383,511] }. * The corresponding output \p thread_data in those threads will be * { [0,1,2,3], [4,5,6,7], [8,9,10,11], ..., [508,509,510,511] }. * * \par Performance Considerations * - Proper device-specific padding ensures zero bank conflicts for most types. * */ template < typename InputT, int BLOCK_DIM_X, int ITEMS_PER_THREAD, bool WARP_TIME_SLICING = false, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH> class BlockExchange { private: /****************************************************************************** * Constants ******************************************************************************/ /// Constants enum { /// The thread block size in threads BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, LOG_WARP_THREADS = CUB_LOG_WARP_THREADS(PTX_ARCH), WARP_THREADS = 1 << LOG_WARP_THREADS, WARPS = (BLOCK_THREADS + WARP_THREADS - 1) / WARP_THREADS, LOG_SMEM_BANKS = CUB_LOG_SMEM_BANKS(PTX_ARCH), SMEM_BANKS = 1 << LOG_SMEM_BANKS, TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, TIME_SLICES = (WARP_TIME_SLICING) ? WARPS : 1, TIME_SLICED_THREADS = (WARP_TIME_SLICING) ? CUB_MIN(BLOCK_THREADS, WARP_THREADS) : BLOCK_THREADS, TIME_SLICED_ITEMS = TIME_SLICED_THREADS * ITEMS_PER_THREAD, WARP_TIME_SLICED_THREADS = CUB_MIN(BLOCK_THREADS, WARP_THREADS), WARP_TIME_SLICED_ITEMS = WARP_TIME_SLICED_THREADS * ITEMS_PER_THREAD, // Insert padding to avoid bank conflicts during raking when items per thread is a power of two and > 4 (otherwise we can typically use 128b loads) INSERT_PADDING = (ITEMS_PER_THREAD > 4) && (PowerOfTwo::VALUE), PADDING_ITEMS = (INSERT_PADDING) ? 
(TIME_SLICED_ITEMS >> LOG_SMEM_BANKS) : 0, }; /****************************************************************************** * Type definitions ******************************************************************************/ /// Shared memory storage layout type struct __align__(16) _TempStorage { InputT buff[TIME_SLICED_ITEMS + PADDING_ITEMS]; }; public: /// \smemstorage{BlockExchange} struct TempStorage : Uninitialized<_TempStorage> {}; private: /****************************************************************************** * Thread fields ******************************************************************************/ /// Shared storage reference _TempStorage &temp_storage; /// Linear thread-id unsigned int linear_tid; unsigned int lane_id; unsigned int warp_id; unsigned int warp_offset; /****************************************************************************** * Utility methods ******************************************************************************/ /// Internal storage allocator __device__ __forceinline__ _TempStorage& PrivateStorage() { __shared__ _TempStorage private_storage; return private_storage; } /** * Transposes data items from blocked arrangement to striped arrangement. Specialized for no timeslicing. */ template __device__ __forceinline__ void BlockedToStriped( InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between blocked and striped arrangements. OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items to exchange, converting between blocked and striped arrangements. Int2Type /*time_slicing*/) { #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = (linear_tid * ITEMS_PER_THREAD) + ITEM; if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; temp_storage.buff[item_offset] = input_items[ITEM]; } CTA_SYNC(); #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = int(ITEM * BLOCK_THREADS) + linear_tid; if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; output_items[ITEM] = temp_storage.buff[item_offset]; } } /** * Transposes data items from blocked arrangement to striped arrangement. Specialized for warp-timeslicing. */ template __device__ __forceinline__ void BlockedToStriped( InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between blocked and striped arrangements. OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items to exchange, converting between blocked and striped arrangements. 
Int2Type /*time_slicing*/) { InputT temp_items[ITEMS_PER_THREAD]; #pragma unroll for (int SLICE = 0; SLICE < TIME_SLICES; SLICE++) { const int SLICE_OFFSET = SLICE * TIME_SLICED_ITEMS; const int SLICE_OOB = SLICE_OFFSET + TIME_SLICED_ITEMS; CTA_SYNC(); if (warp_id == SLICE) { #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = (lane_id * ITEMS_PER_THREAD) + ITEM; if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; temp_storage.buff[item_offset] = input_items[ITEM]; } } CTA_SYNC(); #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { // Read a strip of items const int STRIP_OFFSET = ITEM * BLOCK_THREADS; const int STRIP_OOB = STRIP_OFFSET + BLOCK_THREADS; if ((SLICE_OFFSET < STRIP_OOB) && (SLICE_OOB > STRIP_OFFSET)) { int item_offset = STRIP_OFFSET + linear_tid - SLICE_OFFSET; if ((item_offset >= 0) && (item_offset < TIME_SLICED_ITEMS)) { if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; temp_items[ITEM] = temp_storage.buff[item_offset]; } } } } // Copy #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { output_items[ITEM] = temp_items[ITEM]; } } /** * Transposes data items from blocked arrangement to warp-striped arrangement. Specialized for no timeslicing */ template __device__ __forceinline__ void BlockedToWarpStriped( InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between blocked and striped arrangements. OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items to exchange, converting between blocked and striped arrangements. Int2Type /*time_slicing*/) { #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = warp_offset + ITEM + (lane_id * ITEMS_PER_THREAD); if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; temp_storage.buff[item_offset] = input_items[ITEM]; } WARP_SYNC(0xffffffff); #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = warp_offset + (ITEM * WARP_TIME_SLICED_THREADS) + lane_id; if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; output_items[ITEM] = temp_storage.buff[item_offset]; } } /** * Transposes data items from blocked arrangement to warp-striped arrangement. Specialized for warp-timeslicing */ template __device__ __forceinline__ void BlockedToWarpStriped( InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between blocked and striped arrangements. OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items to exchange, converting between blocked and striped arrangements. 
Int2Type /*time_slicing*/) { if (warp_id == 0) { #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = ITEM + (lane_id * ITEMS_PER_THREAD); if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; temp_storage.buff[item_offset] = input_items[ITEM]; } WARP_SYNC(0xffffffff); #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = (ITEM * WARP_TIME_SLICED_THREADS) + lane_id; if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; output_items[ITEM] = temp_storage.buff[item_offset]; } } #pragma unroll for (unsigned int SLICE = 1; SLICE < TIME_SLICES; ++SLICE) { CTA_SYNC(); if (warp_id == SLICE) { #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = ITEM + (lane_id * ITEMS_PER_THREAD); if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; temp_storage.buff[item_offset] = input_items[ITEM]; } WARP_SYNC(0xffffffff); #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = (ITEM * WARP_TIME_SLICED_THREADS) + lane_id; if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; output_items[ITEM] = temp_storage.buff[item_offset]; } } } } /** * Transposes data items from striped arrangement to blocked arrangement. Specialized for no timeslicing. */ template __device__ __forceinline__ void StripedToBlocked( InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between blocked and striped arrangements. OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items to exchange, converting between blocked and striped arrangements. Int2Type /*time_slicing*/) { #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = int(ITEM * BLOCK_THREADS) + linear_tid; if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; temp_storage.buff[item_offset] = input_items[ITEM]; } CTA_SYNC(); // No timeslicing #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = (linear_tid * ITEMS_PER_THREAD) + ITEM; if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; output_items[ITEM] = temp_storage.buff[item_offset]; } } /** * Transposes data items from striped arrangement to blocked arrangement. Specialized for warp-timeslicing. */ template __device__ __forceinline__ void StripedToBlocked( InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between blocked and striped arrangements. OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items to exchange, converting between blocked and striped arrangements. 
Int2Type /*time_slicing*/) { // Warp time-slicing InputT temp_items[ITEMS_PER_THREAD]; #pragma unroll for (int SLICE = 0; SLICE < TIME_SLICES; SLICE++) { const int SLICE_OFFSET = SLICE * TIME_SLICED_ITEMS; const int SLICE_OOB = SLICE_OFFSET + TIME_SLICED_ITEMS; CTA_SYNC(); #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { // Write a strip of items const int STRIP_OFFSET = ITEM * BLOCK_THREADS; const int STRIP_OOB = STRIP_OFFSET + BLOCK_THREADS; if ((SLICE_OFFSET < STRIP_OOB) && (SLICE_OOB > STRIP_OFFSET)) { int item_offset = STRIP_OFFSET + linear_tid - SLICE_OFFSET; if ((item_offset >= 0) && (item_offset < TIME_SLICED_ITEMS)) { if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; temp_storage.buff[item_offset] = input_items[ITEM]; } } } CTA_SYNC(); if (warp_id == SLICE) { #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = (lane_id * ITEMS_PER_THREAD) + ITEM; if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; temp_items[ITEM] = temp_storage.buff[item_offset]; } } } // Copy #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { output_items[ITEM] = temp_items[ITEM]; } } /** * Transposes data items from warp-striped arrangement to blocked arrangement. Specialized for no timeslicing */ template __device__ __forceinline__ void WarpStripedToBlocked( InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between blocked and striped arrangements. OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items to exchange, converting between blocked and striped arrangements. Int2Type /*time_slicing*/) { #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = warp_offset + (ITEM * WARP_TIME_SLICED_THREADS) + lane_id; if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; temp_storage.buff[item_offset] = input_items[ITEM]; } WARP_SYNC(0xffffffff); #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = warp_offset + ITEM + (lane_id * ITEMS_PER_THREAD); if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; output_items[ITEM] = temp_storage.buff[item_offset]; } } /** * Transposes data items from warp-striped arrangement to blocked arrangement. Specialized for warp-timeslicing */ template __device__ __forceinline__ void WarpStripedToBlocked( InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between blocked and striped arrangements. OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items to exchange, converting between blocked and striped arrangements. Int2Type /*time_slicing*/) { #pragma unroll for (unsigned int SLICE = 0; SLICE < TIME_SLICES; ++SLICE) { CTA_SYNC(); if (warp_id == SLICE) { #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = (ITEM * WARP_TIME_SLICED_THREADS) + lane_id; if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; temp_storage.buff[item_offset] = input_items[ITEM]; } WARP_SYNC(0xffffffff); #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = ITEM + (lane_id * ITEMS_PER_THREAD); if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; output_items[ITEM] = temp_storage.buff[item_offset]; } } } } /** * Exchanges data items annotated by rank into blocked arrangement. Specialized for no timeslicing. 
*/ template __device__ __forceinline__ void ScatterToBlocked( InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between blocked and striped arrangements. OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items to exchange, converting between blocked and striped arrangements. OffsetT ranks[ITEMS_PER_THREAD], ///< [in] Corresponding scatter ranks Int2Type /*time_slicing*/) { #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = ranks[ITEM]; if (INSERT_PADDING) item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); temp_storage.buff[item_offset] = input_items[ITEM]; } CTA_SYNC(); #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = (linear_tid * ITEMS_PER_THREAD) + ITEM; if (INSERT_PADDING) item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); output_items[ITEM] = temp_storage.buff[item_offset]; } } /** * Exchanges data items annotated by rank into blocked arrangement. Specialized for warp-timeslicing. */ template __device__ __forceinline__ void ScatterToBlocked( InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between blocked and striped arrangements. OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items to exchange, converting between blocked and striped arrangements. OffsetT ranks[ITEMS_PER_THREAD], ///< [in] Corresponding scatter ranks Int2Type /*time_slicing*/) { InputT temp_items[ITEMS_PER_THREAD]; #pragma unroll for (int SLICE = 0; SLICE < TIME_SLICES; SLICE++) { CTA_SYNC(); const int SLICE_OFFSET = TIME_SLICED_ITEMS * SLICE; #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = ranks[ITEM] - SLICE_OFFSET; if ((item_offset >= 0) && (item_offset < WARP_TIME_SLICED_ITEMS)) { if (INSERT_PADDING) item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); temp_storage.buff[item_offset] = input_items[ITEM]; } } CTA_SYNC(); if (warp_id == SLICE) { #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = (lane_id * ITEMS_PER_THREAD) + ITEM; if (INSERT_PADDING) item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); temp_items[ITEM] = temp_storage.buff[item_offset]; } } } // Copy #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { output_items[ITEM] = temp_items[ITEM]; } } /** * Exchanges data items annotated by rank into striped arrangement. Specialized for no timeslicing. */ template __device__ __forceinline__ void ScatterToStriped( InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between blocked and striped arrangements. OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items to exchange, converting between blocked and striped arrangements. OffsetT ranks[ITEMS_PER_THREAD], ///< [in] Corresponding scatter ranks Int2Type /*time_slicing*/) { #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = ranks[ITEM]; if (INSERT_PADDING) item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); temp_storage.buff[item_offset] = input_items[ITEM]; } CTA_SYNC(); #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = int(ITEM * BLOCK_THREADS) + linear_tid; if (INSERT_PADDING) item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); output_items[ITEM] = temp_storage.buff[item_offset]; } } /** * Exchanges data items annotated by rank into striped arrangement. Specialized for warp-timeslicing. 
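/* -------------------------------------------------------------------------
 * Illustrative sketch (not from the CUB sources): the Scatter* exchanges take
 * an explicit per-item rank instead of deriving the destination from the
 * thread id.  Ranking the item at tile position p to position
 * TILE_ITEMS-1-p reverses the tile while keeping it in a blocked
 * arrangement.  The kernel name and the choice of permutation are
 * hypothetical.
 * ------------------------------------------------------------------------- */
#include <cub/cub.cuh>

__global__ void SketchScatterReverse(int *d_data)
{
    const int BLOCK_THREADS    = 128;
    const int ITEMS_PER_THREAD = 4;
    const int TILE_ITEMS       = BLOCK_THREADS * ITEMS_PER_THREAD;

    typedef cub::BlockExchange<int, BLOCK_THREADS, ITEMS_PER_THREAD> BlockExchangeT;
    __shared__ typename BlockExchangeT::TempStorage temp_storage;

    int *tile = d_data + blockIdx.x * TILE_ITEMS;

    // Blocked load: thread t holds tile items 4t .. 4t+3
    int items[ITEMS_PER_THREAD];
    cub::LoadDirectBlocked(threadIdx.x, tile, items);

    // Rank the item at global tile position p to position TILE_ITEMS-1-p
    int ranks[ITEMS_PER_THREAD];
    for (int i = 0; i < ITEMS_PER_THREAD; ++i)
        ranks[i] = TILE_ITEMS - 1 - (threadIdx.x * ITEMS_PER_THREAD + i);

    // Scatter by rank; the result is again a blocked arrangement
    BlockExchangeT(temp_storage).ScatterToBlocked(items, items, ranks);

    cub::StoreDirectBlocked(threadIdx.x, tile, items);
}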
*/ template __device__ __forceinline__ void ScatterToStriped( InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between blocked and striped arrangements. OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items to exchange, converting between blocked and striped arrangements. OffsetT ranks[ITEMS_PER_THREAD], ///< [in] Corresponding scatter ranks Int2Type /*time_slicing*/) { InputT temp_items[ITEMS_PER_THREAD]; #pragma unroll for (int SLICE = 0; SLICE < TIME_SLICES; SLICE++) { const int SLICE_OFFSET = SLICE * TIME_SLICED_ITEMS; const int SLICE_OOB = SLICE_OFFSET + TIME_SLICED_ITEMS; CTA_SYNC(); #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = ranks[ITEM] - SLICE_OFFSET; if ((item_offset >= 0) && (item_offset < WARP_TIME_SLICED_ITEMS)) { if (INSERT_PADDING) item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); temp_storage.buff[item_offset] = input_items[ITEM]; } } CTA_SYNC(); #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { // Read a strip of items const int STRIP_OFFSET = ITEM * BLOCK_THREADS; const int STRIP_OOB = STRIP_OFFSET + BLOCK_THREADS; if ((SLICE_OFFSET < STRIP_OOB) && (SLICE_OOB > STRIP_OFFSET)) { int item_offset = STRIP_OFFSET + linear_tid - SLICE_OFFSET; if ((item_offset >= 0) && (item_offset < TIME_SLICED_ITEMS)) { if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; temp_items[ITEM] = temp_storage.buff[item_offset]; } } } } // Copy #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { output_items[ITEM] = temp_items[ITEM]; } } public: /******************************************************************//** * \name Collective constructors *********************************************************************/ //@{ /** * \brief Collective constructor using a private static allocation of shared memory as temporary storage. */ __device__ __forceinline__ BlockExchange() : temp_storage(PrivateStorage()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)), warp_id((WARPS == 1) ? 0 : linear_tid / WARP_THREADS), lane_id(LaneId()), warp_offset(warp_id * WARP_TIME_SLICED_ITEMS) {} /** * \brief Collective constructor using the specified memory allocation as temporary storage. */ __device__ __forceinline__ BlockExchange( TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage : temp_storage(temp_storage.Alias()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)), lane_id(LaneId()), warp_id((WARPS == 1) ? 0 : linear_tid / WARP_THREADS), warp_offset(warp_id * WARP_TIME_SLICED_ITEMS) {} //@} end member group /******************************************************************//** * \name Structured exchanges *********************************************************************/ //@{ /** * \brief Transposes data items from striped arrangement to blocked arrangement. * * \par * - \smemreuse * * \par Snippet * The code snippet below illustrates the conversion from a "striped" to a "blocked" arrangement * of 512 integer items partitioned across 128 threads where each thread owns 4 items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(int *d_data, ...) 
* { * // Specialize BlockExchange for a 1D block of 128 threads owning 4 integer items each * typedef cub::BlockExchange BlockExchange; * * // Allocate shared memory for BlockExchange * __shared__ typename BlockExchange::TempStorage temp_storage; * * // Load a tile of ordered data into a striped arrangement across block threads * int thread_data[4]; * cub::LoadDirectStriped<128>(threadIdx.x, d_data, thread_data); * * // Collectively exchange data into a blocked arrangement across threads * BlockExchange(temp_storage).StripedToBlocked(thread_data, thread_data); * * \endcode * \par * Suppose the set of striped input \p thread_data across the block of threads is * { [0,128,256,384], [1,129,257,385], ..., [127,255,383,511] } after loading from device-accessible memory. * The corresponding output \p thread_data in those threads will be * { [0,1,2,3], [4,5,6,7], [8,9,10,11], ..., [508,509,510,511] }. * */ template __device__ __forceinline__ void StripedToBlocked( InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between striped and blocked arrangements. OutputT output_items[ITEMS_PER_THREAD]) ///< [out] Items from exchange, converting between striped and blocked arrangements. { StripedToBlocked(input_items, output_items, Int2Type()); } /** * \brief Transposes data items from blocked arrangement to striped arrangement. * * \par * - \smemreuse * * \par Snippet * The code snippet below illustrates the conversion from a "blocked" to a "striped" arrangement * of 512 integer items partitioned across 128 threads where each thread owns 4 items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(int *d_data, ...) * { * // Specialize BlockExchange for a 1D block of 128 threads owning 4 integer items each * typedef cub::BlockExchange BlockExchange; * * // Allocate shared memory for BlockExchange * __shared__ typename BlockExchange::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... * * // Collectively exchange data into a striped arrangement across threads * BlockExchange(temp_storage).BlockedToStriped(thread_data, thread_data); * * // Store data striped across block threads into an ordered tile * cub::StoreDirectStriped(threadIdx.x, d_data, thread_data); * * \endcode * \par * Suppose the set of blocked input \p thread_data across the block of threads is * { [0,1,2,3], [4,5,6,7], [8,9,10,11], ..., [508,509,510,511] }. * The corresponding output \p thread_data in those threads will be * { [0,128,256,384], [1,129,257,385], ..., [127,255,383,511] } in * preparation for storing to device-accessible memory. * */ template __device__ __forceinline__ void BlockedToStriped( InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between striped and blocked arrangements. OutputT output_items[ITEMS_PER_THREAD]) ///< [out] Items from exchange, converting between striped and blocked arrangements. { BlockedToStriped(input_items, output_items, Int2Type()); } /** * \brief Transposes data items from warp-striped arrangement to blocked arrangement. * * \par * - \smemreuse * * \par Snippet * The code snippet below illustrates the conversion from a "warp-striped" to a "blocked" arrangement * of 512 integer items partitioned across 128 threads where each thread owns 4 items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(int *d_data, ...) 
* { * // Specialize BlockExchange for a 1D block of 128 threads owning 4 integer items each * typedef cub::BlockExchange BlockExchange; * * // Allocate shared memory for BlockExchange * __shared__ typename BlockExchange::TempStorage temp_storage; * * // Load a tile of ordered data into a warp-striped arrangement across warp threads * int thread_data[4]; * cub::LoadSWarptriped(threadIdx.x, d_data, thread_data); * * // Collectively exchange data into a blocked arrangement across threads * BlockExchange(temp_storage).WarpStripedToBlocked(thread_data); * * \endcode * \par * Suppose the set of warp-striped input \p thread_data across the block of threads is * { [0,32,64,96], [1,33,65,97], [2,34,66,98], ..., [415,447,479,511] } * after loading from device-accessible memory. (The first 128 items are striped across * the first warp of 32 threads, the second 128 items are striped across the second warp, etc.) * The corresponding output \p thread_data in those threads will be * { [0,1,2,3], [4,5,6,7], [8,9,10,11], ..., [508,509,510,511] }. * */ template __device__ __forceinline__ void WarpStripedToBlocked( InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between striped and blocked arrangements. OutputT output_items[ITEMS_PER_THREAD]) ///< [out] Items from exchange, converting between striped and blocked arrangements. { WarpStripedToBlocked(input_items, output_items, Int2Type()); } /** * \brief Transposes data items from blocked arrangement to warp-striped arrangement. * * \par * - \smemreuse * * \par Snippet * The code snippet below illustrates the conversion from a "blocked" to a "warp-striped" arrangement * of 512 integer items partitioned across 128 threads where each thread owns 4 items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(int *d_data, ...) * { * // Specialize BlockExchange for a 1D block of 128 threads owning 4 integer items each * typedef cub::BlockExchange BlockExchange; * * // Allocate shared memory for BlockExchange * __shared__ typename BlockExchange::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... * * // Collectively exchange data into a warp-striped arrangement across threads * BlockExchange(temp_storage).BlockedToWarpStriped(thread_data, thread_data); * * // Store data striped across warp threads into an ordered tile * cub::StoreDirectStriped(threadIdx.x, d_data, thread_data); * * \endcode * \par * Suppose the set of blocked input \p thread_data across the block of threads is * { [0,1,2,3], [4,5,6,7], [8,9,10,11], ..., [508,509,510,511] }. * The corresponding output \p thread_data in those threads will be * { [0,32,64,96], [1,33,65,97], [2,34,66,98], ..., [415,447,479,511] } * in preparation for storing to device-accessible memory. (The first 128 items are striped across * the first warp of 32 threads, the second 128 items are striped across the second warp, etc.) * */ template __device__ __forceinline__ void BlockedToWarpStriped( InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between striped and blocked arrangements. OutputT output_items[ITEMS_PER_THREAD]) ///< [out] Items from exchange, converting between striped and blocked arrangements. 
{ BlockedToWarpStriped(input_items, output_items, Int2Type()); } //@} end member group /******************************************************************//** * \name Scatter exchanges *********************************************************************/ //@{ /** * \brief Exchanges data items annotated by rank into blocked arrangement. * * \par * - \smemreuse * * \tparam OffsetT [inferred] Signed integer type for local offsets */ template __device__ __forceinline__ void ScatterToBlocked( InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between striped and blocked arrangements. OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items from exchange, converting between striped and blocked arrangements. OffsetT ranks[ITEMS_PER_THREAD]) ///< [in] Corresponding scatter ranks { ScatterToBlocked(input_items, output_items, ranks, Int2Type()); } /** * \brief Exchanges data items annotated by rank into striped arrangement. * * \par * - \smemreuse * * \tparam OffsetT [inferred] Signed integer type for local offsets */ template __device__ __forceinline__ void ScatterToStriped( InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between striped and blocked arrangements. OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items from exchange, converting between striped and blocked arrangements. OffsetT ranks[ITEMS_PER_THREAD]) ///< [in] Corresponding scatter ranks { ScatterToStriped(input_items, output_items, ranks, Int2Type()); } /** * \brief Exchanges data items annotated by rank into striped arrangement. Items with rank -1 are not exchanged. * * \par * - \smemreuse * * \tparam OffsetT [inferred] Signed integer type for local offsets */ template __device__ __forceinline__ void ScatterToStripedGuarded( InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between striped and blocked arrangements. OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items from exchange, converting between striped and blocked arrangements. OffsetT ranks[ITEMS_PER_THREAD]) ///< [in] Corresponding scatter ranks { #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = ranks[ITEM]; if (INSERT_PADDING) item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); if (ranks[ITEM] >= 0) temp_storage.buff[item_offset] = input_items[ITEM]; } CTA_SYNC(); #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = int(ITEM * BLOCK_THREADS) + linear_tid; if (INSERT_PADDING) item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); output_items[ITEM] = temp_storage.buff[item_offset]; } } /** * \brief Exchanges valid data items annotated by rank into striped arrangement. * * \par * - \smemreuse * * \tparam OffsetT [inferred] Signed integer type for local offsets * \tparam ValidFlag [inferred] FlagT type denoting which items are valid */ template __device__ __forceinline__ void ScatterToStripedFlagged( InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between striped and blocked arrangements. OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items from exchange, converting between striped and blocked arrangements. 
OffsetT ranks[ITEMS_PER_THREAD], ///< [in] Corresponding scatter ranks ValidFlag is_valid[ITEMS_PER_THREAD]) ///< [in] Corresponding flag denoting item validity { #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = ranks[ITEM]; if (INSERT_PADDING) item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); if (is_valid[ITEM]) temp_storage.buff[item_offset] = input_items[ITEM]; } CTA_SYNC(); #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = int(ITEM * BLOCK_THREADS) + linear_tid; if (INSERT_PADDING) item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); output_items[ITEM] = temp_storage.buff[item_offset]; } } //@} end member group #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document __device__ __forceinline__ void StripedToBlocked( InputT items[ITEMS_PER_THREAD]) ///< [in-out] Items to exchange, converting between striped and blocked arrangements. { StripedToBlocked(items, items); } __device__ __forceinline__ void BlockedToStriped( InputT items[ITEMS_PER_THREAD]) ///< [in-out] Items to exchange, converting between striped and blocked arrangements. { BlockedToStriped(items, items); } __device__ __forceinline__ void WarpStripedToBlocked( InputT items[ITEMS_PER_THREAD]) ///< [in-out] Items to exchange, converting between striped and blocked arrangements. { WarpStripedToBlocked(items, items); } __device__ __forceinline__ void BlockedToWarpStriped( InputT items[ITEMS_PER_THREAD]) ///< [in-out] Items to exchange, converting between striped and blocked arrangements. { BlockedToWarpStriped(items, items); } template __device__ __forceinline__ void ScatterToBlocked( InputT items[ITEMS_PER_THREAD], ///< [in-out] Items to exchange, converting between striped and blocked arrangements. OffsetT ranks[ITEMS_PER_THREAD]) ///< [in] Corresponding scatter ranks { ScatterToBlocked(items, items, ranks); } template __device__ __forceinline__ void ScatterToStriped( InputT items[ITEMS_PER_THREAD], ///< [in-out] Items to exchange, converting between striped and blocked arrangements. OffsetT ranks[ITEMS_PER_THREAD]) ///< [in] Corresponding scatter ranks { ScatterToStriped(items, items, ranks); } template __device__ __forceinline__ void ScatterToStripedGuarded( InputT items[ITEMS_PER_THREAD], ///< [in-out] Items to exchange, converting between striped and blocked arrangements. OffsetT ranks[ITEMS_PER_THREAD]) ///< [in] Corresponding scatter ranks { ScatterToStripedGuarded(items, items, ranks); } template __device__ __forceinline__ void ScatterToStripedFlagged( InputT items[ITEMS_PER_THREAD], ///< [in-out] Items to exchange, converting between striped and blocked arrangements. 
OffsetT ranks[ITEMS_PER_THREAD], ///< [in] Corresponding scatter ranks ValidFlag is_valid[ITEMS_PER_THREAD]) ///< [in] Corresponding flag denoting item validity { ScatterToStriped(items, items, ranks, is_valid); } #endif // DOXYGEN_SHOULD_SKIP_THIS }; #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document template < typename T, int ITEMS_PER_THREAD, int LOGICAL_WARP_THREADS = CUB_PTX_WARP_THREADS, int PTX_ARCH = CUB_PTX_ARCH> class WarpExchange { private: /****************************************************************************** * Constants ******************************************************************************/ /// Constants enum { // Whether the logical warp size and the PTX warp size coincide IS_ARCH_WARP = (LOGICAL_WARP_THREADS == CUB_WARP_THREADS(PTX_ARCH)), WARP_ITEMS = (ITEMS_PER_THREAD * LOGICAL_WARP_THREADS) + 1, LOG_SMEM_BANKS = CUB_LOG_SMEM_BANKS(PTX_ARCH), SMEM_BANKS = 1 << LOG_SMEM_BANKS, // Insert padding if the number of items per thread is a power of two and > 4 (otherwise we can typically use 128b loads) INSERT_PADDING = (ITEMS_PER_THREAD > 4) && (PowerOfTwo::VALUE), PADDING_ITEMS = (INSERT_PADDING) ? (WARP_ITEMS >> LOG_SMEM_BANKS) : 0, }; /****************************************************************************** * Type definitions ******************************************************************************/ /// Shared memory storage layout type struct _TempStorage { T buff[WARP_ITEMS + PADDING_ITEMS]; }; public: /// \smemstorage{WarpExchange} struct TempStorage : Uninitialized<_TempStorage> {}; private: /****************************************************************************** * Thread fields ******************************************************************************/ _TempStorage &temp_storage; int lane_id; public: /****************************************************************************** * Construction ******************************************************************************/ /// Constructor __device__ __forceinline__ WarpExchange( TempStorage &temp_storage) : temp_storage(temp_storage.Alias()), lane_id(IS_ARCH_WARP ? LaneId() : LaneId() % LOGICAL_WARP_THREADS) {} /****************************************************************************** * Interface ******************************************************************************/ /** * \brief Exchanges valid data items annotated by rank into striped arrangement. * * \par * - \smemreuse * * \tparam OffsetT [inferred] Signed integer type for local offsets */ template __device__ __forceinline__ void ScatterToStriped( T items[ITEMS_PER_THREAD], ///< [in-out] Items to exchange OffsetT ranks[ITEMS_PER_THREAD]) ///< [in] Corresponding scatter ranks { #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { if (INSERT_PADDING) ranks[ITEM] = SHR_ADD(ranks[ITEM], LOG_SMEM_BANKS, ranks[ITEM]); temp_storage.buff[ranks[ITEM]] = items[ITEM]; } WARP_SYNC(0xffffffff); #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { int item_offset = (ITEM * LOGICAL_WARP_THREADS) + lane_id; if (INSERT_PADDING) item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); items[ITEM] = temp_storage.buff[item_offset]; } } }; #endif // DOXYGEN_SHOULD_SKIP_THIS } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/block/block_histogram.cuh000066400000000000000000000376431411340063500233350ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. 
All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * The cub::BlockHistogram class provides [collective](index.html#sec0) methods for constructing block-wide histograms from data samples partitioned across a CUDA thread block. */ #pragma once #include "specializations/block_histogram_sort.cuh" #include "specializations/block_histogram_atomic.cuh" #include "../util_ptx.cuh" #include "../util_arch.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * Algorithmic variants ******************************************************************************/ /** * \brief BlockHistogramAlgorithm enumerates alternative algorithms for the parallel construction of block-wide histograms. */ enum BlockHistogramAlgorithm { /** * \par Overview * Sorting followed by differentiation. Execution is comprised of two phases: * -# Sort the data using efficient radix sort * -# Look for "runs" of same-valued keys by detecting discontinuities; the run-lengths are histogram bin counts. * * \par Performance Considerations * Delivers consistent throughput regardless of sample bin distribution. */ BLOCK_HISTO_SORT, /** * \par Overview * Use atomic addition to update byte counts directly * * \par Performance Considerations * Performance is strongly tied to the hardware implementation of atomic * addition, and may be significantly degraded for non uniformly-random * input distributions where many concurrent updates are likely to be * made to the same bin counter. 
*/ BLOCK_HISTO_ATOMIC, }; /****************************************************************************** * Block histogram ******************************************************************************/ /** * \brief The BlockHistogram class provides [collective](index.html#sec0) methods for constructing block-wide histograms from data samples partitioned across a CUDA thread block. ![](histogram_logo.png) * \ingroup BlockModule * * \tparam T The sample type being histogrammed (must be castable to an integer bin identifier) * \tparam BLOCK_DIM_X The thread block length in threads along the X dimension * \tparam ITEMS_PER_THREAD The number of items per thread * \tparam BINS The number bins within the histogram * \tparam ALGORITHM [optional] cub::BlockHistogramAlgorithm enumerator specifying the underlying algorithm to use (default: cub::BLOCK_HISTO_SORT) * \tparam BLOCK_DIM_Y [optional] The thread block length in threads along the Y dimension (default: 1) * \tparam BLOCK_DIM_Z [optional] The thread block length in threads along the Z dimension (default: 1) * \tparam PTX_ARCH [optional] \ptxversion * * \par Overview * - A histogram * counts the number of observations that fall into each of the disjoint categories (known as bins). * - BlockHistogram can be optionally specialized to use different algorithms: * -# cub::BLOCK_HISTO_SORT. Sorting followed by differentiation. [More...](\ref cub::BlockHistogramAlgorithm) * -# cub::BLOCK_HISTO_ATOMIC. Use atomic addition to update byte counts directly. [More...](\ref cub::BlockHistogramAlgorithm) * * \par Performance Considerations * - \granularity * * \par A Simple Example * \blockcollective{BlockHistogram} * \par * The code snippet below illustrates a 256-bin histogram of 512 integer samples that * are partitioned across 128 threads where each thread owns 4 samples. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize a 256-bin BlockHistogram type for a 1D block of 128 threads having 4 character samples each * typedef cub::BlockHistogram BlockHistogram; * * // Allocate shared memory for BlockHistogram * __shared__ typename BlockHistogram::TempStorage temp_storage; * * // Allocate shared memory for block-wide histogram bin counts * __shared__ unsigned int smem_histogram[256]; * * // Obtain input samples per thread * unsigned char data[4]; * ... * * // Compute the block-wide histogram * BlockHistogram(temp_storage).Histogram(data, smem_histogram); * * \endcode * * \par Performance and Usage Considerations * - The histogram output can be constructed in shared or device-accessible memory * - See cub::BlockHistogramAlgorithm for performance details regarding algorithmic alternatives * */ template < typename T, int BLOCK_DIM_X, int ITEMS_PER_THREAD, int BINS, BlockHistogramAlgorithm ALGORITHM = BLOCK_HISTO_SORT, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH> class BlockHistogram { private: /****************************************************************************** * Constants and type definitions ******************************************************************************/ /// Constants enum { /// The thread block size in threads BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, }; /** * Ensure the template parameterization meets the requirements of the * targeted device architecture. BLOCK_HISTO_ATOMIC can only be used * on version SM120 or later. Otherwise BLOCK_HISTO_SORT is used * regardless. 
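 *
 * For example (an assumed parameterization, given for illustration only and
 * not part of the original comment), a request such as
 * \code
 * typedef cub::BlockHistogram<unsigned char, 128, 4, 256, cub::BLOCK_HISTO_ATOMIC, 1, 1, 110> FallbackHistogramT;
 * \endcode
 * resolves SAFE_ALGORITHM to BLOCK_HISTO_SORT, because PTX_ARCH (110) is
 * below the SM120 requirement.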
*/ static const BlockHistogramAlgorithm SAFE_ALGORITHM = ((ALGORITHM == BLOCK_HISTO_ATOMIC) && (PTX_ARCH < 120)) ? BLOCK_HISTO_SORT : ALGORITHM; /// Internal specialization. typedef typename If<(SAFE_ALGORITHM == BLOCK_HISTO_SORT), BlockHistogramSort, BlockHistogramAtomic >::Type InternalBlockHistogram; /// Shared memory storage layout type for BlockHistogram typedef typename InternalBlockHistogram::TempStorage _TempStorage; /****************************************************************************** * Thread fields ******************************************************************************/ /// Shared storage reference _TempStorage &temp_storage; /// Linear thread-id unsigned int linear_tid; /****************************************************************************** * Utility methods ******************************************************************************/ /// Internal storage allocator __device__ __forceinline__ _TempStorage& PrivateStorage() { __shared__ _TempStorage private_storage; return private_storage; } public: /// \smemstorage{BlockHistogram} struct TempStorage : Uninitialized<_TempStorage> {}; /******************************************************************//** * \name Collective constructors *********************************************************************/ //@{ /** * \brief Collective constructor using a private static allocation of shared memory as temporary storage. */ __device__ __forceinline__ BlockHistogram() : temp_storage(PrivateStorage()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} /** * \brief Collective constructor using the specified memory allocation as temporary storage. */ __device__ __forceinline__ BlockHistogram( TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage : temp_storage(temp_storage.Alias()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} //@} end member group /******************************************************************//** * \name Histogram operations *********************************************************************/ //@{ /** * \brief Initialize the shared histogram counters to zero. * * \par Snippet * The code snippet below illustrates a the initialization and update of a * histogram of 512 integer samples that are partitioned across 128 threads * where each thread owns 4 samples. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize a 256-bin BlockHistogram type for a 1D block of 128 threads having 4 character samples each * typedef cub::BlockHistogram BlockHistogram; * * // Allocate shared memory for BlockHistogram * __shared__ typename BlockHistogram::TempStorage temp_storage; * * // Allocate shared memory for block-wide histogram bin counts * __shared__ unsigned int smem_histogram[256]; * * // Obtain input samples per thread * unsigned char thread_samples[4]; * ... 
* * // Initialize the block-wide histogram * BlockHistogram(temp_storage).InitHistogram(smem_histogram); * * // Update the block-wide histogram * BlockHistogram(temp_storage).Composite(thread_samples, smem_histogram); * * \endcode * * \tparam CounterT [inferred] Histogram counter type */ template __device__ __forceinline__ void InitHistogram(CounterT histogram[BINS]) { // Initialize histogram bin counts to zeros int histo_offset = 0; #pragma unroll for(; histo_offset + BLOCK_THREADS <= BINS; histo_offset += BLOCK_THREADS) { histogram[histo_offset + linear_tid] = 0; } // Finish up with guarded initialization if necessary if ((BINS % BLOCK_THREADS != 0) && (histo_offset + linear_tid < BINS)) { histogram[histo_offset + linear_tid] = 0; } } /** * \brief Constructs a block-wide histogram in shared/device-accessible memory. Each thread contributes an array of input elements. * * \par * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates a 256-bin histogram of 512 integer samples that * are partitioned across 128 threads where each thread owns 4 samples. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize a 256-bin BlockHistogram type for a 1D block of 128 threads having 4 character samples each * typedef cub::BlockHistogram BlockHistogram; * * // Allocate shared memory for BlockHistogram * __shared__ typename BlockHistogram::TempStorage temp_storage; * * // Allocate shared memory for block-wide histogram bin counts * __shared__ unsigned int smem_histogram[256]; * * // Obtain input samples per thread * unsigned char thread_samples[4]; * ... * * // Compute the block-wide histogram * BlockHistogram(temp_storage).Histogram(thread_samples, smem_histogram); * * \endcode * * \tparam CounterT [inferred] Histogram counter type */ template < typename CounterT > __device__ __forceinline__ void Histogram( T (&items)[ITEMS_PER_THREAD], ///< [in] Calling thread's input values to histogram CounterT histogram[BINS]) ///< [out] Reference to shared/device-accessible memory histogram { // Initialize histogram bin counts to zeros InitHistogram(histogram); CTA_SYNC(); // Composite the histogram InternalBlockHistogram(temp_storage).Composite(items, histogram); } /** * \brief Updates an existing block-wide histogram in shared/device-accessible memory. Each thread composites an array of input elements. * * \par * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates a the initialization and update of a * histogram of 512 integer samples that are partitioned across 128 threads * where each thread owns 4 samples. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize a 256-bin BlockHistogram type for a 1D block of 128 threads having 4 character samples each * typedef cub::BlockHistogram BlockHistogram; * * // Allocate shared memory for BlockHistogram * __shared__ typename BlockHistogram::TempStorage temp_storage; * * // Allocate shared memory for block-wide histogram bin counts * __shared__ unsigned int smem_histogram[256]; * * // Obtain input samples per thread * unsigned char thread_samples[4]; * ... 
* * // Initialize the block-wide histogram * BlockHistogram(temp_storage).InitHistogram(smem_histogram); * * // Update the block-wide histogram * BlockHistogram(temp_storage).Composite(thread_samples, smem_histogram); * * \endcode * * \tparam CounterT [inferred] Histogram counter type */ template < typename CounterT > __device__ __forceinline__ void Composite( T (&items)[ITEMS_PER_THREAD], ///< [in] Calling thread's input values to histogram CounterT histogram[BINS]) ///< [out] Reference to shared/device-accessible memory histogram { InternalBlockHistogram(temp_storage).Composite(items, histogram); } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/block/block_load.cuh000066400000000000000000001540311411340063500222460ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * Operations for reading linear tiles of data into the CUDA thread block. */ #pragma once #include #include "block_exchange.cuh" #include "../iterator/cache_modified_input_iterator.cuh" #include "../util_ptx.cuh" #include "../util_macro.cuh" #include "../util_type.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup UtilIo * @{ */ /******************************************************************//** * \name Blocked arrangement I/O (direct) *********************************************************************/ //@{ /** * \brief Load a linear segment of items into a blocked arrangement across the thread block. * * \blocked * * \tparam T [inferred] The data type to load. * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam InputIteratorT [inferred] The random-access iterator type for input \iterator. 
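 *
 * \par
 * A minimal usage sketch (illustrative only; the pointer name \p d_in and the
 * four-items-per-thread shape are assumptions, not part of the original text):
 * \code
 * int thread_data[4];
 * // Thread t reads d_in[4*t + 0 .. 4*t + 3] into its blocked arrangement
 * cub::LoadDirectBlocked(threadIdx.x, d_in, thread_data);
 * \endcode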
*/ template < typename InputT, int ITEMS_PER_THREAD, typename InputIteratorT> __device__ __forceinline__ void LoadDirectBlocked( int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load { InputIteratorT thread_itr = block_itr + (linear_tid * ITEMS_PER_THREAD); // Load directly in thread-blocked order #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { items[ITEM] = thread_itr[ITEM]; } } /** * \brief Load a linear segment of items into a blocked arrangement across the thread block, guarded by range. * * \blocked * * \tparam T [inferred] The data type to load. * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam InputIteratorT [inferred] The random-access iterator type for input \iterator. */ template < typename InputT, int ITEMS_PER_THREAD, typename InputIteratorT> __device__ __forceinline__ void LoadDirectBlocked( int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load int valid_items) ///< [in] Number of valid items to load { InputIteratorT thread_itr = block_itr + (linear_tid * ITEMS_PER_THREAD); #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { if ((linear_tid * ITEMS_PER_THREAD) + ITEM < valid_items) { items[ITEM] = thread_itr[ITEM]; } } } /** * \brief Load a linear segment of items into a blocked arrangement across the thread block, guarded by range, with a fall-back assignment of out-of-bound elements.. * * \blocked * * \tparam T [inferred] The data type to load. * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam InputIteratorT [inferred] The random-access iterator type for input \iterator. 
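 *
 * \par
 * A hedged usage sketch (illustrative only; \p d_in, \p valid_items and the
 * default value of -1 are assumptions):
 * \code
 * int thread_data[4];
 * // Guarded blocked load: slots past valid_items are assigned -1 instead of being read
 * cub::LoadDirectBlocked(threadIdx.x, d_in, thread_data, valid_items, -1);
 * \endcode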
*/ template < typename InputT, typename DefaultT, int ITEMS_PER_THREAD, typename InputIteratorT> __device__ __forceinline__ void LoadDirectBlocked( int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load int valid_items, ///< [in] Number of valid items to load DefaultT oob_default) ///< [in] Default value to assign out-of-bound items { #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) items[ITEM] = oob_default; LoadDirectBlocked(linear_tid, block_itr, items, valid_items); } #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document /** * Internal implementation for load vectorization */ template < CacheLoadModifier MODIFIER, typename T, int ITEMS_PER_THREAD> __device__ __forceinline__ void InternalLoadDirectBlockedVectorized( int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) T *block_ptr, ///< [in] Input pointer for loading from T (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load { // Biggest memory access word that T is a whole multiple of typedef typename UnitWord::DeviceWord DeviceWord; enum { TOTAL_WORDS = sizeof(items) / sizeof(DeviceWord), VECTOR_SIZE = (TOTAL_WORDS % 4 == 0) ? 4 : (TOTAL_WORDS % 2 == 0) ? 2 : 1, VECTORS_PER_THREAD = TOTAL_WORDS / VECTOR_SIZE, }; // Vector type typedef typename CubVector::Type Vector; // Vector items Vector vec_items[VECTORS_PER_THREAD]; // Aliased input ptr Vector* vec_ptr = reinterpret_cast(block_ptr) + (linear_tid * VECTORS_PER_THREAD); // Load directly in thread-blocked order #pragma unroll for (int ITEM = 0; ITEM < VECTORS_PER_THREAD; ITEM++) { vec_items[ITEM] = ThreadLoad(vec_ptr + ITEM); } // Copy #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { items[ITEM] = *(reinterpret_cast(vec_items) + ITEM); } } #endif // DOXYGEN_SHOULD_SKIP_THIS /** * \brief Load a linear segment of items into a blocked arrangement across the thread block. * * \blocked * * The input offset (\p block_ptr + \p block_offset) must be quad-item aligned * * The following conditions will prevent vectorization and loading will fall back to cub::BLOCK_LOAD_DIRECT: * - \p ITEMS_PER_THREAD is odd * - The data type \p T is not a built-in primitive or CUDA vector type (e.g., \p short, \p int2, \p double, \p float2, etc.) * * \tparam T [inferred] The data type to load. * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. */ template < typename T, int ITEMS_PER_THREAD> __device__ __forceinline__ void LoadDirectBlockedVectorized( int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) T *block_ptr, ///< [in] Input pointer for loading from T (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load { InternalLoadDirectBlockedVectorized(linear_tid, block_ptr, items); } //@} end member group /******************************************************************//** * \name Striped arrangement I/O (direct) *********************************************************************/ //@{ /** * \brief Load a linear segment of items into a striped arrangement across the thread block. * * \striped * * \tparam BLOCK_THREADS The thread block size in threads * \tparam T [inferred] The data type to load. 
* \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam InputIteratorT [inferred] The random-access iterator type for input \iterator. */ template < int BLOCK_THREADS, typename InputT, int ITEMS_PER_THREAD, typename InputIteratorT> __device__ __forceinline__ void LoadDirectStriped( int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load { InputIteratorT thread_itr = block_itr + linear_tid; #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { items[ITEM] = thread_itr[ITEM * BLOCK_THREADS]; } } /** * \brief Load a linear segment of items into a striped arrangement across the thread block, guarded by range * * \striped * * \tparam BLOCK_THREADS The thread block size in threads * \tparam T [inferred] The data type to load. * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam InputIteratorT [inferred] The random-access iterator type for input \iterator. */ template < int BLOCK_THREADS, typename InputT, int ITEMS_PER_THREAD, typename InputIteratorT> __device__ __forceinline__ void LoadDirectStriped( int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load int valid_items) ///< [in] Number of valid items to load { InputIteratorT thread_itr = block_itr + linear_tid; #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { if (linear_tid + (ITEM * BLOCK_THREADS) < valid_items) { items[ITEM] = thread_itr[ITEM * BLOCK_THREADS]; } } } /** * \brief Load a linear segment of items into a striped arrangement across the thread block, guarded by range, with a fall-back assignment of out-of-bound elements. * * \striped * * \tparam BLOCK_THREADS The thread block size in threads * \tparam T [inferred] The data type to load. * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam InputIteratorT [inferred] The random-access iterator type for input \iterator. */ template < int BLOCK_THREADS, typename InputT, typename DefaultT, int ITEMS_PER_THREAD, typename InputIteratorT> __device__ __forceinline__ void LoadDirectStriped( int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load int valid_items, ///< [in] Number of valid items to load DefaultT oob_default) ///< [in] Default value to assign out-of-bound items { #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) items[ITEM] = oob_default; LoadDirectStriped(linear_tid, block_itr, items, valid_items); } //@} end member group /******************************************************************//** * \name Warp-striped arrangement I/O (direct) *********************************************************************/ //@{ /** * \brief Load a linear segment of items into a warp-striped arrangement across the thread block. 
* * \warpstriped * * \par Usage Considerations * The number of threads in the thread block must be a multiple of the architecture's warp size. * * \tparam T [inferred] The data type to load. * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam InputIteratorT [inferred] The random-access iterator type for input \iterator. */ template < typename InputT, int ITEMS_PER_THREAD, typename InputIteratorT> __device__ __forceinline__ void LoadDirectWarpStriped( int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load { int tid = linear_tid & (CUB_PTX_WARP_THREADS - 1); int wid = linear_tid >> CUB_PTX_LOG_WARP_THREADS; int warp_offset = wid * CUB_PTX_WARP_THREADS * ITEMS_PER_THREAD; InputIteratorT thread_itr = block_itr + warp_offset + tid ; // Load directly in warp-striped order #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { items[ITEM] = thread_itr[(ITEM * CUB_PTX_WARP_THREADS)]; } } /** * \brief Load a linear segment of items into a warp-striped arrangement across the thread block, guarded by range * * \warpstriped * * \par Usage Considerations * The number of threads in the thread block must be a multiple of the architecture's warp size. * * \tparam T [inferred] The data type to load. * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam InputIteratorT [inferred] The random-access iterator type for input \iterator. */ template < typename InputT, int ITEMS_PER_THREAD, typename InputIteratorT> __device__ __forceinline__ void LoadDirectWarpStriped( int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load int valid_items) ///< [in] Number of valid items to load { int tid = linear_tid & (CUB_PTX_WARP_THREADS - 1); int wid = linear_tid >> CUB_PTX_LOG_WARP_THREADS; int warp_offset = wid * CUB_PTX_WARP_THREADS * ITEMS_PER_THREAD; InputIteratorT thread_itr = block_itr + warp_offset + tid ; // Load directly in warp-striped order #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { if (warp_offset + tid + (ITEM * CUB_PTX_WARP_THREADS) < valid_items) { items[ITEM] = thread_itr[(ITEM * CUB_PTX_WARP_THREADS)]; } } } /** * \brief Load a linear segment of items into a warp-striped arrangement across the thread block, guarded by range, with a fall-back assignment of out-of-bound elements. * * \warpstriped * * \par Usage Considerations * The number of threads in the thread block must be a multiple of the architecture's warp size. * * \tparam T [inferred] The data type to load. * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam InputIteratorT [inferred] The random-access iterator type for input \iterator. 
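 *
 * \par
 * A hedged usage sketch (illustrative only; \p d_in, \p valid_items and the
 * 0.0f default are assumptions). With 32-thread warps, lane \p l of each warp
 * reads elements l, l+32, l+64, ... of that warp's segment, and any slot past
 * \p valid_items receives the default instead:
 * \code
 * float thread_data[4];
 * cub::LoadDirectWarpStriped(threadIdx.x, d_in, thread_data, valid_items, 0.0f);
 * \endcode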
*/ template < typename InputT, typename DefaultT, int ITEMS_PER_THREAD, typename InputIteratorT> __device__ __forceinline__ void LoadDirectWarpStriped( int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load int valid_items, ///< [in] Number of valid items to load DefaultT oob_default) ///< [in] Default value to assign out-of-bound items { // Load directly in warp-striped order #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) items[ITEM] = oob_default; LoadDirectWarpStriped(linear_tid, block_itr, items, valid_items); } //@} end member group /** @} */ // end group UtilIo //----------------------------------------------------------------------------- // Generic BlockLoad abstraction //----------------------------------------------------------------------------- /** * \brief cub::BlockLoadAlgorithm enumerates alternative algorithms for cub::BlockLoad to read a linear segment of data from memory into a blocked arrangement across a CUDA thread block. */ /** * \brief cub::BlockLoadAlgorithm enumerates alternative algorithms for cub::BlockLoad to read a linear segment of data from memory into a blocked arrangement across a CUDA thread block. */ enum BlockLoadAlgorithm { /** * \par Overview * * A [blocked arrangement](index.html#sec5sec3) of data is read * directly from memory. * * \par Performance Considerations * - The utilization of memory transactions (coalescing) decreases as the * access stride between threads increases (i.e., the number items per thread). */ BLOCK_LOAD_DIRECT, /** * \par Overview * * A [blocked arrangement](index.html#sec5sec3) of data is read * from memory using CUDA's built-in vectorized loads as a coalescing optimization. * For example, ld.global.v4.s32 instructions will be generated * when \p T = \p int and \p ITEMS_PER_THREAD % 4 == 0. * * \par Performance Considerations * - The utilization of memory transactions (coalescing) remains high until the the * access stride between threads (i.e., the number items per thread) exceeds the * maximum vector load width (typically 4 items or 64B, whichever is lower). * - The following conditions will prevent vectorization and loading will fall back to cub::BLOCK_LOAD_DIRECT: * - \p ITEMS_PER_THREAD is odd * - The \p InputIteratorTis not a simple pointer type * - The block input offset is not quadword-aligned * - The data type \p T is not a built-in primitive or CUDA vector type (e.g., \p short, \p int2, \p double, \p float2, etc.) */ BLOCK_LOAD_VECTORIZE, /** * \par Overview * * A [striped arrangement](index.html#sec5sec3) of data is read * efficiently from memory and then locally transposed into a * [blocked arrangement](index.html#sec5sec3). * * \par Performance Considerations * - The utilization of memory transactions (coalescing) remains high regardless * of items loaded per thread. * - The local reordering incurs slightly longer latencies and throughput than the * direct cub::BLOCK_LOAD_DIRECT and cub::BLOCK_LOAD_VECTORIZE alternatives. */ BLOCK_LOAD_TRANSPOSE, /** * \par Overview * * A [warp-striped arrangement](index.html#sec5sec3) of data is * read efficiently from memory and then locally transposed into a * [blocked arrangement](index.html#sec5sec3). 
* * \par Usage Considerations * - BLOCK_THREADS must be a multiple of WARP_THREADS * * \par Performance Considerations * - The utilization of memory transactions (coalescing) remains high regardless * of items loaded per thread. * - The local reordering incurs slightly larger latencies than the * direct cub::BLOCK_LOAD_DIRECT and cub::BLOCK_LOAD_VECTORIZE alternatives. * - Provisions more shared storage, but incurs smaller latencies than the * BLOCK_LOAD_WARP_TRANSPOSE_TIMESLICED alternative. */ BLOCK_LOAD_WARP_TRANSPOSE, /** * \par Overview * * Like \p BLOCK_LOAD_WARP_TRANSPOSE, a [warp-striped arrangement](index.html#sec5sec3) * of data is read directly from memory and then is locally transposed into a * [blocked arrangement](index.html#sec5sec3). To reduce the shared memory * requirement, only one warp's worth of shared memory is provisioned and is * subsequently time-sliced among warps. * * \par Usage Considerations * - BLOCK_THREADS must be a multiple of WARP_THREADS * * \par Performance Considerations * - The utilization of memory transactions (coalescing) remains high regardless * of items loaded per thread. * - Provisions less shared memory temporary storage, but incurs larger * latencies than the BLOCK_LOAD_WARP_TRANSPOSE alternative. */ BLOCK_LOAD_WARP_TRANSPOSE_TIMESLICED, }; /** * \brief The BlockLoad class provides [collective](index.html#sec0) data movement methods for loading a linear segment of items from memory into a [blocked arrangement](index.html#sec5sec3) across a CUDA thread block. ![](block_load_logo.png) * \ingroup BlockModule * \ingroup UtilIo * * \tparam InputT The data type to read into (which must be convertible from the input iterator's value type). * \tparam BLOCK_DIM_X The thread block length in threads along the X dimension * \tparam ITEMS_PER_THREAD The number of consecutive items partitioned onto each thread. * \tparam ALGORITHM [optional] cub::BlockLoadAlgorithm tuning policy. default: cub::BLOCK_LOAD_DIRECT. * \tparam WARP_TIME_SLICING [optional] Whether or not only one warp's worth of shared memory should be allocated and time-sliced among block-warps during any load-related data transpositions (versus each warp having its own storage). (default: false) * \tparam BLOCK_DIM_Y [optional] The thread block length in threads along the Y dimension (default: 1) * \tparam BLOCK_DIM_Z [optional] The thread block length in threads along the Z dimension (default: 1) * \tparam PTX_ARCH [optional] \ptxversion * * \par Overview * - The BlockLoad class provides a single data movement abstraction that can be specialized * to implement different cub::BlockLoadAlgorithm strategies. This facilitates different * performance policies for different architectures, data types, granularity sizes, etc. * - BlockLoad can be optionally specialized by different data movement strategies: * -# cub::BLOCK_LOAD_DIRECT. A [blocked arrangement](index.html#sec5sec3) * of data is read directly from memory. [More...](\ref cub::BlockLoadAlgorithm) * -# cub::BLOCK_LOAD_VECTORIZE. A [blocked arrangement](index.html#sec5sec3) * of data is read directly from memory using CUDA's built-in vectorized loads as a * coalescing optimization. [More...](\ref cub::BlockLoadAlgorithm) * -# cub::BLOCK_LOAD_TRANSPOSE. A [striped arrangement](index.html#sec5sec3) * of data is read directly from memory and is then locally transposed into a * [blocked arrangement](index.html#sec5sec3). [More...](\ref cub::BlockLoadAlgorithm) * -# cub::BLOCK_LOAD_WARP_TRANSPOSE. 
A [warp-striped arrangement](index.html#sec5sec3) * of data is read directly from memory and is then locally transposed into a * [blocked arrangement](index.html#sec5sec3). [More...](\ref cub::BlockLoadAlgorithm) * -# cub::BLOCK_LOAD_WARP_TRANSPOSE_TIMESLICED,. A [warp-striped arrangement](index.html#sec5sec3) * of data is read directly from memory and is then locally transposed into a * [blocked arrangement](index.html#sec5sec3) one warp at a time. [More...](\ref cub::BlockLoadAlgorithm) * - \rowmajor * * \par A Simple Example * \blockcollective{BlockLoad} * \par * The code snippet below illustrates the loading of a linear * segment of 512 integers into a "blocked" arrangement across 128 threads where each * thread owns 4 consecutive items. The load is specialized for \p BLOCK_LOAD_WARP_TRANSPOSE, * meaning memory references are efficiently coalesced using a warp-striped access * pattern (after which items are locally reordered among threads). * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(int *d_data, ...) * { * // Specialize BlockLoad for a 1D block of 128 threads owning 4 integer items each * typedef cub::BlockLoad BlockLoad; * * // Allocate shared memory for BlockLoad * __shared__ typename BlockLoad::TempStorage temp_storage; * * // Load a segment of consecutive items that are blocked across threads * int thread_data[4]; * BlockLoad(temp_storage).Load(d_data, thread_data); * * \endcode * \par * Suppose the input \p d_data is 0, 1, 2, 3, 4, 5, .... * The set of \p thread_data across the block of threads in those threads will be * { [0,1,2,3], [4,5,6,7], ..., [508,509,510,511] }. * */ template < typename InputT, int BLOCK_DIM_X, int ITEMS_PER_THREAD, BlockLoadAlgorithm ALGORITHM = BLOCK_LOAD_DIRECT, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH> class BlockLoad { private: /****************************************************************************** * Constants and typed definitions ******************************************************************************/ /// Constants enum { /// The thread block size in threads BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, }; /****************************************************************************** * Algorithmic variants ******************************************************************************/ /// Load helper template struct LoadInternal; /** * BLOCK_LOAD_DIRECT specialization of load helper */ template struct LoadInternal { /// Shared memory storage layout type typedef NullType TempStorage; /// Linear thread-id int linear_tid; /// Constructor __device__ __forceinline__ LoadInternal( TempStorage &/*temp_storage*/, int linear_tid) : linear_tid(linear_tid) {} /// Load a linear segment of items from memory template __device__ __forceinline__ void Load( InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load { LoadDirectBlocked(linear_tid, block_itr, items); } /// Load a linear segment of items from memory, guarded by range template __device__ __forceinline__ void Load( InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load int valid_items) ///< [in] Number of valid items to load { LoadDirectBlocked(linear_tid, block_itr, items, valid_items); } /// Load a linear segment of items from memory, guarded by range, with a fall-back assignment of out-of-bound elements template __device__ 
__forceinline__ void Load( InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load int valid_items, ///< [in] Number of valid items to load DefaultT oob_default) ///< [in] Default value to assign out-of-bound items { LoadDirectBlocked(linear_tid, block_itr, items, valid_items, oob_default); } }; /** * BLOCK_LOAD_VECTORIZE specialization of load helper */ template struct LoadInternal { /// Shared memory storage layout type typedef NullType TempStorage; /// Linear thread-id int linear_tid; /// Constructor __device__ __forceinline__ LoadInternal( TempStorage &/*temp_storage*/, int linear_tid) : linear_tid(linear_tid) {} /// Load a linear segment of items from memory, specialized for native pointer types (attempts vectorization) template __device__ __forceinline__ void Load( InputT *block_ptr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load { InternalLoadDirectBlockedVectorized(linear_tid, block_ptr, items); } /// Load a linear segment of items from memory, specialized for native pointer types (attempts vectorization) template __device__ __forceinline__ void Load( const InputT *block_ptr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load { InternalLoadDirectBlockedVectorized(linear_tid, block_ptr, items); } /// Load a linear segment of items from memory, specialized for native pointer types (attempts vectorization) template < CacheLoadModifier MODIFIER, typename ValueType, typename OffsetT> __device__ __forceinline__ void Load( CacheModifiedInputIterator block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load { InternalLoadDirectBlockedVectorized(linear_tid, block_itr.ptr, items); } /// Load a linear segment of items from memory, specialized for opaque input iterators (skips vectorization) template __device__ __forceinline__ void Load( _InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load { LoadDirectBlocked(linear_tid, block_itr, items); } /// Load a linear segment of items from memory, guarded by range (skips vectorization) template __device__ __forceinline__ void Load( InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load int valid_items) ///< [in] Number of valid items to load { LoadDirectBlocked(linear_tid, block_itr, items, valid_items); } /// Load a linear segment of items from memory, guarded by range, with a fall-back assignment of out-of-bound elements (skips vectorization) template __device__ __forceinline__ void Load( InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load int valid_items, ///< [in] Number of valid items to load DefaultT oob_default) ///< [in] Default value to assign out-of-bound items { LoadDirectBlocked(linear_tid, block_itr, items, valid_items, oob_default); } }; /** * BLOCK_LOAD_TRANSPOSE specialization of load helper */ template struct LoadInternal { // BlockExchange utility type for keys typedef BlockExchange BlockExchange; /// Shared memory storage layout type struct _TempStorage : BlockExchange::TempStorage { /// Temporary storage for partially-full 
block guard volatile int valid_items; }; /// Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; /// Thread reference to shared storage _TempStorage &temp_storage; /// Linear thread-id int linear_tid; /// Constructor __device__ __forceinline__ LoadInternal( TempStorage &temp_storage, int linear_tid) : temp_storage(temp_storage.Alias()), linear_tid(linear_tid) {} /// Load a linear segment of items from memory template __device__ __forceinline__ void Load( InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load{ { LoadDirectStriped(linear_tid, block_itr, items); BlockExchange(temp_storage).StripedToBlocked(items, items); } /// Load a linear segment of items from memory, guarded by range template __device__ __forceinline__ void Load( InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load int valid_items) ///< [in] Number of valid items to load { if (linear_tid == 0) temp_storage.valid_items = valid_items; // Move through volatile smem as a workaround to prevent RF spilling on subsequent loads CTA_SYNC(); LoadDirectStriped(linear_tid, block_itr, items, temp_storage.valid_items); BlockExchange(temp_storage).StripedToBlocked(items, items); } /// Load a linear segment of items from memory, guarded by range, with a fall-back assignment of out-of-bound elements template __device__ __forceinline__ void Load( InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load int valid_items, ///< [in] Number of valid items to load DefaultT oob_default) ///< [in] Default value to assign out-of-bound items { if (linear_tid == 0) temp_storage.valid_items = valid_items; // Move through volatile smem as a workaround to prevent RF spilling on subsequent loads CTA_SYNC(); LoadDirectStriped(linear_tid, block_itr, items, temp_storage.valid_items, oob_default); BlockExchange(temp_storage).StripedToBlocked(items, items); } }; /** * BLOCK_LOAD_WARP_TRANSPOSE specialization of load helper */ template struct LoadInternal { enum { WARP_THREADS = CUB_WARP_THREADS(PTX_ARCH) }; // Assert BLOCK_THREADS must be a multiple of WARP_THREADS CUB_STATIC_ASSERT((BLOCK_THREADS % WARP_THREADS == 0), "BLOCK_THREADS must be a multiple of WARP_THREADS"); // BlockExchange utility type for keys typedef BlockExchange BlockExchange; /// Shared memory storage layout type struct _TempStorage : BlockExchange::TempStorage { /// Temporary storage for partially-full block guard volatile int valid_items; }; /// Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; /// Thread reference to shared storage _TempStorage &temp_storage; /// Linear thread-id int linear_tid; /// Constructor __device__ __forceinline__ LoadInternal( TempStorage &temp_storage, int linear_tid) : temp_storage(temp_storage.Alias()), linear_tid(linear_tid) {} /// Load a linear segment of items from memory template __device__ __forceinline__ void Load( InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load{ { LoadDirectWarpStriped(linear_tid, block_itr, items); BlockExchange(temp_storage).WarpStripedToBlocked(items, items); } /// Load a linear segment of items from memory, guarded by range template __device__ 
__forceinline__ void Load( InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load int valid_items) ///< [in] Number of valid items to load { if (linear_tid == 0) temp_storage.valid_items = valid_items; // Move through volatile smem as a workaround to prevent RF spilling on subsequent loads CTA_SYNC(); LoadDirectWarpStriped(linear_tid, block_itr, items, temp_storage.valid_items); BlockExchange(temp_storage).WarpStripedToBlocked(items, items); } /// Load a linear segment of items from memory, guarded by range, with a fall-back assignment of out-of-bound elements template __device__ __forceinline__ void Load( InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load int valid_items, ///< [in] Number of valid items to load DefaultT oob_default) ///< [in] Default value to assign out-of-bound items { if (linear_tid == 0) temp_storage.valid_items = valid_items; // Move through volatile smem as a workaround to prevent RF spilling on subsequent loads CTA_SYNC(); LoadDirectWarpStriped(linear_tid, block_itr, items, temp_storage.valid_items, oob_default); BlockExchange(temp_storage).WarpStripedToBlocked(items, items); } }; /** * BLOCK_LOAD_WARP_TRANSPOSE_TIMESLICED specialization of load helper */ template struct LoadInternal { enum { WARP_THREADS = CUB_WARP_THREADS(PTX_ARCH) }; // Assert BLOCK_THREADS must be a multiple of WARP_THREADS CUB_STATIC_ASSERT((BLOCK_THREADS % WARP_THREADS == 0), "BLOCK_THREADS must be a multiple of WARP_THREADS"); // BlockExchange utility type for keys typedef BlockExchange BlockExchange; /// Shared memory storage layout type struct _TempStorage : BlockExchange::TempStorage { /// Temporary storage for partially-full block guard volatile int valid_items; }; /// Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; /// Thread reference to shared storage _TempStorage &temp_storage; /// Linear thread-id int linear_tid; /// Constructor __device__ __forceinline__ LoadInternal( TempStorage &temp_storage, int linear_tid) : temp_storage(temp_storage.Alias()), linear_tid(linear_tid) {} /// Load a linear segment of items from memory template __device__ __forceinline__ void Load( InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load{ { LoadDirectWarpStriped(linear_tid, block_itr, items); BlockExchange(temp_storage).WarpStripedToBlocked(items, items); } /// Load a linear segment of items from memory, guarded by range template __device__ __forceinline__ void Load( InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load int valid_items) ///< [in] Number of valid items to load { if (linear_tid == 0) temp_storage.valid_items = valid_items; // Move through volatile smem as a workaround to prevent RF spilling on subsequent loads CTA_SYNC(); LoadDirectWarpStriped(linear_tid, block_itr, items, temp_storage.valid_items); BlockExchange(temp_storage).WarpStripedToBlocked(items, items); } /// Load a linear segment of items from memory, guarded by range, with a fall-back assignment of out-of-bound elements template __device__ __forceinline__ void Load( InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD], ///< 
[out] Data to load int valid_items, ///< [in] Number of valid items to load DefaultT oob_default) ///< [in] Default value to assign out-of-bound items { if (linear_tid == 0) temp_storage.valid_items = valid_items; // Move through volatile smem as a workaround to prevent RF spilling on subsequent loads CTA_SYNC(); LoadDirectWarpStriped(linear_tid, block_itr, items, temp_storage.valid_items, oob_default); BlockExchange(temp_storage).WarpStripedToBlocked(items, items); } }; /****************************************************************************** * Type definitions ******************************************************************************/ /// Internal load implementation to use typedef LoadInternal InternalLoad; /// Shared memory storage layout type typedef typename InternalLoad::TempStorage _TempStorage; /****************************************************************************** * Utility methods ******************************************************************************/ /// Internal storage allocator __device__ __forceinline__ _TempStorage& PrivateStorage() { __shared__ _TempStorage private_storage; return private_storage; } /****************************************************************************** * Thread fields ******************************************************************************/ /// Thread reference to shared storage _TempStorage &temp_storage; /// Linear thread-id int linear_tid; public: /// \smemstorage{BlockLoad} struct TempStorage : Uninitialized<_TempStorage> {}; /******************************************************************//** * \name Collective constructors *********************************************************************/ //@{ /** * \brief Collective constructor using a private static allocation of shared memory as temporary storage. */ __device__ __forceinline__ BlockLoad() : temp_storage(PrivateStorage()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} /** * \brief Collective constructor using the specified memory allocation as temporary storage. */ __device__ __forceinline__ BlockLoad( TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage : temp_storage(temp_storage.Alias()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} //@} end member group /******************************************************************//** * \name Data movement *********************************************************************/ //@{ /** * \brief Load a linear segment of items from memory. * * \par * - \blocked * - \smemreuse * * \par Snippet * The code snippet below illustrates the loading of a linear * segment of 512 integers into a "blocked" arrangement across 128 threads where each * thread owns 4 consecutive items. The load is specialized for \p BLOCK_LOAD_WARP_TRANSPOSE, * meaning memory references are efficiently coalesced using a warp-striped access * pattern (after which items are locally reordered among threads). * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(int *d_data, ...) * { * // Specialize BlockLoad for a 1D block of 128 threads owning 4 integer items each * typedef cub::BlockLoad BlockLoad; * * // Allocate shared memory for BlockLoad * __shared__ typename BlockLoad::TempStorage temp_storage; * * // Load a segment of consecutive items that are blocked across threads * int thread_data[4]; * BlockLoad(temp_storage).Load(d_data, thread_data); * * \endcode * \par * Suppose the input \p d_data is 0, 1, 2, 3, 4, 5, .... 
* The set of \p thread_data across the block of threads in those threads will be * { [0,1,2,3], [4,5,6,7], ..., [508,509,510,511] }. * */ template __device__ __forceinline__ void Load( InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load { InternalLoad(temp_storage, linear_tid).Load(block_itr, items); } /** * \brief Load a linear segment of items from memory, guarded by range. * * \par * - \blocked * - \smemreuse * * \par Snippet * The code snippet below illustrates the guarded loading of a linear * segment of 512 integers into a "blocked" arrangement across 128 threads where each * thread owns 4 consecutive items. The load is specialized for \p BLOCK_LOAD_WARP_TRANSPOSE, * meaning memory references are efficiently coalesced using a warp-striped access * pattern (after which items are locally reordered among threads). * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(int *d_data, int valid_items, ...) * { * // Specialize BlockLoad for a 1D block of 128 threads owning 4 integer items each * typedef cub::BlockLoad BlockLoad; * * // Allocate shared memory for BlockLoad * __shared__ typename BlockLoad::TempStorage temp_storage; * * // Load a segment of consecutive items that are blocked across threads * int thread_data[4]; * BlockLoad(temp_storage).Load(d_data, thread_data, valid_items); * * \endcode * \par * Suppose the input \p d_data is 0, 1, 2, 3, 4, 5, 6... and \p valid_items is \p 5. * The set of \p thread_data across the block of threads in those threads will be * { [0,1,2,3], [4,?,?,?], ..., [?,?,?,?] }, with only the first two threads * being unmasked to load portions of valid data (and other items remaining unassigned). * */ template __device__ __forceinline__ void Load( InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load int valid_items) ///< [in] Number of valid items to load { InternalLoad(temp_storage, linear_tid).Load(block_itr, items, valid_items); } /** * \brief Load a linear segment of items from memory, guarded by range, with a fall-back assignment of out-of-bound elements * * \par * - \blocked * - \smemreuse * * \par Snippet * The code snippet below illustrates the guarded loading of a linear * segment of 512 integers into a "blocked" arrangement across 128 threads where each * thread owns 4 consecutive items. The load is specialized for \p BLOCK_LOAD_WARP_TRANSPOSE, * meaning memory references are efficiently coalesced using a warp-striped access * pattern (after which items are locally reordered among threads). * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(int *d_data, int valid_items, ...) * { * // Specialize BlockLoad for a 1D block of 128 threads owning 4 integer items each * typedef cub::BlockLoad BlockLoad; * * // Allocate shared memory for BlockLoad * __shared__ typename BlockLoad::TempStorage temp_storage; * * // Load a segment of consecutive items that are blocked across threads * int thread_data[4]; * BlockLoad(temp_storage).Load(d_data, thread_data, valid_items, -1); * * \endcode * \par * Suppose the input \p d_data is 0, 1, 2, 3, 4, 5, 6..., * \p valid_items is \p 5, and the out-of-bounds default is \p -1. 
* The set of \p thread_data across the block of threads in those threads will be * { [0,1,2,3], [4,-1,-1,-1], ..., [-1,-1,-1,-1] }, with only the first two threads * being unmasked to load portions of valid data (and other items are assigned \p -1) * */ template __device__ __forceinline__ void Load( InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load int valid_items, ///< [in] Number of valid items to load DefaultT oob_default) ///< [in] Default value to assign out-of-bound items { InternalLoad(temp_storage, linear_tid).Load(block_itr, items, valid_items, oob_default); } //@} end member group }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/block/block_radix_rank.cuh000066400000000000000000000612361411340063500234550ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::BlockRadixRank provides operations for ranking unsigned integer types within a CUDA thread block */ #pragma once #include #include "../thread/thread_reduce.cuh" #include "../thread/thread_scan.cuh" #include "../block/block_scan.cuh" #include "../util_ptx.cuh" #include "../util_arch.cuh" #include "../util_type.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief BlockRadixRank provides operations for ranking unsigned integer types within a CUDA thread block. 
* \ingroup BlockModule * * \tparam BLOCK_DIM_X The thread block length in threads along the X dimension * \tparam RADIX_BITS The number of radix bits per digit place * \tparam IS_DESCENDING Whether or not the sorted-order is high-to-low * \tparam MEMOIZE_OUTER_SCAN [optional] Whether or not to buffer outer raking scan partials to incur fewer shared memory reads at the expense of higher register pressure (default: true for architectures SM35 and newer, false otherwise). See BlockScanAlgorithm::BLOCK_SCAN_RAKING_MEMOIZE for more details. * \tparam INNER_SCAN_ALGORITHM [optional] The cub::BlockScanAlgorithm algorithm to use (default: cub::BLOCK_SCAN_WARP_SCANS) * \tparam SMEM_CONFIG [optional] Shared memory bank mode (default: \p cudaSharedMemBankSizeFourByte) * \tparam BLOCK_DIM_Y [optional] The thread block length in threads along the Y dimension (default: 1) * \tparam BLOCK_DIM_Z [optional] The thread block length in threads along the Z dimension (default: 1) * \tparam PTX_ARCH [optional] \ptxversion * * \par Overview * Blah... * - Keys must be in a form suitable for radix ranking (i.e., unsigned bits). * - \blocked * * \par Performance Considerations * - \granularity * * \par Examples * \par * - Example 1: Simple radix rank of 32-bit integer keys * \code * #include * * template * __global__ void ExampleKernel(...) * { * * \endcode */ template < int BLOCK_DIM_X, int RADIX_BITS, bool IS_DESCENDING, bool MEMOIZE_OUTER_SCAN = (CUB_PTX_ARCH >= 350) ? true : false, BlockScanAlgorithm INNER_SCAN_ALGORITHM = BLOCK_SCAN_WARP_SCANS, cudaSharedMemConfig SMEM_CONFIG = cudaSharedMemBankSizeFourByte, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH> class BlockRadixRank { private: /****************************************************************************** * Type definitions and constants ******************************************************************************/ // Integer type for digit counters (to be packed into words of type PackedCounters) typedef unsigned short DigitCounter; // Integer type for packing DigitCounters into columns of shared memory banks typedef typename If<(SMEM_CONFIG == cudaSharedMemBankSizeEightByte), unsigned long long, unsigned int>::Type PackedCounter; enum { // The thread block size in threads BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, RADIX_DIGITS = 1 << RADIX_BITS, LOG_WARP_THREADS = CUB_LOG_WARP_THREADS(PTX_ARCH), WARP_THREADS = 1 << LOG_WARP_THREADS, WARPS = (BLOCK_THREADS + WARP_THREADS - 1) / WARP_THREADS, BYTES_PER_COUNTER = sizeof(DigitCounter), LOG_BYTES_PER_COUNTER = Log2::VALUE, PACKING_RATIO = sizeof(PackedCounter) / sizeof(DigitCounter), LOG_PACKING_RATIO = Log2::VALUE, LOG_COUNTER_LANES = CUB_MAX((RADIX_BITS - LOG_PACKING_RATIO), 0), // Always at least one lane COUNTER_LANES = 1 << LOG_COUNTER_LANES, // The number of packed counters per thread (plus one for padding) PADDED_COUNTER_LANES = COUNTER_LANES + 1, RAKING_SEGMENT = PADDED_COUNTER_LANES, }; public: enum { /// Number of bin-starting offsets tracked per thread BINS_TRACKED_PER_THREAD = CUB_MAX(1, RADIX_DIGITS / BLOCK_THREADS), }; private: /// BlockScan type typedef BlockScan< PackedCounter, BLOCK_DIM_X, INNER_SCAN_ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH> BlockScan; /// Shared memory storage layout type for BlockRadixRank struct __align__(16) _TempStorage { union Aliasable { DigitCounter digit_counters[PADDED_COUNTER_LANES][BLOCK_THREADS][PACKING_RATIO]; PackedCounter raking_grid[BLOCK_THREADS][RAKING_SEGMENT]; } aliasable; // Storage for scanning local 
ranks typename BlockScan::TempStorage block_scan; }; /****************************************************************************** * Thread fields ******************************************************************************/ /// Shared storage reference _TempStorage &temp_storage; /// Linear thread-id unsigned int linear_tid; /// Copy of raking segment, promoted to registers PackedCounter cached_segment[RAKING_SEGMENT]; /****************************************************************************** * Utility methods ******************************************************************************/ /** * Internal storage allocator */ __device__ __forceinline__ _TempStorage& PrivateStorage() { __shared__ _TempStorage private_storage; return private_storage; } /** * Performs upsweep raking reduction, returning the aggregate */ __device__ __forceinline__ PackedCounter Upsweep() { PackedCounter *smem_raking_ptr = temp_storage.aliasable.raking_grid[linear_tid]; PackedCounter *raking_ptr; if (MEMOIZE_OUTER_SCAN) { // Copy data into registers #pragma unroll for (int i = 0; i < RAKING_SEGMENT; i++) { cached_segment[i] = smem_raking_ptr[i]; } raking_ptr = cached_segment; } else { raking_ptr = smem_raking_ptr; } return internal::ThreadReduce(raking_ptr, Sum()); } /// Performs exclusive downsweep raking scan __device__ __forceinline__ void ExclusiveDownsweep( PackedCounter raking_partial) { PackedCounter *smem_raking_ptr = temp_storage.aliasable.raking_grid[linear_tid]; PackedCounter *raking_ptr = (MEMOIZE_OUTER_SCAN) ? cached_segment : smem_raking_ptr; // Exclusive raking downsweep scan internal::ThreadScanExclusive(raking_ptr, raking_ptr, Sum(), raking_partial); if (MEMOIZE_OUTER_SCAN) { // Copy data back to smem #pragma unroll for (int i = 0; i < RAKING_SEGMENT; i++) { smem_raking_ptr[i] = cached_segment[i]; } } } /** * Reset shared memory digit counters */ __device__ __forceinline__ void ResetCounters() { // Reset shared memory digit counters #pragma unroll for (int LANE = 0; LANE < PADDED_COUNTER_LANES; LANE++) { *((PackedCounter*) temp_storage.aliasable.digit_counters[LANE][linear_tid]) = 0; } } /** * Block-scan prefix callback */ struct PrefixCallBack { __device__ __forceinline__ PackedCounter operator()(PackedCounter block_aggregate) { PackedCounter block_prefix = 0; // Propagate totals in packed fields #pragma unroll for (int PACKED = 1; PACKED < PACKING_RATIO; PACKED++) { block_prefix += block_aggregate << (sizeof(DigitCounter) * 8 * PACKED); } return block_prefix; } }; /** * Scan shared memory digit counters. */ __device__ __forceinline__ void ScanCounters() { // Upsweep scan PackedCounter raking_partial = Upsweep(); // Compute exclusive sum PackedCounter exclusive_partial; PrefixCallBack prefix_call_back; BlockScan(temp_storage.block_scan).ExclusiveSum(raking_partial, exclusive_partial, prefix_call_back); // Downsweep scan with exclusive partial ExclusiveDownsweep(exclusive_partial); } public: /// \smemstorage{BlockScan} struct TempStorage : Uninitialized<_TempStorage> {}; /******************************************************************//** * \name Collective constructors *********************************************************************/ //@{ /** * \brief Collective constructor using a private static allocation of shared memory as temporary storage. 
*/ __device__ __forceinline__ BlockRadixRank() : temp_storage(PrivateStorage()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} /** * \brief Collective constructor using the specified memory allocation as temporary storage. */ __device__ __forceinline__ BlockRadixRank( TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage : temp_storage(temp_storage.Alias()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} //@} end member group /******************************************************************//** * \name Raking *********************************************************************/ //@{ /** * \brief Rank keys. */ template < typename UnsignedBits, int KEYS_PER_THREAD> __device__ __forceinline__ void RankKeys( UnsignedBits (&keys)[KEYS_PER_THREAD], ///< [in] Keys for this tile int (&ranks)[KEYS_PER_THREAD], ///< [out] For each key, the local rank within the tile int current_bit, ///< [in] The least-significant bit position of the current digit to extract int num_bits) ///< [in] The number of bits in the current digit { DigitCounter thread_prefixes[KEYS_PER_THREAD]; // For each key, the count of previous keys in this tile having the same digit DigitCounter* digit_counters[KEYS_PER_THREAD]; // For each key, the byte-offset of its corresponding digit counter in smem // Reset shared memory digit counters ResetCounters(); #pragma unroll for (int ITEM = 0; ITEM < KEYS_PER_THREAD; ++ITEM) { // Get digit unsigned int digit = BFE(keys[ITEM], current_bit, num_bits); // Get sub-counter unsigned int sub_counter = digit >> LOG_COUNTER_LANES; // Get counter lane unsigned int counter_lane = digit & (COUNTER_LANES - 1); if (IS_DESCENDING) { sub_counter = PACKING_RATIO - 1 - sub_counter; counter_lane = COUNTER_LANES - 1 - counter_lane; } // Pointer to smem digit counter digit_counters[ITEM] = &temp_storage.aliasable.digit_counters[counter_lane][linear_tid][sub_counter]; // Load thread-exclusive prefix thread_prefixes[ITEM] = *digit_counters[ITEM]; // Store inclusive prefix *digit_counters[ITEM] = thread_prefixes[ITEM] + 1; } CTA_SYNC(); // Scan shared memory counters ScanCounters(); CTA_SYNC(); // Extract the local ranks of each key for (int ITEM = 0; ITEM < KEYS_PER_THREAD; ++ITEM) { // Add in thread block exclusive prefix ranks[ITEM] = thread_prefixes[ITEM] + *digit_counters[ITEM]; } } /** * \brief Rank keys. For the lower \p RADIX_DIGITS threads, digit counts for each digit are provided for the corresponding thread. */ template < typename UnsignedBits, int KEYS_PER_THREAD> __device__ __forceinline__ void RankKeys( UnsignedBits (&keys)[KEYS_PER_THREAD], ///< [in] Keys for this tile int (&ranks)[KEYS_PER_THREAD], ///< [out] For each key, the local rank within the tile (out parameter) int current_bit, ///< [in] The least-significant bit position of the current digit to extract int num_bits, ///< [in] The number of bits in the current digit int (&exclusive_digit_prefix)[BINS_TRACKED_PER_THREAD]) ///< [out] The exclusive prefix sum for the digits [(threadIdx.x * BINS_TRACKED_PER_THREAD) ... (threadIdx.x * BINS_TRACKED_PER_THREAD) + BINS_TRACKED_PER_THREAD - 1] { // Rank keys RankKeys(keys, ranks, current_bit, num_bits); // Get the inclusive and exclusive digit totals corresponding to the calling thread. 
#pragma unroll for (int track = 0; track < BINS_TRACKED_PER_THREAD; ++track) { int bin_idx = (linear_tid * BINS_TRACKED_PER_THREAD) + track; if ((BLOCK_THREADS == RADIX_DIGITS) || (bin_idx < RADIX_DIGITS)) { if (IS_DESCENDING) bin_idx = RADIX_DIGITS - bin_idx - 1; // Obtain ex/inclusive digit counts. (Unfortunately these all reside in the // first counter column, resulting in unavoidable bank conflicts.) unsigned int counter_lane = (bin_idx & (COUNTER_LANES - 1)); unsigned int sub_counter = bin_idx >> (LOG_COUNTER_LANES); exclusive_digit_prefix[track] = temp_storage.aliasable.digit_counters[counter_lane][0][sub_counter]; } } } }; /** * Radix-rank using match.any */ template < int BLOCK_DIM_X, int RADIX_BITS, bool IS_DESCENDING, BlockScanAlgorithm INNER_SCAN_ALGORITHM = BLOCK_SCAN_WARP_SCANS, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH> class BlockRadixRankMatch { private: /****************************************************************************** * Type definitions and constants ******************************************************************************/ typedef int32_t RankT; typedef int32_t DigitCounterT; enum { // The thread block size in threads BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, RADIX_DIGITS = 1 << RADIX_BITS, LOG_WARP_THREADS = CUB_LOG_WARP_THREADS(PTX_ARCH), WARP_THREADS = 1 << LOG_WARP_THREADS, WARPS = (BLOCK_THREADS + WARP_THREADS - 1) / WARP_THREADS, PADDED_WARPS = ((WARPS & 0x1) == 0) ? WARPS + 1 : WARPS, COUNTERS = PADDED_WARPS * RADIX_DIGITS, RAKING_SEGMENT = (COUNTERS + BLOCK_THREADS - 1) / BLOCK_THREADS, PADDED_RAKING_SEGMENT = ((RAKING_SEGMENT & 0x1) == 0) ? RAKING_SEGMENT + 1 : RAKING_SEGMENT, }; public: enum { /// Number of bin-starting offsets tracked per thread BINS_TRACKED_PER_THREAD = CUB_MAX(1, RADIX_DIGITS / BLOCK_THREADS), }; private: /// BlockScan type typedef BlockScan< DigitCounterT, BLOCK_THREADS, INNER_SCAN_ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH> BlockScanT; /// Shared memory storage layout type for BlockRadixRank struct __align__(16) _TempStorage { typename BlockScanT::TempStorage block_scan; union __align__(16) Aliasable { volatile DigitCounterT warp_digit_counters[RADIX_DIGITS][PADDED_WARPS]; DigitCounterT raking_grid[BLOCK_THREADS][PADDED_RAKING_SEGMENT]; } aliasable; }; /****************************************************************************** * Thread fields ******************************************************************************/ /// Shared storage reference _TempStorage &temp_storage; /// Linear thread-id unsigned int linear_tid; public: /// \smemstorage{BlockScan} struct TempStorage : Uninitialized<_TempStorage> {}; /******************************************************************//** * \name Collective constructors *********************************************************************/ //@{ /** * \brief Collective constructor using the specified memory allocation as temporary storage. */ __device__ __forceinline__ BlockRadixRankMatch( TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage : temp_storage(temp_storage.Alias()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} //@} end member group /******************************************************************//** * \name Raking *********************************************************************/ //@{ /** * \brief Rank keys. 
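 *
 * \par
 * This variant derives each key's rank from warp-wide digit matching. The snippet
 * below is only an illustration of that idea using the raw CUDA intrinsics (which
 * require compute capability 7.0+), not a call into this class:
 * \code
 * // digit is assumed to hold this thread's current radix digit
 * unsigned int peers = __match_any_sync(0xFFFFFFFFu, digit);  // lanes sharing my digit
 * int digit_count    = __popc(peers);                         // peers in this warp
 * int rank_in_warp   = __popc(peers & cub::LaneMaskLt());     // peers ranked before me
 * \endcode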
*/ template < typename UnsignedBits, int KEYS_PER_THREAD> __device__ __forceinline__ void RankKeys( UnsignedBits (&keys)[KEYS_PER_THREAD], ///< [in] Keys for this tile int (&ranks)[KEYS_PER_THREAD], ///< [out] For each key, the local rank within the tile int current_bit, ///< [in] The least-significant bit position of the current digit to extract int num_bits) ///< [in] The number of bits in the current digit { // Initialize shared digit counters #pragma unroll for (int ITEM = 0; ITEM < PADDED_RAKING_SEGMENT; ++ITEM) temp_storage.aliasable.raking_grid[linear_tid][ITEM] = 0; CTA_SYNC(); // Each warp will strip-mine its section of input, one strip at a time volatile DigitCounterT *digit_counters[KEYS_PER_THREAD]; uint32_t lane_id = LaneId(); uint32_t warp_id = linear_tid >> LOG_WARP_THREADS; uint32_t lane_mask_lt = LaneMaskLt(); #pragma unroll for (int ITEM = 0; ITEM < KEYS_PER_THREAD; ++ITEM) { // My digit uint32_t digit = BFE(keys[ITEM], current_bit, num_bits); if (IS_DESCENDING) digit = RADIX_DIGITS - digit - 1; // Mask of peers who have same digit as me uint32_t peer_mask = MatchAny(digit); // Pointer to smem digit counter for this key digit_counters[ITEM] = &temp_storage.aliasable.warp_digit_counters[digit][warp_id]; // Number of occurrences in previous strips DigitCounterT warp_digit_prefix = *digit_counters[ITEM]; // Warp-sync WARP_SYNC(0xFFFFFFFF); // Number of peers having same digit as me int32_t digit_count = __popc(peer_mask); // Number of lower-ranked peers having same digit seen so far int32_t peer_digit_prefix = __popc(peer_mask & lane_mask_lt); if (peer_digit_prefix == 0) { // First thread for each digit updates the shared warp counter *digit_counters[ITEM] = DigitCounterT(warp_digit_prefix + digit_count); } // Warp-sync WARP_SYNC(0xFFFFFFFF); // Number of prior keys having same digit ranks[ITEM] = warp_digit_prefix + DigitCounterT(peer_digit_prefix); } CTA_SYNC(); // Scan warp counters DigitCounterT scan_counters[PADDED_RAKING_SEGMENT]; #pragma unroll for (int ITEM = 0; ITEM < PADDED_RAKING_SEGMENT; ++ITEM) scan_counters[ITEM] = temp_storage.aliasable.raking_grid[linear_tid][ITEM]; BlockScanT(temp_storage.block_scan).ExclusiveSum(scan_counters, scan_counters); #pragma unroll for (int ITEM = 0; ITEM < PADDED_RAKING_SEGMENT; ++ITEM) temp_storage.aliasable.raking_grid[linear_tid][ITEM] = scan_counters[ITEM]; CTA_SYNC(); // Seed ranks with counter values from previous warps #pragma unroll for (int ITEM = 0; ITEM < KEYS_PER_THREAD; ++ITEM) ranks[ITEM] += *digit_counters[ITEM]; } /** * \brief Rank keys. For the lower \p RADIX_DIGITS threads, digit counts for each digit are provided for the corresponding thread. */ template < typename UnsignedBits, int KEYS_PER_THREAD> __device__ __forceinline__ void RankKeys( UnsignedBits (&keys)[KEYS_PER_THREAD], ///< [in] Keys for this tile int (&ranks)[KEYS_PER_THREAD], ///< [out] For each key, the local rank within the tile (out parameter) int current_bit, ///< [in] The least-significant bit position of the current digit to extract int num_bits, ///< [in] The number of bits in the current digit int (&exclusive_digit_prefix)[BINS_TRACKED_PER_THREAD]) ///< [out] The exclusive prefix sum for the digits [(threadIdx.x * BINS_TRACKED_PER_THREAD) ... 
(threadIdx.x * BINS_TRACKED_PER_THREAD) + BINS_TRACKED_PER_THREAD - 1] { RankKeys(keys, ranks, current_bit, num_bits); // Get exclusive count for each digit #pragma unroll for (int track = 0; track < BINS_TRACKED_PER_THREAD; ++track) { int bin_idx = (linear_tid * BINS_TRACKED_PER_THREAD) + track; if ((BLOCK_THREADS == RADIX_DIGITS) || (bin_idx < RADIX_DIGITS)) { if (IS_DESCENDING) bin_idx = RADIX_DIGITS - bin_idx - 1; exclusive_digit_prefix[track] = temp_storage.aliasable.warp_digit_counters[bin_idx][0]; } } } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/block/block_radix_sort.cuh000066400000000000000000001126071411340063500235100ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * The cub::BlockRadixSort class provides [collective](index.html#sec0) methods for radix sorting of items partitioned across a CUDA thread block. */ #pragma once #include "block_exchange.cuh" #include "block_radix_rank.cuh" #include "../util_ptx.cuh" #include "../util_arch.cuh" #include "../util_type.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief The BlockRadixSort class provides [collective](index.html#sec0) methods for sorting items partitioned across a CUDA thread block using a radix sorting method. 
![](sorting_logo.png) * \ingroup BlockModule * * \tparam KeyT KeyT type * \tparam BLOCK_DIM_X The thread block length in threads along the X dimension * \tparam ITEMS_PER_THREAD The number of items per thread * \tparam ValueT [optional] ValueT type (default: cub::NullType, which indicates a keys-only sort) * \tparam RADIX_BITS [optional] The number of radix bits per digit place (default: 4 bits) * \tparam MEMOIZE_OUTER_SCAN [optional] Whether or not to buffer outer raking scan partials to incur fewer shared memory reads at the expense of higher register pressure (default: true for architectures SM35 and newer, false otherwise). * \tparam INNER_SCAN_ALGORITHM [optional] The cub::BlockScanAlgorithm algorithm to use (default: cub::BLOCK_SCAN_WARP_SCANS) * \tparam SMEM_CONFIG [optional] Shared memory bank mode (default: \p cudaSharedMemBankSizeFourByte) * \tparam BLOCK_DIM_Y [optional] The thread block length in threads along the Y dimension (default: 1) * \tparam BLOCK_DIM_Z [optional] The thread block length in threads along the Z dimension (default: 1) * \tparam PTX_ARCH [optional] \ptxversion * * \par Overview * - The [radix sorting method](http://en.wikipedia.org/wiki/Radix_sort) arranges * items into ascending order. It relies upon a positional representation for * keys, i.e., each key is comprised of an ordered sequence of symbols (e.g., digits, * characters, etc.) specified from least-significant to most-significant. For a * given input sequence of keys and a set of rules specifying a total ordering * of the symbolic alphabet, the radix sorting method produces a lexicographic * ordering of those keys. * - BlockRadixSort can sort all of the built-in C++ numeric primitive types, e.g.: * unsigned char, \p int, \p double, etc. Within each key, the implementation treats fixed-length * bit-sequences of \p RADIX_BITS as radix digit places. Although the direct radix sorting * method can only be applied to unsigned integral types, BlockRadixSort * is able to sort signed and floating-point types via simple bit-wise transformations * that ensure lexicographic key ordering. * - \rowmajor * * \par Performance Considerations * - \granularity * * \par A Simple Example * \blockcollective{BlockRadixSort} * \par * The code snippet below illustrates a sort of 512 integer keys that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockRadixSort for a 1D block of 128 threads owning 4 integer items each * typedef cub::BlockRadixSort BlockRadixSort; * * // Allocate shared memory for BlockRadixSort * __shared__ typename BlockRadixSort::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_keys[4]; * ... * * // Collectively sort the keys * BlockRadixSort(temp_storage).Sort(thread_keys); * * ... * \endcode * \par * Suppose the set of input \p thread_keys across the block of threads is * { [0,511,1,510], [2,509,3,508], [4,507,5,506], ..., [254,257,255,256] }. The * corresponding output \p thread_keys in those threads will be * { [0,1,2,3], [4,5,6,7], [8,9,10,11], ..., [508,509,510,511] }. * */ template < typename KeyT, int BLOCK_DIM_X, int ITEMS_PER_THREAD, typename ValueT = NullType, int RADIX_BITS = 4, bool MEMOIZE_OUTER_SCAN = (CUB_PTX_ARCH >= 350) ? 
true : false, BlockScanAlgorithm INNER_SCAN_ALGORITHM = BLOCK_SCAN_WARP_SCANS, cudaSharedMemConfig SMEM_CONFIG = cudaSharedMemBankSizeFourByte, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH> class BlockRadixSort { private: /****************************************************************************** * Constants and type definitions ******************************************************************************/ enum { // The thread block size in threads BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, // Whether or not there are values to be trucked along with keys KEYS_ONLY = Equals::VALUE, }; // KeyT traits and unsigned bits type typedef Traits KeyTraits; typedef typename KeyTraits::UnsignedBits UnsignedBits; /// Ascending BlockRadixRank utility type typedef BlockRadixRank< BLOCK_DIM_X, RADIX_BITS, false, MEMOIZE_OUTER_SCAN, INNER_SCAN_ALGORITHM, SMEM_CONFIG, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH> AscendingBlockRadixRank; /// Descending BlockRadixRank utility type typedef BlockRadixRank< BLOCK_DIM_X, RADIX_BITS, true, MEMOIZE_OUTER_SCAN, INNER_SCAN_ALGORITHM, SMEM_CONFIG, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH> DescendingBlockRadixRank; /// BlockExchange utility type for keys typedef BlockExchange BlockExchangeKeys; /// BlockExchange utility type for values typedef BlockExchange BlockExchangeValues; /// Shared memory storage layout type union _TempStorage { typename AscendingBlockRadixRank::TempStorage asending_ranking_storage; typename DescendingBlockRadixRank::TempStorage descending_ranking_storage; typename BlockExchangeKeys::TempStorage exchange_keys; typename BlockExchangeValues::TempStorage exchange_values; }; /****************************************************************************** * Thread fields ******************************************************************************/ /// Shared storage reference _TempStorage &temp_storage; /// Linear thread-id unsigned int linear_tid; /****************************************************************************** * Utility methods ******************************************************************************/ /// Internal storage allocator __device__ __forceinline__ _TempStorage& PrivateStorage() { __shared__ _TempStorage private_storage; return private_storage; } /// Rank keys (specialized for ascending sort) __device__ __forceinline__ void RankKeys( UnsignedBits (&unsigned_keys)[ITEMS_PER_THREAD], int (&ranks)[ITEMS_PER_THREAD], int begin_bit, int pass_bits, Int2Type /*is_descending*/) { AscendingBlockRadixRank(temp_storage.asending_ranking_storage).RankKeys( unsigned_keys, ranks, begin_bit, pass_bits); } /// Rank keys (specialized for descending sort) __device__ __forceinline__ void RankKeys( UnsignedBits (&unsigned_keys)[ITEMS_PER_THREAD], int (&ranks)[ITEMS_PER_THREAD], int begin_bit, int pass_bits, Int2Type /*is_descending*/) { DescendingBlockRadixRank(temp_storage.descending_ranking_storage).RankKeys( unsigned_keys, ranks, begin_bit, pass_bits); } /// ExchangeValues (specialized for key-value sort, to-blocked arrangement) __device__ __forceinline__ void ExchangeValues( ValueT (&values)[ITEMS_PER_THREAD], int (&ranks)[ITEMS_PER_THREAD], Int2Type /*is_keys_only*/, Int2Type /*is_blocked*/) { CTA_SYNC(); // Exchange values through shared memory in blocked arrangement BlockExchangeValues(temp_storage.exchange_values).ScatterToBlocked(values, ranks); } /// ExchangeValues (specialized for key-value sort, to-striped arrangement) __device__ __forceinline__ void ExchangeValues( ValueT 
(&values)[ITEMS_PER_THREAD], int (&ranks)[ITEMS_PER_THREAD], Int2Type /*is_keys_only*/, Int2Type /*is_blocked*/) { CTA_SYNC(); // Exchange values through shared memory in blocked arrangement BlockExchangeValues(temp_storage.exchange_values).ScatterToStriped(values, ranks); } /// ExchangeValues (specialized for keys-only sort) template __device__ __forceinline__ void ExchangeValues( ValueT (&/*values*/)[ITEMS_PER_THREAD], int (&/*ranks*/)[ITEMS_PER_THREAD], Int2Type /*is_keys_only*/, Int2Type /*is_blocked*/) {} /// Sort blocked arrangement template __device__ __forceinline__ void SortBlocked( KeyT (&keys)[ITEMS_PER_THREAD], ///< Keys to sort ValueT (&values)[ITEMS_PER_THREAD], ///< Values to sort int begin_bit, ///< The beginning (least-significant) bit index needed for key comparison int end_bit, ///< The past-the-end (most-significant) bit index needed for key comparison Int2Type is_descending, ///< Tag whether is a descending-order sort Int2Type is_keys_only) ///< Tag whether is keys-only sort { UnsignedBits (&unsigned_keys)[ITEMS_PER_THREAD] = reinterpret_cast(keys); // Twiddle bits if necessary #pragma unroll for (int KEY = 0; KEY < ITEMS_PER_THREAD; KEY++) { unsigned_keys[KEY] = KeyTraits::TwiddleIn(unsigned_keys[KEY]); } // Radix sorting passes while (true) { int pass_bits = CUB_MIN(RADIX_BITS, end_bit - begin_bit); // Rank the blocked keys int ranks[ITEMS_PER_THREAD]; RankKeys(unsigned_keys, ranks, begin_bit, pass_bits, is_descending); begin_bit += RADIX_BITS; CTA_SYNC(); // Exchange keys through shared memory in blocked arrangement BlockExchangeKeys(temp_storage.exchange_keys).ScatterToBlocked(keys, ranks); // Exchange values through shared memory in blocked arrangement ExchangeValues(values, ranks, is_keys_only, Int2Type()); // Quit if done if (begin_bit >= end_bit) break; CTA_SYNC(); } // Untwiddle bits if necessary #pragma unroll for (int KEY = 0; KEY < ITEMS_PER_THREAD; KEY++) { unsigned_keys[KEY] = KeyTraits::TwiddleOut(unsigned_keys[KEY]); } } public: #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document /// Sort blocked -> striped arrangement template __device__ __forceinline__ void SortBlockedToStriped( KeyT (&keys)[ITEMS_PER_THREAD], ///< Keys to sort ValueT (&values)[ITEMS_PER_THREAD], ///< Values to sort int begin_bit, ///< The beginning (least-significant) bit index needed for key comparison int end_bit, ///< The past-the-end (most-significant) bit index needed for key comparison Int2Type is_descending, ///< Tag whether is a descending-order sort Int2Type is_keys_only) ///< Tag whether is keys-only sort { UnsignedBits (&unsigned_keys)[ITEMS_PER_THREAD] = reinterpret_cast(keys); // Twiddle bits if necessary #pragma unroll for (int KEY = 0; KEY < ITEMS_PER_THREAD; KEY++) { unsigned_keys[KEY] = KeyTraits::TwiddleIn(unsigned_keys[KEY]); } // Radix sorting passes while (true) { int pass_bits = CUB_MIN(RADIX_BITS, end_bit - begin_bit); // Rank the blocked keys int ranks[ITEMS_PER_THREAD]; RankKeys(unsigned_keys, ranks, begin_bit, pass_bits, is_descending); begin_bit += RADIX_BITS; CTA_SYNC(); // Check if this is the last pass if (begin_bit >= end_bit) { // Last pass exchanges keys through shared memory in striped arrangement BlockExchangeKeys(temp_storage.exchange_keys).ScatterToStriped(keys, ranks); // Last pass exchanges through shared memory in striped arrangement ExchangeValues(values, ranks, is_keys_only, Int2Type()); // Quit break; } // Exchange keys through shared memory in blocked arrangement BlockExchangeKeys(temp_storage.exchange_keys).ScatterToBlocked(keys, ranks); 
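 // (Intermediate passes restore the blocked arrangement so the next digit place
 //  can be ranked and exchanged again; only the final pass above leaves
 //  keys/values in the striped arrangement.)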
// Exchange values through shared memory in blocked arrangement ExchangeValues(values, ranks, is_keys_only, Int2Type()); CTA_SYNC(); } // Untwiddle bits if necessary #pragma unroll for (int KEY = 0; KEY < ITEMS_PER_THREAD; KEY++) { unsigned_keys[KEY] = KeyTraits::TwiddleOut(unsigned_keys[KEY]); } } #endif // DOXYGEN_SHOULD_SKIP_THIS /// \smemstorage{BlockRadixSort} struct TempStorage : Uninitialized<_TempStorage> {}; /******************************************************************//** * \name Collective constructors *********************************************************************/ //@{ /** * \brief Collective constructor using a private static allocation of shared memory as temporary storage. */ __device__ __forceinline__ BlockRadixSort() : temp_storage(PrivateStorage()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} /** * \brief Collective constructor using the specified memory allocation as temporary storage. */ __device__ __forceinline__ BlockRadixSort( TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage : temp_storage(temp_storage.Alias()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} //@} end member group /******************************************************************//** * \name Sorting (blocked arrangements) *********************************************************************/ //@{ /** * \brief Performs an ascending block-wide radix sort over a [blocked arrangement](index.html#sec5sec3) of keys. * * \par * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates a sort of 512 integer keys that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive keys. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockRadixSort for a 1D block of 128 threads owning 4 integer keys each * typedef cub::BlockRadixSort BlockRadixSort; * * // Allocate shared memory for BlockRadixSort * __shared__ typename BlockRadixSort::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_keys[4]; * ... * * // Collectively sort the keys * BlockRadixSort(temp_storage).Sort(thread_keys); * * \endcode * \par * Suppose the set of input \p thread_keys across the block of threads is * { [0,511,1,510], [2,509,3,508], [4,507,5,506], ..., [254,257,255,256] }. * The corresponding output \p thread_keys in those threads will be * { [0,1,2,3], [4,5,6,7], [8,9,10,11], ..., [508,509,510,511] }. */ __device__ __forceinline__ void Sort( KeyT (&keys)[ITEMS_PER_THREAD], ///< [in-out] Keys to sort int begin_bit = 0, ///< [in] [optional] The beginning (least-significant) bit index needed for key comparison int end_bit = sizeof(KeyT) * 8) ///< [in] [optional] The past-the-end (most-significant) bit index needed for key comparison { NullType values[ITEMS_PER_THREAD]; SortBlocked(keys, values, begin_bit, end_bit, Int2Type(), Int2Type()); } /** * \brief Performs an ascending block-wide radix sort across a [blocked arrangement](index.html#sec5sec3) of keys and values. * * \par * - BlockRadixSort can only accommodate one associated tile of values. To "truck along" * more than one tile of values, simply perform a key-value sort of the keys paired * with a temporary value array that enumerates the key indices. The reordered indices * can then be used as a gather-vector for exchanging other associated tile data through * shared memory. 
* - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates a sort of 512 integer keys and values that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive pairs. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockRadixSort for a 1D block of 128 threads owning 4 integer keys and values each * typedef cub::BlockRadixSort BlockRadixSort; * * // Allocate shared memory for BlockRadixSort * __shared__ typename BlockRadixSort::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_keys[4]; * int thread_values[4]; * ... * * // Collectively sort the keys and values among block threads * BlockRadixSort(temp_storage).Sort(thread_keys, thread_values); * * \endcode * \par * Suppose the set of input \p thread_keys across the block of threads is * { [0,511,1,510], [2,509,3,508], [4,507,5,506], ..., [254,257,255,256] }. The * corresponding output \p thread_keys in those threads will be * { [0,1,2,3], [4,5,6,7], [8,9,10,11], ..., [508,509,510,511] }. * */ __device__ __forceinline__ void Sort( KeyT (&keys)[ITEMS_PER_THREAD], ///< [in-out] Keys to sort ValueT (&values)[ITEMS_PER_THREAD], ///< [in-out] Values to sort int begin_bit = 0, ///< [in] [optional] The beginning (least-significant) bit index needed for key comparison int end_bit = sizeof(KeyT) * 8) ///< [in] [optional] The past-the-end (most-significant) bit index needed for key comparison { SortBlocked(keys, values, begin_bit, end_bit, Int2Type(), Int2Type()); } /** * \brief Performs a descending block-wide radix sort over a [blocked arrangement](index.html#sec5sec3) of keys. * * \par * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates a sort of 512 integer keys that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive keys. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockRadixSort for a 1D block of 128 threads owning 4 integer keys each * typedef cub::BlockRadixSort BlockRadixSort; * * // Allocate shared memory for BlockRadixSort * __shared__ typename BlockRadixSort::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_keys[4]; * ... * * // Collectively sort the keys * BlockRadixSort(temp_storage).Sort(thread_keys); * * \endcode * \par * Suppose the set of input \p thread_keys across the block of threads is * { [0,511,1,510], [2,509,3,508], [4,507,5,506], ..., [254,257,255,256] }. * The corresponding output \p thread_keys in those threads will be * { [511,510,509,508], [11,10,9,8], [7,6,5,4], ..., [3,2,1,0] }. */ __device__ __forceinline__ void SortDescending( KeyT (&keys)[ITEMS_PER_THREAD], ///< [in-out] Keys to sort int begin_bit = 0, ///< [in] [optional] The beginning (least-significant) bit index needed for key comparison int end_bit = sizeof(KeyT) * 8) ///< [in] [optional] The past-the-end (most-significant) bit index needed for key comparison { NullType values[ITEMS_PER_THREAD]; SortBlocked(keys, values, begin_bit, end_bit, Int2Type(), Int2Type()); } /** * \brief Performs a descending block-wide radix sort across a [blocked arrangement](index.html#sec5sec3) of keys and values. * * \par * - BlockRadixSort can only accommodate one associated tile of values. 
To "truck along" * more than one tile of values, simply perform a key-value sort of the keys paired * with a temporary value array that enumerates the key indices. The reordered indices * can then be used as a gather-vector for exchanging other associated tile data through * shared memory. * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates a sort of 512 integer keys and values that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive pairs. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockRadixSort for a 1D block of 128 threads owning 4 integer keys and values each * typedef cub::BlockRadixSort BlockRadixSort; * * // Allocate shared memory for BlockRadixSort * __shared__ typename BlockRadixSort::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_keys[4]; * int thread_values[4]; * ... * * // Collectively sort the keys and values among block threads * BlockRadixSort(temp_storage).Sort(thread_keys, thread_values); * * \endcode * \par * Suppose the set of input \p thread_keys across the block of threads is * { [0,511,1,510], [2,509,3,508], [4,507,5,506], ..., [254,257,255,256] }. The * corresponding output \p thread_keys in those threads will be * { [511,510,509,508], [11,10,9,8], [7,6,5,4], ..., [3,2,1,0] }. * */ __device__ __forceinline__ void SortDescending( KeyT (&keys)[ITEMS_PER_THREAD], ///< [in-out] Keys to sort ValueT (&values)[ITEMS_PER_THREAD], ///< [in-out] Values to sort int begin_bit = 0, ///< [in] [optional] The beginning (least-significant) bit index needed for key comparison int end_bit = sizeof(KeyT) * 8) ///< [in] [optional] The past-the-end (most-significant) bit index needed for key comparison { SortBlocked(keys, values, begin_bit, end_bit, Int2Type(), Int2Type()); } //@} end member group /******************************************************************//** * \name Sorting (blocked arrangement -> striped arrangement) *********************************************************************/ //@{ /** * \brief Performs an ascending radix sort across a [blocked arrangement](index.html#sec5sec3) of keys, leaving them in a [striped arrangement](index.html#sec5sec3). * * \par * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates a sort of 512 integer keys that * are initially partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive keys. The final partitioning is striped. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockRadixSort for a 1D block of 128 threads owning 4 integer keys each * typedef cub::BlockRadixSort BlockRadixSort; * * // Allocate shared memory for BlockRadixSort * __shared__ typename BlockRadixSort::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_keys[4]; * ... * * // Collectively sort the keys * BlockRadixSort(temp_storage).SortBlockedToStriped(thread_keys); * * \endcode * \par * Suppose the set of input \p thread_keys across the block of threads is * { [0,511,1,510], [2,509,3,508], [4,507,5,506], ..., [254,257,255,256] }. The * corresponding output \p thread_keys in those threads will be * { [0,128,256,384], [1,129,257,385], [2,130,258,386], ..., [127,255,383,511] }. 
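 *
 * \par
 * A common reason for the striped output (a sketch only; \p d_out and
 * \p block_offset are assumed to be defined by the surrounding kernel) is that it
 * can be written back to global memory with fully coalesced accesses:
 * \code
 * // After SortBlockedToStriped(), item i of each thread is BLOCK_THREADS apart
 * #pragma unroll
 * for (int ITEM = 0; ITEM < 4; ++ITEM)
 *     d_out[block_offset + (ITEM * 128) + threadIdx.x] = thread_keys[ITEM];
 * \endcode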
* */ __device__ __forceinline__ void SortBlockedToStriped( KeyT (&keys)[ITEMS_PER_THREAD], ///< [in-out] Keys to sort int begin_bit = 0, ///< [in] [optional] The beginning (least-significant) bit index needed for key comparison int end_bit = sizeof(KeyT) * 8) ///< [in] [optional] The past-the-end (most-significant) bit index needed for key comparison { NullType values[ITEMS_PER_THREAD]; SortBlockedToStriped(keys, values, begin_bit, end_bit, Int2Type(), Int2Type()); } /** * \brief Performs an ascending radix sort across a [blocked arrangement](index.html#sec5sec3) of keys and values, leaving them in a [striped arrangement](index.html#sec5sec3). * * \par * - BlockRadixSort can only accommodate one associated tile of values. To "truck along" * more than one tile of values, simply perform a key-value sort of the keys paired * with a temporary value array that enumerates the key indices. The reordered indices * can then be used as a gather-vector for exchanging other associated tile data through * shared memory. * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates a sort of 512 integer keys and values that * are initially partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive pairs. The final partitioning is striped. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockRadixSort for a 1D block of 128 threads owning 4 integer keys and values each * typedef cub::BlockRadixSort BlockRadixSort; * * // Allocate shared memory for BlockRadixSort * __shared__ typename BlockRadixSort::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_keys[4]; * int thread_values[4]; * ... * * // Collectively sort the keys and values among block threads * BlockRadixSort(temp_storage).SortBlockedToStriped(thread_keys, thread_values); * * \endcode * \par * Suppose the set of input \p thread_keys across the block of threads is * { [0,511,1,510], [2,509,3,508], [4,507,5,506], ..., [254,257,255,256] }. The * corresponding output \p thread_keys in those threads will be * { [0,128,256,384], [1,129,257,385], [2,130,258,386], ..., [127,255,383,511] }. * */ __device__ __forceinline__ void SortBlockedToStriped( KeyT (&keys)[ITEMS_PER_THREAD], ///< [in-out] Keys to sort ValueT (&values)[ITEMS_PER_THREAD], ///< [in-out] Values to sort int begin_bit = 0, ///< [in] [optional] The beginning (least-significant) bit index needed for key comparison int end_bit = sizeof(KeyT) * 8) ///< [in] [optional] The past-the-end (most-significant) bit index needed for key comparison { SortBlockedToStriped(keys, values, begin_bit, end_bit, Int2Type(), Int2Type()); } /** * \brief Performs a descending radix sort across a [blocked arrangement](index.html#sec5sec3) of keys, leaving them in a [striped arrangement](index.html#sec5sec3). * * \par * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates a sort of 512 integer keys that * are initially partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive keys. The final partitioning is striped. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) 
* { * // Specialize BlockRadixSort for a 1D block of 128 threads owning 4 integer keys each * typedef cub::BlockRadixSort BlockRadixSort; * * // Allocate shared memory for BlockRadixSort * __shared__ typename BlockRadixSort::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_keys[4]; * ... * * // Collectively sort the keys * BlockRadixSort(temp_storage).SortBlockedToStriped(thread_keys); * * \endcode * \par * Suppose the set of input \p thread_keys across the block of threads is * { [0,511,1,510], [2,509,3,508], [4,507,5,506], ..., [254,257,255,256] }. The * corresponding output \p thread_keys in those threads will be * { [511,383,255,127], [386,258,130,2], [385,257,128,1], ..., [384,256,128,0] }. * */ __device__ __forceinline__ void SortDescendingBlockedToStriped( KeyT (&keys)[ITEMS_PER_THREAD], ///< [in-out] Keys to sort int begin_bit = 0, ///< [in] [optional] The beginning (least-significant) bit index needed for key comparison int end_bit = sizeof(KeyT) * 8) ///< [in] [optional] The past-the-end (most-significant) bit index needed for key comparison { NullType values[ITEMS_PER_THREAD]; SortBlockedToStriped(keys, values, begin_bit, end_bit, Int2Type(), Int2Type()); } /** * \brief Performs a descending radix sort across a [blocked arrangement](index.html#sec5sec3) of keys and values, leaving them in a [striped arrangement](index.html#sec5sec3). * * \par * - BlockRadixSort can only accommodate one associated tile of values. To "truck along" * more than one tile of values, simply perform a key-value sort of the keys paired * with a temporary value array that enumerates the key indices. The reordered indices * can then be used as a gather-vector for exchanging other associated tile data through * shared memory. * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates a sort of 512 integer keys and values that * are initially partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive pairs. The final partitioning is striped. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockRadixSort for a 1D block of 128 threads owning 4 integer keys and values each * typedef cub::BlockRadixSort BlockRadixSort; * * // Allocate shared memory for BlockRadixSort * __shared__ typename BlockRadixSort::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_keys[4]; * int thread_values[4]; * ... * * // Collectively sort the keys and values among block threads * BlockRadixSort(temp_storage).SortBlockedToStriped(thread_keys, thread_values); * * \endcode * \par * Suppose the set of input \p thread_keys across the block of threads is * { [0,511,1,510], [2,509,3,508], [4,507,5,506], ..., [254,257,255,256] }. The * corresponding output \p thread_keys in those threads will be * { [511,383,255,127], [386,258,130,2], [385,257,128,1], ..., [384,256,128,0] }. 
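 *
 * \par
 * As with the other sorting methods, the optional \p begin_bit / \p end_bit
 * arguments can restrict the comparison to a sub-range of key bits; the bit range
 * shown below is purely illustrative:
 * \code
 * // Sort (descending) on bits [8,24) of each key only
 * BlockRadixSort(temp_storage).SortDescendingBlockedToStriped(
 *     thread_keys, thread_values, 8, 24);
 * \endcode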
* */ __device__ __forceinline__ void SortDescendingBlockedToStriped( KeyT (&keys)[ITEMS_PER_THREAD], ///< [in-out] Keys to sort ValueT (&values)[ITEMS_PER_THREAD], ///< [in-out] Values to sort int begin_bit = 0, ///< [in] [optional] The beginning (least-significant) bit index needed for key comparison int end_bit = sizeof(KeyT) * 8) ///< [in] [optional] The past-the-end (most-significant) bit index needed for key comparison { SortBlockedToStriped(keys, values, begin_bit, end_bit, Int2Type(), Int2Type()); } //@} end member group }; /** * \example example_block_radix_sort.cu */ } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/block/block_raking_layout.cuh000066400000000000000000000140451411340063500241770ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::BlockRakingLayout provides a conflict-free shared memory layout abstraction for warp-raking across thread block data. */ #pragma once #include "../util_macro.cuh" #include "../util_arch.cuh" #include "../util_type.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief BlockRakingLayout provides a conflict-free shared memory layout abstraction for 1D raking across thread block data. ![](raking.png) * \ingroup BlockModule * * \par Overview * This type facilitates a shared memory usage pattern where a block of CUDA * threads places elements into shared memory and then reduces the active * parallelism to one "raking" warp of threads for serially aggregating consecutive * sequences of shared items. Padding is inserted to eliminate bank conflicts * (for most data types). * * \tparam T The data type to be exchanged. * \tparam BLOCK_THREADS The thread block size in threads. 
* \tparam PTX_ARCH [optional] \ptxversion */ template < typename T, int BLOCK_THREADS, int PTX_ARCH = CUB_PTX_ARCH> struct BlockRakingLayout { //--------------------------------------------------------------------- // Constants and type definitions //--------------------------------------------------------------------- enum { /// The total number of elements that need to be cooperatively reduced SHARED_ELEMENTS = BLOCK_THREADS, /// Maximum number of warp-synchronous raking threads MAX_RAKING_THREADS = CUB_MIN(BLOCK_THREADS, CUB_WARP_THREADS(PTX_ARCH)), /// Number of raking elements per warp-synchronous raking thread (rounded up) SEGMENT_LENGTH = (SHARED_ELEMENTS + MAX_RAKING_THREADS - 1) / MAX_RAKING_THREADS, /// Never use a raking thread that will have no valid data (e.g., when BLOCK_THREADS is 62 and SEGMENT_LENGTH is 2, we should only use 31 raking threads) RAKING_THREADS = (SHARED_ELEMENTS + SEGMENT_LENGTH - 1) / SEGMENT_LENGTH, /// Whether we will have bank conflicts (technically we should find out if the GCD is > 1) HAS_CONFLICTS = (CUB_SMEM_BANKS(PTX_ARCH) % SEGMENT_LENGTH == 0), /// Degree of bank conflicts (e.g., 4-way) CONFLICT_DEGREE = (HAS_CONFLICTS) ? (MAX_RAKING_THREADS * SEGMENT_LENGTH) / CUB_SMEM_BANKS(PTX_ARCH) : 1, /// Pad each segment length with one element if segment length is not relatively prime to warp size and can't be optimized as a vector load USE_SEGMENT_PADDING = ((SEGMENT_LENGTH & 1) == 0) && (SEGMENT_LENGTH > 2), /// Total number of elements in the raking grid GRID_ELEMENTS = RAKING_THREADS * (SEGMENT_LENGTH + USE_SEGMENT_PADDING), /// Whether or not we need bounds checking during raking (the number of reduction elements is not a multiple of the number of raking threads) UNGUARDED = (SHARED_ELEMENTS % RAKING_THREADS == 0), }; /** * \brief Shared memory storage type */ struct __align__(16) _TempStorage { T buff[BlockRakingLayout::GRID_ELEMENTS]; }; /// Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; /** * \brief Returns the location for the calling thread to place data into the grid */ static __device__ __forceinline__ T* PlacementPtr( TempStorage &temp_storage, unsigned int linear_tid) { // Offset for partial unsigned int offset = linear_tid; // Add in one padding element for every segment if (USE_SEGMENT_PADDING > 0) { offset += offset / SEGMENT_LENGTH; } // Incorporating a block of padding partials every shared memory segment return temp_storage.Alias().buff + offset; } /** * \brief Returns the location for the calling thread to begin sequential raking */ static __device__ __forceinline__ T* RakingPtr( TempStorage &temp_storage, unsigned int linear_tid) { return temp_storage.Alias().buff + (linear_tid * (SEGMENT_LENGTH + USE_SEGMENT_PADDING)); } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/block/block_reduce.cuh000066400000000000000000000613241411340063500226000ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. 
* * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * The cub::BlockReduce class provides [collective](index.html#sec0) methods for computing a parallel reduction of items partitioned across a CUDA thread block. */ #pragma once #include "specializations/block_reduce_raking.cuh" #include "specializations/block_reduce_raking_commutative_only.cuh" #include "specializations/block_reduce_warp_reductions.cuh" #include "../util_ptx.cuh" #include "../util_type.cuh" #include "../thread/thread_operators.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * Algorithmic variants ******************************************************************************/ /** * BlockReduceAlgorithm enumerates alternative algorithms for parallel * reduction across a CUDA thread block. */ enum BlockReduceAlgorithm { /** * \par Overview * An efficient "raking" reduction algorithm that only supports commutative * reduction operators (true for most operations, e.g., addition). * * \par * Execution is comprised of three phases: * -# Upsweep sequential reduction in registers (if threads contribute more * than one input each). Threads in warps other than the first warp place * their partial reductions into shared memory. * -# Upsweep sequential reduction in shared memory. Threads within the first * warp continue to accumulate by raking across segments of shared partial reductions * -# A warp-synchronous Kogge-Stone style reduction within the raking warp. * * \par * \image html block_reduce.png *

 *     \p BLOCK_REDUCE_RAKING data flow for a hypothetical 16-thread thread block and 4-thread raking warp.
* * \par Performance Considerations * - This variant performs less communication than BLOCK_REDUCE_RAKING_NON_COMMUTATIVE * and is preferable when the reduction operator is commutative. This variant * applies fewer reduction operators than BLOCK_REDUCE_WARP_REDUCTIONS, and can provide higher overall * throughput across the GPU when suitably occupied. However, turn-around latency may be * higher than to BLOCK_REDUCE_WARP_REDUCTIONS and thus less-desirable * when the GPU is under-occupied. */ BLOCK_REDUCE_RAKING_COMMUTATIVE_ONLY, /** * \par Overview * An efficient "raking" reduction algorithm that supports commutative * (e.g., addition) and non-commutative (e.g., string concatenation) reduction * operators. \blocked. * * \par * Execution is comprised of three phases: * -# Upsweep sequential reduction in registers (if threads contribute more * than one input each). Each thread then places the partial reduction * of its item(s) into shared memory. * -# Upsweep sequential reduction in shared memory. Threads within a * single warp rake across segments of shared partial reductions. * -# A warp-synchronous Kogge-Stone style reduction within the raking warp. * * \par * \image html block_reduce.png *
 *     \p BLOCK_REDUCE_RAKING data flow for a hypothetical 16-thread thread block and 4-thread raking warp.
* * \par Performance Considerations * - This variant performs more communication than BLOCK_REDUCE_RAKING * and is only preferable when the reduction operator is non-commutative. This variant * applies fewer reduction operators than BLOCK_REDUCE_WARP_REDUCTIONS, and can provide higher overall * throughput across the GPU when suitably occupied. However, turn-around latency may be * higher than to BLOCK_REDUCE_WARP_REDUCTIONS and thus less-desirable * when the GPU is under-occupied. */ BLOCK_REDUCE_RAKING, /** * \par Overview * A quick "tiled warp-reductions" reduction algorithm that supports commutative * (e.g., addition) and non-commutative (e.g., string concatenation) reduction * operators. * * \par * Execution is comprised of four phases: * -# Upsweep sequential reduction in registers (if threads contribute more * than one input each). Each thread then places the partial reduction * of its item(s) into shared memory. * -# Compute a shallow, but inefficient warp-synchronous Kogge-Stone style * reduction within each warp. * -# A propagation phase where the warp reduction outputs in each warp are * updated with the aggregate from each preceding warp. * * \par * \image html block_scan_warpscans.png *
 *     \p BLOCK_REDUCE_WARP_REDUCTIONS data flow for a hypothetical 16-thread thread block and 4-thread raking warp.
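 *
 * \par
 * A sketch of selecting this variant explicitly (the 128-thread block and the
 * \p int element type are illustrative assumptions):
 * \code
 * typedef cub::BlockReduce<int, 128, cub::BLOCK_REDUCE_WARP_REDUCTIONS> BlockReduce;
 * \endcode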
* * \par Performance Considerations * - This variant applies more reduction operators than BLOCK_REDUCE_RAKING * or BLOCK_REDUCE_RAKING_NON_COMMUTATIVE, which may result in lower overall * throughput across the GPU. However turn-around latency may be lower and * thus useful when the GPU is under-occupied. */ BLOCK_REDUCE_WARP_REDUCTIONS, }; /****************************************************************************** * Block reduce ******************************************************************************/ /** * \brief The BlockReduce class provides [collective](index.html#sec0) methods for computing a parallel reduction of items partitioned across a CUDA thread block. ![](reduce_logo.png) * \ingroup BlockModule * * \tparam T Data type being reduced * \tparam BLOCK_DIM_X The thread block length in threads along the X dimension * \tparam ALGORITHM [optional] cub::BlockReduceAlgorithm enumerator specifying the underlying algorithm to use (default: cub::BLOCK_REDUCE_WARP_REDUCTIONS) * \tparam BLOCK_DIM_Y [optional] The thread block length in threads along the Y dimension (default: 1) * \tparam BLOCK_DIM_Z [optional] The thread block length in threads along the Z dimension (default: 1) * \tparam PTX_ARCH [optional] \ptxversion * * \par Overview * - A reduction (or fold) * uses a binary combining operator to compute a single aggregate from a list of input elements. * - \rowmajor * - BlockReduce can be optionally specialized by algorithm to accommodate different latency/throughput workload profiles: * -# cub::BLOCK_REDUCE_RAKING_COMMUTATIVE_ONLY. An efficient "raking" reduction algorithm that only supports commutative reduction operators. [More...](\ref cub::BlockReduceAlgorithm) * -# cub::BLOCK_REDUCE_RAKING. An efficient "raking" reduction algorithm that supports commutative and non-commutative reduction operators. [More...](\ref cub::BlockReduceAlgorithm) * -# cub::BLOCK_REDUCE_WARP_REDUCTIONS. A quick "tiled warp-reductions" reduction algorithm that supports commutative and non-commutative reduction operators. [More...](\ref cub::BlockReduceAlgorithm) * * \par Performance Considerations * - \granularity * - Very efficient (only one synchronization barrier). * - Incurs zero bank conflicts for most types * - Computation is slightly more efficient (i.e., having lower instruction overhead) for: * - Summation (vs. generic reduction) * - \p BLOCK_THREADS is a multiple of the architecture's warp size * - Every thread has a valid input (i.e., full vs. partial-tiles) * - See cub::BlockReduceAlgorithm for performance details regarding algorithmic alternatives * * \par A Simple Example * \blockcollective{BlockReduce} * \par * The code snippet below illustrates a sum reduction of 512 integer items that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockReduce for a 1D block of 128 threads on type int * typedef cub::BlockReduce BlockReduce; * * // Allocate shared memory for BlockReduce * __shared__ typename BlockReduce::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... 
* * // Compute the block-wide sum for thread0 * int aggregate = BlockReduce(temp_storage).Sum(thread_data); * * \endcode * */ template < typename T, int BLOCK_DIM_X, BlockReduceAlgorithm ALGORITHM = BLOCK_REDUCE_WARP_REDUCTIONS, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH> class BlockReduce { private: /****************************************************************************** * Constants and type definitions ******************************************************************************/ /// Constants enum { /// The thread block size in threads BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, }; typedef BlockReduceWarpReductions WarpReductions; typedef BlockReduceRakingCommutativeOnly RakingCommutativeOnly; typedef BlockReduceRaking Raking; /// Internal specialization type typedef typename If<(ALGORITHM == BLOCK_REDUCE_WARP_REDUCTIONS), WarpReductions, typename If<(ALGORITHM == BLOCK_REDUCE_RAKING_COMMUTATIVE_ONLY), RakingCommutativeOnly, Raking>::Type>::Type InternalBlockReduce; // BlockReduceRaking /// Shared memory storage layout type for BlockReduce typedef typename InternalBlockReduce::TempStorage _TempStorage; /****************************************************************************** * Utility methods ******************************************************************************/ /// Internal storage allocator __device__ __forceinline__ _TempStorage& PrivateStorage() { __shared__ _TempStorage private_storage; return private_storage; } /****************************************************************************** * Thread fields ******************************************************************************/ /// Shared storage reference _TempStorage &temp_storage; /// Linear thread-id unsigned int linear_tid; public: /// \smemstorage{BlockReduce} struct TempStorage : Uninitialized<_TempStorage> {}; /******************************************************************//** * \name Collective constructors *********************************************************************/ //@{ /** * \brief Collective constructor using a private static allocation of shared memory as temporary storage. */ __device__ __forceinline__ BlockReduce() : temp_storage(PrivateStorage()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} /** * \brief Collective constructor using the specified memory allocation as temporary storage. */ __device__ __forceinline__ BlockReduce( TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage : temp_storage(temp_storage.Alias()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} //@} end member group /******************************************************************//** * \name Generic reductions *********************************************************************/ //@{ /** * \brief Computes a block-wide reduction for thread0 using the specified binary reduction functor. Each thread contributes one input element. * * \par * - The return value is undefined in threads other than thread0. * - \rowmajor * - \smemreuse * * \par Snippet * The code snippet below illustrates a max reduction of 128 integer items that * are partitioned across 128 threads. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) 
* { * // Specialize BlockReduce for a 1D block of 128 threads on type int * typedef cub::BlockReduce BlockReduce; * * // Allocate shared memory for BlockReduce * __shared__ typename BlockReduce::TempStorage temp_storage; * * // Each thread obtains an input item * int thread_data; * ... * * // Compute the block-wide max for thread0 * int aggregate = BlockReduce(temp_storage).Reduce(thread_data, cub::Max()); * * \endcode * * \tparam ReductionOp [inferred] Binary reduction functor type having member T operator()(const T &a, const T &b) */ template __device__ __forceinline__ T Reduce( T input, ///< [in] Calling thread's input ReductionOp reduction_op) ///< [in] Binary reduction functor { return InternalBlockReduce(temp_storage).template Reduce(input, BLOCK_THREADS, reduction_op); } /** * \brief Computes a block-wide reduction for thread0 using the specified binary reduction functor. Each thread contributes an array of consecutive input elements. * * \par * - The return value is undefined in threads other than thread0. * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates a max reduction of 512 integer items that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockReduce for a 1D block of 128 threads on type int * typedef cub::BlockReduce BlockReduce; * * // Allocate shared memory for BlockReduce * __shared__ typename BlockReduce::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... * * // Compute the block-wide max for thread0 * int aggregate = BlockReduce(temp_storage).Reduce(thread_data, cub::Max()); * * \endcode * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam ReductionOp [inferred] Binary reduction functor type having member T operator()(const T &a, const T &b) */ template < int ITEMS_PER_THREAD, typename ReductionOp> __device__ __forceinline__ T Reduce( T (&inputs)[ITEMS_PER_THREAD], ///< [in] Calling thread's input segment ReductionOp reduction_op) ///< [in] Binary reduction functor { // Reduce partials T partial = internal::ThreadReduce(inputs, reduction_op); return Reduce(partial, reduction_op); } /** * \brief Computes a block-wide reduction for thread0 using the specified binary reduction functor. The first \p num_valid threads each contribute one input element. * * \par * - The return value is undefined in threads other than thread0. * - \rowmajor * - \smemreuse * * \par Snippet * The code snippet below illustrates a max reduction of a partially-full tile of integer items that * are partitioned across 128 threads. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(int num_valid, ...) * { * // Specialize BlockReduce for a 1D block of 128 threads on type int * typedef cub::BlockReduce BlockReduce; * * // Allocate shared memory for BlockReduce * __shared__ typename BlockReduce::TempStorage temp_storage; * * // Each thread obtains an input item * int thread_data; * if (threadIdx.x < num_valid) thread_data = ... 
* * // Compute the block-wide max for thread0 * int aggregate = BlockReduce(temp_storage).Reduce(thread_data, cub::Max(), num_valid); * * \endcode * * \tparam ReductionOp [inferred] Binary reduction functor type having member T operator()(const T &a, const T &b) */ template __device__ __forceinline__ T Reduce( T input, ///< [in] Calling thread's input ReductionOp reduction_op, ///< [in] Binary reduction functor int num_valid) ///< [in] Number of threads containing valid elements (may be less than BLOCK_THREADS) { // Determine if we scan skip bounds checking if (num_valid >= BLOCK_THREADS) { return InternalBlockReduce(temp_storage).template Reduce(input, num_valid, reduction_op); } else { return InternalBlockReduce(temp_storage).template Reduce(input, num_valid, reduction_op); } } //@} end member group /******************************************************************//** * \name Summation reductions *********************************************************************/ //@{ /** * \brief Computes a block-wide reduction for thread0 using addition (+) as the reduction operator. Each thread contributes one input element. * * \par * - The return value is undefined in threads other than thread0. * - \rowmajor * - \smemreuse * * \par Snippet * The code snippet below illustrates a sum reduction of 128 integer items that * are partitioned across 128 threads. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockReduce for a 1D block of 128 threads on type int * typedef cub::BlockReduce BlockReduce; * * // Allocate shared memory for BlockReduce * __shared__ typename BlockReduce::TempStorage temp_storage; * * // Each thread obtains an input item * int thread_data; * ... * * // Compute the block-wide sum for thread0 * int aggregate = BlockReduce(temp_storage).Sum(thread_data); * * \endcode * */ __device__ __forceinline__ T Sum( T input) ///< [in] Calling thread's input { return InternalBlockReduce(temp_storage).template Sum(input, BLOCK_THREADS); } /** * \brief Computes a block-wide reduction for thread0 using addition (+) as the reduction operator. Each thread contributes an array of consecutive input elements. * * \par * - The return value is undefined in threads other than thread0. * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates a sum reduction of 512 integer items that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockReduce for a 1D block of 128 threads on type int * typedef cub::BlockReduce BlockReduce; * * // Allocate shared memory for BlockReduce * __shared__ typename BlockReduce::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... * * // Compute the block-wide sum for thread0 * int aggregate = BlockReduce(temp_storage).Sum(thread_data); * * \endcode * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. */ template __device__ __forceinline__ T Sum( T (&inputs)[ITEMS_PER_THREAD]) ///< [in] Calling thread's input segment { // Reduce partials T partial = internal::ThreadReduce(inputs, cub::Sum()); return Sum(partial); } /** * \brief Computes a block-wide reduction for thread0 using addition (+) as the reduction operator. The first \p num_valid threads each contribute one input element. 
* * \par * - The return value is undefined in threads other than thread0. * - \rowmajor * - \smemreuse * * \par Snippet * The code snippet below illustrates a sum reduction of a partially-full tile of integer items that * are partitioned across 128 threads. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(int num_valid, ...) * { * // Specialize BlockReduce for a 1D block of 128 threads on type int * typedef cub::BlockReduce BlockReduce; * * // Allocate shared memory for BlockReduce * __shared__ typename BlockReduce::TempStorage temp_storage; * * // Each thread obtains an input item (up to num_items) * int thread_data; * if (threadIdx.x < num_valid) * thread_data = ... * * // Compute the block-wide sum for thread0 * int aggregate = BlockReduce(temp_storage).Sum(thread_data, num_valid); * * \endcode * */ __device__ __forceinline__ T Sum( T input, ///< [in] Calling thread's input int num_valid) ///< [in] Number of threads containing valid elements (may be less than BLOCK_THREADS) { // Determine if we scan skip bounds checking if (num_valid >= BLOCK_THREADS) { return InternalBlockReduce(temp_storage).template Sum(input, num_valid); } else { return InternalBlockReduce(temp_storage).template Sum(input, num_valid); } } //@} end member group }; /** * \example example_block_reduce.cu */ } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/block/block_scan.cuh000066400000000000000000003111021411340063500222450ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * The cub::BlockScan class provides [collective](index.html#sec0) methods for computing a parallel prefix sum/scan of items partitioned across a CUDA thread block. 
*/ #pragma once #include "specializations/block_scan_raking.cuh" #include "specializations/block_scan_warp_scans.cuh" #include "../util_arch.cuh" #include "../util_type.cuh" #include "../util_ptx.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * Algorithmic variants ******************************************************************************/ /** * \brief BlockScanAlgorithm enumerates alternative algorithms for cub::BlockScan to compute a parallel prefix scan across a CUDA thread block. */ enum BlockScanAlgorithm { /** * \par Overview * An efficient "raking reduce-then-scan" prefix scan algorithm. Execution is comprised of five phases: * -# Upsweep sequential reduction in registers (if threads contribute more than one input each). Each thread then places the partial reduction of its item(s) into shared memory. * -# Upsweep sequential reduction in shared memory. Threads within a single warp rake across segments of shared partial reductions. * -# A warp-synchronous Kogge-Stone style exclusive scan within the raking warp. * -# Downsweep sequential exclusive scan in shared memory. Threads within a single warp rake across segments of shared partial reductions, seeded with the warp-scan output. * -# Downsweep sequential scan in registers (if threads contribute more than one input), seeded with the raking scan output. * * \par * \image html block_scan_raking.png *
\p BLOCK_SCAN_RAKING data flow for a hypothetical 16-thread thread block and 4-thread raking warp.
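 *
 * \par
 * A minimal sketch of requesting this variant through BlockScan's ALGORITHM template
 * parameter (the include path, the <int, 128> specialization and the kernel/buffer
 * names are illustrative assumptions; launch with one 128-thread block per
 * 128-element tile):
 * \code
 * #include <cub/cub.cuh>
 *
 * __global__ void PrefixSumKernel(const int *d_in, int *d_out)
 * {
 *     // Specialize BlockScan for a 1D block of 128 threads on type int,
 *     // explicitly requesting the raking variant
 *     typedef cub::BlockScan<int, 128, cub::BLOCK_SCAN_RAKING> BlockScanT;
 *
 *     // Allocate shared memory for BlockScan
 *     __shared__ typename BlockScanT::TempStorage temp_storage;
 *
 *     // Each thread contributes one element and receives its exclusive prefix sum
 *     int thread_data = d_in[threadIdx.x];
 *     BlockScanT(temp_storage).ExclusiveSum(thread_data, thread_data);
 *     d_out[threadIdx.x] = thread_data;
 * }
 * \endcode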
* * \par Performance Considerations * - Although this variant may suffer longer turnaround latencies when the * GPU is under-occupied, it can often provide higher overall throughput * across the GPU when suitably occupied. */ BLOCK_SCAN_RAKING, /** * \par Overview * Similar to cub::BLOCK_SCAN_RAKING, but with fewer shared memory reads at * the expense of higher register pressure. Raking threads preserve their * "upsweep" segment of values in registers while performing warp-synchronous * scan, allowing the "downsweep" not to re-read them from shared memory. */ BLOCK_SCAN_RAKING_MEMOIZE, /** * \par Overview * A quick "tiled warpscans" prefix scan algorithm. Execution is comprised of four phases: * -# Upsweep sequential reduction in registers (if threads contribute more than one input each). Each thread then places the partial reduction of its item(s) into shared memory. * -# Compute a shallow, but inefficient warp-synchronous Kogge-Stone style scan within each warp. * -# A propagation phase where the warp scan outputs in each warp are updated with the aggregate from each preceding warp. * -# Downsweep sequential scan in registers (if threads contribute more than one input), seeded with the raking scan output. * * \par * \image html block_scan_warpscans.png *
\p BLOCK_SCAN_WARP_SCANS data flow for a hypothetical 16-thread thread block and 4-thread raking warp.
* * \par Performance Considerations * - Although this variant may suffer lower overall throughput across the * GPU because due to a heavy reliance on inefficient warpscans, it can * often provide lower turnaround latencies when the GPU is under-occupied. */ BLOCK_SCAN_WARP_SCANS, }; /****************************************************************************** * Block scan ******************************************************************************/ /** * \brief The BlockScan class provides [collective](index.html#sec0) methods for computing a parallel prefix sum/scan of items partitioned across a CUDA thread block. ![](block_scan_logo.png) * \ingroup BlockModule * * \tparam T Data type being scanned * \tparam BLOCK_DIM_X The thread block length in threads along the X dimension * \tparam ALGORITHM [optional] cub::BlockScanAlgorithm enumerator specifying the underlying algorithm to use (default: cub::BLOCK_SCAN_RAKING) * \tparam BLOCK_DIM_Y [optional] The thread block length in threads along the Y dimension (default: 1) * \tparam BLOCK_DIM_Z [optional] The thread block length in threads along the Z dimension (default: 1) * \tparam PTX_ARCH [optional] \ptxversion * * \par Overview * - Given a list of input elements and a binary reduction operator, a [prefix scan](http://en.wikipedia.org/wiki/Prefix_sum) * produces an output list where each element is computed to be the reduction * of the elements occurring earlier in the input list. Prefix sum * connotes a prefix scan with the addition operator. The term \em inclusive indicates * that the ith output reduction incorporates the ith input. * The term \em exclusive indicates the ith input is not incorporated into * the ith output reduction. * - \rowmajor * - BlockScan can be optionally specialized by algorithm to accommodate different workload profiles: * -# cub::BLOCK_SCAN_RAKING. An efficient (high throughput) "raking reduce-then-scan" prefix scan algorithm. [More...](\ref cub::BlockScanAlgorithm) * -# cub::BLOCK_SCAN_RAKING_MEMOIZE. Similar to cub::BLOCK_SCAN_RAKING, but having higher throughput at the expense of additional register pressure for intermediate storage. [More...](\ref cub::BlockScanAlgorithm) * -# cub::BLOCK_SCAN_WARP_SCANS. A quick (low latency) "tiled warpscans" prefix scan algorithm. [More...](\ref cub::BlockScanAlgorithm) * * \par Performance Considerations * - \granularity * - Uses special instructions when applicable (e.g., warp \p SHFL) * - Uses synchronization-free communication between warp lanes when applicable * - Invokes a minimal number of minimal block-wide synchronization barriers (only * one or two depending on algorithm selection) * - Incurs zero bank conflicts for most types * - Computation is slightly more efficient (i.e., having lower instruction overhead) for: * - Prefix sum variants (vs. generic scan) * - \blocksize * - See cub::BlockScanAlgorithm for performance details regarding algorithmic alternatives * * \par A Simple Example * \blockcollective{BlockScan} * \par * The code snippet below illustrates an exclusive prefix sum of 512 integer items that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) 
* { * // Specialize BlockScan for a 1D block of 128 threads on type int * typedef cub::BlockScan BlockScan; * * // Allocate shared memory for BlockScan * __shared__ typename BlockScan::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... * * // Collectively compute the block-wide exclusive prefix sum * BlockScan(temp_storage).ExclusiveSum(thread_data, thread_data); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is * {[1,1,1,1], [1,1,1,1], ..., [1,1,1,1]}. * The corresponding output \p thread_data in those threads will be * {[0,1,2,3], [4,5,6,7], ..., [508,509,510,511]}. * */ template < typename T, int BLOCK_DIM_X, BlockScanAlgorithm ALGORITHM = BLOCK_SCAN_RAKING, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH> class BlockScan { private: /****************************************************************************** * Constants and type definitions ******************************************************************************/ /// Constants enum { /// The thread block size in threads BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, }; /** * Ensure the template parameterization meets the requirements of the * specified algorithm. Currently, the BLOCK_SCAN_WARP_SCANS policy * cannot be used with thread block sizes not a multiple of the * architectural warp size. */ static const BlockScanAlgorithm SAFE_ALGORITHM = ((ALGORITHM == BLOCK_SCAN_WARP_SCANS) && (BLOCK_THREADS % CUB_WARP_THREADS(PTX_ARCH) != 0)) ? BLOCK_SCAN_RAKING : ALGORITHM; typedef BlockScanWarpScans WarpScans; typedef BlockScanRaking Raking; /// Define the delegate type for the desired algorithm typedef typename If<(SAFE_ALGORITHM == BLOCK_SCAN_WARP_SCANS), WarpScans, Raking>::Type InternalBlockScan; /// Shared memory storage layout type for BlockScan typedef typename InternalBlockScan::TempStorage _TempStorage; /****************************************************************************** * Thread fields ******************************************************************************/ /// Shared storage reference _TempStorage &temp_storage; /// Linear thread-id unsigned int linear_tid; /****************************************************************************** * Utility methods ******************************************************************************/ /// Internal storage allocator __device__ __forceinline__ _TempStorage& PrivateStorage() { __shared__ _TempStorage private_storage; return private_storage; } /****************************************************************************** * Public types ******************************************************************************/ public: /// \smemstorage{BlockScan} struct TempStorage : Uninitialized<_TempStorage> {}; /******************************************************************//** * \name Collective constructors *********************************************************************/ //@{ /** * \brief Collective constructor using a private static allocation of shared memory as temporary storage. */ __device__ __forceinline__ BlockScan() : temp_storage(PrivateStorage()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} /** * \brief Collective constructor using the specified memory allocation as temporary storage. 
*/ __device__ __forceinline__ BlockScan( TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage : temp_storage(temp_storage.Alias()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} //@} end member group /******************************************************************//** * \name Exclusive prefix sum operations *********************************************************************/ //@{ /** * \brief Computes an exclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes one input element. The value of 0 is applied as the initial value, and is assigned to \p output in thread0. * * \par * - \identityzero * - \rowmajor * - \smemreuse * * \par Snippet * The code snippet below illustrates an exclusive prefix sum of 128 integer items that * are partitioned across 128 threads. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockScan for a 1D block of 128 threads on type int * typedef cub::BlockScan BlockScan; * * // Allocate shared memory for BlockScan * __shared__ typename BlockScan::TempStorage temp_storage; * * // Obtain input item for each thread * int thread_data; * ... * * // Collectively compute the block-wide exclusive prefix sum * BlockScan(temp_storage).ExclusiveSum(thread_data, thread_data); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is 1, 1, ..., 1. The * corresponding output \p thread_data in those threads will be 0, 1, ..., 127. * */ __device__ __forceinline__ void ExclusiveSum( T input, ///< [in] Calling thread's input item T &output) ///< [out] Calling thread's output item (may be aliased to \p input) { T initial_value = 0; ExclusiveScan(input, output, initial_value, cub::Sum()); } /** * \brief Computes an exclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes one input element. The value of 0 is applied as the initial value, and is assigned to \p output in thread0. Also provides every thread with the block-wide \p block_aggregate of all inputs. * * \par * - \identityzero * - \rowmajor * - \smemreuse * * \par Snippet * The code snippet below illustrates an exclusive prefix sum of 128 integer items that * are partitioned across 128 threads. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockScan for a 1D block of 128 threads on type int * typedef cub::BlockScan BlockScan; * * // Allocate shared memory for BlockScan * __shared__ typename BlockScan::TempStorage temp_storage; * * // Obtain input item for each thread * int thread_data; * ... * * // Collectively compute the block-wide exclusive prefix sum * int block_aggregate; * BlockScan(temp_storage).ExclusiveSum(thread_data, thread_data, block_aggregate); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is 1, 1, ..., 1. The * corresponding output \p thread_data in those threads will be 0, 1, ..., 127. * Furthermore the value \p 128 will be stored in \p block_aggregate for all threads. 
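 *
 * \par
 * A self-contained sketch of the same pattern (the include, the <int, 128>
 * specialization and the buffer names d_in, d_out and d_total are illustrative
 * assumptions; launch with one 128-thread block per 128-element tile):
 * \code
 * #include <cub/cub.cuh>
 *
 * __global__ void ExclusiveSumKernel(const int *d_in, int *d_out, int *d_total)
 * {
 *     // Specialize BlockScan for a 1D block of 128 threads on type int
 *     typedef cub::BlockScan<int, 128> BlockScan;
 *     __shared__ typename BlockScan::TempStorage temp_storage;
 *
 *     // Each thread contributes one element
 *     int thread_data = d_in[threadIdx.x];
 *
 *     // Exclusive prefix sum; block_aggregate receives the block-wide total in every thread
 *     int block_aggregate;
 *     BlockScan(temp_storage).ExclusiveSum(thread_data, thread_data, block_aggregate);
 *
 *     d_out[threadIdx.x] = thread_data;
 *     if (threadIdx.x == 0) *d_total = block_aggregate;
 * }
 * \endcode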
* */ __device__ __forceinline__ void ExclusiveSum( T input, ///< [in] Calling thread's input item T &output, ///< [out] Calling thread's output item (may be aliased to \p input) T &block_aggregate) ///< [out] block-wide aggregate reduction of input items { T initial_value = 0; ExclusiveScan(input, output, initial_value, cub::Sum(), block_aggregate); } /** * \brief Computes an exclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes one input element. Instead of using 0 as the block-wide prefix, the call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. * * \par * - \identityzero * - The \p block_prefix_callback_op functor must implement a member function T operator()(T block_aggregate). * The functor's input parameter \p block_aggregate is the same value also returned by the scan operation. * The functor will be invoked by the first warp of threads in the block, however only the return value from * lane0 is applied as the block-wide prefix. Can be stateful. * - \rowmajor * - \smemreuse * * \par Snippet * The code snippet below illustrates a single thread block that progressively * computes an exclusive prefix sum over multiple "tiles" of input using a * prefix functor to maintain a running total between block-wide scans. Each tile consists * of 128 integer items that are partitioned across 128 threads. * \par * \code * #include // or equivalently * * // A stateful callback functor that maintains a running prefix to be applied * // during consecutive scan operations. * struct BlockPrefixCallbackOp * { * // Running prefix * int running_total; * * // Constructor * __device__ BlockPrefixCallbackOp(int running_total) : running_total(running_total) {} * * // Callback operator to be entered by the first warp of threads in the block. * // Thread-0 is responsible for returning a value for seeding the block-wide scan. * __device__ int operator()(int block_aggregate) * { * int old_prefix = running_total; * running_total += block_aggregate; * return old_prefix; * } * }; * * __global__ void ExampleKernel(int *d_data, int num_items, ...) * { * // Specialize BlockScan for a 1D block of 128 threads * typedef cub::BlockScan BlockScan; * * // Allocate shared memory for BlockScan * __shared__ typename BlockScan::TempStorage temp_storage; * * // Initialize running total * BlockPrefixCallbackOp prefix_op(0); * * // Have the block iterate over segments of items * for (int block_offset = 0; block_offset < num_items; block_offset += 128) * { * // Load a segment of consecutive items that are blocked across threads * int thread_data = d_data[block_offset]; * * // Collectively compute the block-wide exclusive prefix sum * BlockScan(temp_storage).ExclusiveSum( * thread_data, thread_data, prefix_op); * CTA_SYNC(); * * // Store scanned items to output segment * d_data[block_offset] = thread_data; * } * \endcode * \par * Suppose the input \p d_data is 1, 1, 1, 1, 1, 1, 1, 1, .... * The corresponding output for the first segment will be 0, 1, ..., 127. * The output for the second segment will be 128, 129, ..., 255. 
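 *
 * \par
 * The snippet above assumes a single thread block that walks all tiles while carrying
 * the running prefix, so it would typically be launched with a one-block grid (an
 * illustrative host-side sketch; the kernel and buffer names are taken from the
 * snippet above):
 * \code
 * // One 128-thread block iterates over every 128-item tile of d_data
 * ExampleKernel<<<1, 128>>>(d_data, num_items);
 * \endcode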
* * \tparam BlockPrefixCallbackOp [inferred] Call-back functor type having member T operator()(T block_aggregate) */ template __device__ __forceinline__ void ExclusiveSum( T input, ///< [in] Calling thread's input item T &output, ///< [out] Calling thread's output item (may be aliased to \p input) BlockPrefixCallbackOp &block_prefix_callback_op) ///< [in-out] [warp0 only] Call-back functor for specifying a block-wide prefix to be applied to the logical input sequence. { ExclusiveScan(input, output, cub::Sum(), block_prefix_callback_op); } //@} end member group /******************************************************************//** * \name Exclusive prefix sum operations (multiple data per thread) *********************************************************************/ //@{ /** * \brief Computes an exclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes an array of consecutive input elements. The value of 0 is applied as the initial value, and is assigned to \p output[0] in thread0. * * \par * - \identityzero * - \blocked * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates an exclusive prefix sum of 512 integer items that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockScan for a 1D block of 128 threads on type int * typedef cub::BlockScan BlockScan; * * // Allocate shared memory for BlockScan * __shared__ typename BlockScan::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... * * // Collectively compute the block-wide exclusive prefix sum * BlockScan(temp_storage).ExclusiveSum(thread_data, thread_data); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is { [1,1,1,1], [1,1,1,1], ..., [1,1,1,1] }. The * corresponding output \p thread_data in those threads will be { [0,1,2,3], [4,5,6,7], ..., [508,509,510,511] }. * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. */ template __device__ __forceinline__ void ExclusiveSum( T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items T (&output)[ITEMS_PER_THREAD]) ///< [out] Calling thread's output items (may be aliased to \p input) { T initial_value = 0; ExclusiveScan(input, output, initial_value, cub::Sum()); } /** * \brief Computes an exclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes an array of consecutive input elements. The value of 0 is applied as the initial value, and is assigned to \p output[0] in thread0. Also provides every thread with the block-wide \p block_aggregate of all inputs. * * \par * - \identityzero * - \blocked * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates an exclusive prefix sum of 512 integer items that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) 
* { * // Specialize BlockScan for a 1D block of 128 threads on type int * typedef cub::BlockScan BlockScan; * * // Allocate shared memory for BlockScan * __shared__ typename BlockScan::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... * * // Collectively compute the block-wide exclusive prefix sum * int block_aggregate; * BlockScan(temp_storage).ExclusiveSum(thread_data, thread_data, block_aggregate); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is { [1,1,1,1], [1,1,1,1], ..., [1,1,1,1] }. The * corresponding output \p thread_data in those threads will be { [0,1,2,3], [4,5,6,7], ..., [508,509,510,511] }. * Furthermore the value \p 512 will be stored in \p block_aggregate for all threads. * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. */ template __device__ __forceinline__ void ExclusiveSum( T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) T &block_aggregate) ///< [out] block-wide aggregate reduction of input items { // Reduce consecutive thread items in registers T initial_value = 0; ExclusiveScan(input, output, initial_value, cub::Sum(), block_aggregate); } /** * \brief Computes an exclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes an array of consecutive input elements. Instead of using 0 as the block-wide prefix, the call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. * * \par * - \identityzero * - The \p block_prefix_callback_op functor must implement a member function T operator()(T block_aggregate). * The functor's input parameter \p block_aggregate is the same value also returned by the scan operation. * The functor will be invoked by the first warp of threads in the block, however only the return value from * lane0 is applied as the block-wide prefix. Can be stateful. * - \blocked * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates a single thread block that progressively * computes an exclusive prefix sum over multiple "tiles" of input using a * prefix functor to maintain a running total between block-wide scans. Each tile consists * of 512 integer items that are partitioned in a [blocked arrangement](index.html#sec5sec3) * across 128 threads where each thread owns 4 consecutive items. * \par * \code * #include // or equivalently * * // A stateful callback functor that maintains a running prefix to be applied * // during consecutive scan operations. * struct BlockPrefixCallbackOp * { * // Running prefix * int running_total; * * // Constructor * __device__ BlockPrefixCallbackOp(int running_total) : running_total(running_total) {} * * // Callback operator to be entered by the first warp of threads in the block. * // Thread-0 is responsible for returning a value for seeding the block-wide scan. * __device__ int operator()(int block_aggregate) * { * int old_prefix = running_total; * running_total += block_aggregate; * return old_prefix; * } * }; * * __global__ void ExampleKernel(int *d_data, int num_items, ...) 
* { * // Specialize BlockLoad, BlockStore, and BlockScan for a 1D block of 128 threads, 4 ints per thread * typedef cub::BlockLoad BlockLoad; * typedef cub::BlockStore BlockStore; * typedef cub::BlockScan BlockScan; * * // Allocate aliased shared memory for BlockLoad, BlockStore, and BlockScan * __shared__ union { * typename BlockLoad::TempStorage load; * typename BlockScan::TempStorage scan; * typename BlockStore::TempStorage store; * } temp_storage; * * // Initialize running total * BlockPrefixCallbackOp prefix_op(0); * * // Have the block iterate over segments of items * for (int block_offset = 0; block_offset < num_items; block_offset += 128 * 4) * { * // Load a segment of consecutive items that are blocked across threads * int thread_data[4]; * BlockLoad(temp_storage.load).Load(d_data + block_offset, thread_data); * CTA_SYNC(); * * // Collectively compute the block-wide exclusive prefix sum * int block_aggregate; * BlockScan(temp_storage.scan).ExclusiveSum( * thread_data, thread_data, prefix_op); * CTA_SYNC(); * * // Store scanned items to output segment * BlockStore(temp_storage.store).Store(d_data + block_offset, thread_data); * CTA_SYNC(); * } * \endcode * \par * Suppose the input \p d_data is 1, 1, 1, 1, 1, 1, 1, 1, .... * The corresponding output for the first segment will be 0, 1, 2, 3, ..., 510, 511. * The output for the second segment will be 512, 513, 514, 515, ..., 1022, 1023. * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam BlockPrefixCallbackOp [inferred] Call-back functor type having member T operator()(T block_aggregate) */ template < int ITEMS_PER_THREAD, typename BlockPrefixCallbackOp> __device__ __forceinline__ void ExclusiveSum( T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) BlockPrefixCallbackOp &block_prefix_callback_op) ///< [in-out] [warp0 only] Call-back functor for specifying a block-wide prefix to be applied to the logical input sequence. { ExclusiveScan(input, output, cub::Sum(), block_prefix_callback_op); } //@} end member group // Exclusive prefix sums /******************************************************************//** * \name Exclusive prefix scan operations *********************************************************************/ //@{ /** * \brief Computes an exclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. * * \par * - Supports non-commutative scan operators. * - \rowmajor * - \smemreuse * * \par Snippet * The code snippet below illustrates an exclusive prefix max scan of 128 integer items that * are partitioned across 128 threads. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockScan for a 1D block of 128 threads on type int * typedef cub::BlockScan BlockScan; * * // Allocate shared memory for BlockScan * __shared__ typename BlockScan::TempStorage temp_storage; * * // Obtain input item for each thread * int thread_data; * ... * * // Collectively compute the block-wide exclusive prefix max scan * BlockScan(temp_storage).ExclusiveScan(thread_data, thread_data, INT_MIN, cub::Max()); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is 0, -1, 2, -3, ..., 126, -127. The * corresponding output \p thread_data in those threads will be INT_MIN, 0, 0, 2, ..., 124, 126. 
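 *
 * \par
 * Any type with a member T operator()(const T &a, const T &b) can serve as the scan
 * operator in place of cub::Max(). A sketch of a hypothetical user-defined functor
 * (illustrative only; any such operator should be associative):
 * \code
 * struct AbsMax
 * {
 *     // Returns the larger absolute value (always non-negative, and associative)
 *     __device__ __forceinline__ int operator()(const int &a, const int &b) const
 *     {
 *         int aa = (a < 0) ? -a : a;
 *         int bb = (b < 0) ? -b : b;
 *         return (aa > bb) ? aa : bb;
 *     }
 * };
 *
 * // Used with the same overload as above:
 * // BlockScan(temp_storage).ExclusiveScan(thread_data, thread_data, 0, AbsMax());
 * \endcode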
* * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) */ template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input item T &output, ///< [out] Calling thread's output item (may be aliased to \p input) T initial_value, ///< [in] Initial value to seed the exclusive scan (and is assigned to \p output[0] in thread0) ScanOp scan_op) ///< [in] Binary scan functor { InternalBlockScan(temp_storage).ExclusiveScan(input, output, initial_value, scan_op); } /** * \brief Computes an exclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. * * \par * - Supports non-commutative scan operators. * - \rowmajor * - \smemreuse * * \par Snippet * The code snippet below illustrates an exclusive prefix max scan of 128 integer items that * are partitioned across 128 threads. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockScan for a 1D block of 128 threads on type int * typedef cub::BlockScan BlockScan; * * // Allocate shared memory for BlockScan * __shared__ typename BlockScan::TempStorage temp_storage; * * // Obtain input item for each thread * int thread_data; * ... * * // Collectively compute the block-wide exclusive prefix max scan * int block_aggregate; * BlockScan(temp_storage).ExclusiveScan(thread_data, thread_data, INT_MIN, cub::Max(), block_aggregate); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is 0, -1, 2, -3, ..., 126, -127. The * corresponding output \p thread_data in those threads will be INT_MIN, 0, 0, 2, ..., 124, 126. * Furthermore the value \p 126 will be stored in \p block_aggregate for all threads. * * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) */ template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input items T &output, ///< [out] Calling thread's output items (may be aliased to \p input) T initial_value, ///< [in] Initial value to seed the exclusive scan (and is assigned to \p output[0] in thread0) ScanOp scan_op, ///< [in] Binary scan functor T &block_aggregate) ///< [out] block-wide aggregate reduction of input items { InternalBlockScan(temp_storage).ExclusiveScan(input, output, initial_value, scan_op, block_aggregate); } /** * \brief Computes an exclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. the call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. * * \par * - The \p block_prefix_callback_op functor must implement a member function T operator()(T block_aggregate). * The functor's input parameter \p block_aggregate is the same value also returned by the scan operation. * The functor will be invoked by the first warp of threads in the block, however only the return value from * lane0 is applied as the block-wide prefix. Can be stateful. * - Supports non-commutative scan operators. 
* - \rowmajor * - \smemreuse * * \par Snippet * The code snippet below illustrates a single thread block that progressively * computes an exclusive prefix max scan over multiple "tiles" of input using a * prefix functor to maintain a running total between block-wide scans. Each tile consists * of 128 integer items that are partitioned across 128 threads. * \par * \code * #include // or equivalently * * // A stateful callback functor that maintains a running prefix to be applied * // during consecutive scan operations. * struct BlockPrefixCallbackOp * { * // Running prefix * int running_total; * * // Constructor * __device__ BlockPrefixCallbackOp(int running_total) : running_total(running_total) {} * * // Callback operator to be entered by the first warp of threads in the block. * // Thread-0 is responsible for returning a value for seeding the block-wide scan. * __device__ int operator()(int block_aggregate) * { * int old_prefix = running_total; * running_total = (block_aggregate > old_prefix) ? block_aggregate : old_prefix; * return old_prefix; * } * }; * * __global__ void ExampleKernel(int *d_data, int num_items, ...) * { * // Specialize BlockScan for a 1D block of 128 threads * typedef cub::BlockScan BlockScan; * * // Allocate shared memory for BlockScan * __shared__ typename BlockScan::TempStorage temp_storage; * * // Initialize running total * BlockPrefixCallbackOp prefix_op(INT_MIN); * * // Have the block iterate over segments of items * for (int block_offset = 0; block_offset < num_items; block_offset += 128) * { * // Load a segment of consecutive items that are blocked across threads * int thread_data = d_data[block_offset]; * * // Collectively compute the block-wide exclusive prefix max scan * BlockScan(temp_storage).ExclusiveScan( * thread_data, thread_data, INT_MIN, cub::Max(), prefix_op); * CTA_SYNC(); * * // Store scanned items to output segment * d_data[block_offset] = thread_data; * } * \endcode * \par * Suppose the input \p d_data is 0, -1, 2, -3, 4, -5, .... * The corresponding output for the first segment will be INT_MIN, 0, 0, 2, ..., 124, 126. * The output for the second segment will be 126, 128, 128, 130, ..., 252, 254. * * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) * \tparam BlockPrefixCallbackOp [inferred] Call-back functor type having member T operator()(T block_aggregate) */ template < typename ScanOp, typename BlockPrefixCallbackOp> __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input item T &output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan functor BlockPrefixCallbackOp &block_prefix_callback_op) ///< [in-out] [warp0 only] Call-back functor for specifying a block-wide prefix to be applied to the logical input sequence. { InternalBlockScan(temp_storage).ExclusiveScan(input, output, scan_op, block_prefix_callback_op); } //@} end member group // Inclusive prefix sums /******************************************************************//** * \name Exclusive prefix scan operations (multiple data per thread) *********************************************************************/ //@{ /** * \brief Computes an exclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes an array of consecutive input elements. * * \par * - Supports non-commutative scan operators. 
* - \blocked * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates an exclusive prefix max scan of 512 integer items that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockScan for a 1D block of 128 threads on type int * typedef cub::BlockScan BlockScan; * * // Allocate shared memory for BlockScan * __shared__ typename BlockScan::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... * * // Collectively compute the block-wide exclusive prefix max scan * BlockScan(temp_storage).ExclusiveScan(thread_data, thread_data, INT_MIN, cub::Max()); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is * { [0,-1,2,-3], [4,-5,6,-7], ..., [508,-509,510,-511] }. * The corresponding output \p thread_data in those threads will be * { [INT_MIN,0,0,2], [2,4,4,6], ..., [506,508,508,510] }. * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) */ template < int ITEMS_PER_THREAD, typename ScanOp> __device__ __forceinline__ void ExclusiveScan( T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) T initial_value, ///< [in] Initial value to seed the exclusive scan (and is assigned to \p output[0] in thread0) ScanOp scan_op) ///< [in] Binary scan functor { // Reduce consecutive thread items in registers T thread_prefix = internal::ThreadReduce(input, scan_op); // Exclusive thread block-scan ExclusiveScan(thread_prefix, thread_prefix, initial_value, scan_op); // Exclusive scan in registers with prefix as seed internal::ThreadScanExclusive(input, output, scan_op, thread_prefix); } /** * \brief Computes an exclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes an array of consecutive input elements. Also provides every thread with the block-wide \p block_aggregate of all inputs. * * \par * - Supports non-commutative scan operators. * - \blocked * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates an exclusive prefix max scan of 512 integer items that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockScan for a 1D block of 128 threads on type int * typedef cub::BlockScan BlockScan; * * // Allocate shared memory for BlockScan * __shared__ typename BlockScan::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... * * // Collectively compute the block-wide exclusive prefix max scan * int block_aggregate; * BlockScan(temp_storage).ExclusiveScan(thread_data, thread_data, INT_MIN, cub::Max(), block_aggregate); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is { [0,-1,2,-3], [4,-5,6,-7], ..., [508,-509,510,-511] }. The * corresponding output \p thread_data in those threads will be { [INT_MIN,0,0,2], [2,4,4,6], ..., [506,508,508,510] }. 
* Furthermore the value \p 510 will be stored in \p block_aggregate for all threads. * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) */ template < int ITEMS_PER_THREAD, typename ScanOp> __device__ __forceinline__ void ExclusiveScan( T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) T initial_value, ///< [in] Initial value to seed the exclusive scan (and is assigned to \p output[0] in thread0) ScanOp scan_op, ///< [in] Binary scan functor T &block_aggregate) ///< [out] block-wide aggregate reduction of input items { // Reduce consecutive thread items in registers T thread_prefix = internal::ThreadReduce(input, scan_op); // Exclusive thread block-scan ExclusiveScan(thread_prefix, thread_prefix, initial_value, scan_op, block_aggregate); // Exclusive scan in registers with prefix as seed internal::ThreadScanExclusive(input, output, scan_op, thread_prefix); } /** * \brief Computes an exclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes an array of consecutive input elements. the call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. * * \par * - The \p block_prefix_callback_op functor must implement a member function T operator()(T block_aggregate). * The functor's input parameter \p block_aggregate is the same value also returned by the scan operation. * The functor will be invoked by the first warp of threads in the block, however only the return value from * lane0 is applied as the block-wide prefix. Can be stateful. * - Supports non-commutative scan operators. * - \blocked * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates a single thread block that progressively * computes an exclusive prefix max scan over multiple "tiles" of input using a * prefix functor to maintain a running total between block-wide scans. Each tile consists * of 128 integer items that are partitioned across 128 threads. * \par * \code * #include // or equivalently * * // A stateful callback functor that maintains a running prefix to be applied * // during consecutive scan operations. * struct BlockPrefixCallbackOp * { * // Running prefix * int running_total; * * // Constructor * __device__ BlockPrefixCallbackOp(int running_total) : running_total(running_total) {} * * // Callback operator to be entered by the first warp of threads in the block. * // Thread-0 is responsible for returning a value for seeding the block-wide scan. * __device__ int operator()(int block_aggregate) * { * int old_prefix = running_total; * running_total = (block_aggregate > old_prefix) ? block_aggregate : old_prefix; * return old_prefix; * } * }; * * __global__ void ExampleKernel(int *d_data, int num_items, ...) 
* { * // Specialize BlockLoad, BlockStore, and BlockScan for a 1D block of 128 threads, 4 ints per thread * typedef cub::BlockLoad BlockLoad; * typedef cub::BlockStore BlockStore; * typedef cub::BlockScan BlockScan; * * // Allocate aliased shared memory for BlockLoad, BlockStore, and BlockScan * __shared__ union { * typename BlockLoad::TempStorage load; * typename BlockScan::TempStorage scan; * typename BlockStore::TempStorage store; * } temp_storage; * * // Initialize running total * BlockPrefixCallbackOp prefix_op(0); * * // Have the block iterate over segments of items * for (int block_offset = 0; block_offset < num_items; block_offset += 128 * 4) * { * // Load a segment of consecutive items that are blocked across threads * int thread_data[4]; * BlockLoad(temp_storage.load).Load(d_data + block_offset, thread_data); * CTA_SYNC(); * * // Collectively compute the block-wide exclusive prefix max scan * BlockScan(temp_storage.scan).ExclusiveScan( * thread_data, thread_data, INT_MIN, cub::Max(), prefix_op); * CTA_SYNC(); * * // Store scanned items to output segment * BlockStore(temp_storage.store).Store(d_data + block_offset, thread_data); * CTA_SYNC(); * } * \endcode * \par * Suppose the input \p d_data is 0, -1, 2, -3, 4, -5, .... * The corresponding output for the first segment will be INT_MIN, 0, 0, 2, 2, 4, ..., 508, 510. * The output for the second segment will be 510, 512, 512, 514, 514, 516, ..., 1020, 1022. * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) * \tparam BlockPrefixCallbackOp [inferred] Call-back functor type having member T operator()(T block_aggregate) */ template < int ITEMS_PER_THREAD, typename ScanOp, typename BlockPrefixCallbackOp> __device__ __forceinline__ void ExclusiveScan( T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan functor BlockPrefixCallbackOp &block_prefix_callback_op) ///< [in-out] [warp0 only] Call-back functor for specifying a block-wide prefix to be applied to the logical input sequence. { // Reduce consecutive thread items in registers T thread_prefix = internal::ThreadReduce(input, scan_op); // Exclusive thread block-scan ExclusiveScan(thread_prefix, thread_prefix, scan_op, block_prefix_callback_op); // Exclusive scan in registers with prefix as seed internal::ThreadScanExclusive(input, output, scan_op, thread_prefix); } //@} end member group #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document no-initial-value scans /******************************************************************//** * \name Exclusive prefix scan operations (no initial value, single datum per thread) *********************************************************************/ //@{ /** * \brief Computes an exclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. With no initial value, the output computed for thread0 is undefined. * * \par * - Supports non-commutative scan operators. 
* - \rowmajor * - \smemreuse * * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) */ template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input item T &output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op) ///< [in] Binary scan functor { InternalBlockScan(temp_storage).ExclusiveScan(input, output, scan_op); } /** * \brief Computes an exclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. With no initial value, the output computed for thread0 is undefined. * * \par * - Supports non-commutative scan operators. * - \rowmajor * - \smemreuse * * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) */ template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input item T &output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan functor T &block_aggregate) ///< [out] block-wide aggregate reduction of input items { InternalBlockScan(temp_storage).ExclusiveScan(input, output, scan_op, block_aggregate); } //@} end member group /******************************************************************//** * \name Exclusive prefix scan operations (no initial value, multiple data per thread) *********************************************************************/ //@{ /** * \brief Computes an exclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes an array of consecutive input elements. With no initial value, the output computed for thread0 is undefined. * * \par * - Supports non-commutative scan operators. * - \blocked * - \granularity * - \smemreuse * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) */ template < int ITEMS_PER_THREAD, typename ScanOp> __device__ __forceinline__ void ExclusiveScan( T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) ScanOp scan_op) ///< [in] Binary scan functor { // Reduce consecutive thread items in registers T thread_partial = internal::ThreadReduce(input, scan_op); // Exclusive thread block-scan ExclusiveScan(thread_partial, thread_partial, scan_op); // Exclusive scan in registers with prefix internal::ThreadScanExclusive(input, output, scan_op, thread_partial, (linear_tid != 0)); } /** * \brief Computes an exclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes an array of consecutive input elements. Also provides every thread with the block-wide \p block_aggregate of all inputs. With no initial value, the output computed for thread0 is undefined. * * \par * - Supports non-commutative scan operators. * - \blocked * - \granularity * - \smemreuse * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. 
* \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) */ template < int ITEMS_PER_THREAD, typename ScanOp> __device__ __forceinline__ void ExclusiveScan( T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan functor T &block_aggregate) ///< [out] block-wide aggregate reduction of input items { // Reduce consecutive thread items in registers T thread_partial = internal::ThreadReduce(input, scan_op); // Exclusive thread block-scan ExclusiveScan(thread_partial, thread_partial, scan_op, block_aggregate); // Exclusive scan in registers with prefix internal::ThreadScanExclusive(input, output, scan_op, thread_partial, (linear_tid != 0)); } //@} end member group #endif // DOXYGEN_SHOULD_SKIP_THIS // Do not document no-initial-value scans /******************************************************************//** * \name Inclusive prefix sum operations *********************************************************************/ //@{ /** * \brief Computes an inclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes one input element. * * \par * - \rowmajor * - \smemreuse * * \par Snippet * The code snippet below illustrates an inclusive prefix sum of 128 integer items that * are partitioned across 128 threads. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockScan for a 1D block of 128 threads on type int * typedef cub::BlockScan BlockScan; * * // Allocate shared memory for BlockScan * __shared__ typename BlockScan::TempStorage temp_storage; * * // Obtain input item for each thread * int thread_data; * ... * * // Collectively compute the block-wide inclusive prefix sum * BlockScan(temp_storage).InclusiveSum(thread_data, thread_data); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is 1, 1, ..., 1. The * corresponding output \p thread_data in those threads will be 1, 2, ..., 128. * */ __device__ __forceinline__ void InclusiveSum( T input, ///< [in] Calling thread's input item T &output) ///< [out] Calling thread's output item (may be aliased to \p input) { InclusiveScan(input, output, cub::Sum()); } /** * \brief Computes an inclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. * * \par * - \rowmajor * - \smemreuse * * \par Snippet * The code snippet below illustrates an inclusive prefix sum of 128 integer items that * are partitioned across 128 threads. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockScan for a 1D block of 128 threads on type int * typedef cub::BlockScan BlockScan; * * // Allocate shared memory for BlockScan * __shared__ typename BlockScan::TempStorage temp_storage; * * // Obtain input item for each thread * int thread_data; * ... * * // Collectively compute the block-wide inclusive prefix sum * int block_aggregate; * BlockScan(temp_storage).InclusiveSum(thread_data, thread_data, block_aggregate); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is 1, 1, ..., 1. The * corresponding output \p thread_data in those threads will be 1, 2, ..., 128. 
* Furthermore the value \p 128 will be stored in \p block_aggregate for all threads. * */ __device__ __forceinline__ void InclusiveSum( T input, ///< [in] Calling thread's input item T &output, ///< [out] Calling thread's output item (may be aliased to \p input) T &block_aggregate) ///< [out] block-wide aggregate reduction of input items { InclusiveScan(input, output, cub::Sum(), block_aggregate); } /** * \brief Computes an inclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes one input element. Instead of using 0 as the block-wide prefix, the call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. * * \par * - The \p block_prefix_callback_op functor must implement a member function T operator()(T block_aggregate). * The functor's input parameter \p block_aggregate is the same value also returned by the scan operation. * The functor will be invoked by the first warp of threads in the block, however only the return value from * lane0 is applied as the block-wide prefix. Can be stateful. * - \rowmajor * - \smemreuse * * \par Snippet * The code snippet below illustrates a single thread block that progressively * computes an inclusive prefix sum over multiple "tiles" of input using a * prefix functor to maintain a running total between block-wide scans. Each tile consists * of 128 integer items that are partitioned across 128 threads. * \par * \code * #include // or equivalently * * // A stateful callback functor that maintains a running prefix to be applied * // during consecutive scan operations. * struct BlockPrefixCallbackOp * { * // Running prefix * int running_total; * * // Constructor * __device__ BlockPrefixCallbackOp(int running_total) : running_total(running_total) {} * * // Callback operator to be entered by the first warp of threads in the block. * // Thread-0 is responsible for returning a value for seeding the block-wide scan. * __device__ int operator()(int block_aggregate) * { * int old_prefix = running_total; * running_total += block_aggregate; * return old_prefix; * } * }; * * __global__ void ExampleKernel(int *d_data, int num_items, ...) * { * // Specialize BlockScan for a 1D block of 128 threads * typedef cub::BlockScan BlockScan; * * // Allocate shared memory for BlockScan * __shared__ typename BlockScan::TempStorage temp_storage; * * // Initialize running total * BlockPrefixCallbackOp prefix_op(0); * * // Have the block iterate over segments of items * for (int block_offset = 0; block_offset < num_items; block_offset += 128) * { * // Load a segment of consecutive items that are blocked across threads * int thread_data = d_data[block_offset]; * * // Collectively compute the block-wide inclusive prefix sum * BlockScan(temp_storage).InclusiveSum( * thread_data, thread_data, prefix_op); * CTA_SYNC(); * * // Store scanned items to output segment * d_data[block_offset] = thread_data; * } * \endcode * \par * Suppose the input \p d_data is 1, 1, 1, 1, 1, 1, 1, 1, .... * The corresponding output for the first segment will be 1, 2, ..., 128. * The output for the second segment will be 129, 130, ..., 256. 
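 * \par
 * For reference, a self-contained variant of the snippet above with the BlockScan
 * specialization written out. It is an illustrative sketch only: the 1D block of
 * 128 threads, the \p int data type, and the per-thread indexing are assumptions,
 * not requirements of this interface.
 * \par
 * \code
 * #include <cub/cub.cuh>
 *
 * // Stateful functor that carries the running total between tiles
 * struct BlockPrefixCallbackOp
 * {
 *     int running_total;
 *
 *     __device__ BlockPrefixCallbackOp(int running_total) : running_total(running_total) {}
 *
 *     // Invoked by the first warp; lane0's return value seeds the next tile's scan
 *     __device__ int operator()(int block_aggregate)
 *     {
 *         int old_prefix = running_total;
 *         running_total += block_aggregate;
 *         return old_prefix;
 *     }
 * };
 *
 * __global__ void ExampleKernel(int *d_data, int num_items)
 * {
 *     // Specialize BlockScan for a 1D block of 128 threads on type int
 *     typedef cub::BlockScan<int, 128> BlockScan;
 *
 *     // Allocate shared memory for BlockScan
 *     __shared__ typename BlockScan::TempStorage temp_storage;
 *
 *     // Running total starts at zero
 *     BlockPrefixCallbackOp prefix_op(0);
 *
 *     // Scan one 128-item tile per iteration
 *     for (int block_offset = 0; block_offset < num_items; block_offset += 128)
 *     {
 *         // Each thread loads one item of the tile
 *         int thread_data = d_data[block_offset + threadIdx.x];
 *
 *         // Block-wide inclusive prefix sum, seeded by the running prefix
 *         BlockScan(temp_storage).InclusiveSum(thread_data, thread_data, prefix_op);
 *         __syncthreads();
 *
 *         // Write the scanned item back
 *         d_data[block_offset + threadIdx.x] = thread_data;
 *     }
 * }
 * \endcode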
* * \tparam BlockPrefixCallbackOp [inferred] Call-back functor type having member T operator()(T block_aggregate) */ template __device__ __forceinline__ void InclusiveSum( T input, ///< [in] Calling thread's input item T &output, ///< [out] Calling thread's output item (may be aliased to \p input) BlockPrefixCallbackOp &block_prefix_callback_op) ///< [in-out] [warp0 only] Call-back functor for specifying a block-wide prefix to be applied to the logical input sequence. { InclusiveScan(input, output, cub::Sum(), block_prefix_callback_op); } //@} end member group /******************************************************************//** * \name Inclusive prefix sum operations (multiple data per thread) *********************************************************************/ //@{ /** * \brief Computes an inclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes an array of consecutive input elements. * * \par * - \blocked * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates an inclusive prefix sum of 512 integer items that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockScan for a 1D block of 128 threads on type int * typedef cub::BlockScan BlockScan; * * // Allocate shared memory for BlockScan * __shared__ typename BlockScan::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... * * // Collectively compute the block-wide inclusive prefix sum * BlockScan(temp_storage).InclusiveSum(thread_data, thread_data); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is { [1,1,1,1], [1,1,1,1], ..., [1,1,1,1] }. The * corresponding output \p thread_data in those threads will be { [1,2,3,4], [5,6,7,8], ..., [509,510,511,512] }. * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. */ template __device__ __forceinline__ void InclusiveSum( T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items T (&output)[ITEMS_PER_THREAD]) ///< [out] Calling thread's output items (may be aliased to \p input) { if (ITEMS_PER_THREAD == 1) { InclusiveSum(input[0], output[0]); } else { // Reduce consecutive thread items in registers Sum scan_op; T thread_prefix = internal::ThreadReduce(input, scan_op); // Exclusive thread block-scan ExclusiveSum(thread_prefix, thread_prefix); // Inclusive scan in registers with prefix as seed internal::ThreadScanInclusive(input, output, scan_op, thread_prefix, (linear_tid != 0)); } } /** * \brief Computes an inclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes an array of consecutive input elements. Also provides every thread with the block-wide \p block_aggregate of all inputs. * * \par * - \blocked * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates an inclusive prefix sum of 512 integer items that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) 
* { * // Specialize BlockScan for a 1D block of 128 threads on type int * typedef cub::BlockScan BlockScan; * * // Allocate shared memory for BlockScan * __shared__ typename BlockScan::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... * * // Collectively compute the block-wide inclusive prefix sum * int block_aggregate; * BlockScan(temp_storage).InclusiveSum(thread_data, thread_data, block_aggregate); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is * { [1,1,1,1], [1,1,1,1], ..., [1,1,1,1] }. The * corresponding output \p thread_data in those threads will be * { [1,2,3,4], [5,6,7,8], ..., [509,510,511,512] }. * Furthermore the value \p 512 will be stored in \p block_aggregate for all threads. * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) */ template __device__ __forceinline__ void InclusiveSum( T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) T &block_aggregate) ///< [out] block-wide aggregate reduction of input items { if (ITEMS_PER_THREAD == 1) { InclusiveSum(input[0], output[0], block_aggregate); } else { // Reduce consecutive thread items in registers Sum scan_op; T thread_prefix = internal::ThreadReduce(input, scan_op); // Exclusive thread block-scan ExclusiveSum(thread_prefix, thread_prefix, block_aggregate); // Inclusive scan in registers with prefix as seed internal::ThreadScanInclusive(input, output, scan_op, thread_prefix, (linear_tid != 0)); } } /** * \brief Computes an inclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes an array of consecutive input elements. Instead of using 0 as the block-wide prefix, the call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. * * \par * - The \p block_prefix_callback_op functor must implement a member function T operator()(T block_aggregate). * The functor's input parameter \p block_aggregate is the same value also returned by the scan operation. * The functor will be invoked by the first warp of threads in the block, however only the return value from * lane0 is applied as the block-wide prefix. Can be stateful. * - \blocked * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates a single thread block that progressively * computes an inclusive prefix sum over multiple "tiles" of input using a * prefix functor to maintain a running total between block-wide scans. Each tile consists * of 512 integer items that are partitioned in a [blocked arrangement](index.html#sec5sec3) * across 128 threads where each thread owns 4 consecutive items. * \par * \code * #include // or equivalently * * // A stateful callback functor that maintains a running prefix to be applied * // during consecutive scan operations. 
* struct BlockPrefixCallbackOp * { * // Running prefix * int running_total; * * // Constructor * __device__ BlockPrefixCallbackOp(int running_total) : running_total(running_total) {} * * // Callback operator to be entered by the first warp of threads in the block. * // Thread-0 is responsible for returning a value for seeding the block-wide scan. * __device__ int operator()(int block_aggregate) * { * int old_prefix = running_total; * running_total += block_aggregate; * return old_prefix; * } * }; * * __global__ void ExampleKernel(int *d_data, int num_items, ...) * { * // Specialize BlockLoad, BlockStore, and BlockScan for a 1D block of 128 threads, 4 ints per thread * typedef cub::BlockLoad BlockLoad; * typedef cub::BlockStore BlockStore; * typedef cub::BlockScan BlockScan; * * // Allocate aliased shared memory for BlockLoad, BlockStore, and BlockScan * __shared__ union { * typename BlockLoad::TempStorage load; * typename BlockScan::TempStorage scan; * typename BlockStore::TempStorage store; * } temp_storage; * * // Initialize running total * BlockPrefixCallbackOp prefix_op(0); * * // Have the block iterate over segments of items * for (int block_offset = 0; block_offset < num_items; block_offset += 128 * 4) * { * // Load a segment of consecutive items that are blocked across threads * int thread_data[4]; * BlockLoad(temp_storage.load).Load(d_data + block_offset, thread_data); * CTA_SYNC(); * * // Collectively compute the block-wide inclusive prefix sum * BlockScan(temp_storage.scan).IncluisveSum( * thread_data, thread_data, prefix_op); * CTA_SYNC(); * * // Store scanned items to output segment * BlockStore(temp_storage.store).Store(d_data + block_offset, thread_data); * CTA_SYNC(); * } * \endcode * \par * Suppose the input \p d_data is 1, 1, 1, 1, 1, 1, 1, 1, .... * The corresponding output for the first segment will be 1, 2, 3, 4, ..., 511, 512. * The output for the second segment will be 513, 514, 515, 516, ..., 1023, 1024. * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam BlockPrefixCallbackOp [inferred] Call-back functor type having member T operator()(T block_aggregate) */ template < int ITEMS_PER_THREAD, typename BlockPrefixCallbackOp> __device__ __forceinline__ void InclusiveSum( T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) BlockPrefixCallbackOp &block_prefix_callback_op) ///< [in-out] [warp0 only] Call-back functor for specifying a block-wide prefix to be applied to the logical input sequence. { if (ITEMS_PER_THREAD == 1) { InclusiveSum(input[0], output[0], block_prefix_callback_op); } else { // Reduce consecutive thread items in registers Sum scan_op; T thread_prefix = internal::ThreadReduce(input, scan_op); // Exclusive thread block-scan ExclusiveSum(thread_prefix, thread_prefix, block_prefix_callback_op); // Inclusive scan in registers with prefix as seed internal::ThreadScanInclusive(input, output, scan_op, thread_prefix); } } //@} end member group /******************************************************************//** * \name Inclusive prefix scan operations *********************************************************************/ //@{ /** * \brief Computes an inclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. * * \par * - Supports non-commutative scan operators. 
* - \rowmajor * - \smemreuse * * \par Snippet * The code snippet below illustrates an inclusive prefix max scan of 128 integer items that * are partitioned across 128 threads. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockScan for a 1D block of 128 threads on type int * typedef cub::BlockScan BlockScan; * * // Allocate shared memory for BlockScan * __shared__ typename BlockScan::TempStorage temp_storage; * * // Obtain input item for each thread * int thread_data; * ... * * // Collectively compute the block-wide inclusive prefix max scan * BlockScan(temp_storage).InclusiveScan(thread_data, thread_data, cub::Max()); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is 0, -1, 2, -3, ..., 126, -127. The * corresponding output \p thread_data in those threads will be 0, 0, 2, 2, ..., 126, 126. * * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) */ template __device__ __forceinline__ void InclusiveScan( T input, ///< [in] Calling thread's input item T &output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op) ///< [in] Binary scan functor { InternalBlockScan(temp_storage).InclusiveScan(input, output, scan_op); } /** * \brief Computes an inclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. * * \par * - Supports non-commutative scan operators. * - \rowmajor * - \smemreuse * * \par Snippet * The code snippet below illustrates an inclusive prefix max scan of 128 integer items that * are partitioned across 128 threads. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockScan for a 1D block of 128 threads on type int * typedef cub::BlockScan BlockScan; * * // Allocate shared memory for BlockScan * __shared__ typename BlockScan::TempStorage temp_storage; * * // Obtain input item for each thread * int thread_data; * ... * * // Collectively compute the block-wide inclusive prefix max scan * int block_aggregate; * BlockScan(temp_storage).InclusiveScan(thread_data, thread_data, cub::Max(), block_aggregate); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is 0, -1, 2, -3, ..., 126, -127. The * corresponding output \p thread_data in those threads will be 0, 0, 2, 2, ..., 126, 126. * Furthermore the value \p 126 will be stored in \p block_aggregate for all threads. * * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) */ template __device__ __forceinline__ void InclusiveScan( T input, ///< [in] Calling thread's input item T &output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan functor T &block_aggregate) ///< [out] block-wide aggregate reduction of input items { InternalBlockScan(temp_storage).InclusiveScan(input, output, scan_op, block_aggregate); } /** * \brief Computes an inclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. the call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. 
Also provides every thread with the block-wide \p block_aggregate of all inputs. * * \par * - The \p block_prefix_callback_op functor must implement a member function T operator()(T block_aggregate). * The functor's input parameter \p block_aggregate is the same value also returned by the scan operation. * The functor will be invoked by the first warp of threads in the block, however only the return value from * lane0 is applied as the block-wide prefix. Can be stateful. * - Supports non-commutative scan operators. * - \rowmajor * - \smemreuse * * \par Snippet * The code snippet below illustrates a single thread block that progressively * computes an inclusive prefix max scan over multiple "tiles" of input using a * prefix functor to maintain a running total between block-wide scans. Each tile consists * of 128 integer items that are partitioned across 128 threads. * \par * \code * #include // or equivalently * * // A stateful callback functor that maintains a running prefix to be applied * // during consecutive scan operations. * struct BlockPrefixCallbackOp * { * // Running prefix * int running_total; * * // Constructor * __device__ BlockPrefixCallbackOp(int running_total) : running_total(running_total) {} * * // Callback operator to be entered by the first warp of threads in the block. * // Thread-0 is responsible for returning a value for seeding the block-wide scan. * __device__ int operator()(int block_aggregate) * { * int old_prefix = running_total; * running_total = (block_aggregate > old_prefix) ? block_aggregate : old_prefix; * return old_prefix; * } * }; * * __global__ void ExampleKernel(int *d_data, int num_items, ...) * { * // Specialize BlockScan for a 1D block of 128 threads * typedef cub::BlockScan BlockScan; * * // Allocate shared memory for BlockScan * __shared__ typename BlockScan::TempStorage temp_storage; * * // Initialize running total * BlockPrefixCallbackOp prefix_op(INT_MIN); * * // Have the block iterate over segments of items * for (int block_offset = 0; block_offset < num_items; block_offset += 128) * { * // Load a segment of consecutive items that are blocked across threads * int thread_data = d_data[block_offset]; * * // Collectively compute the block-wide inclusive prefix max scan * BlockScan(temp_storage).InclusiveScan( * thread_data, thread_data, cub::Max(), prefix_op); * CTA_SYNC(); * * // Store scanned items to output segment * d_data[block_offset] = thread_data; * } * \endcode * \par * Suppose the input \p d_data is 0, -1, 2, -3, 4, -5, .... * The corresponding output for the first segment will be 0, 0, 2, 2, ..., 126, 126. * The output for the second segment will be 128, 128, 130, 130, ..., 254, 254. * * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) * \tparam BlockPrefixCallbackOp [inferred] Call-back functor type having member T operator()(T block_aggregate) */ template < typename ScanOp, typename BlockPrefixCallbackOp> __device__ __forceinline__ void InclusiveScan( T input, ///< [in] Calling thread's input item T &output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan functor BlockPrefixCallbackOp &block_prefix_callback_op) ///< [in-out] [warp0 only] Call-back functor for specifying a block-wide prefix to be applied to the logical input sequence. 
{ InternalBlockScan(temp_storage).InclusiveScan(input, output, scan_op, block_prefix_callback_op); } //@} end member group /******************************************************************//** * \name Inclusive prefix scan operations (multiple data per thread) *********************************************************************/ //@{ /** * \brief Computes an inclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes an array of consecutive input elements. * * \par * - Supports non-commutative scan operators. * - \blocked * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates an inclusive prefix max scan of 512 integer items that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Specialize BlockScan for a 1D block of 128 threads on type int * typedef cub::BlockScan BlockScan; * * // Allocate shared memory for BlockScan * __shared__ typename BlockScan::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... * * // Collectively compute the block-wide inclusive prefix max scan * BlockScan(temp_storage).InclusiveScan(thread_data, thread_data, cub::Max()); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is { [0,-1,2,-3], [4,-5,6,-7], ..., [508,-509,510,-511] }. The * corresponding output \p thread_data in those threads will be { [0,0,2,2], [4,4,6,6], ..., [508,508,510,510] }. * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) */ template < int ITEMS_PER_THREAD, typename ScanOp> __device__ __forceinline__ void InclusiveScan( T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) ScanOp scan_op) ///< [in] Binary scan functor { if (ITEMS_PER_THREAD == 1) { InclusiveScan(input[0], output[0], scan_op); } else { // Reduce consecutive thread items in registers T thread_prefix = internal::ThreadReduce(input, scan_op); // Exclusive thread block-scan ExclusiveScan(thread_prefix, thread_prefix, scan_op); // Inclusive scan in registers with prefix as seed (first thread does not seed) internal::ThreadScanInclusive(input, output, scan_op, thread_prefix, (linear_tid != 0)); } } /** * \brief Computes an inclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes an array of consecutive input elements. Also provides every thread with the block-wide \p block_aggregate of all inputs. * * \par * - Supports non-commutative scan operators. * - \blocked * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates an inclusive prefix max scan of 512 integer items that * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads * where each thread owns 4 consecutive items. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) 
* { * // Specialize BlockScan for a 1D block of 128 threads on type int * typedef cub::BlockScan BlockScan; * * // Allocate shared memory for BlockScan * __shared__ typename BlockScan::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... * * // Collectively compute the block-wide inclusive prefix max scan * int block_aggregate; * BlockScan(temp_storage).InclusiveScan(thread_data, thread_data, cub::Max(), block_aggregate); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is * { [0,-1,2,-3], [4,-5,6,-7], ..., [508,-509,510,-511] }. * The corresponding output \p thread_data in those threads will be * { [0,0,2,2], [4,4,6,6], ..., [508,508,510,510] }. * Furthermore the value \p 510 will be stored in \p block_aggregate for all threads. * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) */ template < int ITEMS_PER_THREAD, typename ScanOp> __device__ __forceinline__ void InclusiveScan( T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan functor T &block_aggregate) ///< [out] block-wide aggregate reduction of input items { if (ITEMS_PER_THREAD == 1) { InclusiveScan(input[0], output[0], scan_op, block_aggregate); } else { // Reduce consecutive thread items in registers T thread_prefix = internal::ThreadReduce(input, scan_op); // Exclusive thread block-scan (with no initial value) ExclusiveScan(thread_prefix, thread_prefix, scan_op, block_aggregate); // Inclusive scan in registers with prefix as seed (first thread does not seed) internal::ThreadScanInclusive(input, output, scan_op, thread_prefix, (linear_tid != 0)); } } /** * \brief Computes an inclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes an array of consecutive input elements. the call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. * * \par * - The \p block_prefix_callback_op functor must implement a member function T operator()(T block_aggregate). * The functor's input parameter \p block_aggregate is the same value also returned by the scan operation. * The functor will be invoked by the first warp of threads in the block, however only the return value from * lane0 is applied as the block-wide prefix. Can be stateful. * - Supports non-commutative scan operators. * - \blocked * - \granularity * - \smemreuse * * \par Snippet * The code snippet below illustrates a single thread block that progressively * computes an inclusive prefix max scan over multiple "tiles" of input using a * prefix functor to maintain a running total between block-wide scans. Each tile consists * of 128 integer items that are partitioned across 128 threads. * \par * \code * #include // or equivalently * * // A stateful callback functor that maintains a running prefix to be applied * // during consecutive scan operations. 
* struct BlockPrefixCallbackOp * { * // Running prefix * int running_total; * * // Constructor * __device__ BlockPrefixCallbackOp(int running_total) : running_total(running_total) {} * * // Callback operator to be entered by the first warp of threads in the block. * // Thread-0 is responsible for returning a value for seeding the block-wide scan. * __device__ int operator()(int block_aggregate) * { * int old_prefix = running_total; * running_total = (block_aggregate > old_prefix) ? block_aggregate : old_prefix; * return old_prefix; * } * }; * * __global__ void ExampleKernel(int *d_data, int num_items, ...) * { * // Specialize BlockLoad, BlockStore, and BlockScan for a 1D block of 128 threads, 4 ints per thread * typedef cub::BlockLoad BlockLoad; * typedef cub::BlockStore BlockStore; * typedef cub::BlockScan BlockScan; * * // Allocate aliased shared memory for BlockLoad, BlockStore, and BlockScan * __shared__ union { * typename BlockLoad::TempStorage load; * typename BlockScan::TempStorage scan; * typename BlockStore::TempStorage store; * } temp_storage; * * // Initialize running total * BlockPrefixCallbackOp prefix_op(0); * * // Have the block iterate over segments of items * for (int block_offset = 0; block_offset < num_items; block_offset += 128 * 4) * { * // Load a segment of consecutive items that are blocked across threads * int thread_data[4]; * BlockLoad(temp_storage.load).Load(d_data + block_offset, thread_data); * CTA_SYNC(); * * // Collectively compute the block-wide inclusive prefix max scan * BlockScan(temp_storage.scan).InclusiveScan( * thread_data, thread_data, cub::Max(), prefix_op); * CTA_SYNC(); * * // Store scanned items to output segment * BlockStore(temp_storage.store).Store(d_data + block_offset, thread_data); * CTA_SYNC(); * } * \endcode * \par * Suppose the input \p d_data is 0, -1, 2, -3, 4, -5, .... * The corresponding output for the first segment will be 0, 0, 2, 2, 4, 4, ..., 510, 510. * The output for the second segment will be 512, 512, 514, 514, 516, 516, ..., 1022, 1022. * * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) * \tparam BlockPrefixCallbackOp [inferred] Call-back functor type having member T operator()(T block_aggregate) */ template < int ITEMS_PER_THREAD, typename ScanOp, typename BlockPrefixCallbackOp> __device__ __forceinline__ void InclusiveScan( T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan functor BlockPrefixCallbackOp &block_prefix_callback_op) ///< [in-out] [warp0 only] Call-back functor for specifying a block-wide prefix to be applied to the logical input sequence. 
{ if (ITEMS_PER_THREAD == 1) { InclusiveScan(input[0], output[0], scan_op, block_prefix_callback_op); } else { // Reduce consecutive thread items in registers T thread_prefix = internal::ThreadReduce(input, scan_op); // Exclusive thread block-scan ExclusiveScan(thread_prefix, thread_prefix, scan_op, block_prefix_callback_op); // Inclusive scan in registers with prefix as seed internal::ThreadScanInclusive(input, output, scan_op, thread_prefix); } } //@} end member group }; /** * \example example_block_scan.cu */ } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/block/block_shuffle.cuh000066400000000000000000000272741411340063500227730ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * The cub::BlockShuffle class provides [collective](index.html#sec0) methods for shuffling data partitioned across a CUDA thread block. */ #pragma once #include "../util_arch.cuh" #include "../util_ptx.cuh" #include "../util_macro.cuh" #include "../util_type.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief The BlockShuffle class provides [collective](index.html#sec0) methods for shuffling data partitioned across a CUDA thread block. * \ingroup BlockModule * * \tparam T The data type to be exchanged. * \tparam BLOCK_DIM_X The thread block length in threads along the X dimension * \tparam BLOCK_DIM_Y [optional] The thread block length in threads along the Y dimension (default: 1) * \tparam BLOCK_DIM_Z [optional] The thread block length in threads along the Z dimension (default: 1) * \tparam PTX_ARCH [optional] \ptxversion * * \par Overview * It is commonplace for blocks of threads to rearrange data items between * threads. 
The BlockShuffle abstraction allows threads to efficiently shift items * either (a) up to their successor or (b) down to their predecessor. * */ template < typename T, int BLOCK_DIM_X, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH> class BlockShuffle { private: /****************************************************************************** * Constants ******************************************************************************/ enum { BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, LOG_WARP_THREADS = CUB_LOG_WARP_THREADS(PTX_ARCH), WARP_THREADS = 1 << LOG_WARP_THREADS, WARPS = (BLOCK_THREADS + WARP_THREADS - 1) / WARP_THREADS, }; /****************************************************************************** * Type definitions ******************************************************************************/ /// Shared memory storage layout type (last element from each thread's input) struct _TempStorage { T prev[BLOCK_THREADS]; T next[BLOCK_THREADS]; }; public: /// \smemstorage{BlockShuffle} struct TempStorage : Uninitialized<_TempStorage> {}; private: /****************************************************************************** * Thread fields ******************************************************************************/ /// Shared storage reference _TempStorage &temp_storage; /// Linear thread-id unsigned int linear_tid; /****************************************************************************** * Utility methods ******************************************************************************/ /// Internal storage allocator __device__ __forceinline__ _TempStorage& PrivateStorage() { __shared__ _TempStorage private_storage; return private_storage; } public: /******************************************************************//** * \name Collective constructors *********************************************************************/ //@{ /** * \brief Collective constructor using a private static allocation of shared memory as temporary storage. */ __device__ __forceinline__ BlockShuffle() : temp_storage(PrivateStorage()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} /** * \brief Collective constructor using the specified memory allocation as temporary storage. */ __device__ __forceinline__ BlockShuffle( TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage : temp_storage(temp_storage.Alias()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} //@} end member group /******************************************************************//** * \name Shuffle movement *********************************************************************/ //@{ /** * \brief Each threadi obtains the \p input provided by threadi+distance. The offset \p distance may be negative. * * \par * - \smemreuse */ __device__ __forceinline__ void Offset( T input, ///< [in] The input item from the calling thread (threadi) T& output, ///< [out] The \p input item from the successor (or predecessor) thread threadi+distance (may be aliased to \p input). This value is only updated for for threadi when 0 <= (i + \p distance) < BLOCK_THREADS-1 int distance = 1) ///< [in] Offset distance (may be negative) { temp_storage[linear_tid].prev = input; CTA_SYNC(); if ((linear_tid + distance >= 0) && (linear_tid + distance < BLOCK_THREADS)) output = temp_storage[linear_tid + distance].prev; } /** * \brief Each threadi obtains the \p input provided by threadi+distance. 
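 *
 * \par
 * A minimal sketch of the calling pattern (the 1D block of 128 threads, the
 * \p int data type, and the rotation distance of 1 are assumptions chosen for
 * illustration):
 * \par
 * \code
 * #include <cub/cub.cuh>
 *
 * __global__ void ExampleKernel(int *d_data)
 * {
 *     // Specialize BlockShuffle for a 1D block of 128 threads on type int
 *     typedef cub::BlockShuffle<int, 128> BlockShuffle;
 *
 *     // Allocate shared memory for BlockShuffle
 *     __shared__ typename BlockShuffle::TempStorage temp_storage;
 *
 *     // Each thread obtains one input item
 *     int thread_data = d_data[threadIdx.x];
 *
 *     // Each thread receives the item of the thread one rank higher,
 *     // wrapping around the end of the thread block
 *     BlockShuffle(temp_storage).Rotate(thread_data, thread_data, 1);
 * }
 * \endcode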
* * \par * - \smemreuse */ __device__ __forceinline__ void Rotate( T input, ///< [in] The calling thread's input item T& output, ///< [out] The \p input item from thread thread(i+distance>)% (may be aliased to \p input). This value is not updated for threadBLOCK_THREADS-1 unsigned int distance = 1) ///< [in] Offset distance (0 < \p distance < BLOCK_THREADS) { temp_storage[linear_tid].prev = input; CTA_SYNC(); unsigned int offset = threadIdx.x + distance; if (offset >= BLOCK_THREADS) offset -= BLOCK_THREADS; output = temp_storage[offset].prev; } /** * \brief The thread block rotates its [blocked arrangement](index.html#sec5sec3) of \p input items, shifting it up by one item * * \par * - \blocked * - \granularity * - \smemreuse */ template __device__ __forceinline__ void Up( T (&input)[ITEMS_PER_THREAD], ///< [in] The calling thread's input items T (&prev)[ITEMS_PER_THREAD]) ///< [out] The corresponding predecessor items (may be aliased to \p input). The item \p prev[0] is not updated for thread0. { temp_storage[linear_tid].prev = input[ITEMS_PER_THREAD - 1]; CTA_SYNC(); #pragma unroll for (int ITEM = ITEMS_PER_THREAD - 1; ITEM > 0; --ITEM) prev[ITEM] = input[ITEM - 1]; if (linear_tid > 0) prev[0] = temp_storage[linear_tid - 1].prev; } /** * \brief The thread block rotates its [blocked arrangement](index.html#sec5sec3) of \p input items, shifting it up by one item. All threads receive the \p input provided by threadBLOCK_THREADS-1. * * \par * - \blocked * - \granularity * - \smemreuse */ template __device__ __forceinline__ void Up( T (&input)[ITEMS_PER_THREAD], ///< [in] The calling thread's input items T (&prev)[ITEMS_PER_THREAD], ///< [out] The corresponding predecessor items (may be aliased to \p input). The item \p prev[0] is not updated for thread0. T &block_suffix) ///< [out] The item \p input[ITEMS_PER_THREAD-1] from threadBLOCK_THREADS-1, provided to all threads { Up(input, prev); block_suffix = temp_storage[BLOCK_THREADS - 1].prev; } /** * \brief The thread block rotates its [blocked arrangement](index.html#sec5sec3) of \p input items, shifting it down by one item * * \par * - \blocked * - \granularity * - \smemreuse */ template __device__ __forceinline__ void Down( T (&input)[ITEMS_PER_THREAD], ///< [in] The calling thread's input items T (&prev)[ITEMS_PER_THREAD]) ///< [out] The corresponding predecessor items (may be aliased to \p input). The value \p prev[0] is not updated for threadBLOCK_THREADS-1. { temp_storage[linear_tid].prev = input[ITEMS_PER_THREAD - 1]; CTA_SYNC(); #pragma unroll for (int ITEM = ITEMS_PER_THREAD - 1; ITEM > 0; --ITEM) prev[ITEM] = input[ITEM - 1]; if (linear_tid > 0) prev[0] = temp_storage[linear_tid - 1].prev; } /** * \brief The thread block rotates its [blocked arrangement](index.html#sec5sec3) of input items, shifting it down by one item. All threads receive \p input[0] provided by thread0. * * \par * - \blocked * - \granularity * - \smemreuse */ template __device__ __forceinline__ void Down( T (&input)[ITEMS_PER_THREAD], ///< [in] The calling thread's input items T (&prev)[ITEMS_PER_THREAD], ///< [out] The corresponding predecessor items (may be aliased to \p input). The value \p prev[0] is not updated for threadBLOCK_THREADS-1. 
T &block_prefix) ///< [out] The item \p input[0] from thread0, provided to all threads { Up(input, prev); block_prefix = temp_storage[BLOCK_THREADS - 1].prev; } //@} end member group }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/block/block_store.cuh000066400000000000000000001207511411340063500224650ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * Operations for writing linear segments of data from the CUDA thread block */ #pragma once #include #include "block_exchange.cuh" #include "../util_ptx.cuh" #include "../util_macro.cuh" #include "../util_type.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup UtilIo * @{ */ /******************************************************************//** * \name Blocked arrangement I/O (direct) *********************************************************************/ //@{ /** * \brief Store a blocked arrangement of items across a thread block into a linear segment of items. * * \blocked * * \tparam T [inferred] The data type to store. * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam OutputIteratorT [inferred] The random-access iterator type for output \iterator. 
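 *
 * \par
 * A minimal sketch of a call (the 128-thread block, 4 items per thread, and the
 * way the items are generated are assumptions chosen for illustration):
 * \par
 * \code
 * #include <cub/cub.cuh>
 *
 * __global__ void ExampleKernel(int *d_out)
 * {
 *     // Each of the 128 threads owns 4 consecutive items
 *     int thread_items[4];
 *     for (int i = 0; i < 4; ++i)
 *         thread_items[i] = threadIdx.x * 4 + i;
 *
 *     // Write the blocked arrangement to global memory:
 *     // thread 0 writes d_out[0..3], thread 1 writes d_out[4..7], and so on
 *     cub::StoreDirectBlocked((int) threadIdx.x, d_out, thread_items);
 * }
 * \endcode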
*/ template < typename T, int ITEMS_PER_THREAD, typename OutputIteratorT> __device__ __forceinline__ void StoreDirectBlocked( int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to T (&items)[ITEMS_PER_THREAD]) ///< [in] Data to store { OutputIteratorT thread_itr = block_itr + (linear_tid * ITEMS_PER_THREAD); // Store directly in thread-blocked order #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { thread_itr[ITEM] = items[ITEM]; } } /** * \brief Store a blocked arrangement of items across a thread block into a linear segment of items, guarded by range * * \blocked * * \tparam T [inferred] The data type to store. * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam OutputIteratorT [inferred] The random-access iterator type for output \iterator. */ template < typename T, int ITEMS_PER_THREAD, typename OutputIteratorT> __device__ __forceinline__ void StoreDirectBlocked( int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to T (&items)[ITEMS_PER_THREAD], ///< [in] Data to store int valid_items) ///< [in] Number of valid items to write { OutputIteratorT thread_itr = block_itr + (linear_tid * ITEMS_PER_THREAD); // Store directly in thread-blocked order #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { if (ITEM + (linear_tid * ITEMS_PER_THREAD) < valid_items) { thread_itr[ITEM] = items[ITEM]; } } } /** * \brief Store a blocked arrangement of items across a thread block into a linear segment of items. * * \blocked * * The output offset (\p block_ptr + \p block_offset) must be quad-item aligned, * which is the default starting offset returned by \p cudaMalloc() * * \par * The following conditions will prevent vectorization and storing will fall back to cub::BLOCK_STORE_DIRECT: * - \p ITEMS_PER_THREAD is odd * - The data type \p T is not a built-in primitive or CUDA vector type (e.g., \p short, \p int2, \p double, \p float2, etc.) * * \tparam T [inferred] The data type to store. * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * */ template < typename T, int ITEMS_PER_THREAD> __device__ __forceinline__ void StoreDirectBlockedVectorized( int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) T *block_ptr, ///< [in] Input pointer for storing from T (&items)[ITEMS_PER_THREAD]) ///< [in] Data to store { enum { // Maximum CUDA vector size is 4 elements MAX_VEC_SIZE = CUB_MIN(4, ITEMS_PER_THREAD), // Vector size must be a power of two and an even divisor of the items per thread VEC_SIZE = ((((MAX_VEC_SIZE - 1) & MAX_VEC_SIZE) == 0) && ((ITEMS_PER_THREAD % MAX_VEC_SIZE) == 0)) ? 
MAX_VEC_SIZE : 1, VECTORS_PER_THREAD = ITEMS_PER_THREAD / VEC_SIZE, }; // Vector type typedef typename CubVector::Type Vector; // Alias global pointer Vector *block_ptr_vectors = reinterpret_cast(const_cast(block_ptr)); // Alias pointers (use "raw" array here which should get optimized away to prevent conservative PTXAS lmem spilling) Vector raw_vector[VECTORS_PER_THREAD]; T *raw_items = reinterpret_cast(raw_vector); // Copy #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { raw_items[ITEM] = items[ITEM]; } // Direct-store using vector types StoreDirectBlocked(linear_tid, block_ptr_vectors, raw_vector); } //@} end member group /******************************************************************//** * \name Striped arrangement I/O (direct) *********************************************************************/ //@{ /** * \brief Store a striped arrangement of data across the thread block into a linear segment of items. * * \striped * * \tparam BLOCK_THREADS The thread block size in threads * \tparam T [inferred] The data type to store. * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam OutputIteratorT [inferred] The random-access iterator type for output \iterator. */ template < int BLOCK_THREADS, typename T, int ITEMS_PER_THREAD, typename OutputIteratorT> __device__ __forceinline__ void StoreDirectStriped( int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to T (&items)[ITEMS_PER_THREAD]) ///< [in] Data to store { OutputIteratorT thread_itr = block_itr + linear_tid; // Store directly in striped order #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { thread_itr[(ITEM * BLOCK_THREADS)] = items[ITEM]; } } /** * \brief Store a striped arrangement of data across the thread block into a linear segment of items, guarded by range * * \striped * * \tparam BLOCK_THREADS The thread block size in threads * \tparam T [inferred] The data type to store. * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam OutputIteratorT [inferred] The random-access iterator type for output \iterator. */ template < int BLOCK_THREADS, typename T, int ITEMS_PER_THREAD, typename OutputIteratorT> __device__ __forceinline__ void StoreDirectStriped( int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to T (&items)[ITEMS_PER_THREAD], ///< [in] Data to store int valid_items) ///< [in] Number of valid items to write { OutputIteratorT thread_itr = block_itr + linear_tid; // Store directly in striped order #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { if ((ITEM * BLOCK_THREADS) + linear_tid < valid_items) { thread_itr[(ITEM * BLOCK_THREADS)] = items[ITEM]; } } } //@} end member group /******************************************************************//** * \name Warp-striped arrangement I/O (direct) *********************************************************************/ //@{ /** * \brief Store a warp-striped arrangement of data across the thread block into a linear segment of items. 
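 *
 * \par
 * A minimal sketch of a call (the 128-thread block, 4 items per thread, and the
 * item values are assumptions chosen for illustration):
 * \par
 * \code
 * #include <cub/cub.cuh>
 *
 * __global__ void ExampleKernel(int *d_out)
 * {
 *     // Each of the 128 threads holds 4 items in a warp-striped arrangement
 *     int thread_items[4];
 *     for (int i = 0; i < 4; ++i)
 *         thread_items[i] = i;
 *
 *     // Write the warp-striped arrangement to global memory; within its own
 *     // segment, each warp writes its items at a stride of the warp width
 *     cub::StoreDirectWarpStriped((int) threadIdx.x, d_out, thread_items);
 * }
 * \endcode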
* * \warpstriped * * \par Usage Considerations * The number of threads in the thread block must be a multiple of the architecture's warp size. * * \tparam T [inferred] The data type to store. * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam OutputIteratorT [inferred] The random-access iterator type for output \iterator. */ template < typename T, int ITEMS_PER_THREAD, typename OutputIteratorT> __device__ __forceinline__ void StoreDirectWarpStriped( int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to T (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load { int tid = linear_tid & (CUB_PTX_WARP_THREADS - 1); int wid = linear_tid >> CUB_PTX_LOG_WARP_THREADS; int warp_offset = wid * CUB_PTX_WARP_THREADS * ITEMS_PER_THREAD; OutputIteratorT thread_itr = block_itr + warp_offset + tid; // Store directly in warp-striped order #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { thread_itr[(ITEM * CUB_PTX_WARP_THREADS)] = items[ITEM]; } } /** * \brief Store a warp-striped arrangement of data across the thread block into a linear segment of items, guarded by range * * \warpstriped * * \par Usage Considerations * The number of threads in the thread block must be a multiple of the architecture's warp size. * * \tparam T [inferred] The data type to store. * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. * \tparam OutputIteratorT [inferred] The random-access iterator type for output \iterator. */ template < typename T, int ITEMS_PER_THREAD, typename OutputIteratorT> __device__ __forceinline__ void StoreDirectWarpStriped( int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to T (&items)[ITEMS_PER_THREAD], ///< [in] Data to store int valid_items) ///< [in] Number of valid items to write { int tid = linear_tid & (CUB_PTX_WARP_THREADS - 1); int wid = linear_tid >> CUB_PTX_LOG_WARP_THREADS; int warp_offset = wid * CUB_PTX_WARP_THREADS * ITEMS_PER_THREAD; OutputIteratorT thread_itr = block_itr + warp_offset + tid; // Store directly in warp-striped order #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) { if (warp_offset + tid + (ITEM * CUB_PTX_WARP_THREADS) < valid_items) { thread_itr[(ITEM * CUB_PTX_WARP_THREADS)] = items[ITEM]; } } } //@} end member group /** @} */ // end group UtilIo //----------------------------------------------------------------------------- // Generic BlockStore abstraction //----------------------------------------------------------------------------- /** * \brief cub::BlockStoreAlgorithm enumerates alternative algorithms for cub::BlockStore to write a blocked arrangement of items across a CUDA thread block to a linear segment of memory. */ enum BlockStoreAlgorithm { /** * \par Overview * * A [blocked arrangement](index.html#sec5sec3) of data is written * directly to memory. * * \par Performance Considerations * - The utilization of memory transactions (coalescing) decreases as the * access stride between threads increases (i.e., the number items per thread). 
*/ BLOCK_STORE_DIRECT, /** * \par Overview * * A [blocked arrangement](index.html#sec5sec3) of data is written directly * to memory using CUDA's built-in vectorized stores as a coalescing optimization. * For example, st.global.v4.s32 instructions will be generated * when \p T = \p int and \p ITEMS_PER_THREAD % 4 == 0. * * \par Performance Considerations * - The utilization of memory transactions (coalescing) remains high until the the * access stride between threads (i.e., the number items per thread) exceeds the * maximum vector store width (typically 4 items or 64B, whichever is lower). * - The following conditions will prevent vectorization and writing will fall back to cub::BLOCK_STORE_DIRECT: * - \p ITEMS_PER_THREAD is odd * - The \p OutputIteratorT is not a simple pointer type * - The block output offset is not quadword-aligned * - The data type \p T is not a built-in primitive or CUDA vector type (e.g., \p short, \p int2, \p double, \p float2, etc.) */ BLOCK_STORE_VECTORIZE, /** * \par Overview * A [blocked arrangement](index.html#sec5sec3) is locally * transposed and then efficiently written to memory as a [striped arrangement](index.html#sec5sec3). * * \par Performance Considerations * - The utilization of memory transactions (coalescing) remains high regardless * of items written per thread. * - The local reordering incurs slightly longer latencies and throughput than the * direct cub::BLOCK_STORE_DIRECT and cub::BLOCK_STORE_VECTORIZE alternatives. */ BLOCK_STORE_TRANSPOSE, /** * \par Overview * A [blocked arrangement](index.html#sec5sec3) is locally * transposed and then efficiently written to memory as a * [warp-striped arrangement](index.html#sec5sec3) * * \par Usage Considerations * - BLOCK_THREADS must be a multiple of WARP_THREADS * * \par Performance Considerations * - The utilization of memory transactions (coalescing) remains high regardless * of items written per thread. * - The local reordering incurs slightly longer latencies and throughput than the * direct cub::BLOCK_STORE_DIRECT and cub::BLOCK_STORE_VECTORIZE alternatives. */ BLOCK_STORE_WARP_TRANSPOSE, /** * \par Overview * A [blocked arrangement](index.html#sec5sec3) is locally * transposed and then efficiently written to memory as a * [warp-striped arrangement](index.html#sec5sec3) * To reduce the shared memory requirement, only one warp's worth of shared * memory is provisioned and is subsequently time-sliced among warps. * * \par Usage Considerations * - BLOCK_THREADS must be a multiple of WARP_THREADS * * \par Performance Considerations * - The utilization of memory transactions (coalescing) remains high regardless * of items written per thread. * - Provisions less shared memory temporary storage, but incurs larger * latencies than the BLOCK_STORE_WARP_TRANSPOSE alternative. */ BLOCK_STORE_WARP_TRANSPOSE_TIMESLICED, }; /** * \brief The BlockStore class provides [collective](index.html#sec0) data movement methods for writing a [blocked arrangement](index.html#sec5sec3) of items partitioned across a CUDA thread block to a linear segment of memory. ![](block_store_logo.png) * \ingroup BlockModule * \ingroup UtilIo * * \tparam T The type of data to be written. * \tparam BLOCK_DIM_X The thread block length in threads along the X dimension * \tparam ITEMS_PER_THREAD The number of consecutive items partitioned onto each thread. * \tparam ALGORITHM [optional] cub::BlockStoreAlgorithm tuning policy enumeration. default: cub::BLOCK_STORE_DIRECT. 
* \tparam WARP_TIME_SLICING [optional] Whether or not only one warp's worth of shared memory should be allocated and time-sliced among block-warps during any load-related data transpositions (versus each warp having its own storage). (default: false) * \tparam BLOCK_DIM_Y [optional] The thread block length in threads along the Y dimension (default: 1) * \tparam BLOCK_DIM_Z [optional] The thread block length in threads along the Z dimension (default: 1) * \tparam PTX_ARCH [optional] \ptxversion * * \par Overview * - The BlockStore class provides a single data movement abstraction that can be specialized * to implement different cub::BlockStoreAlgorithm strategies. This facilitates different * performance policies for different architectures, data types, granularity sizes, etc. * - BlockStore can be optionally specialized by different data movement strategies: * -# cub::BLOCK_STORE_DIRECT. A [blocked arrangement](index.html#sec5sec3) of data is written * directly to memory. [More...](\ref cub::BlockStoreAlgorithm) * -# cub::BLOCK_STORE_VECTORIZE. A [blocked arrangement](index.html#sec5sec3) * of data is written directly to memory using CUDA's built-in vectorized stores as a * coalescing optimization. [More...](\ref cub::BlockStoreAlgorithm) * -# cub::BLOCK_STORE_TRANSPOSE. A [blocked arrangement](index.html#sec5sec3) * is locally transposed into a [striped arrangement](index.html#sec5sec3) which is * then written to memory. [More...](\ref cub::BlockStoreAlgorithm) * -# cub::BLOCK_STORE_WARP_TRANSPOSE. A [blocked arrangement](index.html#sec5sec3) * is locally transposed into a [warp-striped arrangement](index.html#sec5sec3) which is * then written to memory. [More...](\ref cub::BlockStoreAlgorithm) * - \rowmajor * * \par A Simple Example * \blockcollective{BlockStore} * \par * The code snippet below illustrates the storing of a "blocked" arrangement * of 512 integers across 128 threads (where each thread owns 4 consecutive items) * into a linear segment of memory. The store is specialized for \p BLOCK_STORE_WARP_TRANSPOSE, * meaning items are locally reordered among threads so that memory references will be * efficiently coalesced using a warp-striped access pattern. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(int *d_data, ...) * { * // Specialize BlockStore for a 1D block of 128 threads owning 4 integer items each * typedef cub::BlockStore BlockStore; * * // Allocate shared memory for BlockStore * __shared__ typename BlockStore::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... * * // Store items to linear memory * int thread_data[4]; * BlockStore(temp_storage).Store(d_data, thread_data); * * \endcode * \par * Suppose the set of \p thread_data across the block of threads is * { [0,1,2,3], [4,5,6,7], ..., [508,509,510,511] }. * The output \p d_data will be 0, 1, 2, 3, 4, 5, .... 
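 *
 * \par
 * (Editor's note: a hedged sketch, not part of the original CUB text.)  Switching the tuning
 * policy only changes the typedef.  For example, with cub::BLOCK_STORE_VECTORIZE the store
 * attempts vectorized writes when the destination is a suitably aligned raw pointer and the
 * conditions listed for cub::BLOCK_STORE_VECTORIZE hold, falling back to
 * cub::BLOCK_STORE_DIRECT otherwise:
 * \par
 * \code
 * #include <cub/block/block_store.cuh>
 *
 * // Assumed to be launched with a 1D block of 128 threads
 * __global__ void ExampleVectorizedKernel(int *d_data)
 * {
 *     // Specialize BlockStore for 128 threads owning 4 integer items each
 *     typedef cub::BlockStore<int, 128, 4, cub::BLOCK_STORE_VECTORIZE> BlockStoreVec;
 *     __shared__ typename BlockStoreVec::TempStorage temp_storage;
 *
 *     // Example blocked arrangement: thread t owns items 4*t .. 4*t+3
 *     int thread_data[4];
 *     for (int i = 0; i < 4; ++i)
 *         thread_data[i] = threadIdx.x * 4 + i;
 *
 *     // Attempts st.global.v4 stores for this pointer/type combination
 *     BlockStoreVec(temp_storage).Store(d_data, thread_data);
 * }
 * \endcode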
* */ template < typename T, int BLOCK_DIM_X, int ITEMS_PER_THREAD, BlockStoreAlgorithm ALGORITHM = BLOCK_STORE_DIRECT, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int PTX_ARCH = CUB_PTX_ARCH> class BlockStore { private: /****************************************************************************** * Constants and typed definitions ******************************************************************************/ /// Constants enum { /// The thread block size in threads BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, }; /****************************************************************************** * Algorithmic variants ******************************************************************************/ /// Store helper template struct StoreInternal; /** * BLOCK_STORE_DIRECT specialization of store helper */ template struct StoreInternal { /// Shared memory storage layout type typedef NullType TempStorage; /// Linear thread-id int linear_tid; /// Constructor __device__ __forceinline__ StoreInternal( TempStorage &/*temp_storage*/, int linear_tid) : linear_tid(linear_tid) {} /// Store items into a linear segment of memory template __device__ __forceinline__ void Store( OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to T (&items)[ITEMS_PER_THREAD]) ///< [in] Data to store { StoreDirectBlocked(linear_tid, block_itr, items); } /// Store items into a linear segment of memory, guarded by range template __device__ __forceinline__ void Store( OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to T (&items)[ITEMS_PER_THREAD], ///< [in] Data to store int valid_items) ///< [in] Number of valid items to write { StoreDirectBlocked(linear_tid, block_itr, items, valid_items); } }; /** * BLOCK_STORE_VECTORIZE specialization of store helper */ template struct StoreInternal { /// Shared memory storage layout type typedef NullType TempStorage; /// Linear thread-id int linear_tid; /// Constructor __device__ __forceinline__ StoreInternal( TempStorage &/*temp_storage*/, int linear_tid) : linear_tid(linear_tid) {} /// Store items into a linear segment of memory, specialized for native pointer types (attempts vectorization) __device__ __forceinline__ void Store( T *block_ptr, ///< [in] The thread block's base output iterator for storing to T (&items)[ITEMS_PER_THREAD]) ///< [in] Data to store { StoreDirectBlockedVectorized(linear_tid, block_ptr, items); } /// Store items into a linear segment of memory, specialized for opaque input iterators (skips vectorization) template __device__ __forceinline__ void Store( OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to T (&items)[ITEMS_PER_THREAD]) ///< [in] Data to store { StoreDirectBlocked(linear_tid, block_itr, items); } /// Store items into a linear segment of memory, guarded by range template __device__ __forceinline__ void Store( OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to T (&items)[ITEMS_PER_THREAD], ///< [in] Data to store int valid_items) ///< [in] Number of valid items to write { StoreDirectBlocked(linear_tid, block_itr, items, valid_items); } }; /** * BLOCK_STORE_TRANSPOSE specialization of store helper */ template struct StoreInternal { // BlockExchange utility type for keys typedef BlockExchange BlockExchange; /// Shared memory storage layout type struct _TempStorage : BlockExchange::TempStorage { /// Temporary storage for partially-full block guard volatile int valid_items; }; /// Alias 
wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; /// Thread reference to shared storage _TempStorage &temp_storage; /// Linear thread-id int linear_tid; /// Constructor __device__ __forceinline__ StoreInternal( TempStorage &temp_storage, int linear_tid) : temp_storage(temp_storage.Alias()), linear_tid(linear_tid) {} /// Store items into a linear segment of memory template __device__ __forceinline__ void Store( OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to T (&items)[ITEMS_PER_THREAD]) ///< [in] Data to store { BlockExchange(temp_storage).BlockedToStriped(items); StoreDirectStriped(linear_tid, block_itr, items); } /// Store items into a linear segment of memory, guarded by range template __device__ __forceinline__ void Store( OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to T (&items)[ITEMS_PER_THREAD], ///< [in] Data to store int valid_items) ///< [in] Number of valid items to write { BlockExchange(temp_storage).BlockedToStriped(items); if (linear_tid == 0) temp_storage.valid_items = valid_items; // Move through volatile smem as a workaround to prevent RF spilling on subsequent loads CTA_SYNC(); StoreDirectStriped(linear_tid, block_itr, items, temp_storage.valid_items); } }; /** * BLOCK_STORE_WARP_TRANSPOSE specialization of store helper */ template struct StoreInternal { enum { WARP_THREADS = CUB_WARP_THREADS(PTX_ARCH) }; // Assert BLOCK_THREADS must be a multiple of WARP_THREADS CUB_STATIC_ASSERT((BLOCK_THREADS % WARP_THREADS == 0), "BLOCK_THREADS must be a multiple of WARP_THREADS"); // BlockExchange utility type for keys typedef BlockExchange BlockExchange; /// Shared memory storage layout type struct _TempStorage : BlockExchange::TempStorage { /// Temporary storage for partially-full block guard volatile int valid_items; }; /// Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; /// Thread reference to shared storage _TempStorage &temp_storage; /// Linear thread-id int linear_tid; /// Constructor __device__ __forceinline__ StoreInternal( TempStorage &temp_storage, int linear_tid) : temp_storage(temp_storage.Alias()), linear_tid(linear_tid) {} /// Store items into a linear segment of memory template __device__ __forceinline__ void Store( OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to T (&items)[ITEMS_PER_THREAD]) ///< [in] Data to store { BlockExchange(temp_storage).BlockedToWarpStriped(items); StoreDirectWarpStriped(linear_tid, block_itr, items); } /// Store items into a linear segment of memory, guarded by range template __device__ __forceinline__ void Store( OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to T (&items)[ITEMS_PER_THREAD], ///< [in] Data to store int valid_items) ///< [in] Number of valid items to write { BlockExchange(temp_storage).BlockedToWarpStriped(items); if (linear_tid == 0) temp_storage.valid_items = valid_items; // Move through volatile smem as a workaround to prevent RF spilling on subsequent loads CTA_SYNC(); StoreDirectWarpStriped(linear_tid, block_itr, items, temp_storage.valid_items); } }; /** * BLOCK_STORE_WARP_TRANSPOSE_TIMESLICED specialization of store helper */ template struct StoreInternal { enum { WARP_THREADS = CUB_WARP_THREADS(PTX_ARCH) }; // Assert BLOCK_THREADS must be a multiple of WARP_THREADS CUB_STATIC_ASSERT((BLOCK_THREADS % WARP_THREADS == 0), "BLOCK_THREADS must be a multiple 
of WARP_THREADS"); // BlockExchange utility type for keys typedef BlockExchange BlockExchange; /// Shared memory storage layout type struct _TempStorage : BlockExchange::TempStorage { /// Temporary storage for partially-full block guard volatile int valid_items; }; /// Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; /// Thread reference to shared storage _TempStorage &temp_storage; /// Linear thread-id int linear_tid; /// Constructor __device__ __forceinline__ StoreInternal( TempStorage &temp_storage, int linear_tid) : temp_storage(temp_storage.Alias()), linear_tid(linear_tid) {} /// Store items into a linear segment of memory template __device__ __forceinline__ void Store( OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to T (&items)[ITEMS_PER_THREAD]) ///< [in] Data to store { BlockExchange(temp_storage).BlockedToWarpStriped(items); StoreDirectWarpStriped(linear_tid, block_itr, items); } /// Store items into a linear segment of memory, guarded by range template __device__ __forceinline__ void Store( OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to T (&items)[ITEMS_PER_THREAD], ///< [in] Data to store int valid_items) ///< [in] Number of valid items to write { BlockExchange(temp_storage).BlockedToWarpStriped(items); if (linear_tid == 0) temp_storage.valid_items = valid_items; // Move through volatile smem as a workaround to prevent RF spilling on subsequent loads CTA_SYNC(); StoreDirectWarpStriped(linear_tid, block_itr, items, temp_storage.valid_items); } }; /****************************************************************************** * Type definitions ******************************************************************************/ /// Internal load implementation to use typedef StoreInternal InternalStore; /// Shared memory storage layout type typedef typename InternalStore::TempStorage _TempStorage; /****************************************************************************** * Utility methods ******************************************************************************/ /// Internal storage allocator __device__ __forceinline__ _TempStorage& PrivateStorage() { __shared__ _TempStorage private_storage; return private_storage; } /****************************************************************************** * Thread fields ******************************************************************************/ /// Thread reference to shared storage _TempStorage &temp_storage; /// Linear thread-id int linear_tid; public: /// \smemstorage{BlockStore} struct TempStorage : Uninitialized<_TempStorage> {}; /******************************************************************//** * \name Collective constructors *********************************************************************/ //@{ /** * \brief Collective constructor using a private static allocation of shared memory as temporary storage. */ __device__ __forceinline__ BlockStore() : temp_storage(PrivateStorage()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} /** * \brief Collective constructor using the specified memory allocation as temporary storage. 
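 *
 * \par
 * (Editor's note: a hedged sketch, not part of the original CUB text.)  When shared memory is
 * tight, the TempStorage of several collectives can be placed in a union and time-shared,
 * provided the block synchronizes between uses.  The companion cub::BlockLoad collective from
 * block_load.cuh is assumed here for illustration:
 * \par
 * \code
 * __shared__ union
 * {
 *     typename cub::BlockLoad<int, 128, 4>::TempStorage  load;
 *     typename cub::BlockStore<int, 128, 4>::TempStorage store;
 * } temp_storage;
 *
 * // ... load phase using temp_storage.load ...
 * __syncthreads();   // guard the aliased storage before re-using it
 * // ... store phase using temp_storage.store ...
 * \endcode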
*/ __device__ __forceinline__ BlockStore( TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage : temp_storage(temp_storage.Alias()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} //@} end member group /******************************************************************//** * \name Data movement *********************************************************************/ //@{ /** * \brief Store items into a linear segment of memory. * * \par * - \blocked * - \smemreuse * * \par Snippet * The code snippet below illustrates the storing of a "blocked" arrangement * of 512 integers across 128 threads (where each thread owns 4 consecutive items) * into a linear segment of memory. The store is specialized for \p BLOCK_STORE_WARP_TRANSPOSE, * meaning items are locally reordered among threads so that memory references will be * efficiently coalesced using a warp-striped access pattern. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(int *d_data, ...) * { * // Specialize BlockStore for a 1D block of 128 threads owning 4 integer items each * typedef cub::BlockStore BlockStore; * * // Allocate shared memory for BlockStore * __shared__ typename BlockStore::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... * * // Store items to linear memory * int thread_data[4]; * BlockStore(temp_storage).Store(d_data, thread_data); * * \endcode * \par * Suppose the set of \p thread_data across the block of threads is * { [0,1,2,3], [4,5,6,7], ..., [508,509,510,511] }. * The output \p d_data will be 0, 1, 2, 3, 4, 5, .... * */ template __device__ __forceinline__ void Store( OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to T (&items)[ITEMS_PER_THREAD]) ///< [in] Data to store { InternalStore(temp_storage, linear_tid).Store(block_itr, items); } /** * \brief Store items into a linear segment of memory, guarded by range. * * \par * - \blocked * - \smemreuse * * \par Snippet * The code snippet below illustrates the guarded storing of a "blocked" arrangement * of 512 integers across 128 threads (where each thread owns 4 consecutive items) * into a linear segment of memory. The store is specialized for \p BLOCK_STORE_WARP_TRANSPOSE, * meaning items are locally reordered among threads so that memory references will be * efficiently coalesced using a warp-striped access pattern. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(int *d_data, int valid_items, ...) * { * // Specialize BlockStore for a 1D block of 128 threads owning 4 integer items each * typedef cub::BlockStore BlockStore; * * // Allocate shared memory for BlockStore * __shared__ typename BlockStore::TempStorage temp_storage; * * // Obtain a segment of consecutive items that are blocked across threads * int thread_data[4]; * ... * * // Store items to linear memory * int thread_data[4]; * BlockStore(temp_storage).Store(d_data, thread_data, valid_items); * * \endcode * \par * Suppose the set of \p thread_data across the block of threads is * { [0,1,2,3], [4,5,6,7], ..., [508,509,510,511] } and \p valid_items is \p 5. * The output \p d_data will be 0, 1, 2, 3, 4, ?, ?, ?, ..., with * only the first two threads being unmasked to store portions of valid data. 
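 *
 * \par
 * (Editor's note: a hedged sketch, not part of the original CUB text.)  Re-using the names from
 * the snippet above, \p valid_items is typically derived from the overall problem size so that
 * only the last, partially-full tile is guarded:
 * \par
 * \code
 * int tile_offset = blockIdx.x * 128 * 4;                     // 512 items per tile
 * int valid_items = min(num_items - tile_offset, 128 * 4);    // num_items assumed given
 * BlockStore(temp_storage).Store(d_data + tile_offset, thread_data, valid_items);
 * \endcode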
* */ template __device__ __forceinline__ void Store( OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to T (&items)[ITEMS_PER_THREAD], ///< [in] Data to store int valid_items) ///< [in] Number of valid items to write { InternalStore(temp_storage, linear_tid).Store(block_itr, items, valid_items); } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/block/specializations/000077500000000000000000000000001411340063500226515ustar00rootroot00000000000000relion-3.1.3/src/gpu_utils/cub/block/specializations/block_histogram_atomic.cuh000066400000000000000000000063251411340063500300630ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * The cub::BlockHistogramAtomic class provides atomic-based methods for constructing block-wide histograms from data samples partitioned across a CUDA thread block. */ #pragma once #include "../../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief The BlockHistogramAtomic class provides atomic-based methods for constructing block-wide histograms from data samples partitioned across a CUDA thread block. 
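 *
 * (Editor's note: a hedged sketch, not part of the original CUB text.)  This is an internal
 * specialization that is normally reached through the public cub::BlockHistogram front-end
 * (block_histogram.cuh) by selecting the cub::BLOCK_HISTO_ATOMIC policy, assumed below:
 *
 * \code
 * // 128 threads, 4 unsigned-char samples each, 256 bins, atomic strategy
 * typedef cub::BlockHistogram<unsigned char, 128, 4, 256, cub::BLOCK_HISTO_ATOMIC> BlockHistogramT;
 * __shared__ typename BlockHistogramT::TempStorage temp_storage;
 * __shared__ unsigned int smem_histogram[256];
 *
 * unsigned char samples[4];
 * // ... gather 4 samples per thread ...
 *
 * // Zero-initialize the bins and composite the block's samples into them
 * BlockHistogramT(temp_storage).Histogram(samples, smem_histogram);
 * \endcode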
*/ template struct BlockHistogramAtomic { /// Shared memory storage layout type struct TempStorage {}; /// Constructor __device__ __forceinline__ BlockHistogramAtomic( TempStorage &temp_storage) {} /// Composite data onto an existing histogram template < typename T, typename CounterT, int ITEMS_PER_THREAD> __device__ __forceinline__ void Composite( T (&items)[ITEMS_PER_THREAD], ///< [in] Calling thread's input values to histogram CounterT histogram[BINS]) ///< [out] Reference to shared/device-accessible memory histogram { // Update histogram #pragma unroll for (int i = 0; i < ITEMS_PER_THREAD; ++i) { atomicAdd(histogram + items[i], 1); } } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/block/specializations/block_histogram_sort.cuh000066400000000000000000000200061411340063500275660ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * The cub::BlockHistogramSort class provides sorting-based methods for constructing block-wide histograms from data samples partitioned across a CUDA thread block. */ #pragma once #include "../../block/block_radix_sort.cuh" #include "../../block/block_discontinuity.cuh" #include "../../util_ptx.cuh" #include "../../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief The BlockHistogramSort class provides sorting-based methods for constructing block-wide histograms from data samples partitioned across a CUDA thread block. 
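 *
 * (Editor's note: a hedged illustration, not part of the original CUB text.)  The strategy is to
 * radix-sort the tile of samples, flag the head of every run of equal bin values, record each
 * run's begin/end offsets, and add the run lengths into the histogram.  For a sorted tile
 * [0, 0, 0, 2, 2, 5] this yields run offsets (bin 0: 0..3), (bin 2: 3..5), (bin 5: 5..6),
 * i.e. counts of 3, 2 and 1; bins that never appear keep identical begin/end offsets and
 * contribute a count of zero.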
*/ template < typename T, ///< Sample type int BLOCK_DIM_X, ///< The thread block length in threads along the X dimension int ITEMS_PER_THREAD, ///< The number of samples per thread int BINS, ///< The number of bins into which histogram samples may fall int BLOCK_DIM_Y, ///< The thread block length in threads along the Y dimension int BLOCK_DIM_Z, ///< The thread block length in threads along the Z dimension int PTX_ARCH> ///< The PTX compute capability for which to to specialize this collective struct BlockHistogramSort { /// Constants enum { /// The thread block size in threads BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, }; // Parameterize BlockRadixSort type for our thread block typedef BlockRadixSort< T, BLOCK_DIM_X, ITEMS_PER_THREAD, NullType, 4, (PTX_ARCH >= 350) ? true : false, BLOCK_SCAN_WARP_SCANS, cudaSharedMemBankSizeFourByte, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH> BlockRadixSortT; // Parameterize BlockDiscontinuity type for our thread block typedef BlockDiscontinuity< T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH> BlockDiscontinuityT; /// Shared memory union _TempStorage { // Storage for sorting bin values typename BlockRadixSortT::TempStorage sort; struct { // Storage for detecting discontinuities in the tile of sorted bin values typename BlockDiscontinuityT::TempStorage flag; // Storage for noting begin/end offsets of bin runs in the tile of sorted bin values unsigned int run_begin[BINS]; unsigned int run_end[BINS]; }; }; /// Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; // Thread fields _TempStorage &temp_storage; unsigned int linear_tid; /// Constructor __device__ __forceinline__ BlockHistogramSort( TempStorage &temp_storage) : temp_storage(temp_storage.Alias()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} // Discontinuity functor struct DiscontinuityOp { // Reference to temp_storage _TempStorage &temp_storage; // Constructor __device__ __forceinline__ DiscontinuityOp(_TempStorage &temp_storage) : temp_storage(temp_storage) {} // Discontinuity predicate __device__ __forceinline__ bool operator()(const T &a, const T &b, int b_index) { if (a != b) { // Note the begin/end offsets in shared storage temp_storage.run_begin[b] = b_index; temp_storage.run_end[a] = b_index; return true; } else { return false; } } }; // Composite data onto an existing histogram template < typename CounterT > __device__ __forceinline__ void Composite( T (&items)[ITEMS_PER_THREAD], ///< [in] Calling thread's input values to histogram CounterT histogram[BINS]) ///< [out] Reference to shared/device-accessible memory histogram { enum { TILE_SIZE = BLOCK_THREADS * ITEMS_PER_THREAD }; // Sort bytes in blocked arrangement BlockRadixSortT(temp_storage.sort).Sort(items); CTA_SYNC(); // Initialize the shared memory's run_begin and run_end for each bin int histo_offset = 0; #pragma unroll for(; histo_offset + BLOCK_THREADS <= BINS; histo_offset += BLOCK_THREADS) { temp_storage.run_begin[histo_offset + linear_tid] = TILE_SIZE; temp_storage.run_end[histo_offset + linear_tid] = TILE_SIZE; } // Finish up with guarded initialization if necessary if ((BINS % BLOCK_THREADS != 0) && (histo_offset + linear_tid < BINS)) { temp_storage.run_begin[histo_offset + linear_tid] = TILE_SIZE; temp_storage.run_end[histo_offset + linear_tid] = TILE_SIZE; } CTA_SYNC(); int flags[ITEMS_PER_THREAD]; // unused // Compute head flags to demarcate contiguous runs of the same bin in the sorted tile DiscontinuityOp flag_op(temp_storage); 
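        // Flag the head of every run of identical bin values in the sorted tile; as a side
        // effect the functor records each run's begin/end offsets in shared memory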
BlockDiscontinuityT(temp_storage.flag).FlagHeads(flags, items, flag_op); // Update begin for first item if (linear_tid == 0) temp_storage.run_begin[items[0]] = 0; CTA_SYNC(); // Composite into histogram histo_offset = 0; #pragma unroll for(; histo_offset + BLOCK_THREADS <= BINS; histo_offset += BLOCK_THREADS) { int thread_offset = histo_offset + linear_tid; CounterT count = temp_storage.run_end[thread_offset] - temp_storage.run_begin[thread_offset]; histogram[thread_offset] += count; } // Finish up with guarded composition if necessary if ((BINS % BLOCK_THREADS != 0) && (histo_offset + linear_tid < BINS)) { int thread_offset = histo_offset + linear_tid; CounterT count = temp_storage.run_end[thread_offset] - temp_storage.run_begin[thread_offset]; histogram[thread_offset] += count; } } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/block/specializations/block_reduce_raking.cuh000066400000000000000000000225571411340063500273410ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::BlockReduceRaking provides raking-based methods of parallel reduction across a CUDA thread block. Supports non-commutative reduction operators. */ #pragma once #include "../../block/block_raking_layout.cuh" #include "../../warp/warp_reduce.cuh" #include "../../thread/thread_reduce.cuh" #include "../../util_ptx.cuh" #include "../../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief BlockReduceRaking provides raking-based methods of parallel reduction across a CUDA thread block. Supports non-commutative reduction operators. * * Supports non-commutative binary reduction operators. 
Unlike commutative * reduction operators (e.g., addition), the application of a non-commutative * reduction operator (e.g, string concatenation) across a sequence of inputs must * honor the relative ordering of items and partial reductions when applying the * reduction operator. * * Compared to the implementation of BlockReduceRaking (which does not support * non-commutative operators), this implementation requires a few extra * rounds of inter-thread communication. */ template < typename T, ///< Data type being reduced int BLOCK_DIM_X, ///< The thread block length in threads along the X dimension int BLOCK_DIM_Y, ///< The thread block length in threads along the Y dimension int BLOCK_DIM_Z, ///< The thread block length in threads along the Z dimension int PTX_ARCH> ///< The PTX compute capability for which to to specialize this collective struct BlockReduceRaking { /// Constants enum { /// The thread block size in threads BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, }; /// Layout type for padded thread block raking grid typedef BlockRakingLayout BlockRakingLayout; /// WarpReduce utility type typedef typename WarpReduce::InternalWarpReduce WarpReduce; /// Constants enum { /// Number of raking threads RAKING_THREADS = BlockRakingLayout::RAKING_THREADS, /// Number of raking elements per warp synchronous raking thread SEGMENT_LENGTH = BlockRakingLayout::SEGMENT_LENGTH, /// Cooperative work can be entirely warp synchronous WARP_SYNCHRONOUS = (RAKING_THREADS == BLOCK_THREADS), /// Whether or not warp-synchronous reduction should be unguarded (i.e., the warp-reduction elements is a power of two WARP_SYNCHRONOUS_UNGUARDED = PowerOfTwo::VALUE, /// Whether or not accesses into smem are unguarded RAKING_UNGUARDED = BlockRakingLayout::UNGUARDED, }; /// Shared memory storage layout type union _TempStorage { typename WarpReduce::TempStorage warp_storage; ///< Storage for warp-synchronous reduction typename BlockRakingLayout::TempStorage raking_grid; ///< Padded thread block raking grid }; /// Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; // Thread fields _TempStorage &temp_storage; unsigned int linear_tid; /// Constructor __device__ __forceinline__ BlockReduceRaking( TempStorage &temp_storage) : temp_storage(temp_storage.Alias()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} template __device__ __forceinline__ T RakingReduction( ReductionOp reduction_op, ///< [in] Binary scan operator T *raking_segment, T partial, ///< [in] [lane0 only] Warp-wide aggregate reduction of input items int num_valid, ///< [in] Number of valid elements (may be less than BLOCK_THREADS) Int2Type /*iteration*/) { // Update partial if addend is in range if ((IS_FULL_TILE && RAKING_UNGUARDED) || ((linear_tid * SEGMENT_LENGTH) + ITERATION < num_valid)) { T addend = raking_segment[ITERATION]; partial = reduction_op(partial, addend); } return RakingReduction(reduction_op, raking_segment, partial, num_valid, Int2Type()); } template __device__ __forceinline__ T RakingReduction( ReductionOp /*reduction_op*/, ///< [in] Binary scan operator T * /*raking_segment*/, T partial, ///< [in] [lane0 only] Warp-wide aggregate reduction of input items int /*num_valid*/, ///< [in] Number of valid elements (may be less than BLOCK_THREADS) Int2Type /*iteration*/) { return partial; } /// Computes a thread block-wide reduction using the specified reduction operator. The first num_valid threads each contribute one reduction partial. 
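    /// (Editor's note: a hedged sketch, not part of the original CUB text.)  This internal
    /// specialization is normally reached through the public cub::BlockReduce front-end in
    /// block_reduce.cuh, e.g. with the cub::BLOCK_REDUCE_RAKING policy (or
    /// cub::BLOCK_REDUCE_WARP_REDUCTIONS for the warp-reduction variant further below):
    ///
    /// \code
    /// typedef cub::BlockReduce<int, 128, cub::BLOCK_REDUCE_RAKING> BlockReduceT;
    /// __shared__ typename BlockReduceT::TempStorage temp_storage;
    ///
    /// int thread_data = ...;                                          // one partial per thread
    /// int aggregate   = BlockReduceT(temp_storage).Sum(thread_data);  // valid in thread0 only
    /// \endcode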
The return value is only valid for thread0. template < bool IS_FULL_TILE, typename ReductionOp> __device__ __forceinline__ T Reduce( T partial, ///< [in] Calling thread's input partial reductions int num_valid, ///< [in] Number of valid elements (may be less than BLOCK_THREADS) ReductionOp reduction_op) ///< [in] Binary reduction operator { if (WARP_SYNCHRONOUS) { // Short-circuit directly to warp synchronous reduction (unguarded if active threads is a power-of-two) partial = WarpReduce(temp_storage.warp_storage).template Reduce( partial, num_valid, reduction_op); } else { // Place partial into shared memory grid. *BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid) = partial; CTA_SYNC(); // Reduce parallelism to one warp if (linear_tid < RAKING_THREADS) { // Raking reduction in grid T *raking_segment = BlockRakingLayout::RakingPtr(temp_storage.raking_grid, linear_tid); partial = raking_segment[0]; partial = RakingReduction(reduction_op, raking_segment, partial, num_valid, Int2Type<1>()); partial = WarpReduce(temp_storage.warp_storage).template Reduce( partial, num_valid, reduction_op); } } return partial; } /// Computes a thread block-wide reduction using addition (+) as the reduction operator. The first num_valid threads each contribute one reduction partial. The return value is only valid for thread0. template __device__ __forceinline__ T Sum( T partial, ///< [in] Calling thread's input partial reductions int num_valid) ///< [in] Number of valid elements (may be less than BLOCK_THREADS) { cub::Sum reduction_op; return Reduce(partial, num_valid, reduction_op); } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/block/specializations/block_reduce_raking_commutative_only.cuh000066400000000000000000000202611411340063500330050ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ******************************************************************************/ /** * \file * cub::BlockReduceRakingCommutativeOnly provides raking-based methods of parallel reduction across a CUDA thread block. Does not support non-commutative reduction operators. */ #pragma once #include "block_reduce_raking.cuh" #include "../../warp/warp_reduce.cuh" #include "../../thread/thread_reduce.cuh" #include "../../util_ptx.cuh" #include "../../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief BlockReduceRakingCommutativeOnly provides raking-based methods of parallel reduction across a CUDA thread block. Does not support non-commutative reduction operators. Does not support block sizes that are not a multiple of the warp size. */ template < typename T, ///< Data type being reduced int BLOCK_DIM_X, ///< The thread block length in threads along the X dimension int BLOCK_DIM_Y, ///< The thread block length in threads along the Y dimension int BLOCK_DIM_Z, ///< The thread block length in threads along the Z dimension int PTX_ARCH> ///< The PTX compute capability for which to to specialize this collective struct BlockReduceRakingCommutativeOnly { /// Constants enum { /// The thread block size in threads BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, }; // The fall-back implementation to use when BLOCK_THREADS is not a multiple of the warp size or not all threads have valid values typedef BlockReduceRaking FallBack; /// Constants enum { /// Number of warp threads WARP_THREADS = CUB_WARP_THREADS(PTX_ARCH), /// Whether or not to use fall-back USE_FALLBACK = ((BLOCK_THREADS % WARP_THREADS != 0) || (BLOCK_THREADS <= WARP_THREADS)), /// Number of raking threads RAKING_THREADS = WARP_THREADS, /// Number of threads actually sharing items with the raking threads SHARING_THREADS = CUB_MAX(1, BLOCK_THREADS - RAKING_THREADS), /// Number of raking elements per warp synchronous raking thread SEGMENT_LENGTH = SHARING_THREADS / WARP_THREADS, }; /// WarpReduce utility type typedef WarpReduce WarpReduce; /// Layout type for padded thread block raking grid typedef BlockRakingLayout BlockRakingLayout; /// Shared memory storage layout type union _TempStorage { struct { typename WarpReduce::TempStorage warp_storage; ///< Storage for warp-synchronous reduction typename BlockRakingLayout::TempStorage raking_grid; ///< Padded thread block raking grid }; typename FallBack::TempStorage fallback_storage; ///< Fall-back storage for non-commutative block scan }; /// Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; // Thread fields _TempStorage &temp_storage; unsigned int linear_tid; /// Constructor __device__ __forceinline__ BlockReduceRakingCommutativeOnly( TempStorage &temp_storage) : temp_storage(temp_storage.Alias()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} /// Computes a thread block-wide reduction using addition (+) as the reduction operator. The first num_valid threads each contribute one reduction partial. The return value is only valid for thread0. 
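    /// (Editor's note, not part of the original CUB text.)  In this commutative-only variant the
    /// first RAKING_THREADS threads keep their own partials in registers, and only the remaining
    /// SHARING_THREADS threads deposit partials into the raking grid (at slot
    /// linear_tid - RAKING_THREADS).  Each raking thread then folds a segment of other threads'
    /// partials into its own, so operands are combined out of thread-rank order -- which is why
    /// this path requires a commutative reduction operator.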
template __device__ __forceinline__ T Sum( T partial, ///< [in] Calling thread's input partial reductions int num_valid) ///< [in] Number of valid elements (may be less than BLOCK_THREADS) { if (USE_FALLBACK || !FULL_TILE) { return FallBack(temp_storage.fallback_storage).template Sum(partial, num_valid); } else { // Place partial into shared memory grid if (linear_tid >= RAKING_THREADS) *BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid - RAKING_THREADS) = partial; CTA_SYNC(); // Reduce parallelism to one warp if (linear_tid < RAKING_THREADS) { // Raking reduction in grid T *raking_segment = BlockRakingLayout::RakingPtr(temp_storage.raking_grid, linear_tid); partial = internal::ThreadReduce(raking_segment, cub::Sum(), partial); // Warpscan partial = WarpReduce(temp_storage.warp_storage).Sum(partial); } } return partial; } /// Computes a thread block-wide reduction using the specified reduction operator. The first num_valid threads each contribute one reduction partial. The return value is only valid for thread0. template < bool FULL_TILE, typename ReductionOp> __device__ __forceinline__ T Reduce( T partial, ///< [in] Calling thread's input partial reductions int num_valid, ///< [in] Number of valid elements (may be less than BLOCK_THREADS) ReductionOp reduction_op) ///< [in] Binary reduction operator { if (USE_FALLBACK || !FULL_TILE) { return FallBack(temp_storage.fallback_storage).template Reduce(partial, num_valid, reduction_op); } else { // Place partial into shared memory grid if (linear_tid >= RAKING_THREADS) *BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid - RAKING_THREADS) = partial; CTA_SYNC(); // Reduce parallelism to one warp if (linear_tid < RAKING_THREADS) { // Raking reduction in grid T *raking_segment = BlockRakingLayout::RakingPtr(temp_storage.raking_grid, linear_tid); partial = internal::ThreadReduce(raking_segment, reduction_op, partial); // Warpscan partial = WarpReduce(temp_storage.warp_storage).Reduce(partial, reduction_op); } } return partial; } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/block/specializations/block_reduce_warp_reductions.cuh000066400000000000000000000233261411340063500312710ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::BlockReduceWarpReductions provides variants of warp-reduction-based parallel reduction across a CUDA thread block. Supports non-commutative reduction operators. */ #pragma once #include "../../warp/warp_reduce.cuh" #include "../../util_ptx.cuh" #include "../../util_arch.cuh" #include "../../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief BlockReduceWarpReductions provides variants of warp-reduction-based parallel reduction across a CUDA thread block. Supports non-commutative reduction operators. */ template < typename T, ///< Data type being reduced int BLOCK_DIM_X, ///< The thread block length in threads along the X dimension int BLOCK_DIM_Y, ///< The thread block length in threads along the Y dimension int BLOCK_DIM_Z, ///< The thread block length in threads along the Z dimension int PTX_ARCH> ///< The PTX compute capability for which to to specialize this collective struct BlockReduceWarpReductions { /// Constants enum { /// The thread block size in threads BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, /// Number of warp threads WARP_THREADS = CUB_WARP_THREADS(PTX_ARCH), /// Number of active warps WARPS = (BLOCK_THREADS + WARP_THREADS - 1) / WARP_THREADS, /// The logical warp size for warp reductions LOGICAL_WARP_SIZE = CUB_MIN(BLOCK_THREADS, WARP_THREADS), /// Whether or not the logical warp size evenly divides the thread block size EVEN_WARP_MULTIPLE = (BLOCK_THREADS % LOGICAL_WARP_SIZE == 0) }; /// WarpReduce utility type typedef typename WarpReduce::InternalWarpReduce WarpReduce; /// Shared memory storage layout type struct _TempStorage { typename WarpReduce::TempStorage warp_reduce[WARPS]; ///< Buffer for warp-synchronous scan T warp_aggregates[WARPS]; ///< Shared totals from each warp-synchronous scan T block_prefix; ///< Shared prefix for the entire thread block }; /// Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; // Thread fields _TempStorage &temp_storage; unsigned int linear_tid; unsigned int warp_id; unsigned int lane_id; /// Constructor __device__ __forceinline__ BlockReduceWarpReductions( TempStorage &temp_storage) : temp_storage(temp_storage.Alias()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)), warp_id((WARPS == 1) ? 
0 : linear_tid / WARP_THREADS), lane_id(LaneId()) {} template __device__ __forceinline__ T ApplyWarpAggregates( ReductionOp reduction_op, ///< [in] Binary scan operator T warp_aggregate, ///< [in] [lane0 only] Warp-wide aggregate reduction of input items int num_valid, ///< [in] Number of valid elements (may be less than BLOCK_THREADS) Int2Type /*successor_warp*/) { if (FULL_TILE || (SUCCESSOR_WARP * LOGICAL_WARP_SIZE < num_valid)) { T addend = temp_storage.warp_aggregates[SUCCESSOR_WARP]; warp_aggregate = reduction_op(warp_aggregate, addend); } return ApplyWarpAggregates(reduction_op, warp_aggregate, num_valid, Int2Type()); } template __device__ __forceinline__ T ApplyWarpAggregates( ReductionOp /*reduction_op*/, ///< [in] Binary scan operator T warp_aggregate, ///< [in] [lane0 only] Warp-wide aggregate reduction of input items int /*num_valid*/, ///< [in] Number of valid elements (may be less than BLOCK_THREADS) Int2Type /*successor_warp*/) { return warp_aggregate; } /// Returns block-wide aggregate in thread0. template < bool FULL_TILE, typename ReductionOp> __device__ __forceinline__ T ApplyWarpAggregates( ReductionOp reduction_op, ///< [in] Binary scan operator T warp_aggregate, ///< [in] [lane0 only] Warp-wide aggregate reduction of input items int num_valid) ///< [in] Number of valid elements (may be less than BLOCK_THREADS) { // Share lane aggregates if (lane_id == 0) { temp_storage.warp_aggregates[warp_id] = warp_aggregate; } CTA_SYNC(); // Update total aggregate in warp 0, lane 0 if (linear_tid == 0) { warp_aggregate = ApplyWarpAggregates(reduction_op, warp_aggregate, num_valid, Int2Type<1>()); } return warp_aggregate; } /// Computes a thread block-wide reduction using addition (+) as the reduction operator. The first num_valid threads each contribute one reduction partial. The return value is only valid for thread0. template __device__ __forceinline__ T Sum( T input, ///< [in] Calling thread's input partial reductions int num_valid) ///< [in] Number of valid elements (may be less than BLOCK_THREADS) { cub::Sum reduction_op; unsigned int warp_offset = warp_id * LOGICAL_WARP_SIZE; unsigned int warp_num_valid = (FULL_TILE && EVEN_WARP_MULTIPLE) ? LOGICAL_WARP_SIZE : (warp_offset < num_valid) ? num_valid - warp_offset : 0; // Warp reduction in every warp T warp_aggregate = WarpReduce(temp_storage.warp_reduce[warp_id]).template Reduce<(FULL_TILE && EVEN_WARP_MULTIPLE), 1>( input, warp_num_valid, cub::Sum()); // Update outputs and block_aggregate with warp-wide aggregates from lane-0s return ApplyWarpAggregates(reduction_op, warp_aggregate, num_valid); } /// Computes a thread block-wide reduction using the specified reduction operator. The first num_valid threads each contribute one reduction partial. The return value is only valid for thread0. template < bool FULL_TILE, typename ReductionOp> __device__ __forceinline__ T Reduce( T input, ///< [in] Calling thread's input partial reductions int num_valid, ///< [in] Number of valid elements (may be less than BLOCK_THREADS) ReductionOp reduction_op) ///< [in] Binary reduction operator { unsigned int warp_offset = warp_id * LOGICAL_WARP_SIZE; unsigned int warp_num_valid = (FULL_TILE && EVEN_WARP_MULTIPLE) ? LOGICAL_WARP_SIZE : (warp_offset < static_cast(num_valid)) ? 
num_valid - warp_offset : 0; // Warp reduction in every warp T warp_aggregate = WarpReduce(temp_storage.warp_reduce[warp_id]).template Reduce<(FULL_TILE && EVEN_WARP_MULTIPLE), 1>( input, warp_num_valid, reduction_op); // Update outputs and block_aggregate with warp-wide aggregates from lane-0s return ApplyWarpAggregates(reduction_op, warp_aggregate, num_valid); } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/block/specializations/block_scan_raking.cuh000066400000000000000000000674541411340063500270230ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::BlockScanRaking provides variants of raking-based parallel prefix scan across a CUDA thread block. */ #pragma once #include "../../util_ptx.cuh" #include "../../util_arch.cuh" #include "../../block/block_raking_layout.cuh" #include "../../thread/thread_reduce.cuh" #include "../../thread/thread_scan.cuh" #include "../../warp/warp_scan.cuh" #include "../../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief BlockScanRaking provides variants of raking-based parallel prefix scan across a CUDA thread block. 
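 *
 * (Editor's note: a hedged sketch, not part of the original CUB text.)  This internal
 * specialization is normally reached through the public cub::BlockScan front-end in
 * block_scan.cuh, e.g.:
 *
 * \code
 * typedef cub::BlockScan<int, 128, cub::BLOCK_SCAN_RAKING_MEMOIZE> BlockScanT;
 * __shared__ typename BlockScanT::TempStorage temp_storage;
 *
 * int thread_data = ...;           // one input item per thread
 * int block_aggregate;
 * BlockScanT(temp_storage).ExclusiveSum(thread_data, thread_data, block_aggregate);
 * \endcode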
*/ template < typename T, ///< Data type being scanned int BLOCK_DIM_X, ///< The thread block length in threads along the X dimension int BLOCK_DIM_Y, ///< The thread block length in threads along the Y dimension int BLOCK_DIM_Z, ///< The thread block length in threads along the Z dimension bool MEMOIZE, ///< Whether or not to buffer outer raking scan partials to incur fewer shared memory reads at the expense of higher register pressure int PTX_ARCH> ///< The PTX compute capability for which to to specialize this collective struct BlockScanRaking { //--------------------------------------------------------------------- // Types and constants //--------------------------------------------------------------------- /// Constants enum { /// The thread block size in threads BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, }; /// Layout type for padded thread block raking grid typedef BlockRakingLayout BlockRakingLayout; /// Constants enum { /// Number of raking threads RAKING_THREADS = BlockRakingLayout::RAKING_THREADS, /// Number of raking elements per warp synchronous raking thread SEGMENT_LENGTH = BlockRakingLayout::SEGMENT_LENGTH, /// Cooperative work can be entirely warp synchronous WARP_SYNCHRONOUS = (BLOCK_THREADS == RAKING_THREADS), }; /// WarpScan utility type typedef WarpScan WarpScan; /// Shared memory storage layout type struct _TempStorage { typename WarpScan::TempStorage warp_scan; ///< Buffer for warp-synchronous scan typename BlockRakingLayout::TempStorage raking_grid; ///< Padded thread block raking grid T block_aggregate; ///< Block aggregate }; /// Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; //--------------------------------------------------------------------- // Per-thread fields //--------------------------------------------------------------------- // Thread fields _TempStorage &temp_storage; unsigned int linear_tid; T cached_segment[SEGMENT_LENGTH]; //--------------------------------------------------------------------- // Utility methods //--------------------------------------------------------------------- /// Templated reduction template __device__ __forceinline__ T GuardedReduce( T* raking_ptr, ///< [in] Input array ScanOp scan_op, ///< [in] Binary reduction operator T raking_partial, ///< [in] Prefix to seed reduction with Int2Type /*iteration*/) { if ((BlockRakingLayout::UNGUARDED) || (((linear_tid * SEGMENT_LENGTH) + ITERATION) < BLOCK_THREADS)) { T addend = raking_ptr[ITERATION]; raking_partial = scan_op(raking_partial, addend); } return GuardedReduce(raking_ptr, scan_op, raking_partial, Int2Type()); } /// Templated reduction (base case) template __device__ __forceinline__ T GuardedReduce( T* /*raking_ptr*/, ///< [in] Input array ScanOp /*scan_op*/, ///< [in] Binary reduction operator T raking_partial, ///< [in] Prefix to seed reduction with Int2Type /*iteration*/) { return raking_partial; } /// Templated copy template __device__ __forceinline__ void CopySegment( T* out, ///< [out] Out array T* in, ///< [in] Input array Int2Type /*iteration*/) { out[ITERATION] = in[ITERATION]; CopySegment(out, in, Int2Type()); } /// Templated copy (base case) __device__ __forceinline__ void CopySegment( T* /*out*/, ///< [out] Out array T* /*in*/, ///< [in] Input array Int2Type /*iteration*/) {} /// Performs upsweep raking reduction, returning the aggregate template __device__ __forceinline__ T Upsweep( ScanOp scan_op) { T *smem_raking_ptr = BlockRakingLayout::RakingPtr(temp_storage.raking_grid, linear_tid); // Read 
data into registers CopySegment(cached_segment, smem_raking_ptr, Int2Type<0>()); T raking_partial = cached_segment[0]; return GuardedReduce(cached_segment, scan_op, raking_partial, Int2Type<1>()); } /// Performs exclusive downsweep raking scan template __device__ __forceinline__ void ExclusiveDownsweep( ScanOp scan_op, T raking_partial, bool apply_prefix = true) { T *smem_raking_ptr = BlockRakingLayout::RakingPtr(temp_storage.raking_grid, linear_tid); // Read data back into registers if (!MEMOIZE) { CopySegment(cached_segment, smem_raking_ptr, Int2Type<0>()); } internal::ThreadScanExclusive(cached_segment, cached_segment, scan_op, raking_partial, apply_prefix); // Write data back to smem CopySegment(smem_raking_ptr, cached_segment, Int2Type<0>()); } /// Performs inclusive downsweep raking scan template __device__ __forceinline__ void InclusiveDownsweep( ScanOp scan_op, T raking_partial, bool apply_prefix = true) { T *smem_raking_ptr = BlockRakingLayout::RakingPtr(temp_storage.raking_grid, linear_tid); // Read data back into registers if (!MEMOIZE) { CopySegment(cached_segment, smem_raking_ptr, Int2Type<0>()); } internal::ThreadScanInclusive(cached_segment, cached_segment, scan_op, raking_partial, apply_prefix); // Write data back to smem CopySegment(smem_raking_ptr, cached_segment, Int2Type<0>()); } //--------------------------------------------------------------------- // Constructors //--------------------------------------------------------------------- /// Constructor __device__ __forceinline__ BlockScanRaking( TempStorage &temp_storage) : temp_storage(temp_storage.Alias()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) {} //--------------------------------------------------------------------- // Exclusive scans //--------------------------------------------------------------------- /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. With no initial value, the output computed for thread0 is undefined. template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input item T &exclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op) ///< [in] Binary scan operator { if (WARP_SYNCHRONOUS) { // Short-circuit directly to warp-synchronous scan WarpScan(temp_storage.warp_scan).ExclusiveScan(input, exclusive_output, scan_op); } else { // Place thread partial into shared memory raking grid T *placement_ptr = BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid); *placement_ptr = input; CTA_SYNC(); // Reduce parallelism down to just raking threads if (linear_tid < RAKING_THREADS) { // Raking upsweep reduction across shared partials T upsweep_partial = Upsweep(scan_op); // Warp-synchronous scan T exclusive_partial; WarpScan(temp_storage.warp_scan).ExclusiveScan(upsweep_partial, exclusive_partial, scan_op); // Exclusive raking downsweep scan ExclusiveDownsweep(scan_op, exclusive_partial, (linear_tid != 0)); } CTA_SYNC(); // Grab thread prefix from shared memory exclusive_output = *placement_ptr; } } /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. 
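    /// (Editor's note, not part of the original CUB text.)  All of the non-warp-synchronous paths
    /// below follow the same three-phase raking pattern: every thread places its partial in the
    /// shared raking grid, the RAKING_THREADS raking threads serially reduce their grid segments
    /// (upsweep) and scan the per-segment totals with a single warp scan, and a serial downsweep
    /// rewrites the grid so each thread can read back its own prefix.  With 128 threads this
    /// typically means 32 raking threads, each owning a 4-element segment.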
template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input items T &output, ///< [out] Calling thread's output items (may be aliased to \p input) const T &initial_value, ///< [in] Initial value to seed the exclusive scan ScanOp scan_op) ///< [in] Binary scan operator { if (WARP_SYNCHRONOUS) { // Short-circuit directly to warp-synchronous scan WarpScan(temp_storage.warp_scan).ExclusiveScan(input, output, initial_value, scan_op); } else { // Place thread partial into shared memory raking grid T *placement_ptr = BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid); *placement_ptr = input; CTA_SYNC(); // Reduce parallelism down to just raking threads if (linear_tid < RAKING_THREADS) { // Raking upsweep reduction across shared partials T upsweep_partial = Upsweep(scan_op); // Exclusive Warp-synchronous scan T exclusive_partial; WarpScan(temp_storage.warp_scan).ExclusiveScan(upsweep_partial, exclusive_partial, initial_value, scan_op); // Exclusive raking downsweep scan ExclusiveDownsweep(scan_op, exclusive_partial); } CTA_SYNC(); // Grab exclusive partial from shared memory output = *placement_ptr; } } /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. With no initial value, the output computed for thread0 is undefined. template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input item T &output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan operator T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items { if (WARP_SYNCHRONOUS) { // Short-circuit directly to warp-synchronous scan WarpScan(temp_storage.warp_scan).ExclusiveScan(input, output, scan_op, block_aggregate); } else { // Place thread partial into shared memory raking grid T *placement_ptr = BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid); *placement_ptr = input; CTA_SYNC(); // Reduce parallelism down to just raking threads if (linear_tid < RAKING_THREADS) { // Raking upsweep reduction across shared partials T upsweep_partial= Upsweep(scan_op); // Warp-synchronous scan T inclusive_partial; T exclusive_partial; WarpScan(temp_storage.warp_scan).Scan(upsweep_partial, inclusive_partial, exclusive_partial, scan_op); // Exclusive raking downsweep scan ExclusiveDownsweep(scan_op, exclusive_partial, (linear_tid != 0)); // Broadcast aggregate to all threads if (linear_tid == RAKING_THREADS - 1) temp_storage.block_aggregate = inclusive_partial; } CTA_SYNC(); // Grab thread prefix from shared memory output = *placement_ptr; // Retrieve block aggregate block_aggregate = temp_storage.block_aggregate; } } /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. 
template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input items T &output, ///< [out] Calling thread's output items (may be aliased to \p input) const T &initial_value, ///< [in] Initial value to seed the exclusive scan ScanOp scan_op, ///< [in] Binary scan operator T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items { if (WARP_SYNCHRONOUS) { // Short-circuit directly to warp-synchronous scan WarpScan(temp_storage.warp_scan).ExclusiveScan(input, output, initial_value, scan_op, block_aggregate); } else { // Place thread partial into shared memory raking grid T *placement_ptr = BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid); *placement_ptr = input; CTA_SYNC(); // Reduce parallelism down to just raking threads if (linear_tid < RAKING_THREADS) { // Raking upsweep reduction across shared partials T upsweep_partial = Upsweep(scan_op); // Warp-synchronous scan T exclusive_partial; WarpScan(temp_storage.warp_scan).ExclusiveScan(upsweep_partial, exclusive_partial, initial_value, scan_op, block_aggregate); // Exclusive raking downsweep scan ExclusiveDownsweep(scan_op, exclusive_partial); // Broadcast aggregate to other threads if (linear_tid == 0) temp_storage.block_aggregate = block_aggregate; } CTA_SYNC(); // Grab exclusive partial from shared memory output = *placement_ptr; // Retrieve block aggregate block_aggregate = temp_storage.block_aggregate; } } /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. the call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. template < typename ScanOp, typename BlockPrefixCallbackOp> __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input item T &output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan operator BlockPrefixCallbackOp &block_prefix_callback_op) ///< [in-out] [warp0 only] Call-back functor for specifying a thread block-wide prefix to be applied to all inputs. 
{ if (WARP_SYNCHRONOUS) { // Short-circuit directly to warp-synchronous scan T block_aggregate; WarpScan warp_scan(temp_storage.warp_scan); warp_scan.ExclusiveScan(input, output, scan_op, block_aggregate); // Obtain warp-wide prefix in lane0, then broadcast to other lanes T block_prefix = block_prefix_callback_op(block_aggregate); block_prefix = warp_scan.Broadcast(block_prefix, 0); output = scan_op(block_prefix, output); if (linear_tid == 0) output = block_prefix; } else { // Place thread partial into shared memory raking grid T *placement_ptr = BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid); *placement_ptr = input; CTA_SYNC(); // Reduce parallelism down to just raking threads if (linear_tid < RAKING_THREADS) { WarpScan warp_scan(temp_storage.warp_scan); // Raking upsweep reduction across shared partials T upsweep_partial = Upsweep(scan_op); // Warp-synchronous scan T exclusive_partial, block_aggregate; warp_scan.ExclusiveScan(upsweep_partial, exclusive_partial, scan_op, block_aggregate); // Obtain block-wide prefix in lane0, then broadcast to other lanes T block_prefix = block_prefix_callback_op(block_aggregate); block_prefix = warp_scan.Broadcast(block_prefix, 0); // Update prefix with warpscan exclusive partial T downsweep_prefix = scan_op(block_prefix, exclusive_partial); if (linear_tid == 0) downsweep_prefix = block_prefix; // Exclusive raking downsweep scan ExclusiveDownsweep(scan_op, downsweep_prefix); } CTA_SYNC(); // Grab thread prefix from shared memory output = *placement_ptr; } } //--------------------------------------------------------------------- // Inclusive scans //--------------------------------------------------------------------- /// Computes an inclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. template __device__ __forceinline__ void InclusiveScan( T input, ///< [in] Calling thread's input item T &output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op) ///< [in] Binary scan operator { if (WARP_SYNCHRONOUS) { // Short-circuit directly to warp-synchronous scan WarpScan(temp_storage.warp_scan).InclusiveScan(input, output, scan_op); } else { // Place thread partial into shared memory raking grid T *placement_ptr = BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid); *placement_ptr = input; CTA_SYNC(); // Reduce parallelism down to just raking threads if (linear_tid < RAKING_THREADS) { // Raking upsweep reduction across shared partials T upsweep_partial = Upsweep(scan_op); // Exclusive Warp-synchronous scan T exclusive_partial; WarpScan(temp_storage.warp_scan).ExclusiveScan(upsweep_partial, exclusive_partial, scan_op); // Inclusive raking downsweep scan InclusiveDownsweep(scan_op, exclusive_partial, (linear_tid != 0)); } CTA_SYNC(); // Grab thread prefix from shared memory output = *placement_ptr; } } /// Computes an inclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. 
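    /// \par
    /// A brief sketch of how the \p block_aggregate output is usually consumed, written
    /// against the public cub::BlockScan wrapper (the kernel and buffer names are
    /// illustrative assumptions): every thread receives the block-wide total alongside
    /// its own inclusive prefix.
    /// \code
    /// #include <cub/cub.cuh>
    ///
    /// __global__ void ExampleKernel(int *d_data, int *d_block_totals)
    /// {
    ///     typedef cub::BlockScan<int, 128, cub::BLOCK_SCAN_RAKING> BlockScan;
    ///     __shared__ typename BlockScan::TempStorage temp_storage;
    ///
    ///     int thread_data = d_data[blockIdx.x * 128 + threadIdx.x];
    ///     int block_aggregate;
    ///
    ///     // Inclusive prefix sum; block_aggregate receives the block-wide total in every thread
    ///     BlockScan(temp_storage).InclusiveSum(thread_data, thread_data, block_aggregate);
    ///
    ///     d_data[blockIdx.x * 128 + threadIdx.x] = thread_data;
    ///     if (threadIdx.x == 0)
    ///         d_block_totals[blockIdx.x] = block_aggregate;
    /// }
    /// \endcode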
template __device__ __forceinline__ void InclusiveScan( T input, ///< [in] Calling thread's input item T &output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan operator T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items { if (WARP_SYNCHRONOUS) { // Short-circuit directly to warp-synchronous scan WarpScan(temp_storage.warp_scan).InclusiveScan(input, output, scan_op, block_aggregate); } else { // Place thread partial into shared memory raking grid T *placement_ptr = BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid); *placement_ptr = input; CTA_SYNC(); // Reduce parallelism down to just raking threads if (linear_tid < RAKING_THREADS) { // Raking upsweep reduction across shared partials T upsweep_partial = Upsweep(scan_op); // Warp-synchronous scan T inclusive_partial; T exclusive_partial; WarpScan(temp_storage.warp_scan).Scan(upsweep_partial, inclusive_partial, exclusive_partial, scan_op); // Inclusive raking downsweep scan InclusiveDownsweep(scan_op, exclusive_partial, (linear_tid != 0)); // Broadcast aggregate to all threads if (linear_tid == RAKING_THREADS - 1) temp_storage.block_aggregate = inclusive_partial; } CTA_SYNC(); // Grab thread prefix from shared memory output = *placement_ptr; // Retrieve block aggregate block_aggregate = temp_storage.block_aggregate; } } /// Computes an inclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. the call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. template < typename ScanOp, typename BlockPrefixCallbackOp> __device__ __forceinline__ void InclusiveScan( T input, ///< [in] Calling thread's input item T &output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan operator BlockPrefixCallbackOp &block_prefix_callback_op) ///< [in-out] [warp0 only] Call-back functor for specifying a thread block-wide prefix to be applied to all inputs. 
{ if (WARP_SYNCHRONOUS) { // Short-circuit directly to warp-synchronous scan T block_aggregate; WarpScan warp_scan(temp_storage.warp_scan); warp_scan.InclusiveScan(input, output, scan_op, block_aggregate); // Obtain warp-wide prefix in lane0, then broadcast to other lanes T block_prefix = block_prefix_callback_op(block_aggregate); block_prefix = warp_scan.Broadcast(block_prefix, 0); // Update prefix with exclusive warpscan partial output = scan_op(block_prefix, output); } else { // Place thread partial into shared memory raking grid T *placement_ptr = BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid); *placement_ptr = input; CTA_SYNC(); // Reduce parallelism down to just raking threads if (linear_tid < RAKING_THREADS) { WarpScan warp_scan(temp_storage.warp_scan); // Raking upsweep reduction across shared partials T upsweep_partial = Upsweep(scan_op); // Warp-synchronous scan T exclusive_partial, block_aggregate; warp_scan.ExclusiveScan(upsweep_partial, exclusive_partial, scan_op, block_aggregate); // Obtain block-wide prefix in lane0, then broadcast to other lanes T block_prefix = block_prefix_callback_op(block_aggregate); block_prefix = warp_scan.Broadcast(block_prefix, 0); // Update prefix with warpscan exclusive partial T downsweep_prefix = scan_op(block_prefix, exclusive_partial); if (linear_tid == 0) downsweep_prefix = block_prefix; // Inclusive raking downsweep scan InclusiveDownsweep(scan_op, downsweep_prefix); } CTA_SYNC(); // Grab thread prefix from shared memory output = *placement_ptr; } } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/block/specializations/block_scan_warp_scans.cuh000066400000000000000000000452551411340063500277030ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ******************************************************************************/ /** * \file * cub::BlockScanWarpscans provides warpscan-based variants of parallel prefix scan across a CUDA thread block. */ #pragma once #include "../../util_arch.cuh" #include "../../util_ptx.cuh" #include "../../warp/warp_scan.cuh" #include "../../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief BlockScanWarpScans provides warpscan-based variants of parallel prefix scan across a CUDA thread block. */ template < typename T, int BLOCK_DIM_X, ///< The thread block length in threads along the X dimension int BLOCK_DIM_Y, ///< The thread block length in threads along the Y dimension int BLOCK_DIM_Z, ///< The thread block length in threads along the Z dimension int PTX_ARCH> ///< The PTX compute capability for which to to specialize this collective struct BlockScanWarpScans { //--------------------------------------------------------------------- // Types and constants //--------------------------------------------------------------------- /// Constants enum { /// Number of warp threads WARP_THREADS = CUB_WARP_THREADS(PTX_ARCH), /// The thread block size in threads BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, /// Number of active warps WARPS = (BLOCK_THREADS + WARP_THREADS - 1) / WARP_THREADS, }; /// WarpScan utility type typedef WarpScan WarpScanT; /// WarpScan utility type typedef WarpScan WarpAggregateScan; /// Shared memory storage layout type struct __align__(32) _TempStorage { T warp_aggregates[WARPS]; typename WarpScanT::TempStorage warp_scan[WARPS]; ///< Buffer for warp-synchronous scans T block_prefix; ///< Shared prefix for the entire thread block }; /// Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; //--------------------------------------------------------------------- // Per-thread fields //--------------------------------------------------------------------- // Thread fields _TempStorage &temp_storage; unsigned int linear_tid; unsigned int warp_id; unsigned int lane_id; //--------------------------------------------------------------------- // Constructors //--------------------------------------------------------------------- /// Constructor __device__ __forceinline__ BlockScanWarpScans( TempStorage &temp_storage) : temp_storage(temp_storage.Alias()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)), warp_id((WARPS == 1) ? 
0 : linear_tid / WARP_THREADS), lane_id(LaneId()) {} //--------------------------------------------------------------------- // Utility methods //--------------------------------------------------------------------- template __device__ __forceinline__ void ApplyWarpAggregates( T &warp_prefix, ///< [out] The calling thread's partial reduction ScanOp scan_op, ///< [in] Binary scan operator T &block_aggregate, ///< [out] Threadblock-wide aggregate reduction of input items Int2Type /*addend_warp*/) { if (warp_id == WARP) warp_prefix = block_aggregate; T addend = temp_storage.warp_aggregates[WARP]; block_aggregate = scan_op(block_aggregate, addend); ApplyWarpAggregates(warp_prefix, scan_op, block_aggregate, Int2Type()); } template __device__ __forceinline__ void ApplyWarpAggregates( T &/*warp_prefix*/, ///< [out] The calling thread's partial reduction ScanOp /*scan_op*/, ///< [in] Binary scan operator T &/*block_aggregate*/, ///< [out] Threadblock-wide aggregate reduction of input items Int2Type /*addend_warp*/) {} /// Use the warp-wide aggregates to compute the calling warp's prefix. Also returns block-wide aggregate in all threads. template __device__ __forceinline__ T ComputeWarpPrefix( ScanOp scan_op, ///< [in] Binary scan operator T warp_aggregate, ///< [in] [laneWARP_THREADS - 1 only] Warp-wide aggregate reduction of input items T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items { // Last lane in each warp shares its warp-aggregate if (lane_id == WARP_THREADS - 1) temp_storage.warp_aggregates[warp_id] = warp_aggregate; CTA_SYNC(); // Accumulate block aggregates and save the one that is our warp's prefix T warp_prefix; block_aggregate = temp_storage.warp_aggregates[0]; // Use template unrolling (since the PTX backend can't handle unrolling it for SM1x) ApplyWarpAggregates(warp_prefix, scan_op, block_aggregate, Int2Type<1>()); /* #pragma unroll for (int WARP = 1; WARP < WARPS; ++WARP) { if (warp_id == WARP) warp_prefix = block_aggregate; T addend = temp_storage.warp_aggregates[WARP]; block_aggregate = scan_op(block_aggregate, addend); } */ return warp_prefix; } /// Use the warp-wide aggregates and initial-value to compute the calling warp's prefix. Also returns block-wide aggregate in all threads. template __device__ __forceinline__ T ComputeWarpPrefix( ScanOp scan_op, ///< [in] Binary scan operator T warp_aggregate, ///< [in] [laneWARP_THREADS - 1 only] Warp-wide aggregate reduction of input items T &block_aggregate, ///< [out] Threadblock-wide aggregate reduction of input items const T &initial_value) ///< [in] Initial value to seed the exclusive scan { T warp_prefix = ComputeWarpPrefix(scan_op, warp_aggregate, block_aggregate); warp_prefix = scan_op(initial_value, warp_prefix); if (warp_id == 0) warp_prefix = initial_value; return warp_prefix; } //--------------------------------------------------------------------- // Exclusive scans //--------------------------------------------------------------------- /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. With no initial value, the output computed for thread0 is undefined. template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input item T &exclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op) ///< [in] Binary scan operator { // Compute block-wide exclusive scan. The exclusive output from tid0 is invalid. 
T block_aggregate; ExclusiveScan(input, exclusive_output, scan_op, block_aggregate); } /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input items T &exclusive_output, ///< [out] Calling thread's output items (may be aliased to \p input) const T &initial_value, ///< [in] Initial value to seed the exclusive scan ScanOp scan_op) ///< [in] Binary scan operator { T block_aggregate; ExclusiveScan(input, exclusive_output, initial_value, scan_op, block_aggregate); } /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. With no initial value, the output computed for thread0 is undefined. template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input item T &exclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan operator T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items { // Compute warp scan in each warp. The exclusive output from each lane0 is invalid. T inclusive_output; WarpScanT(temp_storage.warp_scan[warp_id]).Scan(input, inclusive_output, exclusive_output, scan_op); // Compute the warp-wide prefix and block-wide aggregate for each warp. Warp prefix for warp0 is invalid. T warp_prefix = ComputeWarpPrefix(scan_op, inclusive_output, block_aggregate); // Apply warp prefix to our lane's partial if (warp_id != 0) { exclusive_output = scan_op(warp_prefix, exclusive_output); if (lane_id == 0) exclusive_output = warp_prefix; } } /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input items T &exclusive_output, ///< [out] Calling thread's output items (may be aliased to \p input) const T &initial_value, ///< [in] Initial value to seed the exclusive scan ScanOp scan_op, ///< [in] Binary scan operator T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items { // Compute warp scan in each warp. The exclusive output from each lane0 is invalid. T inclusive_output; WarpScanT(temp_storage.warp_scan[warp_id]).Scan(input, inclusive_output, exclusive_output, scan_op); // Compute the warp-wide prefix and block-wide aggregate for each warp T warp_prefix = ComputeWarpPrefix(scan_op, inclusive_output, block_aggregate, initial_value); // Apply warp prefix to our lane's partial exclusive_output = scan_op(warp_prefix, exclusive_output); if (lane_id == 0) exclusive_output = warp_prefix; } /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. the call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. 
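    /// \par
    /// The call-back mechanism is easiest to see with a small functor sketch (hypothetical
    /// names, shown only to illustrate the contract): the functor is invoked once per scan
    /// by the first warp, receives the block-wide aggregate, and the value returned by
    /// lane0 seeds the scan. A stateful running total lets successive tiles be scanned
    /// with one such call per tile.
    /// \code
    /// // Illustrative running-prefix functor for an int sum scan
    /// struct RunningPrefixOp
    /// {
    ///     int running_total;
    ///
    ///     __device__ RunningPrefixOp(int initial) : running_total(initial) {}
    ///
    ///     // Called by the first warp; lane0's return value becomes the block-wide prefix
    ///     __device__ int operator()(int block_aggregate)
    ///     {
    ///         int old_prefix = running_total;
    ///         running_total += block_aggregate;
    ///         return old_prefix;
    ///     }
    /// };
    /// \endcode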
template < typename ScanOp, typename BlockPrefixCallbackOp> __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input item T &exclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan operator BlockPrefixCallbackOp &block_prefix_callback_op) ///< [in-out] [warp0 only] Call-back functor for specifying a thread block-wide prefix to be applied to all inputs. { // Compute block-wide exclusive scan. The exclusive output from tid0 is invalid. T block_aggregate; ExclusiveScan(input, exclusive_output, scan_op, block_aggregate); // Use the first warp to determine the thread block prefix, returning the result in lane0 if (warp_id == 0) { T block_prefix = block_prefix_callback_op(block_aggregate); if (lane_id == 0) { // Share the prefix with all threads temp_storage.block_prefix = block_prefix; exclusive_output = block_prefix; // The block prefix is the exclusive output for tid0 } } CTA_SYNC(); // Incorporate thread block prefix into outputs T block_prefix = temp_storage.block_prefix; if (linear_tid > 0) { exclusive_output = scan_op(block_prefix, exclusive_output); } } //--------------------------------------------------------------------- // Inclusive scans //--------------------------------------------------------------------- /// Computes an inclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. template __device__ __forceinline__ void InclusiveScan( T input, ///< [in] Calling thread's input item T &inclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op) ///< [in] Binary scan operator { T block_aggregate; InclusiveScan(input, inclusive_output, scan_op, block_aggregate); } /// Computes an inclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. template __device__ __forceinline__ void InclusiveScan( T input, ///< [in] Calling thread's input item T &inclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan operator T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items { WarpScanT(temp_storage.warp_scan[warp_id]).InclusiveScan(input, inclusive_output, scan_op); // Compute the warp-wide prefix and block-wide aggregate for each warp. Warp prefix for warp0 is invalid. T warp_prefix = ComputeWarpPrefix(scan_op, inclusive_output, block_aggregate); // Apply warp prefix to our lane's partial if (warp_id != 0) { inclusive_output = scan_op(warp_prefix, inclusive_output); } } /// Computes an inclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. the call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. 
template < typename ScanOp, typename BlockPrefixCallbackOp> __device__ __forceinline__ void InclusiveScan( T input, ///< [in] Calling thread's input item T &exclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan operator BlockPrefixCallbackOp &block_prefix_callback_op) ///< [in-out] [warp0 only] Call-back functor for specifying a thread block-wide prefix to be applied to all inputs. { T block_aggregate; InclusiveScan(input, exclusive_output, scan_op, block_aggregate); // Use the first warp to determine the thread block prefix, returning the result in lane0 if (warp_id == 0) { T block_prefix = block_prefix_callback_op(block_aggregate); if (lane_id == 0) { // Share the prefix with all threads temp_storage.block_prefix = block_prefix; } } CTA_SYNC(); // Incorporate thread block prefix into outputs T block_prefix = temp_storage.block_prefix; exclusive_output = scan_op(block_prefix, exclusive_output); } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/block/specializations/block_scan_warp_scans2.cuh000066400000000000000000000507251411340063500277630ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::BlockScanWarpscans provides warpscan-based variants of parallel prefix scan across a CUDA thread block. */ #pragma once #include "../../util_arch.cuh" #include "../../util_ptx.cuh" #include "../../warp/warp_scan.cuh" #include "../../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief BlockScanWarpScans provides warpscan-based variants of parallel prefix scan across a CUDA thread block. 
*/ template < typename T, int BLOCK_DIM_X, ///< The thread block length in threads along the X dimension int BLOCK_DIM_Y, ///< The thread block length in threads along the Y dimension int BLOCK_DIM_Z, ///< The thread block length in threads along the Z dimension int PTX_ARCH> ///< The PTX compute capability for which to to specialize this collective struct BlockScanWarpScans { //--------------------------------------------------------------------- // Types and constants //--------------------------------------------------------------------- /// Constants enum { /// Number of warp threads WARP_THREADS = CUB_WARP_THREADS(PTX_ARCH), /// The thread block size in threads BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, /// Number of active warps WARPS = (BLOCK_THREADS + WARP_THREADS - 1) / WARP_THREADS, }; /// WarpScan utility type typedef WarpScan WarpScanT; /// WarpScan utility type typedef WarpScan WarpAggregateScanT; /// Shared memory storage layout type struct _TempStorage { typename WarpAggregateScanT::TempStorage inner_scan[WARPS]; ///< Buffer for warp-synchronous scans typename WarpScanT::TempStorage warp_scan[WARPS]; ///< Buffer for warp-synchronous scans T warp_aggregates[WARPS]; T block_prefix; ///< Shared prefix for the entire thread block }; /// Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; //--------------------------------------------------------------------- // Per-thread fields //--------------------------------------------------------------------- // Thread fields _TempStorage &temp_storage; unsigned int linear_tid; unsigned int warp_id; unsigned int lane_id; //--------------------------------------------------------------------- // Constructors //--------------------------------------------------------------------- /// Constructor __device__ __forceinline__ BlockScanWarpScans( TempStorage &temp_storage) : temp_storage(temp_storage.Alias()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)), warp_id((WARPS == 1) ? 0 : linear_tid / WARP_THREADS), lane_id(LaneId()) {} //--------------------------------------------------------------------- // Utility methods //--------------------------------------------------------------------- template __device__ __forceinline__ void ApplyWarpAggregates( T &warp_prefix, ///< [out] The calling thread's partial reduction ScanOp scan_op, ///< [in] Binary scan operator T &block_aggregate, ///< [out] Threadblock-wide aggregate reduction of input items Int2Type addend_warp) { if (warp_id == WARP) warp_prefix = block_aggregate; T addend = temp_storage.warp_aggregates[WARP]; block_aggregate = scan_op(block_aggregate, addend); ApplyWarpAggregates(warp_prefix, scan_op, block_aggregate, Int2Type()); } template __device__ __forceinline__ void ApplyWarpAggregates( T &warp_prefix, ///< [out] The calling thread's partial reduction ScanOp scan_op, ///< [in] Binary scan operator T &block_aggregate, ///< [out] Threadblock-wide aggregate reduction of input items Int2Type addend_warp) {} /// Use the warp-wide aggregates to compute the calling warp's prefix. Also returns block-wide aggregate in all threads. 
template __device__ __forceinline__ T ComputeWarpPrefix( ScanOp scan_op, ///< [in] Binary scan operator T warp_aggregate, ///< [in] [laneWARP_THREADS - 1 only] Warp-wide aggregate reduction of input items T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items { // Last lane in each warp shares its warp-aggregate if (lane_id == WARP_THREADS - 1) temp_storage.warp_aggregates[warp_id] = warp_aggregate; CTA_SYNC(); // Accumulate block aggregates and save the one that is our warp's prefix T warp_prefix; block_aggregate = temp_storage.warp_aggregates[0]; // Use template unrolling (since the PTX backend can't handle unrolling it for SM1x) ApplyWarpAggregates(warp_prefix, scan_op, block_aggregate, Int2Type<1>()); /* #pragma unroll for (int WARP = 1; WARP < WARPS; ++WARP) { if (warp_id == WARP) warp_prefix = block_aggregate; T addend = temp_storage.warp_aggregates[WARP]; block_aggregate = scan_op(block_aggregate, addend); } */ return warp_prefix; } /// Use the warp-wide aggregates and initial-value to compute the calling warp's prefix. Also returns block-wide aggregate in all threads. template __device__ __forceinline__ T ComputeWarpPrefix( ScanOp scan_op, ///< [in] Binary scan operator T warp_aggregate, ///< [in] [laneWARP_THREADS - 1 only] Warp-wide aggregate reduction of input items T &block_aggregate, ///< [out] Threadblock-wide aggregate reduction of input items const T &initial_value) ///< [in] Initial value to seed the exclusive scan { T warp_prefix = ComputeWarpPrefix(scan_op, warp_aggregate, block_aggregate); warp_prefix = scan_op(initial_value, warp_prefix); if (warp_id == 0) warp_prefix = initial_value; return warp_prefix; } //--------------------------------------------------------------------- // Exclusive scans //--------------------------------------------------------------------- /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. With no initial value, the output computed for thread0 is undefined. template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input item T &exclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op) ///< [in] Binary scan operator { // Compute block-wide exclusive scan. The exclusive output from tid0 is invalid. T block_aggregate; ExclusiveScan(input, exclusive_output, scan_op, block_aggregate); } /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input items T &exclusive_output, ///< [out] Calling thread's output items (may be aliased to \p input) const T &initial_value, ///< [in] Initial value to seed the exclusive scan ScanOp scan_op) ///< [in] Binary scan operator { T block_aggregate; ExclusiveScan(input, exclusive_output, initial_value, scan_op, block_aggregate); } /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. With no initial value, the output computed for thread0 is undefined. 
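    /// \par
    /// The exclusive-scan-plus-aggregate form is what drives block-level stream compaction;
    /// a hedged sketch against the public cub::BlockScan wrapper follows (kernel and buffer
    /// names are illustrative). The snippet uses the ExclusiveSum convenience, whose thread0
    /// output is a well-defined zero, unlike the unseeded generic overload documented above.
    /// \code
    /// #include <cub/cub.cuh>
    ///
    /// __global__ void CompactKernel(const int *d_in, int *d_out, int *d_num_selected)
    /// {
    ///     typedef cub::BlockScan<int, 128, cub::BLOCK_SCAN_WARP_SCANS> BlockScan;
    ///     __shared__ typename BlockScan::TempStorage temp_storage;
    ///
    ///     int item = d_in[threadIdx.x];
    ///     int flag = (item > 0) ? 1 : 0;   // keep positive items
    ///     int offset, num_selected;
    ///
    ///     // Exclusive sum of the flags gives each thread its output slot;
    ///     // the aggregate is the total number of selected items
    ///     BlockScan(temp_storage).ExclusiveSum(flag, offset, num_selected);
    ///
    ///     if (flag)
    ///         d_out[offset] = item;
    ///     if (threadIdx.x == 0)
    ///         *d_num_selected = num_selected;
    /// }
    /// \endcode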
template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input item T &exclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan operator T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items { WarpScanT my_warp_scan(temp_storage.warp_scan[warp_id]); // Compute warp scan in each warp. The exclusive output from each lane0 is invalid. T inclusive_output; my_warp_scan.Scan(input, inclusive_output, exclusive_output, scan_op); // Compute the warp-wide prefix and block-wide aggregate for each warp. Warp prefix for warp0 is invalid. // T warp_prefix = ComputeWarpPrefix(scan_op, inclusive_output, block_aggregate); //-------------------------------------------------- // Last lane in each warp shares its warp-aggregate if (lane_id == WARP_THREADS - 1) temp_storage.warp_aggregates[warp_id] = inclusive_output; CTA_SYNC(); // Get the warp scan partial T warp_inclusive, warp_prefix; if (lane_id < WARPS) { // Scan the warpscan partials T warp_val = temp_storage.warp_aggregates[lane_id]; WarpAggregateScanT(temp_storage.inner_scan[warp_id]).Scan(warp_val, warp_inclusive, warp_prefix, scan_op); } warp_prefix = my_warp_scan.Broadcast(warp_prefix, warp_id); block_aggregate = my_warp_scan.Broadcast(warp_inclusive, WARPS - 1); //-------------------------------------------------- // Apply warp prefix to our lane's partial if (warp_id != 0) { exclusive_output = scan_op(warp_prefix, exclusive_output); if (lane_id == 0) exclusive_output = warp_prefix; } } /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input items T &exclusive_output, ///< [out] Calling thread's output items (may be aliased to \p input) const T &initial_value, ///< [in] Initial value to seed the exclusive scan ScanOp scan_op, ///< [in] Binary scan operator T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items { WarpScanT my_warp_scan(temp_storage.warp_scan[warp_id]); // Compute warp scan in each warp. The exclusive output from each lane0 is invalid. T inclusive_output; my_warp_scan.Scan(input, inclusive_output, exclusive_output, scan_op); // Compute the warp-wide prefix and block-wide aggregate for each warp // T warp_prefix = ComputeWarpPrefix(scan_op, inclusive_output, block_aggregate, initial_value); //-------------------------------------------------- // Last lane in each warp shares its warp-aggregate if (lane_id == WARP_THREADS - 1) temp_storage.warp_aggregates[warp_id] = inclusive_output; CTA_SYNC(); // Get the warp scan partial T warp_inclusive, warp_prefix; if (lane_id < WARPS) { // Scan the warpscan partials T warp_val = temp_storage.warp_aggregates[lane_id]; WarpAggregateScanT(temp_storage.inner_scan[warp_id]).Scan(warp_val, warp_inclusive, warp_prefix, initial_value, scan_op); } warp_prefix = my_warp_scan.Broadcast(warp_prefix, warp_id); block_aggregate = my_warp_scan.Broadcast(warp_inclusive, WARPS - 1); //-------------------------------------------------- // Apply warp prefix to our lane's partial exclusive_output = scan_op(warp_prefix, exclusive_output); if (lane_id == 0) exclusive_output = warp_prefix; } /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. 
Each thread contributes one input element. the call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. template < typename ScanOp, typename BlockPrefixCallbackOp> __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input item T &exclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan operator BlockPrefixCallbackOp &block_prefix_callback_op) ///< [in-out] [warp0 only] Call-back functor for specifying a thread block-wide prefix to be applied to all inputs. { // Compute block-wide exclusive scan. The exclusive output from tid0 is invalid. T block_aggregate; ExclusiveScan(input, exclusive_output, scan_op, block_aggregate); // Use the first warp to determine the thread block prefix, returning the result in lane0 if (warp_id == 0) { T block_prefix = block_prefix_callback_op(block_aggregate); if (lane_id == 0) { // Share the prefix with all threads temp_storage.block_prefix = block_prefix; exclusive_output = block_prefix; // The block prefix is the exclusive output for tid0 } } CTA_SYNC(); // Incorporate thread block prefix into outputs T block_prefix = temp_storage.block_prefix; if (linear_tid > 0) { exclusive_output = scan_op(block_prefix, exclusive_output); } } //--------------------------------------------------------------------- // Inclusive scans //--------------------------------------------------------------------- /// Computes an inclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. template __device__ __forceinline__ void InclusiveScan( T input, ///< [in] Calling thread's input item T &inclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op) ///< [in] Binary scan operator { T block_aggregate; InclusiveScan(input, inclusive_output, scan_op, block_aggregate); } /// Computes an inclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. template __device__ __forceinline__ void InclusiveScan( T input, ///< [in] Calling thread's input item T &inclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan operator T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items { WarpScanT(temp_storage.warp_scan[warp_id]).InclusiveScan(input, inclusive_output, scan_op); // Compute the warp-wide prefix and block-wide aggregate for each warp. Warp prefix for warp0 is invalid. T warp_prefix = ComputeWarpPrefix(scan_op, inclusive_output, block_aggregate); // Apply warp prefix to our lane's partial if (warp_id != 0) { inclusive_output = scan_op(warp_prefix, inclusive_output); } } /// Computes an inclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. the call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. 
Also provides every thread with the block-wide \p block_aggregate of all inputs. template < typename ScanOp, typename BlockPrefixCallbackOp> __device__ __forceinline__ void InclusiveScan( T input, ///< [in] Calling thread's input item T &exclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan operator BlockPrefixCallbackOp &block_prefix_callback_op) ///< [in-out] [warp0 only] Call-back functor for specifying a thread block-wide prefix to be applied to all inputs. { T block_aggregate; InclusiveScan(input, exclusive_output, scan_op, block_aggregate); // Use the first warp to determine the thread block prefix, returning the result in lane0 if (warp_id == 0) { T block_prefix = block_prefix_callback_op(block_aggregate); if (lane_id == 0) { // Share the prefix with all threads temp_storage.block_prefix = block_prefix; } } CTA_SYNC(); // Incorporate thread block prefix into outputs T block_prefix = temp_storage.block_prefix; exclusive_output = scan_op(block_prefix, exclusive_output); } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/block/specializations/block_scan_warp_scans3.cuh000066400000000000000000000460071411340063500277620ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::BlockScanWarpscans provides warpscan-based variants of parallel prefix scan across a CUDA thread block. */ #pragma once #include "../../util_arch.cuh" #include "../../util_ptx.cuh" #include "../../warp/warp_scan.cuh" #include "../../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief BlockScanWarpScans provides warpscan-based variants of parallel prefix scan across a CUDA thread block. 
*/ template < typename T, int BLOCK_DIM_X, ///< The thread block length in threads along the X dimension int BLOCK_DIM_Y, ///< The thread block length in threads along the Y dimension int BLOCK_DIM_Z, ///< The thread block length in threads along the Z dimension int PTX_ARCH> ///< The PTX compute capability for which to to specialize this collective struct BlockScanWarpScans { //--------------------------------------------------------------------- // Types and constants //--------------------------------------------------------------------- /// Constants enum { /// The thread block size in threads BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, /// Number of warp threads INNER_WARP_THREADS = CUB_WARP_THREADS(PTX_ARCH), OUTER_WARP_THREADS = BLOCK_THREADS / INNER_WARP_THREADS, /// Number of outer scan warps OUTER_WARPS = INNER_WARP_THREADS }; /// Outer WarpScan utility type typedef WarpScan OuterWarpScanT; /// Inner WarpScan utility type typedef WarpScan InnerWarpScanT; typedef typename OuterWarpScanT::TempStorage OuterScanArray[OUTER_WARPS]; /// Shared memory storage layout type struct _TempStorage { union Aliasable { Uninitialized outer_warp_scan; ///< Buffer for warp-synchronous outer scans typename InnerWarpScanT::TempStorage inner_warp_scan; ///< Buffer for warp-synchronous inner scan } aliasable; T warp_aggregates[OUTER_WARPS]; T block_aggregate; ///< Shared prefix for the entire thread block }; /// Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; //--------------------------------------------------------------------- // Per-thread fields //--------------------------------------------------------------------- // Thread fields _TempStorage &temp_storage; unsigned int linear_tid; unsigned int warp_id; unsigned int lane_id; //--------------------------------------------------------------------- // Constructors //--------------------------------------------------------------------- /// Constructor __device__ __forceinline__ BlockScanWarpScans( TempStorage &temp_storage) : temp_storage(temp_storage.Alias()), linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)), warp_id((OUTER_WARPS == 1) ? 0 : linear_tid / OUTER_WARP_THREADS), lane_id((OUTER_WARPS == 1) ? linear_tid : linear_tid % OUTER_WARP_THREADS) {} //--------------------------------------------------------------------- // Exclusive scans //--------------------------------------------------------------------- /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. With no initial value, the output computed for thread0 is undefined. template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input item T &exclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op) ///< [in] Binary scan operator { // Compute block-wide exclusive scan. The exclusive output from tid0 is invalid. T block_aggregate; ExclusiveScan(input, exclusive_output, scan_op, block_aggregate); } /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. 
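    /// \par
    /// As with the other files in this directory, this variant is not used directly; which
    /// specialization backs a scan is chosen through the algorithm parameter of the public
    /// cub::BlockScan wrapper. A hedged illustration of that selection (type names only,
    /// nothing below is specific to this header):
    /// \code
    /// #include <cub/cub.cuh>
    ///
    /// // Raking strategy: few raking threads, longer serial segments
    /// typedef cub::BlockScan<int, 128, cub::BLOCK_SCAN_RAKING>         RakingScanT;
    ///
    /// // Raking strategy that also buffers segments in registers
    /// typedef cub::BlockScan<int, 128, cub::BLOCK_SCAN_RAKING_MEMOIZE> MemoizedScanT;
    ///
    /// // Warp-scans strategy: shallower, more parallel
    /// typedef cub::BlockScan<int, 128, cub::BLOCK_SCAN_WARP_SCANS>     WarpScansScanT;
    /// \endcode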
template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input items T &exclusive_output, ///< [out] Calling thread's output items (may be aliased to \p input) const T &initial_value, ///< [in] Initial value to seed the exclusive scan ScanOp scan_op) ///< [in] Binary scan operator { T block_aggregate; ExclusiveScan(input, exclusive_output, initial_value, scan_op, block_aggregate); } /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. With no initial value, the output computed for thread0 is undefined. template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input item T &exclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan operator T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items { // Compute warp scan in each warp. The exclusive output from each lane0 is invalid. T inclusive_output; OuterWarpScanT(temp_storage.aliasable.outer_warp_scan.Alias()[warp_id]).Scan( input, inclusive_output, exclusive_output, scan_op); // Share outer warp total if (lane_id == OUTER_WARP_THREADS - 1) temp_storage.warp_aggregates[warp_id] = inclusive_output; CTA_SYNC(); if (linear_tid < INNER_WARP_THREADS) { T outer_warp_input = temp_storage.warp_aggregates[linear_tid]; T outer_warp_exclusive; InnerWarpScanT(temp_storage.aliasable.inner_warp_scan).ExclusiveScan( outer_warp_input, outer_warp_exclusive, scan_op, block_aggregate); temp_storage.block_aggregate = block_aggregate; temp_storage.warp_aggregates[linear_tid] = outer_warp_exclusive; } CTA_SYNC(); if (warp_id != 0) { // Retrieve block aggregate block_aggregate = temp_storage.block_aggregate; // Apply warp prefix to our lane's partial T outer_warp_exclusive = temp_storage.warp_aggregates[warp_id]; exclusive_output = scan_op(outer_warp_exclusive, exclusive_output); if (lane_id == 0) exclusive_output = outer_warp_exclusive; } } /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input items T &exclusive_output, ///< [out] Calling thread's output items (may be aliased to \p input) const T &initial_value, ///< [in] Initial value to seed the exclusive scan ScanOp scan_op, ///< [in] Binary scan operator T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items { // Compute warp scan in each warp. The exclusive output from each lane0 is invalid. 
T inclusive_output; OuterWarpScanT(temp_storage.aliasable.outer_warp_scan.Alias()[warp_id]).Scan( input, inclusive_output, exclusive_output, scan_op); // Share outer warp total if (lane_id == OUTER_WARP_THREADS - 1) { temp_storage.warp_aggregates[warp_id] = inclusive_output; } CTA_SYNC(); if (linear_tid < INNER_WARP_THREADS) { T outer_warp_input = temp_storage.warp_aggregates[linear_tid]; T outer_warp_exclusive; InnerWarpScanT(temp_storage.aliasable.inner_warp_scan).ExclusiveScan( outer_warp_input, outer_warp_exclusive, initial_value, scan_op, block_aggregate); temp_storage.block_aggregate = block_aggregate; temp_storage.warp_aggregates[linear_tid] = outer_warp_exclusive; } CTA_SYNC(); // Retrieve block aggregate block_aggregate = temp_storage.block_aggregate; // Apply warp prefix to our lane's partial T outer_warp_exclusive = temp_storage.warp_aggregates[warp_id]; exclusive_output = scan_op(outer_warp_exclusive, exclusive_output); if (lane_id == 0) exclusive_output = outer_warp_exclusive; } /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. The call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. template < typename ScanOp, typename BlockPrefixCallbackOp> __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input item T &exclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan operator BlockPrefixCallbackOp &block_prefix_callback_op) ///< [in-out] [warp0 only] Call-back functor for specifying a thread block-wide prefix to be applied to all inputs. { // Compute warp scan in each warp. The exclusive output from each lane0 is invalid. T inclusive_output; OuterWarpScanT(temp_storage.aliasable.outer_warp_scan.Alias()[warp_id]).Scan( input, inclusive_output, exclusive_output, scan_op); // Share outer warp total if (lane_id == OUTER_WARP_THREADS - 1) temp_storage.warp_aggregates[warp_id] = inclusive_output; CTA_SYNC(); if (linear_tid < INNER_WARP_THREADS) { InnerWarpScanT inner_scan(temp_storage.aliasable.inner_warp_scan); T upsweep = temp_storage.warp_aggregates[linear_tid]; T downsweep_prefix, block_aggregate; inner_scan.ExclusiveScan(upsweep, downsweep_prefix, scan_op, block_aggregate); // Use callback functor to get block prefix in lane0 and then broadcast to other lanes T block_prefix = block_prefix_callback_op(block_aggregate); block_prefix = inner_scan.Broadcast(block_prefix, 0); downsweep_prefix = scan_op(block_prefix, downsweep_prefix); if (linear_tid == 0) downsweep_prefix = block_prefix; temp_storage.warp_aggregates[linear_tid] = downsweep_prefix; } CTA_SYNC(); // Apply warp prefix to our lane's partial (or assign it if partial is invalid) T outer_warp_exclusive = temp_storage.warp_aggregates[warp_id]; exclusive_output = scan_op(outer_warp_exclusive, exclusive_output); if (lane_id == 0) exclusive_output = outer_warp_exclusive; } //--------------------------------------------------------------------- // Inclusive scans //--------------------------------------------------------------------- /// Computes an inclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. 
template __device__ __forceinline__ void InclusiveScan( T input, ///< [in] Calling thread's input item T &inclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op) ///< [in] Binary scan operator { T block_aggregate; InclusiveScan(input, inclusive_output, scan_op, block_aggregate); } /// Computes an inclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. template __device__ __forceinline__ void InclusiveScan( T input, ///< [in] Calling thread's input item T &inclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan operator T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items { // Compute warp scan in each warp. The exclusive output from each lane0 is invalid. OuterWarpScanT(temp_storage.aliasable.outer_warp_scan.Alias()[warp_id]).InclusiveScan( input, inclusive_output, scan_op); // Share outer warp total if (lane_id == OUTER_WARP_THREADS - 1) temp_storage.warp_aggregates[warp_id] = inclusive_output; CTA_SYNC(); if (linear_tid < INNER_WARP_THREADS) { T outer_warp_input = temp_storage.warp_aggregates[linear_tid]; T outer_warp_exclusive; InnerWarpScanT(temp_storage.aliasable.inner_warp_scan).ExclusiveScan( outer_warp_input, outer_warp_exclusive, scan_op, block_aggregate); temp_storage.block_aggregate = block_aggregate; temp_storage.warp_aggregates[linear_tid] = outer_warp_exclusive; } CTA_SYNC(); if (warp_id != 0) { // Retrieve block aggregate block_aggregate = temp_storage.block_aggregate; // Apply warp prefix to our lane's partial T outer_warp_exclusive = temp_storage.warp_aggregates[warp_id]; inclusive_output = scan_op(outer_warp_exclusive, inclusive_output); } } /// Computes an inclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. the call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. template < typename ScanOp, typename BlockPrefixCallbackOp> __device__ __forceinline__ void InclusiveScan( T input, ///< [in] Calling thread's input item T &inclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan operator BlockPrefixCallbackOp &block_prefix_callback_op) ///< [in-out] [warp0 only] Call-back functor for specifying a thread block-wide prefix to be applied to all inputs. { // Compute warp scan in each warp. The exclusive output from each lane0 is invalid. 
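// Strategy (two-level scan with a caller-supplied prefix):
//   (1) each "outer" warp scans its own inputs;
//   (2) a single "inner" warp exclusive-scans the per-warp totals; the first warp
//       invokes block_prefix_callback_op with the block aggregate, and lane0's
//       return value, broadcast across that warp, supplies the block-wide prefix
//       that is folded into the per-warp prefixes;
//   (3) every thread applies its warp's prefix to its inclusive partial.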
OuterWarpScanT(temp_storage.aliasable.outer_warp_scan.Alias()[warp_id]).InclusiveScan( input, inclusive_output, scan_op); // Share outer warp total if (lane_id == OUTER_WARP_THREADS - 1) temp_storage.warp_aggregates[warp_id] = inclusive_output; CTA_SYNC(); if (linear_tid < INNER_WARP_THREADS) { InnerWarpScanT inner_scan(temp_storage.aliasable.inner_warp_scan); T upsweep = temp_storage.warp_aggregates[linear_tid]; T downsweep_prefix, block_aggregate; inner_scan.ExclusiveScan(upsweep, downsweep_prefix, scan_op, block_aggregate); // Use callback functor to get block prefix in lane0 and then broadcast to other lanes T block_prefix = block_prefix_callback_op(block_aggregate); block_prefix = inner_scan.Broadcast(block_prefix, 0); downsweep_prefix = scan_op(block_prefix, downsweep_prefix); if (linear_tid == 0) downsweep_prefix = block_prefix; temp_storage.warp_aggregates[linear_tid] = downsweep_prefix; } CTA_SYNC(); // Apply warp prefix to our lane's partial T outer_warp_exclusive = temp_storage.warp_aggregates[warp_id]; inclusive_output = scan_op(outer_warp_exclusive, inclusive_output); } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/cub.cuh000066400000000000000000000070521411340063500176340ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ******************************************************************************/ /** * \file * CUB umbrella include file */ #pragma once // Block #include "block/block_histogram.cuh" #include "block/block_discontinuity.cuh" #include "block/block_exchange.cuh" #include "block/block_load.cuh" #include "block/block_radix_rank.cuh" #include "block/block_radix_sort.cuh" #include "block/block_reduce.cuh" #include "block/block_scan.cuh" #include "block/block_store.cuh" //#include "block/block_shift.cuh" // Device #include "device/device_histogram.cuh" #include "device/device_partition.cuh" #include "device/device_radix_sort.cuh" #include "device/device_reduce.cuh" #include "device/device_run_length_encode.cuh" #include "device/device_scan.cuh" #include "device/device_segmented_radix_sort.cuh" #include "device/device_segmented_reduce.cuh" #include "device/device_select.cuh" #include "device/device_spmv.cuh" // Grid //#include "grid/grid_barrier.cuh" #include "grid/grid_even_share.cuh" #include "grid/grid_mapping.cuh" #include "grid/grid_queue.cuh" // Thread #include "thread/thread_load.cuh" #include "thread/thread_operators.cuh" #include "thread/thread_reduce.cuh" #include "thread/thread_scan.cuh" #include "thread/thread_store.cuh" // Warp #include "warp/warp_reduce.cuh" #include "warp/warp_scan.cuh" // Iterator #include "iterator/arg_index_input_iterator.cuh" #include "iterator/cache_modified_input_iterator.cuh" #include "iterator/cache_modified_output_iterator.cuh" #include "iterator/constant_input_iterator.cuh" #include "iterator/counting_input_iterator.cuh" #include "iterator/tex_obj_input_iterator.cuh" #include "iterator/tex_ref_input_iterator.cuh" #include "iterator/transform_input_iterator.cuh" // Util #include "util_arch.cuh" #include "util_debug.cuh" #include "util_device.cuh" #include "util_macro.cuh" #include "util_ptx.cuh" #include "util_type.cuh" relion-3.1.3/src/gpu_utils/cub/device/000077500000000000000000000000001411340063500176155ustar00rootroot00000000000000relion-3.1.3/src/gpu_utils/cub/device/device_histogram.cuh000066400000000000000000001521131411340063500236350ustar00rootroot00000000000000 /****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::DeviceHistogram provides device-wide parallel operations for constructing histogram(s) from a sequence of samples data residing within device-accessible memory. */ #pragma once #include #include #include #include "dispatch/dispatch_histogram.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief DeviceHistogram provides device-wide parallel operations for constructing histogram(s) from a sequence of samples data residing within device-accessible memory. ![](histogram_logo.png) * \ingroup SingleModule * * \par Overview * A histogram * counts the number of observations that fall into each of the disjoint categories (known as bins). * * \par Usage Considerations * \cdp_class{DeviceHistogram} * */ struct DeviceHistogram { /******************************************************************//** * \name Evenly-segmented bin ranges *********************************************************************/ //@{ /** * \brief Computes an intensity histogram from a sequence of data samples using equal-width bins. * * \par * - The number of histogram bins is (\p num_levels - 1) * - All bins comprise the same width of sample values: (\p upper_level - \p lower_level) / (\p num_levels - 1) * - \devicestorage * * \par Snippet * The code snippet below illustrates the computation of a six-bin histogram * from a sequence of float samples * * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input samples and * // output histogram * int num_samples; // e.g., 10 * float* d_samples; // e.g., [2.2, 6.0, 7.1, 2.9, 3.5, 0.3, 2.9, 2.0, 6.1, 999.5] * int* d_histogram; // e.g., [ -, -, -, -, -, -, -, -] * int num_levels; // e.g., 7 (seven level boundaries for six bins) * float lower_level; // e.g., 0.0 (lower sample value boundary of lowest bin) * float upper_level; // e.g., 12.0 (upper sample value boundary of upper bin) * ... * * // Determine temporary device storage requirements * void* d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceHistogram::HistogramEven(d_temp_storage, temp_storage_bytes, * d_samples, d_histogram, num_levels, lower_level, upper_level, num_samples); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Compute histograms * cub::DeviceHistogram::HistogramEven(d_temp_storage, temp_storage_bytes, * d_samples, d_histogram, num_levels, lower_level, upper_level, num_samples); * * // d_histogram <-- [1, 0, 5, 0, 3, 0, 0, 0]; * * \endcode * * \tparam SampleIteratorT [inferred] Random-access input iterator type for reading input samples. \iterator * \tparam CounterT [inferred] Integer type for histogram bin counters * \tparam LevelT [inferred] Type for specifying boundaries (levels) * \tparam OffsetT [inferred] Signed integer type for sequence offsets, list lengths, pointer differences, etc. 
\offset_size1 */ template < typename SampleIteratorT, typename CounterT, typename LevelT, typename OffsetT> CUB_RUNTIME_FUNCTION static cudaError_t HistogramEven( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation SampleIteratorT d_samples, ///< [in] The pointer to the input sequence of data samples. CounterT* d_histogram, ///< [out] The pointer to the histogram counter output array of length num_levels - 1. int num_levels, ///< [in] The number of boundaries (levels) for delineating histogram samples. Implies that the number of bins is num_levels - 1. LevelT lower_level, ///< [in] The lower sample value bound (inclusive) for the lowest histogram bin. LevelT upper_level, ///< [in] The upper sample value bound (exclusive) for the highest histogram bin. OffsetT num_samples, ///< [in] The number of input samples (i.e., the length of \p d_samples) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. { /// The sample value type of the input iterator typedef typename std::iterator_traits::value_type SampleT; CounterT* d_histogram1[1] = {d_histogram}; int num_levels1[1] = {num_levels}; LevelT lower_level1[1] = {lower_level}; LevelT upper_level1[1] = {upper_level}; return MultiHistogramEven<1, 1>( d_temp_storage, temp_storage_bytes, d_samples, d_histogram1, num_levels1, lower_level1, upper_level1, num_samples, 1, sizeof(SampleT) * num_samples, stream, debug_synchronous); } /** * \brief Computes an intensity histogram from a sequence of data samples using equal-width bins. * * \par * - A two-dimensional region of interest within \p d_samples can be specified * using the \p num_row_samples, num_rows, and \p row_stride_bytes parameters. * - The row stride must be a whole multiple of the sample data type * size, i.e., (row_stride_bytes % sizeof(SampleT)) == 0. * - The number of histogram bins is (\p num_levels - 1) * - All bins comprise the same width of sample values: (\p upper_level - \p lower_level) / (\p num_levels - 1) * - \devicestorage * * \par Snippet * The code snippet below illustrates the computation of a six-bin histogram * from a 2x5 region of interest within a flattened 2x7 array of float samples. * * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input samples and * // output histogram * int num_row_samples; // e.g., 5 * int num_rows; // e.g., 2; * size_t row_stride_bytes; // e.g., 7 * sizeof(float) * float* d_samples; // e.g., [2.2, 6.0, 7.1, 2.9, 3.5, -, -, * // 0.3, 2.9, 2.0, 6.1, 999.5, -, -] * int* d_histogram; // e.g., [ -, -, -, -, -, -, -, -] * int num_levels; // e.g., 7 (seven level boundaries for six bins) * float lower_level; // e.g., 0.0 (lower sample value boundary of lowest bin) * float upper_level; // e.g., 12.0 (upper sample value boundary of upper bin) * ... 
* * // Determine temporary device storage requirements * void* d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceHistogram::HistogramEven(d_temp_storage, temp_storage_bytes, * d_samples, d_histogram, num_levels, lower_level, upper_level, * num_row_samples, num_rows, row_stride_bytes); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Compute histograms * cub::DeviceHistogram::HistogramEven(d_temp_storage, temp_storage_bytes, d_samples, d_histogram, * d_samples, d_histogram, num_levels, lower_level, upper_level, * num_row_samples, num_rows, row_stride_bytes); * * // d_histogram <-- [1, 0, 5, 0, 3, 0, 0, 0]; * * \endcode * * \tparam SampleIteratorT [inferred] Random-access input iterator type for reading input samples. \iterator * \tparam CounterT [inferred] Integer type for histogram bin counters * \tparam LevelT [inferred] Type for specifying boundaries (levels) * \tparam OffsetT [inferred] Signed integer type for sequence offsets, list lengths, pointer differences, etc. \offset_size1 */ template < typename SampleIteratorT, typename CounterT, typename LevelT, typename OffsetT> CUB_RUNTIME_FUNCTION static cudaError_t HistogramEven( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation SampleIteratorT d_samples, ///< [in] The pointer to the input sequence of data samples. CounterT* d_histogram, ///< [out] The pointer to the histogram counter output array of length num_levels - 1. int num_levels, ///< [in] The number of boundaries (levels) for delineating histogram samples. Implies that the number of bins is num_levels - 1. LevelT lower_level, ///< [in] The lower sample value bound (inclusive) for the lowest histogram bin. LevelT upper_level, ///< [in] The upper sample value bound (exclusive) for the highest histogram bin. OffsetT num_row_samples, ///< [in] The number of data samples per row in the region of interest OffsetT num_rows, ///< [in] The number of rows in the region of interest size_t row_stride_bytes, ///< [in] The number of bytes between starts of consecutive rows in the region of interest cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. { CounterT* d_histogram1[1] = {d_histogram}; int num_levels1[1] = {num_levels}; LevelT lower_level1[1] = {lower_level}; LevelT upper_level1[1] = {upper_level}; return MultiHistogramEven<1, 1>( d_temp_storage, temp_storage_bytes, d_samples, d_histogram1, num_levels1, lower_level1, upper_level1, num_row_samples, num_rows, row_stride_bytes, stream, debug_synchronous); } /** * \brief Computes per-channel intensity histograms from a sequence of multi-channel "pixel" data samples using equal-width bins. * * \par * - The input is a sequence of pixel structures, where each pixel comprises * a record of \p NUM_CHANNELS consecutive data samples (e.g., an RGBA pixel). * - Of the \p NUM_CHANNELS specified, the function will only compute histograms * for the first \p NUM_ACTIVE_CHANNELS (e.g., only RGB histograms from RGBA * pixel samples). * - The number of histogram bins for channeli is num_levels[i] - 1. 
* - For channeli, the range of values for all histogram bins * have the same width: (upper_level[i] - lower_level[i]) / ( num_levels[i] - 1) * - \devicestorage * * \par Snippet * The code snippet below illustrates the computation of three 256-bin RGB histograms * from a quad-channel sequence of RGBA pixels (8 bits per channel per pixel) * * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input samples * // and output histograms * int num_pixels; // e.g., 5 * unsigned char* d_samples; // e.g., [(2, 6, 7, 5), (3, 0, 2, 1), (7, 0, 6, 2), * // (0, 6, 7, 5), (3, 0, 2, 6)] * int* d_histogram[3]; // e.g., three device pointers to three device buffers, * // each allocated with 256 integer counters * int num_levels[3]; // e.g., {257, 257, 257}; * unsigned int lower_level[3]; // e.g., {0, 0, 0}; * unsigned int upper_level[3]; // e.g., {256, 256, 256}; * ... * * // Determine temporary device storage requirements * void* d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceHistogram::MultiHistogramEven<4, 3>(d_temp_storage, temp_storage_bytes, * d_samples, d_histogram, num_levels, lower_level, upper_level, num_pixels); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Compute histograms * cub::DeviceHistogram::MultiHistogramEven<4, 3>(d_temp_storage, temp_storage_bytes, * d_samples, d_histogram, num_levels, lower_level, upper_level, num_pixels); * * // d_histogram <-- [ [1, 0, 1, 2, 0, 0, 0, 1, 0, 0, 0, ..., 0], * // [0, 3, 0, 0, 0, 0, 2, 0, 0, 0, 0, ..., 0], * // [0, 0, 2, 0, 0, 0, 1, 2, 0, 0, 0, ..., 0] ] * * \endcode * * \tparam NUM_CHANNELS Number of channels interleaved in the input data (may be greater than the number of channels being actively histogrammed) * \tparam NUM_ACTIVE_CHANNELS [inferred] Number of channels actively being histogrammed * \tparam SampleIteratorT [inferred] Random-access input iterator type for reading input samples. \iterator * \tparam CounterT [inferred] Integer type for histogram bin counters * \tparam LevelT [inferred] Type for specifying boundaries (levels) * \tparam OffsetT [inferred] Signed integer type for sequence offsets, list lengths, pointer differences, etc. \offset_size1 */ template < int NUM_CHANNELS, int NUM_ACTIVE_CHANNELS, typename SampleIteratorT, typename CounterT, typename LevelT, typename OffsetT> CUB_RUNTIME_FUNCTION static cudaError_t MultiHistogramEven( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation SampleIteratorT d_samples, ///< [in] The pointer to the multi-channel input sequence of data samples. The samples from different channels are assumed to be interleaved (e.g., an array of 32-bit pixels where each pixel consists of four RGBA 8-bit samples). CounterT* d_histogram[NUM_ACTIVE_CHANNELS], ///< [out] The pointers to the histogram counter output arrays, one for each active channel. For channeli, the allocation length of d_histogram[i] should be num_levels[i] - 1. int num_levels[NUM_ACTIVE_CHANNELS], ///< [in] The number of boundaries (levels) for delineating histogram samples in each active channel. Implies that the number of bins for channeli is num_levels[i] - 1. 
LevelT lower_level[NUM_ACTIVE_CHANNELS], ///< [in] The lower sample value bound (inclusive) for the lowest histogram bin in each active channel. LevelT upper_level[NUM_ACTIVE_CHANNELS], ///< [in] The upper sample value bound (exclusive) for the highest histogram bin in each active channel. OffsetT num_pixels, ///< [in] The number of multi-channel pixels (i.e., the length of \p d_samples / NUM_CHANNELS) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. { /// The sample value type of the input iterator typedef typename std::iterator_traits::value_type SampleT; return MultiHistogramEven( d_temp_storage, temp_storage_bytes, d_samples, d_histogram, num_levels, lower_level, upper_level, num_pixels, 1, sizeof(SampleT) * NUM_CHANNELS * num_pixels, stream, debug_synchronous); } /** * \brief Computes per-channel intensity histograms from a sequence of multi-channel "pixel" data samples using equal-width bins. * * \par * - The input is a sequence of pixel structures, where each pixel comprises * a record of \p NUM_CHANNELS consecutive data samples (e.g., an RGBA pixel). * - Of the \p NUM_CHANNELS specified, the function will only compute histograms * for the first \p NUM_ACTIVE_CHANNELS (e.g., only RGB histograms from RGBA * pixel samples). * - A two-dimensional region of interest within \p d_samples can be specified * using the \p num_row_samples, num_rows, and \p row_stride_bytes parameters. * - The row stride must be a whole multiple of the sample data type * size, i.e., (row_stride_bytes % sizeof(SampleT)) == 0. * - The number of histogram bins for channeli is num_levels[i] - 1. * - For channeli, the range of values for all histogram bins * have the same width: (upper_level[i] - lower_level[i]) / ( num_levels[i] - 1) * - \devicestorage * * \par Snippet * The code snippet below illustrates the computation of three 256-bin RGB histograms from a 2x3 region of * interest of within a flattened 2x4 array of quad-channel RGBA pixels (8 bits per channel per pixel). * * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input samples * // and output histograms * int num_row_pixels; // e.g., 3 * int num_rows; // e.g., 2 * size_t row_stride_bytes; // e.g., 4 * sizeof(unsigned char) * NUM_CHANNELS * unsigned char* d_samples; // e.g., [(2, 6, 7, 5), (3, 0, 2, 1), (7, 0, 6, 2), (-, -, -, -), * // (0, 6, 7, 5), (3, 0, 2, 6), (1, 1, 1, 1), (-, -, -, -)] * int* d_histogram[3]; // e.g., three device pointers to three device buffers, * // each allocated with 256 integer counters * int num_levels[3]; // e.g., {257, 257, 257}; * unsigned int lower_level[3]; // e.g., {0, 0, 0}; * unsigned int upper_level[3]; // e.g., {256, 256, 256}; * ... 
* * // Determine temporary device storage requirements * void* d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceHistogram::MultiHistogramEven<4, 3>(d_temp_storage, temp_storage_bytes, * d_samples, d_histogram, num_levels, lower_level, upper_level, * num_row_pixels, num_rows, row_stride_bytes); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Compute histograms * cub::DeviceHistogram::MultiHistogramEven<4, 3>(d_temp_storage, temp_storage_bytes, * d_samples, d_histogram, num_levels, lower_level, upper_level, * num_row_pixels, num_rows, row_stride_bytes); * * // d_histogram <-- [ [1, 1, 1, 2, 0, 0, 0, 1, 0, 0, 0, ..., 0], * // [0, 4, 0, 0, 0, 0, 2, 0, 0, 0, 0, ..., 0], * // [0, 1, 2, 0, 0, 0, 1, 2, 0, 0, 0, ..., 0] ] * * \endcode * * \tparam NUM_CHANNELS Number of channels interleaved in the input data (may be greater than the number of channels being actively histogrammed) * \tparam NUM_ACTIVE_CHANNELS [inferred] Number of channels actively being histogrammed * \tparam SampleIteratorT [inferred] Random-access input iterator type for reading input samples. \iterator * \tparam CounterT [inferred] Integer type for histogram bin counters * \tparam LevelT [inferred] Type for specifying boundaries (levels) * \tparam OffsetT [inferred] Signed integer type for sequence offsets, list lengths, pointer differences, etc. \offset_size1 */ template < int NUM_CHANNELS, int NUM_ACTIVE_CHANNELS, typename SampleIteratorT, typename CounterT, typename LevelT, typename OffsetT> CUB_RUNTIME_FUNCTION static cudaError_t MultiHistogramEven( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation SampleIteratorT d_samples, ///< [in] The pointer to the multi-channel input sequence of data samples. The samples from different channels are assumed to be interleaved (e.g., an array of 32-bit pixels where each pixel consists of four RGBA 8-bit samples). CounterT* d_histogram[NUM_ACTIVE_CHANNELS], ///< [out] The pointers to the histogram counter output arrays, one for each active channel. For channeli, the allocation length of d_histogram[i] should be num_levels[i] - 1. int num_levels[NUM_ACTIVE_CHANNELS], ///< [in] The number of boundaries (levels) for delineating histogram samples in each active channel. Implies that the number of bins for channeli is num_levels[i] - 1. LevelT lower_level[NUM_ACTIVE_CHANNELS], ///< [in] The lower sample value bound (inclusive) for the lowest histogram bin in each active channel. LevelT upper_level[NUM_ACTIVE_CHANNELS], ///< [in] The upper sample value bound (exclusive) for the highest histogram bin in each active channel. OffsetT num_row_pixels, ///< [in] The number of multi-channel pixels per row in the region of interest OffsetT num_rows, ///< [in] The number of rows in the region of interest size_t row_stride_bytes, ///< [in] The number of bytes between starts of consecutive rows in the region of interest cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. 
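// Implementation note: when OffsetT is wider than int but the sampled extent
// (num_rows * row_stride_bytes) still fits within the positive range of int, the
// offsets are down-converted to int before dispatch so the kernels can use cheaper
// 32-bit index arithmetic; otherwise the original OffsetT is retained. In both
// paths the row stride is converted from bytes to samples before dispatching.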
{ /// The sample value type of the input iterator typedef typename std::iterator_traits::value_type SampleT; Int2Type is_byte_sample; if ((sizeof(OffsetT) > sizeof(int)) && ((unsigned long long) (num_rows * row_stride_bytes) < (unsigned long long) std::numeric_limits::max())) { // Down-convert OffsetT data type return DipatchHistogram::DispatchEven( d_temp_storage, temp_storage_bytes, d_samples, d_histogram, num_levels, lower_level, upper_level, (int) num_row_pixels, (int) num_rows, (int) (row_stride_bytes / sizeof(SampleT)), stream, debug_synchronous, is_byte_sample); } return DipatchHistogram::DispatchEven( d_temp_storage, temp_storage_bytes, d_samples, d_histogram, num_levels, lower_level, upper_level, num_row_pixels, num_rows, (OffsetT) (row_stride_bytes / sizeof(SampleT)), stream, debug_synchronous, is_byte_sample); } //@} end member group /******************************************************************//** * \name Custom bin ranges *********************************************************************/ //@{ /** * \brief Computes an intensity histogram from a sequence of data samples using the specified bin boundary levels. * * \par * - The number of histogram bins is (\p num_levels - 1) * - The value range for bini is [level[i], level[i+1]) * - \devicestorage * * \par Snippet * The code snippet below illustrates the computation of an six-bin histogram * from a sequence of float samples * * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input samples and * // output histogram * int num_samples; // e.g., 10 * float* d_samples; // e.g., [2.2, 6.0, 7.1, 2.9, 3.5, 0.3, 2.9, 2.0, 6.1, 999.5] * int* d_histogram; // e.g., [ -, -, -, -, -, -, -, -] * int num_levels // e.g., 7 (seven level boundaries for six bins) * float* d_levels; // e.g., [0.0, 2.0, 4.0, 6.0, 8.0, 12.0, 16.0] * ... * * // Determine temporary device storage requirements * void* d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceHistogram::HistogramRange(d_temp_storage, temp_storage_bytes, * d_samples, d_histogram, num_levels, d_levels, num_samples); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Compute histograms * cub::DeviceHistogram::HistogramRange(d_temp_storage, temp_storage_bytes, * d_samples, d_histogram, num_levels, d_levels, num_samples); * * // d_histogram <-- [1, 0, 5, 0, 3, 0, 0, 0]; * * \endcode * * \tparam SampleIteratorT [inferred] Random-access input iterator type for reading input samples. \iterator * \tparam CounterT [inferred] Integer type for histogram bin counters * \tparam LevelT [inferred] Type for specifying boundaries (levels) * \tparam OffsetT [inferred] Signed integer type for sequence offsets, list lengths, pointer differences, etc. \offset_size1 */ template < typename SampleIteratorT, typename CounterT, typename LevelT, typename OffsetT> CUB_RUNTIME_FUNCTION static cudaError_t HistogramRange( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation SampleIteratorT d_samples, ///< [in] The pointer to the input sequence of data samples. CounterT* d_histogram, ///< [out] The pointer to the histogram counter output array of length num_levels - 1. int num_levels, ///< [in] The number of boundaries (levels) for delineating histogram samples. 
Implies that the number of bins is num_levels - 1. LevelT* d_levels, ///< [in] The pointer to the array of boundaries (levels). Bin ranges are defined by consecutive boundary pairings: lower sample value boundaries are inclusive and upper sample value boundaries are exclusive. OffsetT num_samples, ///< [in] The number of data samples per row in the region of interest cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. { /// The sample value type of the input iterator typedef typename std::iterator_traits::value_type SampleT; CounterT* d_histogram1[1] = {d_histogram}; int num_levels1[1] = {num_levels}; LevelT* d_levels1[1] = {d_levels}; return MultiHistogramRange<1, 1>( d_temp_storage, temp_storage_bytes, d_samples, d_histogram1, num_levels1, d_levels1, num_samples, 1, sizeof(SampleT) * num_samples, stream, debug_synchronous); } /** * \brief Computes an intensity histogram from a sequence of data samples using the specified bin boundary levels. * * \par * - A two-dimensional region of interest within \p d_samples can be specified * using the \p num_row_samples, num_rows, and \p row_stride_bytes parameters. * - The row stride must be a whole multiple of the sample data type * size, i.e., (row_stride_bytes % sizeof(SampleT)) == 0. * - The number of histogram bins is (\p num_levels - 1) * - The value range for bini is [level[i], level[i+1]) * - \devicestorage * * \par Snippet * The code snippet below illustrates the computation of a six-bin histogram * from a 2x5 region of interest within a flattened 2x7 array of float samples. * * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input samples and * // output histogram * int num_row_samples; // e.g., 5 * int num_rows; // e.g., 2; * int row_stride_bytes; // e.g., 7 * sizeof(float) * float* d_samples; // e.g., [2.2, 6.0, 7.1, 2.9, 3.5, -, -, * // 0.3, 2.9, 2.0, 6.1, 999.5, -, -] * int* d_histogram; // e.g., [ , , , , , , , ] * int num_levels // e.g., 7 (seven level boundaries for six bins) * float *d_levels; // e.g., [0.0, 2.0, 4.0, 6.0, 8.0, 12.0, 16.0] * ... * * // Determine temporary device storage requirements * void* d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceHistogram::HistogramRange(d_temp_storage, temp_storage_bytes, * d_samples, d_histogram, num_levels, d_levels, * num_row_samples, num_rows, row_stride_bytes); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Compute histograms * cub::DeviceHistogram::HistogramRange(d_temp_storage, temp_storage_bytes, * d_samples, d_histogram, num_levels, d_levels, * num_row_samples, num_rows, row_stride_bytes); * * // d_histogram <-- [1, 0, 5, 0, 3, 0, 0, 0]; * * \endcode * * \tparam SampleIteratorT [inferred] Random-access input iterator type for reading input samples. \iterator * \tparam CounterT [inferred] Integer type for histogram bin counters * \tparam LevelT [inferred] Type for specifying boundaries (levels) * \tparam OffsetT [inferred] Signed integer type for sequence offsets, list lengths, pointer differences, etc. 
\offset_size1 */ template < typename SampleIteratorT, typename CounterT, typename LevelT, typename OffsetT> CUB_RUNTIME_FUNCTION static cudaError_t HistogramRange( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation SampleIteratorT d_samples, ///< [in] The pointer to the input sequence of data samples. CounterT* d_histogram, ///< [out] The pointer to the histogram counter output array of length num_levels - 1. int num_levels, ///< [in] The number of boundaries (levels) for delineating histogram samples. Implies that the number of bins is num_levels - 1. LevelT* d_levels, ///< [in] The pointer to the array of boundaries (levels). Bin ranges are defined by consecutive boundary pairings: lower sample value boundaries are inclusive and upper sample value boundaries are exclusive. OffsetT num_row_samples, ///< [in] The number of data samples per row in the region of interest OffsetT num_rows, ///< [in] The number of rows in the region of interest size_t row_stride_bytes, ///< [in] The number of bytes between starts of consecutive rows in the region of interest cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. { CounterT* d_histogram1[1] = {d_histogram}; int num_levels1[1] = {num_levels}; LevelT* d_levels1[1] = {d_levels}; return MultiHistogramRange<1, 1>( d_temp_storage, temp_storage_bytes, d_samples, d_histogram1, num_levels1, d_levels1, num_row_samples, num_rows, row_stride_bytes, stream, debug_synchronous); } /** * \brief Computes per-channel intensity histograms from a sequence of multi-channel "pixel" data samples using the specified bin boundary levels. * * \par * - The input is a sequence of pixel structures, where each pixel comprises * a record of \p NUM_CHANNELS consecutive data samples (e.g., an RGBA pixel). * - Of the \p NUM_CHANNELS specified, the function will only compute histograms * for the first \p NUM_ACTIVE_CHANNELS (e.g., RGB histograms from RGBA * pixel samples). * - The number of histogram bins for channeli is num_levels[i] - 1. * - For channeli, the range of values for all histogram bins * have the same width: (upper_level[i] - lower_level[i]) / ( num_levels[i] - 1) * - \devicestorage * * \par Snippet * The code snippet below illustrates the computation of three 4-bin RGB histograms * from a quad-channel sequence of RGBA pixels (8 bits per channel per pixel) * * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input samples * // and output histograms * int num_pixels; // e.g., 5 * unsigned char *d_samples; // e.g., [(2, 6, 7, 5),(3, 0, 2, 1),(7, 0, 6, 2), * // (0, 6, 7, 5),(3, 0, 2, 6)] * unsigned int *d_histogram[3]; // e.g., [[ -, -, -, -],[ -, -, -, -],[ -, -, -, -]]; * int num_levels[3]; // e.g., {5, 5, 5}; * unsigned int *d_levels[3]; // e.g., [ [0, 2, 4, 6, 8], * // [0, 2, 4, 6, 8], * // [0, 2, 4, 6, 8] ]; * ... 
* * // Determine temporary device storage requirements * void* d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceHistogram::MultiHistogramRange<4, 3>(d_temp_storage, temp_storage_bytes, * d_samples, d_histogram, num_levels, d_levels, num_pixels); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Compute histograms * cub::DeviceHistogram::MultiHistogramRange<4, 3>(d_temp_storage, temp_storage_bytes, * d_samples, d_histogram, num_levels, d_levels, num_pixels); * * // d_histogram <-- [ [1, 3, 0, 1], * // [3, 0, 0, 2], * // [0, 2, 0, 3] ] * * \endcode * * \tparam NUM_CHANNELS Number of channels interleaved in the input data (may be greater than the number of channels being actively histogrammed) * \tparam NUM_ACTIVE_CHANNELS [inferred] Number of channels actively being histogrammed * \tparam SampleIteratorT [inferred] Random-access input iterator type for reading input samples. \iterator * \tparam CounterT [inferred] Integer type for histogram bin counters * \tparam LevelT [inferred] Type for specifying boundaries (levels) * \tparam OffsetT [inferred] Signed integer type for sequence offsets, list lengths, pointer differences, etc. \offset_size1 */ template < int NUM_CHANNELS, int NUM_ACTIVE_CHANNELS, typename SampleIteratorT, typename CounterT, typename LevelT, typename OffsetT> CUB_RUNTIME_FUNCTION static cudaError_t MultiHistogramRange( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation SampleIteratorT d_samples, ///< [in] The pointer to the multi-channel input sequence of data samples. The samples from different channels are assumed to be interleaved (e.g., an array of 32-bit pixels where each pixel consists of four RGBA 8-bit samples). CounterT* d_histogram[NUM_ACTIVE_CHANNELS], ///< [out] The pointers to the histogram counter output arrays, one for each active channel. For channeli, the allocation length of d_histogram[i] should be num_levels[i] - 1. int num_levels[NUM_ACTIVE_CHANNELS], ///< [in] The number of boundaries (levels) for delineating histogram samples in each active channel. Implies that the number of bins for channeli is num_levels[i] - 1. LevelT* d_levels[NUM_ACTIVE_CHANNELS], ///< [in] The pointers to the arrays of boundaries (levels), one for each active channel. Bin ranges are defined by consecutive boundary pairings: lower sample value boundaries are inclusive and upper sample value boundaries are exclusive. OffsetT num_pixels, ///< [in] The number of multi-channel pixels (i.e., the length of \p d_samples / NUM_CHANNELS) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. { /// The sample value type of the input iterator typedef typename std::iterator_traits::value_type SampleT; return MultiHistogramRange( d_temp_storage, temp_storage_bytes, d_samples, d_histogram, num_levels, d_levels, num_pixels, 1, sizeof(SampleT) * NUM_CHANNELS * num_pixels, stream, debug_synchronous); } /** * \brief Computes per-channel intensity histograms from a sequence of multi-channel "pixel" data samples using the specified bin boundary levels. 
* * \par * - The input is a sequence of pixel structures, where each pixel comprises * a record of \p NUM_CHANNELS consecutive data samples (e.g., an RGBA pixel). * - Of the \p NUM_CHANNELS specified, the function will only compute histograms * for the first \p NUM_ACTIVE_CHANNELS (e.g., RGB histograms from RGBA * pixel samples). * - A two-dimensional region of interest within \p d_samples can be specified * using the \p num_row_samples, num_rows, and \p row_stride_bytes parameters. * - The row stride must be a whole multiple of the sample data type * size, i.e., (row_stride_bytes % sizeof(SampleT)) == 0. * - The number of histogram bins for channeli is num_levels[i] - 1. * - For channeli, the range of values for all histogram bins * have the same width: (upper_level[i] - lower_level[i]) / ( num_levels[i] - 1) * - \devicestorage * * \par Snippet * The code snippet below illustrates the computation of three 4-bin RGB histograms from a 2x3 region of * interest of within a flattened 2x4 array of quad-channel RGBA pixels (8 bits per channel per pixel). * * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input samples * // and output histograms * int num_row_pixels; // e.g., 3 * int num_rows; // e.g., 2 * size_t row_stride_bytes; // e.g., 4 * sizeof(unsigned char) * NUM_CHANNELS * unsigned char* d_samples; // e.g., [(2, 6, 7, 5),(3, 0, 2, 1),(1, 1, 1, 1),(-, -, -, -), * // (7, 0, 6, 2),(0, 6, 7, 5),(3, 0, 2, 6),(-, -, -, -)] * int* d_histogram[3]; // e.g., [[ -, -, -, -],[ -, -, -, -],[ -, -, -, -]]; * int num_levels[3]; // e.g., {5, 5, 5}; * unsigned int* d_levels[3]; // e.g., [ [0, 2, 4, 6, 8], * // [0, 2, 4, 6, 8], * // [0, 2, 4, 6, 8] ]; * ... * * // Determine temporary device storage requirements * void* d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceHistogram::MultiHistogramRange<4, 3>(d_temp_storage, temp_storage_bytes, * d_samples, d_histogram, num_levels, d_levels, num_row_pixels, num_rows, row_stride_bytes); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Compute histograms * cub::DeviceHistogram::MultiHistogramRange<4, 3>(d_temp_storage, temp_storage_bytes, * d_samples, d_histogram, num_levels, d_levels, num_row_pixels, num_rows, row_stride_bytes); * * // d_histogram <-- [ [2, 3, 0, 1], * // [3, 0, 0, 2], * // [1, 2, 0, 3] ] * * \endcode * * \tparam NUM_CHANNELS Number of channels interleaved in the input data (may be greater than the number of channels being actively histogrammed) * \tparam NUM_ACTIVE_CHANNELS [inferred] Number of channels actively being histogrammed * \tparam SampleIteratorT [inferred] Random-access input iterator type for reading input samples. \iterator * \tparam CounterT [inferred] Integer type for histogram bin counters * \tparam LevelT [inferred] Type for specifying boundaries (levels) * \tparam OffsetT [inferred] Signed integer type for sequence offsets, list lengths, pointer differences, etc. \offset_size1 */ template < int NUM_CHANNELS, int NUM_ACTIVE_CHANNELS, typename SampleIteratorT, typename CounterT, typename LevelT, typename OffsetT> CUB_RUNTIME_FUNCTION static cudaError_t MultiHistogramRange( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. 
size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation SampleIteratorT d_samples, ///< [in] The pointer to the multi-channel input sequence of data samples. The samples from different channels are assumed to be interleaved (e.g., an array of 32-bit pixels where each pixel consists of four RGBA 8-bit samples). CounterT* d_histogram[NUM_ACTIVE_CHANNELS], ///< [out] The pointers to the histogram counter output arrays, one for each active channel. For channeli, the allocation length of d_histogram[i] should be num_levels[i] - 1. int num_levels[NUM_ACTIVE_CHANNELS], ///< [in] The number of boundaries (levels) for delineating histogram samples in each active channel. Implies that the number of bins for channeli is num_levels[i] - 1. LevelT* d_levels[NUM_ACTIVE_CHANNELS], ///< [in] The pointers to the arrays of boundaries (levels), one for each active channel. Bin ranges are defined by consecutive boundary pairings: lower sample value boundaries are inclusive and upper sample value boundaries are exclusive. OffsetT num_row_pixels, ///< [in] The number of multi-channel pixels per row in the region of interest OffsetT num_rows, ///< [in] The number of rows in the region of interest size_t row_stride_bytes, ///< [in] The number of bytes between starts of consecutive rows in the region of interest cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. { /// The sample value type of the input iterator typedef typename std::iterator_traits::value_type SampleT; Int2Type is_byte_sample; if ((sizeof(OffsetT) > sizeof(int)) && ((unsigned long long) (num_rows * row_stride_bytes) < (unsigned long long) std::numeric_limits::max())) { // Down-convert OffsetT data type return DipatchHistogram::DispatchRange( d_temp_storage, temp_storage_bytes, d_samples, d_histogram, num_levels, d_levels, (int) num_row_pixels, (int) num_rows, (int) (row_stride_bytes / sizeof(SampleT)), stream, debug_synchronous, is_byte_sample); } return DipatchHistogram::DispatchRange( d_temp_storage, temp_storage_bytes, d_samples, d_histogram, num_levels, d_levels, num_row_pixels, num_rows, (OffsetT) (row_stride_bytes / sizeof(SampleT)), stream, debug_synchronous, is_byte_sample); } //@} end member group }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/device/device_partition.cuh000066400000000000000000000331031411340063500236460ustar00rootroot00000000000000 /****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::DevicePartition provides device-wide, parallel operations for partitioning sequences of data items residing within device-accessible memory. */ #pragma once #include #include #include "dispatch/dispatch_select_if.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief DevicePartition provides device-wide, parallel operations for partitioning sequences of data items residing within device-accessible memory. ![](partition_logo.png) * \ingroup SingleModule * * \par Overview * These operations apply a selection criterion to construct a partitioned output sequence from items selected/unselected from * a specified input sequence. * * \par Usage Considerations * \cdp_class{DevicePartition} * * \par Performance * \linear_performance{partition} * * \par * The following chart illustrates DevicePartition::If * performance across different CUDA architectures for \p int32 items, * where 50% of the items are randomly selected for the first partition. * \plots_below * * \image html partition_if_int32_50_percent.png * */ struct DevicePartition { /** * \brief Uses the \p d_flags sequence to split the corresponding items from \p d_in into a partitioned sequence \p d_out. The total number of items copied into the first partition is written to \p d_num_selected_out. ![](partition_flags_logo.png) * * \par * - The value type of \p d_flags must be castable to \p bool (e.g., \p bool, \p char, \p int, etc.). * - Copies of the selected items are compacted into \p d_out and maintain their original * relative ordering, however copies of the unselected items are compacted into the * rear of \p d_out in reverse order. * - \devicestorage * * \par Snippet * The code snippet below illustrates the compaction of items selected from an \p int device vector. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input, flags, and output * int num_items; // e.g., 8 * int *d_in; // e.g., [1, 2, 3, 4, 5, 6, 7, 8] * char *d_flags; // e.g., [1, 0, 0, 1, 0, 1, 1, 0] * int *d_out; // e.g., [ , , , , , , , ] * int *d_num_selected_out; // e.g., [ ] * ... 
* * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DevicePartition::Flagged(d_temp_storage, temp_storage_bytes, d_in, d_flags, d_out, d_num_selected_out, num_items); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run selection * cub::DevicePartition::Flagged(d_temp_storage, temp_storage_bytes, d_in, d_flags, d_out, d_num_selected_out, num_items); * * // d_out <-- [1, 4, 6, 7, 8, 5, 3, 2] * // d_num_selected_out <-- [4] * * \endcode * * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items \iterator * \tparam FlagIterator [inferred] Random-access input iterator type for reading selection flags \iterator * \tparam OutputIteratorT [inferred] Random-access output iterator type for writing output items \iterator * \tparam NumSelectedIteratorT [inferred] Output iterator type for recording the number of items selected \iterator */ template < typename InputIteratorT, typename FlagIterator, typename OutputIteratorT, typename NumSelectedIteratorT> CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t Flagged( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items FlagIterator d_flags, ///< [in] Pointer to the input sequence of selection flags OutputIteratorT d_out, ///< [out] Pointer to the output sequence of partitioned data items NumSelectedIteratorT d_num_selected_out, ///< [out] Pointer to the output total number of items selected (i.e., the offset of the unselected partition) int num_items, ///< [in] Total number of items to select from cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. { typedef int OffsetT; // Signed integer type for global offsets typedef NullType SelectOp; // Selection op (not used) typedef NullType EqualityOp; // Equality operator (not used) return DispatchSelectIf::Dispatch( d_temp_storage, temp_storage_bytes, d_in, d_flags, d_out, d_num_selected_out, SelectOp(), EqualityOp(), num_items, stream, debug_synchronous); } /** * \brief Uses the \p select_op functor to split the corresponding items from \p d_in into a partitioned sequence \p d_out. The total number of items copied into the first partition is written to \p d_num_selected_out. ![](partition_logo.png) * * \par * - Copies of the selected items are compacted into \p d_out and maintain their original * relative ordering, however copies of the unselected items are compacted into the * rear of \p d_out in reverse order. * - \devicestorage * * \par Performance * The following charts illustrate saturated partition-if performance across different * CUDA architectures for \p int32 and \p int64 items, respectively. Items are * selected for the first partition with 50% probability. 
* * \image html partition_if_int32_50_percent.png * \image html partition_if_int64_50_percent.png * * \par * The following charts are similar, but 5% selection probability for the first partition: * * \image html partition_if_int32_5_percent.png * \image html partition_if_int64_5_percent.png * * \par Snippet * The code snippet below illustrates the compaction of items selected from an \p int device vector. * \par * \code * #include // or equivalently * * // Functor type for selecting values less than some criteria * struct LessThan * { * int compare; * * CUB_RUNTIME_FUNCTION __forceinline__ * LessThan(int compare) : compare(compare) {} * * CUB_RUNTIME_FUNCTION __forceinline__ * bool operator()(const int &a) const { * return (a < compare); * } * }; * * // Declare, allocate, and initialize device-accessible pointers for input and output * int num_items; // e.g., 8 * int *d_in; // e.g., [0, 2, 3, 9, 5, 2, 81, 8] * int *d_out; // e.g., [ , , , , , , , ] * int *d_num_selected_out; // e.g., [ ] * LessThan select_op(7); * ... * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceSelect::If(d_temp_storage, temp_storage_bytes, d_in, d_out, d_num_selected_out, num_items, select_op); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run selection * cub::DeviceSelect::If(d_temp_storage, temp_storage_bytes, d_in, d_out, d_num_selected_out, num_items, select_op); * * // d_out <-- [0, 2, 3, 5, 2, 8, 81, 9] * // d_num_selected_out <-- [5] * * \endcode * * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items \iterator * \tparam OutputIteratorT [inferred] Random-access output iterator type for writing output items \iterator * \tparam NumSelectedIteratorT [inferred] Output iterator type for recording the number of items selected \iterator * \tparam SelectOp [inferred] Selection functor type having member bool operator()(const T &a) */ template < typename InputIteratorT, typename OutputIteratorT, typename NumSelectedIteratorT, typename SelectOp> CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t If( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output sequence of partitioned data items NumSelectedIteratorT d_num_selected_out, ///< [out] Pointer to the output total number of items selected (i.e., the offset of the unselected partition) int num_items, ///< [in] Total number of items to select from SelectOp select_op, ///< [in] Unary selection operator cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. 
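// Implementation note: partitioning is delegated to the select-if dispatch layer.
// The flag iterator and equality operator below are unused placeholders (NullType);
// selection is driven entirely by select_op. The dispatch compacts selected items
// to the front of d_out in their original relative order, while unselected items
// are compacted to the rear in reverse order.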
{ typedef int OffsetT; // Signed integer type for global offsets typedef NullType* FlagIterator; // FlagT iterator type (not used) typedef NullType EqualityOp; // Equality operator (not used) return DispatchSelectIf::Dispatch( d_temp_storage, temp_storage_bytes, d_in, NULL, d_out, d_num_selected_out, select_op, EqualityOp(), num_items, stream, debug_synchronous); } }; /** * \example example_device_partition_flagged.cu * \example example_device_partition_if.cu */ } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/device/device_radix_sort.cuh000066400000000000000000001225221411340063500240170ustar00rootroot00000000000000 /****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::DeviceRadixSort provides device-wide, parallel operations for computing a radix sort across a sequence of data items residing within device-accessible memory. */ #pragma once #include #include #include "dispatch/dispatch_radix_sort.cuh" #include "../util_arch.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief DeviceRadixSort provides device-wide, parallel operations for computing a radix sort across a sequence of data items residing within device-accessible memory. ![](sorting_logo.png) * \ingroup SingleModule * * \par Overview * The [radix sorting method](http://en.wikipedia.org/wiki/Radix_sort) arranges * items into ascending (or descending) order. The algorithm relies upon a positional representation for * keys, i.e., each key is comprised of an ordered sequence of symbols (e.g., digits, * characters, etc.) specified from least-significant to most-significant. 
For a * given input sequence of keys and a set of rules specifying a total ordering * of the symbolic alphabet, the radix sorting method produces a lexicographic * ordering of those keys. * * \par * DeviceRadixSort can sort all of the built-in C++ numeric primitive types, e.g.: * unsigned char, \p int, \p double, etc. Although the direct radix sorting * method can only be applied to unsigned integral types, DeviceRadixSort * is able to sort signed and floating-point types via simple bit-wise transformations * that ensure lexicographic key ordering. * * \par Usage Considerations * \cdp_class{DeviceRadixSort} * * \par Performance * \linear_performance{radix sort} The following chart illustrates DeviceRadixSort::SortKeys * performance across different CUDA architectures for uniform-random \p uint32 keys. * \plots_below * * \image html lsb_radix_sort_int32_keys.png * */ struct DeviceRadixSort { /******************************************************************//** * \name KeyT-value pairs *********************************************************************/ //@{ /** * \brief Sorts key-value pairs into ascending order. (~2N auxiliary storage required) * * \par * - The contents of the input data are not altered by the sorting operation * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. * - \devicestorageNP For sorting using only O(P) temporary storage, see the sorting interface using DoubleBuffer wrappers below. * - \devicestorage * * \par Performance * The following charts illustrate saturated sorting performance across different * CUDA architectures for uniform-random uint32,uint32 and * uint64,uint64 pairs, respectively. * * \image html lsb_radix_sort_int32_pairs.png * \image html lsb_radix_sort_int64_pairs.png * * \par Snippet * The code snippet below illustrates the sorting of a device vector of \p int keys * with associated vector of \p int values. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for sorting data * int num_items; // e.g., 7 * int *d_keys_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_keys_out; // e.g., [ ... ] * int *d_values_in; // e.g., [0, 1, 2, 3, 4, 5, 6] * int *d_values_out; // e.g., [ ... ] * ... * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceRadixSort::SortPairs(d_temp_storage, temp_storage_bytes, * d_keys_in, d_keys_out, d_values_in, d_values_out, num_items); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run sorting operation * cub::DeviceRadixSort::SortPairs(d_temp_storage, temp_storage_bytes, * d_keys_in, d_keys_out, d_values_in, d_values_out, num_items); * * // d_keys_out <-- [0, 3, 5, 6, 7, 8, 9] * // d_values_out <-- [5, 4, 3, 1, 2, 0, 6] * * \endcode * * \tparam KeyT [inferred] KeyT type * \tparam ValueT [inferred] ValueT type */ template < typename KeyT, typename ValueT> CUB_RUNTIME_FUNCTION static cudaError_t SortPairs( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. 
size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation const KeyT *d_keys_in, ///< [in] Pointer to the input data of key data to sort KeyT *d_keys_out, ///< [out] Pointer to the sorted output sequence of key data const ValueT *d_values_in, ///< [in] Pointer to the corresponding input sequence of associated value items ValueT *d_values_out, ///< [out] Pointer to the correspondingly-reordered output sequence of associated value items int num_items, ///< [in] Number of items to sort int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; DoubleBuffer d_keys(const_cast(d_keys_in), d_keys_out); DoubleBuffer d_values(const_cast(d_values_in), d_values_out); return DispatchRadixSort::Dispatch( d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, begin_bit, end_bit, false, stream, debug_synchronous); } /** * \brief Sorts key-value pairs into ascending order. (~N auxiliary storage required) * * \par * - The sorting operation is given a pair of key buffers and a corresponding * pair of associated value buffers. Each pair is managed by a DoubleBuffer * structure that indicates which of the two buffers is "current" (and thus * contains the input data to be sorted). * - The contents of both buffers within each pair may be altered by the sorting * operation. * - Upon completion, the sorting operation will update the "current" indicator * within each DoubleBuffer wrapper to reference which of the two buffers * now contains the sorted output sequence (a function of the number of key bits * specified and the targeted device architecture). * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. * - \devicestorageP * - \devicestorage * * \par Performance * The following charts illustrate saturated sorting performance across different * CUDA architectures for uniform-random uint32,uint32 and * uint64,uint64 pairs, respectively. * * \image html lsb_radix_sort_int32_pairs.png * \image html lsb_radix_sort_int64_pairs.png * * \par Snippet * The code snippet below illustrates the sorting of a device vector of \p int keys * with associated vector of \p int values. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for sorting data * int num_items; // e.g., 7 * int *d_key_buf; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_key_alt_buf; // e.g., [ ... ] * int *d_value_buf; // e.g., [0, 1, 2, 3, 4, 5, 6] * int *d_value_alt_buf; // e.g., [ ... ] * ... 
* * // Create a set of DoubleBuffers to wrap pairs of device pointers * cub::DoubleBuffer d_keys(d_key_buf, d_key_alt_buf); * cub::DoubleBuffer d_values(d_value_buf, d_value_alt_buf); * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceRadixSort::SortPairs(d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run sorting operation * cub::DeviceRadixSort::SortPairs(d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items); * * // d_keys.Current() <-- [0, 3, 5, 6, 7, 8, 9] * // d_values.Current() <-- [5, 4, 3, 1, 2, 0, 6] * * \endcode * * \tparam KeyT [inferred] KeyT type * \tparam ValueT [inferred] ValueT type */ template < typename KeyT, typename ValueT> CUB_RUNTIME_FUNCTION static cudaError_t SortPairs( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation DoubleBuffer &d_keys, ///< [in,out] Reference to the double-buffer of keys whose "current" device-accessible buffer contains the unsorted input keys and, upon return, is updated to point to the sorted output keys DoubleBuffer &d_values, ///< [in,out] Double-buffer of values whose "current" device-accessible buffer contains the unsorted input values and, upon return, is updated to point to the sorted output values int num_items, ///< [in] Number of items to sort int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; return DispatchRadixSort::Dispatch( d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, begin_bit, end_bit, true, stream, debug_synchronous); } /** * \brief Sorts key-value pairs into descending order. (~2N auxiliary storage required). * * \par * - The contents of the input data are not altered by the sorting operation * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. * - \devicestorageNP For sorting using only O(P) temporary storage, see the sorting interface using DoubleBuffer wrappers below. * - \devicestorage * * \par Performance * Performance is similar to DeviceRadixSort::SortPairs. * * \par Snippet * The code snippet below illustrates the sorting of a device vector of \p int keys * with associated vector of \p int values. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for sorting data * int num_items; // e.g., 7 * int *d_keys_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_keys_out; // e.g., [ ... ] * int *d_values_in; // e.g., [0, 1, 2, 3, 4, 5, 6] * int *d_values_out; // e.g., [ ... ] * ... 
* * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceRadixSort::SortPairsDescending(d_temp_storage, temp_storage_bytes, * d_keys_in, d_keys_out, d_values_in, d_values_out, num_items); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run sorting operation * cub::DeviceRadixSort::SortPairsDescending(d_temp_storage, temp_storage_bytes, * d_keys_in, d_keys_out, d_values_in, d_values_out, num_items); * * // d_keys_out <-- [9, 8, 7, 6, 5, 3, 0] * // d_values_out <-- [6, 0, 2, 1, 3, 4, 5] * * \endcode * * \tparam KeyT [inferred] KeyT type * \tparam ValueT [inferred] ValueT type */ template < typename KeyT, typename ValueT> CUB_RUNTIME_FUNCTION static cudaError_t SortPairsDescending( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation const KeyT *d_keys_in, ///< [in] Pointer to the input data of key data to sort KeyT *d_keys_out, ///< [out] Pointer to the sorted output sequence of key data const ValueT *d_values_in, ///< [in] Pointer to the corresponding input sequence of associated value items ValueT *d_values_out, ///< [out] Pointer to the correspondingly-reordered output sequence of associated value items int num_items, ///< [in] Number of items to sort int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; DoubleBuffer d_keys(const_cast(d_keys_in), d_keys_out); DoubleBuffer d_values(const_cast(d_values_in), d_values_out); return DispatchRadixSort::Dispatch( d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, begin_bit, end_bit, false, stream, debug_synchronous); } /** * \brief Sorts key-value pairs into descending order. (~N auxiliary storage required). * * \par * - The sorting operation is given a pair of key buffers and a corresponding * pair of associated value buffers. Each pair is managed by a DoubleBuffer * structure that indicates which of the two buffers is "current" (and thus * contains the input data to be sorted). * - The contents of both buffers within each pair may be altered by the sorting * operation. * - Upon completion, the sorting operation will update the "current" indicator * within each DoubleBuffer wrapper to reference which of the two buffers * now contains the sorted output sequence (a function of the number of key bits * specified and the targeted device architecture). * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. * - \devicestorageP * - \devicestorage * * \par Performance * Performance is similar to DeviceRadixSort::SortPairs. 
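 *
 * \par
 * As a hedged aside (an illustrative sketch added here, not part of the original CUB
 * documentation), the optional bit-subrange arguments mentioned above are simply passed
 * after \p num_items. Assuming, hypothetically, that only the low 24 bits of each
 * \p unsigned \p int key are ever set, a caller could restrict key comparison to those
 * bits; the buffer names below (\p d_key_buf, \p d_key_alt_buf, \p d_value_buf,
 * \p d_value_alt_buf) are likewise hypothetical.
 *
 * \par
 * \code
 * // Sketch only: assumes the usual cub include and that the four buffers are valid
 * // device allocations of num_items elements each.
 * cub::DoubleBuffer<unsigned int> d_keys(d_key_buf, d_key_alt_buf);
 * cub::DoubleBuffer<int>          d_values(d_value_buf, d_value_alt_buf);
 *
 * // Determine temporary device storage requirements
 * void   *d_temp_storage = NULL;
 * size_t temp_storage_bytes = 0;
 * cub::DeviceRadixSort::SortPairsDescending(d_temp_storage, temp_storage_bytes,
 *     d_keys, d_values, num_items,
 *     0,      // begin_bit (inclusive); assumed: keys only differ in bits [0, 24)
 *     24);    // end_bit (exclusive)
 *
 * // Allocate temporary storage, then run the bit-restricted descending sort
 * cudaMalloc(&d_temp_storage, temp_storage_bytes);
 * cub::DeviceRadixSort::SortPairsDescending(d_temp_storage, temp_storage_bytes,
 *     d_keys, d_values, num_items, 0, 24);
 * \endcode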
* * \par Snippet * The code snippet below illustrates the sorting of a device vector of \p int keys * with associated vector of \p int values. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for sorting data * int num_items; // e.g., 7 * int *d_key_buf; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_key_alt_buf; // e.g., [ ... ] * int *d_value_buf; // e.g., [0, 1, 2, 3, 4, 5, 6] * int *d_value_alt_buf; // e.g., [ ... ] * ... * * // Create a set of DoubleBuffers to wrap pairs of device pointers * cub::DoubleBuffer d_keys(d_key_buf, d_key_alt_buf); * cub::DoubleBuffer d_values(d_value_buf, d_value_alt_buf); * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceRadixSort::SortPairsDescending(d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run sorting operation * cub::DeviceRadixSort::SortPairsDescending(d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items); * * // d_keys.Current() <-- [9, 8, 7, 6, 5, 3, 0] * // d_values.Current() <-- [6, 0, 2, 1, 3, 4, 5] * * \endcode * * \tparam KeyT [inferred] KeyT type * \tparam ValueT [inferred] ValueT type */ template < typename KeyT, typename ValueT> CUB_RUNTIME_FUNCTION static cudaError_t SortPairsDescending( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation DoubleBuffer &d_keys, ///< [in,out] Reference to the double-buffer of keys whose "current" device-accessible buffer contains the unsorted input keys and, upon return, is updated to point to the sorted output keys DoubleBuffer &d_values, ///< [in,out] Double-buffer of values whose "current" device-accessible buffer contains the unsorted input values and, upon return, is updated to point to the sorted output values int num_items, ///< [in] Number of items to sort int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; return DispatchRadixSort::Dispatch( d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, begin_bit, end_bit, true, stream, debug_synchronous); } //@} end member group /******************************************************************//** * \name Keys-only *********************************************************************/ //@{ /** * \brief Sorts keys into ascending order. (~2N auxiliary storage required) * * \par * - The contents of the input data are not altered by the sorting operation * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. 
* - \devicestorageNP For sorting using only O(P) temporary storage, see the sorting interface using DoubleBuffer wrappers below. * - \devicestorage * * \par Performance * The following charts illustrate saturated sorting performance across different * CUDA architectures for uniform-random \p uint32 and \p uint64 keys, respectively. * * \image html lsb_radix_sort_int32_keys.png * \image html lsb_radix_sort_int64_keys.png * * \par Snippet * The code snippet below illustrates the sorting of a device vector of \p int keys. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for sorting data * int num_items; // e.g., 7 * int *d_keys_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_keys_out; // e.g., [ ... ] * ... * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceRadixSort::SortKeys(d_temp_storage, temp_storage_bytes, d_keys_in, d_keys_out, num_items); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run sorting operation * cub::DeviceRadixSort::SortKeys(d_temp_storage, temp_storage_bytes, d_keys_in, d_keys_out, num_items); * * // d_keys_out <-- [0, 3, 5, 6, 7, 8, 9] * * \endcode * * \tparam KeyT [inferred] KeyT type */ template CUB_RUNTIME_FUNCTION static cudaError_t SortKeys( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation const KeyT *d_keys_in, ///< [in] Pointer to the input data of key data to sort KeyT *d_keys_out, ///< [out] Pointer to the sorted output sequence of key data int num_items, ///< [in] Number of items to sort int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; // Null value type DoubleBuffer d_keys(const_cast(d_keys_in), d_keys_out); DoubleBuffer d_values; return DispatchRadixSort::Dispatch( d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, begin_bit, end_bit, false, stream, debug_synchronous); } /** * \brief Sorts keys into ascending order. (~N auxiliary storage required). * * \par * - The sorting operation is given a pair of key buffers managed by a * DoubleBuffer structure that indicates which of the two buffers is * "current" (and thus contains the input data to be sorted). * - The contents of both buffers may be altered by the sorting operation. * - Upon completion, the sorting operation will update the "current" indicator * within the DoubleBuffer wrapper to reference which of the two buffers * now contains the sorted output sequence (a function of the number of key bits * specified and the targeted device architecture). * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. 
This can reduce overall sorting overhead and yield a corresponding performance improvement. * - \devicestorageP * - \devicestorage * * \par Performance * The following charts illustrate saturated sorting performance across different * CUDA architectures for uniform-random \p uint32 and \p uint64 keys, respectively. * * \image html lsb_radix_sort_int32_keys.png * \image html lsb_radix_sort_int64_keys.png * * \par Snippet * The code snippet below illustrates the sorting of a device vector of \p int keys. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for sorting data * int num_items; // e.g., 7 * int *d_key_buf; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_key_alt_buf; // e.g., [ ... ] * ... * * // Create a DoubleBuffer to wrap the pair of device pointers * cub::DoubleBuffer d_keys(d_key_buf, d_key_alt_buf); * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceRadixSort::SortKeys(d_temp_storage, temp_storage_bytes, d_keys, num_items); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run sorting operation * cub::DeviceRadixSort::SortKeys(d_temp_storage, temp_storage_bytes, d_keys, num_items); * * // d_keys.Current() <-- [0, 3, 5, 6, 7, 8, 9] * * \endcode * * \tparam KeyT [inferred] KeyT type */ template CUB_RUNTIME_FUNCTION static cudaError_t SortKeys( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation DoubleBuffer &d_keys, ///< [in,out] Reference to the double-buffer of keys whose "current" device-accessible buffer contains the unsorted input keys and, upon return, is updated to point to the sorted output keys int num_items, ///< [in] Number of items to sort int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; // Null value type DoubleBuffer d_values; return DispatchRadixSort::Dispatch( d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, begin_bit, end_bit, true, stream, debug_synchronous); } /** * \brief Sorts keys into descending order. (~2N auxiliary storage required). * * \par * - The contents of the input data are not altered by the sorting operation * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. * - \devicestorageNP For sorting using only O(P) temporary storage, see the sorting interface using DoubleBuffer wrappers below. * - \devicestorage * * \par Performance * Performance is similar to DeviceRadixSort::SortKeys. * * \par Snippet * The code snippet below illustrates the sorting of a device vector of \p int keys. 
* \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for sorting data * int num_items; // e.g., 7 * int *d_keys_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_keys_out; // e.g., [ ... ] * ... * * // Create a DoubleBuffer to wrap the pair of device pointers * cub::DoubleBuffer d_keys(d_key_buf, d_key_alt_buf); * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceRadixSort::SortKeysDescending(d_temp_storage, temp_storage_bytes, d_keys_in, d_keys_out, num_items); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run sorting operation * cub::DeviceRadixSort::SortKeysDescending(d_temp_storage, temp_storage_bytes, d_keys_in, d_keys_out, num_items); * * // d_keys_out <-- [9, 8, 7, 6, 5, 3, 0]s * * \endcode * * \tparam KeyT [inferred] KeyT type */ template CUB_RUNTIME_FUNCTION static cudaError_t SortKeysDescending( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation const KeyT *d_keys_in, ///< [in] Pointer to the input data of key data to sort KeyT *d_keys_out, ///< [out] Pointer to the sorted output sequence of key data int num_items, ///< [in] Number of items to sort int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; DoubleBuffer d_keys(const_cast(d_keys_in), d_keys_out); DoubleBuffer d_values; return DispatchRadixSort::Dispatch( d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, begin_bit, end_bit, false, stream, debug_synchronous); } /** * \brief Sorts keys into descending order. (~N auxiliary storage required). * * \par * - The sorting operation is given a pair of key buffers managed by a * DoubleBuffer structure that indicates which of the two buffers is * "current" (and thus contains the input data to be sorted). * - The contents of both buffers may be altered by the sorting operation. * - Upon completion, the sorting operation will update the "current" indicator * within the DoubleBuffer wrapper to reference which of the two buffers * now contains the sorted output sequence (a function of the number of key bits * specified and the targeted device architecture). * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. * - \devicestorageP * - \devicestorage * * \par Performance * Performance is similar to DeviceRadixSort::SortKeys. * * \par Snippet * The code snippet below illustrates the sorting of a device vector of \p int keys. 
* \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for sorting data * int num_items; // e.g., 7 * int *d_key_buf; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_key_alt_buf; // e.g., [ ... ] * ... * * // Create a DoubleBuffer to wrap the pair of device pointers * cub::DoubleBuffer d_keys(d_key_buf, d_key_alt_buf); * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceRadixSort::SortKeysDescending(d_temp_storage, temp_storage_bytes, d_keys, num_items); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run sorting operation * cub::DeviceRadixSort::SortKeysDescending(d_temp_storage, temp_storage_bytes, d_keys, num_items); * * // d_keys.Current() <-- [9, 8, 7, 6, 5, 3, 0] * * \endcode * * \tparam KeyT [inferred] KeyT type */ template CUB_RUNTIME_FUNCTION static cudaError_t SortKeysDescending( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation DoubleBuffer &d_keys, ///< [in,out] Reference to the double-buffer of keys whose "current" device-accessible buffer contains the unsorted input keys and, upon return, is updated to point to the sorted output keys int num_items, ///< [in] Number of items to sort int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; // Null value type DoubleBuffer d_values; return DispatchRadixSort::Dispatch( d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, begin_bit, end_bit, true, stream, debug_synchronous); } //@} end member group }; /** * \example example_device_radix_sort.cu */ } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/device/device_reduce.cuh000066400000000000000000001137701411340063500231150ustar00rootroot00000000000000 /****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::DeviceReduce provides device-wide, parallel operations for computing a reduction across a sequence of data items residing within device-accessible memory. */ #pragma once #include #include #include #include "../iterator/arg_index_input_iterator.cuh" #include "dispatch/dispatch_reduce.cuh" #include "dispatch/dispatch_reduce_by_key.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief DeviceReduce provides device-wide, parallel operations for computing a reduction across a sequence of data items residing within device-accessible memory. ![](reduce_logo.png) * \ingroup SingleModule * * \par Overview * A reduction (or fold) * uses a binary combining operator to compute a single aggregate from a sequence of input elements. * * \par Usage Considerations * \cdp_class{DeviceReduce} * * \par Performance * \linear_performance{reduction, reduce-by-key, and run-length encode} * * \par * The following chart illustrates DeviceReduce::Sum * performance across different CUDA architectures for \p int32 keys. * * \image html reduce_int32.png * * \par * The following chart illustrates DeviceReduce::ReduceByKey (summation) * performance across different CUDA architectures for \p fp32 * values. Segments are identified by \p int32 keys, and have lengths uniformly sampled from [1,1000]. * * \image html reduce_by_key_fp32_len_500.png * * \par * \plots_below * */ struct DeviceReduce { /** * \brief Computes a device-wide reduction using the specified binary \p reduction_op functor and initial value \p init. * * \par * - Does not support binary reduction operators that are non-commutative. * - Provides "run-to-run" determinism for pseudo-associative reduction * (e.g., addition of floating point types) on the same GPU device. * However, results for pseudo-associative reduction may be inconsistent * from one device to a another device of a different compute-capability * because CUB can employ different tile-sizing for different architectures. * - \devicestorage * * \par Snippet * The code snippet below illustrates a user-defined min-reduction of a device vector of \p int data elements. * \par * \code * #include // or equivalently * * // CustomMin functor * struct CustomMin * { * template * __device__ __forceinline__ * T operator()(const T &a, const T &b) const { * return (b < a) ? b : a; * } * }; * * // Declare, allocate, and initialize device-accessible pointers for input and output * int num_items; // e.g., 7 * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_out; // e.g., [-] * CustomMin min_op; * int init; // e.g., INT_MAX * ... 
* * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceReduce::Reduce(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, min_op, init); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run reduction * cub::DeviceReduce::Reduce(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, min_op, init); * * // d_out <-- [0] * * \endcode * * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items \iterator * \tparam OutputIteratorT [inferred] Output iterator type for recording the reduced aggregate \iterator * \tparam ReductionOpT [inferred] Binary reduction functor type having member T operator()(const T &a, const T &b) * \tparam T [inferred] Data element type that is convertible to the \p value type of \p InputIteratorT */ template < typename InputIteratorT, typename OutputIteratorT, typename ReductionOpT, typename T> CUB_RUNTIME_FUNCTION static cudaError_t Reduce( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output aggregate int num_items, ///< [in] Total number of input items (i.e., length of \p d_in) ReductionOpT reduction_op, ///< [in] Binary reduction functor T init, ///< [in] Initial value of the reduction cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; return DispatchReduce::Dispatch( d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, reduction_op, init, stream, debug_synchronous); } /** * \brief Computes a device-wide sum using the addition (\p +) operator. * * \par * - Uses \p 0 as the initial value of the reduction. * - Does not support \p + operators that are non-commutative.. * - Provides "run-to-run" determinism for pseudo-associative reduction * (e.g., addition of floating point types) on the same GPU device. * However, results for pseudo-associative reduction may be inconsistent * from one device to a another device of a different compute-capability * because CUB can employ different tile-sizing for different architectures. * - \devicestorage * * \par Performance * The following charts illustrate saturated sum-reduction performance across different * CUDA architectures for \p int32 and \p int64 items, respectively. * * \image html reduce_int32.png * \image html reduce_int64.png * * \par Snippet * The code snippet below illustrates the sum-reduction of a device vector of \p int data elements. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input and output * int num_items; // e.g., 7 * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_out; // e.g., [-] * ... 
* * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run sum-reduction * cub::DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items); * * // d_out <-- [38] * * \endcode * * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items \iterator * \tparam OutputIteratorT [inferred] Output iterator type for recording the reduced aggregate \iterator */ template < typename InputIteratorT, typename OutputIteratorT> CUB_RUNTIME_FUNCTION static cudaError_t Sum( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output aggregate int num_items, ///< [in] Total number of input items (i.e., length of \p d_in) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; // The output value type typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? typename std::iterator_traits::value_type, // ... then the input iterator's value type, typename std::iterator_traits::value_type>::Type OutputT; // ... else the output iterator's value type return DispatchReduce::Dispatch( d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, cub::Sum(), OutputT(), // zero-initialize stream, debug_synchronous); } /** * \brief Computes a device-wide minimum using the less-than ('<') operator. * * \par * - Uses std::numeric_limits::max() as the initial value of the reduction. * - Does not support \p < operators that are non-commutative. * - Provides "run-to-run" determinism for pseudo-associative reduction * (e.g., addition of floating point types) on the same GPU device. * However, results for pseudo-associative reduction may be inconsistent * from one device to a another device of a different compute-capability * because CUB can employ different tile-sizing for different architectures. * - \devicestorage * * \par Snippet * The code snippet below illustrates the min-reduction of a device vector of \p int data elements. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input and output * int num_items; // e.g., 7 * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_out; // e.g., [-] * ... 
* * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceReduce::Min(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run min-reduction * cub::DeviceReduce::Min(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items); * * // d_out <-- [0] * * \endcode * * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items \iterator * \tparam OutputIteratorT [inferred] Output iterator type for recording the reduced aggregate \iterator */ template < typename InputIteratorT, typename OutputIteratorT> CUB_RUNTIME_FUNCTION static cudaError_t Min( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output aggregate int num_items, ///< [in] Total number of input items (i.e., length of \p d_in) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; // The input value type typedef typename std::iterator_traits::value_type InputT; return DispatchReduce::Dispatch( d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, cub::Min(), Traits::Max(), // replace with std::numeric_limits::max() when C++11 support is more prevalent stream, debug_synchronous); } /** * \brief Finds the first device-wide minimum using the less-than ('<') operator, also returning the index of that item. * * \par * - The output value type of \p d_out is cub::KeyValuePair (assuming the value type of \p d_in is \p T) * - The minimum is written to d_out.value and its offset in the input array is written to d_out.key. * - The {1, std::numeric_limits::max()} tuple is produced for zero-length inputs * - Does not support \p < operators that are non-commutative. * - Provides "run-to-run" determinism for pseudo-associative reduction * (e.g., addition of floating point types) on the same GPU device. * However, results for pseudo-associative reduction may be inconsistent * from one device to a another device of a different compute-capability * because CUB can employ different tile-sizing for different architectures. * - \devicestorage * * \par Snippet * The code snippet below illustrates the argmin-reduction of a device vector of \p int data elements. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input and output * int num_items; // e.g., 7 * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * KeyValuePair *d_out; // e.g., [{-,-}] * ... 
* * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceReduce::ArgMin(d_temp_storage, temp_storage_bytes, d_in, d_argmin, num_items); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run argmin-reduction * cub::DeviceReduce::ArgMin(d_temp_storage, temp_storage_bytes, d_in, d_argmin, num_items); * * // d_out <-- [{5, 0}] * * \endcode * * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items (of some type \p T) \iterator * \tparam OutputIteratorT [inferred] Output iterator type for recording the reduced aggregate (having value type cub::KeyValuePair) \iterator */ template < typename InputIteratorT, typename OutputIteratorT> CUB_RUNTIME_FUNCTION static cudaError_t ArgMin( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output aggregate int num_items, ///< [in] Total number of input items (i.e., length of \p d_in) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; // The input type typedef typename std::iterator_traits::value_type InputValueT; // The output tuple type typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? KeyValuePair, // ... then the key value pair OffsetT + InputValueT typename std::iterator_traits::value_type>::Type OutputTupleT; // ... else the output iterator's value type // The output value type typedef typename OutputTupleT::Value OutputValueT; // Wrapped input iterator to produce index-value tuples typedef ArgIndexInputIterator ArgIndexInputIteratorT; ArgIndexInputIteratorT d_indexed_in(d_in); // Initial value OutputTupleT initial_value(1, Traits::Max()); // replace with std::numeric_limits::max() when C++11 support is more prevalent return DispatchReduce::Dispatch( d_temp_storage, temp_storage_bytes, d_indexed_in, d_out, num_items, cub::ArgMin(), initial_value, stream, debug_synchronous); } /** * \brief Computes a device-wide maximum using the greater-than ('>') operator. * * \par * - Uses std::numeric_limits::lowest() as the initial value of the reduction. * - Does not support \p > operators that are non-commutative. * - Provides "run-to-run" determinism for pseudo-associative reduction * (e.g., addition of floating point types) on the same GPU device. * However, results for pseudo-associative reduction may be inconsistent * from one device to a another device of a different compute-capability * because CUB can employ different tile-sizing for different architectures. * - \devicestorage * * \par Snippet * The code snippet below illustrates the max-reduction of a device vector of \p int data elements. 
* \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input and output * int num_items; // e.g., 7 * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_out; // e.g., [-] * ... * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceReduce::Max(d_temp_storage, temp_storage_bytes, d_in, d_max, num_items); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run max-reduction * cub::DeviceReduce::Max(d_temp_storage, temp_storage_bytes, d_in, d_max, num_items); * * // d_out <-- [9] * * \endcode * * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items \iterator * \tparam OutputIteratorT [inferred] Output iterator type for recording the reduced aggregate \iterator */ template < typename InputIteratorT, typename OutputIteratorT> CUB_RUNTIME_FUNCTION static cudaError_t Max( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output aggregate int num_items, ///< [in] Total number of input items (i.e., length of \p d_in) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; // The input value type typedef typename std::iterator_traits::value_type InputT; return DispatchReduce::Dispatch( d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, cub::Max(), Traits::Lowest(), // replace with std::numeric_limits::lowest() when C++11 support is more prevalent stream, debug_synchronous); } /** * \brief Finds the first device-wide maximum using the greater-than ('>') operator, also returning the index of that item * * \par * - The output value type of \p d_out is cub::KeyValuePair (assuming the value type of \p d_in is \p T) * - The maximum is written to d_out.value and its offset in the input array is written to d_out.key. * - The {1, std::numeric_limits::lowest()} tuple is produced for zero-length inputs * - Does not support \p > operators that are non-commutative. * - Provides "run-to-run" determinism for pseudo-associative reduction * (e.g., addition of floating point types) on the same GPU device. * However, results for pseudo-associative reduction may be inconsistent * from one device to a another device of a different compute-capability * because CUB can employ different tile-sizing for different architectures. * - \devicestorage * * \par Snippet * The code snippet below illustrates the argmax-reduction of a device vector of \p int data elements. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input and output * int num_items; // e.g., 7 * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * KeyValuePair *d_out; // e.g., [{-,-}] * ... 
* * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceReduce::ArgMax(d_temp_storage, temp_storage_bytes, d_in, d_argmax, num_items); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run argmax-reduction * cub::DeviceReduce::ArgMax(d_temp_storage, temp_storage_bytes, d_in, d_argmax, num_items); * * // d_out <-- [{6, 9}] * * \endcode * * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items (of some type \p T) \iterator * \tparam OutputIteratorT [inferred] Output iterator type for recording the reduced aggregate (having value type cub::KeyValuePair) \iterator */ template < typename InputIteratorT, typename OutputIteratorT> CUB_RUNTIME_FUNCTION static cudaError_t ArgMax( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output aggregate int num_items, ///< [in] Total number of input items (i.e., length of \p d_in) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; // The input type typedef typename std::iterator_traits::value_type InputValueT; // The output tuple type typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? KeyValuePair, // ... then the key value pair OffsetT + InputValueT typename std::iterator_traits::value_type>::Type OutputTupleT; // ... else the output iterator's value type // The output value type typedef typename OutputTupleT::Value OutputValueT; // Wrapped input iterator to produce index-value tuples typedef ArgIndexInputIterator ArgIndexInputIteratorT; ArgIndexInputIteratorT d_indexed_in(d_in); // Initial value OutputTupleT initial_value(1, Traits::Lowest()); // replace with std::numeric_limits::lowest() when C++11 support is more prevalent return DispatchReduce::Dispatch( d_temp_storage, temp_storage_bytes, d_indexed_in, d_out, num_items, cub::ArgMax(), initial_value, stream, debug_synchronous); } /** * \brief Reduces segments of values, where segments are demarcated by corresponding runs of identical keys. * * \par * This operation computes segmented reductions within \p d_values_in using * the specified binary \p reduction_op functor. The segments are identified by * "runs" of corresponding keys in \p d_keys_in, where runs are maximal ranges of * consecutive, identical keys. For the ith run encountered, * the first key of the run and the corresponding value aggregate of that run are * written to d_unique_out[i] and d_aggregates_out[i], * respectively. The total number of runs encountered is written to \p d_num_runs_out. * * \par * - The == equality operator is used to determine whether keys are equivalent * - Provides "run-to-run" determinism for pseudo-associative reduction * (e.g., addition of floating point types) on the same GPU device. 
* However, results for pseudo-associative reduction may be inconsistent * from one device to a another device of a different compute-capability * because CUB can employ different tile-sizing for different architectures. * - \devicestorage * * \par Performance * The following chart illustrates reduction-by-key (sum) performance across * different CUDA architectures for \p fp32 and \p fp64 values, respectively. Segments * are identified by \p int32 keys, and have lengths uniformly sampled from [1,1000]. * * \image html reduce_by_key_fp32_len_500.png * \image html reduce_by_key_fp64_len_500.png * * \par * The following charts are similar, but with segment lengths uniformly sampled from [1,10]: * * \image html reduce_by_key_fp32_len_5.png * \image html reduce_by_key_fp64_len_5.png * * \par Snippet * The code snippet below illustrates the segmented reduction of \p int values grouped * by runs of associated \p int keys. * \par * \code * #include // or equivalently * * // CustomMin functor * struct CustomMin * { * template * CUB_RUNTIME_FUNCTION __forceinline__ * T operator()(const T &a, const T &b) const { * return (b < a) ? b : a; * } * }; * * // Declare, allocate, and initialize device-accessible pointers for input and output * int num_items; // e.g., 8 * int *d_keys_in; // e.g., [0, 2, 2, 9, 5, 5, 5, 8] * int *d_values_in; // e.g., [0, 7, 1, 6, 2, 5, 3, 4] * int *d_unique_out; // e.g., [-, -, -, -, -, -, -, -] * int *d_aggregates_out; // e.g., [-, -, -, -, -, -, -, -] * int *d_num_runs_out; // e.g., [-] * CustomMin reduction_op; * ... * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceReduce::ReduceByKey(d_temp_storage, temp_storage_bytes, d_keys_in, d_unique_out, d_values_in, d_aggregates_out, d_num_runs_out, reduction_op, num_items); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run reduce-by-key * cub::DeviceReduce::ReduceByKey(d_temp_storage, temp_storage_bytes, d_keys_in, d_unique_out, d_values_in, d_aggregates_out, d_num_runs_out, reduction_op, num_items); * * // d_unique_out <-- [0, 2, 9, 5, 8] * // d_aggregates_out <-- [0, 1, 6, 2, 4] * // d_num_runs_out <-- [5] * * \endcode * * \tparam KeysInputIteratorT [inferred] Random-access input iterator type for reading input keys \iterator * \tparam UniqueOutputIteratorT [inferred] Random-access output iterator type for writing unique output keys \iterator * \tparam ValuesInputIteratorT [inferred] Random-access input iterator type for reading input values \iterator * \tparam AggregatesOutputIterator [inferred] Random-access output iterator type for writing output value aggregates \iterator * \tparam NumRunsOutputIteratorT [inferred] Output iterator type for recording the number of runs encountered \iterator * \tparam ReductionOpT [inferred] Binary reduction functor type having member T operator()(const T &a, const T &b) */ template < typename KeysInputIteratorT, typename UniqueOutputIteratorT, typename ValuesInputIteratorT, typename AggregatesOutputIteratorT, typename NumRunsOutputIteratorT, typename ReductionOpT> CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t ReduceByKey( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. 
size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation KeysInputIteratorT d_keys_in, ///< [in] Pointer to the input sequence of keys UniqueOutputIteratorT d_unique_out, ///< [out] Pointer to the output sequence of unique keys (one key per run) ValuesInputIteratorT d_values_in, ///< [in] Pointer to the input sequence of corresponding values AggregatesOutputIteratorT d_aggregates_out, ///< [out] Pointer to the output sequence of value aggregates (one aggregate per run) NumRunsOutputIteratorT d_num_runs_out, ///< [out] Pointer to total number of runs encountered (i.e., the length of d_unique_out) ReductionOpT reduction_op, ///< [in] Binary reduction functor int num_items, ///< [in] Total number of associated key+value pairs (i.e., the length of \p d_in_keys and \p d_in_values) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; // FlagT iterator type (not used) // Selection op (not used) // Default == operator typedef Equality EqualityOp; return DispatchReduceByKey::Dispatch( d_temp_storage, temp_storage_bytes, d_keys_in, d_unique_out, d_values_in, d_aggregates_out, d_num_runs_out, EqualityOp(), reduction_op, num_items, stream, debug_synchronous); } }; /** * \example example_device_reduce.cu */ } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/device/device_run_length_encode.cuh000066400000000000000000000347411411340063500253300ustar00rootroot00000000000000 /****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ******************************************************************************/ /** * \file * cub::DeviceRunLengthEncode provides device-wide, parallel operations for computing a run-length encoding across a sequence of data items residing within device-accessible memory. */ #pragma once #include #include #include "dispatch/dispatch_rle.cuh" #include "dispatch/dispatch_reduce_by_key.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief DeviceRunLengthEncode provides device-wide, parallel operations for demarcating "runs" of same-valued items within a sequence residing within device-accessible memory. ![](run_length_encode_logo.png) * \ingroup SingleModule * * \par Overview * A run-length encoding * computes a simple compressed representation of a sequence of input elements such that each * maximal "run" of consecutive same-valued data items is encoded as a single data value along with a * count of the elements in that run. * * \par Usage Considerations * \cdp_class{DeviceRunLengthEncode} * * \par Performance * \linear_performance{run-length encode} * * \par * The following chart illustrates DeviceRunLengthEncode::RunLengthEncode performance across * different CUDA architectures for \p int32 items. * Segments have lengths uniformly sampled from [1,1000]. * * \image html rle_int32_len_500.png * * \par * \plots_below * */ struct DeviceRunLengthEncode { /** * \brief Computes a run-length encoding of the sequence \p d_in. * * \par * - For the ith run encountered, the first key of the run and its length are written to * d_unique_out[i] and d_counts_out[i], * respectively. * - The total number of runs encountered is written to \p d_num_runs_out. * - The == equality operator is used to determine whether values are equivalent * - \devicestorage * * \par Performance * The following charts illustrate saturated encode performance across different * CUDA architectures for \p int32 and \p int64 items, respectively. Segments have * lengths uniformly sampled from [1,1000]. * * \image html rle_int32_len_500.png * \image html rle_int64_len_500.png * * \par * The following charts are similar, but with segment lengths uniformly sampled from [1,10]: * * \image html rle_int32_len_5.png * \image html rle_int64_len_5.png * * \par Snippet * The code snippet below illustrates the run-length encoding of a sequence of \p int values. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input and output * int num_items; // e.g., 8 * int *d_in; // e.g., [0, 2, 2, 9, 5, 5, 5, 8] * int *d_unique_out; // e.g., [ , , , , , , , ] * int *d_counts_out; // e.g., [ , , , , , , , ] * int *d_num_runs_out; // e.g., [ ] * ... 
* * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceRunLengthEncode::Encode(d_temp_storage, temp_storage_bytes, d_in, d_unique_out, d_counts_out, d_num_runs_out, num_items); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run encoding * cub::DeviceRunLengthEncode::Encode(d_temp_storage, temp_storage_bytes, d_in, d_unique_out, d_counts_out, d_num_runs_out, num_items); * * // d_unique_out <-- [0, 2, 9, 5, 8] * // d_counts_out <-- [1, 2, 1, 3, 1] * // d_num_runs_out <-- [5] * * \endcode * * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items \iterator * \tparam UniqueOutputIteratorT [inferred] Random-access output iterator type for writing unique output items \iterator * \tparam LengthsOutputIteratorT [inferred] Random-access output iterator type for writing output counts \iterator * \tparam NumRunsOutputIteratorT [inferred] Output iterator type for recording the number of runs encountered \iterator */ template < typename InputIteratorT, typename UniqueOutputIteratorT, typename LengthsOutputIteratorT, typename NumRunsOutputIteratorT> CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t Encode( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of keys UniqueOutputIteratorT d_unique_out, ///< [out] Pointer to the output sequence of unique keys (one key per run) LengthsOutputIteratorT d_counts_out, ///< [out] Pointer to the output sequence of run-lengths (one count per run) NumRunsOutputIteratorT d_num_runs_out, ///< [out] Pointer to total number of runs int num_items, ///< [in] Total number of associated key+value pairs (i.e., the length of \p d_in_keys and \p d_in_values) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. { typedef int OffsetT; // Signed integer type for global offsets typedef NullType* FlagIterator; // FlagT iterator type (not used) typedef NullType SelectOp; // Selection op (not used) typedef Equality EqualityOp; // Default == operator typedef cub::Sum ReductionOp; // Value reduction operator // The lengths output value type typedef typename If<(Equals::value_type, void>::VALUE), // LengthT = (if output iterator's value type is void) ? OffsetT, // ... then the OffsetT type, typename std::iterator_traits::value_type>::Type LengthT; // ... else the output iterator's value type // Generator type for providing 1s values for run-length reduction typedef ConstantInputIterator LengthsInputIteratorT; return DispatchReduceByKey::Dispatch( d_temp_storage, temp_storage_bytes, d_in, d_unique_out, LengthsInputIteratorT((LengthT) 1), d_counts_out, d_num_runs_out, EqualityOp(), ReductionOp(), num_items, stream, debug_synchronous); } /** * \brief Enumerates the starting offsets and lengths of all non-trivial runs (of length > 1) of same-valued keys in the sequence \p d_in. 
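 *
 * \par
 * For example, given the input sequence [0, 2, 2, 9, 5, 5, 5, 8], only the run
 * of 2s (starting at offset 1, length 2) and the run of 5s (starting at offset
 * 4, length 3) are non-trivial; single-item runs are ignored, so two runs are
 * reported.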
* * \par * - For the ith non-trivial run, the run's starting offset * and its length are written to d_offsets_out[i] and * d_lengths_out[i], respectively. * - The total number of runs encountered is written to \p d_num_runs_out. * - The == equality operator is used to determine whether values are equivalent * - \devicestorage * * \par Performance * * \par Snippet * The code snippet below illustrates the identification of non-trivial runs within a sequence of \p int values. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input and output * int num_items; // e.g., 8 * int *d_in; // e.g., [0, 2, 2, 9, 5, 5, 5, 8] * int *d_offsets_out; // e.g., [ , , , , , , , ] * int *d_lengths_out; // e.g., [ , , , , , , , ] * int *d_num_runs_out; // e.g., [ ] * ... * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceRunLengthEncode::NonTrivialRuns(d_temp_storage, temp_storage_bytes, d_in, d_offsets_out, d_lengths_out, d_num_runs_out, num_items); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run encoding * cub::DeviceRunLengthEncode::NonTrivialRuns(d_temp_storage, temp_storage_bytes, d_in, d_offsets_out, d_lengths_out, d_num_runs_out, num_items); * * // d_offsets_out <-- [1, 4] * // d_lengths_out <-- [2, 3] * // d_num_runs_out <-- [2] * * \endcode * * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items \iterator * \tparam OffsetsOutputIteratorT [inferred] Random-access output iterator type for writing run-offset values \iterator * \tparam LengthsOutputIteratorT [inferred] Random-access output iterator type for writing run-length values \iterator * \tparam NumRunsOutputIteratorT [inferred] Output iterator type for recording the number of runs encountered \iterator */ template < typename InputIteratorT, typename OffsetsOutputIteratorT, typename LengthsOutputIteratorT, typename NumRunsOutputIteratorT> CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t NonTrivialRuns( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to input sequence of data items OffsetsOutputIteratorT d_offsets_out, ///< [out] Pointer to output sequence of run-offsets (one offset per non-trivial run) LengthsOutputIteratorT d_lengths_out, ///< [out] Pointer to output sequence of run-lengths (one count per non-trivial run) NumRunsOutputIteratorT d_num_runs_out, ///< [out] Pointer to total number of runs (i.e., length of \p d_offsets_out) int num_items, ///< [in] Total number of associated key+value pairs (i.e., the length of \p d_in_keys and \p d_in_values) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. 
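    // Note: like Encode above (which reduces a stream of constant 1s with cub::Sum
    // through the reduce-by-key dispatch layer), this routine is a thin wrapper; it
    // only fixes the signed offset type and the default equality operator before
    // handing the work to the run-length-encoding dispatcher below.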
{ typedef int OffsetT; // Signed integer type for global offsets typedef Equality EqualityOp; // Default == operator return DeviceRleDispatch::Dispatch( d_temp_storage, temp_storage_bytes, d_in, d_offsets_out, d_lengths_out, d_num_runs_out, EqualityOp(), num_items, stream, debug_synchronous); } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/device/device_scan.cuh000066400000000000000000000524321411340063500225670ustar00rootroot00000000000000 /****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::DeviceScan provides device-wide, parallel operations for computing a prefix scan across a sequence of data items residing within device-accessible memory. */ #pragma once #include #include #include "dispatch/dispatch_scan.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief DeviceScan provides device-wide, parallel operations for computing a prefix scan across a sequence of data items residing within device-accessible memory. ![](device_scan.png) * \ingroup SingleModule * * \par Overview * Given a sequence of input elements and a binary reduction operator, a [prefix scan](http://en.wikipedia.org/wiki/Prefix_sum) * produces an output sequence where each element is computed to be the reduction * of the elements occurring earlier in the input sequence. Prefix sum * connotes a prefix scan with the addition operator. The term \em inclusive indicates * that the ith output reduction incorporates the ith input. * The term \em exclusive indicates the ith input is not incorporated into * the ith output reduction. 
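 *
 * \par
 * For example, for the input sequence [8, 6, 7, 5], an inclusive prefix sum
 * produces [8, 14, 21, 26], whereas an exclusive prefix sum (seeded with 0)
 * produces [0, 8, 14, 21].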
* * \par * As of CUB 1.0.1 (2013), CUB's device-wide scan APIs have implemented our "decoupled look-back" algorithm * for performing global prefix scan with only a single pass through the * input data, as described in our 2016 technical report [1]. The central * idea is to leverage a small, constant factor of redundant work in order to overlap the latencies * of global prefix propagation with local computation. As such, our algorithm requires only * ~2n data movement (n inputs are read, n outputs are written), and typically * proceeds at "memcpy" speeds. * * \par * [1] [Duane Merrill and Michael Garland. "Single-pass Parallel Prefix Scan with Decoupled Look-back", NVIDIA Technical Report NVR-2016-002, 2016.](https://research.nvidia.com/publication/single-pass-parallel-prefix-scan-decoupled-look-back) * * \par Usage Considerations * \cdp_class{DeviceScan} * * \par Performance * \linear_performance{prefix scan} * * \par * The following chart illustrates DeviceScan::ExclusiveSum * performance across different CUDA architectures for \p int32 keys. * \plots_below * * \image html scan_int32.png * */ struct DeviceScan { /******************************************************************//** * \name Exclusive scans *********************************************************************/ //@{ /** * \brief Computes a device-wide exclusive prefix sum. The value of 0 is applied as the initial value, and is assigned to *d_out. * * \par * - Supports non-commutative sum operators. * - Provides "run-to-run" determinism for pseudo-associative reduction * (e.g., addition of floating point types) on the same GPU device. * However, results for pseudo-associative reduction may be inconsistent * from one device to a another device of a different compute-capability * because CUB can employ different tile-sizing for different architectures. * - \devicestorage * * \par Performance * The following charts illustrate saturated exclusive sum performance across different * CUDA architectures for \p int32 and \p int64 items, respectively. * * \image html scan_int32.png * \image html scan_int64.png * * \par Snippet * The code snippet below illustrates the exclusive prefix sum of an \p int device vector. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input and output * int num_items; // e.g., 7 * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_out; // e.g., [ , , , , , , ] * ... * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceScan::ExclusiveSum(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run exclusive prefix sum * cub::DeviceScan::ExclusiveSum(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items); * * // d_out s<-- [0, 8, 14, 21, 26, 29, 29] * * \endcode * * \tparam InputIteratorT [inferred] Random-access input iterator type for reading scan inputs \iterator * \tparam OutputIteratorT [inferred] Random-access output iterator type for writing scan outputs \iterator */ template < typename InputIteratorT, typename OutputIteratorT> CUB_RUNTIME_FUNCTION static cudaError_t ExclusiveSum( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. 
size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output sequence of data items int num_items, ///< [in] Total number of input items (i.e., the length of \p d_in) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; // The output value type typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? typename std::iterator_traits::value_type, // ... then the input iterator's value type, typename std::iterator_traits::value_type>::Type OutputT; // ... else the output iterator's value type // Initial value OutputT init_value = 0; return DispatchScan::Dispatch( d_temp_storage, temp_storage_bytes, d_in, d_out, Sum(), init_value, num_items, stream, debug_synchronous); } /** * \brief Computes a device-wide exclusive prefix scan using the specified binary \p scan_op functor. The \p init_value value is applied as the initial value, and is assigned to *d_out. * * \par * - Supports non-commutative scan operators. * - Provides "run-to-run" determinism for pseudo-associative reduction * (e.g., addition of floating point types) on the same GPU device. * However, results for pseudo-associative reduction may be inconsistent * from one device to a another device of a different compute-capability * because CUB can employ different tile-sizing for different architectures. * - \devicestorage * * \par Snippet * The code snippet below illustrates the exclusive prefix min-scan of an \p int device vector * \par * \code * #include // or equivalently * * // CustomMin functor * struct CustomMin * { * template * CUB_RUNTIME_FUNCTION __forceinline__ * T operator()(const T &a, const T &b) const { * return (b < a) ? b : a; * } * }; * * // Declare, allocate, and initialize device-accessible pointers for input and output * int num_items; // e.g., 7 * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_out; // e.g., [ , , , , , , ] * CustomMin min_op * ... 
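 * // (Exclusive min-scan: output element i is the minimum over d_in[0..i-1];
 * //  element 0 receives the seed value, here MAX_INT = 2147483647.)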
* * // Determine temporary device storage requirements for exclusive prefix scan * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceScan::ExclusiveScan(d_temp_storage, temp_storage_bytes, d_in, d_out, min_op, (int) MAX_INT, num_items); * * // Allocate temporary storage for exclusive prefix scan * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run exclusive prefix min-scan * cub::DeviceScan::ExclusiveScan(d_temp_storage, temp_storage_bytes, d_in, d_out, min_op, (int) MAX_INT, num_items); * * // d_out <-- [2147483647, 8, 6, 6, 5, 3, 0] * * \endcode * * \tparam InputIteratorT [inferred] Random-access input iterator type for reading scan inputs \iterator * \tparam OutputIteratorT [inferred] Random-access output iterator type for writing scan outputs \iterator * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) * \tparam Identity [inferred] Type of the \p identity value used Binary scan functor type having member T operator()(const T &a, const T &b) */ template < typename InputIteratorT, typename OutputIteratorT, typename ScanOpT, typename InitValueT> CUB_RUNTIME_FUNCTION static cudaError_t ExclusiveScan( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output sequence of data items ScanOpT scan_op, ///< [in] Binary scan functor InitValueT init_value, ///< [in] Initial value to seed the exclusive scan (and is assigned to *d_out) int num_items, ///< [in] Total number of input items (i.e., the length of \p d_in) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; return DispatchScan::Dispatch( d_temp_storage, temp_storage_bytes, d_in, d_out, scan_op, init_value, num_items, stream, debug_synchronous); } //@} end member group /******************************************************************//** * \name Inclusive scans *********************************************************************/ //@{ /** * \brief Computes a device-wide inclusive prefix sum. * * \par * - Supports non-commutative sum operators. * - Provides "run-to-run" determinism for pseudo-associative reduction * (e.g., addition of floating point types) on the same GPU device. * However, results for pseudo-associative reduction may be inconsistent * from one device to a another device of a different compute-capability * because CUB can employ different tile-sizing for different architectures. * - \devicestorage * * \par Snippet * The code snippet below illustrates the inclusive prefix sum of an \p int device vector. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input and output * int num_items; // e.g., 7 * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_out; // e.g., [ , , , , , , ] * ... 
* * // Determine temporary device storage requirements for inclusive prefix sum * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceScan::InclusiveSum(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items); * * // Allocate temporary storage for inclusive prefix sum * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run inclusive prefix sum * cub::DeviceScan::InclusiveSum(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items); * * // d_out <-- [8, 14, 21, 26, 29, 29, 38] * * \endcode * * \tparam InputIteratorT [inferred] Random-access input iterator type for reading scan inputs \iterator * \tparam OutputIteratorT [inferred] Random-access output iterator type for writing scan outputs \iterator */ template < typename InputIteratorT, typename OutputIteratorT> CUB_RUNTIME_FUNCTION static cudaError_t InclusiveSum( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output sequence of data items int num_items, ///< [in] Total number of input items (i.e., the length of \p d_in) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; return DispatchScan::Dispatch( d_temp_storage, temp_storage_bytes, d_in, d_out, Sum(), NullType(), num_items, stream, debug_synchronous); } /** * \brief Computes a device-wide inclusive prefix scan using the specified binary \p scan_op functor. * * \par * - Supports non-commutative scan operators. * - Provides "run-to-run" determinism for pseudo-associative reduction * (e.g., addition of floating point types) on the same GPU device. * However, results for pseudo-associative reduction may be inconsistent * from one device to a another device of a different compute-capability * because CUB can employ different tile-sizing for different architectures. * - \devicestorage * * \par Snippet * The code snippet below illustrates the inclusive prefix min-scan of an \p int device vector. * \par * \code * #include // or equivalently * * // CustomMin functor * struct CustomMin * { * template * CUB_RUNTIME_FUNCTION __forceinline__ * T operator()(const T &a, const T &b) const { * return (b < a) ? b : a; * } * }; * * // Declare, allocate, and initialize device-accessible pointers for input and output * int num_items; // e.g., 7 * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_out; // e.g., [ , , , , , , ] * CustomMin min_op; * ... 
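 * // (Inclusive min-scan: output element i is the minimum over d_in[0..i],
 * //  so no seed value is required.)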
* * // Determine temporary device storage requirements for inclusive prefix scan * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceScan::InclusiveScan(d_temp_storage, temp_storage_bytes, d_in, d_out, min_op, num_items); * * // Allocate temporary storage for inclusive prefix scan * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run inclusive prefix min-scan * cub::DeviceScan::InclusiveScan(d_temp_storage, temp_storage_bytes, d_in, d_out, min_op, num_items); * * // d_out <-- [8, 6, 6, 5, 3, 0, 0] * * \endcode * * \tparam InputIteratorT [inferred] Random-access input iterator type for reading scan inputs \iterator * \tparam OutputIteratorT [inferred] Random-access output iterator type for writing scan outputs \iterator * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) */ template < typename InputIteratorT, typename OutputIteratorT, typename ScanOpT> CUB_RUNTIME_FUNCTION static cudaError_t InclusiveScan( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output sequence of data items ScanOpT scan_op, ///< [in] Binary scan functor int num_items, ///< [in] Total number of input items (i.e., the length of \p d_in) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; return DispatchScan::Dispatch( d_temp_storage, temp_storage_bytes, d_in, d_out, scan_op, NullType(), num_items, stream, debug_synchronous); } //@} end member group }; /** * \example example_device_scan.cu */ } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/device/device_segmented_radix_sort.cuh000066400000000000000000001525431411340063500260600ustar00rootroot00000000000000 /****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::DeviceSegmentedRadixSort provides device-wide, parallel operations for computing a batched radix sort across multiple, non-overlapping sequences of data items residing within device-accessible memory. */ #pragma once #include #include #include "dispatch/dispatch_radix_sort.cuh" #include "../util_arch.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief DeviceSegmentedRadixSort provides device-wide, parallel operations for computing a batched radix sort across multiple, non-overlapping sequences of data items residing within device-accessible memory. ![](segmented_sorting_logo.png) * \ingroup SegmentedModule * * \par Overview * The [radix sorting method](http://en.wikipedia.org/wiki/Radix_sort) arranges * items into ascending (or descending) order. The algorithm relies upon a positional representation for * keys, i.e., each key is comprised of an ordered sequence of symbols (e.g., digits, * characters, etc.) specified from least-significant to most-significant. For a * given input sequence of keys and a set of rules specifying a total ordering * of the symbolic alphabet, the radix sorting method produces a lexicographic * ordering of those keys. * * \par * DeviceSegmentedRadixSort can sort all of the built-in C++ numeric primitive types, e.g.: * unsigned char, \p int, \p double, etc. Although the direct radix sorting * method can only be applied to unsigned integral types, DeviceSegmentedRadixSort * is able to sort signed and floating-point types via simple bit-wise transformations * that ensure lexicographic key ordering. * * \par Usage Considerations * \cdp_class{DeviceSegmentedRadixSort} * */ struct DeviceSegmentedRadixSort { /******************************************************************//** * \name Key-value pairs *********************************************************************/ //@{ /** * \brief Sorts segments of key-value pairs into ascending order. (~2N auxiliary storage required) * * \par * - The contents of the input data are not altered by the sorting operation * - When input a contiguous sequence of segments, a single sequence * \p segment_offsets (of length num_segments+1) can be aliased * for both the \p d_begin_offsets and \p d_end_offsets parameters (where * the latter is specified as segment_offsets+1). * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. * - \devicestorageNP For sorting using only O(P) temporary storage, see the sorting interface using DoubleBuffer wrappers below. * - \devicestorage * * \par Snippet * The code snippet below illustrates the batched sorting of three segments (with one zero-length segment) of \p int keys * with associated vector of \p int values. 
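 * With \p d_offsets = [0, 3, 3, 7], the three segments cover input elements
 * [0,3), [3,3) (empty), and [3,7); passing \p d_offsets and d_offsets + 1
 * as the begin/end offset sequences is the aliasing trick described above.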
* \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for sorting data * int num_items; // e.g., 7 * int num_segments; // e.g., 3 * int *d_offsets; // e.g., [0, 3, 3, 7] * int *d_keys_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_keys_out; // e.g., [-, -, -, -, -, -, -] * int *d_values_in; // e.g., [0, 1, 2, 3, 4, 5, 6] * int *d_values_out; // e.g., [-, -, -, -, -, -, -] * ... * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceSegmentedRadixSort::SortPairs(d_temp_storage, temp_storage_bytes, * d_keys_in, d_keys_out, d_values_in, d_values_out, * num_items, num_segments, d_offsets, d_offsets + 1); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run sorting operation * cub::DeviceSegmentedRadixSort::SortPairs(d_temp_storage, temp_storage_bytes, * d_keys_in, d_keys_out, d_values_in, d_values_out, * num_items, num_segments, d_offsets, d_offsets + 1); * * // d_keys_out <-- [6, 7, 8, 0, 3, 5, 9] * // d_values_out <-- [1, 2, 0, 5, 4, 3, 6] * * \endcode * * \tparam KeyT [inferred] Key type * \tparam ValueT [inferred] Value type * \tparam OffsetIteratorT [inferred] Random-access input iterator type for reading segment offsets \iterator */ template < typename KeyT, typename ValueT, typename OffsetIteratorT> CUB_RUNTIME_FUNCTION static cudaError_t SortPairs( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation const KeyT *d_keys_in, ///< [in] %Device-accessible pointer to the input data of key data to sort KeyT *d_keys_out, ///< [out] %Device-accessible pointer to the sorted output sequence of key data const ValueT *d_values_in, ///< [in] %Device-accessible pointer to the corresponding input sequence of associated value items ValueT *d_values_out, ///< [out] %Device-accessible pointer to the correspondingly-reordered output sequence of associated value items int num_items, ///< [in] The total number of items to sort (across all segments) int num_segments, ///< [in] The number of segments that comprise the sorting data OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. 
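    // Note: this pointer-based overload wraps the caller's buffers in DoubleBuffer
    // structures and passes 'false' for the dispatcher's boolean overwrite flag,
    // which is why the input is left untouched at the cost of ~2N auxiliary
    // storage; the DoubleBuffer overload below passes 'true' and therefore needs
    // only O(P) extra storage.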
{ // Signed integer type for global offsets typedef int OffsetT; DoubleBuffer d_keys(const_cast(d_keys_in), d_keys_out); DoubleBuffer d_values(const_cast(d_values_in), d_values_out); return DispatchSegmentedRadixSort::Dispatch( d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, num_segments, d_begin_offsets, d_end_offsets, begin_bit, end_bit, false, stream, debug_synchronous); } /** * \brief Sorts segments of key-value pairs into ascending order. (~N auxiliary storage required) * * \par * - The sorting operation is given a pair of key buffers and a corresponding * pair of associated value buffers. Each pair is managed by a DoubleBuffer * structure that indicates which of the two buffers is "current" (and thus * contains the input data to be sorted). * - The contents of both buffers within each pair may be altered by the sorting * operation. * - Upon completion, the sorting operation will update the "current" indicator * within each DoubleBuffer wrapper to reference which of the two buffers * now contains the sorted output sequence (a function of the number of key bits * specified and the targeted device architecture). * - When input a contiguous sequence of segments, a single sequence * \p segment_offsets (of length num_segments+1) can be aliased * for both the \p d_begin_offsets and \p d_end_offsets parameters (where * the latter is specified as segment_offsets+1). * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. * - \devicestorageP * - \devicestorage * * \par Snippet * The code snippet below illustrates the batched sorting of three segments (with one zero-length segment) of \p int keys * with associated vector of \p int values. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for sorting data * int num_items; // e.g., 7 * int num_segments; // e.g., 3 * int *d_offsets; // e.g., [0, 3, 3, 7] * int *d_key_buf; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_key_alt_buf; // e.g., [-, -, -, -, -, -, -] * int *d_value_buf; // e.g., [0, 1, 2, 3, 4, 5, 6] * int *d_value_alt_buf; // e.g., [-, -, -, -, -, -, -] * ... * * // Create a set of DoubleBuffers to wrap pairs of device pointers * cub::DoubleBuffer d_keys(d_key_buf, d_key_alt_buf); * cub::DoubleBuffer d_values(d_value_buf, d_value_alt_buf); * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceSegmentedRadixSort::SortPairs(d_temp_storage, temp_storage_bytes, d_keys, d_values, * num_items, num_segments, d_offsets, d_offsets + 1); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run sorting operation * cub::DeviceSegmentedRadixSort::SortPairs(d_temp_storage, temp_storage_bytes, d_keys, d_values, * num_items, num_segments, d_offsets, d_offsets + 1); * * // d_keys.Current() <-- [6, 7, 8, 0, 3, 5, 9] * // d_values.Current() <-- [5, 4, 3, 1, 2, 0, 6] * * \endcode * * \tparam KeyT [inferred] Key type * \tparam ValueT [inferred] Value type * \tparam OffsetIteratorT [inferred] Random-access input iterator type for reading segment offsets \iterator */ template < typename KeyT, typename ValueT, typename OffsetIteratorT> CUB_RUNTIME_FUNCTION static cudaError_t SortPairs( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. 
When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation DoubleBuffer &d_keys, ///< [in,out] Reference to the double-buffer of keys whose "current" device-accessible buffer contains the unsorted input keys and, upon return, is updated to point to the sorted output keys DoubleBuffer &d_values, ///< [in,out] Double-buffer of values whose "current" device-accessible buffer contains the unsorted input values and, upon return, is updated to point to the sorted output values int num_items, ///< [in] The total number of items to sort (across all segments) int num_segments, ///< [in] The number of segments that comprise the sorting data OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; return DispatchSegmentedRadixSort::Dispatch( d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, num_segments, d_begin_offsets, d_end_offsets, begin_bit, end_bit, true, stream, debug_synchronous); } /** * \brief Sorts segments of key-value pairs into descending order. (~2N auxiliary storage required). * * \par * - The contents of the input data are not altered by the sorting operation * - When input a contiguous sequence of segments, a single sequence * \p segment_offsets (of length num_segments+1) can be aliased * for both the \p d_begin_offsets and \p d_end_offsets parameters (where * the latter is specified as segment_offsets+1). * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. * - \devicestorageNP For sorting using only O(P) temporary storage, see the sorting interface using DoubleBuffer wrappers below. * - \devicestorage * * \par Snippet * The code snippet below illustrates the batched sorting of three segments (with one zero-length segment) of \p int keys * with associated vector of \p int values. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for sorting data * int num_items; // e.g., 7 * int num_segments; // e.g., 3 * int *d_offsets; // e.g., [0, 3, 3, 7] * int *d_keys_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_keys_out; // e.g., [-, -, -, -, -, -, -] * int *d_values_in; // e.g., [0, 1, 2, 3, 4, 5, 6] * int *d_values_out; // e.g., [-, -, -, -, -, -, -] * ... 
* * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceSegmentedRadixSort::SortPairsDescending(d_temp_storage, temp_storage_bytes, * d_keys_in, d_keys_out, d_values_in, d_values_out, * num_items, num_segments, d_offsets, d_offsets + 1); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run sorting operation * cub::DeviceSegmentedRadixSort::SortPairsDescending(d_temp_storage, temp_storage_bytes, * d_keys_in, d_keys_out, d_values_in, d_values_out, * num_items, num_segments, d_offsets, d_offsets + 1); * * // d_keys_out <-- [8, 7, 6, 9, 5, 3, 0] * // d_values_out <-- [0, 2, 1, 6, 3, 4, 5] * * \endcode * * \tparam KeyT [inferred] Key type * \tparam ValueT [inferred] Value type * \tparam OffsetIteratorT [inferred] Random-access input iterator type for reading segment offsets \iterator */ template < typename KeyT, typename ValueT, typename OffsetIteratorT> CUB_RUNTIME_FUNCTION static cudaError_t SortPairsDescending( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation const KeyT *d_keys_in, ///< [in] %Device-accessible pointer to the input data of key data to sort KeyT *d_keys_out, ///< [out] %Device-accessible pointer to the sorted output sequence of key data const ValueT *d_values_in, ///< [in] %Device-accessible pointer to the corresponding input sequence of associated value items ValueT *d_values_out, ///< [out] %Device-accessible pointer to the correspondingly-reordered output sequence of associated value items int num_items, ///< [in] The total number of items to sort (across all segments) int num_segments, ///< [in] The number of segments that comprise the sorting data OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; DoubleBuffer d_keys(const_cast(d_keys_in), d_keys_out); DoubleBuffer d_values(const_cast(d_values_in), d_values_out); return DispatchSegmentedRadixSort::Dispatch( d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, num_segments, d_begin_offsets, d_end_offsets, begin_bit, end_bit, false, stream, debug_synchronous); } /** * \brief Sorts segments of key-value pairs into descending order. (~N auxiliary storage required). 
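 *
 * \par
 * Because either buffer of each pair may end up holding the result, the sorted
 * output must be read through d_keys.Current() / d_values.Current() after the
 * call rather than from a fixed pointer.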
* * \par * - The sorting operation is given a pair of key buffers and a corresponding * pair of associated value buffers. Each pair is managed by a DoubleBuffer * structure that indicates which of the two buffers is "current" (and thus * contains the input data to be sorted). * - The contents of both buffers within each pair may be altered by the sorting * operation. * - Upon completion, the sorting operation will update the "current" indicator * within each DoubleBuffer wrapper to reference which of the two buffers * now contains the sorted output sequence (a function of the number of key bits * specified and the targeted device architecture). * - When input a contiguous sequence of segments, a single sequence * \p segment_offsets (of length num_segments+1) can be aliased * for both the \p d_begin_offsets and \p d_end_offsets parameters (where * the latter is specified as segment_offsets+1). * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. * - \devicestorageP * - \devicestorage * * \par Snippet * The code snippet below illustrates the batched sorting of three segments (with one zero-length segment) of \p int keys * with associated vector of \p int values. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for sorting data * int num_items; // e.g., 7 * int num_segments; // e.g., 3 * int *d_offsets; // e.g., [0, 3, 3, 7] * int *d_key_buf; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_key_alt_buf; // e.g., [-, -, -, -, -, -, -] * int *d_value_buf; // e.g., [0, 1, 2, 3, 4, 5, 6] * int *d_value_alt_buf; // e.g., [-, -, -, -, -, -, -] * ... * * // Create a set of DoubleBuffers to wrap pairs of device pointers * cub::DoubleBuffer d_keys(d_key_buf, d_key_alt_buf); * cub::DoubleBuffer d_values(d_value_buf, d_value_alt_buf); * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceSegmentedRadixSort::SortPairsDescending(d_temp_storage, temp_storage_bytes, d_keys, d_values, * num_items, num_segments, d_offsets, d_offsets + 1); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run sorting operation * cub::DeviceSegmentedRadixSort::SortPairsDescending(d_temp_storage, temp_storage_bytes, d_keys, d_values, * num_items, num_segments, d_offsets, d_offsets + 1); * * // d_keys.Current() <-- [8, 7, 6, 9, 5, 3, 0] * // d_values.Current() <-- [0, 2, 1, 6, 3, 4, 5] * * \endcode * * \tparam KeyT [inferred] Key type * \tparam ValueT [inferred] Value type * \tparam OffsetIteratorT [inferred] Random-access input iterator type for reading segment offsets \iterator */ template < typename KeyT, typename ValueT, typename OffsetIteratorT> CUB_RUNTIME_FUNCTION static cudaError_t SortPairsDescending( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. 
size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation DoubleBuffer &d_keys, ///< [in,out] Reference to the double-buffer of keys whose "current" device-accessible buffer contains the unsorted input keys and, upon return, is updated to point to the sorted output keys DoubleBuffer &d_values, ///< [in,out] Double-buffer of values whose "current" device-accessible buffer contains the unsorted input values and, upon return, is updated to point to the sorted output values int num_items, ///< [in] The total number of items to sort (across all segments) int num_segments, ///< [in] The number of segments that comprise the sorting data OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; return DispatchSegmentedRadixSort::Dispatch( d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, num_segments, d_begin_offsets, d_end_offsets, begin_bit, end_bit, true, stream, debug_synchronous); } //@} end member group /******************************************************************//** * \name Keys-only *********************************************************************/ //@{ /** * \brief Sorts segments of keys into ascending order. (~2N auxiliary storage required) * * \par * - The contents of the input data are not altered by the sorting operation * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. * - When input a contiguous sequence of segments, a single sequence * \p segment_offsets (of length num_segments+1) can be aliased * for both the \p d_begin_offsets and \p d_end_offsets parameters (where * the latter is specified as segment_offsets+1). * - \devicestorageNP For sorting using only O(P) temporary storage, see the sorting interface using DoubleBuffer wrappers below. * - \devicestorage * * \par Snippet * The code snippet below illustrates the batched sorting of three segments (with one zero-length segment) of \p int keys. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for sorting data * int num_items; // e.g., 7 * int num_segments; // e.g., 3 * int *d_offsets; // e.g., [0, 3, 3, 7] * int *d_keys_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_keys_out; // e.g., [-, -, -, -, -, -, -] * ... 
* * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceSegmentedRadixSort::SortKeys(d_temp_storage, temp_storage_bytes, d_keys_in, d_keys_out, * num_items, num_segments, d_offsets, d_offsets + 1); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run sorting operation * cub::DeviceSegmentedRadixSort::SortKeys(d_temp_storage, temp_storage_bytes, d_keys_in, d_keys_out, * num_items, num_segments, d_offsets, d_offsets + 1); * * // d_keys_out <-- [6, 7, 8, 0, 3, 5, 9] * * \endcode * * \tparam KeyT [inferred] Key type * \tparam OffsetIteratorT [inferred] Random-access input iterator type for reading segment offsets \iterator */ template < typename KeyT, typename OffsetIteratorT> CUB_RUNTIME_FUNCTION static cudaError_t SortKeys( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation const KeyT *d_keys_in, ///< [in] %Device-accessible pointer to the input data of key data to sort KeyT *d_keys_out, ///< [out] %Device-accessible pointer to the sorted output sequence of key data int num_items, ///< [in] The total number of items to sort (across all segments) int num_segments, ///< [in] The number of segments that comprise the sorting data OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; // Null value type DoubleBuffer d_keys(const_cast(d_keys_in), d_keys_out); DoubleBuffer d_values; return DispatchSegmentedRadixSort::Dispatch( d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, num_segments, d_begin_offsets, d_end_offsets, begin_bit, end_bit, false, stream, debug_synchronous); } /** * \brief Sorts segments of keys into ascending order. (~N auxiliary storage required). * * \par * - The sorting operation is given a pair of key buffers managed by a * DoubleBuffer structure that indicates which of the two buffers is * "current" (and thus contains the input data to be sorted). * - The contents of both buffers may be altered by the sorting operation. 
* - Upon completion, the sorting operation will update the "current" indicator * within the DoubleBuffer wrapper to reference which of the two buffers * now contains the sorted output sequence (a function of the number of key bits * specified and the targeted device architecture). * - When input a contiguous sequence of segments, a single sequence * \p segment_offsets (of length num_segments+1) can be aliased * for both the \p d_begin_offsets and \p d_end_offsets parameters (where * the latter is specified as segment_offsets+1). * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. * - \devicestorageP * - \devicestorage * * \par Snippet * The code snippet below illustrates the batched sorting of three segments (with one zero-length segment) of \p int keys. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for sorting data * int num_items; // e.g., 7 * int num_segments; // e.g., 3 * int *d_offsets; // e.g., [0, 3, 3, 7] * int *d_key_buf; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_key_alt_buf; // e.g., [-, -, -, -, -, -, -] * ... * * // Create a DoubleBuffer to wrap the pair of device pointers * cub::DoubleBuffer d_keys(d_key_buf, d_key_alt_buf); * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceSegmentedRadixSort::SortKeys(d_temp_storage, temp_storage_bytes, d_keys, * num_items, num_segments, d_offsets, d_offsets + 1); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run sorting operation * cub::DeviceSegmentedRadixSort::SortKeys(d_temp_storage, temp_storage_bytes, d_keys, * num_items, num_segments, d_offsets, d_offsets + 1); * * // d_keys.Current() <-- [6, 7, 8, 0, 3, 5, 9] * * \endcode * * \tparam KeyT [inferred] Key type * \tparam OffsetIteratorT [inferred] Random-access input iterator type for reading segment offsets \iterator */ template < typename KeyT, typename OffsetIteratorT> CUB_RUNTIME_FUNCTION static cudaError_t SortKeys( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation DoubleBuffer &d_keys, ///< [in,out] Reference to the double-buffer of keys whose "current" device-accessible buffer contains the unsorted input keys and, upon return, is updated to point to the sorted output keys int num_items, ///< [in] The total number of items to sort (across all segments) int num_segments, ///< [in] The number of segments that comprise the sorting data OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. 
int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; // Null value type DoubleBuffer d_values; return DispatchSegmentedRadixSort::Dispatch( d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, num_segments, d_begin_offsets, d_end_offsets, begin_bit, end_bit, true, stream, debug_synchronous); } /** * \brief Sorts segments of keys into descending order. (~2N auxiliary storage required). * * \par * - The contents of the input data are not altered by the sorting operation * - When input a contiguous sequence of segments, a single sequence * \p segment_offsets (of length num_segments+1) can be aliased * for both the \p d_begin_offsets and \p d_end_offsets parameters (where * the latter is specified as segment_offsets+1). * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. * - \devicestorageNP For sorting using only O(P) temporary storage, see the sorting interface using DoubleBuffer wrappers below. * - \devicestorage * * \par Snippet * The code snippet below illustrates the batched sorting of three segments (with one zero-length segment) of \p int keys. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for sorting data * int num_items; // e.g., 7 * int num_segments; // e.g., 3 * int *d_offsets; // e.g., [0, 3, 3, 7] * int *d_keys_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_keys_out; // e.g., [-, -, -, -, -, -, -] * ... * * // Create a DoubleBuffer to wrap the pair of device pointers * cub::DoubleBuffer d_keys(d_key_buf, d_key_alt_buf); * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceSegmentedRadixSort::SortKeysDescending(d_temp_storage, temp_storage_bytes, d_keys_in, d_keys_out, * num_items, num_segments, d_offsets, d_offsets + 1); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run sorting operation * cub::DeviceSegmentedRadixSort::SortKeysDescending(d_temp_storage, temp_storage_bytes, d_keys_in, d_keys_out, * num_items, num_segments, d_offsets, d_offsets + 1); * * // d_keys_out <-- [8, 7, 6, 9, 5, 3, 0] * * \endcode * * \tparam KeyT [inferred] Key type * \tparam OffsetIteratorT [inferred] Random-access input iterator type for reading segment offsets \iterator */ template < typename KeyT, typename OffsetIteratorT> CUB_RUNTIME_FUNCTION static cudaError_t SortKeysDescending( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. 
size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation const KeyT *d_keys_in, ///< [in] %Device-accessible pointer to the input data of key data to sort KeyT *d_keys_out, ///< [out] %Device-accessible pointer to the sorted output sequence of key data int num_items, ///< [in] The total number of items to sort (across all segments) int num_segments, ///< [in] The number of segments that comprise the sorting data OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; DoubleBuffer d_keys(const_cast(d_keys_in), d_keys_out); DoubleBuffer d_values; return DispatchSegmentedRadixSort::Dispatch( d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, num_segments, d_begin_offsets, d_end_offsets, begin_bit, end_bit, false, stream, debug_synchronous); } /** * \brief Sorts segments of keys into descending order. (~N auxiliary storage required). * * \par * - The sorting operation is given a pair of key buffers managed by a * DoubleBuffer structure that indicates which of the two buffers is * "current" (and thus contains the input data to be sorted). * - The contents of both buffers may be altered by the sorting operation. * - Upon completion, the sorting operation will update the "current" indicator * within the DoubleBuffer wrapper to reference which of the two buffers * now contains the sorted output sequence (a function of the number of key bits * specified and the targeted device architecture). * - When input a contiguous sequence of segments, a single sequence * \p segment_offsets (of length num_segments+1) can be aliased * for both the \p d_begin_offsets and \p d_end_offsets parameters (where * the latter is specified as segment_offsets+1). * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. * - \devicestorageP * - \devicestorage * * \par Snippet * The code snippet below illustrates the batched sorting of three segments (with one zero-length segment) of \p int keys. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for sorting data * int num_items; // e.g., 7 * int num_segments; // e.g., 3 * int *d_offsets; // e.g., [0, 3, 3, 7] * int *d_key_buf; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_key_alt_buf; // e.g., [-, -, -, -, -, -, -] * ... 
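 *
 * // The alternate buffer does not need to be initialized; it only has to be
 * // the same size as d_key_buf.  After the sort, either buffer may hold the
 * // result (depending on key bits and device architecture), so always read
 * // the output through d_keys.Current().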
* * // Create a DoubleBuffer to wrap the pair of device pointers * cub::DoubleBuffer d_keys(d_key_buf, d_key_alt_buf); * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceSegmentedRadixSort::SortKeysDescending(d_temp_storage, temp_storage_bytes, d_keys, * num_items, num_segments, d_offsets, d_offsets + 1); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run sorting operation * cub::DeviceSegmentedRadixSort::SortKeysDescending(d_temp_storage, temp_storage_bytes, d_keys, * num_items, num_segments, d_offsets, d_offsets + 1); * * // d_keys.Current() <-- [8, 7, 6, 9, 5, 3, 0] * * \endcode * * \tparam KeyT [inferred] Key type * \tparam OffsetIteratorT [inferred] Random-access input iterator type for reading segment offsets \iterator */ template < typename KeyT, typename OffsetIteratorT> CUB_RUNTIME_FUNCTION static cudaError_t SortKeysDescending( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation DoubleBuffer &d_keys, ///< [in,out] Reference to the double-buffer of keys whose "current" device-accessible buffer contains the unsorted input keys and, upon return, is updated to point to the sorted output keys int num_items, ///< [in] The total number of items to sort (across all segments) int num_segments, ///< [in] The number of segments that comprise the sorting data OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; // Null value type DoubleBuffer d_values; return DispatchSegmentedRadixSort::Dispatch( d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, num_segments, d_begin_offsets, d_end_offsets, begin_bit, end_bit, true, stream, debug_synchronous); } //@} end member group }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/device/device_segmented_reduce.cuh000066400000000000000000001072531411340063500251470ustar00rootroot00000000000000 /****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::DeviceSegmentedReduce provides device-wide, parallel operations for computing a batched reduction across multiple sequences of data items residing within device-accessible memory. */ #pragma once #include #include #include "../iterator/arg_index_input_iterator.cuh" #include "dispatch/dispatch_reduce.cuh" #include "dispatch/dispatch_reduce_by_key.cuh" #include "../util_type.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief DeviceSegmentedReduce provides device-wide, parallel operations for computing a reduction across multiple sequences of data items residing within device-accessible memory. ![](reduce_logo.png) * \ingroup SegmentedModule * * \par Overview * A reduction (or fold) * uses a binary combining operator to compute a single aggregate from a sequence of input elements. * * \par Usage Considerations * \cdp_class{DeviceSegmentedReduce} * */ struct DeviceSegmentedReduce { /** * \brief Computes a device-wide segmented reduction using the specified binary \p reduction_op functor. * * \par * - Does not support binary reduction operators that are non-commutative. * - When input a contiguous sequence of segments, a single sequence * \p segment_offsets (of length num_segments+1) can be aliased * for both the \p d_begin_offsets and \p d_end_offsets parameters (where * the latter is specified as segment_offsets+1). * - \devicestorage * * \par Snippet * The code snippet below illustrates a custom min-reduction of a device vector of \p int data elements. * \par * \code * #include // or equivalently * * // CustomMin functor * struct CustomMin * { * template * CUB_RUNTIME_FUNCTION __forceinline__ * T operator()(const T &a, const T &b) const { * return (b < a) ? 
b : a; * } * }; * * // Declare, allocate, and initialize device-accessible pointers for input and output * int num_segments; // e.g., 3 * int *d_offsets; // e.g., [0, 3, 3, 7] * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_out; // e.g., [-, -, -] * CustomMin min_op; * int initial_value; // e.g., INT_MAX * ... * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceSegmentedReduce::Reduce(d_temp_storage, temp_storage_bytes, d_in, d_out, * num_segments, d_offsets, d_offsets + 1, min_op, initial_value); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run reduction * cub::DeviceSegmentedReduce::Reduce(d_temp_storage, temp_storage_bytes, d_in, d_out, * num_segments, d_offsets, d_offsets + 1, min_op, initial_value); * * // d_out <-- [6, INT_MAX, 0] * * \endcode * * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items \iterator * \tparam OutputIteratorT [inferred] Output iterator type for recording the reduced aggregate \iterator * \tparam OffsetIteratorT [inferred] Random-access input iterator type for reading segment offsets \iterator * \tparam ReductionOp [inferred] Binary reduction functor type having member T operator()(const T &a, const T &b) * \tparam T [inferred] Data element type that is convertible to the \p value type of \p InputIteratorT */ template < typename InputIteratorT, typename OutputIteratorT, typename OffsetIteratorT, typename ReductionOp, typename T> CUB_RUNTIME_FUNCTION static cudaError_t Reduce( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output aggregate int num_segments, ///< [in] The number of segments that comprise the sorting data OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. ReductionOp reduction_op, ///< [in] Binary reduction functor T initial_value, ///< [in] Initial value of the reduction for each segment cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; return DispatchSegmentedReduce::Dispatch( d_temp_storage, temp_storage_bytes, d_in, d_out, num_segments, d_begin_offsets, d_end_offsets, reduction_op, initial_value, stream, debug_synchronous); } /** * \brief Computes a device-wide segmented sum using the addition ('+') operator. * * \par * - Uses \p 0 as the initial value of the reduction for each segment. 
* - When input a contiguous sequence of segments, a single sequence * \p segment_offsets (of length num_segments+1) can be aliased * for both the \p d_begin_offsets and \p d_end_offsets parameters (where * the latter is specified as segment_offsets+1). * - Does not support \p + operators that are non-commutative.. * - \devicestorage * * \par Snippet * The code snippet below illustrates the sum reduction of a device vector of \p int data elements. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input and output * int num_segments; // e.g., 3 * int *d_offsets; // e.g., [0, 3, 3, 7] * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_out; // e.g., [-, -, -] * ... * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceSegmentedReduce::Sum(d_temp_storage, temp_storage_bytes, d_in, d_out, * num_segments, d_offsets, d_offsets + 1); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run sum-reduction * cub::DeviceSegmentedReduce::Sum(d_temp_storage, temp_storage_bytes, d_in, d_out, * num_segments, d_offsets, d_offsets + 1); * * // d_out <-- [21, 0, 17] * * \endcode * * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items \iterator * \tparam OutputIteratorT [inferred] Output iterator type for recording the reduced aggregate \iterator * \tparam OffsetIteratorT [inferred] Random-access input iterator type for reading segment offsets \iterator */ template < typename InputIteratorT, typename OutputIteratorT, typename OffsetIteratorT> CUB_RUNTIME_FUNCTION static cudaError_t Sum( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output aggregate int num_segments, ///< [in] The number of segments that comprise the sorting data OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; // The output value type typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? typename std::iterator_traits::value_type, // ... then the input iterator's value type, typename std::iterator_traits::value_type>::Type OutputT; // ... 
else the output iterator's value type return DispatchSegmentedReduce::Dispatch( d_temp_storage, temp_storage_bytes, d_in, d_out, num_segments, d_begin_offsets, d_end_offsets, cub::Sum(), OutputT(), // zero-initialize stream, debug_synchronous); } /** * \brief Computes a device-wide segmented minimum using the less-than ('<') operator. * * \par * - Uses std::numeric_limits::max() as the initial value of the reduction for each segment. * - When input a contiguous sequence of segments, a single sequence * \p segment_offsets (of length num_segments+1) can be aliased * for both the \p d_begin_offsets and \p d_end_offsets parameters (where * the latter is specified as segment_offsets+1). * - Does not support \p < operators that are non-commutative. * - \devicestorage * * \par Snippet * The code snippet below illustrates the min-reduction of a device vector of \p int data elements. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input and output * int num_segments; // e.g., 3 * int *d_offsets; // e.g., [0, 3, 3, 7] * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_out; // e.g., [-, -, -] * ... * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceSegmentedReduce::Min(d_temp_storage, temp_storage_bytes, d_in, d_out, * num_segments, d_offsets, d_offsets + 1); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run min-reduction * cub::DeviceSegmentedReduce::Min(d_temp_storage, temp_storage_bytes, d_in, d_out, * num_segments, d_offsets, d_offsets + 1); * * // d_out <-- [6, INT_MAX, 0] * * \endcode * * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items \iterator * \tparam OutputIteratorT [inferred] Output iterator type for recording the reduced aggregate \iterator * \tparam OffsetIteratorT [inferred] Random-access input iterator type for reading segment offsets \iterator */ template < typename InputIteratorT, typename OutputIteratorT, typename OffsetIteratorT> CUB_RUNTIME_FUNCTION static cudaError_t Min( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output aggregate int num_segments, ///< [in] The number of segments that comprise the sorting data OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. 
{ // Signed integer type for global offsets typedef int OffsetT; // The input value type typedef typename std::iterator_traits::value_type InputT; return DispatchSegmentedReduce::Dispatch( d_temp_storage, temp_storage_bytes, d_in, d_out, num_segments, d_begin_offsets, d_end_offsets, cub::Min(), Traits::Max(), // replace with std::numeric_limits::max() when C++11 support is more prevalent stream, debug_synchronous); } /** * \brief Finds the first device-wide minimum in each segment using the less-than ('<') operator, also returning the in-segment index of that item. * * \par * - The output value type of \p d_out is cub::KeyValuePair (assuming the value type of \p d_in is \p T) * - The minimum of the ith segment is written to d_out[i].value and its offset in that segment is written to d_out[i].key. * - The {1, std::numeric_limits::max()} tuple is produced for zero-length inputs * - When input a contiguous sequence of segments, a single sequence * \p segment_offsets (of length num_segments+1) can be aliased * for both the \p d_begin_offsets and \p d_end_offsets parameters (where * the latter is specified as segment_offsets+1). * - Does not support \p < operators that are non-commutative. * - \devicestorage * * \par Snippet * The code snippet below illustrates the argmin-reduction of a device vector of \p int data elements. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input and output * int num_segments; // e.g., 3 * int *d_offsets; // e.g., [0, 3, 3, 7] * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * KeyValuePair *d_out; // e.g., [{-,-}, {-,-}, {-,-}] * ... * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceSegmentedReduce::ArgMin(d_temp_storage, temp_storage_bytes, d_in, d_out, * num_segments, d_offsets, d_offsets + 1); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run argmin-reduction * cub::DeviceSegmentedReduce::ArgMin(d_temp_storage, temp_storage_bytes, d_in, d_out, * num_segments, d_offsets, d_offsets + 1); * * // d_out <-- [{1,6}, {1,INT_MAX}, {2,0}] * * \endcode * * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items (of some type \p T) \iterator * \tparam OutputIteratorT [inferred] Output iterator type for recording the reduced aggregate (having value type KeyValuePair) \iterator * \tparam OffsetIteratorT [inferred] Random-access input iterator type for reading segment offsets \iterator */ template < typename InputIteratorT, typename OutputIteratorT, typename OffsetIteratorT> CUB_RUNTIME_FUNCTION static cudaError_t ArgMin( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. 
size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output aggregate int num_segments, ///< [in] The number of segments that comprise the sorting data OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; // The input type typedef typename std::iterator_traits::value_type InputValueT; // The output tuple type typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? KeyValuePair, // ... then the key value pair OffsetT + InputValueT typename std::iterator_traits::value_type>::Type OutputTupleT; // ... else the output iterator's value type // The output value type typedef typename OutputTupleT::Value OutputValueT; // Wrapped input iterator to produce index-value tuples typedef ArgIndexInputIterator ArgIndexInputIteratorT; ArgIndexInputIteratorT d_indexed_in(d_in); // Initial value OutputTupleT initial_value(1, Traits::Max()); // replace with std::numeric_limits::max() when C++11 support is more prevalent return DispatchSegmentedReduce::Dispatch( d_temp_storage, temp_storage_bytes, d_indexed_in, d_out, num_segments, d_begin_offsets, d_end_offsets, cub::ArgMin(), initial_value, stream, debug_synchronous); } /** * \brief Computes a device-wide segmented maximum using the greater-than ('>') operator. * * \par * - Uses std::numeric_limits::lowest() as the initial value of the reduction. * - When input a contiguous sequence of segments, a single sequence * \p segment_offsets (of length num_segments+1) can be aliased * for both the \p d_begin_offsets and \p d_end_offsets parameters (where * the latter is specified as segment_offsets+1). * - Does not support \p > operators that are non-commutative. * - \devicestorage * * \par Snippet * The code snippet below illustrates the max-reduction of a device vector of \p int data elements. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input and output * int num_segments; // e.g., 3 * int *d_offsets; // e.g., [0, 3, 3, 7] * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * int *d_out; // e.g., [-, -, -] * ... 
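 *
 * // The second segment ([3, 3) in d_offsets) is empty, so its reduction
 * // falls back to the initial value for a max-reduction,
 * // std::numeric_limits<int>::lowest() (INT_MIN), as shown in d_out below.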
* * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceSegmentedReduce::Max(d_temp_storage, temp_storage_bytes, d_in, d_out, * num_segments, d_offsets, d_offsets + 1); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run max-reduction * cub::DeviceSegmentedReduce::Max(d_temp_storage, temp_storage_bytes, d_in, d_out, * num_segments, d_offsets, d_offsets + 1); * * // d_out <-- [8, INT_MIN, 9] * * \endcode * * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items \iterator * \tparam OutputIteratorT [inferred] Output iterator type for recording the reduced aggregate \iterator * \tparam OffsetIteratorT [inferred] Random-access input iterator type for reading segment offsets \iterator */ template < typename InputIteratorT, typename OutputIteratorT, typename OffsetIteratorT> CUB_RUNTIME_FUNCTION static cudaError_t Max( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output aggregate int num_segments, ///< [in] The number of segments that comprise the sorting data OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; // The input value type typedef typename std::iterator_traits::value_type InputT; return DispatchSegmentedReduce::Dispatch( d_temp_storage, temp_storage_bytes, d_in, d_out, num_segments, d_begin_offsets, d_end_offsets, cub::Max(), Traits::Lowest(), // replace with std::numeric_limits::lowest() when C++11 support is more prevalent stream, debug_synchronous); } /** * \brief Finds the first device-wide maximum in each segment using the greater-than ('>') operator, also returning the in-segment index of that item * * \par * - The output value type of \p d_out is cub::KeyValuePair (assuming the value type of \p d_in is \p T) * - The maximum of the ith segment is written to d_out[i].value and its offset in that segment is written to d_out[i].key. * - The {1, std::numeric_limits::lowest()} tuple is produced for zero-length inputs * - When input a contiguous sequence of segments, a single sequence * \p segment_offsets (of length num_segments+1) can be aliased * for both the \p d_begin_offsets and \p d_end_offsets parameters (where * the latter is specified as segment_offsets+1). * - Does not support \p > operators that are non-commutative. 
* - \devicestorage * * \par Snippet * The code snippet below illustrates the argmax-reduction of a device vector of \p int data elements. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input and output * int num_segments; // e.g., 3 * int *d_offsets; // e.g., [0, 3, 3, 7] * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * KeyValuePair *d_out; // e.g., [{-,-}, {-,-}, {-,-}] * ... * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceSegmentedReduce::ArgMax(d_temp_storage, temp_storage_bytes, d_in, d_out, * num_segments, d_offsets, d_offsets + 1); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run argmax-reduction * cub::DeviceSegmentedReduce::ArgMax(d_temp_storage, temp_storage_bytes, d_in, d_out, * num_segments, d_offsets, d_offsets + 1); * * // d_out <-- [{0,8}, {1,INT_MIN}, {3,9}] * * \endcode * * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items (of some type \p T) \iterator * \tparam OutputIteratorT [inferred] Output iterator type for recording the reduced aggregate (having value type KeyValuePair) \iterator * \tparam OffsetIteratorT [inferred] Random-access input iterator type for reading segment offsets \iterator */ template < typename InputIteratorT, typename OutputIteratorT, typename OffsetIteratorT> CUB_RUNTIME_FUNCTION static cudaError_t ArgMax( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output aggregate int num_segments, ///< [in] The number of segments that comprise the sorting data OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { // Signed integer type for global offsets typedef int OffsetT; // The input type typedef typename std::iterator_traits::value_type InputValueT; // The output tuple type typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? KeyValuePair, // ... then the key value pair OffsetT + InputValueT typename std::iterator_traits::value_type>::Type OutputTupleT; // ... 
else the output iterator's value type // The output value type typedef typename OutputTupleT::Value OutputValueT; // Wrapped input iterator to produce index-value tuples typedef ArgIndexInputIterator ArgIndexInputIteratorT; ArgIndexInputIteratorT d_indexed_in(d_in); // Initial value OutputTupleT initial_value(1, Traits::Lowest()); // replace with std::numeric_limits::lowest() when C++11 support is more prevalent return DispatchSegmentedReduce::Dispatch( d_temp_storage, temp_storage_bytes, d_indexed_in, d_out, num_segments, d_begin_offsets, d_end_offsets, cub::ArgMax(), initial_value, stream, debug_synchronous); } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/device/device_select.cuh000066400000000000000000000446261411340063500231300ustar00rootroot00000000000000 /****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::DeviceSelect provides device-wide, parallel operations for compacting selected items from sequences of data items residing within device-accessible memory. */ #pragma once #include #include #include "dispatch/dispatch_select_if.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief DeviceSelect provides device-wide, parallel operations for compacting selected items from sequences of data items residing within device-accessible memory. ![](select_logo.png) * \ingroup SingleModule * * \par Overview * These operations apply a selection criterion to selectively copy * items from a specified input sequence to a compact output sequence. 
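 *
 * \par
 * Every method below follows the same two-phase pattern: a first call with
 * \p d_temp_storage == NULL that only reports the required temporary storage
 * in \p temp_storage_bytes, followed by a second, identical call that performs
 * the selection.  A minimal sketch of that pattern (shown here with \p Flagged;
 * the other methods are invoked the same way, and the pointers are assumed to
 * be set up as in the \p Flagged example further below):
 *
 * \par
 * \code
 * void  *d_temp_storage = NULL;
 * size_t temp_storage_bytes = 0;
 *
 * // Phase 1: size query only (no selection is performed)
 * cub::DeviceSelect::Flagged(d_temp_storage, temp_storage_bytes,
 *     d_in, d_flags, d_out, d_num_selected_out, num_items);
 *
 * // Phase 2: allocate the reported amount and run
 * cudaMalloc(&d_temp_storage, temp_storage_bytes);
 * cub::DeviceSelect::Flagged(d_temp_storage, temp_storage_bytes,
 *     d_in, d_flags, d_out, d_num_selected_out, num_items);
 * \endcode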
* * \par Usage Considerations * \cdp_class{DeviceSelect} * * \par Performance * \linear_performance{select-flagged, select-if, and select-unique} * * \par * The following chart illustrates DeviceSelect::If * performance across different CUDA architectures for \p int32 items, * where 50% of the items are randomly selected. * * \image html select_if_int32_50_percent.png * * \par * The following chart illustrates DeviceSelect::Unique * performance across different CUDA architectures for \p int32 items * where segments have lengths uniformly sampled from [1,1000]. * * \image html select_unique_int32_len_500.png * * \par * \plots_below * */ struct DeviceSelect { /** * \brief Uses the \p d_flags sequence to selectively copy the corresponding items from \p d_in into \p d_out. The total number of items selected is written to \p d_num_selected_out. ![](select_flags_logo.png) * * \par * - The value type of \p d_flags must be castable to \p bool (e.g., \p bool, \p char, \p int, etc.). * - Copies of the selected items are compacted into \p d_out and maintain their original relative ordering. * - \devicestorage * * \par Snippet * The code snippet below illustrates the compaction of items selected from an \p int device vector. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input, flags, and output * int num_items; // e.g., 8 * int *d_in; // e.g., [1, 2, 3, 4, 5, 6, 7, 8] * char *d_flags; // e.g., [1, 0, 0, 1, 0, 1, 1, 0] * int *d_out; // e.g., [ , , , , , , , ] * int *d_num_selected_out; // e.g., [ ] * ... * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceSelect::Flagged(d_temp_storage, temp_storage_bytes, d_in, d_flags, d_out, d_num_selected_out, num_items); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run selection * cub::DeviceSelect::Flagged(d_temp_storage, temp_storage_bytes, d_in, d_flags, d_out, d_num_selected_out, num_items); * * // d_out <-- [1, 4, 6, 7] * // d_num_selected_out <-- [4] * * \endcode * * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items \iterator * \tparam FlagIterator [inferred] Random-access input iterator type for reading selection flags \iterator * \tparam OutputIteratorT [inferred] Random-access output iterator type for writing selected items \iterator * \tparam NumSelectedIteratorT [inferred] Output iterator type for recording the number of items selected \iterator */ template < typename InputIteratorT, typename FlagIterator, typename OutputIteratorT, typename NumSelectedIteratorT> CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t Flagged( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. 
size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items FlagIterator d_flags, ///< [in] Pointer to the input sequence of selection flags OutputIteratorT d_out, ///< [out] Pointer to the output sequence of selected data items NumSelectedIteratorT d_num_selected_out, ///< [out] Pointer to the output total number of items selected (i.e., length of \p d_out) int num_items, ///< [in] Total number of input items (i.e., length of \p d_in) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. { typedef int OffsetT; // Signed integer type for global offsets typedef NullType SelectOp; // Selection op (not used) typedef NullType EqualityOp; // Equality operator (not used) return DispatchSelectIf::Dispatch( d_temp_storage, temp_storage_bytes, d_in, d_flags, d_out, d_num_selected_out, SelectOp(), EqualityOp(), num_items, stream, debug_synchronous); } /** * \brief Uses the \p select_op functor to selectively copy items from \p d_in into \p d_out. The total number of items selected is written to \p d_num_selected_out. ![](select_logo.png) * * \par * - Copies of the selected items are compacted into \p d_out and maintain their original relative ordering. * - \devicestorage * * \par Performance * The following charts illustrate saturated select-if performance across different * CUDA architectures for \p int32 and \p int64 items, respectively. Items are * selected with 50% probability. * * \image html select_if_int32_50_percent.png * \image html select_if_int64_50_percent.png * * \par * The following charts are similar, but 5% selection probability: * * \image html select_if_int32_5_percent.png * \image html select_if_int64_5_percent.png * * \par Snippet * The code snippet below illustrates the compaction of items selected from an \p int device vector. * \par * \code * #include // or equivalently * * // Functor type for selecting values less than some criteria * struct LessThan * { * int compare; * * CUB_RUNTIME_FUNCTION __forceinline__ * LessThan(int compare) : compare(compare) {} * * CUB_RUNTIME_FUNCTION __forceinline__ * bool operator()(const int &a) const { * return (a < compare); * } * }; * * // Declare, allocate, and initialize device-accessible pointers for input and output * int num_items; // e.g., 8 * int *d_in; // e.g., [0, 2, 3, 9, 5, 2, 81, 8] * int *d_out; // e.g., [ , , , , , , , ] * int *d_num_selected_out; // e.g., [ ] * LessThan select_op(7); * ... 
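 *
 * // select_op keeps every item that compares less than 7; the survivors are
 * // compacted into d_out in their original relative order, so duplicates
 * // (the two 2s) are preserved.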
* * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceSelect::If(d_temp_storage, temp_storage_bytes, d_in, d_out, d_num_selected_out, num_items, select_op); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run selection * cub::DeviceSelect::If(d_temp_storage, temp_storage_bytes, d_in, d_out, d_num_selected_out, num_items, select_op); * * // d_out <-- [0, 2, 3, 5, 2] * // d_num_selected_out <-- [5] * * \endcode * * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items \iterator * \tparam OutputIteratorT [inferred] Random-access output iterator type for writing selected items \iterator * \tparam NumSelectedIteratorT [inferred] Output iterator type for recording the number of items selected \iterator * \tparam SelectOp [inferred] Selection operator type having member bool operator()(const T &a) */ template < typename InputIteratorT, typename OutputIteratorT, typename NumSelectedIteratorT, typename SelectOp> CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t If( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output sequence of selected data items NumSelectedIteratorT d_num_selected_out, ///< [out] Pointer to the output total number of items selected (i.e., length of \p d_out) int num_items, ///< [in] Total number of input items (i.e., length of \p d_in) SelectOp select_op, ///< [in] Unary selection operator cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. { typedef int OffsetT; // Signed integer type for global offsets typedef NullType* FlagIterator; // FlagT iterator type (not used) typedef NullType EqualityOp; // Equality operator (not used) return DispatchSelectIf::Dispatch( d_temp_storage, temp_storage_bytes, d_in, NULL, d_out, d_num_selected_out, select_op, EqualityOp(), num_items, stream, debug_synchronous); } /** * \brief Given an input sequence \p d_in having runs of consecutive equal-valued keys, only the first key from each run is selectively copied to \p d_out. The total number of items selected is written to \p d_num_selected_out. ![](unique_logo.png) * * \par * - The == equality operator is used to determine whether keys are equivalent * - Copies of the selected items are compacted into \p d_out and maintain their original relative ordering. * - \devicestorage * * \par Performance * The following charts illustrate saturated select-unique performance across different * CUDA architectures for \p int32 and \p int64 items, respectively. Segments have * lengths uniformly sampled from [1,1000]. 
* * \image html select_unique_int32_len_500.png * \image html select_unique_int64_len_500.png * * \par * The following charts are similar, but with segment lengths uniformly sampled from [1,10]: * * \image html select_unique_int32_len_5.png * \image html select_unique_int64_len_5.png * * \par Snippet * The code snippet below illustrates the compaction of items selected from an \p int device vector. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input and output * int num_items; // e.g., 8 * int *d_in; // e.g., [0, 2, 2, 9, 5, 5, 5, 8] * int *d_out; // e.g., [ , , , , , , , ] * int *d_num_selected_out; // e.g., [ ] * ... * * // Determine temporary device storage requirements * void *d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceSelect::Unique(d_temp_storage, temp_storage_bytes, d_in, d_out, d_num_selected_out, num_items); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run selection * cub::DeviceSelect::Unique(d_temp_storage, temp_storage_bytes, d_in, d_out, d_num_selected_out, num_items); * * // d_out <-- [0, 2, 9, 5, 8] * // d_num_selected_out <-- [5] * * \endcode * * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items \iterator * \tparam OutputIteratorT [inferred] Random-access output iterator type for writing selected items \iterator * \tparam NumSelectedIteratorT [inferred] Output iterator type for recording the number of items selected \iterator */ template < typename InputIteratorT, typename OutputIteratorT, typename NumSelectedIteratorT> CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t Unique( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output sequence of selected data items NumSelectedIteratorT d_num_selected_out, ///< [out] Pointer to the output total number of items selected (i.e., length of \p d_out) int num_items, ///< [in] Total number of input items (i.e., length of \p d_in) cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. { typedef int OffsetT; // Signed integer type for global offsets typedef NullType* FlagIterator; // FlagT iterator type (not used) typedef NullType SelectOp; // Selection op (not used) typedef Equality EqualityOp; // Default == operator return DispatchSelectIf::Dispatch( d_temp_storage, temp_storage_bytes, d_in, NULL, d_out, d_num_selected_out, SelectOp(), EqualityOp(), num_items, stream, debug_synchronous); } }; /** * \example example_device_select_flagged.cu * \example example_device_select_if.cu * \example example_device_select_unique.cu */ } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/device/device_spmv.cuh000066400000000000000000000205131411340063500226230ustar00rootroot00000000000000 /****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. 
* Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::DeviceSpmv provides device-wide parallel operations for performing sparse-matrix * vector multiplication (SpMV). */ #pragma once #include #include #include #include "dispatch/dispatch_spmv_orig.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief DeviceSpmv provides device-wide parallel operations for performing sparse-matrix * dense-vector multiplication (SpMV). * \ingroup SingleModule * * \par Overview * The [SpMV computation](http://en.wikipedia.org/wiki/Sparse_matrix-vector_multiplication) * performs the matrix-vector operation * y = alpha*A*x + beta*y, * where: * - A is an mxn sparse matrix whose non-zero structure is specified in * [compressed-storage-row (CSR) format](http://en.wikipedia.org/wiki/Sparse_matrix#Compressed_row_Storage_.28CRS_or_CSR.29) * (i.e., three arrays: values, row_offsets, and column_indices) * - x and y are dense vectors * - alpha and beta are scalar multiplicands * * \par Usage Considerations * \cdp_class{DeviceSpmv} * */ struct DeviceSpmv { /******************************************************************//** * \name CSR matrix operations *********************************************************************/ //@{ /** * \brief This function performs the matrix-vector operation y = A*x. * * \par Snippet * The code snippet below illustrates SpMV upon a 9x9 CSR matrix A * representing a 3x3 lattice (24 non-zeros). 
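 *
 * \par
 * (Illustrative aside, not from the upstream CUB documentation.) In CSR form the
 * non-zeros of row i occupy the half-open range [row_offsets[i], row_offsets[i+1])
 * of the values and column_indices arrays, so a serial host-side reference for
 * y = A*x over host copies of the arrays shown in the snippet below is simply:
 *
 * \par
 * \code
 * // Reference SpMV over host copies of the CSR arrays (single precision)
 * for (int i = 0; i < num_rows; ++i)
 * {
 *     float sum = 0.0f;
 *     for (int k = row_offsets[i]; k < row_offsets[i + 1]; ++k)
 *         sum += values[k] * vector_x[column_indices[k]];
 *     vector_y[i] = sum;
 * }
 * \endcode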
* * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize device-accessible pointers for input matrix A, input vector x, * // and output vector y * int num_rows = 9; * int num_cols = 9; * int num_nonzeros = 24; * * float* d_values; // e.g., [1, 1, 1, 1, 1, 1, 1, 1, * // 1, 1, 1, 1, 1, 1, 1, 1, * // 1, 1, 1, 1, 1, 1, 1, 1] * * int* d_column_indices; // e.g., [1, 3, 0, 2, 4, 1, 5, 0, * // 4, 6, 1, 3, 5, 7, 2, 4, * // 8, 3, 7, 4, 6, 8, 5, 7] * * int* d_row_offsets; // e.g., [0, 2, 5, 7, 10, 14, 17, 19, 22, 24] * * float* d_vector_x; // e.g., [1, 1, 1, 1, 1, 1, 1, 1, 1] * float* d_vector_y; // e.g., [ , , , , , , , , ] * ... * * // Determine temporary device storage requirements * void* d_temp_storage = NULL; * size_t temp_storage_bytes = 0; * cub::DeviceSpmv::CsrMV(d_temp_storage, temp_storage_bytes, d_values, * d_row_offsets, d_column_indices, d_vector_x, d_vector_y, * num_rows, num_cols, num_nonzeros, alpha, beta); * * // Allocate temporary storage * cudaMalloc(&d_temp_storage, temp_storage_bytes); * * // Run SpMV * cub::DeviceSpmv::CsrMV(d_temp_storage, temp_storage_bytes, d_values, * d_row_offsets, d_column_indices, d_vector_x, d_vector_y, * num_rows, num_cols, num_nonzeros, alpha, beta); * * // d_vector_y <-- [2, 3, 2, 3, 4, 3, 2, 3, 2] * * \endcode * * \tparam ValueT [inferred] Matrix and vector value type (e.g., /p float, /p double, etc.) */ template < typename ValueT> CUB_RUNTIME_FUNCTION static cudaError_t CsrMV( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation ValueT* d_values, ///< [in] Pointer to the array of \p num_nonzeros values of the corresponding nonzero elements of matrix A. int* d_row_offsets, ///< [in] Pointer to the array of \p m + 1 offsets demarcating the start of every row in \p d_column_indices and \p d_values (with the final entry being equal to \p num_nonzeros) int* d_column_indices, ///< [in] Pointer to the array of \p num_nonzeros column-indices of the corresponding nonzero elements of matrix A. (Indices are zero-valued.) ValueT* d_vector_x, ///< [in] Pointer to the array of \p num_cols values corresponding to the dense input vector x ValueT* d_vector_y, ///< [out] Pointer to the array of \p num_rows values corresponding to the dense output vector y int num_rows, ///< [in] number of rows of matrix A. int num_cols, ///< [in] number of columns of matrix A. int num_nonzeros, ///< [in] number of nonzero elements of matrix A. cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. 
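    // -----------------------------------------------------------------------
    // Illustrative usage sketch (not from the upstream CUB sources). As declared
    // above, CsrMV takes no alpha/beta arguments; the dispatch body below fixes
    // alpha = 1 and beta = 0, so the call computes y = A*x. The helper name
    // spmv_example, the include path and the caller-provided device pointers are
    // assumptions, and error checking is omitted. The block is guarded with
    // "#if 0" so it cannot affect compilation of this vendored header.
    // -----------------------------------------------------------------------
#if 0
    #include <cub/cub.cuh>

    void spmv_example(
        float *d_values,         // [in]  num_nonzeros CSR values of A
        int   *d_row_offsets,    // [in]  num_rows + 1 row offsets of A
        int   *d_column_indices, // [in]  num_nonzeros column indices of A
        float *d_vector_x,       // [in]  dense input vector x (num_cols values)
        float *d_vector_y,       // [out] dense output vector y (num_rows values)
        int num_rows, int num_cols, int num_nonzeros)
    {
        // First call with a NULL workspace only queries the required size
        void   *d_temp_storage     = NULL;
        size_t  temp_storage_bytes = 0;
        cub::DeviceSpmv::CsrMV(d_temp_storage, temp_storage_bytes,
            d_values, d_row_offsets, d_column_indices, d_vector_x, d_vector_y,
            num_rows, num_cols, num_nonzeros);

        // Allocate the workspace, then run the multiplication for real
        cudaMalloc(&d_temp_storage, temp_storage_bytes);
        cub::DeviceSpmv::CsrMV(d_temp_storage, temp_storage_bytes,
            d_values, d_row_offsets, d_column_indices, d_vector_x, d_vector_y,
            num_rows, num_cols, num_nonzeros);

        cudaFree(d_temp_storage);
    }
#endif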
{ SpmvParams spmv_params; spmv_params.d_values = d_values; spmv_params.d_row_end_offsets = d_row_offsets + 1; spmv_params.d_column_indices = d_column_indices; spmv_params.d_vector_x = d_vector_x; spmv_params.d_vector_y = d_vector_y; spmv_params.num_rows = num_rows; spmv_params.num_cols = num_cols; spmv_params.num_nonzeros = num_nonzeros; spmv_params.alpha = 1.0; spmv_params.beta = 0.0; return DispatchSpmv::Dispatch( d_temp_storage, temp_storage_bytes, spmv_params, stream, debug_synchronous); } //@} end member group }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/device/dispatch/000077500000000000000000000000001411340063500214145ustar00rootroot00000000000000relion-3.1.3/src/gpu_utils/cub/device/dispatch/dispatch_histogram.cuh000066400000000000000000001576121411340063500260050ustar00rootroot00000000000000 /****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::DeviceHistogram provides device-wide parallel operations for constructing histogram(s) from a sequence of samples data residing within device-accessible memory. 
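 *
 * \par
 * (Illustrative aside, not from the upstream CUB sources.) This dispatch layer
 * backs the public cub::DeviceHistogram entry points declared in
 * device_histogram.cuh. A minimal even-binning sketch, assuming device-accessible
 * d_samples (float) and d_histogram (int) allocations and placeholder sizes:
 *
 * \par
 * \code
 * // Query the workspace size, allocate it, then bin num_samples samples into
 * // num_levels - 1 evenly spaced bins covering [lower_level, upper_level)
 * void  *d_temp_storage     = NULL;
 * size_t temp_storage_bytes = 0;
 * cub::DeviceHistogram::HistogramEven(d_temp_storage, temp_storage_bytes,
 *     d_samples, d_histogram, num_levels, lower_level, upper_level, num_samples);
 * cudaMalloc(&d_temp_storage, temp_storage_bytes);
 * cub::DeviceHistogram::HistogramEven(d_temp_storage, temp_storage_bytes,
 *     d_samples, d_histogram, num_levels, lower_level, upper_level, num_samples);
 * cudaFree(d_temp_storage);
 * \endcode
 *
 * \par
 * With evenly spaced levels, a sample s with lower_level <= s < upper_level lands
 * in bin (int)((s - lower_level) / ((upper_level - lower_level) / (num_levels - 1))),
 * which is the mapping implemented by the ScaleTransform functor further down.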
*/ #pragma once #include #include #include #include "../../agent/agent_histogram.cuh" #include "../../util_debug.cuh" #include "../../util_device.cuh" #include "../../thread/thread_search.cuh" #include "../../grid/grid_queue.cuh" #include "../../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * Histogram kernel entry points *****************************************************************************/ /** * Histogram initialization kernel entry point */ template < int NUM_ACTIVE_CHANNELS, ///< Number of channels actively being histogrammed typename CounterT, ///< Integer type for counting sample occurrences per histogram bin typename OffsetT> ///< Signed integer type for global offsets __global__ void DeviceHistogramInitKernel( ArrayWrapper num_output_bins_wrapper, ///< Number of output histogram bins per channel ArrayWrapper d_output_histograms_wrapper, ///< Histogram counter data having logical dimensions CounterT[NUM_ACTIVE_CHANNELS][num_bins.array[CHANNEL]] GridQueue tile_queue) ///< Drain queue descriptor for dynamically mapping tile data onto thread blocks { if ((threadIdx.x == 0) && (blockIdx.x == 0)) tile_queue.ResetDrain(); int output_bin = (blockIdx.x * blockDim.x) + threadIdx.x; #pragma unroll for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) { if (output_bin < num_output_bins_wrapper.array[CHANNEL]) d_output_histograms_wrapper.array[CHANNEL][output_bin] = 0; } } /** * Histogram privatized sweep kernel entry point (multi-block). Computes privatized histograms, one per thread block. */ template < typename AgentHistogramPolicyT, ///< Parameterized AgentHistogramPolicy tuning policy type int PRIVATIZED_SMEM_BINS, ///< Maximum number of histogram bins per channel (e.g., up to 256) int NUM_CHANNELS, ///< Number of channels interleaved in the input data (may be greater than the number of channels being actively histogrammed) int NUM_ACTIVE_CHANNELS, ///< Number of channels actively being histogrammed typename SampleIteratorT, ///< The input iterator type. \iterator. 
typename CounterT, ///< Integer type for counting sample occurrences per histogram bin typename PrivatizedDecodeOpT, ///< The transform operator type for determining privatized counter indices from samples, one for each channel typename OutputDecodeOpT, ///< The transform operator type for determining output bin-ids from privatized counter indices, one for each channel typename OffsetT> ///< Signed integer type for global offsets __launch_bounds__ (int(AgentHistogramPolicyT::BLOCK_THREADS)) __global__ void DeviceHistogramSweepKernel( SampleIteratorT d_samples, ///< Input data to reduce ArrayWrapper num_output_bins_wrapper, ///< The number bins per final output histogram ArrayWrapper num_privatized_bins_wrapper, ///< The number bins per privatized histogram ArrayWrapper d_output_histograms_wrapper, ///< Reference to final output histograms ArrayWrapper d_privatized_histograms_wrapper, ///< Reference to privatized histograms ArrayWrapper output_decode_op_wrapper, ///< The transform operator for determining output bin-ids from privatized counter indices, one for each channel ArrayWrapper privatized_decode_op_wrapper, ///< The transform operator for determining privatized counter indices from samples, one for each channel OffsetT num_row_pixels, ///< The number of multi-channel pixels per row in the region of interest OffsetT num_rows, ///< The number of rows in the region of interest OffsetT row_stride_samples, ///< The number of samples between starts of consecutive rows in the region of interest int tiles_per_row, ///< Number of image tiles per row GridQueue tile_queue) ///< Drain queue descriptor for dynamically mapping tile data onto thread blocks { // Thread block type for compositing input tiles typedef AgentHistogram< AgentHistogramPolicyT, PRIVATIZED_SMEM_BINS, NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, PrivatizedDecodeOpT, OutputDecodeOpT, OffsetT> AgentHistogramT; // Shared memory for AgentHistogram __shared__ typename AgentHistogramT::TempStorage temp_storage; AgentHistogramT agent( temp_storage, d_samples, num_output_bins_wrapper.array, num_privatized_bins_wrapper.array, d_output_histograms_wrapper.array, d_privatized_histograms_wrapper.array, output_decode_op_wrapper.array, privatized_decode_op_wrapper.array); // Initialize counters agent.InitBinCounters(); // Consume input tiles agent.ConsumeTiles( num_row_pixels, num_rows, row_stride_samples, tiles_per_row, tile_queue); // Store output to global (if necessary) agent.StoreOutput(); } /****************************************************************************** * Dispatch ******************************************************************************/ /** * Utility class for dispatching the appropriately-tuned kernels for DeviceHistogram */ template < int NUM_CHANNELS, ///< Number of channels interleaved in the input data (may be greater than the number of channels being actively histogrammed) int NUM_ACTIVE_CHANNELS, ///< Number of channels actively being histogrammed typename SampleIteratorT, ///< Random-access input iterator type for reading input items \iterator typename CounterT, ///< Integer type for counting sample occurrences per histogram bin typename LevelT, ///< Type for specifying bin level boundaries typename OffsetT> ///< Signed integer type for global offsets struct DipatchHistogram { //--------------------------------------------------------------------- // Types and constants //--------------------------------------------------------------------- /// The sample value type of the input iterator 
typedef typename std::iterator_traits::value_type SampleT; enum { // Maximum number of bins per channel for which we will use a privatized smem strategy MAX_PRIVATIZED_SMEM_BINS = 256 }; //--------------------------------------------------------------------- // Transform functors for converting samples to bin-ids //--------------------------------------------------------------------- // Searches for bin given a list of bin-boundary levels template struct SearchTransform { LevelIteratorT d_levels; // Pointer to levels array int num_output_levels; // Number of levels in array // Initializer __host__ __device__ __forceinline__ void Init( LevelIteratorT d_levels, // Pointer to levels array int num_output_levels) // Number of levels in array { this->d_levels = d_levels; this->num_output_levels = num_output_levels; } // Method for converting samples to bin-ids template __host__ __device__ __forceinline__ void BinSelect(_SampleT sample, int &bin, bool valid) { /// Level iterator wrapper type typedef typename If::VALUE, CacheModifiedInputIterator, // Wrap the native input pointer with CacheModifiedInputIterator LevelIteratorT>::Type // Directly use the supplied input iterator type WrappedLevelIteratorT; WrappedLevelIteratorT wrapped_levels(d_levels); int num_bins = num_output_levels - 1; if (valid) { bin = UpperBound(wrapped_levels, num_output_levels, (LevelT) sample) - 1; if (bin >= num_bins) bin = -1; } } }; // Scales samples to evenly-spaced bins struct ScaleTransform { int num_bins; // Number of levels in array LevelT max; // Max sample level (exclusive) LevelT min; // Min sample level (inclusive) LevelT scale; // Bin scaling factor // Initializer template __host__ __device__ __forceinline__ void Init( int num_output_levels, // Number of levels in array _LevelT max, // Max sample level (exclusive) _LevelT min, // Min sample level (inclusive) _LevelT scale) // Bin scaling factor { this->num_bins = num_output_levels - 1; this->max = max; this->min = min; this->scale = scale; } // Initializer (float specialization) __host__ __device__ __forceinline__ void Init( int num_output_levels, // Number of levels in array float max, // Max sample level (exclusive) float min, // Min sample level (inclusive) float scale) // Bin scaling factor { this->num_bins = num_output_levels - 1; this->max = max; this->min = min; this->scale = float(1.0) / scale; } // Initializer (double specialization) __host__ __device__ __forceinline__ void Init( int num_output_levels, // Number of levels in array double max, // Max sample level (exclusive) double min, // Min sample level (inclusive) double scale) // Bin scaling factor { this->num_bins = num_output_levels - 1; this->max = max; this->min = min; this->scale = double(1.0) / scale; } // Method for converting samples to bin-ids template __host__ __device__ __forceinline__ void BinSelect(_SampleT sample, int &bin, bool valid) { LevelT level_sample = (LevelT) sample; if (valid && (level_sample >= min) && (level_sample < max)) bin = (int) ((level_sample - min) / scale); } // Method for converting samples to bin-ids (float specialization) template __host__ __device__ __forceinline__ void BinSelect(float sample, int &bin, bool valid) { LevelT level_sample = (LevelT) sample; if (valid && (level_sample >= min) && (level_sample < max)) bin = (int) ((level_sample - min) * scale); } // Method for converting samples to bin-ids (double specialization) template __host__ __device__ __forceinline__ void BinSelect(double sample, int &bin, bool valid) { LevelT level_sample = (LevelT) 
sample; if (valid && (level_sample >= min) && (level_sample < max)) bin = (int) ((level_sample - min) * scale); } }; // Pass-through bin transform operator struct PassThruTransform { // Method for converting samples to bin-ids template __host__ __device__ __forceinline__ void BinSelect(_SampleT sample, int &bin, bool valid) { if (valid) bin = (int) sample; } }; //--------------------------------------------------------------------- // Tuning policies //--------------------------------------------------------------------- template struct TScale { enum { V_SCALE = (sizeof(SampleT) + sizeof(int) - 1) / sizeof(int), VALUE = CUB_MAX((NOMINAL_ITEMS_PER_THREAD / NUM_ACTIVE_CHANNELS / V_SCALE), 1) }; }; /// SM11 struct Policy110 { // HistogramSweepPolicy typedef AgentHistogramPolicy< 512, (NUM_CHANNELS == 1) ? 8 : 2, BLOCK_LOAD_DIRECT, LOAD_DEFAULT, true, GMEM, false> HistogramSweepPolicy; }; /// SM20 struct Policy200 { // HistogramSweepPolicy typedef AgentHistogramPolicy< (NUM_CHANNELS == 1) ? 256 : 128, (NUM_CHANNELS == 1) ? 8 : 3, (NUM_CHANNELS == 1) ? BLOCK_LOAD_DIRECT : BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, true, SMEM, false> HistogramSweepPolicy; }; /// SM30 struct Policy300 { // HistogramSweepPolicy typedef AgentHistogramPolicy< 512, (NUM_CHANNELS == 1) ? 8 : 2, BLOCK_LOAD_DIRECT, LOAD_DEFAULT, true, GMEM, false> HistogramSweepPolicy; }; /// SM35 struct Policy350 { // HistogramSweepPolicy typedef AgentHistogramPolicy< 128, TScale<8>::VALUE, BLOCK_LOAD_DIRECT, LOAD_LDG, true, BLEND, true> HistogramSweepPolicy; }; /// SM50 struct Policy500 { // HistogramSweepPolicy typedef AgentHistogramPolicy< 384, TScale<16>::VALUE, BLOCK_LOAD_DIRECT, LOAD_LDG, true, SMEM, false> HistogramSweepPolicy; }; //--------------------------------------------------------------------- // Tuning policies of current PTX compiler pass //--------------------------------------------------------------------- #if (CUB_PTX_ARCH >= 500) typedef Policy500 PtxPolicy; #elif (CUB_PTX_ARCH >= 350) typedef Policy350 PtxPolicy; #elif (CUB_PTX_ARCH >= 300) typedef Policy300 PtxPolicy; #elif (CUB_PTX_ARCH >= 200) typedef Policy200 PtxPolicy; #else typedef Policy110 PtxPolicy; #endif // "Opaque" policies (whose parameterizations aren't reflected in the type signature) struct PtxHistogramSweepPolicy : PtxPolicy::HistogramSweepPolicy {}; //--------------------------------------------------------------------- // Utilities //--------------------------------------------------------------------- /** * Initialize kernel dispatch configurations with the policies corresponding to the PTX assembly we will use */ template CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t InitConfigs( int ptx_version, KernelConfig &histogram_sweep_config) { #if (CUB_PTX_ARCH > 0) // We're on the device, so initialize the kernel dispatch configurations with the current PTX policy return histogram_sweep_config.template Init(); #else // We're on the host, so lookup and initialize the kernel dispatch configurations with the policies that match the device's PTX version if (ptx_version >= 500) { return histogram_sweep_config.template Init(); } else if (ptx_version >= 350) { return histogram_sweep_config.template Init(); } else if (ptx_version >= 300) { return histogram_sweep_config.template Init(); } else if (ptx_version >= 200) { return histogram_sweep_config.template Init(); } else if (ptx_version >= 110) { return histogram_sweep_config.template Init(); } else { // No global atomic support return cudaErrorNotSupported; } #endif } /** * Kernel kernel dispatch 
configuration */ struct KernelConfig { int block_threads; int pixels_per_thread; template CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t Init() { block_threads = BlockPolicy::BLOCK_THREADS; pixels_per_thread = BlockPolicy::PIXELS_PER_THREAD; return cudaSuccess; } }; //--------------------------------------------------------------------- // Dispatch entrypoints //--------------------------------------------------------------------- /** * Privatization-based dispatch routine */ template < typename PrivatizedDecodeOpT, ///< The transform operator type for determining privatized counter indices from samples, one for each channel typename OutputDecodeOpT, ///< The transform operator type for determining output bin-ids from privatized counter indices, one for each channel typename DeviceHistogramInitKernelT, ///< Function type of cub::DeviceHistogramInitKernel typename DeviceHistogramSweepKernelT> ///< Function type of cub::DeviceHistogramSweepKernel CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t PrivatizedDispatch( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation SampleIteratorT d_samples, ///< [in] The pointer to the input sequence of sample items. The samples from different channels are assumed to be interleaved (e.g., an array of 32-bit pixels where each pixel consists of four RGBA 8-bit samples). CounterT* d_output_histograms[NUM_ACTIVE_CHANNELS], ///< [out] The pointers to the histogram counter output arrays, one for each active channel. For channeli, the allocation length of d_histograms[i] should be num_output_levels[i] - 1. int num_privatized_levels[NUM_ACTIVE_CHANNELS], ///< [in] The number of bin level boundaries for delineating histogram samples in each active channel. Implies that the number of bins for channeli is num_output_levels[i] - 1. PrivatizedDecodeOpT privatized_decode_op[NUM_ACTIVE_CHANNELS], ///< [in] Transform operators for determining bin-ids from samples, one for each channel int num_output_levels[NUM_ACTIVE_CHANNELS], ///< [in] The number of bin level boundaries for delineating histogram samples in each active channel. Implies that the number of bins for channeli is num_output_levels[i] - 1. OutputDecodeOpT output_decode_op[NUM_ACTIVE_CHANNELS], ///< [in] Transform operators for determining bin-ids from samples, one for each channel int max_num_output_bins, ///< [in] Maximum number of output bins in any channel OffsetT num_row_pixels, ///< [in] The number of multi-channel pixels per row in the region of interest OffsetT num_rows, ///< [in] The number of rows in the region of interest OffsetT row_stride_samples, ///< [in] The number of samples between starts of consecutive rows in the region of interest DeviceHistogramInitKernelT histogram_init_kernel, ///< [in] Kernel function pointer to parameterization of cub::DeviceHistogramInitKernel DeviceHistogramSweepKernelT histogram_sweep_kernel, ///< [in] Kernel function pointer to parameterization of cub::DeviceHistogramSweepKernel KernelConfig histogram_sweep_config, ///< [in] Dispatch parameters that match the policy that \p histogram_sweep_kernel was compiled for cudaStream_t stream, ///< [in] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous) ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. 
May cause significant slowdown. Default is \p false. { #ifndef CUB_RUNTIME_ENABLED // Kernel launch not supported from this device return CubDebug(cudaErrorNotSupported); #else cudaError error = cudaSuccess; do { // Get device ordinal int device_ordinal; if (CubDebug(error = cudaGetDevice(&device_ordinal))) break; // Get SM count int sm_count; if (CubDebug(error = cudaDeviceGetAttribute (&sm_count, cudaDevAttrMultiProcessorCount, device_ordinal))) break; // Get SM occupancy for histogram_sweep_kernel int histogram_sweep_sm_occupancy; if (CubDebug(error = MaxSmOccupancy( histogram_sweep_sm_occupancy, histogram_sweep_kernel, histogram_sweep_config.block_threads))) break; // Get device occupancy for histogram_sweep_kernel int histogram_sweep_occupancy = histogram_sweep_sm_occupancy * sm_count; if (num_row_pixels * NUM_CHANNELS == row_stride_samples) { // Treat as a single linear array of samples num_row_pixels *= num_rows; num_rows = 1; row_stride_samples = num_row_pixels * NUM_CHANNELS; } // Get grid dimensions, trying to keep total blocks ~histogram_sweep_occupancy int pixels_per_tile = histogram_sweep_config.block_threads * histogram_sweep_config.pixels_per_thread; int tiles_per_row = int(num_row_pixels + pixels_per_tile - 1) / pixels_per_tile; int blocks_per_row = CUB_MIN(histogram_sweep_occupancy, tiles_per_row); int blocks_per_col = (blocks_per_row > 0) ? int(CUB_MIN(histogram_sweep_occupancy / blocks_per_row, num_rows)) : 0; int num_thread_blocks = blocks_per_row * blocks_per_col; dim3 sweep_grid_dims; sweep_grid_dims.x = (unsigned int) blocks_per_row; sweep_grid_dims.y = (unsigned int) blocks_per_col; sweep_grid_dims.z = 1; // Temporary storage allocation requirements const int NUM_ALLOCATIONS = NUM_ACTIVE_CHANNELS + 1; void* allocations[NUM_ALLOCATIONS]; size_t allocation_sizes[NUM_ALLOCATIONS]; for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) allocation_sizes[CHANNEL] = size_t(num_thread_blocks) * (num_privatized_levels[CHANNEL] - 1) * sizeof(CounterT); allocation_sizes[NUM_ALLOCATIONS - 1] = GridQueue::AllocationSize(); // Alias the temporary allocations from the single storage blob (or compute the necessary size of the blob) if (CubDebug(error = AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes))) break; if (d_temp_storage == NULL) { // Return if the caller is simply requesting the size of the storage allocation break; } // Construct the grid queue descriptor GridQueue tile_queue(allocations[NUM_ALLOCATIONS - 1]); // Setup array wrapper for histogram channel output (because we can't pass static arrays as kernel parameters) ArrayWrapper d_output_histograms_wrapper; for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) d_output_histograms_wrapper.array[CHANNEL] = d_output_histograms[CHANNEL]; // Setup array wrapper for privatized per-block histogram channel output (because we can't pass static arrays as kernel parameters) ArrayWrapper d_privatized_histograms_wrapper; for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) d_privatized_histograms_wrapper.array[CHANNEL] = (CounterT*) allocations[CHANNEL]; // Setup array wrapper for sweep bin transforms (because we can't pass static arrays as kernel parameters) ArrayWrapper privatized_decode_op_wrapper; for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) privatized_decode_op_wrapper.array[CHANNEL] = privatized_decode_op[CHANNEL]; // Setup array wrapper for aggregation bin transforms (because we can't pass static arrays as kernel parameters) ArrayWrapper 
output_decode_op_wrapper; for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) output_decode_op_wrapper.array[CHANNEL] = output_decode_op[CHANNEL]; // Setup array wrapper for num privatized bins (because we can't pass static arrays as kernel parameters) ArrayWrapper num_privatized_bins_wrapper; for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) num_privatized_bins_wrapper.array[CHANNEL] = num_privatized_levels[CHANNEL] - 1; // Setup array wrapper for num output bins (because we can't pass static arrays as kernel parameters) ArrayWrapper num_output_bins_wrapper; for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) num_output_bins_wrapper.array[CHANNEL] = num_output_levels[CHANNEL] - 1; int histogram_init_block_threads = 256; int histogram_init_grid_dims = (max_num_output_bins + histogram_init_block_threads - 1) / histogram_init_block_threads; // Log DeviceHistogramInitKernel configuration if (debug_synchronous) _CubLog("Invoking DeviceHistogramInitKernel<<<%d, %d, 0, %lld>>>()\n", histogram_init_grid_dims, histogram_init_block_threads, (long long) stream); // Invoke histogram_init_kernel histogram_init_kernel<<>>( num_output_bins_wrapper, d_output_histograms_wrapper, tile_queue); // Return if empty problem if ((blocks_per_row == 0) || (blocks_per_col == 0)) break; // Log histogram_sweep_kernel configuration if (debug_synchronous) _CubLog("Invoking histogram_sweep_kernel<<<{%d, %d, %d}, %d, 0, %lld>>>(), %d pixels per thread, %d SM occupancy\n", sweep_grid_dims.x, sweep_grid_dims.y, sweep_grid_dims.z, histogram_sweep_config.block_threads, (long long) stream, histogram_sweep_config.pixels_per_thread, histogram_sweep_sm_occupancy); // Invoke histogram_sweep_kernel histogram_sweep_kernel<<>>( d_samples, num_output_bins_wrapper, num_privatized_bins_wrapper, d_output_histograms_wrapper, d_privatized_histograms_wrapper, output_decode_op_wrapper, privatized_decode_op_wrapper, num_row_pixels, num_rows, row_stride_samples, tiles_per_row, tile_queue); // Check for failure to launch if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; } while (0); return error; #endif // CUB_RUNTIME_ENABLED } /** * Dispatch routine for HistogramRange, specialized for sample types larger than 8bit */ CUB_RUNTIME_FUNCTION static cudaError_t DispatchRange( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation SampleIteratorT d_samples, ///< [in] The pointer to the multi-channel input sequence of data samples. The samples from different channels are assumed to be interleaved (e.g., an array of 32-bit pixels where each pixel consists of four RGBA 8-bit samples). CounterT* d_output_histograms[NUM_ACTIVE_CHANNELS], ///< [out] The pointers to the histogram counter output arrays, one for each active channel. For channeli, the allocation length of d_histograms[i] should be num_output_levels[i] - 1. int num_output_levels[NUM_ACTIVE_CHANNELS], ///< [in] The number of boundaries (levels) for delineating histogram samples in each active channel. Implies that the number of bins for channeli is num_output_levels[i] - 1. LevelT *d_levels[NUM_ACTIVE_CHANNELS], ///< [in] The pointers to the arrays of boundaries (levels), one for each active channel. 
Bin ranges are defined by consecutive boundary pairings: lower sample value boundaries are inclusive and upper sample value boundaries are exclusive. OffsetT num_row_pixels, ///< [in] The number of multi-channel pixels per row in the region of interest OffsetT num_rows, ///< [in] The number of rows in the region of interest OffsetT row_stride_samples, ///< [in] The number of samples between starts of consecutive rows in the region of interest cudaStream_t stream, ///< [in] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous, ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. Int2Type is_byte_sample) ///< [in] Marker type indicating whether or not SampleT is a 8b type { cudaError error = cudaSuccess; do { // Get PTX version int ptx_version; #if (CUB_PTX_ARCH == 0) if (CubDebug(error = PtxVersion(ptx_version))) break; #else ptx_version = CUB_PTX_ARCH; #endif // Get kernel dispatch configurations KernelConfig histogram_sweep_config; if (CubDebug(error = InitConfigs(ptx_version, histogram_sweep_config))) break; // Use the search transform op for converting samples to privatized bins typedef SearchTransform PrivatizedDecodeOpT; // Use the pass-thru transform op for converting privatized bins to output bins typedef PassThruTransform OutputDecodeOpT; PrivatizedDecodeOpT privatized_decode_op[NUM_ACTIVE_CHANNELS]; OutputDecodeOpT output_decode_op[NUM_ACTIVE_CHANNELS]; int max_levels = num_output_levels[0]; for (int channel = 0; channel < NUM_ACTIVE_CHANNELS; ++channel) { privatized_decode_op[channel].Init(d_levels[channel], num_output_levels[channel]); if (num_output_levels[channel] > max_levels) max_levels = num_output_levels[channel]; } int max_num_output_bins = max_levels - 1; // Dispatch if (max_num_output_bins > MAX_PRIVATIZED_SMEM_BINS) { // Too many bins to keep in shared memory. const int PRIVATIZED_SMEM_BINS = 0; if (CubDebug(error = PrivatizedDispatch( d_temp_storage, temp_storage_bytes, d_samples, d_output_histograms, num_output_levels, privatized_decode_op, num_output_levels, output_decode_op, max_num_output_bins, num_row_pixels, num_rows, row_stride_samples, DeviceHistogramInitKernel, DeviceHistogramSweepKernel, histogram_sweep_config, stream, debug_synchronous))) break; } else { // Dispatch shared-privatized approach const int PRIVATIZED_SMEM_BINS = MAX_PRIVATIZED_SMEM_BINS; if (CubDebug(error = PrivatizedDispatch( d_temp_storage, temp_storage_bytes, d_samples, d_output_histograms, num_output_levels, privatized_decode_op, num_output_levels, output_decode_op, max_num_output_bins, num_row_pixels, num_rows, row_stride_samples, DeviceHistogramInitKernel, DeviceHistogramSweepKernel, histogram_sweep_config, stream, debug_synchronous))) break; } } while (0); return error; } /** * Dispatch routine for HistogramRange, specialized for 8-bit sample types (computes 256-bin privatized histograms and then reduces to user-specified levels) */ CUB_RUNTIME_FUNCTION static cudaError_t DispatchRange( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation SampleIteratorT d_samples, ///< [in] The pointer to the multi-channel input sequence of data samples. 
The samples from different channels are assumed to be interleaved (e.g., an array of 32-bit pixels where each pixel consists of four RGBA 8-bit samples). CounterT* d_output_histograms[NUM_ACTIVE_CHANNELS], ///< [out] The pointers to the histogram counter output arrays, one for each active channel. For channeli, the allocation length of d_histograms[i] should be num_output_levels[i] - 1. int num_output_levels[NUM_ACTIVE_CHANNELS], ///< [in] The number of boundaries (levels) for delineating histogram samples in each active channel. Implies that the number of bins for channeli is num_output_levels[i] - 1. LevelT *d_levels[NUM_ACTIVE_CHANNELS], ///< [in] The pointers to the arrays of boundaries (levels), one for each active channel. Bin ranges are defined by consecutive boundary pairings: lower sample value boundaries are inclusive and upper sample value boundaries are exclusive. OffsetT num_row_pixels, ///< [in] The number of multi-channel pixels per row in the region of interest OffsetT num_rows, ///< [in] The number of rows in the region of interest OffsetT row_stride_samples, ///< [in] The number of samples between starts of consecutive rows in the region of interest cudaStream_t stream, ///< [in] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous, ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. Int2Type is_byte_sample) ///< [in] Marker type indicating whether or not SampleT is a 8b type { cudaError error = cudaSuccess; do { // Get PTX version int ptx_version; #if (CUB_PTX_ARCH == 0) if (CubDebug(error = PtxVersion(ptx_version))) break; #else ptx_version = CUB_PTX_ARCH; #endif // Get kernel dispatch configurations KernelConfig histogram_sweep_config; if (CubDebug(error = InitConfigs(ptx_version, histogram_sweep_config))) break; // Use the pass-thru transform op for converting samples to privatized bins typedef PassThruTransform PrivatizedDecodeOpT; // Use the search transform op for converting privatized bins to output bins typedef SearchTransform OutputDecodeOpT; int num_privatized_levels[NUM_ACTIVE_CHANNELS]; PrivatizedDecodeOpT privatized_decode_op[NUM_ACTIVE_CHANNELS]; OutputDecodeOpT output_decode_op[NUM_ACTIVE_CHANNELS]; int max_levels = num_output_levels[0]; // Maximum number of levels in any channel for (int channel = 0; channel < NUM_ACTIVE_CHANNELS; ++channel) { num_privatized_levels[channel] = 257; output_decode_op[channel].Init(d_levels[channel], num_output_levels[channel]); if (num_output_levels[channel] > max_levels) max_levels = num_output_levels[channel]; } int max_num_output_bins = max_levels - 1; const int PRIVATIZED_SMEM_BINS = 256; if (CubDebug(error = PrivatizedDispatch( d_temp_storage, temp_storage_bytes, d_samples, d_output_histograms, num_privatized_levels, privatized_decode_op, num_output_levels, output_decode_op, max_num_output_bins, num_row_pixels, num_rows, row_stride_samples, DeviceHistogramInitKernel, DeviceHistogramSweepKernel, histogram_sweep_config, stream, debug_synchronous))) break; } while (0); return error; } /** * Dispatch routine for HistogramEven, specialized for sample types larger than 8-bit */ CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t DispatchEven( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. 
size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation SampleIteratorT d_samples, ///< [in] The pointer to the input sequence of sample items. The samples from different channels are assumed to be interleaved (e.g., an array of 32-bit pixels where each pixel consists of four RGBA 8-bit samples). CounterT* d_output_histograms[NUM_ACTIVE_CHANNELS], ///< [out] The pointers to the histogram counter output arrays, one for each active channel. For channeli, the allocation length of d_histograms[i] should be num_output_levels[i] - 1. int num_output_levels[NUM_ACTIVE_CHANNELS], ///< [in] The number of bin level boundaries for delineating histogram samples in each active channel. Implies that the number of bins for channeli is num_output_levels[i] - 1. LevelT lower_level[NUM_ACTIVE_CHANNELS], ///< [in] The lower sample value bound (inclusive) for the lowest histogram bin in each active channel. LevelT upper_level[NUM_ACTIVE_CHANNELS], ///< [in] The upper sample value bound (exclusive) for the highest histogram bin in each active channel. OffsetT num_row_pixels, ///< [in] The number of multi-channel pixels per row in the region of interest OffsetT num_rows, ///< [in] The number of rows in the region of interest OffsetT row_stride_samples, ///< [in] The number of samples between starts of consecutive rows in the region of interest cudaStream_t stream, ///< [in] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous, ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. Int2Type is_byte_sample) ///< [in] Marker type indicating whether or not SampleT is a 8b type { cudaError error = cudaSuccess; do { // Get PTX version int ptx_version; #if (CUB_PTX_ARCH == 0) if (CubDebug(error = PtxVersion(ptx_version))) break; #else ptx_version = CUB_PTX_ARCH; #endif // Get kernel dispatch configurations KernelConfig histogram_sweep_config; if (CubDebug(error = InitConfigs(ptx_version, histogram_sweep_config))) break; // Use the scale transform op for converting samples to privatized bins typedef ScaleTransform PrivatizedDecodeOpT; // Use the pass-thru transform op for converting privatized bins to output bins typedef PassThruTransform OutputDecodeOpT; PrivatizedDecodeOpT privatized_decode_op[NUM_ACTIVE_CHANNELS]; OutputDecodeOpT output_decode_op[NUM_ACTIVE_CHANNELS]; int max_levels = num_output_levels[0]; for (int channel = 0; channel < NUM_ACTIVE_CHANNELS; ++channel) { int bins = num_output_levels[channel] - 1; LevelT scale = (upper_level[channel] - lower_level[channel]) / bins; privatized_decode_op[channel].Init(num_output_levels[channel], upper_level[channel], lower_level[channel], scale); if (num_output_levels[channel] > max_levels) max_levels = num_output_levels[channel]; } int max_num_output_bins = max_levels - 1; if (max_num_output_bins > MAX_PRIVATIZED_SMEM_BINS) { // Dispatch shared-privatized approach const int PRIVATIZED_SMEM_BINS = 0; if (CubDebug(error = PrivatizedDispatch( d_temp_storage, temp_storage_bytes, d_samples, d_output_histograms, num_output_levels, privatized_decode_op, num_output_levels, output_decode_op, max_num_output_bins, num_row_pixels, num_rows, row_stride_samples, DeviceHistogramInitKernel, DeviceHistogramSweepKernel, histogram_sweep_config, stream, debug_synchronous))) break; } else { // Dispatch shared-privatized approach const int PRIVATIZED_SMEM_BINS = MAX_PRIVATIZED_SMEM_BINS; if (CubDebug(error = 
PrivatizedDispatch( d_temp_storage, temp_storage_bytes, d_samples, d_output_histograms, num_output_levels, privatized_decode_op, num_output_levels, output_decode_op, max_num_output_bins, num_row_pixels, num_rows, row_stride_samples, DeviceHistogramInitKernel, DeviceHistogramSweepKernel, histogram_sweep_config, stream, debug_synchronous))) break; } } while (0); return error; } /** * Dispatch routine for HistogramEven, specialized for 8-bit sample types (computes 256-bin privatized histograms and then reduces to user-specified levels) */ CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t DispatchEven( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation SampleIteratorT d_samples, ///< [in] The pointer to the input sequence of sample items. The samples from different channels are assumed to be interleaved (e.g., an array of 32-bit pixels where each pixel consists of four RGBA 8-bit samples). CounterT* d_output_histograms[NUM_ACTIVE_CHANNELS], ///< [out] The pointers to the histogram counter output arrays, one for each active channel. For channeli, the allocation length of d_histograms[i] should be num_output_levels[i] - 1. int num_output_levels[NUM_ACTIVE_CHANNELS], ///< [in] The number of bin level boundaries for delineating histogram samples in each active channel. Implies that the number of bins for channeli is num_output_levels[i] - 1. LevelT lower_level[NUM_ACTIVE_CHANNELS], ///< [in] The lower sample value bound (inclusive) for the lowest histogram bin in each active channel. LevelT upper_level[NUM_ACTIVE_CHANNELS], ///< [in] The upper sample value bound (exclusive) for the highest histogram bin in each active channel. OffsetT num_row_pixels, ///< [in] The number of multi-channel pixels per row in the region of interest OffsetT num_rows, ///< [in] The number of rows in the region of interest OffsetT row_stride_samples, ///< [in] The number of samples between starts of consecutive rows in the region of interest cudaStream_t stream, ///< [in] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous, ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. 
Int2Type is_byte_sample) ///< [in] Marker type indicating whether or not SampleT is a 8b type { cudaError error = cudaSuccess; do { // Get PTX version int ptx_version; #if (CUB_PTX_ARCH == 0) if (CubDebug(error = PtxVersion(ptx_version))) break; #else ptx_version = CUB_PTX_ARCH; #endif // Get kernel dispatch configurations KernelConfig histogram_sweep_config; if (CubDebug(error = InitConfigs(ptx_version, histogram_sweep_config))) break; // Use the pass-thru transform op for converting samples to privatized bins typedef PassThruTransform PrivatizedDecodeOpT; // Use the scale transform op for converting privatized bins to output bins typedef ScaleTransform OutputDecodeOpT; int num_privatized_levels[NUM_ACTIVE_CHANNELS]; PrivatizedDecodeOpT privatized_decode_op[NUM_ACTIVE_CHANNELS]; OutputDecodeOpT output_decode_op[NUM_ACTIVE_CHANNELS]; int max_levels = num_output_levels[0]; for (int channel = 0; channel < NUM_ACTIVE_CHANNELS; ++channel) { num_privatized_levels[channel] = 257; int bins = num_output_levels[channel] - 1; LevelT scale = (upper_level[channel] - lower_level[channel]) / bins; output_decode_op[channel].Init(num_output_levels[channel], upper_level[channel], lower_level[channel], scale); if (num_output_levels[channel] > max_levels) max_levels = num_output_levels[channel]; } int max_num_output_bins = max_levels - 1; const int PRIVATIZED_SMEM_BINS = 256; if (CubDebug(error = PrivatizedDispatch( d_temp_storage, temp_storage_bytes, d_samples, d_output_histograms, num_privatized_levels, privatized_decode_op, num_output_levels, output_decode_op, max_num_output_bins, num_row_pixels, num_rows, row_stride_samples, DeviceHistogramInitKernel, DeviceHistogramSweepKernel, histogram_sweep_config, stream, debug_synchronous))) break; } while (0); return error; } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/device/dispatch/dispatch_radix_sort.cuh000066400000000000000000002406651411340063500261670ustar00rootroot00000000000000 /****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::DeviceRadixSort provides device-wide, parallel operations for computing a radix sort across a sequence of data items residing within device-accessible memory. */ #pragma once #include #include #include "../../agent/agent_radix_sort_upsweep.cuh" #include "../../agent/agent_radix_sort_downsweep.cuh" #include "../../agent/agent_scan.cuh" #include "../../block/block_radix_sort.cuh" #include "../../grid/grid_even_share.cuh" #include "../../util_type.cuh" #include "../../util_debug.cuh" #include "../../util_device.cuh" #include "../../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * Kernel entry points *****************************************************************************/ /** * Upsweep digit-counting kernel entry point (multi-block). Computes privatized digit histograms, one per block. */ template < typename ChainedPolicyT, ///< Chained tuning policy bool ALT_DIGIT_BITS, ///< Whether or not to use the alternate (lower-bits) policy bool IS_DESCENDING, ///< Whether or not the sorted-order is high-to-low typename KeyT, ///< Key type typename OffsetT> ///< Signed integer type for global offsets __launch_bounds__ (int((ALT_DIGIT_BITS) ? ChainedPolicyT::ActivePolicy::AltUpsweepPolicy::BLOCK_THREADS : ChainedPolicyT::ActivePolicy::UpsweepPolicy::BLOCK_THREADS)) __global__ void DeviceRadixSortUpsweepKernel( const KeyT *d_keys, ///< [in] Input keys buffer OffsetT *d_spine, ///< [out] Privatized (per block) digit histograms (striped, i.e., 0s counts from each block, then 1s counts from each block, etc.) OffsetT /*num_items*/, ///< [in] Total number of input data items int current_bit, ///< [in] Bit position of current radix digit int num_bits, ///< [in] Number of bits of current radix digit GridEvenShare even_share) ///< [in] Even-share descriptor for mapan equal number of tiles onto each thread block { enum { TILE_ITEMS = ChainedPolicyT::ActivePolicy::AltUpsweepPolicy::BLOCK_THREADS * ChainedPolicyT::ActivePolicy::AltUpsweepPolicy::ITEMS_PER_THREAD }; // Parameterize AgentRadixSortUpsweep type for the current configuration typedef AgentRadixSortUpsweep< typename If<(ALT_DIGIT_BITS), typename ChainedPolicyT::ActivePolicy::AltUpsweepPolicy, typename ChainedPolicyT::ActivePolicy::UpsweepPolicy>::Type, KeyT, OffsetT> AgentRadixSortUpsweepT; // Shared memory storage __shared__ typename AgentRadixSortUpsweepT::TempStorage temp_storage; // Initialize GRID_MAPPING_RAKE even-share descriptor for this thread block even_share.template BlockInit(); AgentRadixSortUpsweepT upsweep(temp_storage, d_keys, current_bit, num_bits); upsweep.ProcessRegion(even_share.block_offset, even_share.block_end); CTA_SYNC(); // Write out digit counts (striped) upsweep.ExtractCounts(d_spine, gridDim.x, blockIdx.x); } /** * Spine scan kernel entry point (single-block). 
Computes an exclusive prefix sum over the privatized digit histograms */ template < typename ChainedPolicyT, ///< Chained tuning policy typename OffsetT> ///< Signed integer type for global offsets __launch_bounds__ (int(ChainedPolicyT::ActivePolicy::ScanPolicy::BLOCK_THREADS), 1) __global__ void RadixSortScanBinsKernel( OffsetT *d_spine, ///< [in,out] Privatized (per block) digit histograms (striped, i.e., 0s counts from each block, then 1s counts from each block, etc.) int num_counts) ///< [in] Total number of bin-counts { // Parameterize the AgentScan type for the current configuration typedef AgentScan< typename ChainedPolicyT::ActivePolicy::ScanPolicy, OffsetT*, OffsetT*, cub::Sum, OffsetT, OffsetT> AgentScanT; // Shared memory storage __shared__ typename AgentScanT::TempStorage temp_storage; // Block scan instance AgentScanT block_scan(temp_storage, d_spine, d_spine, cub::Sum(), OffsetT(0)) ; // Process full input tiles int block_offset = 0; BlockScanRunningPrefixOp prefix_op(0, Sum()); while (block_offset + AgentScanT::TILE_ITEMS <= num_counts) { block_scan.template ConsumeTile(block_offset, prefix_op); block_offset += AgentScanT::TILE_ITEMS; } } /** * Downsweep pass kernel entry point (multi-block). Scatters keys (and values) into corresponding bins for the current digit place. */ template < typename ChainedPolicyT, ///< Chained tuning policy bool ALT_DIGIT_BITS, ///< Whether or not to use the alternate (lower-bits) policy bool IS_DESCENDING, ///< Whether or not the sorted-order is high-to-low typename KeyT, ///< Key type typename ValueT, ///< Value type typename OffsetT> ///< Signed integer type for global offsets __launch_bounds__ (int((ALT_DIGIT_BITS) ? ChainedPolicyT::ActivePolicy::AltDownsweepPolicy::BLOCK_THREADS : ChainedPolicyT::ActivePolicy::DownsweepPolicy::BLOCK_THREADS)) __global__ void DeviceRadixSortDownsweepKernel( const KeyT *d_keys_in, ///< [in] Input keys buffer KeyT *d_keys_out, ///< [in] Output keys buffer const ValueT *d_values_in, ///< [in] Input values buffer ValueT *d_values_out, ///< [in] Output values buffer OffsetT *d_spine, ///< [in] Scan of privatized (per block) digit histograms (striped, i.e., 0s counts from each block, then 1s counts from each block, etc.) OffsetT num_items, ///< [in] Total number of input data items int current_bit, ///< [in] Bit position of current radix digit int num_bits, ///< [in] Number of bits of current radix digit GridEvenShare even_share) ///< [in] Even-share descriptor for mapan equal number of tiles onto each thread block { enum { TILE_ITEMS = ChainedPolicyT::ActivePolicy::AltUpsweepPolicy::BLOCK_THREADS * ChainedPolicyT::ActivePolicy::AltUpsweepPolicy::ITEMS_PER_THREAD }; // Parameterize AgentRadixSortDownsweep type for the current configuration typedef AgentRadixSortDownsweep< typename If<(ALT_DIGIT_BITS), typename ChainedPolicyT::ActivePolicy::AltDownsweepPolicy, typename ChainedPolicyT::ActivePolicy::DownsweepPolicy>::Type, IS_DESCENDING, KeyT, ValueT, OffsetT> AgentRadixSortDownsweepT; // Shared memory storage __shared__ typename AgentRadixSortDownsweepT::TempStorage temp_storage; // Initialize even-share descriptor for this thread block even_share.template BlockInit(); // Process input tiles AgentRadixSortDownsweepT(temp_storage, num_items, d_spine, d_keys_in, d_keys_out, d_values_in, d_values_out, current_bit, num_bits).ProcessRegion( even_share.block_offset, even_share.block_end); } /** * Single pass kernel entry point (single-block). Fully sorts a tile of input. 
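 *
 * (Illustrative aside, not from the upstream CUB sources.) The top-level dispatch
 * takes this single-block path only when the whole problem fits in one tile of the
 * active SingleTilePolicy; a simplified sketch of that decision, reusing the names
 * defined in this file, is:
 *
 * \code
 * // Simplified dispatch decision (begin_bit/end_bit delimit the key bits to sort)
 * if (num_items <= SingleTilePolicy::BLOCK_THREADS * SingleTilePolicy::ITEMS_PER_THREAD)
 * {
 *     // Small problem: sort everything within a single thread block
 *     DeviceRadixSortSingleTileKernel<<<1, SingleTilePolicy::BLOCK_THREADS, 0, stream>>>(
 *         d_keys_in, d_keys_out, d_values_in, d_values_out, num_items, begin_bit, end_bit);
 * }
 * else
 * {
 *     // Large problem: repeated upsweep / spine-scan / downsweep passes over the digit places
 * }
 * \endcode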
*/ template < typename ChainedPolicyT, ///< Chained tuning policy bool IS_DESCENDING, ///< Whether or not the sorted-order is high-to-low typename KeyT, ///< Key type typename ValueT, ///< Value type typename OffsetT> ///< Signed integer type for global offsets __launch_bounds__ (int(ChainedPolicyT::ActivePolicy::SingleTilePolicy::BLOCK_THREADS), 1) __global__ void DeviceRadixSortSingleTileKernel( const KeyT *d_keys_in, ///< [in] Input keys buffer KeyT *d_keys_out, ///< [in] Output keys buffer const ValueT *d_values_in, ///< [in] Input values buffer ValueT *d_values_out, ///< [in] Output values buffer OffsetT num_items, ///< [in] Total number of input data items int current_bit, ///< [in] Bit position of current radix digit int end_bit) ///< [in] The past-the-end (most-significant) bit index needed for key comparison { // Constants enum { BLOCK_THREADS = ChainedPolicyT::ActivePolicy::SingleTilePolicy::BLOCK_THREADS, ITEMS_PER_THREAD = ChainedPolicyT::ActivePolicy::SingleTilePolicy::ITEMS_PER_THREAD, KEYS_ONLY = Equals::VALUE, }; // BlockRadixSort type typedef BlockRadixSort< KeyT, BLOCK_THREADS, ITEMS_PER_THREAD, ValueT, ChainedPolicyT::ActivePolicy::SingleTilePolicy::RADIX_BITS, (ChainedPolicyT::ActivePolicy::SingleTilePolicy::RANK_ALGORITHM == RADIX_RANK_MEMOIZE), ChainedPolicyT::ActivePolicy::SingleTilePolicy::SCAN_ALGORITHM> BlockRadixSortT; // BlockLoad type (keys) typedef BlockLoad< KeyT, BLOCK_THREADS, ITEMS_PER_THREAD, ChainedPolicyT::ActivePolicy::SingleTilePolicy::LOAD_ALGORITHM> BlockLoadKeys; // BlockLoad type (values) typedef BlockLoad< ValueT, BLOCK_THREADS, ITEMS_PER_THREAD, ChainedPolicyT::ActivePolicy::SingleTilePolicy::LOAD_ALGORITHM> BlockLoadValues; // Unsigned word for key bits typedef typename Traits::UnsignedBits UnsignedBitsT; // Shared memory storage __shared__ union TempStorage { typename BlockRadixSortT::TempStorage sort; typename BlockLoadKeys::TempStorage load_keys; typename BlockLoadValues::TempStorage load_values; } temp_storage; // Keys and values for the block KeyT keys[ITEMS_PER_THREAD]; ValueT values[ITEMS_PER_THREAD]; // Get default (min/max) value for out-of-bounds keys UnsignedBitsT default_key_bits = (IS_DESCENDING) ? Traits::LOWEST_KEY : Traits::MAX_KEY; KeyT default_key = reinterpret_cast(default_key_bits); // Load keys BlockLoadKeys(temp_storage.load_keys).Load(d_keys_in, keys, num_items, default_key); CTA_SYNC(); // Load values if (!KEYS_ONLY) { BlockLoadValues(temp_storage.load_values).Load(d_values_in, values, num_items); CTA_SYNC(); } // Sort tile BlockRadixSortT(temp_storage.sort).SortBlockedToStriped( keys, values, current_bit, end_bit, Int2Type(), Int2Type()); // Store keys and values #pragma unroll for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) { int item_offset = ITEM * BLOCK_THREADS + threadIdx.x; if (item_offset < num_items) { d_keys_out[item_offset] = keys[ITEM]; if (!KEYS_ONLY) d_values_out[item_offset] = values[ITEM]; } } } /** * Segmented radix sorting pass (one block per segment) */ template < typename ChainedPolicyT, ///< Chained tuning policy bool ALT_DIGIT_BITS, ///< Whether or not to use the alternate (lower-bits) policy bool IS_DESCENDING, ///< Whether or not the sorted-order is high-to-low typename KeyT, ///< Key type typename ValueT, ///< Value type typename OffsetIteratorT, ///< Random-access input iterator type for reading segment offsets \iterator typename OffsetT> ///< Signed integer type for global offsets __launch_bounds__ (int((ALT_DIGIT_BITS) ? 
ChainedPolicyT::ActivePolicy::AltSegmentedPolicy::BLOCK_THREADS : ChainedPolicyT::ActivePolicy::SegmentedPolicy::BLOCK_THREADS)) __global__ void DeviceSegmentedRadixSortKernel( const KeyT *d_keys_in, ///< [in] Input keys buffer KeyT *d_keys_out, ///< [in] Output keys buffer const ValueT *d_values_in, ///< [in] Input values buffer ValueT *d_values_out, ///< [in] Output values buffer OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. int /*num_segments*/, ///< [in] The number of segments that comprise the sorting data int current_bit, ///< [in] Bit position of current radix digit int pass_bits) ///< [in] Number of bits of current radix digit { // // Constants // typedef typename If<(ALT_DIGIT_BITS), typename ChainedPolicyT::ActivePolicy::AltSegmentedPolicy, typename ChainedPolicyT::ActivePolicy::SegmentedPolicy>::Type SegmentedPolicyT; enum { BLOCK_THREADS = SegmentedPolicyT::BLOCK_THREADS, ITEMS_PER_THREAD = SegmentedPolicyT::ITEMS_PER_THREAD, RADIX_BITS = SegmentedPolicyT::RADIX_BITS, TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, RADIX_DIGITS = 1 << RADIX_BITS, KEYS_ONLY = Equals::VALUE, }; // Upsweep type typedef AgentRadixSortUpsweep< AgentRadixSortUpsweepPolicy, KeyT, OffsetT> BlockUpsweepT; // Digit-scan type typedef BlockScan DigitScanT; // Downsweep type typedef AgentRadixSortDownsweep BlockDownsweepT; enum { /// Number of bin-starting offsets tracked per thread BINS_TRACKED_PER_THREAD = BlockDownsweepT::BINS_TRACKED_PER_THREAD }; // // Process input tiles // // Shared memory storage __shared__ union { typename BlockUpsweepT::TempStorage upsweep; typename BlockDownsweepT::TempStorage downsweep; struct { volatile OffsetT reverse_counts_in[RADIX_DIGITS]; volatile OffsetT reverse_counts_out[RADIX_DIGITS]; typename DigitScanT::TempStorage scan; }; } temp_storage; OffsetT segment_begin = d_begin_offsets[blockIdx.x]; OffsetT segment_end = d_end_offsets[blockIdx.x]; OffsetT num_items = segment_end - segment_begin; // Check if empty segment if (num_items <= 0) return; // Upsweep BlockUpsweepT upsweep(temp_storage.upsweep, d_keys_in, current_bit, pass_bits); upsweep.ProcessRegion(segment_begin, segment_end); CTA_SYNC(); // The count of each digit value in this pass (valid in the first RADIX_DIGITS threads) OffsetT bin_count[BINS_TRACKED_PER_THREAD]; upsweep.ExtractCounts(bin_count); CTA_SYNC(); if (IS_DESCENDING) { // Reverse bin counts #pragma unroll for (int track = 0; track < BINS_TRACKED_PER_THREAD; ++track) { int bin_idx = (threadIdx.x * BINS_TRACKED_PER_THREAD) + track; if ((BLOCK_THREADS == RADIX_DIGITS) || (bin_idx < RADIX_DIGITS)) temp_storage.reverse_counts_in[bin_idx] = bin_count[track]; } CTA_SYNC(); #pragma unroll for (int track = 0; track < BINS_TRACKED_PER_THREAD; ++track) { int bin_idx = (threadIdx.x * BINS_TRACKED_PER_THREAD) + track; if ((BLOCK_THREADS == RADIX_DIGITS) || (bin_idx < RADIX_DIGITS)) bin_count[track] = temp_storage.reverse_counts_in[RADIX_DIGITS - bin_idx - 1]; } } // Scan OffsetT bin_offset[BINS_TRACKED_PER_THREAD]; // The global scatter base offset for each digit value in this pass (valid in the first RADIX_DIGITS 
threads) DigitScanT(temp_storage.scan).ExclusiveSum(bin_count, bin_offset); #pragma unroll for (int track = 0; track < BINS_TRACKED_PER_THREAD; ++track) { bin_offset[track] += segment_begin; } if (IS_DESCENDING) { // Reverse bin offsets #pragma unroll for (int track = 0; track < BINS_TRACKED_PER_THREAD; ++track) { int bin_idx = (threadIdx.x * BINS_TRACKED_PER_THREAD) + track; if ((BLOCK_THREADS == RADIX_DIGITS) || (bin_idx < RADIX_DIGITS)) temp_storage.reverse_counts_out[threadIdx.x] = bin_offset[track]; } CTA_SYNC(); #pragma unroll for (int track = 0; track < BINS_TRACKED_PER_THREAD; ++track) { int bin_idx = (threadIdx.x * BINS_TRACKED_PER_THREAD) + track; if ((BLOCK_THREADS == RADIX_DIGITS) || (bin_idx < RADIX_DIGITS)) bin_offset[track] = temp_storage.reverse_counts_out[RADIX_DIGITS - bin_idx - 1]; } } CTA_SYNC(); // Downsweep BlockDownsweepT downsweep(temp_storage.downsweep, bin_offset, num_items, d_keys_in, d_keys_out, d_values_in, d_values_out, current_bit, pass_bits); downsweep.ProcessRegion(segment_begin, segment_end); } /****************************************************************************** * Policy ******************************************************************************/ /** * Tuning policy for kernel specialization */ template < typename KeyT, ///< Key type typename ValueT, ///< Value type typename OffsetT> ///< Signed integer type for global offsets struct DeviceRadixSortPolicy { //------------------------------------------------------------------------------ // Constants //------------------------------------------------------------------------------ enum { // Whether this is a keys-only (or key-value) sort KEYS_ONLY = (Equals::VALUE), // Relative size of KeyT type to a 4-byte word SCALE_FACTOR_4B = (CUB_MAX(sizeof(KeyT), sizeof(ValueT)) + 3) / 4, }; //------------------------------------------------------------------------------ // Architecture-specific tuning policies //------------------------------------------------------------------------------ /// SM13 struct Policy130 : ChainedPolicy<130, Policy130, Policy130> { enum { PRIMARY_RADIX_BITS = 5, ALT_RADIX_BITS = PRIMARY_RADIX_BITS - 1, }; // Keys-only upsweep policies typedef AgentRadixSortUpsweepPolicy <128, CUB_MAX(1, 19 / SCALE_FACTOR_4B), LOAD_DEFAULT, PRIMARY_RADIX_BITS> UpsweepPolicyKeys; typedef AgentRadixSortUpsweepPolicy <128, CUB_MAX(1, 15 / SCALE_FACTOR_4B), LOAD_DEFAULT, ALT_RADIX_BITS> AltUpsweepPolicyKeys; // Key-value pairs upsweep policies typedef AgentRadixSortUpsweepPolicy <128, CUB_MAX(1, 19 / SCALE_FACTOR_4B), LOAD_DEFAULT, PRIMARY_RADIX_BITS> UpsweepPolicyPairs; typedef AgentRadixSortUpsweepPolicy <128, CUB_MAX(1, 15 / SCALE_FACTOR_4B), LOAD_DEFAULT, ALT_RADIX_BITS> AltUpsweepPolicyPairs; // Upsweep policies typedef typename If::Type UpsweepPolicy; typedef typename If::Type AltUpsweepPolicy; // Scan policy typedef AgentScanPolicy <256, 4, BLOCK_LOAD_VECTORIZE, LOAD_DEFAULT, BLOCK_STORE_VECTORIZE, BLOCK_SCAN_WARP_SCANS> ScanPolicy; // Keys-only downsweep policies typedef AgentRadixSortDownsweepPolicy <64, CUB_MAX(1, 19 / SCALE_FACTOR_4B), BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, PRIMARY_RADIX_BITS> DownsweepPolicyKeys; typedef AgentRadixSortDownsweepPolicy <128, CUB_MAX(1, 15 / SCALE_FACTOR_4B), BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, ALT_RADIX_BITS> AltDownsweepPolicyKeys; // Key-value pairs downsweep policies typedef AgentRadixSortDownsweepPolicy <64, CUB_MAX(1, 19 / SCALE_FACTOR_4B), 
BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, PRIMARY_RADIX_BITS> DownsweepPolicyPairs; typedef AgentRadixSortDownsweepPolicy <128, CUB_MAX(1, 15 / SCALE_FACTOR_4B), BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, ALT_RADIX_BITS> AltDownsweepPolicyPairs; // Downsweep policies typedef typename If::Type DownsweepPolicy; typedef typename If::Type AltDownsweepPolicy; // Single-tile policy typedef DownsweepPolicy SingleTilePolicy; // Segmented policies typedef DownsweepPolicy SegmentedPolicy; typedef AltDownsweepPolicy AltSegmentedPolicy; }; /// SM20 struct Policy200 : ChainedPolicy<200, Policy200, Policy130> { enum { PRIMARY_RADIX_BITS = 5, ALT_RADIX_BITS = PRIMARY_RADIX_BITS - 1, }; // Keys-only upsweep policies typedef AgentRadixSortUpsweepPolicy <64, CUB_MAX(1, 18 / SCALE_FACTOR_4B), LOAD_DEFAULT, PRIMARY_RADIX_BITS> UpsweepPolicyKeys; typedef AgentRadixSortUpsweepPolicy <64, CUB_MAX(1, 18 / SCALE_FACTOR_4B), LOAD_DEFAULT, ALT_RADIX_BITS> AltUpsweepPolicyKeys; // Key-value pairs upsweep policies typedef AgentRadixSortUpsweepPolicy <128, CUB_MAX(1, 13 / SCALE_FACTOR_4B), LOAD_DEFAULT, PRIMARY_RADIX_BITS> UpsweepPolicyPairs; typedef AgentRadixSortUpsweepPolicy <128, CUB_MAX(1, 13 / SCALE_FACTOR_4B), LOAD_DEFAULT, ALT_RADIX_BITS> AltUpsweepPolicyPairs; // Upsweep policies typedef typename If::Type UpsweepPolicy; typedef typename If::Type AltUpsweepPolicy; // Scan policy typedef AgentScanPolicy <512, 4, BLOCK_LOAD_VECTORIZE, LOAD_DEFAULT, BLOCK_STORE_VECTORIZE, BLOCK_SCAN_RAKING_MEMOIZE> ScanPolicy; // Keys-only downsweep policies typedef AgentRadixSortDownsweepPolicy <64, CUB_MAX(1, 18 / SCALE_FACTOR_4B), BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, PRIMARY_RADIX_BITS> DownsweepPolicyKeys; typedef AgentRadixSortDownsweepPolicy <64, CUB_MAX(1, 18 / SCALE_FACTOR_4B), BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, ALT_RADIX_BITS> AltDownsweepPolicyKeys; // Key-value pairs downsweep policies typedef AgentRadixSortDownsweepPolicy <128, CUB_MAX(1, 13 / SCALE_FACTOR_4B), BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, PRIMARY_RADIX_BITS> DownsweepPolicyPairs; typedef AgentRadixSortDownsweepPolicy <128, CUB_MAX(1, 13 / SCALE_FACTOR_4B), BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, ALT_RADIX_BITS> AltDownsweepPolicyPairs; // Downsweep policies typedef typename If::Type DownsweepPolicy; typedef typename If::Type AltDownsweepPolicy; // Single-tile policy typedef DownsweepPolicy SingleTilePolicy; // Segmented policies typedef DownsweepPolicy SegmentedPolicy; typedef AltDownsweepPolicy AltSegmentedPolicy; }; /// SM30 struct Policy300 : ChainedPolicy<300, Policy300, Policy200> { enum { PRIMARY_RADIX_BITS = 5, ALT_RADIX_BITS = PRIMARY_RADIX_BITS - 1, }; // Keys-only upsweep policies typedef AgentRadixSortUpsweepPolicy <256, CUB_MAX(1, 7 / SCALE_FACTOR_4B), LOAD_DEFAULT, PRIMARY_RADIX_BITS> UpsweepPolicyKeys; typedef AgentRadixSortUpsweepPolicy <256, CUB_MAX(1, 7 / SCALE_FACTOR_4B), LOAD_DEFAULT, ALT_RADIX_BITS> AltUpsweepPolicyKeys; // Key-value pairs upsweep policies typedef AgentRadixSortUpsweepPolicy <256, CUB_MAX(1, 5 / SCALE_FACTOR_4B), LOAD_DEFAULT, PRIMARY_RADIX_BITS> UpsweepPolicyPairs; typedef AgentRadixSortUpsweepPolicy <256, CUB_MAX(1, 5 / SCALE_FACTOR_4B), LOAD_DEFAULT, ALT_RADIX_BITS> AltUpsweepPolicyPairs; // Upsweep policies typedef typename If::Type UpsweepPolicy; typedef typename 
If::Type AltUpsweepPolicy; // Scan policy typedef AgentScanPolicy <1024, 4, BLOCK_LOAD_VECTORIZE, LOAD_DEFAULT, BLOCK_STORE_VECTORIZE, BLOCK_SCAN_WARP_SCANS> ScanPolicy; // Keys-only downsweep policies typedef AgentRadixSortDownsweepPolicy <128, CUB_MAX(1, 14 / SCALE_FACTOR_4B), BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, PRIMARY_RADIX_BITS> DownsweepPolicyKeys; typedef AgentRadixSortDownsweepPolicy <128, CUB_MAX(1, 14 / SCALE_FACTOR_4B), BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, ALT_RADIX_BITS> AltDownsweepPolicyKeys; // Key-value pairs downsweep policies typedef AgentRadixSortDownsweepPolicy <128, CUB_MAX(1, 10 / SCALE_FACTOR_4B), BLOCK_LOAD_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, PRIMARY_RADIX_BITS> DownsweepPolicyPairs; typedef AgentRadixSortDownsweepPolicy <128, CUB_MAX(1, 10 / SCALE_FACTOR_4B), BLOCK_LOAD_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, ALT_RADIX_BITS> AltDownsweepPolicyPairs; // Downsweep policies typedef typename If::Type DownsweepPolicy; typedef typename If::Type AltDownsweepPolicy; // Single-tile policy typedef DownsweepPolicy SingleTilePolicy; // Segmented policies typedef DownsweepPolicy SegmentedPolicy; typedef AltDownsweepPolicy AltSegmentedPolicy; }; /// SM35 struct Policy350 : ChainedPolicy<350, Policy350, Policy300> { enum { PRIMARY_RADIX_BITS = 6, // 1.72B 32b keys/s, 1.17B 32b pairs/s, 1.55B 32b segmented keys/s (K40m) }; // Scan policy typedef AgentScanPolicy <1024, 4, BLOCK_LOAD_VECTORIZE, LOAD_DEFAULT, BLOCK_STORE_VECTORIZE, BLOCK_SCAN_WARP_SCANS> ScanPolicy; // Keys-only downsweep policies typedef AgentRadixSortDownsweepPolicy <128, CUB_MAX(1, 9 / SCALE_FACTOR_4B), BLOCK_LOAD_WARP_TRANSPOSE, LOAD_LDG, RADIX_RANK_MATCH, BLOCK_SCAN_WARP_SCANS, PRIMARY_RADIX_BITS> DownsweepPolicyKeys; typedef AgentRadixSortDownsweepPolicy <64, CUB_MAX(1, 18 / SCALE_FACTOR_4B), BLOCK_LOAD_DIRECT, LOAD_LDG, RADIX_RANK_MEMOIZE, BLOCK_SCAN_WARP_SCANS, PRIMARY_RADIX_BITS - 1> AltDownsweepPolicyKeys; // Key-value pairs downsweep policies typedef DownsweepPolicyKeys DownsweepPolicyPairs; typedef AgentRadixSortDownsweepPolicy <128, CUB_MAX(1, 15 / SCALE_FACTOR_4B), BLOCK_LOAD_DIRECT, LOAD_LDG, RADIX_RANK_MEMOIZE, BLOCK_SCAN_WARP_SCANS, PRIMARY_RADIX_BITS - 1> AltDownsweepPolicyPairs; // Downsweep policies typedef typename If::Type DownsweepPolicy; typedef typename If::Type AltDownsweepPolicy; // Upsweep policies typedef DownsweepPolicy UpsweepPolicy; typedef AltDownsweepPolicy AltUpsweepPolicy; // Single-tile policy typedef DownsweepPolicy SingleTilePolicy; // Segmented policies typedef DownsweepPolicy SegmentedPolicy; typedef AltDownsweepPolicy AltSegmentedPolicy; }; /// SM50 struct Policy500 : ChainedPolicy<500, Policy500, Policy350> { enum { PRIMARY_RADIX_BITS = 7, // 3.5B 32b keys/s, 1.92B 32b pairs/s (TitanX) SINGLE_TILE_RADIX_BITS = 6, SEGMENTED_RADIX_BITS = 6, // 3.1B 32b segmented keys/s (TitanX) }; // ScanPolicy typedef AgentScanPolicy <512, 23, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, BLOCK_STORE_WARP_TRANSPOSE, BLOCK_SCAN_RAKING_MEMOIZE> ScanPolicy; // Downsweep policies typedef AgentRadixSortDownsweepPolicy <160, CUB_MAX(1, 39 / SCALE_FACTOR_4B), BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, PRIMARY_RADIX_BITS> DownsweepPolicy; typedef AgentRadixSortDownsweepPolicy <256, CUB_MAX(1, 16 / SCALE_FACTOR_4B), BLOCK_LOAD_DIRECT, LOAD_LDG, RADIX_RANK_MEMOIZE, BLOCK_SCAN_RAKING_MEMOIZE, PRIMARY_RADIX_BITS - 1> 
AltDownsweepPolicy; // Upsweep policies typedef DownsweepPolicy UpsweepPolicy; typedef AltDownsweepPolicy AltUpsweepPolicy; // Single-tile policy typedef AgentRadixSortDownsweepPolicy <256, CUB_MAX(1, 19 / SCALE_FACTOR_4B), BLOCK_LOAD_DIRECT, LOAD_LDG, RADIX_RANK_MEMOIZE, BLOCK_SCAN_WARP_SCANS, SINGLE_TILE_RADIX_BITS> SingleTilePolicy; // Segmented policies typedef AgentRadixSortDownsweepPolicy <192, CUB_MAX(1, 31 / SCALE_FACTOR_4B), BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_MEMOIZE, BLOCK_SCAN_WARP_SCANS, SEGMENTED_RADIX_BITS> SegmentedPolicy; typedef AgentRadixSortDownsweepPolicy <256, CUB_MAX(1, 11 / SCALE_FACTOR_4B), BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_MEMOIZE, BLOCK_SCAN_WARP_SCANS, SEGMENTED_RADIX_BITS - 1> AltSegmentedPolicy; }; /// SM60 (GP100) struct Policy600 : ChainedPolicy<600, Policy600, Policy500> { enum { PRIMARY_RADIX_BITS = 7, // 6.9B 32b keys/s (Quadro P100) SINGLE_TILE_RADIX_BITS = 6, SEGMENTED_RADIX_BITS = 6, // 5.9B 32b segmented keys/s (Quadro P100) }; // ScanPolicy typedef AgentScanPolicy <512, 23, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, BLOCK_STORE_WARP_TRANSPOSE, BLOCK_SCAN_RAKING_MEMOIZE> ScanPolicy; // Downsweep policies typedef AgentRadixSortDownsweepPolicy <256, CUB_MAX(1, 25 / SCALE_FACTOR_4B), BLOCK_LOAD_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_MATCH, BLOCK_SCAN_WARP_SCANS, PRIMARY_RADIX_BITS> DownsweepPolicy; typedef AgentRadixSortDownsweepPolicy <192, CUB_MAX(1, 39 / SCALE_FACTOR_4B), BLOCK_LOAD_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_MEMOIZE, BLOCK_SCAN_WARP_SCANS, PRIMARY_RADIX_BITS - 1> AltDownsweepPolicy; // Upsweep policies typedef DownsweepPolicy UpsweepPolicy; typedef AltDownsweepPolicy AltUpsweepPolicy; // Single-tile policy typedef AgentRadixSortDownsweepPolicy <256, CUB_MAX(1, 19 / SCALE_FACTOR_4B), BLOCK_LOAD_DIRECT, LOAD_LDG, RADIX_RANK_MEMOIZE, BLOCK_SCAN_WARP_SCANS, SINGLE_TILE_RADIX_BITS> SingleTilePolicy; // Segmented policies typedef AgentRadixSortDownsweepPolicy <192, CUB_MAX(1, 39 / SCALE_FACTOR_4B), BLOCK_LOAD_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_MEMOIZE, BLOCK_SCAN_WARP_SCANS, SEGMENTED_RADIX_BITS> SegmentedPolicy; typedef AgentRadixSortDownsweepPolicy <384, CUB_MAX(1, 11 / SCALE_FACTOR_4B), BLOCK_LOAD_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_MEMOIZE, BLOCK_SCAN_WARP_SCANS, SEGMENTED_RADIX_BITS - 1> AltSegmentedPolicy; }; /// SM61 (GP104) struct Policy610 : ChainedPolicy<610, Policy610, Policy600> { enum { PRIMARY_RADIX_BITS = 7, // 3.4B 32b keys/s, 1.83B 32b pairs/s (1080) SINGLE_TILE_RADIX_BITS = 6, SEGMENTED_RADIX_BITS = 6, // 3.3B 32b segmented keys/s (1080) }; // ScanPolicy typedef AgentScanPolicy <512, 23, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, BLOCK_STORE_WARP_TRANSPOSE, BLOCK_SCAN_RAKING_MEMOIZE> ScanPolicy; // Downsweep policies typedef AgentRadixSortDownsweepPolicy <384, CUB_MAX(1, 31 / SCALE_FACTOR_4B), BLOCK_LOAD_DIRECT, LOAD_DEFAULT, RADIX_RANK_MATCH, BLOCK_SCAN_RAKING_MEMOIZE, PRIMARY_RADIX_BITS> DownsweepPolicy; typedef AgentRadixSortDownsweepPolicy <256, CUB_MAX(1, 35 / SCALE_FACTOR_4B), BLOCK_LOAD_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_MEMOIZE, BLOCK_SCAN_RAKING_MEMOIZE, PRIMARY_RADIX_BITS - 1> AltDownsweepPolicy; // Upsweep policies typedef AgentRadixSortUpsweepPolicy <128, CUB_MAX(1, 16 / SCALE_FACTOR_4B), LOAD_LDG, PRIMARY_RADIX_BITS> UpsweepPolicy; typedef AgentRadixSortUpsweepPolicy <128, CUB_MAX(1, 16 / SCALE_FACTOR_4B), LOAD_LDG, PRIMARY_RADIX_BITS - 1> AltUpsweepPolicy; // Single-tile policy typedef AgentRadixSortDownsweepPolicy <256, CUB_MAX(1, 19 / SCALE_FACTOR_4B), BLOCK_LOAD_DIRECT, LOAD_LDG, 
RADIX_RANK_MEMOIZE, BLOCK_SCAN_WARP_SCANS, SINGLE_TILE_RADIX_BITS> SingleTilePolicy; // Segmented policies typedef AgentRadixSortDownsweepPolicy <192, CUB_MAX(1, 39 / SCALE_FACTOR_4B), BLOCK_LOAD_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_MEMOIZE, BLOCK_SCAN_WARP_SCANS, SEGMENTED_RADIX_BITS> SegmentedPolicy; typedef AgentRadixSortDownsweepPolicy <384, CUB_MAX(1, 11 / SCALE_FACTOR_4B), BLOCK_LOAD_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_MEMOIZE, BLOCK_SCAN_WARP_SCANS, SEGMENTED_RADIX_BITS - 1> AltSegmentedPolicy; }; /// SM62 (Tegra, less RF) struct Policy620 : ChainedPolicy<620, Policy620, Policy610> { enum { PRIMARY_RADIX_BITS = 5, ALT_RADIX_BITS = PRIMARY_RADIX_BITS - 1, }; // ScanPolicy typedef AgentScanPolicy <512, 23, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, BLOCK_STORE_WARP_TRANSPOSE, BLOCK_SCAN_RAKING_MEMOIZE> ScanPolicy; // Downsweep policies typedef AgentRadixSortDownsweepPolicy <256, CUB_MAX(1, 16 / SCALE_FACTOR_4B), BLOCK_LOAD_DIRECT, LOAD_DEFAULT, RADIX_RANK_MEMOIZE, BLOCK_SCAN_RAKING_MEMOIZE, PRIMARY_RADIX_BITS> DownsweepPolicy; typedef AgentRadixSortDownsweepPolicy <256, CUB_MAX(1, 16 / SCALE_FACTOR_4B), BLOCK_LOAD_DIRECT, LOAD_DEFAULT, RADIX_RANK_MEMOIZE, BLOCK_SCAN_RAKING_MEMOIZE, ALT_RADIX_BITS> AltDownsweepPolicy; // Upsweep policies typedef DownsweepPolicy UpsweepPolicy; typedef AltDownsweepPolicy AltUpsweepPolicy; // Single-tile policy typedef AgentRadixSortDownsweepPolicy <256, CUB_MAX(1, 19 / SCALE_FACTOR_4B), BLOCK_LOAD_DIRECT, LOAD_LDG, RADIX_RANK_MEMOIZE, BLOCK_SCAN_WARP_SCANS, PRIMARY_RADIX_BITS> SingleTilePolicy; // Segmented policies typedef DownsweepPolicy SegmentedPolicy; typedef AltDownsweepPolicy AltSegmentedPolicy; }; /// SM70 (GV100) struct Policy700 : ChainedPolicy<700, Policy700, Policy620> { enum { PRIMARY_RADIX_BITS = 6, // 7.62B 32b keys/s (GV100) SINGLE_TILE_RADIX_BITS = 6, SEGMENTED_RADIX_BITS = 6, // 8.7B 32b segmented keys/s (GV100) }; // ScanPolicy typedef AgentScanPolicy <512, 23, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, BLOCK_STORE_WARP_TRANSPOSE, BLOCK_SCAN_RAKING_MEMOIZE> ScanPolicy; // Downsweep policies typedef AgentRadixSortDownsweepPolicy <256, CUB_MAX(1, 47 / SCALE_FACTOR_4B), BLOCK_LOAD_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_MEMOIZE, BLOCK_SCAN_WARP_SCANS, PRIMARY_RADIX_BITS> DownsweepPolicy; typedef AgentRadixSortDownsweepPolicy <384, CUB_MAX(1, 29 / SCALE_FACTOR_4B), BLOCK_LOAD_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_MEMOIZE, BLOCK_SCAN_WARP_SCANS, PRIMARY_RADIX_BITS - 1> AltDownsweepPolicy; // Upsweep policies typedef AgentRadixSortDownsweepPolicy <128, CUB_MAX(1, 47 / SCALE_FACTOR_4B), BLOCK_LOAD_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_MATCH, BLOCK_SCAN_WARP_SCANS, PRIMARY_RADIX_BITS> UpsweepPolicy; typedef AgentRadixSortDownsweepPolicy <128, CUB_MAX(1, 29 / SCALE_FACTOR_4B), BLOCK_LOAD_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_MATCH, BLOCK_SCAN_WARP_SCANS, PRIMARY_RADIX_BITS - 1> AltUpsweepPolicy; // Single-tile policy typedef AgentRadixSortDownsweepPolicy <256, CUB_MAX(1, 19 / SCALE_FACTOR_4B), BLOCK_LOAD_DIRECT, LOAD_LDG, RADIX_RANK_MEMOIZE, BLOCK_SCAN_WARP_SCANS, SINGLE_TILE_RADIX_BITS> SingleTilePolicy; // Segmented policies typedef AgentRadixSortDownsweepPolicy <192, CUB_MAX(1, 39 / SCALE_FACTOR_4B), BLOCK_LOAD_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_MEMOIZE, BLOCK_SCAN_WARP_SCANS, SEGMENTED_RADIX_BITS> SegmentedPolicy; typedef AgentRadixSortDownsweepPolicy <384, CUB_MAX(1, 11 / SCALE_FACTOR_4B), BLOCK_LOAD_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_MEMOIZE, BLOCK_SCAN_WARP_SCANS, SEGMENTED_RADIX_BITS - 1> AltSegmentedPolicy; }; /// MaxPolicy typedef Policy700 
MaxPolicy; }; /****************************************************************************** * Single-problem dispatch ******************************************************************************/ /** * Utility class for dispatching the appropriately-tuned kernels for device-wide radix sort */ template < bool IS_DESCENDING, ///< Whether or not the sorted-order is high-to-low typename KeyT, ///< Key type typename ValueT, ///< Value type typename OffsetT> ///< Signed integer type for global offsets struct DispatchRadixSort : DeviceRadixSortPolicy { //------------------------------------------------------------------------------ // Constants //------------------------------------------------------------------------------ enum { // Whether this is a keys-only (or key-value) sort KEYS_ONLY = (Equals::VALUE), }; //------------------------------------------------------------------------------ // Problem state //------------------------------------------------------------------------------ void *d_temp_storage; ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes; ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation DoubleBuffer &d_keys; ///< [in,out] Double-buffer whose current buffer contains the unsorted input keys and, upon return, is updated to point to the sorted output keys DoubleBuffer &d_values; ///< [in,out] Double-buffer whose current buffer contains the unsorted input values and, upon return, is updated to point to the sorted output values OffsetT num_items; ///< [in] Number of items to sort int begin_bit; ///< [in] The beginning (least-significant) bit index needed for key comparison int end_bit; ///< [in] The past-the-end (most-significant) bit index needed for key comparison cudaStream_t stream; ///< [in] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous; ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. 
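    // ------------------------------------------------------------------------
    // The problem-state members above mirror the arguments accepted by the
    // public cub::DeviceRadixSort front-end.  A minimal host-side sketch of the
    // usual two-phase call pattern (query the temporary-storage size with a NULL
    // allocation, then allocate and sort) is given below; the buffer names
    // d_key_buf, d_key_alt_buf, d_value_buf and d_value_alt_buf are hypothetical.
    //
    //     #include <cub/cub.cuh>
    //
    //     void sort_pairs_example(unsigned int *d_key_buf, unsigned int *d_key_alt_buf,
    //                             int *d_value_buf, int *d_value_alt_buf, int num_items)
    //     {
    //         cub::DoubleBuffer<unsigned int> d_keys(d_key_buf, d_key_alt_buf);
    //         cub::DoubleBuffer<int>          d_values(d_value_buf, d_value_alt_buf);
    //
    //         void   *d_temp_storage     = NULL;
    //         size_t  temp_storage_bytes = 0;
    //
    //         // First call: d_temp_storage == NULL, so only temp_storage_bytes is written
    //         cub::DeviceRadixSort::SortPairs(d_temp_storage, temp_storage_bytes,
    //                                         d_keys, d_values, num_items);
    //
    //         // Second call: run the sort
    //         cudaMalloc(&d_temp_storage, temp_storage_bytes);
    //         cub::DeviceRadixSort::SortPairs(d_temp_storage, temp_storage_bytes,
    //                                         d_keys, d_values, num_items);
    //
    //         // d_keys.Current() / d_values.Current() now reference the sorted data
    //         cudaFree(d_temp_storage);
    //     }
    // ------------------------------------------------------------------------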
int ptx_version; ///< [in] PTX version bool is_overwrite_okay; ///< [in] Whether is okay to overwrite source buffers //------------------------------------------------------------------------------ // Constructor //------------------------------------------------------------------------------ /// Constructor CUB_RUNTIME_FUNCTION __forceinline__ DispatchRadixSort( void* d_temp_storage, size_t &temp_storage_bytes, DoubleBuffer &d_keys, DoubleBuffer &d_values, OffsetT num_items, int begin_bit, int end_bit, bool is_overwrite_okay, cudaStream_t stream, bool debug_synchronous, int ptx_version) : d_temp_storage(d_temp_storage), temp_storage_bytes(temp_storage_bytes), d_keys(d_keys), d_values(d_values), num_items(num_items), begin_bit(begin_bit), end_bit(end_bit), stream(stream), debug_synchronous(debug_synchronous), ptx_version(ptx_version), is_overwrite_okay(is_overwrite_okay) {} //------------------------------------------------------------------------------ // Small-problem (single tile) invocation //------------------------------------------------------------------------------ /// Invoke a single block to sort in-core template < typename ActivePolicyT, ///< Umbrella policy active for the target device typename SingleTileKernelT> ///< Function type of cub::DeviceRadixSortSingleTileKernel CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t InvokeSingleTile( SingleTileKernelT single_tile_kernel) ///< [in] Kernel function pointer to parameterization of cub::DeviceRadixSortSingleTileKernel { #ifndef CUB_RUNTIME_ENABLED (void)single_tile_kernel; // Kernel launch not supported from this device return CubDebug(cudaErrorNotSupported ); #else cudaError error = cudaSuccess; do { // Return if the caller is simply requesting the size of the storage allocation if (d_temp_storage == NULL) { temp_storage_bytes = 1; break; } // Return if empty problem if (num_items == 0) break; // Log single_tile_kernel configuration if (debug_synchronous) _CubLog("Invoking single_tile_kernel<<<%d, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy, current bit %d, bit_grain %d\n", 1, ActivePolicyT::SingleTilePolicy::BLOCK_THREADS, (long long) stream, ActivePolicyT::SingleTilePolicy::ITEMS_PER_THREAD, 1, begin_bit, ActivePolicyT::SingleTilePolicy::RADIX_BITS); // Invoke upsweep_kernel with same grid size as downsweep_kernel single_tile_kernel<<<1, ActivePolicyT::SingleTilePolicy::BLOCK_THREADS, 0, stream>>>( d_keys.Current(), d_keys.Alternate(), d_values.Current(), d_values.Alternate(), num_items, begin_bit, end_bit); // Check for failure to launch if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; // Update selector d_keys.selector ^= 1; d_values.selector ^= 1; } while (0); return error; #endif // CUB_RUNTIME_ENABLED } //------------------------------------------------------------------------------ // Normal problem size invocation //------------------------------------------------------------------------------ /** * Invoke a three-kernel sorting pass at the current bit. 
*/ template CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t InvokePass( const KeyT *d_keys_in, KeyT *d_keys_out, const ValueT *d_values_in, ValueT *d_values_out, OffsetT *d_spine, int spine_length, int ¤t_bit, PassConfigT &pass_config) { cudaError error = cudaSuccess; do { int pass_bits = CUB_MIN(pass_config.radix_bits, (end_bit - current_bit)); // Log upsweep_kernel configuration if (debug_synchronous) _CubLog("Invoking upsweep_kernel<<<%d, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy, current bit %d, bit_grain %d\n", pass_config.even_share.grid_size, pass_config.upsweep_config.block_threads, (long long) stream, pass_config.upsweep_config.items_per_thread, pass_config.upsweep_config.sm_occupancy, current_bit, pass_bits); // Invoke upsweep_kernel with same grid size as downsweep_kernel pass_config.upsweep_kernel<<>>( d_keys_in, d_spine, num_items, current_bit, pass_bits, pass_config.even_share); // Check for failure to launch if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; // Log scan_kernel configuration if (debug_synchronous) _CubLog("Invoking scan_kernel<<<%d, %d, 0, %lld>>>(), %d items per thread\n", 1, pass_config.scan_config.block_threads, (long long) stream, pass_config.scan_config.items_per_thread); // Invoke scan_kernel pass_config.scan_kernel<<<1, pass_config.scan_config.block_threads, 0, stream>>>( d_spine, spine_length); // Check for failure to launch if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; // Log downsweep_kernel configuration if (debug_synchronous) _CubLog("Invoking downsweep_kernel<<<%d, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy\n", pass_config.even_share.grid_size, pass_config.downsweep_config.block_threads, (long long) stream, pass_config.downsweep_config.items_per_thread, pass_config.downsweep_config.sm_occupancy); // Invoke downsweep_kernel pass_config.downsweep_kernel<<>>( d_keys_in, d_keys_out, d_values_in, d_values_out, d_spine, num_items, current_bit, pass_bits, pass_config.even_share); // Check for failure to launch if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; // Update current bit current_bit += pass_bits; } while (0); return error; } /// Pass configuration structure template < typename UpsweepKernelT, typename ScanKernelT, typename DownsweepKernelT> struct PassConfig { UpsweepKernelT upsweep_kernel; KernelConfig upsweep_config; ScanKernelT scan_kernel; KernelConfig scan_config; DownsweepKernelT downsweep_kernel; KernelConfig downsweep_config; int radix_bits; int radix_digits; int max_downsweep_grid_size; GridEvenShare even_share; /// Initialize pass configuration template < typename UpsweepPolicyT, typename ScanPolicyT, typename DownsweepPolicyT> CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t InitPassConfig( UpsweepKernelT upsweep_kernel, ScanKernelT scan_kernel, DownsweepKernelT downsweep_kernel, int ptx_version, int sm_count, int num_items) { cudaError error = cudaSuccess; do { this->upsweep_kernel = upsweep_kernel; this->scan_kernel = scan_kernel; this->downsweep_kernel = downsweep_kernel; radix_bits = DownsweepPolicyT::RADIX_BITS; radix_digits = 1 << radix_bits; if (CubDebug(error = upsweep_config.Init(upsweep_kernel))) break; if 
(CubDebug(error = scan_config.Init(scan_kernel))) break; if (CubDebug(error = downsweep_config.Init(downsweep_kernel))) break; max_downsweep_grid_size = (downsweep_config.sm_occupancy * sm_count) * CUB_SUBSCRIPTION_FACTOR(ptx_version); even_share.DispatchInit( num_items, max_downsweep_grid_size, CUB_MAX(downsweep_config.tile_size, upsweep_config.tile_size)); } while (0); return error; } }; /// Invocation (run multiple digit passes) template < typename ActivePolicyT, ///< Umbrella policy active for the target device typename UpsweepKernelT, ///< Function type of cub::DeviceRadixSortUpsweepKernel typename ScanKernelT, ///< Function type of cub::SpineScanKernel typename DownsweepKernelT> ///< Function type of cub::DeviceRadixSortDownsweepKernel CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t InvokePasses( UpsweepKernelT upsweep_kernel, ///< [in] Kernel function pointer to parameterization of cub::DeviceRadixSortUpsweepKernel UpsweepKernelT alt_upsweep_kernel, ///< [in] Alternate kernel function pointer to parameterization of cub::DeviceRadixSortUpsweepKernel ScanKernelT scan_kernel, ///< [in] Kernel function pointer to parameterization of cub::SpineScanKernel DownsweepKernelT downsweep_kernel, ///< [in] Kernel function pointer to parameterization of cub::DeviceRadixSortDownsweepKernel DownsweepKernelT alt_downsweep_kernel) ///< [in] Alternate kernel function pointer to parameterization of cub::DeviceRadixSortDownsweepKernel { #ifndef CUB_RUNTIME_ENABLED (void)upsweep_kernel; (void)alt_upsweep_kernel; (void)scan_kernel; (void)downsweep_kernel; (void)alt_downsweep_kernel; // Kernel launch not supported from this device return CubDebug(cudaErrorNotSupported ); #else cudaError error = cudaSuccess; do { // Get device ordinal int device_ordinal; if (CubDebug(error = cudaGetDevice(&device_ordinal))) break; // Get SM count int sm_count; if (CubDebug(error = cudaDeviceGetAttribute (&sm_count, cudaDevAttrMultiProcessorCount, device_ordinal))) break; // Init regular and alternate-digit kernel configurations PassConfig pass_config, alt_pass_config; if ((error = pass_config.template InitPassConfig< typename ActivePolicyT::UpsweepPolicy, typename ActivePolicyT::ScanPolicy, typename ActivePolicyT::DownsweepPolicy>( upsweep_kernel, scan_kernel, downsweep_kernel, ptx_version, sm_count, num_items))) break; if ((error = alt_pass_config.template InitPassConfig< typename ActivePolicyT::AltUpsweepPolicy, typename ActivePolicyT::ScanPolicy, typename ActivePolicyT::AltDownsweepPolicy>( alt_upsweep_kernel, scan_kernel, alt_downsweep_kernel, ptx_version, sm_count, num_items))) break; // Get maximum spine length int max_grid_size = CUB_MAX(pass_config.max_downsweep_grid_size, alt_pass_config.max_downsweep_grid_size); int spine_length = (max_grid_size * pass_config.radix_digits) + pass_config.scan_config.tile_size; // Temporary storage allocation requirements void* allocations[3]; size_t allocation_sizes[3] = { spine_length * sizeof(OffsetT), // bytes needed for privatized block digit histograms (is_overwrite_okay) ? 0 : num_items * sizeof(KeyT), // bytes needed for 3rd keys buffer (is_overwrite_okay || (KEYS_ONLY)) ? 
0 : num_items * sizeof(ValueT), // bytes needed for 3rd values buffer }; // Alias the temporary allocations from the single storage blob (or compute the necessary size of the blob) if (CubDebug(error = AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes))) break; // Return if the caller is simply requesting the size of the storage allocation if (d_temp_storage == NULL) return cudaSuccess; // Pass planning. Run passes of the alternate digit-size configuration until we have an even multiple of our preferred digit size int num_bits = end_bit - begin_bit; int num_passes = (num_bits + pass_config.radix_bits - 1) / pass_config.radix_bits; bool is_num_passes_odd = num_passes & 1; int max_alt_passes = (num_passes * pass_config.radix_bits) - num_bits; int alt_end_bit = CUB_MIN(end_bit, begin_bit + (max_alt_passes * alt_pass_config.radix_bits)); // Alias the temporary storage allocations OffsetT *d_spine = static_cast(allocations[0]); DoubleBuffer d_keys_remaining_passes( (is_overwrite_okay || is_num_passes_odd) ? d_keys.Alternate() : static_cast(allocations[1]), (is_overwrite_okay) ? d_keys.Current() : (is_num_passes_odd) ? static_cast(allocations[1]) : d_keys.Alternate()); DoubleBuffer d_values_remaining_passes( (is_overwrite_okay || is_num_passes_odd) ? d_values.Alternate() : static_cast(allocations[2]), (is_overwrite_okay) ? d_values.Current() : (is_num_passes_odd) ? static_cast(allocations[2]) : d_values.Alternate()); // Run first pass, consuming from the input's current buffers int current_bit = begin_bit; if (CubDebug(error = InvokePass( d_keys.Current(), d_keys_remaining_passes.Current(), d_values.Current(), d_values_remaining_passes.Current(), d_spine, spine_length, current_bit, (current_bit < alt_end_bit) ? alt_pass_config : pass_config))) break; // Run remaining passes while (current_bit < end_bit) { if (CubDebug(error = InvokePass( d_keys_remaining_passes.d_buffers[d_keys_remaining_passes.selector], d_keys_remaining_passes.d_buffers[d_keys_remaining_passes.selector ^ 1], d_values_remaining_passes.d_buffers[d_keys_remaining_passes.selector], d_values_remaining_passes.d_buffers[d_keys_remaining_passes.selector ^ 1], d_spine, spine_length, current_bit, (current_bit < alt_end_bit) ? 
alt_pass_config : pass_config))) break;; // Invert selectors d_keys_remaining_passes.selector ^= 1; d_values_remaining_passes.selector ^= 1; } // Update selector if (!is_overwrite_okay) { num_passes = 1; // Sorted data always ends up in the other vector } d_keys.selector = (d_keys.selector + num_passes) & 1; d_values.selector = (d_values.selector + num_passes) & 1; } while (0); return error; #endif // CUB_RUNTIME_ENABLED } //------------------------------------------------------------------------------ // Chained policy invocation //------------------------------------------------------------------------------ /// Invocation template CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t Invoke() { typedef typename DispatchRadixSort::MaxPolicy MaxPolicyT; typedef typename ActivePolicyT::SingleTilePolicy SingleTilePolicyT; // Force kernel code-generation in all compiler passes if (num_items <= (SingleTilePolicyT::BLOCK_THREADS * SingleTilePolicyT::ITEMS_PER_THREAD)) { // Small, single tile size return InvokeSingleTile( DeviceRadixSortSingleTileKernel); } else { // Regular size return InvokePasses( DeviceRadixSortUpsweepKernel< MaxPolicyT, false, IS_DESCENDING, KeyT, OffsetT>, DeviceRadixSortUpsweepKernel< MaxPolicyT, true, IS_DESCENDING, KeyT, OffsetT>, RadixSortScanBinsKernel< MaxPolicyT, OffsetT>, DeviceRadixSortDownsweepKernel< MaxPolicyT, false, IS_DESCENDING, KeyT, ValueT, OffsetT>, DeviceRadixSortDownsweepKernel< MaxPolicyT, true, IS_DESCENDING, KeyT, ValueT, OffsetT>); } } //------------------------------------------------------------------------------ // Dispatch entrypoints //------------------------------------------------------------------------------ /** * Internal dispatch routine */ CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t Dispatch( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation DoubleBuffer &d_keys, ///< [in,out] Double-buffer whose current buffer contains the unsorted input keys and, upon return, is updated to point to the sorted output keys DoubleBuffer &d_values, ///< [in,out] Double-buffer whose current buffer contains the unsorted input values and, upon return, is updated to point to the sorted output values OffsetT num_items, ///< [in] Number of items to sort int begin_bit, ///< [in] The beginning (least-significant) bit index needed for key comparison int end_bit, ///< [in] The past-the-end (most-significant) bit index needed for key comparison bool is_overwrite_okay, ///< [in] Whether is okay to overwrite source buffers cudaStream_t stream, ///< [in] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous) ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. 
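    // ------------------------------------------------------------------------
    // Pass planning in InvokePasses() above splits the [begin_bit, end_bit) range
    // into digit-sized passes, running the narrower "alternate" digit size first
    // until the remaining bits divide evenly by the preferred digit size.  A
    // worked example, assuming a policy with 7-bit primary and 6-bit alternate
    // digits (e.g. Policy500 above) and full 32-bit keys:
    //
    //     num_bits       = 32 - 0       = 32
    //     num_passes     = ceil(32 / 7) = 5
    //     max_alt_passes = 5 * 7 - 32   = 3     (passes allowed to use the alt size)
    //     alt_end_bit    = 0 + 3 * 6    = 18
    //
    // so bits [0,18) are sorted in three 6-bit passes and bits [18,32) in two
    // 7-bit passes, i.e. exactly 32 bits over 5 upsweep/scan/downsweep triples.
    // ------------------------------------------------------------------------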
{ typedef typename DispatchRadixSort::MaxPolicy MaxPolicyT; cudaError_t error; do { // Get PTX version int ptx_version; if (CubDebug(error = PtxVersion(ptx_version))) break; // Create dispatch functor DispatchRadixSort dispatch( d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items, begin_bit, end_bit, is_overwrite_okay, stream, debug_synchronous, ptx_version); // Dispatch to chained policy if (CubDebug(error = MaxPolicyT::Invoke(ptx_version, dispatch))) break; } while (0); return error; } }; /****************************************************************************** * Segmented dispatch ******************************************************************************/ /** * Utility class for dispatching the appropriately-tuned kernels for segmented device-wide radix sort */ template < bool IS_DESCENDING, ///< Whether or not the sorted-order is high-to-low typename KeyT, ///< Key type typename ValueT, ///< Value type typename OffsetIteratorT, ///< Random-access input iterator type for reading segment offsets \iterator typename OffsetT> ///< Signed integer type for global offsets struct DispatchSegmentedRadixSort : DeviceRadixSortPolicy { //------------------------------------------------------------------------------ // Constants //------------------------------------------------------------------------------ enum { // Whether this is a keys-only (or key-value) sort KEYS_ONLY = (Equals::VALUE), }; //------------------------------------------------------------------------------ // Parameter members //------------------------------------------------------------------------------ void *d_temp_storage; ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes; ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation DoubleBuffer &d_keys; ///< [in,out] Double-buffer whose current buffer contains the unsorted input keys and, upon return, is updated to point to the sorted output keys DoubleBuffer &d_values; ///< [in,out] Double-buffer whose current buffer contains the unsorted input values and, upon return, is updated to point to the sorted output values OffsetT num_items; ///< [in] Number of items to sort OffsetT num_segments; ///< [in] The number of segments that comprise the sorting data OffsetIteratorT d_begin_offsets; ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* OffsetIteratorT d_end_offsets; ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. int begin_bit; ///< [in] The beginning (least-significant) bit index needed for key comparison int end_bit; ///< [in] The past-the-end (most-significant) bit index needed for key comparison cudaStream_t stream; ///< [in] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous; ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. 
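    // ------------------------------------------------------------------------
    // A minimal host-side sketch of how these parameters are typically supplied
    // via the public cub::DeviceSegmentedRadixSort front-end (the array names
    // d_keys_in, d_keys_out and d_offsets are hypothetical).  For segments
    // described by an offsets array of length num_segments + 1, the begin/end
    // offset iterators are simply d_offsets and d_offsets + 1:
    //
    //     #include <cub/cub.cuh>
    //
    //     void segmented_sort_example(const float *d_keys_in, float *d_keys_out,
    //                                 const int *d_offsets,
    //                                 int num_items, int num_segments)
    //     {
    //         void   *d_temp_storage     = NULL;
    //         size_t  temp_storage_bytes = 0;
    //
    //         // Query temporary storage requirements, then allocate and sort
    //         cub::DeviceSegmentedRadixSort::SortKeys(d_temp_storage, temp_storage_bytes,
    //             d_keys_in, d_keys_out, num_items, num_segments, d_offsets, d_offsets + 1);
    //         cudaMalloc(&d_temp_storage, temp_storage_bytes);
    //         cub::DeviceSegmentedRadixSort::SortKeys(d_temp_storage, temp_storage_bytes,
    //             d_keys_in, d_keys_out, num_items, num_segments, d_offsets, d_offsets + 1);
    //         cudaFree(d_temp_storage);
    //     }
    // ------------------------------------------------------------------------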
int ptx_version; ///< [in] PTX version bool is_overwrite_okay; ///< [in] Whether is okay to overwrite source buffers //------------------------------------------------------------------------------ // Constructors //------------------------------------------------------------------------------ /// Constructor CUB_RUNTIME_FUNCTION __forceinline__ DispatchSegmentedRadixSort( void* d_temp_storage, size_t &temp_storage_bytes, DoubleBuffer &d_keys, DoubleBuffer &d_values, OffsetT num_items, OffsetT num_segments, OffsetIteratorT d_begin_offsets, OffsetIteratorT d_end_offsets, int begin_bit, int end_bit, bool is_overwrite_okay, cudaStream_t stream, bool debug_synchronous, int ptx_version) : d_temp_storage(d_temp_storage), temp_storage_bytes(temp_storage_bytes), d_keys(d_keys), d_values(d_values), num_items(num_items), num_segments(num_segments), d_begin_offsets(d_begin_offsets), d_end_offsets(d_end_offsets), begin_bit(begin_bit), end_bit(end_bit), is_overwrite_okay(is_overwrite_okay), stream(stream), debug_synchronous(debug_synchronous), ptx_version(ptx_version) {} //------------------------------------------------------------------------------ // Multi-segment invocation //------------------------------------------------------------------------------ /// Invoke a three-kernel sorting pass at the current bit. template CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t InvokePass( const KeyT *d_keys_in, KeyT *d_keys_out, const ValueT *d_values_in, ValueT *d_values_out, int ¤t_bit, PassConfigT &pass_config) { cudaError error = cudaSuccess; do { int pass_bits = CUB_MIN(pass_config.radix_bits, (end_bit - current_bit)); // Log kernel configuration if (debug_synchronous) _CubLog("Invoking segmented_kernels<<<%d, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy, current bit %d, bit_grain %d\n", num_segments, pass_config.segmented_config.block_threads, (long long) stream, pass_config.segmented_config.items_per_thread, pass_config.segmented_config.sm_occupancy, current_bit, pass_bits); pass_config.segmented_kernel<<>>( d_keys_in, d_keys_out, d_values_in, d_values_out, d_begin_offsets, d_end_offsets, num_segments, current_bit, pass_bits); // Check for failure to launch if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; // Update current bit current_bit += pass_bits; } while (0); return error; } /// PassConfig data structure template struct PassConfig { SegmentedKernelT segmented_kernel; KernelConfig segmented_config; int radix_bits; int radix_digits; /// Initialize pass configuration template CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t InitPassConfig(SegmentedKernelT segmented_kernel) { this->segmented_kernel = segmented_kernel; this->radix_bits = SegmentedPolicyT::RADIX_BITS; this->radix_digits = 1 << radix_bits; return CubDebug(segmented_config.Init(segmented_kernel)); } }; /// Invocation (run multiple digit passes) template < typename ActivePolicyT, ///< Umbrella policy active for the target device typename SegmentedKernelT> ///< Function type of cub::DeviceSegmentedRadixSortKernel CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t InvokePasses( SegmentedKernelT segmented_kernel, ///< [in] Kernel function pointer to parameterization of cub::DeviceSegmentedRadixSortKernel SegmentedKernelT alt_segmented_kernel) ///< [in] Alternate kernel function pointer to parameterization of cub::DeviceSegmentedRadixSortKernel { #ifndef CUB_RUNTIME_ENABLED (void)segmented_kernel; 
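            // (CUB_RUNTIME_ENABLED is, roughly, defined for the host compilation pass
            //  and, when CUDA dynamic parallelism / relocatable device code is enabled,
            //  for the device pass as well.  In other device passes a kernel cannot be
            //  launched from this function, so the arguments are merely consumed and
            //  cudaErrorNotSupported is reported below.)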
(void)alt_segmented_kernel; // Kernel launch not supported from this device return CubDebug(cudaErrorNotSupported ); #else cudaError error = cudaSuccess; do { // Init regular and alternate kernel configurations PassConfig pass_config, alt_pass_config; if ((error = pass_config.template InitPassConfig(segmented_kernel))) break; if ((error = alt_pass_config.template InitPassConfig(alt_segmented_kernel))) break; // Temporary storage allocation requirements void* allocations[2]; size_t allocation_sizes[2] = { (is_overwrite_okay) ? 0 : num_items * sizeof(KeyT), // bytes needed for 3rd keys buffer (is_overwrite_okay || (KEYS_ONLY)) ? 0 : num_items * sizeof(ValueT), // bytes needed for 3rd values buffer }; // Alias the temporary allocations from the single storage blob (or compute the necessary size of the blob) if (CubDebug(error = AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes))) break; // Return if the caller is simply requesting the size of the storage allocation if (d_temp_storage == NULL) { if (temp_storage_bytes == 0) temp_storage_bytes = 1; return cudaSuccess; } // Pass planning. Run passes of the alternate digit-size configuration until we have an even multiple of our preferred digit size int radix_bits = ActivePolicyT::SegmentedPolicy::RADIX_BITS; int alt_radix_bits = ActivePolicyT::AltSegmentedPolicy::RADIX_BITS; int num_bits = end_bit - begin_bit; int num_passes = (num_bits + radix_bits - 1) / radix_bits; bool is_num_passes_odd = num_passes & 1; int max_alt_passes = (num_passes * radix_bits) - num_bits; int alt_end_bit = CUB_MIN(end_bit, begin_bit + (max_alt_passes * alt_radix_bits)); DoubleBuffer d_keys_remaining_passes( (is_overwrite_okay || is_num_passes_odd) ? d_keys.Alternate() : static_cast(allocations[0]), (is_overwrite_okay) ? d_keys.Current() : (is_num_passes_odd) ? static_cast(allocations[0]) : d_keys.Alternate()); DoubleBuffer d_values_remaining_passes( (is_overwrite_okay || is_num_passes_odd) ? d_values.Alternate() : static_cast(allocations[1]), (is_overwrite_okay) ? d_values.Current() : (is_num_passes_odd) ? static_cast(allocations[1]) : d_values.Alternate()); // Run first pass, consuming from the input's current buffers int current_bit = begin_bit; if (CubDebug(error = InvokePass( d_keys.Current(), d_keys_remaining_passes.Current(), d_values.Current(), d_values_remaining_passes.Current(), current_bit, (current_bit < alt_end_bit) ? alt_pass_config : pass_config))) break; // Run remaining passes while (current_bit < end_bit) { if (CubDebug(error = InvokePass( d_keys_remaining_passes.d_buffers[d_keys_remaining_passes.selector], d_keys_remaining_passes.d_buffers[d_keys_remaining_passes.selector ^ 1], d_values_remaining_passes.d_buffers[d_keys_remaining_passes.selector], d_values_remaining_passes.d_buffers[d_keys_remaining_passes.selector ^ 1], current_bit, (current_bit < alt_end_bit) ? 
alt_pass_config : pass_config))) break; // Invert selectors and update current bit d_keys_remaining_passes.selector ^= 1; d_values_remaining_passes.selector ^= 1; } // Update selector if (!is_overwrite_okay) { num_passes = 1; // Sorted data always ends up in the other vector } d_keys.selector = (d_keys.selector + num_passes) & 1; d_values.selector = (d_values.selector + num_passes) & 1; } while (0); return error; #endif // CUB_RUNTIME_ENABLED } //------------------------------------------------------------------------------ // Chained policy invocation //------------------------------------------------------------------------------ /// Invocation template CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t Invoke() { typedef typename DispatchSegmentedRadixSort::MaxPolicy MaxPolicyT; // Force kernel code-generation in all compiler passes return InvokePasses( DeviceSegmentedRadixSortKernel, DeviceSegmentedRadixSortKernel); } //------------------------------------------------------------------------------ // Dispatch entrypoints //------------------------------------------------------------------------------ /// Internal dispatch routine CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t Dispatch( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation DoubleBuffer &d_keys, ///< [in,out] Double-buffer whose current buffer contains the unsorted input keys and, upon return, is updated to point to the sorted output keys DoubleBuffer &d_values, ///< [in,out] Double-buffer whose current buffer contains the unsorted input values and, upon return, is updated to point to the sorted output values int num_items, ///< [in] Number of items to sort int num_segments, ///< [in] The number of segments that comprise the sorting data OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. int begin_bit, ///< [in] The beginning (least-significant) bit index needed for key comparison int end_bit, ///< [in] The past-the-end (most-significant) bit index needed for key comparison bool is_overwrite_okay, ///< [in] Whether is okay to overwrite source buffers cudaStream_t stream, ///< [in] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous) ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. 
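    // ------------------------------------------------------------------------
    // Note on the DoubleBuffer bookkeeping in InvokePasses() above: the sorted
    // output may land in either buffer of d_keys / d_values depending on how many
    // passes ran, and the final
    //     d_keys.selector = (d_keys.selector + num_passes) & 1;
    // ensures that d_keys.Current() afterwards points at the sorted data.  When
    // is_overwrite_okay is false, the intermediate passes effectively ping-pong
    // through an internal third buffer and the last pass always writes into the
    // caller's alternate buffer, which is why num_passes is forced to 1 just for
    // this selector update.
    // ------------------------------------------------------------------------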
{
    typedef typename DispatchSegmentedRadixSort::MaxPolicy MaxPolicyT;
    cudaError_t error;
    do {
        // Get PTX version
        int ptx_version;
        if (CubDebug(error = PtxVersion(ptx_version))) break;

        // Create dispatch functor
        DispatchSegmentedRadixSort dispatch(
            d_temp_storage, temp_storage_bytes,
            d_keys, d_values,
            num_items, num_segments, d_begin_offsets, d_end_offsets,
            begin_bit, end_bit, is_overwrite_okay,
            stream, debug_synchronous, ptx_version);

        // Dispatch to chained policy
        if (CubDebug(error = MaxPolicyT::Invoke(ptx_version, dispatch))) break;
    }
    while (0);

    return error;
}

};

}               // CUB namespace
CUB_NS_POSTFIX  // Optional outer namespace(s)

relion-3.1.3/src/gpu_utils/cub/device/dispatch/dispatch_reduce.cuh

/******************************************************************************
 * Copyright (c) 2011, Duane Merrill.  All rights reserved.
 * Copyright (c) 2011-2017, NVIDIA CORPORATION.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of the NVIDIA CORPORATION nor the
 *       names of its contributors may be used to endorse or promote products
 *       derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 ******************************************************************************/

/**
 * \file
 * cub::DeviceReduce provides device-wide, parallel operations for computing a reduction across a sequence of data items residing within device-accessible memory.
 */

#pragma once

#include <stdio.h>
#include <iterator>

#include "../../agent/agent_reduce.cuh"
#include "../../iterator/arg_index_input_iterator.cuh"
#include "../../thread/thread_operators.cuh"
#include "../../grid/grid_even_share.cuh"
#include "../../iterator/arg_index_input_iterator.cuh"
#include "../../util_debug.cuh"
#include "../../util_device.cuh"
#include "../../util_namespace.cuh"

/// Optional outer namespace(s)
CUB_NS_PREFIX

/// CUB namespace
namespace cub {

/******************************************************************************
 * Kernel entry points
 *****************************************************************************/

/**
 * Reduce region kernel entry point (multi-block).  Computes privatized reductions, one per thread block.
*/ template < typename ChainedPolicyT, ///< Chained tuning policy typename InputIteratorT, ///< Random-access input iterator type for reading input items \iterator typename OutputIteratorT, ///< Output iterator type for recording the reduced aggregate \iterator typename OffsetT, ///< Signed integer type for global offsets typename ReductionOpT> ///< Binary reduction functor type having member T operator()(const T &a, const T &b) __launch_bounds__ (int(ChainedPolicyT::ActivePolicy::ReducePolicy::BLOCK_THREADS)) __global__ void DeviceReduceKernel( InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output aggregate OffsetT num_items, ///< [in] Total number of input data items GridEvenShare even_share, ///< [in] Even-share descriptor for mapping an equal number of tiles onto each thread block ReductionOpT reduction_op) ///< [in] Binary reduction functor { // The output value type typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? typename std::iterator_traits::value_type, // ... then the input iterator's value type, typename std::iterator_traits::value_type>::Type OutputT; // ... else the output iterator's value type // Thread block type for reducing input tiles typedef AgentReduce< typename ChainedPolicyT::ActivePolicy::ReducePolicy, InputIteratorT, OutputIteratorT, OffsetT, ReductionOpT> AgentReduceT; // Shared memory storage __shared__ typename AgentReduceT::TempStorage temp_storage; // Consume input tiles OutputT block_aggregate = AgentReduceT(temp_storage, d_in, reduction_op).ConsumeTiles(even_share); // Output result if (threadIdx.x == 0) d_out[blockIdx.x] = block_aggregate; } /** * Reduce a single tile kernel entry point (single-block). Can be used to aggregate privatized thread block reductions from a previous multi-block reduction pass. 
*/ template < typename ChainedPolicyT, ///< Chained tuning policy typename InputIteratorT, ///< Random-access input iterator type for reading input items \iterator typename OutputIteratorT, ///< Output iterator type for recording the reduced aggregate \iterator typename OffsetT, ///< Signed integer type for global offsets typename ReductionOpT, ///< Binary reduction functor type having member T operator()(const T &a, const T &b) typename OuputT> ///< Data element type that is convertible to the \p value type of \p OutputIteratorT __launch_bounds__ (int(ChainedPolicyT::ActivePolicy::SingleTilePolicy::BLOCK_THREADS), 1) __global__ void DeviceReduceSingleTileKernel( InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output aggregate OffsetT num_items, ///< [in] Total number of input data items ReductionOpT reduction_op, ///< [in] Binary reduction functor OuputT init) ///< [in] The initial value of the reduction { // Thread block type for reducing input tiles typedef AgentReduce< typename ChainedPolicyT::ActivePolicy::SingleTilePolicy, InputIteratorT, OutputIteratorT, OffsetT, ReductionOpT> AgentReduceT; // Shared memory storage __shared__ typename AgentReduceT::TempStorage temp_storage; // Check if empty problem if (num_items == 0) { if (threadIdx.x == 0) *d_out = init; return; } // Consume input tiles OuputT block_aggregate = AgentReduceT(temp_storage, d_in, reduction_op).ConsumeRange( OffsetT(0), num_items); // Output result if (threadIdx.x == 0) *d_out = reduction_op(init, block_aggregate); } /// Normalize input iterator to segment offset template __device__ __forceinline__ void NormalizeReductionOutput( T &/*val*/, OffsetT /*base_offset*/, IteratorT /*itr*/) {} /// Normalize input iterator to segment offset (specialized for arg-index) template __device__ __forceinline__ void NormalizeReductionOutput( KeyValuePairT &val, OffsetT base_offset, ArgIndexInputIterator /*itr*/) { val.key -= base_offset; } /** * Segmented reduction (one block per segment) */ template < typename ChainedPolicyT, ///< Chained tuning policy typename InputIteratorT, ///< Random-access input iterator type for reading input items \iterator typename OutputIteratorT, ///< Output iterator type for recording the reduced aggregate \iterator typename OffsetIteratorT, ///< Random-access input iterator type for reading segment offsets \iterator typename OffsetT, ///< Signed integer type for global offsets typename ReductionOpT, ///< Binary reduction functor type having member T operator()(const T &a, const T &b) typename OutputT> ///< Data element type that is convertible to the \p value type of \p OutputIteratorT __launch_bounds__ (int(ChainedPolicyT::ActivePolicy::ReducePolicy::BLOCK_THREADS)) __global__ void DeviceSegmentedReduceKernel( InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output aggregate OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. 
int /*num_segments*/, ///< [in] The number of segments that comprise the sorting data ReductionOpT reduction_op, ///< [in] Binary reduction functor OutputT init) ///< [in] The initial value of the reduction { // Thread block type for reducing input tiles typedef AgentReduce< typename ChainedPolicyT::ActivePolicy::ReducePolicy, InputIteratorT, OutputIteratorT, OffsetT, ReductionOpT> AgentReduceT; // Shared memory storage __shared__ typename AgentReduceT::TempStorage temp_storage; OffsetT segment_begin = d_begin_offsets[blockIdx.x]; OffsetT segment_end = d_end_offsets[blockIdx.x]; // Check if empty problem if (segment_begin == segment_end) { if (threadIdx.x == 0) d_out[blockIdx.x] = init; return; } // Consume input tiles OutputT block_aggregate = AgentReduceT(temp_storage, d_in, reduction_op).ConsumeRange( segment_begin, segment_end); // Normalize as needed NormalizeReductionOutput(block_aggregate, segment_begin, d_in); if (threadIdx.x == 0) d_out[blockIdx.x] = reduction_op(init, block_aggregate);; } /****************************************************************************** * Policy ******************************************************************************/ template < typename OuputT, ///< Data type typename OffsetT, ///< Signed integer type for global offsets typename ReductionOpT> ///< Binary reduction functor type having member T operator()(const T &a, const T &b) struct DeviceReducePolicy { //------------------------------------------------------------------------------ // Architecture-specific tuning policies //------------------------------------------------------------------------------ /// SM13 struct Policy130 : ChainedPolicy<130, Policy130, Policy130> { // ReducePolicy typedef AgentReducePolicy< CUB_NOMINAL_CONFIG(128, 8, OuputT), ///< Threads per block, items per thread 2, ///< Number of items per vectorized load BLOCK_REDUCE_RAKING, ///< Cooperative block-wide reduction algorithm to use LOAD_DEFAULT> ///< Cache load modifier ReducePolicy; // SingleTilePolicy typedef ReducePolicy SingleTilePolicy; // SegmentedReducePolicy typedef ReducePolicy SegmentedReducePolicy; }; /// SM20 struct Policy200 : ChainedPolicy<200, Policy200, Policy130> { // ReducePolicy (GTX 580: 178.9 GB/s @ 48M 4B items, 158.1 GB/s @ 192M 1B items) typedef AgentReducePolicy< CUB_NOMINAL_CONFIG(128, 8, OuputT), ///< Threads per block, items per thread 4, ///< Number of items per vectorized load BLOCK_REDUCE_RAKING, ///< Cooperative block-wide reduction algorithm to use LOAD_DEFAULT> ///< Cache load modifier ReducePolicy; // SingleTilePolicy typedef ReducePolicy SingleTilePolicy; // SegmentedReducePolicy typedef ReducePolicy SegmentedReducePolicy; }; /// SM30 struct Policy300 : ChainedPolicy<300, Policy300, Policy200> { // ReducePolicy (GTX670: 154.0 @ 48M 4B items) typedef AgentReducePolicy< CUB_NOMINAL_CONFIG(256, 20, OuputT), ///< Threads per block, items per thread 2, ///< Number of items per vectorized load BLOCK_REDUCE_WARP_REDUCTIONS, ///< Cooperative block-wide reduction algorithm to use LOAD_DEFAULT> ///< Cache load modifier ReducePolicy; // SingleTilePolicy typedef ReducePolicy SingleTilePolicy; // SegmentedReducePolicy typedef ReducePolicy SegmentedReducePolicy; }; /// SM35 struct Policy350 : ChainedPolicy<350, Policy350, Policy300> { // ReducePolicy (GTX Titan: 255.1 GB/s @ 48M 4B items; 228.7 GB/s @ 192M 1B items) typedef AgentReducePolicy< CUB_NOMINAL_CONFIG(256, 20, OuputT), ///< Threads per block, items per thread 4, ///< Number of items per vectorized load BLOCK_REDUCE_WARP_REDUCTIONS, 
///< Cooperative block-wide reduction algorithm to use LOAD_LDG> ///< Cache load modifier ReducePolicy; // SingleTilePolicy typedef ReducePolicy SingleTilePolicy; // SegmentedReducePolicy typedef ReducePolicy SegmentedReducePolicy; }; /// SM60 struct Policy600 : ChainedPolicy<600, Policy600, Policy350> { // ReducePolicy (P100: 591 GB/s @ 64M 4B items; 583 GB/s @ 256M 1B items) typedef AgentReducePolicy< CUB_NOMINAL_CONFIG(256, 16, OuputT), ///< Threads per block, items per thread 4, ///< Number of items per vectorized load BLOCK_REDUCE_WARP_REDUCTIONS, ///< Cooperative block-wide reduction algorithm to use LOAD_LDG> ///< Cache load modifier ReducePolicy; // SingleTilePolicy typedef ReducePolicy SingleTilePolicy; // SegmentedReducePolicy typedef ReducePolicy SegmentedReducePolicy; }; /// MaxPolicy typedef Policy600 MaxPolicy; }; /****************************************************************************** * Single-problem dispatch ******************************************************************************/ /** * Utility class for dispatching the appropriately-tuned kernels for device-wide reduction */ template < typename InputIteratorT, ///< Random-access input iterator type for reading input items \iterator typename OutputIteratorT, ///< Output iterator type for recording the reduced aggregate \iterator typename OffsetT, ///< Signed integer type for global offsets typename ReductionOpT> ///< Binary reduction functor type having member T operator()(const T &a, const T &b) struct DispatchReduce : DeviceReducePolicy< typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? typename std::iterator_traits::value_type, // ... then the input iterator's value type, typename std::iterator_traits::value_type>::Type, // ... else the output iterator's value type OffsetT, ReductionOpT> { //------------------------------------------------------------------------------ // Constants //------------------------------------------------------------------------------ // Data type of output iterator typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? typename std::iterator_traits::value_type, // ... then the input iterator's value type, typename std::iterator_traits::value_type>::Type OutputT; // ... else the output iterator's value type //------------------------------------------------------------------------------ // Problem state //------------------------------------------------------------------------------ void *d_temp_storage; ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes; ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in; ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out; ///< [out] Pointer to the output aggregate OffsetT num_items; ///< [in] Total number of input items (i.e., length of \p d_in) ReductionOpT reduction_op; ///< [in] Binary reduction functor OutputT init; ///< [in] The initial value of the reduction cudaStream_t stream; ///< [in] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous; ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. 
int ptx_version; ///< [in] PTX version //------------------------------------------------------------------------------ // Constructor //------------------------------------------------------------------------------ /// Constructor CUB_RUNTIME_FUNCTION __forceinline__ DispatchReduce( void* d_temp_storage, size_t &temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, OffsetT num_items, ReductionOpT reduction_op, OutputT init, cudaStream_t stream, bool debug_synchronous, int ptx_version) : d_temp_storage(d_temp_storage), temp_storage_bytes(temp_storage_bytes), d_in(d_in), d_out(d_out), num_items(num_items), reduction_op(reduction_op), init(init), stream(stream), debug_synchronous(debug_synchronous), ptx_version(ptx_version) {} //------------------------------------------------------------------------------ // Small-problem (single tile) invocation //------------------------------------------------------------------------------ /// Invoke a single block block to reduce in-core template < typename ActivePolicyT, ///< Umbrella policy active for the target device typename SingleTileKernelT> ///< Function type of cub::DeviceReduceSingleTileKernel CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t InvokeSingleTile( SingleTileKernelT single_tile_kernel) ///< [in] Kernel function pointer to parameterization of cub::DeviceReduceSingleTileKernel { #ifndef CUB_RUNTIME_ENABLED (void)single_tile_kernel; // Kernel launch not supported from this device return CubDebug(cudaErrorNotSupported ); #else cudaError error = cudaSuccess; do { // Return if the caller is simply requesting the size of the storage allocation if (d_temp_storage == NULL) { temp_storage_bytes = 1; break; } // Log single_reduce_sweep_kernel configuration if (debug_synchronous) _CubLog("Invoking DeviceReduceSingleTileKernel<<<1, %d, 0, %lld>>>(), %d items per thread\n", ActivePolicyT::SingleTilePolicy::BLOCK_THREADS, (long long) stream, ActivePolicyT::SingleTilePolicy::ITEMS_PER_THREAD); // Invoke single_reduce_sweep_kernel single_tile_kernel<<<1, ActivePolicyT::SingleTilePolicy::BLOCK_THREADS, 0, stream>>>( d_in, d_out, num_items, reduction_op, init); // Check for failure to launch if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; } while (0); return error; #endif // CUB_RUNTIME_ENABLED } //------------------------------------------------------------------------------ // Normal problem size invocation (two-pass) //------------------------------------------------------------------------------ /// Invoke two-passes to reduce template < typename ActivePolicyT, ///< Umbrella policy active for the target device typename ReduceKernelT, ///< Function type of cub::DeviceReduceKernel typename SingleTileKernelT> ///< Function type of cub::DeviceReduceSingleTileKernel CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t InvokePasses( ReduceKernelT reduce_kernel, ///< [in] Kernel function pointer to parameterization of cub::DeviceReduceKernel SingleTileKernelT single_tile_kernel) ///< [in] Kernel function pointer to parameterization of cub::DeviceReduceSingleTileKernel { #ifndef CUB_RUNTIME_ENABLED (void) reduce_kernel; (void) single_tile_kernel; // Kernel launch not supported from this device return CubDebug(cudaErrorNotSupported ); #else cudaError error = cudaSuccess; do { // Get device ordinal int device_ordinal; if (CubDebug(error = cudaGetDevice(&device_ordinal))) break; // Get SM count int sm_count; if 
(CubDebug(error = cudaDeviceGetAttribute (&sm_count, cudaDevAttrMultiProcessorCount, device_ordinal))) break; // Init regular kernel configuration KernelConfig reduce_config; if (CubDebug(error = reduce_config.Init(reduce_kernel))) break; int reduce_device_occupancy = reduce_config.sm_occupancy * sm_count; // Even-share work distribution int max_blocks = reduce_device_occupancy * CUB_SUBSCRIPTION_FACTOR(ptx_version); GridEvenShare even_share; even_share.DispatchInit(num_items, max_blocks, reduce_config.tile_size); // Temporary storage allocation requirements void* allocations[1]; size_t allocation_sizes[1] = { max_blocks * sizeof(OutputT) // bytes needed for privatized block reductions }; // Alias the temporary allocations from the single storage blob (or compute the necessary size of the blob) if (CubDebug(error = AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes))) break; if (d_temp_storage == NULL) { // Return if the caller is simply requesting the size of the storage allocation return cudaSuccess; } // Alias the allocation for the privatized per-block reductions OutputT *d_block_reductions = (OutputT*) allocations[0]; // Get grid size for device_reduce_sweep_kernel int reduce_grid_size = even_share.grid_size; // Log device_reduce_sweep_kernel configuration if (debug_synchronous) _CubLog("Invoking DeviceReduceKernel<<<%d, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy\n", reduce_grid_size, ActivePolicyT::ReducePolicy::BLOCK_THREADS, (long long) stream, ActivePolicyT::ReducePolicy::ITEMS_PER_THREAD, reduce_config.sm_occupancy); // Invoke DeviceReduceKernel reduce_kernel<<>>( d_in, d_block_reductions, num_items, even_share, reduction_op); // Check for failure to launch if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; // Log single_reduce_sweep_kernel configuration if (debug_synchronous) _CubLog("Invoking DeviceReduceSingleTileKernel<<<1, %d, 0, %lld>>>(), %d items per thread\n", ActivePolicyT::SingleTilePolicy::BLOCK_THREADS, (long long) stream, ActivePolicyT::SingleTilePolicy::ITEMS_PER_THREAD); // Invoke DeviceReduceSingleTileKernel single_tile_kernel<<<1, ActivePolicyT::SingleTilePolicy::BLOCK_THREADS, 0, stream>>>( d_block_reductions, d_out, reduce_grid_size, reduction_op, init); // Check for failure to launch if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; } while (0); return error; #endif // CUB_RUNTIME_ENABLED } //------------------------------------------------------------------------------ // Chained policy invocation //------------------------------------------------------------------------------ /// Invocation template CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t Invoke() { typedef typename ActivePolicyT::SingleTilePolicy SingleTilePolicyT; typedef typename DispatchReduce::MaxPolicy MaxPolicyT; // Force kernel code-generation in all compiler passes if (num_items <= (SingleTilePolicyT::BLOCK_THREADS * SingleTilePolicyT::ITEMS_PER_THREAD)) { // Small, single tile size return InvokeSingleTile( DeviceReduceSingleTileKernel); } else { // Regular size return InvokePasses( DeviceReduceKernel, DeviceReduceSingleTileKernel); } } //------------------------------------------------------------------------------ // Dispatch entrypoints 
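//
// A minimal usage sketch of the entry point below (buffer names are hypothetical;
// error checking omitted). As elsewhere in CUB, a first call with
// d_temp_storage == NULL only reports the required temporary-storage size:
//
//   void   *d_temp_storage     = NULL;
//   size_t  temp_storage_bytes = 0;
//   DispatchReduce<float*, float*, int, cub::Sum>::Dispatch(
//       d_temp_storage, temp_storage_bytes, d_in, d_out, num_items,
//       cub::Sum(), 0.0f, 0, false);
//   cudaMalloc(&d_temp_storage, temp_storage_bytes);
//   DispatchReduce<float*, float*, int, cub::Sum>::Dispatch(
//       d_temp_storage, temp_storage_bytes, d_in, d_out, num_items,
//       cub::Sum(), 0.0f, 0, false);
//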
//------------------------------------------------------------------------------ /** * Internal dispatch routine for computing a device-wide reduction */ CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t Dispatch( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output aggregate OffsetT num_items, ///< [in] Total number of input items (i.e., length of \p d_in) ReductionOpT reduction_op, ///< [in] Binary reduction functor OutputT init, ///< [in] The initial value of the reduction cudaStream_t stream, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { typedef typename DispatchReduce::MaxPolicy MaxPolicyT; cudaError error = cudaSuccess; do { // Get PTX version int ptx_version; if (CubDebug(error = PtxVersion(ptx_version))) break; // Create dispatch functor DispatchReduce dispatch( d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, reduction_op, init, stream, debug_synchronous, ptx_version); // Dispatch to chained policy if (CubDebug(error = MaxPolicyT::Invoke(ptx_version, dispatch))) break; } while (0); return error; } }; /****************************************************************************** * Segmented dispatch ******************************************************************************/ /** * Utility class for dispatching the appropriately-tuned kernels for device-wide reduction */ template < typename InputIteratorT, ///< Random-access input iterator type for reading input items \iterator typename OutputIteratorT, ///< Output iterator type for recording the reduced aggregate \iterator typename OffsetIteratorT, ///< Random-access input iterator type for reading segment offsets \iterator typename OffsetT, ///< Signed integer type for global offsets typename ReductionOpT> ///< Binary reduction functor type having member T operator()(const T &a, const T &b) struct DispatchSegmentedReduce : DeviceReducePolicy< typename std::iterator_traits::value_type, OffsetT, ReductionOpT> { //------------------------------------------------------------------------------ // Constants //------------------------------------------------------------------------------ /// The output value type typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? typename std::iterator_traits::value_type, // ... then the input iterator's value type, typename std::iterator_traits::value_type>::Type OutputT; // ... else the output iterator's value type //------------------------------------------------------------------------------ // Problem state //------------------------------------------------------------------------------ void *d_temp_storage; ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. 
size_t &temp_storage_bytes; ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in; ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out; ///< [out] Pointer to the output aggregate OffsetT num_segments; ///< [in] The number of segments that comprise the sorting data OffsetIteratorT d_begin_offsets; ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* OffsetIteratorT d_end_offsets; ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. ReductionOpT reduction_op; ///< [in] Binary reduction functor OutputT init; ///< [in] The initial value of the reduction cudaStream_t stream; ///< [in] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous; ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. int ptx_version; ///< [in] PTX version //------------------------------------------------------------------------------ // Constructor //------------------------------------------------------------------------------ /// Constructor CUB_RUNTIME_FUNCTION __forceinline__ DispatchSegmentedReduce( void* d_temp_storage, size_t &temp_storage_bytes, InputIteratorT d_in, OutputIteratorT d_out, OffsetT num_segments, OffsetIteratorT d_begin_offsets, OffsetIteratorT d_end_offsets, ReductionOpT reduction_op, OutputT init, cudaStream_t stream, bool debug_synchronous, int ptx_version) : d_temp_storage(d_temp_storage), temp_storage_bytes(temp_storage_bytes), d_in(d_in), d_out(d_out), num_segments(num_segments), d_begin_offsets(d_begin_offsets), d_end_offsets(d_end_offsets), reduction_op(reduction_op), init(init), stream(stream), debug_synchronous(debug_synchronous), ptx_version(ptx_version) {} //------------------------------------------------------------------------------ // Chained policy invocation //------------------------------------------------------------------------------ /// Invocation template < typename ActivePolicyT, ///< Umbrella policy active for the target device typename DeviceSegmentedReduceKernelT> ///< Function type of cub::DeviceSegmentedReduceKernel CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t InvokePasses( DeviceSegmentedReduceKernelT segmented_reduce_kernel) ///< [in] Kernel function pointer to parameterization of cub::DeviceSegmentedReduceKernel { #ifndef CUB_RUNTIME_ENABLED (void)segmented_reduce_kernel; // Kernel launch not supported from this device return CubDebug(cudaErrorNotSupported ); #else cudaError error = cudaSuccess; do { // Return if the caller is simply requesting the size of the storage allocation if (d_temp_storage == NULL) { temp_storage_bytes = 1; return cudaSuccess; } // Init kernel configuration KernelConfig segmented_reduce_config; if (CubDebug(error = segmented_reduce_config.Init(segmented_reduce_kernel))) break; // Log device_reduce_sweep_kernel configuration if (debug_synchronous) _CubLog("Invoking SegmentedDeviceReduceKernel<<<%d, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy\n", num_segments, ActivePolicyT::SegmentedReducePolicy::BLOCK_THREADS, (long long) stream, ActivePolicyT::SegmentedReducePolicy::ITEMS_PER_THREAD, 
segmented_reduce_config.sm_occupancy); // Invoke DeviceReduceKernel segmented_reduce_kernel<<>>( d_in, d_out, d_begin_offsets, d_end_offsets, num_segments, reduction_op, init); // Check for failure to launch if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; } while (0); return error; #endif // CUB_RUNTIME_ENABLED } /// Invocation template CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t Invoke() { typedef typename DispatchSegmentedReduce::MaxPolicy MaxPolicyT; // Force kernel code-generation in all compiler passes return InvokePasses( DeviceSegmentedReduceKernel); } //------------------------------------------------------------------------------ // Dispatch entrypoints //------------------------------------------------------------------------------ /** * Internal dispatch routine for computing a device-wide reduction */ CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t Dispatch( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output aggregate int num_segments, ///< [in] The number of segments that comprise the sorting data OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. ReductionOpT reduction_op, ///< [in] Binary reduction functor OutputT init, ///< [in] The initial value of the reduction cudaStream_t stream, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { typedef typename DispatchSegmentedReduce::MaxPolicy MaxPolicyT; if (num_segments <= 0) return cudaSuccess; cudaError error = cudaSuccess; do { // Get PTX version int ptx_version; if (CubDebug(error = PtxVersion(ptx_version))) break; // Create dispatch functor DispatchSegmentedReduce dispatch( d_temp_storage, temp_storage_bytes, d_in, d_out, num_segments, d_begin_offsets, d_end_offsets, reduction_op, init, stream, debug_synchronous, ptx_version); // Dispatch to chained policy if (CubDebug(error = MaxPolicyT::Invoke(ptx_version, dispatch))) break; } while (0); return error; } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/device/dispatch/dispatch_reduce_by_key.cuh000066400000000000000000000616061411340063500266160ustar00rootroot00000000000000 /****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::DeviceReduceByKey provides device-wide, parallel operations for reducing segments of values residing within device-accessible memory. */ #pragma once #include #include #include "dispatch_scan.cuh" #include "../../agent/agent_reduce_by_key.cuh" #include "../../thread/thread_operators.cuh" #include "../../grid/grid_queue.cuh" #include "../../util_device.cuh" #include "../../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * Kernel entry points *****************************************************************************/ /** * Multi-block reduce-by-key sweep kernel entry point */ template < typename AgentReduceByKeyPolicyT, ///< Parameterized AgentReduceByKeyPolicyT tuning policy type typename KeysInputIteratorT, ///< Random-access input iterator type for keys typename UniqueOutputIteratorT, ///< Random-access output iterator type for keys typename ValuesInputIteratorT, ///< Random-access input iterator type for values typename AggregatesOutputIteratorT, ///< Random-access output iterator type for values typename NumRunsOutputIteratorT, ///< Output iterator type for recording number of segments encountered typename ScanTileStateT, ///< Tile status interface type typename EqualityOpT, ///< KeyT equality operator type typename ReductionOpT, ///< ValueT reduction operator type typename OffsetT> ///< Signed integer type for global offsets __launch_bounds__ (int(AgentReduceByKeyPolicyT::BLOCK_THREADS)) __global__ void DeviceReduceByKeyKernel( KeysInputIteratorT d_keys_in, ///< Pointer to the input sequence of keys UniqueOutputIteratorT d_unique_out, ///< Pointer to the output sequence of unique keys (one key per run) ValuesInputIteratorT d_values_in, ///< Pointer to the input sequence of corresponding values AggregatesOutputIteratorT d_aggregates_out, ///< Pointer to the output sequence of value aggregates (one aggregate per run) 
NumRunsOutputIteratorT d_num_runs_out, ///< Pointer to total number of runs encountered (i.e., the length of d_unique_out) ScanTileStateT tile_state, ///< Tile status interface int start_tile, ///< The starting tile for the current grid EqualityOpT equality_op, ///< KeyT equality operator ReductionOpT reduction_op, ///< ValueT reduction operator OffsetT num_items) ///< Total number of items to select from { // Thread block type for reducing tiles of value segments typedef AgentReduceByKey< AgentReduceByKeyPolicyT, KeysInputIteratorT, UniqueOutputIteratorT, ValuesInputIteratorT, AggregatesOutputIteratorT, NumRunsOutputIteratorT, EqualityOpT, ReductionOpT, OffsetT> AgentReduceByKeyT; // Shared memory for AgentReduceByKey __shared__ typename AgentReduceByKeyT::TempStorage temp_storage; // Process tiles AgentReduceByKeyT(temp_storage, d_keys_in, d_unique_out, d_values_in, d_aggregates_out, d_num_runs_out, equality_op, reduction_op).ConsumeRange( num_items, tile_state, start_tile); } /****************************************************************************** * Dispatch ******************************************************************************/ /** * Utility class for dispatching the appropriately-tuned kernels for DeviceReduceByKey */ template < typename KeysInputIteratorT, ///< Random-access input iterator type for keys typename UniqueOutputIteratorT, ///< Random-access output iterator type for keys typename ValuesInputIteratorT, ///< Random-access input iterator type for values typename AggregatesOutputIteratorT, ///< Random-access output iterator type for values typename NumRunsOutputIteratorT, ///< Output iterator type for recording number of segments encountered typename EqualityOpT, ///< KeyT equality operator type typename ReductionOpT, ///< ValueT reduction operator type typename OffsetT> ///< Signed integer type for global offsets struct DispatchReduceByKey { //------------------------------------------------------------------------- // Types and constants //------------------------------------------------------------------------- // The input keys type typedef typename std::iterator_traits::value_type KeyInputT; // The output keys type typedef typename If<(Equals::value_type, void>::VALUE), // KeyOutputT = (if output iterator's value type is void) ? typename std::iterator_traits::value_type, // ... then the input iterator's value type, typename std::iterator_traits::value_type>::Type KeyOutputT; // ... else the output iterator's value type // The input values type typedef typename std::iterator_traits::value_type ValueInputT; // The output values type typedef typename If<(Equals::value_type, void>::VALUE), // ValueOutputT = (if output iterator's value type is void) ? typename std::iterator_traits::value_type, // ... then the input iterator's value type, typename std::iterator_traits::value_type>::Type ValueOutputT; // ... else the output iterator's value type enum { INIT_KERNEL_THREADS = 128, MAX_INPUT_BYTES = CUB_MAX(sizeof(KeyOutputT), sizeof(ValueOutputT)), COMBINED_INPUT_BYTES = sizeof(KeyOutputT) + sizeof(ValueOutputT), }; // Tile status descriptor interface type typedef ReduceByKeyScanTileState ScanTileStateT; //------------------------------------------------------------------------- // Tuning policies //------------------------------------------------------------------------- /// SM35 struct Policy350 { enum { NOMINAL_4B_ITEMS_PER_THREAD = 6, ITEMS_PER_THREAD = (MAX_INPUT_BYTES <= 8) ? 
6 : CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, ((NOMINAL_4B_ITEMS_PER_THREAD * 8) + COMBINED_INPUT_BYTES - 1) / COMBINED_INPUT_BYTES)), }; typedef AgentReduceByKeyPolicy< 128, ITEMS_PER_THREAD, BLOCK_LOAD_DIRECT, LOAD_LDG, BLOCK_SCAN_WARP_SCANS> ReduceByKeyPolicyT; }; /// SM30 struct Policy300 { enum { NOMINAL_4B_ITEMS_PER_THREAD = 6, ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, ((NOMINAL_4B_ITEMS_PER_THREAD * 8) + COMBINED_INPUT_BYTES - 1) / COMBINED_INPUT_BYTES)), }; typedef AgentReduceByKeyPolicy< 128, ITEMS_PER_THREAD, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, BLOCK_SCAN_WARP_SCANS> ReduceByKeyPolicyT; }; /// SM20 struct Policy200 { enum { NOMINAL_4B_ITEMS_PER_THREAD = 11, ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, ((NOMINAL_4B_ITEMS_PER_THREAD * 8) + COMBINED_INPUT_BYTES - 1) / COMBINED_INPUT_BYTES)), }; typedef AgentReduceByKeyPolicy< 128, ITEMS_PER_THREAD, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, BLOCK_SCAN_WARP_SCANS> ReduceByKeyPolicyT; }; /// SM13 struct Policy130 { enum { NOMINAL_4B_ITEMS_PER_THREAD = 7, ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, ((NOMINAL_4B_ITEMS_PER_THREAD * 8) + COMBINED_INPUT_BYTES - 1) / COMBINED_INPUT_BYTES)), }; typedef AgentReduceByKeyPolicy< 128, ITEMS_PER_THREAD, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, BLOCK_SCAN_WARP_SCANS> ReduceByKeyPolicyT; }; /// SM11 struct Policy110 { enum { NOMINAL_4B_ITEMS_PER_THREAD = 5, ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 8) / COMBINED_INPUT_BYTES)), }; typedef AgentReduceByKeyPolicy< 64, ITEMS_PER_THREAD, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, BLOCK_SCAN_RAKING> ReduceByKeyPolicyT; }; /****************************************************************************** * Tuning policies of current PTX compiler pass ******************************************************************************/ #if (CUB_PTX_ARCH >= 350) typedef Policy350 PtxPolicy; #elif (CUB_PTX_ARCH >= 300) typedef Policy300 PtxPolicy; #elif (CUB_PTX_ARCH >= 200) typedef Policy200 PtxPolicy; #elif (CUB_PTX_ARCH >= 130) typedef Policy130 PtxPolicy; #else typedef Policy110 PtxPolicy; #endif // "Opaque" policies (whose parameterizations aren't reflected in the type signature) struct PtxReduceByKeyPolicy : PtxPolicy::ReduceByKeyPolicyT {}; /****************************************************************************** * Utilities ******************************************************************************/ /** * Initialize kernel dispatch configurations with the policies corresponding to the PTX assembly we will use */ template CUB_RUNTIME_FUNCTION __forceinline__ static void InitConfigs( int ptx_version, KernelConfig &reduce_by_key_config) { #if (CUB_PTX_ARCH > 0) (void)ptx_version; // We're on the device, so initialize the kernel dispatch configurations with the current PTX policy reduce_by_key_config.template Init(); #else // We're on the host, so lookup and initialize the kernel dispatch configurations with the policies that match the device's PTX version if (ptx_version >= 350) { reduce_by_key_config.template Init(); } else if (ptx_version >= 300) { reduce_by_key_config.template Init(); } else if (ptx_version >= 200) { reduce_by_key_config.template Init(); } else if (ptx_version >= 130) { reduce_by_key_config.template Init(); } else { reduce_by_key_config.template Init(); } #endif } /** * Kernel kernel dispatch configuration. 
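 * Captures the block size, items per thread, and resulting tile size of the
 * tuning policy that the reduce-by-key kernel was compiled for, so that the
 * host-side dispatch logic can size its grid to match.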
*/ struct KernelConfig { int block_threads; int items_per_thread; int tile_items; template CUB_RUNTIME_FUNCTION __forceinline__ void Init() { block_threads = PolicyT::BLOCK_THREADS; items_per_thread = PolicyT::ITEMS_PER_THREAD; tile_items = block_threads * items_per_thread; } }; //--------------------------------------------------------------------- // Dispatch entrypoints //--------------------------------------------------------------------- /** * Internal dispatch routine for computing a device-wide reduce-by-key using the * specified kernel functions. */ template < typename ScanInitKernelT, ///< Function type of cub::DeviceScanInitKernel typename ReduceByKeyKernelT> ///< Function type of cub::DeviceReduceByKeyKernelT CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t Dispatch( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation KeysInputIteratorT d_keys_in, ///< [in] Pointer to the input sequence of keys UniqueOutputIteratorT d_unique_out, ///< [out] Pointer to the output sequence of unique keys (one key per run) ValuesInputIteratorT d_values_in, ///< [in] Pointer to the input sequence of corresponding values AggregatesOutputIteratorT d_aggregates_out, ///< [out] Pointer to the output sequence of value aggregates (one aggregate per run) NumRunsOutputIteratorT d_num_runs_out, ///< [out] Pointer to total number of runs encountered (i.e., the length of d_unique_out) EqualityOpT equality_op, ///< [in] KeyT equality operator ReductionOpT reduction_op, ///< [in] ValueT reduction operator OffsetT num_items, ///< [in] Total number of items to select from cudaStream_t stream, ///< [in] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous, ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. 
int /*ptx_version*/, ///< [in] PTX version of dispatch kernels ScanInitKernelT init_kernel, ///< [in] Kernel function pointer to parameterization of cub::DeviceScanInitKernel ReduceByKeyKernelT reduce_by_key_kernel, ///< [in] Kernel function pointer to parameterization of cub::DeviceReduceByKeyKernel KernelConfig reduce_by_key_config) ///< [in] Dispatch parameters that match the policy that \p reduce_by_key_kernel was compiled for { #ifndef CUB_RUNTIME_ENABLED (void)d_temp_storage; (void)temp_storage_bytes; (void)d_keys_in; (void)d_unique_out; (void)d_values_in; (void)d_aggregates_out; (void)d_num_runs_out; (void)equality_op; (void)reduction_op; (void)num_items; (void)stream; (void)debug_synchronous; (void)init_kernel; (void)reduce_by_key_kernel; (void)reduce_by_key_config; // Kernel launch not supported from this device return CubDebug(cudaErrorNotSupported); #else cudaError error = cudaSuccess; do { // Get device ordinal int device_ordinal; if (CubDebug(error = cudaGetDevice(&device_ordinal))) break; // Get SM count int sm_count; if (CubDebug(error = cudaDeviceGetAttribute (&sm_count, cudaDevAttrMultiProcessorCount, device_ordinal))) break; // Number of input tiles int tile_size = reduce_by_key_config.block_threads * reduce_by_key_config.items_per_thread; int num_tiles = (num_items + tile_size - 1) / tile_size; // Specify temporary storage allocation requirements size_t allocation_sizes[1]; if (CubDebug(error = ScanTileStateT::AllocationSize(num_tiles, allocation_sizes[0]))) break; // bytes needed for tile status descriptors // Compute allocation pointers into the single storage blob (or compute the necessary size of the blob) void* allocations[1]; if (CubDebug(error = AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes))) break; if (d_temp_storage == NULL) { // Return if the caller is simply requesting the size of the storage allocation break; } // Construct the tile status interface ScanTileStateT tile_state; if (CubDebug(error = tile_state.Init(num_tiles, allocations[0], allocation_sizes[0]))) break; // Log init_kernel configuration int init_grid_size = CUB_MAX(1, (num_tiles + INIT_KERNEL_THREADS - 1) / INIT_KERNEL_THREADS); if (debug_synchronous) _CubLog("Invoking init_kernel<<<%d, %d, 0, %lld>>>()\n", init_grid_size, INIT_KERNEL_THREADS, (long long) stream); // Invoke init_kernel to initialize tile descriptors init_kernel<<>>( tile_state, num_tiles, d_num_runs_out); // Check for failure to launch if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; // Return if empty problem if (num_items == 0) break; // Get SM occupancy for reduce_by_key_kernel int reduce_by_key_sm_occupancy; if (CubDebug(error = MaxSmOccupancy( reduce_by_key_sm_occupancy, // out reduce_by_key_kernel, reduce_by_key_config.block_threads))) break; // Get max x-dimension of grid int max_dim_x; if (CubDebug(error = cudaDeviceGetAttribute(&max_dim_x, cudaDevAttrMaxGridDimX, device_ordinal))) break;; // Run grids in epochs (in case number of tiles exceeds max x-dimension int scan_grid_size = CUB_MIN(num_tiles, max_dim_x); for (int start_tile = 0; start_tile < num_tiles; start_tile += scan_grid_size) { // Log reduce_by_key_kernel configuration if (debug_synchronous) _CubLog("Invoking %d reduce_by_key_kernel<<<%d, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy\n", start_tile, scan_grid_size, reduce_by_key_config.block_threads, (long long) stream, 
reduce_by_key_config.items_per_thread, reduce_by_key_sm_occupancy); // Invoke reduce_by_key_kernel reduce_by_key_kernel<<>>( d_keys_in, d_unique_out, d_values_in, d_aggregates_out, d_num_runs_out, tile_state, start_tile, equality_op, reduction_op, num_items); // Check for failure to launch if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; } } while (0); return error; #endif // CUB_RUNTIME_ENABLED } /** * Internal dispatch routine */ CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t Dispatch( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation KeysInputIteratorT d_keys_in, ///< [in] Pointer to the input sequence of keys UniqueOutputIteratorT d_unique_out, ///< [out] Pointer to the output sequence of unique keys (one key per run) ValuesInputIteratorT d_values_in, ///< [in] Pointer to the input sequence of corresponding values AggregatesOutputIteratorT d_aggregates_out, ///< [out] Pointer to the output sequence of value aggregates (one aggregate per run) NumRunsOutputIteratorT d_num_runs_out, ///< [out] Pointer to total number of runs encountered (i.e., the length of d_unique_out) EqualityOpT equality_op, ///< [in] KeyT equality operator ReductionOpT reduction_op, ///< [in] ValueT reduction operator OffsetT num_items, ///< [in] Total number of items to select from cudaStream_t stream, ///< [in] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous) ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { cudaError error = cudaSuccess; do { // Get PTX version int ptx_version; #if (CUB_PTX_ARCH == 0) if (CubDebug(error = PtxVersion(ptx_version))) break; #else ptx_version = CUB_PTX_ARCH; #endif // Get kernel kernel dispatch configurations KernelConfig reduce_by_key_config; InitConfigs(ptx_version, reduce_by_key_config); // Dispatch if (CubDebug(error = Dispatch( d_temp_storage, temp_storage_bytes, d_keys_in, d_unique_out, d_values_in, d_aggregates_out, d_num_runs_out, equality_op, reduction_op, num_items, stream, debug_synchronous, ptx_version, DeviceCompactInitKernel, DeviceReduceByKeyKernel, reduce_by_key_config))) break; } while (0); return error; } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/device/dispatch/dispatch_rle.cuh000066400000000000000000000560611411340063500245660ustar00rootroot00000000000000 /****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::DeviceRle provides device-wide, parallel operations for run-length-encoding sequences of data items residing within device-accessible memory. */ #pragma once #include #include #include "dispatch_scan.cuh" #include "../../agent/agent_rle.cuh" #include "../../thread/thread_operators.cuh" #include "../../grid/grid_queue.cuh" #include "../../util_device.cuh" #include "../../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * Kernel entry points *****************************************************************************/ /** * Select kernel entry point (multi-block) * * Performs functor-based selection if SelectOp functor type != NullType * Otherwise performs flag-based selection if FlagIterator's value type != NullType * Otherwise performs discontinuity selection (keep unique) */ template < typename AgentRlePolicyT, ///< Parameterized AgentRlePolicyT tuning policy type typename InputIteratorT, ///< Random-access input iterator type for reading input items \iterator typename OffsetsOutputIteratorT, ///< Random-access output iterator type for writing run-offset values \iterator typename LengthsOutputIteratorT, ///< Random-access output iterator type for writing run-length values \iterator typename NumRunsOutputIteratorT, ///< Output iterator type for recording the number of runs encountered \iterator typename ScanTileStateT, ///< Tile status interface type typename EqualityOpT, ///< T equality operator type typename OffsetT> ///< Signed integer type for global offsets __launch_bounds__ (int(AgentRlePolicyT::BLOCK_THREADS)) __global__ void DeviceRleSweepKernel( InputIteratorT d_in, ///< [in] Pointer to input sequence of data items OffsetsOutputIteratorT d_offsets_out, ///< [out] Pointer to output sequence of run-offsets LengthsOutputIteratorT d_lengths_out, ///< [out] Pointer to output sequence of run-lengths NumRunsOutputIteratorT d_num_runs_out, ///< [out] Pointer to total number of runs (i.e., length of \p d_offsets_out) ScanTileStateT tile_status, ///< [in] Tile status interface EqualityOpT equality_op, ///< [in] Equality operator for input items OffsetT num_items, ///< [in] Total number of input items (i.e., length of \p d_in) int num_tiles) ///< [in] Total number of tiles for the entire problem { // Thread block type for selecting data from input tiles typedef AgentRle< AgentRlePolicyT, InputIteratorT, 
OffsetsOutputIteratorT, LengthsOutputIteratorT, EqualityOpT, OffsetT> AgentRleT; // Shared memory for AgentRle __shared__ typename AgentRleT::TempStorage temp_storage; // Process tiles AgentRleT(temp_storage, d_in, d_offsets_out, d_lengths_out, equality_op, num_items).ConsumeRange( num_tiles, tile_status, d_num_runs_out); } /****************************************************************************** * Dispatch ******************************************************************************/ /** * Utility class for dispatching the appropriately-tuned kernels for DeviceRle */ template < typename InputIteratorT, ///< Random-access input iterator type for reading input items \iterator typename OffsetsOutputIteratorT, ///< Random-access output iterator type for writing run-offset values \iterator typename LengthsOutputIteratorT, ///< Random-access output iterator type for writing run-length values \iterator typename NumRunsOutputIteratorT, ///< Output iterator type for recording the number of runs encountered \iterator typename EqualityOpT, ///< T equality operator type typename OffsetT> ///< Signed integer type for global offsets struct DeviceRleDispatch { /****************************************************************************** * Types and constants ******************************************************************************/ // The input value type typedef typename std::iterator_traits::value_type T; // The lengths output value type typedef typename If<(Equals::value_type, void>::VALUE), // LengthT = (if output iterator's value type is void) ? OffsetT, // ... then the OffsetT type, typename std::iterator_traits::value_type>::Type LengthT; // ... else the output iterator's value type enum { INIT_KERNEL_THREADS = 128, }; // Tile status descriptor interface type typedef ReduceByKeyScanTileState ScanTileStateT; /****************************************************************************** * Tuning policies ******************************************************************************/ /// SM35 struct Policy350 { enum { NOMINAL_4B_ITEMS_PER_THREAD = 15, ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(T)))), }; typedef AgentRlePolicy< 96, ITEMS_PER_THREAD, BLOCK_LOAD_DIRECT, LOAD_LDG, true, BLOCK_SCAN_WARP_SCANS> RleSweepPolicy; }; /// SM30 struct Policy300 { enum { NOMINAL_4B_ITEMS_PER_THREAD = 5, ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(T)))), }; typedef AgentRlePolicy< 256, ITEMS_PER_THREAD, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, true, BLOCK_SCAN_RAKING_MEMOIZE> RleSweepPolicy; }; /// SM20 struct Policy200 { enum { NOMINAL_4B_ITEMS_PER_THREAD = 15, ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(T)))), }; typedef AgentRlePolicy< 128, ITEMS_PER_THREAD, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, false, BLOCK_SCAN_WARP_SCANS> RleSweepPolicy; }; /// SM13 struct Policy130 { enum { NOMINAL_4B_ITEMS_PER_THREAD = 9, ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(T)))), }; typedef AgentRlePolicy< 64, ITEMS_PER_THREAD, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, true, BLOCK_SCAN_RAKING_MEMOIZE> RleSweepPolicy; }; /// SM10 struct Policy100 { enum { NOMINAL_4B_ITEMS_PER_THREAD = 9, ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(T)))), }; typedef AgentRlePolicy< 256, ITEMS_PER_THREAD, 
BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, true, BLOCK_SCAN_RAKING_MEMOIZE> RleSweepPolicy; }; /****************************************************************************** * Tuning policies of current PTX compiler pass ******************************************************************************/ #if (CUB_PTX_ARCH >= 350) typedef Policy350 PtxPolicy; #elif (CUB_PTX_ARCH >= 300) typedef Policy300 PtxPolicy; #elif (CUB_PTX_ARCH >= 200) typedef Policy200 PtxPolicy; #elif (CUB_PTX_ARCH >= 130) typedef Policy130 PtxPolicy; #else typedef Policy100 PtxPolicy; #endif // "Opaque" policies (whose parameterizations aren't reflected in the type signature) struct PtxRleSweepPolicy : PtxPolicy::RleSweepPolicy {}; /****************************************************************************** * Utilities ******************************************************************************/ /** * Initialize kernel dispatch configurations with the policies corresponding to the PTX assembly we will use */ template CUB_RUNTIME_FUNCTION __forceinline__ static void InitConfigs( int ptx_version, KernelConfig& device_rle_config) { #if (CUB_PTX_ARCH > 0) // We're on the device, so initialize the kernel dispatch configurations with the current PTX policy device_rle_config.template Init(); #else // We're on the host, so lookup and initialize the kernel dispatch configurations with the policies that match the device's PTX version if (ptx_version >= 350) { device_rle_config.template Init(); } else if (ptx_version >= 300) { device_rle_config.template Init(); } else if (ptx_version >= 200) { device_rle_config.template Init(); } else if (ptx_version >= 130) { device_rle_config.template Init(); } else { device_rle_config.template Init(); } #endif } /** * Kernel kernel dispatch configuration. Mirrors the constants within AgentRlePolicyT. */ struct KernelConfig { int block_threads; int items_per_thread; BlockLoadAlgorithm load_policy; bool store_warp_time_slicing; BlockScanAlgorithm scan_algorithm; template CUB_RUNTIME_FUNCTION __forceinline__ void Init() { block_threads = AgentRlePolicyT::BLOCK_THREADS; items_per_thread = AgentRlePolicyT::ITEMS_PER_THREAD; load_policy = AgentRlePolicyT::LOAD_ALGORITHM; store_warp_time_slicing = AgentRlePolicyT::STORE_WARP_TIME_SLICING; scan_algorithm = AgentRlePolicyT::SCAN_ALGORITHM; } CUB_RUNTIME_FUNCTION __forceinline__ void Print() { printf("%d, %d, %d, %d, %d", block_threads, items_per_thread, load_policy, store_warp_time_slicing, scan_algorithm); } }; /****************************************************************************** * Dispatch entrypoints ******************************************************************************/ /** * Internal dispatch routine for computing a device-wide run-length-encode using the * specified kernel functions. */ template < typename DeviceScanInitKernelPtr, ///< Function type of cub::DeviceScanInitKernel typename DeviceRleSweepKernelPtr> ///< Function type of cub::DeviceRleSweepKernelPtr CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t Dispatch( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. 
size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OffsetsOutputIteratorT d_offsets_out, ///< [out] Pointer to the output sequence of run-offsets LengthsOutputIteratorT d_lengths_out, ///< [out] Pointer to the output sequence of run-lengths NumRunsOutputIteratorT d_num_runs_out, ///< [out] Pointer to the total number of runs encountered (i.e., length of \p d_offsets_out) EqualityOpT equality_op, ///< [in] Equality operator for input items OffsetT num_items, ///< [in] Total number of input items (i.e., length of \p d_in) cudaStream_t stream, ///< [in] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous, ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. int ptx_version, ///< [in] PTX version of dispatch kernels DeviceScanInitKernelPtr device_scan_init_kernel, ///< [in] Kernel function pointer to parameterization of cub::DeviceScanInitKernel DeviceRleSweepKernelPtr device_rle_sweep_kernel, ///< [in] Kernel function pointer to parameterization of cub::DeviceRleSweepKernel KernelConfig device_rle_config) ///< [in] Dispatch parameters that match the policy that \p device_rle_sweep_kernel was compiled for { #ifndef CUB_RUNTIME_ENABLED // Kernel launch not supported from this device return CubDebug(cudaErrorNotSupported); #else cudaError error = cudaSuccess; do { // Get device ordinal int device_ordinal; if (CubDebug(error = cudaGetDevice(&device_ordinal))) break; // Get SM count int sm_count; if (CubDebug(error = cudaDeviceGetAttribute (&sm_count, cudaDevAttrMultiProcessorCount, device_ordinal))) break; // Number of input tiles int tile_size = device_rle_config.block_threads * device_rle_config.items_per_thread; int num_tiles = (num_items + tile_size - 1) / tile_size; // Specify temporary storage allocation requirements size_t allocation_sizes[1]; if (CubDebug(error = ScanTileStateT::AllocationSize(num_tiles, allocation_sizes[0]))) break; // bytes needed for tile status descriptors // Compute allocation pointers into the single storage blob (or compute the necessary size of the blob) void* allocations[1]; if (CubDebug(error = AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes))) break; if (d_temp_storage == NULL) { // Return if the caller is simply requesting the size of the storage allocation break; } // Construct the tile status interface ScanTileStateT tile_status; if (CubDebug(error = tile_status.Init(num_tiles, allocations[0], allocation_sizes[0]))) break; // Log device_scan_init_kernel configuration int init_grid_size = CUB_MAX(1, (num_tiles + INIT_KERNEL_THREADS - 1) / INIT_KERNEL_THREADS); if (debug_synchronous) _CubLog("Invoking device_scan_init_kernel<<<%d, %d, 0, %lld>>>()\n", init_grid_size, INIT_KERNEL_THREADS, (long long) stream); // Invoke device_scan_init_kernel to initialize tile descriptors and queue descriptors device_scan_init_kernel<<>>( tile_status, num_tiles, d_num_runs_out); // Check for failure to launch if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; // Return if empty problem if (num_items == 0) break; // Get SM occupancy for device_rle_sweep_kernel int device_rle_kernel_sm_occupancy; if (CubDebug(error = MaxSmOccupancy( 
device_rle_kernel_sm_occupancy, // out device_rle_sweep_kernel, device_rle_config.block_threads))) break; // Get max x-dimension of grid int max_dim_x; if (CubDebug(error = cudaDeviceGetAttribute(&max_dim_x, cudaDevAttrMaxGridDimX, device_ordinal))) break;; // Get grid size for scanning tiles dim3 scan_grid_size; scan_grid_size.z = 1; scan_grid_size.y = ((unsigned int) num_tiles + max_dim_x - 1) / max_dim_x; scan_grid_size.x = CUB_MIN(num_tiles, max_dim_x); // Log device_rle_sweep_kernel configuration if (debug_synchronous) _CubLog("Invoking device_rle_sweep_kernel<<<{%d,%d,%d}, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy\n", scan_grid_size.x, scan_grid_size.y, scan_grid_size.z, device_rle_config.block_threads, (long long) stream, device_rle_config.items_per_thread, device_rle_kernel_sm_occupancy); // Invoke device_rle_sweep_kernel device_rle_sweep_kernel<<>>( d_in, d_offsets_out, d_lengths_out, d_num_runs_out, tile_status, equality_op, num_items, num_tiles); // Check for failure to launch if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; } while (0); return error; #endif // CUB_RUNTIME_ENABLED } /** * Internal dispatch routine */ CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t Dispatch( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to input sequence of data items OffsetsOutputIteratorT d_offsets_out, ///< [out] Pointer to output sequence of run-offsets LengthsOutputIteratorT d_lengths_out, ///< [out] Pointer to output sequence of run-lengths NumRunsOutputIteratorT d_num_runs_out, ///< [out] Pointer to total number of runs (i.e., length of \p d_offsets_out) EqualityOpT equality_op, ///< [in] Equality operator for input items OffsetT num_items, ///< [in] Total number of input items (i.e., length of \p d_in) cudaStream_t stream, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { cudaError error = cudaSuccess; do { // Get PTX version int ptx_version; #if (CUB_PTX_ARCH == 0) if (CubDebug(error = PtxVersion(ptx_version))) break; #else ptx_version = CUB_PTX_ARCH; #endif // Get kernel kernel dispatch configurations KernelConfig device_rle_config; InitConfigs(ptx_version, device_rle_config); // Dispatch if (CubDebug(error = Dispatch( d_temp_storage, temp_storage_bytes, d_in, d_offsets_out, d_lengths_out, d_num_runs_out, equality_op, num_items, stream, debug_synchronous, ptx_version, DeviceCompactInitKernel, DeviceRleSweepKernel, device_rle_config))) break; } while (0); return error; } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/device/dispatch/dispatch_scan.cuh000066400000000000000000000543201411340063500247240ustar00rootroot00000000000000 /****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::DeviceScan provides device-wide, parallel operations for computing a prefix scan across a sequence of data items residing within device-accessible memory. */ #pragma once #include #include #include "../../agent/agent_scan.cuh" #include "../../thread/thread_operators.cuh" #include "../../grid/grid_queue.cuh" #include "../../util_arch.cuh" #include "../../util_debug.cuh" #include "../../util_device.cuh" #include "../../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * Kernel entry points *****************************************************************************/ /** * Initialization kernel for tile status initialization (multi-block) */ template < typename ScanTileStateT> ///< Tile status interface type __global__ void DeviceScanInitKernel( ScanTileStateT tile_state, ///< [in] Tile status interface int num_tiles) ///< [in] Number of tiles { // Initialize tile status tile_state.InitializeStatus(num_tiles); } /** * Initialization kernel for tile status initialization (multi-block) */ template < typename ScanTileStateT, ///< Tile status interface type typename NumSelectedIteratorT> ///< Output iterator type for recording the number of items selected __global__ void DeviceCompactInitKernel( ScanTileStateT tile_state, ///< [in] Tile status interface int num_tiles, ///< [in] Number of tiles NumSelectedIteratorT d_num_selected_out) ///< [out] Pointer to the total number of items selected (i.e., length of \p d_selected_out) { // Initialize tile status tile_state.InitializeStatus(num_tiles); // Initialize d_num_selected_out if ((blockIdx.x == 0) && (threadIdx.x == 0)) *d_num_selected_out = 0; } /** * Scan kernel entry point (multi-block) */ template < typename ScanPolicyT, ///< Parameterized ScanPolicyT tuning policy type typename InputIteratorT, ///< Random-access input iterator type for reading scan inputs \iterator typename OutputIteratorT, ///< 
Random-access output iterator type for writing scan outputs \iterator typename ScanTileStateT, ///< Tile status interface type typename ScanOpT, ///< Binary scan functor type having member T operator()(const T &a, const T &b) typename InitValueT, ///< Initial value to seed the exclusive scan (cub::NullType for inclusive scans) typename OffsetT> ///< Signed integer type for global offsets __launch_bounds__ (int(ScanPolicyT::BLOCK_THREADS)) __global__ void DeviceScanKernel( InputIteratorT d_in, ///< Input data OutputIteratorT d_out, ///< Output data ScanTileStateT tile_state, ///< Tile status interface int start_tile, ///< The starting tile for the current grid ScanOpT scan_op, ///< Binary scan functor InitValueT init_value, ///< Initial value to seed the exclusive scan OffsetT num_items) ///< Total number of scan items for the entire problem { // Thread block type for scanning input tiles typedef AgentScan< ScanPolicyT, InputIteratorT, OutputIteratorT, ScanOpT, InitValueT, OffsetT> AgentScanT; // Shared memory for AgentScan __shared__ typename AgentScanT::TempStorage temp_storage; // Process tiles AgentScanT(temp_storage, d_in, d_out, scan_op, init_value).ConsumeRange( num_items, tile_state, start_tile); } /****************************************************************************** * Dispatch ******************************************************************************/ /** * Utility class for dispatching the appropriately-tuned kernels for DeviceScan */ template < typename InputIteratorT, ///< Random-access input iterator type for reading scan inputs \iterator typename OutputIteratorT, ///< Random-access output iterator type for writing scan outputs \iterator typename ScanOpT, ///< Binary scan functor type having member T operator()(const T &a, const T &b) typename InitValueT, ///< The init_value element type for ScanOpT (cub::NullType for inclusive scans) typename OffsetT> ///< Signed integer type for global offsets struct DispatchScan { //--------------------------------------------------------------------- // Constants and Types //--------------------------------------------------------------------- enum { INIT_KERNEL_THREADS = 128 }; // The output value type typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? typename std::iterator_traits::value_type, // ... then the input iterator's value type, typename std::iterator_traits::value_type>::Type OutputT; // ... 
else the output iterator's value type // Tile status descriptor interface type typedef ScanTileState ScanTileStateT; //--------------------------------------------------------------------- // Tuning policies //--------------------------------------------------------------------- /// SM600 struct Policy600 { typedef AgentScanPolicy< CUB_NOMINAL_CONFIG(128, 15, OutputT), ///< Threads per block, items per thread BLOCK_LOAD_TRANSPOSE, LOAD_DEFAULT, BLOCK_STORE_TRANSPOSE, BLOCK_SCAN_WARP_SCANS> ScanPolicyT; }; /// SM520 struct Policy520 { // Titan X: 32.47B items/s @ 48M 32-bit T typedef AgentScanPolicy< CUB_NOMINAL_CONFIG(128, 12, OutputT), ///< Threads per block, items per thread BLOCK_LOAD_DIRECT, LOAD_LDG, BLOCK_STORE_WARP_TRANSPOSE, BLOCK_SCAN_WARP_SCANS> ScanPolicyT; }; /// SM35 struct Policy350 { // GTX Titan: 29.5B items/s (232.4 GB/s) @ 48M 32-bit T typedef AgentScanPolicy< CUB_NOMINAL_CONFIG(128, 12, OutputT), ///< Threads per block, items per thread BLOCK_LOAD_DIRECT, LOAD_LDG, BLOCK_STORE_WARP_TRANSPOSE_TIMESLICED, BLOCK_SCAN_RAKING> ScanPolicyT; }; /// SM30 struct Policy300 { typedef AgentScanPolicy< CUB_NOMINAL_CONFIG(256, 9, OutputT), ///< Threads per block, items per thread BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, BLOCK_STORE_WARP_TRANSPOSE, BLOCK_SCAN_WARP_SCANS> ScanPolicyT; }; /// SM20 struct Policy200 { // GTX 580: 20.3B items/s (162.3 GB/s) @ 48M 32-bit T typedef AgentScanPolicy< CUB_NOMINAL_CONFIG(128, 12, OutputT), ///< Threads per block, items per thread BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, BLOCK_STORE_WARP_TRANSPOSE, BLOCK_SCAN_WARP_SCANS> ScanPolicyT; }; /// SM13 struct Policy130 { typedef AgentScanPolicy< CUB_NOMINAL_CONFIG(96, 21, OutputT), ///< Threads per block, items per thread BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, BLOCK_STORE_WARP_TRANSPOSE, BLOCK_SCAN_RAKING_MEMOIZE> ScanPolicyT; }; /// SM10 struct Policy100 { typedef AgentScanPolicy< CUB_NOMINAL_CONFIG(64, 9, OutputT), ///< Threads per block, items per thread BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, BLOCK_STORE_WARP_TRANSPOSE, BLOCK_SCAN_WARP_SCANS> ScanPolicyT; }; //--------------------------------------------------------------------- // Tuning policies of current PTX compiler pass //--------------------------------------------------------------------- #if (CUB_PTX_ARCH >= 600) typedef Policy600 PtxPolicy; #elif (CUB_PTX_ARCH >= 520) typedef Policy520 PtxPolicy; #elif (CUB_PTX_ARCH >= 350) typedef Policy350 PtxPolicy; #elif (CUB_PTX_ARCH >= 300) typedef Policy300 PtxPolicy; #elif (CUB_PTX_ARCH >= 200) typedef Policy200 PtxPolicy; #elif (CUB_PTX_ARCH >= 130) typedef Policy130 PtxPolicy; #else typedef Policy100 PtxPolicy; #endif // "Opaque" policies (whose parameterizations aren't reflected in the type signature) struct PtxAgentScanPolicy : PtxPolicy::ScanPolicyT {}; //--------------------------------------------------------------------- // Utilities //--------------------------------------------------------------------- /** * Initialize kernel dispatch configurations with the policies corresponding to the PTX assembly we will use */ template CUB_RUNTIME_FUNCTION __forceinline__ static void InitConfigs( int ptx_version, KernelConfig &scan_kernel_config) { #if (CUB_PTX_ARCH > 0) (void)ptx_version; // We're on the device, so initialize the kernel dispatch configurations with the current PTX policy scan_kernel_config.template Init(); #else // We're on the host, so lookup and initialize the kernel dispatch configurations with the policies that match the device's PTX version if (ptx_version >= 600) { 
scan_kernel_config.template Init(); } else if (ptx_version >= 520) { scan_kernel_config.template Init(); } else if (ptx_version >= 350) { scan_kernel_config.template Init(); } else if (ptx_version >= 300) { scan_kernel_config.template Init(); } else if (ptx_version >= 200) { scan_kernel_config.template Init(); } else if (ptx_version >= 130) { scan_kernel_config.template Init(); } else { scan_kernel_config.template Init(); } #endif } /** * Kernel kernel dispatch configuration. */ struct KernelConfig { int block_threads; int items_per_thread; int tile_items; template CUB_RUNTIME_FUNCTION __forceinline__ void Init() { block_threads = PolicyT::BLOCK_THREADS; items_per_thread = PolicyT::ITEMS_PER_THREAD; tile_items = block_threads * items_per_thread; } }; //--------------------------------------------------------------------- // Dispatch entrypoints //--------------------------------------------------------------------- /** * Internal dispatch routine for computing a device-wide prefix scan using the * specified kernel functions. */ template < typename ScanInitKernelPtrT, ///< Function type of cub::DeviceScanInitKernel typename ScanSweepKernelPtrT> ///< Function type of cub::DeviceScanKernelPtrT CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t Dispatch( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output sequence of data items ScanOpT scan_op, ///< [in] Binary scan functor InitValueT init_value, ///< [in] Initial value to seed the exclusive scan OffsetT num_items, ///< [in] Total number of input items (i.e., the length of \p d_in) cudaStream_t stream, ///< [in] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous, ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. 
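// The NULL-query / allocate / re-invoke protocol documented for \p d_temp_storage above is how
// callers normally reach this dispatch, via the public cub::DeviceScan entry points. A minimal
// sketch of that pattern (d_in, d_out and num_items are assumed, hypothetical device buffers and
// an item count owned by the caller):
//
//     void   *d_temp_storage     = NULL;
//     size_t  temp_storage_bytes = 0;
//     // First call: d_temp_storage is NULL, so only the required size is written.
//     cub::DeviceScan::ExclusiveSum(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items);
//     cudaMalloc(&d_temp_storage, temp_storage_bytes);
//     // Second call: temporary storage is provided, so the scan kernels are actually launched.
//     cub::DeviceScan::ExclusiveSum(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items);
//     cudaFree(d_temp_storage);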
int /*ptx_version*/, ///< [in] PTX version of dispatch kernels ScanInitKernelPtrT init_kernel, ///< [in] Kernel function pointer to parameterization of cub::DeviceScanInitKernel ScanSweepKernelPtrT scan_kernel, ///< [in] Kernel function pointer to parameterization of cub::DeviceScanKernel KernelConfig scan_kernel_config) ///< [in] Dispatch parameters that match the policy that \p scan_kernel was compiled for { #ifndef CUB_RUNTIME_ENABLED (void)d_temp_storage; (void)temp_storage_bytes; (void)d_in; (void)d_out; (void)scan_op; (void)init_value; (void)num_items; (void)stream; (void)debug_synchronous; (void)init_kernel; (void)scan_kernel; (void)scan_kernel_config; // Kernel launch not supported from this device return CubDebug(cudaErrorNotSupported); #else cudaError error = cudaSuccess; do { // Get device ordinal int device_ordinal; if (CubDebug(error = cudaGetDevice(&device_ordinal))) break; // Get SM count int sm_count; if (CubDebug(error = cudaDeviceGetAttribute (&sm_count, cudaDevAttrMultiProcessorCount, device_ordinal))) break; // Number of input tiles int tile_size = scan_kernel_config.block_threads * scan_kernel_config.items_per_thread; int num_tiles = (num_items + tile_size - 1) / tile_size; // Specify temporary storage allocation requirements size_t allocation_sizes[1]; if (CubDebug(error = ScanTileStateT::AllocationSize(num_tiles, allocation_sizes[0]))) break; // bytes needed for tile status descriptors // Compute allocation pointers into the single storage blob (or compute the necessary size of the blob) void* allocations[1]; if (CubDebug(error = AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes))) break; if (d_temp_storage == NULL) { // Return if the caller is simply requesting the size of the storage allocation break; } // Return if empty problem if (num_items == 0) break; // Construct the tile status interface ScanTileStateT tile_state; if (CubDebug(error = tile_state.Init(num_tiles, allocations[0], allocation_sizes[0]))) break; // Log init_kernel configuration int init_grid_size = (num_tiles + INIT_KERNEL_THREADS - 1) / INIT_KERNEL_THREADS; if (debug_synchronous) _CubLog("Invoking init_kernel<<<%d, %d, 0, %lld>>>()\n", init_grid_size, INIT_KERNEL_THREADS, (long long) stream); // Invoke init_kernel to initialize tile descriptors init_kernel<<>>( tile_state, num_tiles); // Check for failure to launch if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; // Get SM occupancy for scan_kernel int scan_sm_occupancy; if (CubDebug(error = MaxSmOccupancy( scan_sm_occupancy, // out scan_kernel, scan_kernel_config.block_threads))) break; // Get max x-dimension of grid int max_dim_x; if (CubDebug(error = cudaDeviceGetAttribute(&max_dim_x, cudaDevAttrMaxGridDimX, device_ordinal))) break;; // Run grids in epochs (in case number of tiles exceeds max x-dimension int scan_grid_size = CUB_MIN(num_tiles, max_dim_x); for (int start_tile = 0; start_tile < num_tiles; start_tile += scan_grid_size) { // Log scan_kernel configuration if (debug_synchronous) _CubLog("Invoking %d scan_kernel<<<%d, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy\n", start_tile, scan_grid_size, scan_kernel_config.block_threads, (long long) stream, scan_kernel_config.items_per_thread, scan_sm_occupancy); // Invoke scan_kernel scan_kernel<<>>( d_in, d_out, tile_state, start_tile, scan_op, init_value, num_items); // Check for failure to launch if (CubDebug(error = 
cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; } } while (0); return error; #endif // CUB_RUNTIME_ENABLED } /** * Internal dispatch routine */ CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t Dispatch( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items OutputIteratorT d_out, ///< [out] Pointer to the output sequence of data items ScanOpT scan_op, ///< [in] Binary scan functor InitValueT init_value, ///< [in] Initial value to seed the exclusive scan OffsetT num_items, ///< [in] Total number of input items (i.e., the length of \p d_in) cudaStream_t stream, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. { cudaError error = cudaSuccess; do { // Get PTX version int ptx_version; if (CubDebug(error = PtxVersion(ptx_version))) break; // Get kernel kernel dispatch configurations KernelConfig scan_kernel_config; InitConfigs(ptx_version, scan_kernel_config); // Dispatch if (CubDebug(error = Dispatch( d_temp_storage, temp_storage_bytes, d_in, d_out, scan_op, init_value, num_items, stream, debug_synchronous, ptx_version, DeviceScanInitKernel, DeviceScanKernel, scan_kernel_config))) break; } while (0); return error; } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/device/dispatch/dispatch_select_if.cuh000066400000000000000000000576061411340063500257470ustar00rootroot00000000000000 /****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::DeviceSelect provides device-wide, parallel operations for selecting items from sequences of data items residing within device-accessible memory. */ #pragma once #include #include #include "dispatch_scan.cuh" #include "../../agent/agent_select_if.cuh" #include "../../thread/thread_operators.cuh" #include "../../grid/grid_queue.cuh" #include "../../util_device.cuh" #include "../../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * Kernel entry points *****************************************************************************/ /** * Select kernel entry point (multi-block) * * Performs functor-based selection if SelectOpT functor type != NullType * Otherwise performs flag-based selection if FlagsInputIterator's value type != NullType * Otherwise performs discontinuity selection (keep unique) */ template < typename AgentSelectIfPolicyT, ///< Parameterized AgentSelectIfPolicyT tuning policy type typename InputIteratorT, ///< Random-access input iterator type for reading input items typename FlagsInputIteratorT, ///< Random-access input iterator type for reading selection flags (NullType* if a selection functor or discontinuity flagging is to be used for selection) typename SelectedOutputIteratorT, ///< Random-access output iterator type for writing selected items typename NumSelectedIteratorT, ///< Output iterator type for recording the number of items selected typename ScanTileStateT, ///< Tile status interface type typename SelectOpT, ///< Selection operator type (NullType if selection flags or discontinuity flagging is to be used for selection) typename EqualityOpT, ///< Equality operator type (NullType if selection functor or selection flags is to be used for selection) typename OffsetT, ///< Signed integer type for global offsets bool KEEP_REJECTS> ///< Whether or not we push rejected items to the back of the output __launch_bounds__ (int(AgentSelectIfPolicyT::BLOCK_THREADS)) __global__ void DeviceSelectSweepKernel( InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items FlagsInputIteratorT d_flags, ///< [in] Pointer to the input sequence of selection flags (if applicable) SelectedOutputIteratorT d_selected_out, ///< [out] Pointer to the output sequence of selected data items NumSelectedIteratorT d_num_selected_out, ///< [out] Pointer to the total number of items selected (i.e., length of \p d_selected_out) ScanTileStateT tile_status, ///< [in] Tile status interface SelectOpT select_op, ///< [in] Selection operator EqualityOpT equality_op, ///< [in] Equality operator OffsetT num_items, ///< [in] Total number of input items (i.e., length of \p d_in) int num_tiles) ///< [in] Total number of tiles for the entire problem { // Thread block type for selecting data from input tiles typedef AgentSelectIf< AgentSelectIfPolicyT, InputIteratorT, 
FlagsInputIteratorT, SelectedOutputIteratorT, SelectOpT, EqualityOpT, OffsetT, KEEP_REJECTS> AgentSelectIfT; // Shared memory for AgentSelectIf __shared__ typename AgentSelectIfT::TempStorage temp_storage; // Process tiles AgentSelectIfT(temp_storage, d_in, d_flags, d_selected_out, select_op, equality_op, num_items).ConsumeRange( num_tiles, tile_status, d_num_selected_out); } /****************************************************************************** * Dispatch ******************************************************************************/ /** * Utility class for dispatching the appropriately-tuned kernels for DeviceSelect */ template < typename InputIteratorT, ///< Random-access input iterator type for reading input items typename FlagsInputIteratorT, ///< Random-access input iterator type for reading selection flags (NullType* if a selection functor or discontinuity flagging is to be used for selection) typename SelectedOutputIteratorT, ///< Random-access output iterator type for writing selected items typename NumSelectedIteratorT, ///< Output iterator type for recording the number of items selected typename SelectOpT, ///< Selection operator type (NullType if selection flags or discontinuity flagging is to be used for selection) typename EqualityOpT, ///< Equality operator type (NullType if selection functor or selection flags is to be used for selection) typename OffsetT, ///< Signed integer type for global offsets bool KEEP_REJECTS> ///< Whether or not we push rejected items to the back of the output struct DispatchSelectIf { /****************************************************************************** * Types and constants ******************************************************************************/ // The output value type typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? typename std::iterator_traits::value_type, // ... then the input iterator's value type, typename std::iterator_traits::value_type>::Type OutputT; // ... else the output iterator's value type // The flag value type typedef typename std::iterator_traits::value_type FlagT; enum { INIT_KERNEL_THREADS = 128, }; // Tile status descriptor interface type typedef ScanTileState ScanTileStateT; /****************************************************************************** * Tuning policies ******************************************************************************/ /// SM35 struct Policy350 { enum { NOMINAL_4B_ITEMS_PER_THREAD = 10, ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(OutputT)))), }; typedef AgentSelectIfPolicy< 128, ITEMS_PER_THREAD, BLOCK_LOAD_DIRECT, LOAD_LDG, BLOCK_SCAN_WARP_SCANS> SelectIfPolicyT; }; /// SM30 struct Policy300 { enum { NOMINAL_4B_ITEMS_PER_THREAD = 7, ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(3, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(OutputT)))), }; typedef AgentSelectIfPolicy< 128, ITEMS_PER_THREAD, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, BLOCK_SCAN_WARP_SCANS> SelectIfPolicyT; }; /// SM20 struct Policy200 { enum { NOMINAL_4B_ITEMS_PER_THREAD = (KEEP_REJECTS) ? 
7 : 15, ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(OutputT)))), }; typedef AgentSelectIfPolicy< 128, ITEMS_PER_THREAD, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, BLOCK_SCAN_WARP_SCANS> SelectIfPolicyT; }; /// SM13 struct Policy130 { enum { NOMINAL_4B_ITEMS_PER_THREAD = 9, ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(OutputT)))), }; typedef AgentSelectIfPolicy< 64, ITEMS_PER_THREAD, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, BLOCK_SCAN_RAKING_MEMOIZE> SelectIfPolicyT; }; /// SM10 struct Policy100 { enum { NOMINAL_4B_ITEMS_PER_THREAD = 9, ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(OutputT)))), }; typedef AgentSelectIfPolicy< 64, ITEMS_PER_THREAD, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, BLOCK_SCAN_RAKING> SelectIfPolicyT; }; /****************************************************************************** * Tuning policies of current PTX compiler pass ******************************************************************************/ #if (CUB_PTX_ARCH >= 350) typedef Policy350 PtxPolicy; #elif (CUB_PTX_ARCH >= 300) typedef Policy300 PtxPolicy; #elif (CUB_PTX_ARCH >= 200) typedef Policy200 PtxPolicy; #elif (CUB_PTX_ARCH >= 130) typedef Policy130 PtxPolicy; #else typedef Policy100 PtxPolicy; #endif // "Opaque" policies (whose parameterizations aren't reflected in the type signature) struct PtxSelectIfPolicyT : PtxPolicy::SelectIfPolicyT {}; /****************************************************************************** * Utilities ******************************************************************************/ /** * Initialize kernel dispatch configurations with the policies corresponding to the PTX assembly we will use */ template CUB_RUNTIME_FUNCTION __forceinline__ static void InitConfigs( int ptx_version, KernelConfig &select_if_config) { #if (CUB_PTX_ARCH > 0) (void)ptx_version; // We're on the device, so initialize the kernel dispatch configurations with the current PTX policy select_if_config.template Init(); #else // We're on the host, so lookup and initialize the kernel dispatch configurations with the policies that match the device's PTX version if (ptx_version >= 350) { select_if_config.template Init(); } else if (ptx_version >= 300) { select_if_config.template Init(); } else if (ptx_version >= 200) { select_if_config.template Init(); } else if (ptx_version >= 130) { select_if_config.template Init(); } else { select_if_config.template Init(); } #endif } /** * Kernel kernel dispatch configuration. */ struct KernelConfig { int block_threads; int items_per_thread; int tile_items; template CUB_RUNTIME_FUNCTION __forceinline__ void Init() { block_threads = PolicyT::BLOCK_THREADS; items_per_thread = PolicyT::ITEMS_PER_THREAD; tile_items = block_threads * items_per_thread; } }; /****************************************************************************** * Dispatch entrypoints ******************************************************************************/ /** * Internal dispatch routine for computing a device-wide selection using the * specified kernel functions. */ template < typename ScanInitKernelPtrT, ///< Function type of cub::DeviceScanInitKernel typename SelectIfKernelPtrT> ///< Function type of cub::SelectIfKernelPtrT CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t Dispatch( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. 
When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items FlagsInputIteratorT d_flags, ///< [in] Pointer to the input sequence of selection flags (if applicable) SelectedOutputIteratorT d_selected_out, ///< [in] Pointer to the output sequence of selected data items NumSelectedIteratorT d_num_selected_out, ///< [in] Pointer to the total number of items selected (i.e., length of \p d_selected_out) SelectOpT select_op, ///< [in] Selection operator EqualityOpT equality_op, ///< [in] Equality operator OffsetT num_items, ///< [in] Total number of input items (i.e., length of \p d_in) cudaStream_t stream, ///< [in] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous, ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. int /*ptx_version*/, ///< [in] PTX version of dispatch kernels ScanInitKernelPtrT scan_init_kernel, ///< [in] Kernel function pointer to parameterization of cub::DeviceScanInitKernel SelectIfKernelPtrT select_if_kernel, ///< [in] Kernel function pointer to parameterization of cub::DeviceSelectSweepKernel KernelConfig select_if_config) ///< [in] Dispatch parameters that match the policy that \p select_if_kernel was compiled for { #ifndef CUB_RUNTIME_ENABLED (void)d_temp_storage; (void)temp_storage_bytes; (void)d_in; (void)d_flags; (void)d_selected_out; (void)d_num_selected_out; (void)select_op; (void)equality_op; (void)num_items; (void)stream; (void)debug_synchronous; (void)scan_init_kernel; (void)select_if_kernel; (void)select_if_config; // Kernel launch not supported from this device return CubDebug(cudaErrorNotSupported); #else cudaError error = cudaSuccess; do { // Get device ordinal int device_ordinal; if (CubDebug(error = cudaGetDevice(&device_ordinal))) break; // Get SM count int sm_count; if (CubDebug(error = cudaDeviceGetAttribute (&sm_count, cudaDevAttrMultiProcessorCount, device_ordinal))) break; // Number of input tiles int tile_size = select_if_config.block_threads * select_if_config.items_per_thread; int num_tiles = (num_items + tile_size - 1) / tile_size; // Specify temporary storage allocation requirements size_t allocation_sizes[1]; if (CubDebug(error = ScanTileStateT::AllocationSize(num_tiles, allocation_sizes[0]))) break; // bytes needed for tile status descriptors // Compute allocation pointers into the single storage blob (or compute the necessary size of the blob) void* allocations[1]; if (CubDebug(error = AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes))) break; if (d_temp_storage == NULL) { // Return if the caller is simply requesting the size of the storage allocation break; } // Construct the tile status interface ScanTileStateT tile_status; if (CubDebug(error = tile_status.Init(num_tiles, allocations[0], allocation_sizes[0]))) break; // Log scan_init_kernel configuration int init_grid_size = CUB_MAX(1, (num_tiles + INIT_KERNEL_THREADS - 1) / INIT_KERNEL_THREADS); if (debug_synchronous) _CubLog("Invoking scan_init_kernel<<<%d, %d, 0, %lld>>>()\n", init_grid_size, INIT_KERNEL_THREADS, (long long) stream); // Invoke scan_init_kernel to initialize tile descriptors scan_init_kernel<<>>( tile_status, num_tiles, d_num_selected_out); // Check for failure to 
launch if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; // Return if empty problem if (num_items == 0) break; // Get SM occupancy for select_if_kernel int range_select_sm_occupancy; if (CubDebug(error = MaxSmOccupancy( range_select_sm_occupancy, // out select_if_kernel, select_if_config.block_threads))) break; // Get max x-dimension of grid int max_dim_x; if (CubDebug(error = cudaDeviceGetAttribute(&max_dim_x, cudaDevAttrMaxGridDimX, device_ordinal))) break;; // Get grid size for scanning tiles dim3 scan_grid_size; scan_grid_size.z = 1; scan_grid_size.y = ((unsigned int) num_tiles + max_dim_x - 1) / max_dim_x; scan_grid_size.x = CUB_MIN(num_tiles, max_dim_x); // Log select_if_kernel configuration if (debug_synchronous) _CubLog("Invoking select_if_kernel<<<{%d,%d,%d}, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy\n", scan_grid_size.x, scan_grid_size.y, scan_grid_size.z, select_if_config.block_threads, (long long) stream, select_if_config.items_per_thread, range_select_sm_occupancy); // Invoke select_if_kernel select_if_kernel<<>>( d_in, d_flags, d_selected_out, d_num_selected_out, tile_status, select_op, equality_op, num_items, num_tiles); // Check for failure to launch if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; } while (0); return error; #endif // CUB_RUNTIME_ENABLED } /** * Internal dispatch routine */ CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t Dispatch( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items FlagsInputIteratorT d_flags, ///< [in] Pointer to the input sequence of selection flags (if applicable) SelectedOutputIteratorT d_selected_out, ///< [in] Pointer to the output sequence of selected data items NumSelectedIteratorT d_num_selected_out, ///< [in] Pointer to the total number of items selected (i.e., length of \p d_selected_out) SelectOpT select_op, ///< [in] Selection operator EqualityOpT equality_op, ///< [in] Equality operator OffsetT num_items, ///< [in] Total number of input items (i.e., length of \p d_in) cudaStream_t stream, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. 
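// This convenience overload is reached through the public cub::DeviceSelect entry points
// (Flagged / If / Unique) after they instantiate the kernels. A minimal sketch of the flag-based
// selection path, using the same two-phase temporary-storage protocol (d_in, d_flags, d_out,
// d_num_selected_out and num_items are assumed, hypothetical device buffers/values owned by the
// caller):
//
//     void   *d_temp_storage     = NULL;
//     size_t  temp_storage_bytes = 0;
//     // Size query only (d_temp_storage is NULL).
//     cub::DeviceSelect::Flagged(d_temp_storage, temp_storage_bytes,
//                                d_in, d_flags, d_out, d_num_selected_out, num_items);
//     cudaMalloc(&d_temp_storage, temp_storage_bytes);
//     // Actual selection; the number of selected items is written to d_num_selected_out.
//     cub::DeviceSelect::Flagged(d_temp_storage, temp_storage_bytes,
//                                d_in, d_flags, d_out, d_num_selected_out, num_items);
//     cudaFree(d_temp_storage);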
{ cudaError error = cudaSuccess; do { // Get PTX version int ptx_version; #if (CUB_PTX_ARCH == 0) if (CubDebug(error = PtxVersion(ptx_version))) break; #else ptx_version = CUB_PTX_ARCH; #endif // Get kernel kernel dispatch configurations KernelConfig select_if_config; InitConfigs(ptx_version, select_if_config); // Dispatch if (CubDebug(error = Dispatch( d_temp_storage, temp_storage_bytes, d_in, d_flags, d_selected_out, d_num_selected_out, select_op, equality_op, num_items, stream, debug_synchronous, ptx_version, DeviceCompactInitKernel, DeviceSelectSweepKernel, select_if_config))) break; } while (0); return error; } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/device/dispatch/dispatch_spmv_orig.cuh000066400000000000000000001047011411340063500260040ustar00rootroot00000000000000 /****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::DeviceSpmv provides device-wide parallel operations for performing sparse-matrix * vector multiplication (SpMV). */ #pragma once #include #include #include "../../agent/single_pass_scan_operators.cuh" #include "../../agent/agent_segment_fixup.cuh" #include "../../agent/agent_spmv_orig.cuh" #include "../../util_type.cuh" #include "../../util_debug.cuh" #include "../../util_device.cuh" #include "../../thread/thread_search.cuh" #include "../../grid/grid_queue.cuh" #include "../../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * SpMV kernel entry points *****************************************************************************/ /** * Spmv search kernel. Identifies merge path starting coordinates for each tile. 
*/ template < typename AgentSpmvPolicyT, ///< Parameterized SpmvPolicy tuning policy type typename ValueT, ///< Matrix and vector value type typename OffsetT> ///< Signed integer type for sequence offsets __global__ void DeviceSpmv1ColKernel( SpmvParams spmv_params) ///< [in] SpMV input parameter bundle { typedef CacheModifiedInputIterator< AgentSpmvPolicyT::VECTOR_VALUES_LOAD_MODIFIER, ValueT, OffsetT> VectorValueIteratorT; VectorValueIteratorT wrapped_vector_x(spmv_params.d_vector_x); int row_idx = (blockIdx.x * blockDim.x) + threadIdx.x; if (row_idx < spmv_params.num_rows) { OffsetT end_nonzero_idx = spmv_params.d_row_end_offsets[row_idx]; OffsetT nonzero_idx = spmv_params.d_row_end_offsets[row_idx - 1]; ValueT value = 0.0; if (end_nonzero_idx != nonzero_idx) { value = spmv_params.d_values[nonzero_idx] * wrapped_vector_x[spmv_params.d_column_indices[nonzero_idx]]; } spmv_params.d_vector_y[row_idx] = value; } } /** * Spmv search kernel. Identifies merge path starting coordinates for each tile. */ template < typename SpmvPolicyT, ///< Parameterized SpmvPolicy tuning policy type typename OffsetT, ///< Signed integer type for sequence offsets typename CoordinateT, ///< Merge path coordinate type typename SpmvParamsT> ///< SpmvParams type __global__ void DeviceSpmvSearchKernel( int num_merge_tiles, ///< [in] Number of SpMV merge tiles (spmv grid size) CoordinateT* d_tile_coordinates, ///< [out] Pointer to the temporary array of tile starting coordinates SpmvParamsT spmv_params) ///< [in] SpMV input parameter bundle { /// Constants enum { BLOCK_THREADS = SpmvPolicyT::BLOCK_THREADS, ITEMS_PER_THREAD = SpmvPolicyT::ITEMS_PER_THREAD, TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, }; typedef CacheModifiedInputIterator< SpmvPolicyT::ROW_OFFSETS_SEARCH_LOAD_MODIFIER, OffsetT, OffsetT> RowOffsetsSearchIteratorT; // Find the starting coordinate for all tiles (plus the end coordinate of the last one) int tile_idx = (blockIdx.x * blockDim.x) + threadIdx.x; if (tile_idx < num_merge_tiles + 1) { OffsetT diagonal = (tile_idx * TILE_ITEMS); CoordinateT tile_coordinate; CountingInputIterator nonzero_indices(0); // Search the merge path MergePathSearch( diagonal, RowOffsetsSearchIteratorT(spmv_params.d_row_end_offsets), nonzero_indices, spmv_params.num_rows, spmv_params.num_nonzeros, tile_coordinate); // Output starting offset d_tile_coordinates[tile_idx] = tile_coordinate; } } /** * Spmv agent entry point */ template < typename SpmvPolicyT, ///< Parameterized SpmvPolicy tuning policy type typename ScanTileStateT, ///< Tile status interface type typename ValueT, ///< Matrix and vector value type typename OffsetT, ///< Signed integer type for sequence offsets typename CoordinateT, ///< Merge path coordinate type bool HAS_ALPHA, ///< Whether the input parameter Alpha is 1 bool HAS_BETA> ///< Whether the input parameter Beta is 0 __launch_bounds__ (int(SpmvPolicyT::BLOCK_THREADS)) __global__ void DeviceSpmvKernel( SpmvParams spmv_params, ///< [in] SpMV input parameter bundle CoordinateT* d_tile_coordinates, ///< [in] Pointer to the temporary array of tile starting coordinates KeyValuePair* d_tile_carry_pairs, ///< [out] Pointer to the temporary array carry-out dot product row-ids, one per block int num_tiles, ///< [in] Number of merge tiles ScanTileStateT tile_state, ///< [in] Tile status interface for fixup reduce-by-key kernel int num_segment_fixup_tiles) ///< [in] Number of reduce-by-key tiles (fixup grid size) { // Spmv agent type specialization typedef AgentSpmv< SpmvPolicyT, ValueT, OffsetT, HAS_ALPHA, 
HAS_BETA> AgentSpmvT; // Shared memory for AgentSpmv __shared__ typename AgentSpmvT::TempStorage temp_storage; AgentSpmvT(temp_storage, spmv_params).ConsumeTile( d_tile_coordinates, d_tile_carry_pairs, num_tiles); // Initialize fixup tile status tile_state.InitializeStatus(num_segment_fixup_tiles); } /** * Multi-block reduce-by-key sweep kernel entry point */ template < typename AgentSegmentFixupPolicyT, ///< Parameterized AgentSegmentFixupPolicy tuning policy type typename PairsInputIteratorT, ///< Random-access input iterator type for keys typename AggregatesOutputIteratorT, ///< Random-access output iterator type for values typename OffsetT, ///< Signed integer type for global offsets typename ScanTileStateT> ///< Tile status interface type __launch_bounds__ (int(AgentSegmentFixupPolicyT::BLOCK_THREADS)) __global__ void DeviceSegmentFixupKernel( PairsInputIteratorT d_pairs_in, ///< [in] Pointer to the array carry-out dot product row-ids, one per spmv block AggregatesOutputIteratorT d_aggregates_out, ///< [in,out] Output value aggregates OffsetT num_items, ///< [in] Total number of items to select from int num_tiles, ///< [in] Total number of tiles for the entire problem ScanTileStateT tile_state) ///< [in] Tile status interface { // Thread block type for reducing tiles of value segments typedef AgentSegmentFixup< AgentSegmentFixupPolicyT, PairsInputIteratorT, AggregatesOutputIteratorT, cub::Equality, cub::Sum, OffsetT> AgentSegmentFixupT; // Shared memory for AgentSegmentFixup __shared__ typename AgentSegmentFixupT::TempStorage temp_storage; // Process tiles AgentSegmentFixupT(temp_storage, d_pairs_in, d_aggregates_out, cub::Equality(), cub::Sum()).ConsumeRange( num_items, num_tiles, tile_state); } /****************************************************************************** * Dispatch ******************************************************************************/ /** * Utility class for dispatching the appropriately-tuned kernels for DeviceSpmv */ template < typename ValueT, ///< Matrix and vector value type typename OffsetT> ///< Signed integer type for global offsets struct DispatchSpmv { //--------------------------------------------------------------------- // Constants and Types //--------------------------------------------------------------------- enum { INIT_KERNEL_THREADS = 128 }; // SpmvParams bundle type typedef SpmvParams SpmvParamsT; // 2D merge path coordinate type typedef typename CubVector::Type CoordinateT; // Tile status descriptor interface type typedef ReduceByKeyScanTileState ScanTileStateT; // Tuple type for scanning (pairs accumulated segment-value with segment-index) typedef KeyValuePair KeyValuePairT; //--------------------------------------------------------------------- // Tuning policies //--------------------------------------------------------------------- /// SM11 struct Policy110 { typedef AgentSpmvPolicy< 128, 1, LOAD_DEFAULT, LOAD_DEFAULT, LOAD_DEFAULT, LOAD_DEFAULT, LOAD_DEFAULT, false, BLOCK_SCAN_WARP_SCANS> SpmvPolicyT; typedef AgentSegmentFixupPolicy< 128, 4, BLOCK_LOAD_VECTORIZE, LOAD_DEFAULT, BLOCK_SCAN_WARP_SCANS> SegmentFixupPolicyT; }; /// SM20 struct Policy200 { typedef AgentSpmvPolicy< 96, 18, LOAD_DEFAULT, LOAD_DEFAULT, LOAD_DEFAULT, LOAD_DEFAULT, LOAD_DEFAULT, false, BLOCK_SCAN_RAKING> SpmvPolicyT; typedef AgentSegmentFixupPolicy< 128, 4, BLOCK_LOAD_VECTORIZE, LOAD_DEFAULT, BLOCK_SCAN_WARP_SCANS> SegmentFixupPolicyT; }; /// SM30 struct Policy300 { typedef AgentSpmvPolicy< 96, 6, LOAD_DEFAULT, LOAD_DEFAULT, LOAD_DEFAULT, LOAD_DEFAULT, 
LOAD_DEFAULT, false, BLOCK_SCAN_WARP_SCANS> SpmvPolicyT; typedef AgentSegmentFixupPolicy< 128, 4, BLOCK_LOAD_VECTORIZE, LOAD_DEFAULT, BLOCK_SCAN_WARP_SCANS> SegmentFixupPolicyT; }; /// SM35 struct Policy350 { typedef AgentSpmvPolicy< (sizeof(ValueT) > 4) ? 96 : 128, (sizeof(ValueT) > 4) ? 4 : 7, LOAD_LDG, LOAD_CA, LOAD_LDG, LOAD_LDG, LOAD_LDG, (sizeof(ValueT) > 4) ? true : false, BLOCK_SCAN_WARP_SCANS> SpmvPolicyT; typedef AgentSegmentFixupPolicy< 128, 3, BLOCK_LOAD_VECTORIZE, LOAD_LDG, BLOCK_SCAN_WARP_SCANS> SegmentFixupPolicyT; }; /// SM37 struct Policy370 { typedef AgentSpmvPolicy< (sizeof(ValueT) > 4) ? 128 : 128, (sizeof(ValueT) > 4) ? 9 : 14, LOAD_LDG, LOAD_CA, LOAD_LDG, LOAD_LDG, LOAD_LDG, false, BLOCK_SCAN_WARP_SCANS> SpmvPolicyT; typedef AgentSegmentFixupPolicy< 128, 3, BLOCK_LOAD_VECTORIZE, LOAD_LDG, BLOCK_SCAN_WARP_SCANS> SegmentFixupPolicyT; }; /// SM50 struct Policy500 { typedef AgentSpmvPolicy< (sizeof(ValueT) > 4) ? 64 : 128, (sizeof(ValueT) > 4) ? 6 : 7, LOAD_LDG, LOAD_DEFAULT, (sizeof(ValueT) > 4) ? LOAD_LDG : LOAD_DEFAULT, (sizeof(ValueT) > 4) ? LOAD_LDG : LOAD_DEFAULT, LOAD_LDG, (sizeof(ValueT) > 4) ? true : false, (sizeof(ValueT) > 4) ? BLOCK_SCAN_WARP_SCANS : BLOCK_SCAN_RAKING_MEMOIZE> SpmvPolicyT; typedef AgentSegmentFixupPolicy< 128, 3, BLOCK_LOAD_VECTORIZE, LOAD_LDG, BLOCK_SCAN_RAKING_MEMOIZE> SegmentFixupPolicyT; }; //--------------------------------------------------------------------- // Tuning policies of current PTX compiler pass //--------------------------------------------------------------------- #if (CUB_PTX_ARCH >= 500) typedef Policy500 PtxPolicy; #elif (CUB_PTX_ARCH >= 370) typedef Policy370 PtxPolicy; #elif (CUB_PTX_ARCH >= 350) typedef Policy350 PtxPolicy; #elif (CUB_PTX_ARCH >= 300) typedef Policy300 PtxPolicy; #elif (CUB_PTX_ARCH >= 200) typedef Policy200 PtxPolicy; #else typedef Policy110 PtxPolicy; #endif // "Opaque" policies (whose parameterizations aren't reflected in the type signature) struct PtxSpmvPolicyT : PtxPolicy::SpmvPolicyT {}; struct PtxSegmentFixupPolicy : PtxPolicy::SegmentFixupPolicyT {}; //--------------------------------------------------------------------- // Utilities //--------------------------------------------------------------------- /** * Initialize kernel dispatch configurations with the policies corresponding to the PTX assembly we will use */ template CUB_RUNTIME_FUNCTION __forceinline__ static void InitConfigs( int ptx_version, KernelConfig &spmv_config, KernelConfig &segment_fixup_config) { #if (CUB_PTX_ARCH > 0) // We're on the device, so initialize the kernel dispatch configurations with the current PTX policy spmv_config.template Init(); segment_fixup_config.template Init(); #else // We're on the host, so lookup and initialize the kernel dispatch configurations with the policies that match the device's PTX version if (ptx_version >= 500) { spmv_config.template Init(); segment_fixup_config.template Init(); } else if (ptx_version >= 370) { spmv_config.template Init(); segment_fixup_config.template Init(); } else if (ptx_version >= 350) { spmv_config.template Init(); segment_fixup_config.template Init(); } else if (ptx_version >= 300) { spmv_config.template Init(); segment_fixup_config.template Init(); } else if (ptx_version >= 200) { spmv_config.template Init(); segment_fixup_config.template Init(); } else { spmv_config.template Init(); segment_fixup_config.template Init(); } #endif } /** * Kernel kernel dispatch configuration. 
*/ struct KernelConfig { int block_threads; int items_per_thread; int tile_items; template CUB_RUNTIME_FUNCTION __forceinline__ void Init() { block_threads = PolicyT::BLOCK_THREADS; items_per_thread = PolicyT::ITEMS_PER_THREAD; tile_items = block_threads * items_per_thread; } }; //--------------------------------------------------------------------- // Dispatch entrypoints //--------------------------------------------------------------------- /** * Internal dispatch routine for computing a device-wide reduction using the * specified kernel functions. * * If the input is larger than a single tile, this method uses two-passes of * kernel invocations. */ template < typename Spmv1ColKernelT, ///< Function type of cub::DeviceSpmv1ColKernel typename SpmvSearchKernelT, ///< Function type of cub::AgentSpmvSearchKernel typename SpmvKernelT, ///< Function type of cub::AgentSpmvKernel typename SegmentFixupKernelT> ///< Function type of cub::DeviceSegmentFixupKernelT CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t Dispatch( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation SpmvParamsT& spmv_params, ///< SpMV input parameter bundle cudaStream_t stream, ///< [in] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous, ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. Spmv1ColKernelT spmv_1col_kernel, ///< [in] Kernel function pointer to parameterization of DeviceSpmv1ColKernel SpmvSearchKernelT spmv_search_kernel, ///< [in] Kernel function pointer to parameterization of AgentSpmvSearchKernel SpmvKernelT spmv_kernel, ///< [in] Kernel function pointer to parameterization of AgentSpmvKernel SegmentFixupKernelT segment_fixup_kernel, ///< [in] Kernel function pointer to parameterization of cub::DeviceSegmentFixupKernel KernelConfig spmv_config, ///< [in] Dispatch parameters that match the policy that \p spmv_kernel was compiled for KernelConfig segment_fixup_config) ///< [in] Dispatch parameters that match the policy that \p segment_fixup_kernel was compiled for { #ifndef CUB_RUNTIME_ENABLED // Kernel launch not supported from this device return CubDebug(cudaErrorNotSupported ); #else cudaError error = cudaSuccess; do { if (spmv_params.num_cols == 1) { if (d_temp_storage == NULL) { // Return if the caller is simply requesting the size of the storage allocation temp_storage_bytes = 1; break; } // Get search/init grid dims int degen_col_kernel_block_size = INIT_KERNEL_THREADS; int degen_col_kernel_grid_size = (spmv_params.num_rows + degen_col_kernel_block_size - 1) / degen_col_kernel_block_size; if (debug_synchronous) _CubLog("Invoking spmv_1col_kernel<<<%d, %d, 0, %lld>>>()\n", degen_col_kernel_grid_size, degen_col_kernel_block_size, (long long) stream); // Invoke spmv_search_kernel spmv_1col_kernel<<>>( spmv_params); // Check for failure to launch if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; break; } // Get device ordinal int device_ordinal; if (CubDebug(error = cudaGetDevice(&device_ordinal))) break; // Get SM count int sm_count; if (CubDebug(error = cudaDeviceGetAttribute 
(&sm_count, cudaDevAttrMultiProcessorCount, device_ordinal))) break; // Get max x-dimension of grid int max_dim_x; if (CubDebug(error = cudaDeviceGetAttribute(&max_dim_x, cudaDevAttrMaxGridDimX, device_ordinal))) break;; // Total number of spmv work items int num_merge_items = spmv_params.num_rows + spmv_params.num_nonzeros; // Tile sizes of kernels int merge_tile_size = spmv_config.block_threads * spmv_config.items_per_thread; int segment_fixup_tile_size = segment_fixup_config.block_threads * segment_fixup_config.items_per_thread; // Number of tiles for kernels unsigned int num_merge_tiles = (num_merge_items + merge_tile_size - 1) / merge_tile_size; unsigned int num_segment_fixup_tiles = (num_merge_tiles + segment_fixup_tile_size - 1) / segment_fixup_tile_size; // Get SM occupancy for kernels int spmv_sm_occupancy; if (CubDebug(error = MaxSmOccupancy( spmv_sm_occupancy, spmv_kernel, spmv_config.block_threads))) break; int segment_fixup_sm_occupancy; if (CubDebug(error = MaxSmOccupancy( segment_fixup_sm_occupancy, segment_fixup_kernel, segment_fixup_config.block_threads))) break; // Get grid dimensions dim3 spmv_grid_size( CUB_MIN(num_merge_tiles, max_dim_x), (num_merge_tiles + max_dim_x - 1) / max_dim_x, 1); dim3 segment_fixup_grid_size( CUB_MIN(num_segment_fixup_tiles, max_dim_x), (num_segment_fixup_tiles + max_dim_x - 1) / max_dim_x, 1); // Get the temporary storage allocation requirements size_t allocation_sizes[3]; if (CubDebug(error = ScanTileStateT::AllocationSize(num_segment_fixup_tiles, allocation_sizes[0]))) break; // bytes needed for reduce-by-key tile status descriptors allocation_sizes[1] = num_merge_tiles * sizeof(KeyValuePairT); // bytes needed for block carry-out pairs allocation_sizes[2] = (num_merge_tiles + 1) * sizeof(CoordinateT); // bytes needed for tile starting coordinates // Alias the temporary allocations from the single storage blob (or compute the necessary size of the blob) void* allocations[3]; if (CubDebug(error = AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes))) break; if (d_temp_storage == NULL) { // Return if the caller is simply requesting the size of the storage allocation break; } // Construct the tile status interface ScanTileStateT tile_state; if (CubDebug(error = tile_state.Init(num_segment_fixup_tiles, allocations[0], allocation_sizes[0]))) break; // Alias the other allocations KeyValuePairT* d_tile_carry_pairs = (KeyValuePairT*) allocations[1]; // Agent carry-out pairs CoordinateT* d_tile_coordinates = (CoordinateT*) allocations[2]; // Agent starting coordinates // Get search/init grid dims int search_block_size = INIT_KERNEL_THREADS; int search_grid_size = (num_merge_tiles + 1 + search_block_size - 1) / search_block_size; #if (CUB_PTX_ARCH == 0) // Init textures if (CubDebug(error = spmv_params.t_vector_x.BindTexture(spmv_params.d_vector_x))) break; #endif if (search_grid_size < sm_count) // if (num_merge_tiles < spmv_sm_occupancy * sm_count) { // Not enough spmv tiles to saturate the device: have spmv blocks search their own staring coords d_tile_coordinates = NULL; } else { // Use separate search kernel if we have enough spmv tiles to saturate the device // Log spmv_search_kernel configuration if (debug_synchronous) _CubLog("Invoking spmv_search_kernel<<<%d, %d, 0, %lld>>>()\n", search_grid_size, search_block_size, (long long) stream); // Invoke spmv_search_kernel spmv_search_kernel<<>>( num_merge_tiles, d_tile_coordinates, spmv_params); // Check for failure to launch if (CubDebug(error = 
cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; } // Log spmv_kernel configuration if (debug_synchronous) _CubLog("Invoking spmv_kernel<<<{%d,%d,%d}, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy\n", spmv_grid_size.x, spmv_grid_size.y, spmv_grid_size.z, spmv_config.block_threads, (long long) stream, spmv_config.items_per_thread, spmv_sm_occupancy); // Invoke spmv_kernel spmv_kernel<<>>( spmv_params, d_tile_coordinates, d_tile_carry_pairs, num_merge_tiles, tile_state, num_segment_fixup_tiles); // Check for failure to launch if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; // Run reduce-by-key fixup if necessary if (num_merge_tiles > 1) { // Log segment_fixup_kernel configuration if (debug_synchronous) _CubLog("Invoking segment_fixup_kernel<<<{%d,%d,%d}, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy\n", segment_fixup_grid_size.x, segment_fixup_grid_size.y, segment_fixup_grid_size.z, segment_fixup_config.block_threads, (long long) stream, segment_fixup_config.items_per_thread, segment_fixup_sm_occupancy); // Invoke segment_fixup_kernel segment_fixup_kernel<<>>( d_tile_carry_pairs, spmv_params.d_vector_y, num_merge_tiles, num_segment_fixup_tiles, tile_state); // Check for failure to launch if (CubDebug(error = cudaPeekAtLastError())) break; // Sync the stream if specified to flush runtime errors if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; } #if (CUB_PTX_ARCH == 0) // Free textures if (CubDebug(error = spmv_params.t_vector_x.UnbindTexture())) break; #endif } while (0); return error; #endif // CUB_RUNTIME_ENABLED } /** * Internal dispatch routine for computing a device-wide reduction */ CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t Dispatch( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation SpmvParamsT& spmv_params, ///< SpMV input parameter bundle cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. 
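// A minimal caller-side sketch of the two-phase pattern this dispatch implements
// (not part of the original header). In the public CUB API, cub::DeviceSpmv::CsrMV
// ultimately forwards to this routine: the first call, made with d_temp_storage == NULL,
// only writes the required size into temp_storage_bytes; the second call runs the
// search / spmv / segment-fixup kernels. The device arrays and sizes below
// (d_values, d_row_offsets, d_column_indices, d_vector_x, d_vector_y, num_rows,
// num_cols, num_nonzeros) are assumed to describe an already-populated CSR matrix.
//
//     void*  d_temp_storage     = NULL;
//     size_t temp_storage_bytes = 0;
//
//     // Pass 1: query temporary storage size (no work is done)
//     cub::DeviceSpmv::CsrMV(d_temp_storage, temp_storage_bytes,
//         d_values, d_row_offsets, d_column_indices,
//         d_vector_x, d_vector_y, num_rows, num_cols, num_nonzeros);
//
//     cudaMalloc(&d_temp_storage, temp_storage_bytes);
//
//     // Pass 2: compute y = A*x
//     cub::DeviceSpmv::CsrMV(d_temp_storage, temp_storage_bytes,
//         d_values, d_row_offsets, d_column_indices,
//         d_vector_x, d_vector_y, num_rows, num_cols, num_nonzeros);
//
//     cudaFree(d_temp_storage);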
{ cudaError error = cudaSuccess; do { // Get PTX version int ptx_version; #if (CUB_PTX_ARCH == 0) if (CubDebug(error = PtxVersion(ptx_version))) break; #else ptx_version = CUB_PTX_ARCH; #endif // Get kernel kernel dispatch configurations KernelConfig spmv_config, segment_fixup_config; InitConfigs(ptx_version, spmv_config, segment_fixup_config); if (CubDebug(error = Dispatch( d_temp_storage, temp_storage_bytes, spmv_params, stream, debug_synchronous, DeviceSpmv1ColKernel, DeviceSpmvSearchKernel, DeviceSpmvKernel, DeviceSegmentFixupKernel, spmv_config, segment_fixup_config))) break; /* // Dispatch if (spmv_params.beta == 0.0) { if (spmv_params.alpha == 1.0) { // Dispatch y = A*x if (CubDebug(error = Dispatch( d_temp_storage, temp_storage_bytes, spmv_params, stream, debug_synchronous, DeviceSpmv1ColKernel, DeviceSpmvSearchKernel, DeviceSpmvKernel, DeviceSegmentFixupKernel, spmv_config, segment_fixup_config))) break; } else { // Dispatch y = alpha*A*x if (CubDebug(error = Dispatch( d_temp_storage, temp_storage_bytes, spmv_params, stream, debug_synchronous, DeviceSpmvSearchKernel, DeviceSpmvKernel, DeviceSegmentFixupKernel, spmv_config, segment_fixup_config))) break; } } else { if (spmv_params.alpha == 1.0) { // Dispatch y = A*x + beta*y if (CubDebug(error = Dispatch( d_temp_storage, temp_storage_bytes, spmv_params, stream, debug_synchronous, DeviceSpmvSearchKernel, DeviceSpmvKernel, DeviceSegmentFixupKernel, spmv_config, segment_fixup_config))) break; } else { // Dispatch y = alpha*A*x + beta*y if (CubDebug(error = Dispatch( d_temp_storage, temp_storage_bytes, spmv_params, stream, debug_synchronous, DeviceSpmvSearchKernel, DeviceSpmvKernel, DeviceSegmentFixupKernel, spmv_config, segment_fixup_config))) break; } } */ } while (0); return error; } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/grid/000077500000000000000000000000001411340063500173035ustar00rootroot00000000000000relion-3.1.3/src/gpu_utils/cub/grid/grid_barrier.cuh000066400000000000000000000133451411340063500224450ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::GridBarrier implements a software global barrier among thread blocks within a CUDA grid */ #pragma once #include "../util_debug.cuh" #include "../util_namespace.cuh" #include "../thread/thread_load.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup GridModule * @{ */ /** * \brief GridBarrier implements a software global barrier among thread blocks within a CUDA grid */ class GridBarrier { protected : typedef unsigned int SyncFlag; // Counters in global device memory SyncFlag* d_sync; public: /** * Constructor */ GridBarrier() : d_sync(NULL) {} /** * Synchronize */ __device__ __forceinline__ void Sync() const { volatile SyncFlag *d_vol_sync = d_sync; // Threadfence and syncthreads to make sure global writes are visible before // thread-0 reports in with its sync counter __threadfence(); CTA_SYNC(); if (blockIdx.x == 0) { // Report in ourselves if (threadIdx.x == 0) { d_vol_sync[blockIdx.x] = 1; } CTA_SYNC(); // Wait for everyone else to report in for (int peer_block = threadIdx.x; peer_block < gridDim.x; peer_block += blockDim.x) { while (ThreadLoad(d_sync + peer_block) == 0) { __threadfence_block(); } } CTA_SYNC(); // Let everyone know it's safe to proceed for (int peer_block = threadIdx.x; peer_block < gridDim.x; peer_block += blockDim.x) { d_vol_sync[peer_block] = 0; } } else { if (threadIdx.x == 0) { // Report in d_vol_sync[blockIdx.x] = 1; // Wait for acknowledgment while (ThreadLoad(d_sync + blockIdx.x) == 1) { __threadfence_block(); } } CTA_SYNC(); } } }; /** * \brief GridBarrierLifetime extends GridBarrier to provide lifetime management of the temporary device storage needed for cooperation. * * Uses RAII for lifetime, i.e., device resources are reclaimed when * the destructor is called. 
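 *
 * \par
 * A usage sketch (not from the original header; the kernel name, grid/block
 * sizes and sweep count are hypothetical). The host sizes the sync counters
 * with Setup(), then passes the barrier by value to a cooperative kernel in
 * which every thread calls Sync() between sweeps. Note that \p grid_size must
 * not exceed the number of thread blocks that can be co-resident on the
 * device, otherwise the software barrier deadlocks.
 * \code
 * __global__ void SweepKernel(cub::GridBarrier barrier, int num_sweeps)
 * {
 *     for (int sweep = 0; sweep < num_sweeps; ++sweep)
 *     {
 *         // ... write this sweep's results to global memory ...
 *         barrier.Sync();     // wait for every block before the next sweep
 *     }
 * }
 *
 * // Host side
 * cub::GridBarrierLifetime barrier;
 * barrier.Setup(grid_size);       // lazily allocates and zeroes the counters
 * SweepKernel<<<grid_size, block_threads>>>(barrier, num_sweeps);
 * \endcode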
*/ class GridBarrierLifetime : public GridBarrier { protected: // Number of bytes backed by d_sync size_t sync_bytes; public: /** * Constructor */ GridBarrierLifetime() : GridBarrier(), sync_bytes(0) {} /** * DeviceFrees and resets the progress counters */ cudaError_t HostReset() { cudaError_t retval = cudaSuccess; if (d_sync) { CubDebug(retval = cudaFree(d_sync)); d_sync = NULL; } sync_bytes = 0; return retval; } /** * Destructor */ virtual ~GridBarrierLifetime() { HostReset(); } /** * Sets up the progress counters for the next kernel launch (lazily * allocating and initializing them if necessary) */ cudaError_t Setup(int sweep_grid_size) { cudaError_t retval = cudaSuccess; do { size_t new_sync_bytes = sweep_grid_size * sizeof(SyncFlag); if (new_sync_bytes > sync_bytes) { if (d_sync) { if (CubDebug(retval = cudaFree(d_sync))) break; } sync_bytes = new_sync_bytes; // Allocate and initialize to zero if (CubDebug(retval = cudaMalloc((void**) &d_sync, sync_bytes))) break; if (CubDebug(retval = cudaMemset(d_sync, 0, new_sync_bytes))) break; } } while (0); return retval; } }; /** @} */ // end group GridModule } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/grid/grid_even_share.cuh000066400000000000000000000200061411340063500231260ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::GridEvenShare is a descriptor utility for distributing input among CUDA thread blocks in an "even-share" fashion. Each thread block gets roughly the same number of fixed-size work units (grains). 
*/ #pragma once #include "../util_namespace.cuh" #include "../util_macro.cuh" #include "grid_mapping.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup GridModule * @{ */ /** * \brief GridEvenShare is a descriptor utility for distributing input among * CUDA thread blocks in an "even-share" fashion. Each thread block gets roughly * the same number of input tiles. * * \par Overview * Each thread block is assigned a consecutive sequence of input tiles. To help * preserve alignment and eliminate the overhead of guarded loads for all but the * last thread block, to GridEvenShare assigns one of three different amounts of * work to a given thread block: "big", "normal", or "last". The "big" workloads * are one scheduling grain larger than "normal". The "last" work unit for the * last thread block may be partially-full if the input is not an even multiple of * the scheduling grain size. * * \par * Before invoking a child grid, a parent thread will typically construct an * instance of GridEvenShare. The instance can be passed to child thread blocks * which can initialize their per-thread block offsets using \p BlockInit(). */ template struct GridEvenShare { private: OffsetT total_tiles; int big_shares; OffsetT big_share_items; OffsetT normal_share_items; OffsetT normal_base_offset; public: /// Total number of input items OffsetT num_items; /// Grid size in thread blocks int grid_size; /// OffsetT into input marking the beginning of the owning thread block's segment of input tiles OffsetT block_offset; /// OffsetT into input of marking the end (one-past) of the owning thread block's segment of input tiles OffsetT block_end; /// Stride between input tiles OffsetT block_stride; /** * \brief Constructor. */ __host__ __device__ __forceinline__ GridEvenShare() : total_tiles(0), big_shares(0), big_share_items(0), normal_share_items(0), normal_base_offset(0), num_items(0), grid_size(0), block_offset(0), block_end(0), block_stride(0) {} /** * \brief Dispatch initializer. To be called prior prior to kernel launch. */ __host__ __device__ __forceinline__ void DispatchInit( OffsetT num_items, ///< Total number of input items int max_grid_size, ///< Maximum grid size allowable (actual grid size may be less if not warranted by the the number of input items) int tile_items) ///< Number of data items per input tile { this->block_offset = num_items; // Initialize past-the-end this->block_end = num_items; // Initialize past-the-end this->num_items = num_items; this->total_tiles = (num_items + tile_items - 1) / tile_items; this->grid_size = CUB_MIN(total_tiles, max_grid_size); OffsetT avg_tiles_per_block = total_tiles / grid_size; this->big_shares = total_tiles - (avg_tiles_per_block * grid_size); // leftover grains go to big blocks this->normal_share_items = avg_tiles_per_block * tile_items; this->normal_base_offset = big_shares * tile_items; this->big_share_items = normal_share_items + tile_items; } /** * \brief Initializes ranges for the specified thread block index. Specialized * for a "raking" access pattern in which each thread block is assigned a * consecutive sequence of input tiles. 
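 *
 * \par
 * A hedged sketch of the typical division of labour (not from the original
 * header; TILE_ITEMS, BLOCK_THREADS, the kernel name and ConsumeTile() are
 * placeholders). The host partitions the input with DispatchInit() and
 * launches \p grid_size thread blocks; each block recovers its tile range with
 * BlockInit() and walks it in tile-sized steps:
 * \code
 * // Device side
 * __global__ void ConsumeKernel(int *d_in, cub::GridEvenShare<int> even_share)
 * {
 *     even_share.BlockInit<TILE_ITEMS, cub::GRID_MAPPING_RAKE>();
 *     for (int offset = even_share.block_offset;
 *          offset < even_share.block_end;
 *          offset += even_share.block_stride)
 *     {
 *         ConsumeTile(d_in + offset);   // process (up to) TILE_ITEMS items
 *     }
 * }
 *
 * // Host side
 * cub::GridEvenShare<int> even_share;
 * even_share.DispatchInit(num_items, max_grid_size, TILE_ITEMS);
 * ConsumeKernel<<<even_share.grid_size, BLOCK_THREADS>>>(d_in, even_share);
 * \endcode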
*/ template __device__ __forceinline__ void BlockInit( int block_id, Int2Type /*strategy_tag*/) { block_stride = TILE_ITEMS; if (block_id < big_shares) { // This thread block gets a big share of grains (avg_tiles_per_block + 1) block_offset = (block_id * big_share_items); block_end = block_offset + big_share_items; } else if (block_id < total_tiles) { // This thread block gets a normal share of grains (avg_tiles_per_block) block_offset = normal_base_offset + (block_id * normal_share_items); block_end = CUB_MIN(num_items, block_offset + normal_share_items); } // Else default past-the-end } /** * \brief Block-initialization, specialized for a "raking" access * pattern in which each thread block is assigned a consecutive sequence * of input tiles. */ template __device__ __forceinline__ void BlockInit( int block_id, Int2Type /*strategy_tag*/) { block_stride = grid_size * TILE_ITEMS; block_offset = (block_id * TILE_ITEMS); block_end = num_items; } /** * \brief Block-initialization, specialized for "strip mining" access * pattern in which the input tiles assigned to each thread block are * separated by a stride equal to the the extent of the grid. */ template < int TILE_ITEMS, GridMappingStrategy STRATEGY> __device__ __forceinline__ void BlockInit() { BlockInit(blockIdx.x, Int2Type()); } /** * \brief Block-initialization, specialized for a "raking" access * pattern in which each thread block is assigned a consecutive sequence * of input tiles. */ template __device__ __forceinline__ void BlockInit( OffsetT block_offset, ///< [in] Threadblock begin offset (inclusive) OffsetT block_end) ///< [in] Threadblock end offset (exclusive) { this->block_offset = block_offset; this->block_end = block_end; this->block_stride = TILE_ITEMS; } }; /** @} */ // end group GridModule } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/grid/grid_mapping.cuh000066400000000000000000000113331411340063500224450ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::GridMappingStrategy enumerates alternative strategies for mapping constant-sized tiles of device-wide data onto a grid of CUDA thread blocks. */ #pragma once #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup GridModule * @{ */ /****************************************************************************** * Mapping policies *****************************************************************************/ /** * \brief cub::GridMappingStrategy enumerates alternative strategies for mapping constant-sized tiles of device-wide data onto a grid of CUDA thread blocks. */ enum GridMappingStrategy { /** * \brief An a "raking" access pattern in which each thread block is * assigned a consecutive sequence of input tiles * * \par Overview * The input is evenly partitioned into \p p segments, where \p p is * constant and corresponds loosely to the number of thread blocks that may * actively reside on the target device. Each segment is comprised of * consecutive tiles, where a tile is a small, constant-sized unit of input * to be processed to completion before the thread block terminates or * obtains more work. The kernel invokes \p p thread blocks, each * of which iteratively consumes a segment of n/p elements * in tile-size increments. */ GRID_MAPPING_RAKE, /** * \brief An a "strip mining" access pattern in which the input tiles assigned * to each thread block are separated by a stride equal to the the extent of * the grid. * * \par Overview * The input is evenly partitioned into \p p sets, where \p p is * constant and corresponds loosely to the number of thread blocks that may * actively reside on the target device. Each set is comprised of * data tiles separated by stride \p tiles, where a tile is a small, * constant-sized unit of input to be processed to completion before the * thread block terminates or obtains more work. The kernel invokes \p p * thread blocks, each of which iteratively consumes a segment of * n/p elements in tile-size increments. */ GRID_MAPPING_STRIP_MINE, /** * \brief A dynamic "queue-based" strategy for assigning input tiles to thread blocks. * * \par Overview * The input is treated as a queue to be dynamically consumed by a grid of * thread blocks. Work is atomically dequeued in tiles, where a tile is a * unit of input to be processed to completion before the thread block * terminates or obtains more work. The grid size \p p is constant, * loosely corresponding to the number of thread blocks that may actively * reside on the target device. 
*/ GRID_MAPPING_DYNAMIC, }; /** @} */ // end group GridModule } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/grid/grid_queue.cuh000066400000000000000000000164631411340063500221470ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::GridQueue is a descriptor utility for dynamic queue management. */ #pragma once #include "../util_namespace.cuh" #include "../util_debug.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup GridModule * @{ */ /** * \brief GridQueue is a descriptor utility for dynamic queue management. * * \par Overview * GridQueue descriptors provides abstractions for "filling" or * "draining" globally-shared vectors. * * \par * A "filling" GridQueue works by atomically-adding to a zero-initialized counter, * returning a unique offset for the calling thread to write its items. * The GridQueue maintains the total "fill-size". The fill counter must be reset * using GridQueue::ResetFill by the host or kernel instance prior to the kernel instance that * will be filling. * * \par * Similarly, a "draining" GridQueue works by works by atomically-incrementing a * zero-initialized counter, returning a unique offset for the calling thread to * read its items. Threads can safely drain until the array's logical fill-size is * exceeded. The drain counter must be reset using GridQueue::ResetDrain or * GridQueue::FillAndResetDrain by the host or kernel instance prior to the kernel instance that * will be filling. (For dynamic work distribution of existing data, the corresponding fill-size * is simply the number of elements in the array.) 
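 *
 * \par
 * A hedged sketch of the "draining" pattern (not from the original header; the
 * kernel name and TILE_ITEMS are placeholders). The host establishes the
 * fill-size and zeroes the drain counter, then persistent thread blocks
 * atomically dequeue tiles until the queue is exhausted:
 * \code
 * // Host side: d_queue_storage holds GridQueue<int>::AllocationSize() bytes
 * cub::GridQueue<int> queue(d_queue_storage);
 * queue.FillAndResetDrain(num_items);
 * DrainKernel<<<grid_size, block_threads>>>(queue, d_in, num_items);
 *
 * // Device side
 * __global__ void DrainKernel(cub::GridQueue<int> queue, int *d_in, int num_items)
 * {
 *     __shared__ int block_offset;
 *     while (true)
 *     {
 *         if (threadIdx.x == 0)
 *             block_offset = queue.Drain(TILE_ITEMS);   // one dequeue per block
 *         __syncthreads();
 *         if (block_offset >= num_items) break;         // queue exhausted
 *         // ... process up to TILE_ITEMS items of d_in starting at block_offset,
 *         //     guarding against num_items for the final partial tile ...
 *         __syncthreads();                              // protect block_offset for the next dequeue
 *     }
 * }
 * \endcode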
* * \par * Iterative work management can be implemented simply with a pair of flip-flopping * work buffers, each with an associated set of fill and drain GridQueue descriptors. * * \tparam OffsetT Signed integer type for global offsets */ template class GridQueue { private: /// Counter indices enum { FILL = 0, DRAIN = 1, }; /// Pair of counters OffsetT *d_counters; public: /// Returns the device allocation size in bytes needed to construct a GridQueue instance __host__ __device__ __forceinline__ static size_t AllocationSize() { return sizeof(OffsetT) * 2; } /// Constructs an invalid GridQueue descriptor __host__ __device__ __forceinline__ GridQueue() : d_counters(NULL) {} /// Constructs a GridQueue descriptor around the device storage allocation __host__ __device__ __forceinline__ GridQueue( void *d_storage) ///< Device allocation to back the GridQueue. Must be at least as big as AllocationSize(). : d_counters((OffsetT*) d_storage) {} /// This operation sets the fill-size and resets the drain counter, preparing the GridQueue for draining in the next kernel instance. To be called by the host or by a kernel prior to that which will be draining. __host__ __device__ __forceinline__ cudaError_t FillAndResetDrain( OffsetT fill_size, cudaStream_t stream = 0) { #if (CUB_PTX_ARCH > 0) (void)stream; d_counters[FILL] = fill_size; d_counters[DRAIN] = 0; return cudaSuccess; #else OffsetT counters[2]; counters[FILL] = fill_size; counters[DRAIN] = 0; return CubDebug(cudaMemcpyAsync(d_counters, counters, sizeof(OffsetT) * 2, cudaMemcpyHostToDevice, stream)); #endif } /// This operation resets the drain so that it may advance to meet the existing fill-size. To be called by the host or by a kernel prior to that which will be draining. __host__ __device__ __forceinline__ cudaError_t ResetDrain(cudaStream_t stream = 0) { #if (CUB_PTX_ARCH > 0) (void)stream; d_counters[DRAIN] = 0; return cudaSuccess; #else return CubDebug(cudaMemsetAsync(d_counters + DRAIN, 0, sizeof(OffsetT), stream)); #endif } /// This operation resets the fill counter. To be called by the host or by a kernel prior to that which will be filling. __host__ __device__ __forceinline__ cudaError_t ResetFill(cudaStream_t stream = 0) { #if (CUB_PTX_ARCH > 0) (void)stream; d_counters[FILL] = 0; return cudaSuccess; #else return CubDebug(cudaMemsetAsync(d_counters + FILL, 0, sizeof(OffsetT), stream)); #endif } /// Returns the fill-size established by the parent or by the previous kernel. __host__ __device__ __forceinline__ cudaError_t FillSize( OffsetT &fill_size, cudaStream_t stream = 0) { #if (CUB_PTX_ARCH > 0) (void)stream; fill_size = d_counters[FILL]; return cudaSuccess; #else return CubDebug(cudaMemcpyAsync(&fill_size, d_counters + FILL, sizeof(OffsetT), cudaMemcpyDeviceToHost, stream)); #endif } /// Drain \p num_items from the queue. Returns offset from which to read items. To be called from CUDA kernel. __device__ __forceinline__ OffsetT Drain(OffsetT num_items) { return atomicAdd(d_counters + DRAIN, num_items); } /// Fill \p num_items into the queue. Returns offset from which to write items. To be called from CUDA kernel. 
__device__ __forceinline__ OffsetT Fill(OffsetT num_items) { return atomicAdd(d_counters + FILL, num_items); } }; #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document /** * Reset grid queue (call with 1 block of 1 thread) */ template __global__ void FillAndResetDrainKernel( GridQueue grid_queue, OffsetT num_items) { grid_queue.FillAndResetDrain(num_items); } #endif // DOXYGEN_SHOULD_SKIP_THIS /** @} */ // end group GridModule } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/host/000077500000000000000000000000001411340063500173335ustar00rootroot00000000000000relion-3.1.3/src/gpu_utils/cub/host/mutex.cuh000066400000000000000000000107451411340063500212050ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ******************************************************************************/ /** * \file * Simple portable mutex */ #pragma once #if (__cplusplus > 199711L) || (defined(_MSC_VER) && _MSC_VER >= 1800) #include #else #if defined(_WIN32) || defined(_WIN64) #include #define WIN32_LEAN_AND_MEAN #define NOMINMAX #include #undef WIN32_LEAN_AND_MEAN #undef NOMINMAX /** * Compiler read/write barrier */ #pragma intrinsic(_ReadWriteBarrier) #endif #endif #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * Simple portable mutex * - Wraps std::mutex when compiled with C++11 or newer (supported on all platforms) * - Uses GNU/Windows spinlock mechanisms for pre C++11 (supported on x86/x64 when compiled with cl.exe or g++) */ struct Mutex { #if (__cplusplus > 199711L) || (defined(_MSC_VER) && _MSC_VER >= 1800) std::mutex mtx; void Lock() { mtx.lock(); } void Unlock() { mtx.unlock(); } void TryLock() { mtx.try_lock(); } #else //__cplusplus > 199711L #if defined(_MSC_VER) // Microsoft VC++ typedef long Spinlock; #else // GNU g++ typedef int Spinlock; /** * Compiler read/write barrier */ __forceinline__ void _ReadWriteBarrier() { __sync_synchronize(); } /** * Atomic exchange */ __forceinline__ long _InterlockedExchange(volatile int * const Target, const int Value) { // NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier _ReadWriteBarrier(); return __sync_lock_test_and_set(Target, Value); } /** * Pause instruction to prevent excess processor bus usage */ __forceinline__ void YieldProcessor() { } #endif // defined(_MSC_VER) /// Lock member volatile Spinlock lock; /** * Constructor */ Mutex() : lock(0) {} /** * Return when the specified spinlock has been acquired */ __forceinline__ void Lock() { while (1) { if (!_InterlockedExchange(&lock, 1)) return; while (lock) YieldProcessor(); } } /** * Release the specified spinlock */ __forceinline__ void Unlock() { _ReadWriteBarrier(); lock = 0; } #endif // __cplusplus > 199711L }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/iterator/000077500000000000000000000000001411340063500202075ustar00rootroot00000000000000relion-3.1.3/src/gpu_utils/cub/iterator/arg_index_input_iterator.cuh000066400000000000000000000211151411340063500260000ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * Random-access iterator types */ #pragma once #include #include #include "../thread/thread_load.cuh" #include "../thread/thread_store.cuh" #include "../util_device.cuh" #include "../util_namespace.cuh" #include #if (THRUST_VERSION >= 100700) // This iterator is compatible with Thrust API 1.7 and newer #include #include #endif // THRUST_VERSION /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup UtilIterator * @{ */ /** * \brief A random-access input wrapper for pairing dereferenced values with their corresponding indices (forming \p KeyValuePair tuples). * * \par Overview * - ArgIndexInputIteratorTwraps a random access input iterator \p itr of type \p InputIteratorT. * Dereferencing an ArgIndexInputIteratorTat offset \p i produces a \p KeyValuePair value whose * \p key field is \p i and whose \p value field is itr[i]. * - Can be used with any data type. * - Can be constructed, manipulated, and exchanged within and between host and device * functions. Wrapped host memory can only be dereferenced on the host, and wrapped * device memory can only be dereferenced on the device. * - Compatible with Thrust API v1.7 or newer. 
* * \par Snippet * The code snippet below illustrates the use of \p ArgIndexInputIteratorTto * dereference an array of doubles * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize a device array * double *d_in; // e.g., [8.0, 6.0, 7.0, 5.0, 3.0, 0.0, 9.0] * * // Create an iterator wrapper * cub::ArgIndexInputIterator itr(d_in); * * // Within device code: * typedef typename cub::ArgIndexInputIterator::value_type Tuple; * Tuple item_offset_pair.key = *itr; * printf("%f @ %d\n", * item_offset_pair.value, * item_offset_pair.key); // 8.0 @ 0 * * itr = itr + 6; * item_offset_pair.key = *itr; * printf("%f @ %d\n", * item_offset_pair.value, * item_offset_pair.key); // 9.0 @ 6 * * \endcode * * \tparam InputIteratorT The value type of the wrapped input iterator * \tparam OffsetT The difference type of this iterator (Default: \p ptrdiff_t) * \tparam OutputValueT The paired value type of the tuple (Default: value type of input iterator) */ template < typename InputIteratorT, typename OffsetT = ptrdiff_t, typename OutputValueT = typename std::iterator_traits::value_type> class ArgIndexInputIterator { public: // Required iterator traits typedef ArgIndexInputIterator self_type; ///< My own type typedef OffsetT difference_type; ///< Type to express the result of subtracting one iterator from another typedef KeyValuePair value_type; ///< The type of the element the iterator can point to typedef value_type* pointer; ///< The type of a pointer to an element the iterator can point to typedef value_type reference; ///< The type of a reference to an element the iterator can point to #if (THRUST_VERSION >= 100700) // Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods typedef typename thrust::detail::iterator_facade_category< thrust::any_system_tag, thrust::random_access_traversal_tag, value_type, reference >::type iterator_category; ///< The iterator category #else typedef std::random_access_iterator_tag iterator_category; ///< The iterator category #endif // THRUST_VERSION private: InputIteratorT itr; difference_type offset; public: /// Constructor __host__ __device__ __forceinline__ ArgIndexInputIterator( InputIteratorT itr, ///< Input iterator to wrap difference_type offset = 0) ///< OffsetT (in items) from \p itr denoting the position of the iterator : itr(itr), offset(offset) {} /// Postfix increment __host__ __device__ __forceinline__ self_type operator++(int) { self_type retval = *this; offset++; return retval; } /// Prefix increment __host__ __device__ __forceinline__ self_type operator++() { offset++; return *this; } /// Indirection __host__ __device__ __forceinline__ reference operator*() const { value_type retval; retval.value = itr[offset]; retval.key = offset; return retval; } /// Addition template __host__ __device__ __forceinline__ self_type operator+(Distance n) const { self_type retval(itr, offset + n); return retval; } /// Addition assignment template __host__ __device__ __forceinline__ self_type& operator+=(Distance n) { offset += n; return *this; } /// Subtraction template __host__ __device__ __forceinline__ self_type operator-(Distance n) const { self_type retval(itr, offset - n); return retval; } /// Subtraction assignment template __host__ __device__ __forceinline__ self_type& operator-=(Distance n) { offset -= n; return *this; } /// Distance __host__ __device__ __forceinline__ difference_type operator-(self_type other) const { return offset - other.offset; } /// Array subscript template __host__ __device__ 
__forceinline__ reference operator[](Distance n) const { self_type offset = (*this) + n; return *offset; } /// Structure dereference __host__ __device__ __forceinline__ pointer operator->() { return &(*(*this)); } /// Equal to __host__ __device__ __forceinline__ bool operator==(const self_type& rhs) { return ((itr == rhs.itr) && (offset == rhs.offset)); } /// Not equal to __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs) { return ((itr != rhs.itr) || (offset != rhs.offset)); } /// Normalize __host__ __device__ __forceinline__ void normalize() { itr += offset; offset = 0; } /// ostream operator friend std::ostream& operator<<(std::ostream& os, const self_type& /*itr*/) { return os; } }; /** @} */ // end group UtilIterator } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/iterator/cache_modified_input_iterator.cuh000066400000000000000000000176501411340063500267540ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * Random-access iterator types */ #pragma once #include #include #include "../thread/thread_load.cuh" #include "../thread/thread_store.cuh" #include "../util_device.cuh" #include "../util_namespace.cuh" #if (THRUST_VERSION >= 100700) // This iterator is compatible with Thrust API 1.7 and newer #include #include #endif // THRUST_VERSION /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup UtilIterator * @{ */ /** * \brief A random-access input wrapper for dereferencing array values using a PTX cache load modifier. * * \par Overview * - CacheModifiedInputIteratorTis a random-access input iterator that wraps a native * device pointer of type ValueType*. \p ValueType references are * made by reading \p ValueType values through loads modified by \p MODIFIER. 
* - Can be used to load any data type from memory using PTX cache load modifiers (e.g., "LOAD_LDG", * "LOAD_CG", "LOAD_CA", "LOAD_CS", "LOAD_CV", etc.). * - Can be constructed, manipulated, and exchanged within and between host and device * functions, but can only be dereferenced within device functions. * - Compatible with Thrust API v1.7 or newer. * * \par Snippet * The code snippet below illustrates the use of \p CacheModifiedInputIteratorTto * dereference a device array of double using the "ldg" PTX load modifier * (i.e., load values through texture cache). * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize a device array * double *d_in; // e.g., [8.0, 6.0, 7.0, 5.0, 3.0, 0.0, 9.0] * * // Create an iterator wrapper * cub::CacheModifiedInputIterator itr(d_in); * * // Within device code: * printf("%f\n", itr[0]); // 8.0 * printf("%f\n", itr[1]); // 6.0 * printf("%f\n", itr[6]); // 9.0 * * \endcode * * \tparam CacheLoadModifier The cub::CacheLoadModifier to use when accessing data * \tparam ValueType The value type of this iterator * \tparam OffsetT The difference type of this iterator (Default: \p ptrdiff_t) */ template < CacheLoadModifier MODIFIER, typename ValueType, typename OffsetT = ptrdiff_t> class CacheModifiedInputIterator { public: // Required iterator traits typedef CacheModifiedInputIterator self_type; ///< My own type typedef OffsetT difference_type; ///< Type to express the result of subtracting one iterator from another typedef ValueType value_type; ///< The type of the element the iterator can point to typedef ValueType* pointer; ///< The type of a pointer to an element the iterator can point to typedef ValueType reference; ///< The type of a reference to an element the iterator can point to #if (THRUST_VERSION >= 100700) // Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods typedef typename thrust::detail::iterator_facade_category< thrust::device_system_tag, thrust::random_access_traversal_tag, value_type, reference >::type iterator_category; ///< The iterator category #else typedef std::random_access_iterator_tag iterator_category; ///< The iterator category #endif // THRUST_VERSION public: /// Wrapped native pointer ValueType* ptr; /// Constructor template __host__ __device__ __forceinline__ CacheModifiedInputIterator( QualifiedValueType* ptr) ///< Native pointer to wrap : ptr(const_cast::Type *>(ptr)) {} /// Postfix increment __host__ __device__ __forceinline__ self_type operator++(int) { self_type retval = *this; ptr++; return retval; } /// Prefix increment __host__ __device__ __forceinline__ self_type operator++() { ptr++; return *this; } /// Indirection __device__ __forceinline__ reference operator*() const { return ThreadLoad(ptr); } /// Addition template __host__ __device__ __forceinline__ self_type operator+(Distance n) const { self_type retval(ptr + n); return retval; } /// Addition assignment template __host__ __device__ __forceinline__ self_type& operator+=(Distance n) { ptr += n; return *this; } /// Subtraction template __host__ __device__ __forceinline__ self_type operator-(Distance n) const { self_type retval(ptr - n); return retval; } /// Subtraction assignment template __host__ __device__ __forceinline__ self_type& operator-=(Distance n) { ptr -= n; return *this; } /// Distance __host__ __device__ __forceinline__ difference_type operator-(self_type other) const { return ptr - other.ptr; } /// Array subscript template __device__ __forceinline__ reference operator[](Distance n) 
const { return ThreadLoad(ptr + n); } /// Structure dereference __device__ __forceinline__ pointer operator->() { return &ThreadLoad(ptr); } /// Equal to __host__ __device__ __forceinline__ bool operator==(const self_type& rhs) { return (ptr == rhs.ptr); } /// Not equal to __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs) { return (ptr != rhs.ptr); } /// ostream operator friend std::ostream& operator<<(std::ostream& os, const self_type& /*itr*/) { return os; } }; /** @} */ // end group UtilIterator } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/iterator/cache_modified_output_iterator.cuh000066400000000000000000000202021411340063500271400ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * Random-access iterator types */ #pragma once #include #include #include "../thread/thread_load.cuh" #include "../thread/thread_store.cuh" #include "../util_device.cuh" #include "../util_namespace.cuh" #if (THRUST_VERSION >= 100700) // This iterator is compatible with Thrust API 1.7 and newer #include #include #endif // THRUST_VERSION /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup UtilIterator * @{ */ /** * \brief A random-access output wrapper for storing array values using a PTX cache-modifier. * * \par Overview * - CacheModifiedOutputIterator is a random-access output iterator that wraps a native * device pointer of type ValueType*. \p ValueType references are * made by writing \p ValueType values through stores modified by \p MODIFIER. * - Can be used to store any data type to memory using PTX cache store modifiers (e.g., "STORE_WB", * "STORE_CG", "STORE_CS", "STORE_WT", etc.). 
* - Can be constructed, manipulated, and exchanged within and between host and device * functions, but can only be dereferenced within device functions. * - Compatible with Thrust API v1.7 or newer. * * \par Snippet * The code snippet below illustrates the use of \p CacheModifiedOutputIterator to * dereference a device array of doubles using the "wt" PTX load modifier * (i.e., write-through to system memory). * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize a device array * double *d_out; // e.g., [, , , , , , ] * * // Create an iterator wrapper * cub::CacheModifiedOutputIterator itr(d_out); * * // Within device code: * itr[0] = 8.0; * itr[1] = 66.0; * itr[55] = 24.0; * * \endcode * * \par Usage Considerations * - Can only be dereferenced within device code * * \tparam CacheStoreModifier The cub::CacheStoreModifier to use when accessing data * \tparam ValueType The value type of this iterator * \tparam OffsetT The difference type of this iterator (Default: \p ptrdiff_t) */ template < CacheStoreModifier MODIFIER, typename ValueType, typename OffsetT = ptrdiff_t> class CacheModifiedOutputIterator { private: // Proxy object struct Reference { ValueType* ptr; /// Constructor __host__ __device__ __forceinline__ Reference(ValueType* ptr) : ptr(ptr) {} /// Assignment __device__ __forceinline__ ValueType operator =(ValueType val) { ThreadStore(ptr, val); return val; } }; public: // Required iterator traits typedef CacheModifiedOutputIterator self_type; ///< My own type typedef OffsetT difference_type; ///< Type to express the result of subtracting one iterator from another typedef void value_type; ///< The type of the element the iterator can point to typedef void pointer; ///< The type of a pointer to an element the iterator can point to typedef Reference reference; ///< The type of a reference to an element the iterator can point to #if (THRUST_VERSION >= 100700) // Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods typedef typename thrust::detail::iterator_facade_category< thrust::device_system_tag, thrust::random_access_traversal_tag, value_type, reference >::type iterator_category; ///< The iterator category #else typedef std::random_access_iterator_tag iterator_category; ///< The iterator category #endif // THRUST_VERSION private: ValueType* ptr; public: /// Constructor template __host__ __device__ __forceinline__ CacheModifiedOutputIterator( QualifiedValueType* ptr) ///< Native pointer to wrap : ptr(const_cast::Type *>(ptr)) {} /// Postfix increment __host__ __device__ __forceinline__ self_type operator++(int) { self_type retval = *this; ptr++; return retval; } /// Prefix increment __host__ __device__ __forceinline__ self_type operator++() { ptr++; return *this; } /// Indirection __host__ __device__ __forceinline__ reference operator*() const { return Reference(ptr); } /// Addition template __host__ __device__ __forceinline__ self_type operator+(Distance n) const { self_type retval(ptr + n); return retval; } /// Addition assignment template __host__ __device__ __forceinline__ self_type& operator+=(Distance n) { ptr += n; return *this; } /// Subtraction template __host__ __device__ __forceinline__ self_type operator-(Distance n) const { self_type retval(ptr - n); return retval; } /// Subtraction assignment template __host__ __device__ __forceinline__ self_type& operator-=(Distance n) { ptr -= n; return *this; } /// Distance __host__ __device__ __forceinline__ difference_type operator-(self_type other) const { 
return ptr - other.ptr; } /// Array subscript template __host__ __device__ __forceinline__ reference operator[](Distance n) const { return Reference(ptr + n); } /// Equal to __host__ __device__ __forceinline__ bool operator==(const self_type& rhs) { return (ptr == rhs.ptr); } /// Not equal to __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs) { return (ptr != rhs.ptr); } /// ostream operator friend std::ostream& operator<<(std::ostream& os, const self_type& itr) { return os; } }; /** @} */ // end group UtilIterator } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/iterator/constant_input_iterator.cuh000066400000000000000000000167421411340063500257030ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * Random-access iterator types */ #pragma once #include #include #include "../thread/thread_load.cuh" #include "../thread/thread_store.cuh" #include "../util_namespace.cuh" #if (THRUST_VERSION >= 100700) // This iterator is compatible with Thrust API 1.7 and newer #include #include #endif // THRUST_VERSION /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup UtilIterator * @{ */ /** * \brief A random-access input generator for dereferencing a sequence of homogeneous values * * \par Overview * - Read references to a ConstantInputIteratorTiterator always return the supplied constant * of type \p ValueType. * - Can be used with any data type. * - Can be constructed, manipulated, dereferenced, and exchanged within and between host and device * functions. * - Compatible with Thrust API v1.7 or newer. * * \par Snippet * The code snippet below illustrates the use of \p ConstantInputIteratorTto * dereference a sequence of homogeneous doubles. 
* \par * \code * #include // or equivalently * * cub::ConstantInputIterator itr(5.0); * * printf("%f\n", itr[0]); // 5.0 * printf("%f\n", itr[1]); // 5.0 * printf("%f\n", itr[2]); // 5.0 * printf("%f\n", itr[50]); // 5.0 * * \endcode * * \tparam ValueType The value type of this iterator * \tparam OffsetT The difference type of this iterator (Default: \p ptrdiff_t) */ template < typename ValueType, typename OffsetT = ptrdiff_t> class ConstantInputIterator { public: // Required iterator traits typedef ConstantInputIterator self_type; ///< My own type typedef OffsetT difference_type; ///< Type to express the result of subtracting one iterator from another typedef ValueType value_type; ///< The type of the element the iterator can point to typedef ValueType* pointer; ///< The type of a pointer to an element the iterator can point to typedef ValueType reference; ///< The type of a reference to an element the iterator can point to #if (THRUST_VERSION >= 100700) // Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods typedef typename thrust::detail::iterator_facade_category< thrust::any_system_tag, thrust::random_access_traversal_tag, value_type, reference >::type iterator_category; ///< The iterator category #else typedef std::random_access_iterator_tag iterator_category; ///< The iterator category #endif // THRUST_VERSION private: ValueType val; OffsetT offset; #ifdef _WIN32 OffsetT pad[CUB_MAX(1, (16 / sizeof(OffsetT) - 1))]; // Workaround for win32 parameter-passing bug (ulonglong2 argmin DeviceReduce) #endif public: /// Constructor __host__ __device__ __forceinline__ ConstantInputIterator( ValueType val, ///< Starting value for the iterator instance to report OffsetT offset = 0) ///< Base offset : val(val), offset(offset) {} /// Postfix increment __host__ __device__ __forceinline__ self_type operator++(int) { self_type retval = *this; offset++; return retval; } /// Prefix increment __host__ __device__ __forceinline__ self_type operator++() { offset++; return *this; } /// Indirection __host__ __device__ __forceinline__ reference operator*() const { return val; } /// Addition template __host__ __device__ __forceinline__ self_type operator+(Distance n) const { self_type retval(val, offset + n); return retval; } /// Addition assignment template __host__ __device__ __forceinline__ self_type& operator+=(Distance n) { offset += n; return *this; } /// Subtraction template __host__ __device__ __forceinline__ self_type operator-(Distance n) const { self_type retval(val, offset - n); return retval; } /// Subtraction assignment template __host__ __device__ __forceinline__ self_type& operator-=(Distance n) { offset -= n; return *this; } /// Distance __host__ __device__ __forceinline__ difference_type operator-(self_type other) const { return offset - other.offset; } /// Array subscript template __host__ __device__ __forceinline__ reference operator[](Distance /*n*/) const { return val; } /// Structure dereference __host__ __device__ __forceinline__ pointer operator->() { return &val; } /// Equal to __host__ __device__ __forceinline__ bool operator==(const self_type& rhs) { return (offset == rhs.offset) && ((val == rhs.val)); } /// Not equal to __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs) { return (offset != rhs.offset) || (val!= rhs.val); } /// ostream operator friend std::ostream& operator<<(std::ostream& os, const self_type& itr) { os << "[" << itr.val << "," << itr.offset << "]"; return os; } }; /** @} */ // end group UtilIterator 
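/*
 * A hedged follow-on sketch (not part of the original header): because
 * ConstantInputIterator models a random-access input iterator, it can be fed
 * directly to device-wide algorithms. Here it supplies num_items copies of 1
 * to cub::DeviceReduce::Sum, giving a device-side count; d_out and the usual
 * two-phase temporary-storage calls are assumed.
 *
 *     cub::ConstantInputIterator<int> ones(1);
 *
 *     void*  d_temp_storage     = NULL;
 *     size_t temp_storage_bytes = 0;
 *     cub::DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, ones, d_out, num_items);
 *     cudaMalloc(&d_temp_storage, temp_storage_bytes);
 *     cub::DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, ones, d_out, num_items);
 *     // *d_out == num_items
 */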
} // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/iterator/counting_input_iterator.cuh000066400000000000000000000163111411340063500256700ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * Random-access iterator types */ #pragma once #include #include #include "../thread/thread_load.cuh" #include "../thread/thread_store.cuh" #include "../util_device.cuh" #include "../util_namespace.cuh" #if (THRUST_VERSION >= 100700) // This iterator is compatible with Thrust API 1.7 and newer #include #include #endif // THRUST_VERSION /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup UtilIterator * @{ */ /** * \brief A random-access input generator for dereferencing a sequence of incrementing integer values. * * \par Overview * - After initializing a CountingInputIteratorTto a certain integer \p base, read references * at \p offset will return the value \p base + \p offset. * - Can be constructed, manipulated, dereferenced, and exchanged within and between host and device * functions. * - Compatible with Thrust API v1.7 or newer. * * \par Snippet * The code snippet below illustrates the use of \p CountingInputIteratorTto * dereference a sequence of incrementing integers. 
* \par * \code * #include // or equivalently * * cub::CountingInputIterator itr(5); * * printf("%d\n", itr[0]); // 5 * printf("%d\n", itr[1]); // 6 * printf("%d\n", itr[2]); // 7 * printf("%d\n", itr[50]); // 55 * * \endcode * * \tparam ValueType The value type of this iterator * \tparam OffsetT The difference type of this iterator (Default: \p ptrdiff_t) */ template < typename ValueType, typename OffsetT = ptrdiff_t> class CountingInputIterator { public: // Required iterator traits typedef CountingInputIterator self_type; ///< My own type typedef OffsetT difference_type; ///< Type to express the result of subtracting one iterator from another typedef ValueType value_type; ///< The type of the element the iterator can point to typedef ValueType* pointer; ///< The type of a pointer to an element the iterator can point to typedef ValueType reference; ///< The type of a reference to an element the iterator can point to #if (THRUST_VERSION >= 100700) // Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods typedef typename thrust::detail::iterator_facade_category< thrust::any_system_tag, thrust::random_access_traversal_tag, value_type, reference >::type iterator_category; ///< The iterator category #else typedef std::random_access_iterator_tag iterator_category; ///< The iterator category #endif // THRUST_VERSION private: ValueType val; public: /// Constructor __host__ __device__ __forceinline__ CountingInputIterator( const ValueType &val) ///< Starting value for the iterator instance to report : val(val) {} /// Postfix increment __host__ __device__ __forceinline__ self_type operator++(int) { self_type retval = *this; val++; return retval; } /// Prefix increment __host__ __device__ __forceinline__ self_type operator++() { val++; return *this; } /// Indirection __host__ __device__ __forceinline__ reference operator*() const { return val; } /// Addition template __host__ __device__ __forceinline__ self_type operator+(Distance n) const { self_type retval(val + (ValueType) n); return retval; } /// Addition assignment template __host__ __device__ __forceinline__ self_type& operator+=(Distance n) { val += (ValueType) n; return *this; } /// Subtraction template __host__ __device__ __forceinline__ self_type operator-(Distance n) const { self_type retval(val - (ValueType) n); return retval; } /// Subtraction assignment template __host__ __device__ __forceinline__ self_type& operator-=(Distance n) { val -= n; return *this; } /// Distance __host__ __device__ __forceinline__ difference_type operator-(self_type other) const { return (difference_type) (val - other.val); } /// Array subscript template __host__ __device__ __forceinline__ reference operator[](Distance n) const { return val + (ValueType) n; } /// Structure dereference __host__ __device__ __forceinline__ pointer operator->() { return &val; } /// Equal to __host__ __device__ __forceinline__ bool operator==(const self_type& rhs) { return (val == rhs.val); } /// Not equal to __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs) { return (val != rhs.val); } /// ostream operator friend std::ostream& operator<<(std::ostream& os, const self_type& itr) { os << "[" << itr.val << "]"; return os; } }; /** @} */ // end group UtilIterator } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) 
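/*
 * Illustrative usage sketch -- NOT part of the vendored CUB sources above.
 * It shows how the cub::CountingInputIterator defined in the preceding header can
 * feed a device-wide reduction without materialising the input sequence in device
 * memory. cub::DeviceReduce::Sum (the usual two-phase temp-storage pattern) is
 * assumed to be available from the same CUB distribution; error checking of the
 * CUDA calls is omitted for brevity.
 */
#include <cub/cub.cuh>

void SumOfFirstN(int num_items, int *d_out)   // d_out: device pointer to one int
{
    cub::CountingInputIterator<int> itr(0);   // dereferences to 0, 1, 2, ...

    // First call: query how much temporary device storage the reduction needs
    void   *d_temp_storage    = NULL;
    size_t  temp_storage_bytes = 0;
    cub::DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, itr, d_out, num_items);

    // Allocate the temporary storage, then run the actual reduction
    cudaMalloc(&d_temp_storage, temp_storage_bytes);
    cub::DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, itr, d_out, num_items);
    cudaFree(d_temp_storage);
}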
relion-3.1.3/src/gpu_utils/cub/iterator/discard_output_iterator.cuh000066400000000000000000000152411411340063500256550ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ******************************************************************************/ /** * \file * Random-access iterator types */ #pragma once #include #include #include "../util_namespace.cuh" #include "../util_macro.cuh" #if (THRUST_VERSION >= 100700) // This iterator is compatible with Thrust API 1.7 and newer #include #include #endif // THRUST_VERSION /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup UtilIterator * @{ */ /** * \brief A discard iterator */ template class DiscardOutputIterator { public: // Required iterator traits typedef DiscardOutputIterator self_type; ///< My own type typedef OffsetT difference_type; ///< Type to express the result of subtracting one iterator from another typedef void value_type; ///< The type of the element the iterator can point to typedef void pointer; ///< The type of a pointer to an element the iterator can point to typedef void reference; ///< The type of a reference to an element the iterator can point to #if (THRUST_VERSION >= 100700) // Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods typedef typename thrust::detail::iterator_facade_category< thrust::any_system_tag, thrust::random_access_traversal_tag, value_type, reference >::type iterator_category; ///< The iterator category #else typedef std::random_access_iterator_tag iterator_category; ///< The iterator category #endif // THRUST_VERSION private: OffsetT offset; #if defined(_WIN32) || !defined(_WIN64) // Workaround for win32 parameter-passing bug (ulonglong2 argmin DeviceReduce) OffsetT pad[CUB_MAX(1, (16 / sizeof(OffsetT) - 1))]; #endif public: /// Constructor __host__ __device__ __forceinline__ DiscardOutputIterator( OffsetT offset = 0) ///< Base offset : offset(offset) {} /// Postfix increment __host__ __device__ __forceinline__ self_type operator++(int) { self_type retval = *this; offset++; return retval; } /// Prefix increment __host__ __device__ __forceinline__ self_type operator++() { offset++; return *this; } /// Indirection __host__ __device__ __forceinline__ self_type& operator*() { // return self reference, which can be assigned to anything return *this; } /// Addition template __host__ __device__ __forceinline__ self_type operator+(Distance n) const { self_type retval(offset + n); return retval; } /// Addition assignment template __host__ __device__ __forceinline__ self_type& operator+=(Distance n) { offset += n; return *this; } /// Subtraction template __host__ __device__ __forceinline__ self_type operator-(Distance n) const { self_type retval(offset - n); return retval; } /// Subtraction assignment template __host__ __device__ __forceinline__ self_type& operator-=(Distance n) { offset -= n; return *this; } /// Distance __host__ __device__ __forceinline__ difference_type operator-(self_type other) const { return offset - other.offset; } /// Array subscript template __host__ __device__ __forceinline__ self_type& operator[](Distance n) { // return self reference, which can be assigned to anything return *this; } /// Structure dereference __host__ __device__ __forceinline__ pointer operator->() { return; } /// Assignment to self (no-op) __host__ __device__ __forceinline__ void operator=(self_type const& other) { offset = other.offset; } /// Assignment to anything else (no-op) template __host__ __device__ __forceinline__ void operator=(T const&) {} /// Cast to void* operator __host__ __device__ __forceinline__ operator void*() const { return NULL; } /// Equal to __host__ __device__ 
__forceinline__ bool operator==(const self_type& rhs) { return (offset == rhs.offset); } /// Not equal to __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs) { return (offset != rhs.offset); } /// ostream operator friend std::ostream& operator<<(std::ostream& os, const self_type& itr) { os << "[" << itr.offset << "]"; return os; } }; /** @} */ // end group UtilIterator } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/iterator/tex_obj_input_iterator.cuh000066400000000000000000000244621411340063500255020ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * Random-access iterator types */ #pragma once #include #include #include "../thread/thread_load.cuh" #include "../thread/thread_store.cuh" #include "../util_device.cuh" #include "../util_debug.cuh" #include "../util_namespace.cuh" #if (THRUST_VERSION >= 100700) // This iterator is compatible with Thrust API 1.7 and newer #include #include #endif // THRUST_VERSION /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup UtilIterator * @{ */ /** * \brief A random-access input wrapper for dereferencing array values through texture cache. Uses newer Kepler-style texture objects. * * \par Overview * - TexObjInputIteratorTwraps a native device pointer of type ValueType*. References * to elements are to be loaded through texture cache. * - Can be used to load any data type from memory through texture cache. * - Can be manipulated and exchanged within and between host and device * functions, can only be constructed within host functions, and can only be * dereferenced within device functions. 
* - With regard to nested/dynamic parallelism, TexObjInputIteratorTiterators may only be * created by the host thread, but can be used by any descendant kernel. * - Compatible with Thrust API v1.7 or newer. * * \par Snippet * The code snippet below illustrates the use of \p TexRefInputIteratorTto * dereference a device array of doubles through texture cache. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize a device array * int num_items; // e.g., 7 * double *d_in; // e.g., [8.0, 6.0, 7.0, 5.0, 3.0, 0.0, 9.0] * * // Create an iterator wrapper * cub::TexObjInputIterator itr; * itr.BindTexture(d_in, sizeof(double) * num_items); * ... * * // Within device code: * printf("%f\n", itr[0]); // 8.0 * printf("%f\n", itr[1]); // 6.0 * printf("%f\n", itr[6]); // 9.0 * * ... * itr.UnbindTexture(); * * \endcode * * \tparam T The value type of this iterator * \tparam OffsetT The difference type of this iterator (Default: \p ptrdiff_t) */ template < typename T, typename OffsetT = ptrdiff_t> class TexObjInputIterator { public: // Required iterator traits typedef TexObjInputIterator self_type; ///< My own type typedef OffsetT difference_type; ///< Type to express the result of subtracting one iterator from another typedef T value_type; ///< The type of the element the iterator can point to typedef T* pointer; ///< The type of a pointer to an element the iterator can point to typedef T reference; ///< The type of a reference to an element the iterator can point to #if (THRUST_VERSION >= 100700) // Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods typedef typename thrust::detail::iterator_facade_category< thrust::device_system_tag, thrust::random_access_traversal_tag, value_type, reference >::type iterator_category; ///< The iterator category #else typedef std::random_access_iterator_tag iterator_category; ///< The iterator category #endif // THRUST_VERSION private: // Largest texture word we can use in device typedef typename UnitWord::TextureWord TextureWord; // Number of texture words per T enum { TEXTURE_MULTIPLE = sizeof(T) / sizeof(TextureWord) }; private: T* ptr; difference_type tex_offset; cudaTextureObject_t tex_obj; public: /// Constructor __host__ __device__ __forceinline__ TexObjInputIterator() : ptr(NULL), tex_offset(0), tex_obj(0) {} /// Use this iterator to bind \p ptr with a texture reference template cudaError_t BindTexture( QualifiedT *ptr, ///< Native pointer to wrap that is aligned to cudaDeviceProp::textureAlignment size_t bytes = size_t(-1), ///< Number of bytes in the range size_t tex_offset = 0) ///< OffsetT (in items) from \p ptr denoting the position of the iterator { this->ptr = const_cast::Type *>(ptr); this->tex_offset = tex_offset; cudaChannelFormatDesc channel_desc = cudaCreateChannelDesc(); cudaResourceDesc res_desc; cudaTextureDesc tex_desc; memset(&res_desc, 0, sizeof(cudaResourceDesc)); memset(&tex_desc, 0, sizeof(cudaTextureDesc)); res_desc.resType = cudaResourceTypeLinear; res_desc.res.linear.devPtr = this->ptr; res_desc.res.linear.desc = channel_desc; res_desc.res.linear.sizeInBytes = bytes; tex_desc.readMode = cudaReadModeElementType; return cudaCreateTextureObject(&tex_obj, &res_desc, &tex_desc, NULL); } /// Unbind this iterator from its texture reference cudaError_t UnbindTexture() { return cudaDestroyTextureObject(tex_obj); } /// Postfix increment __host__ __device__ __forceinline__ self_type operator++(int) { self_type retval = *this; tex_offset++; return retval; } /// Prefix increment 
__host__ __device__ __forceinline__ self_type operator++() { tex_offset++; return *this; } /// Indirection __host__ __device__ __forceinline__ reference operator*() const { #if (CUB_PTX_ARCH == 0) // Simply dereference the pointer on the host return ptr[tex_offset]; #else // Move array of uninitialized words, then alias and assign to return value TextureWord words[TEXTURE_MULTIPLE]; #pragma unroll for (int i = 0; i < TEXTURE_MULTIPLE; ++i) { words[i] = tex1Dfetch( tex_obj, (tex_offset * TEXTURE_MULTIPLE) + i); } // Load from words return *reinterpret_cast(words); #endif } /// Addition template __host__ __device__ __forceinline__ self_type operator+(Distance n) const { self_type retval; retval.ptr = ptr; retval.tex_obj = tex_obj; retval.tex_offset = tex_offset + n; return retval; } /// Addition assignment template __host__ __device__ __forceinline__ self_type& operator+=(Distance n) { tex_offset += n; return *this; } /// Subtraction template __host__ __device__ __forceinline__ self_type operator-(Distance n) const { self_type retval; retval.ptr = ptr; retval.tex_obj = tex_obj; retval.tex_offset = tex_offset - n; return retval; } /// Subtraction assignment template __host__ __device__ __forceinline__ self_type& operator-=(Distance n) { tex_offset -= n; return *this; } /// Distance __host__ __device__ __forceinline__ difference_type operator-(self_type other) const { return tex_offset - other.tex_offset; } /// Array subscript template __host__ __device__ __forceinline__ reference operator[](Distance n) const { self_type offset = (*this) + n; return *offset; } /// Structure dereference __host__ __device__ __forceinline__ pointer operator->() { return &(*(*this)); } /// Equal to __host__ __device__ __forceinline__ bool operator==(const self_type& rhs) { return ((ptr == rhs.ptr) && (tex_offset == rhs.tex_offset) && (tex_obj == rhs.tex_obj)); } /// Not equal to __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs) { return ((ptr != rhs.ptr) || (tex_offset != rhs.tex_offset) || (tex_obj != rhs.tex_obj)); } /// ostream operator friend std::ostream& operator<<(std::ostream& os, const self_type& itr) { return os; } }; /** @} */ // end group UtilIterator } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/iterator/tex_ref_input_iterator.cuh000066400000000000000000000302161411340063500254760ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * Random-access iterator types */ #pragma once #include #include #include "../thread/thread_load.cuh" #include "../thread/thread_store.cuh" #include "../util_device.cuh" #include "../util_debug.cuh" #include "../util_namespace.cuh" #if (CUDA_VERSION >= 5050) || defined(DOXYGEN_ACTIVE) // This iterator is compatible with CUDA 5.5 and newer #if (THRUST_VERSION >= 100700) // This iterator is compatible with Thrust API 1.7 and newer #include #include #endif // THRUST_VERSION /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /****************************************************************************** * Static file-scope Tesla/Fermi-style texture references *****************************************************************************/ #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document // Anonymous namespace namespace { /// Global texture reference specialized by type template struct IteratorTexRef { /// And by unique ID template struct TexId { // Largest texture word we can use in device typedef typename UnitWord::DeviceWord DeviceWord; typedef typename UnitWord::TextureWord TextureWord; // Number of texture words per T enum { DEVICE_MULTIPLE = sizeof(T) / sizeof(DeviceWord), TEXTURE_MULTIPLE = sizeof(T) / sizeof(TextureWord) }; // Texture reference type typedef texture TexRef; // Texture reference static TexRef ref; /// Bind texture static cudaError_t BindTexture(void *d_in, size_t &offset) { if (d_in) { cudaChannelFormatDesc tex_desc = cudaCreateChannelDesc(); ref.channelDesc = tex_desc; return (CubDebug(cudaBindTexture(&offset, ref, d_in))); } return cudaSuccess; } /// Unbind texture static cudaError_t UnbindTexture() { return CubDebug(cudaUnbindTexture(ref)); } /// Fetch element template static __device__ __forceinline__ T Fetch(Distance tex_offset) { DeviceWord temp[DEVICE_MULTIPLE]; TextureWord *words = reinterpret_cast(temp); #pragma unroll for (int i = 0; i < TEXTURE_MULTIPLE; ++i) { words[i] = tex1Dfetch(ref, (tex_offset * TEXTURE_MULTIPLE) + i); } return reinterpret_cast(temp); } }; }; // Texture reference definitions template template typename IteratorTexRef::template TexId::TexRef IteratorTexRef::template TexId::ref = 0; } // Anonymous namespace #endif // DOXYGEN_SHOULD_SKIP_THIS /** * \addtogroup UtilIterator * @{ */ /** * \brief A random-access input wrapper for dereferencing array values through texture cache. Uses older Tesla/Fermi-style texture references. * * \par Overview * - TexRefInputIteratorTwraps a native device pointer of type ValueType*. References * to elements are to be loaded through texture cache. * - Can be used to load any data type from memory through texture cache. 
* - Can be manipulated and exchanged within and between host and device * functions, can only be constructed within host functions, and can only be * dereferenced within device functions. * - The \p UNIQUE_ID template parameter is used to statically name the underlying texture * reference. Only one TexRefInputIteratorTinstance can be bound at any given time for a * specific combination of (1) data type \p T, (2) \p UNIQUE_ID, (3) host * thread, and (4) compilation .o unit. * - With regard to nested/dynamic parallelism, TexRefInputIteratorTiterators may only be * created by the host thread and used by a top-level kernel (i.e. the one which is launched * from the host). * - Compatible with Thrust API v1.7 or newer. * - Compatible with CUDA toolkit v5.5 or newer. * * \par Snippet * The code snippet below illustrates the use of \p TexRefInputIteratorTto * dereference a device array of doubles through texture cache. * \par * \code * #include // or equivalently * * // Declare, allocate, and initialize a device array * int num_items; // e.g., 7 * double *d_in; // e.g., [8.0, 6.0, 7.0, 5.0, 3.0, 0.0, 9.0] * * // Create an iterator wrapper * cub::TexRefInputIterator itr; * itr.BindTexture(d_in, sizeof(double) * num_items); * ... * * // Within device code: * printf("%f\n", itr[0]); // 8.0 * printf("%f\n", itr[1]); // 6.0 * printf("%f\n", itr[6]); // 9.0 * * ... * itr.UnbindTexture(); * * \endcode * * \tparam T The value type of this iterator * \tparam UNIQUE_ID A globally-unique identifier (within the compilation unit) to name the underlying texture reference * \tparam OffsetT The difference type of this iterator (Default: \p ptrdiff_t) */ template < typename T, int UNIQUE_ID, typename OffsetT = ptrdiff_t> class TexRefInputIterator { public: // Required iterator traits typedef TexRefInputIterator self_type; ///< My own type typedef OffsetT difference_type; ///< Type to express the result of subtracting one iterator from another typedef T value_type; ///< The type of the element the iterator can point to typedef T* pointer; ///< The type of a pointer to an element the iterator can point to typedef T reference; ///< The type of a reference to an element the iterator can point to #if (THRUST_VERSION >= 100700) // Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods typedef typename thrust::detail::iterator_facade_category< thrust::device_system_tag, thrust::random_access_traversal_tag, value_type, reference >::type iterator_category; ///< The iterator category #else typedef std::random_access_iterator_tag iterator_category; ///< The iterator category #endif // THRUST_VERSION private: T* ptr; difference_type tex_offset; // Texture reference wrapper (old Tesla/Fermi-style textures) typedef typename IteratorTexRef::template TexId TexId; public: /* /// Constructor __host__ __device__ __forceinline__ TexRefInputIterator() : ptr(NULL), tex_offset(0) {} */ /// Use this iterator to bind \p ptr with a texture reference template cudaError_t BindTexture( QualifiedT *ptr, ///< Native pointer to wrap that is aligned to cudaDeviceProp::textureAlignment size_t bytes = size_t(-1), ///< Number of bytes in the range size_t tex_offset = 0) ///< OffsetT (in items) from \p ptr denoting the position of the iterator { this->ptr = const_cast::Type *>(ptr); size_t offset; cudaError_t retval = TexId::BindTexture(this->ptr + tex_offset, offset); this->tex_offset = (difference_type) (offset / sizeof(QualifiedT)); return retval; } /// Unbind this iterator from its texture reference 
cudaError_t UnbindTexture() { return TexId::UnbindTexture(); } /// Postfix increment __host__ __device__ __forceinline__ self_type operator++(int) { self_type retval = *this; tex_offset++; return retval; } /// Prefix increment __host__ __device__ __forceinline__ self_type operator++() { tex_offset++; return *this; } /// Indirection __host__ __device__ __forceinline__ reference operator*() const { #if (CUB_PTX_ARCH == 0) // Simply dereference the pointer on the host return ptr[tex_offset]; #else // Use the texture reference return TexId::Fetch(tex_offset); #endif } /// Addition template __host__ __device__ __forceinline__ self_type operator+(Distance n) const { self_type retval; retval.ptr = ptr; retval.tex_offset = tex_offset + n; return retval; } /// Addition assignment template __host__ __device__ __forceinline__ self_type& operator+=(Distance n) { tex_offset += n; return *this; } /// Subtraction template __host__ __device__ __forceinline__ self_type operator-(Distance n) const { self_type retval; retval.ptr = ptr; retval.tex_offset = tex_offset - n; return retval; } /// Subtraction assignment template __host__ __device__ __forceinline__ self_type& operator-=(Distance n) { tex_offset -= n; return *this; } /// Distance __host__ __device__ __forceinline__ difference_type operator-(self_type other) const { return tex_offset - other.tex_offset; } /// Array subscript template __host__ __device__ __forceinline__ reference operator[](Distance n) const { self_type offset = (*this) + n; return *offset; } /// Structure dereference __host__ __device__ __forceinline__ pointer operator->() { return &(*(*this)); } /// Equal to __host__ __device__ __forceinline__ bool operator==(const self_type& rhs) { return ((ptr == rhs.ptr) && (tex_offset == rhs.tex_offset)); } /// Not equal to __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs) { return ((ptr != rhs.ptr) || (tex_offset != rhs.tex_offset)); } /// ostream operator friend std::ostream& operator<<(std::ostream& os, const self_type& itr) { return os; } }; /** @} */ // end group UtilIterator } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) #endif // CUDA_VERSION relion-3.1.3/src/gpu_utils/cub/iterator/transform_input_iterator.cuh000066400000000000000000000206431411340063500260600ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * Random-access iterator types */ #pragma once #include #include #include "../thread/thread_load.cuh" #include "../thread/thread_store.cuh" #include "../util_device.cuh" #include "../util_namespace.cuh" #if (THRUST_VERSION >= 100700) // This iterator is compatible with Thrust API 1.7 and newer #include #include #endif // THRUST_VERSION /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup UtilIterator * @{ */ /** * \brief A random-access input wrapper for transforming dereferenced values. * * \par Overview * - TransformInputIteratorTwraps a unary conversion functor of type \p * ConversionOp and a random-access input iterator of type InputIteratorT, * using the former to produce references of type \p ValueType from the latter. * - Can be used with any data type. * - Can be constructed, manipulated, and exchanged within and between host and device * functions. Wrapped host memory can only be dereferenced on the host, and wrapped * device memory can only be dereferenced on the device. * - Compatible with Thrust API v1.7 or newer. * * \par Snippet * The code snippet below illustrates the use of \p TransformInputIteratorTto * dereference an array of integers, tripling the values and converting them to doubles. * \par * \code * #include // or equivalently * * // Functor for tripling integer values and converting to doubles * struct TripleDoubler * { * __host__ __device__ __forceinline__ * double operator()(const int &a) const { * return double(a * 3); * } * }; * * // Declare, allocate, and initialize a device array * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] * TripleDoubler conversion_op; * * // Create an iterator wrapper * cub::TransformInputIterator itr(d_in, conversion_op); * * // Within device code: * printf("%f\n", itr[0]); // 24.0 * printf("%f\n", itr[1]); // 18.0 * printf("%f\n", itr[6]); // 27.0 * * \endcode * * \tparam ValueType The value type of this iterator * \tparam ConversionOp Unary functor type for mapping objects of type \p InputType to type \p ValueType. Must have member ValueType operator()(const InputType &datum). 
* \tparam InputIteratorT The type of the wrapped input iterator * \tparam OffsetT The difference type of this iterator (Default: \p ptrdiff_t) * */ template < typename ValueType, typename ConversionOp, typename InputIteratorT, typename OffsetT = ptrdiff_t> class TransformInputIterator { public: // Required iterator traits typedef TransformInputIterator self_type; ///< My own type typedef OffsetT difference_type; ///< Type to express the result of subtracting one iterator from another typedef ValueType value_type; ///< The type of the element the iterator can point to typedef ValueType* pointer; ///< The type of a pointer to an element the iterator can point to typedef ValueType reference; ///< The type of a reference to an element the iterator can point to #if (THRUST_VERSION >= 100700) // Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods typedef typename thrust::detail::iterator_facade_category< thrust::any_system_tag, thrust::random_access_traversal_tag, value_type, reference >::type iterator_category; ///< The iterator category #else typedef std::random_access_iterator_tag iterator_category; ///< The iterator category #endif // THRUST_VERSION private: ConversionOp conversion_op; InputIteratorT input_itr; public: /// Constructor __host__ __device__ __forceinline__ TransformInputIterator( InputIteratorT input_itr, ///< Input iterator to wrap ConversionOp conversion_op) ///< Conversion functor to wrap : conversion_op(conversion_op), input_itr(input_itr) {} /// Postfix increment __host__ __device__ __forceinline__ self_type operator++(int) { self_type retval = *this; input_itr++; return retval; } /// Prefix increment __host__ __device__ __forceinline__ self_type operator++() { input_itr++; return *this; } /// Indirection __host__ __device__ __forceinline__ reference operator*() const { return conversion_op(*input_itr); } /// Addition template __host__ __device__ __forceinline__ self_type operator+(Distance n) const { self_type retval(input_itr + n, conversion_op); return retval; } /// Addition assignment template __host__ __device__ __forceinline__ self_type& operator+=(Distance n) { input_itr += n; return *this; } /// Subtraction template __host__ __device__ __forceinline__ self_type operator-(Distance n) const { self_type retval(input_itr - n, conversion_op); return retval; } /// Subtraction assignment template __host__ __device__ __forceinline__ self_type& operator-=(Distance n) { input_itr -= n; return *this; } /// Distance __host__ __device__ __forceinline__ difference_type operator-(self_type other) const { return input_itr - other.input_itr; } /// Array subscript template __host__ __device__ __forceinline__ reference operator[](Distance n) const { return conversion_op(input_itr[n]); } /// Structure dereference __host__ __device__ __forceinline__ pointer operator->() { return &conversion_op(*input_itr); } /// Equal to __host__ __device__ __forceinline__ bool operator==(const self_type& rhs) { return (input_itr == rhs.input_itr); } /// Not equal to __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs) { return (input_itr != rhs.input_itr); } /// ostream operator friend std::ostream& operator<<(std::ostream& os, const self_type& itr) { return os; } }; /** @} */ // end group UtilIterator } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) 
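/*
 * Illustrative usage sketch -- NOT part of the vendored CUB sources above.
 * It composes the cub::TransformInputIterator defined in the preceding header with a
 * plain device pointer, so a device-wide reduction consumes squared values without
 * ever storing the squares. cub::DeviceReduce::Sum is assumed to be available from
 * the same CUB distribution; d_in is assumed to hold num_items floats and d_out one
 * float; CUDA error checking is omitted for brevity.
 */
#include <cub/cub.cuh>

struct Square
{
    __host__ __device__ __forceinline__
    float operator()(const float &x) const { return x * x; }
};

void SumOfSquares(const float *d_in, float *d_out, int num_items)
{
    Square op;
    cub::TransformInputIterator<float, Square, const float*> itr(d_in, op);

    // Two-phase pattern: size query, then the actual reduction
    void   *d_temp_storage    = NULL;
    size_t  temp_storage_bytes = 0;
    cub::DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, itr, d_out, num_items);
    cudaMalloc(&d_temp_storage, temp_storage_bytes);
    cub::DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, itr, d_out, num_items);
    cudaFree(d_temp_storage);
}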
relion-3.1.3/src/gpu_utils/cub/thread/000077500000000000000000000000001411340063500176255ustar00rootroot00000000000000relion-3.1.3/src/gpu_utils/cub/thread/thread_load.cuh000066400000000000000000000446651411340063500226130ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * Thread utilities for reading memory using PTX cache modifiers. */ #pragma once #include #include #include "../util_ptx.cuh" #include "../util_type.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup UtilIo * @{ */ //----------------------------------------------------------------------------- // Tags and constants //----------------------------------------------------------------------------- /** * \brief Enumeration of cache modifiers for memory load operations. */ enum CacheLoadModifier { LOAD_DEFAULT, ///< Default (no modifier) LOAD_CA, ///< Cache at all levels LOAD_CG, ///< Cache at global level LOAD_CS, ///< Cache streaming (likely to be accessed once) LOAD_CV, ///< Cache as volatile (including cached system lines) LOAD_LDG, ///< Cache as texture LOAD_VOLATILE, ///< Volatile (any memory space) }; /** * \name Thread I/O (cache modified) * @{ */ /** * \brief Thread utility for reading memory using cub::CacheLoadModifier cache modifiers. Can be used to load any data type. 
* * \par Example * \code * #include // or equivalently * * // 32-bit load using cache-global modifier: * int *d_in; * int val = cub::ThreadLoad(d_in + threadIdx.x); * * // 16-bit load using default modifier * short *d_in; * short val = cub::ThreadLoad(d_in + threadIdx.x); * * // 256-bit load using cache-volatile modifier * double4 *d_in; * double4 val = cub::ThreadLoad(d_in + threadIdx.x); * * // 96-bit load using cache-streaming modifier * struct TestFoo { bool a; short b; }; * TestFoo *d_struct; * TestFoo val = cub::ThreadLoad(d_in + threadIdx.x); * \endcode * * \tparam MODIFIER [inferred] CacheLoadModifier enumeration * \tparam InputIteratorT [inferred] Input iterator type \iterator */ template < CacheLoadModifier MODIFIER, typename InputIteratorT> __device__ __forceinline__ typename std::iterator_traits::value_type ThreadLoad(InputIteratorT itr); //@} end member group #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document /// Helper structure for templated load iteration (inductive case) template struct IterateThreadLoad { template static __device__ __forceinline__ void Load(T const *ptr, T *vals) { vals[COUNT] = ThreadLoad(ptr + COUNT); IterateThreadLoad::template Load(ptr, vals); } template static __device__ __forceinline__ void Dereference(InputIteratorT itr, T *vals) { vals[COUNT] = itr[COUNT]; IterateThreadLoad::Dereference(itr, vals); } }; /// Helper structure for templated load iteration (termination case) template struct IterateThreadLoad { template static __device__ __forceinline__ void Load(T const * /*ptr*/, T * /*vals*/) {} template static __device__ __forceinline__ void Dereference(InputIteratorT /*itr*/, T * /*vals*/) {} }; /** * Define a uint4 (16B) ThreadLoad specialization for the given Cache load modifier */ #define _CUB_LOAD_16(cub_modifier, ptx_modifier) \ template<> \ __device__ __forceinline__ uint4 ThreadLoad(uint4 const *ptr) \ { \ uint4 retval; \ asm volatile ("ld."#ptx_modifier".v4.u32 {%0, %1, %2, %3}, [%4];" : \ "=r"(retval.x), \ "=r"(retval.y), \ "=r"(retval.z), \ "=r"(retval.w) : \ _CUB_ASM_PTR_(ptr)); \ return retval; \ } \ template<> \ __device__ __forceinline__ ulonglong2 ThreadLoad(ulonglong2 const *ptr) \ { \ ulonglong2 retval; \ asm volatile ("ld."#ptx_modifier".v2.u64 {%0, %1}, [%2];" : \ "=l"(retval.x), \ "=l"(retval.y) : \ _CUB_ASM_PTR_(ptr)); \ return retval; \ } /** * Define a uint2 (8B) ThreadLoad specialization for the given Cache load modifier */ #define _CUB_LOAD_8(cub_modifier, ptx_modifier) \ template<> \ __device__ __forceinline__ ushort4 ThreadLoad(ushort4 const *ptr) \ { \ ushort4 retval; \ asm volatile ("ld."#ptx_modifier".v4.u16 {%0, %1, %2, %3}, [%4];" : \ "=h"(retval.x), \ "=h"(retval.y), \ "=h"(retval.z), \ "=h"(retval.w) : \ _CUB_ASM_PTR_(ptr)); \ return retval; \ } \ template<> \ __device__ __forceinline__ uint2 ThreadLoad(uint2 const *ptr) \ { \ uint2 retval; \ asm volatile ("ld."#ptx_modifier".v2.u32 {%0, %1}, [%2];" : \ "=r"(retval.x), \ "=r"(retval.y) : \ _CUB_ASM_PTR_(ptr)); \ return retval; \ } \ template<> \ __device__ __forceinline__ unsigned long long ThreadLoad(unsigned long long const *ptr) \ { \ unsigned long long retval; \ asm volatile ("ld."#ptx_modifier".u64 %0, [%1];" : \ "=l"(retval) : \ _CUB_ASM_PTR_(ptr)); \ return retval; \ } /** * Define a uint (4B) ThreadLoad specialization for the given Cache load modifier */ #define _CUB_LOAD_4(cub_modifier, ptx_modifier) \ template<> \ __device__ __forceinline__ unsigned int ThreadLoad(unsigned int const *ptr) \ { \ unsigned int retval; \ asm volatile 
("ld."#ptx_modifier".u32 %0, [%1];" : \ "=r"(retval) : \ _CUB_ASM_PTR_(ptr)); \ return retval; \ } /** * Define a unsigned short (2B) ThreadLoad specialization for the given Cache load modifier */ #define _CUB_LOAD_2(cub_modifier, ptx_modifier) \ template<> \ __device__ __forceinline__ unsigned short ThreadLoad(unsigned short const *ptr) \ { \ unsigned short retval; \ asm volatile ("ld."#ptx_modifier".u16 %0, [%1];" : \ "=h"(retval) : \ _CUB_ASM_PTR_(ptr)); \ return retval; \ } /** * Define an unsigned char (1B) ThreadLoad specialization for the given Cache load modifier */ #define _CUB_LOAD_1(cub_modifier, ptx_modifier) \ template<> \ __device__ __forceinline__ unsigned char ThreadLoad(unsigned char const *ptr) \ { \ unsigned short retval; \ asm volatile ( \ "{" \ " .reg .u8 datum;" \ " ld."#ptx_modifier".u8 datum, [%1];" \ " cvt.u16.u8 %0, datum;" \ "}" : \ "=h"(retval) : \ _CUB_ASM_PTR_(ptr)); \ return (unsigned char) retval; \ } /** * Define powers-of-two ThreadLoad specializations for the given Cache load modifier */ #define _CUB_LOAD_ALL(cub_modifier, ptx_modifier) \ _CUB_LOAD_16(cub_modifier, ptx_modifier) \ _CUB_LOAD_8(cub_modifier, ptx_modifier) \ _CUB_LOAD_4(cub_modifier, ptx_modifier) \ _CUB_LOAD_2(cub_modifier, ptx_modifier) \ _CUB_LOAD_1(cub_modifier, ptx_modifier) \ /** * Define powers-of-two ThreadLoad specializations for the various Cache load modifiers */ #if CUB_PTX_ARCH >= 200 _CUB_LOAD_ALL(LOAD_CA, ca) _CUB_LOAD_ALL(LOAD_CG, cg) _CUB_LOAD_ALL(LOAD_CS, cs) _CUB_LOAD_ALL(LOAD_CV, cv) #else _CUB_LOAD_ALL(LOAD_CA, global) // Use volatile to ensure coherent reads when this PTX is JIT'd to run on newer architectures with L1 _CUB_LOAD_ALL(LOAD_CG, volatile.global) _CUB_LOAD_ALL(LOAD_CS, global) _CUB_LOAD_ALL(LOAD_CV, volatile.global) #endif #if CUB_PTX_ARCH >= 350 _CUB_LOAD_ALL(LOAD_LDG, global.nc) #else _CUB_LOAD_ALL(LOAD_LDG, global) #endif // Macro cleanup #undef _CUB_LOAD_ALL #undef _CUB_LOAD_1 #undef _CUB_LOAD_2 #undef _CUB_LOAD_4 #undef _CUB_LOAD_8 #undef _CUB_LOAD_16 /** * ThreadLoad definition for LOAD_DEFAULT modifier on iterator types */ template __device__ __forceinline__ typename std::iterator_traits::value_type ThreadLoad( InputIteratorT itr, Int2Type /*modifier*/, Int2Type /*is_pointer*/) { return *itr; } /** * ThreadLoad definition for LOAD_DEFAULT modifier on pointer types */ template __device__ __forceinline__ T ThreadLoad( T *ptr, Int2Type /*modifier*/, Int2Type /*is_pointer*/) { return *ptr; } /** * ThreadLoad definition for LOAD_VOLATILE modifier on primitive pointer types */ template __device__ __forceinline__ T ThreadLoadVolatilePointer( T *ptr, Int2Type /*is_primitive*/) { T retval = *reinterpret_cast(ptr); return retval; } /** * ThreadLoad definition for LOAD_VOLATILE modifier on non-primitive pointer types */ template __device__ __forceinline__ T ThreadLoadVolatilePointer( T *ptr, Int2Type /*is_primitive*/) { typedef typename UnitWord::VolatileWord VolatileWord; // Word type for memcopying const int VOLATILE_MULTIPLE = sizeof(T) / sizeof(VolatileWord); /* VolatileWord words[VOLATILE_MULTIPLE]; IterateThreadLoad<0, VOLATILE_MULTIPLE>::Dereference( reinterpret_cast(ptr), words); return *reinterpret_cast(words); */ T retval; VolatileWord *words = reinterpret_cast(&retval); IterateThreadLoad<0, VOLATILE_MULTIPLE>::Dereference( reinterpret_cast(ptr), words); return retval; } /** * ThreadLoad definition for LOAD_VOLATILE modifier on pointer types */ template __device__ __forceinline__ T ThreadLoad( T *ptr, Int2Type /*modifier*/, Int2Type /*is_pointer*/) { // 
Apply tags for partial-specialization return ThreadLoadVolatilePointer(ptr, Int2Type::PRIMITIVE>()); } /** * ThreadLoad definition for generic modifiers on pointer types */ template __device__ __forceinline__ T ThreadLoad( T const *ptr, Int2Type /*modifier*/, Int2Type /*is_pointer*/) { typedef typename UnitWord::DeviceWord DeviceWord; const int DEVICE_MULTIPLE = sizeof(T) / sizeof(DeviceWord); DeviceWord words[DEVICE_MULTIPLE]; IterateThreadLoad<0, DEVICE_MULTIPLE>::template Load( reinterpret_cast(const_cast(ptr)), words); return *reinterpret_cast(words); } /** * ThreadLoad definition for generic modifiers */ template < CacheLoadModifier MODIFIER, typename InputIteratorT> __device__ __forceinline__ typename std::iterator_traits::value_type ThreadLoad(InputIteratorT itr) { // Apply tags for partial-specialization return ThreadLoad( itr, Int2Type(), Int2Type::VALUE>()); } #endif // DOXYGEN_SHOULD_SKIP_THIS /** @} */ // end group UtilIo } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/thread/thread_operators.cuh000066400000000000000000000220141411340063500236720ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ******************************************************************************/ /** * \file * Simple binary operator functor types */ /****************************************************************************** * Simple functor operators ******************************************************************************/ #pragma once #include "../util_macro.cuh" #include "../util_type.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup UtilModule * @{ */ /** * \brief Default equality functor */ struct Equality { /// Boolean equality operator, returns (a == b) template __host__ __device__ __forceinline__ bool operator()(const T &a, const T &b) const { return a == b; } }; /** * \brief Default inequality functor */ struct Inequality { /// Boolean inequality operator, returns (a != b) template __host__ __device__ __forceinline__ bool operator()(const T &a, const T &b) const { return a != b; } }; /** * \brief Inequality functor (wraps equality functor) */ template struct InequalityWrapper { /// Wrapped equality operator EqualityOp op; /// Constructor __host__ __device__ __forceinline__ InequalityWrapper(EqualityOp op) : op(op) {} /// Boolean inequality operator, returns (a != b) template __host__ __device__ __forceinline__ bool operator()(const T &a, const T &b) { return !op(a, b); } }; /** * \brief Default sum functor */ struct Sum { /// Boolean sum operator, returns a + b template __host__ __device__ __forceinline__ T operator()(const T &a, const T &b) const { return a + b; } }; /** * \brief Default max functor */ struct Max { /// Boolean max operator, returns (a > b) ? a : b template __host__ __device__ __forceinline__ T operator()(const T &a, const T &b) const { return CUB_MAX(a, b); } }; /** * \brief Arg max functor (keeps the value and offset of the first occurrence of the larger item) */ struct ArgMax { /// Boolean max operator, preferring the item having the smaller offset in case of ties template __host__ __device__ __forceinline__ KeyValuePair operator()( const KeyValuePair &a, const KeyValuePair &b) const { // Mooch BUG (device reduce argmax gk110 3.2 million random fp32) // return ((b.value > a.value) || ((a.value == b.value) && (b.key < a.key))) ? b : a; if ((b.value > a.value) || ((a.value == b.value) && (b.key < a.key))) return b; return a; } }; /** * \brief Default min functor */ struct Min { /// Boolean min operator, returns (a < b) ? a : b template __host__ __device__ __forceinline__ T operator()(const T &a, const T &b) const { return CUB_MIN(a, b); } }; /** * \brief Arg min functor (keeps the value and offset of the first occurrence of the smallest item) */ struct ArgMin { /// Boolean min operator, preferring the item having the smaller offset in case of ties template __host__ __device__ __forceinline__ KeyValuePair operator()( const KeyValuePair &a, const KeyValuePair &b) const { // Mooch BUG (device reduce argmax gk110 3.2 million random fp32) // return ((b.value < a.value) || ((a.value == b.value) && (b.key < a.key))) ? 
b : a; if ((b.value < a.value) || ((a.value == b.value) && (b.key < a.key))) return b; return a; } }; /** * \brief Default cast functor */ template struct CastOp { /// Cast operator, returns (B) a template __host__ __device__ __forceinline__ B operator()(const A &a) const { return (B) a; } }; /** * \brief Binary operator wrapper for switching non-commutative scan arguments */ template class SwizzleScanOp { private: /// Wrapped scan operator ScanOp scan_op; public: /// Constructor __host__ __device__ __forceinline__ SwizzleScanOp(ScanOp scan_op) : scan_op(scan_op) {} /// Switch the scan arguments template __host__ __device__ __forceinline__ T operator()(const T &a, const T &b) { T _a(a); T _b(b); return scan_op(_b, _a); } }; /** * \brief Reduce-by-segment functor. * * Given two cub::KeyValuePair inputs \p a and \p b and a * binary associative combining operator \p f(const T &x, const T &y), * an instance of this functor returns a cub::KeyValuePair whose \p key * field is a.key + b.key, and whose \p value field * is either b.value if b.key is non-zero, or f(a.value, b.value) otherwise. * * ReduceBySegmentOp is an associative, non-commutative binary combining operator * for input sequences of cub::KeyValuePair pairings. Such * sequences are typically used to represent a segmented set of values to be reduced * and a corresponding set of {0,1}-valued integer "head flags" demarcating the * first value of each segment. * */ template ///< Binary reduction operator to apply to values struct ReduceBySegmentOp { /// Wrapped reduction operator ReductionOpT op; /// Constructor __host__ __device__ __forceinline__ ReduceBySegmentOp() {} /// Constructor __host__ __device__ __forceinline__ ReduceBySegmentOp(ReductionOpT op) : op(op) {} /// Scan operator template ///< KeyValuePair pairing of T (value) and OffsetT (head flag) __host__ __device__ __forceinline__ KeyValuePairT operator()( const KeyValuePairT &first, ///< First partial reduction const KeyValuePairT &second) ///< Second partial reduction { KeyValuePairT retval; retval.key = first.key + second.key; retval.value = (second.key) ? second.value : // The second partial reduction spans a segment reset, so it's value aggregate becomes the running aggregate op(first.value, second.value); // The second partial reduction does not span a reset, so accumulate both into the running aggregate return retval; } }; template ///< Binary reduction operator to apply to values struct ReduceByKeyOp { /// Wrapped reduction operator ReductionOpT op; /// Constructor __host__ __device__ __forceinline__ ReduceByKeyOp() {} /// Constructor __host__ __device__ __forceinline__ ReduceByKeyOp(ReductionOpT op) : op(op) {} /// Scan operator template __host__ __device__ __forceinline__ KeyValuePairT operator()( const KeyValuePairT &first, ///< First partial reduction const KeyValuePairT &second) ///< Second partial reduction { KeyValuePairT retval = second; if (first.key == second.key) retval.value = op(first.value, retval.value); return retval; } }; /** @} */ // end group UtilModule } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/thread/thread_reduce.cuh000066400000000000000000000136251411340063500231330ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * Thread utilities for sequential reduction over statically-sized array types */ #pragma once #include "../thread/thread_operators.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /// Internal namespace (to prevent ADL mishaps between static functions when mixing different CUB installations) namespace internal { /** * Sequential reduction over statically-sized array types */ template < int LENGTH, typename T, typename ReductionOp> __device__ __forceinline__ T ThreadReduce( T* input, ///< [in] Input array ReductionOp reduction_op, ///< [in] Binary reduction operator T prefix, ///< [in] Prefix to seed reduction with Int2Type /*length*/) { T retval = prefix; #pragma unroll for (int i = 0; i < LENGTH; ++i) retval = reduction_op(retval, input[i]); return retval; } /** * \brief Perform a sequential reduction over \p LENGTH elements of the \p input array, seeded with the specified \p prefix. The aggregate is returned. * * \tparam LENGTH LengthT of input array * \tparam T [inferred] The data type to be reduced. * \tparam ScanOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b) */ template < int LENGTH, typename T, typename ReductionOp> __device__ __forceinline__ T ThreadReduce( T* input, ///< [in] Input array ReductionOp reduction_op, ///< [in] Binary reduction operator T prefix) ///< [in] Prefix to seed reduction with { return ThreadReduce(input, reduction_op, prefix, Int2Type()); } /** * \brief Perform a sequential reduction over \p LENGTH elements of the \p input array. The aggregate is returned. * * \tparam LENGTH LengthT of input array * \tparam T [inferred] The data type to be reduced. 
* \tparam ScanOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b) */ template < int LENGTH, typename T, typename ReductionOp> __device__ __forceinline__ T ThreadReduce( T* input, ///< [in] Input array ReductionOp reduction_op) ///< [in] Binary reduction operator { T prefix = input[0]; return ThreadReduce(input + 1, reduction_op, prefix); } /** * \brief Perform a sequential reduction over the statically-sized \p input array, seeded with the specified \p prefix. The aggregate is returned. * * \tparam LENGTH [inferred] LengthT of \p input array * \tparam T [inferred] The data type to be reduced. * \tparam ScanOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b) */ template < int LENGTH, typename T, typename ReductionOp> __device__ __forceinline__ T ThreadReduce( T (&input)[LENGTH], ///< [in] Input array ReductionOp reduction_op, ///< [in] Binary reduction operator T prefix) ///< [in] Prefix to seed reduction with { return ThreadReduce(input, reduction_op, prefix, Int2Type()); } /** * \brief Serial reduction with the specified operator * * \tparam LENGTH [inferred] LengthT of \p input array * \tparam T [inferred] The data type to be reduced. * \tparam ScanOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b) */ template < int LENGTH, typename T, typename ReductionOp> __device__ __forceinline__ T ThreadReduce( T (&input)[LENGTH], ///< [in] Input array ReductionOp reduction_op) ///< [in] Binary reduction operator { return ThreadReduce((T*) input, reduction_op); } } // internal namespace } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/thread/thread_scan.cuh000066400000000000000000000245061411340063500226100ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
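// A minimal sketch (not part of CUB) of the ThreadReduce utilities above: each
// thread serially reduces a statically-sized array held in registers. It assumes
// four items per thread and <cub/cub.cuh> on the include path.
__global__ void thread_reduce_sketch(const float *d_in, float *d_thread_sums)
{
    const int ITEMS = 4;
    float items[ITEMS];
    #pragma unroll
    for (int i = 0; i < ITEMS; ++i)
        items[i] = d_in[(blockIdx.x * blockDim.x + threadIdx.x) * ITEMS + i];

    // Serial per-thread aggregate; LENGTH is inferred from the array type.
    float thread_sum = cub::internal::ThreadReduce(items, cub::Sum());
    d_thread_sums[blockIdx.x * blockDim.x + threadIdx.x] = thread_sum;
}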
* ******************************************************************************/ /** * \file * Thread utilities for sequential prefix scan over statically-sized array types */ #pragma once #include "../thread/thread_operators.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /// Internal namespace (to prevent ADL mishaps between static functions when mixing different CUB installations) namespace internal { /** * \addtogroup UtilModule * @{ */ /** * \name Sequential prefix scan over statically-sized array types * @{ */ template < int LENGTH, typename T, typename ScanOp> __device__ __forceinline__ T ThreadScanExclusive( T inclusive, T exclusive, T *input, ///< [in] Input array T *output, ///< [out] Output array (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan operator Int2Type /*length*/) { #pragma unroll for (int i = 0; i < LENGTH; ++i) { inclusive = scan_op(exclusive, input[i]); output[i] = exclusive; exclusive = inclusive; } return inclusive; } /** * \brief Perform a sequential exclusive prefix scan over \p LENGTH elements of the \p input array, seeded with the specified \p prefix. The aggregate is returned. * * \tparam LENGTH LengthT of \p input and \p output arrays * \tparam T [inferred] The data type to be scanned. * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) */ template < int LENGTH, typename T, typename ScanOp> __device__ __forceinline__ T ThreadScanExclusive( T *input, ///< [in] Input array T *output, ///< [out] Output array (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan operator T prefix, ///< [in] Prefix to seed scan with bool apply_prefix = true) ///< [in] Whether or not the calling thread should apply its prefix. If not, the first output element is undefined. (Handy for preventing thread-0 from applying a prefix.) { T inclusive = input[0]; if (apply_prefix) { inclusive = scan_op(prefix, inclusive); } output[0] = prefix; T exclusive = inclusive; return ThreadScanExclusive(inclusive, exclusive, input + 1, output + 1, scan_op, Int2Type()); } /** * \brief Perform a sequential exclusive prefix scan over the statically-sized \p input array, seeded with the specified \p prefix. The aggregate is returned. * * \tparam LENGTH [inferred] LengthT of \p input and \p output arrays * \tparam T [inferred] The data type to be scanned. * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) */ template < int LENGTH, typename T, typename ScanOp> __device__ __forceinline__ T ThreadScanExclusive( T (&input)[LENGTH], ///< [in] Input array T (&output)[LENGTH], ///< [out] Output array (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan operator T prefix, ///< [in] Prefix to seed scan with bool apply_prefix = true) ///< [in] Whether or not the calling thread should apply its prefix. (Handy for preventing thread-0 from applying a prefix.) 
{ return ThreadScanExclusive((T*) input, (T*) output, scan_op, prefix, apply_prefix); } template < int LENGTH, typename T, typename ScanOp> __device__ __forceinline__ T ThreadScanInclusive( T inclusive, T *input, ///< [in] Input array T *output, ///< [out] Output array (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan operator Int2Type /*length*/) { #pragma unroll for (int i = 0; i < LENGTH; ++i) { inclusive = scan_op(inclusive, input[i]); output[i] = inclusive; } return inclusive; } /** * \brief Perform a sequential inclusive prefix scan over \p LENGTH elements of the \p input array. The aggregate is returned. * * \tparam LENGTH LengthT of \p input and \p output arrays * \tparam T [inferred] The data type to be scanned. * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) */ template < int LENGTH, typename T, typename ScanOp> __device__ __forceinline__ T ThreadScanInclusive( T *input, ///< [in] Input array T *output, ///< [out] Output array (may be aliased to \p input) ScanOp scan_op) ///< [in] Binary scan operator { T inclusive = input[0]; output[0] = inclusive; // Continue scan return ThreadScanInclusive(inclusive, input + 1, output + 1, scan_op, Int2Type()); } /** * \brief Perform a sequential inclusive prefix scan over the statically-sized \p input array. The aggregate is returned. * * \tparam LENGTH [inferred] LengthT of \p input and \p output arrays * \tparam T [inferred] The data type to be scanned. * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) */ template < int LENGTH, typename T, typename ScanOp> __device__ __forceinline__ T ThreadScanInclusive( T (&input)[LENGTH], ///< [in] Input array T (&output)[LENGTH], ///< [out] Output array (may be aliased to \p input) ScanOp scan_op) ///< [in] Binary scan operator { return ThreadScanInclusive((T*) input, (T*) output, scan_op); } /** * \brief Perform a sequential inclusive prefix scan over \p LENGTH elements of the \p input array, seeded with the specified \p prefix. The aggregate is returned. * * \tparam LENGTH LengthT of \p input and \p output arrays * \tparam T [inferred] The data type to be scanned. * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) */ template < int LENGTH, typename T, typename ScanOp> __device__ __forceinline__ T ThreadScanInclusive( T *input, ///< [in] Input array T *output, ///< [out] Output array (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan operator T prefix, ///< [in] Prefix to seed scan with bool apply_prefix = true) ///< [in] Whether or not the calling thread should apply its prefix. (Handy for preventing thread-0 from applying a prefix.) { T inclusive = input[0]; if (apply_prefix) { inclusive = scan_op(prefix, inclusive); } output[0] = inclusive; // Continue scan return ThreadScanInclusive(inclusive, input + 1, output + 1, scan_op, Int2Type()); } /** * \brief Perform a sequential inclusive prefix scan over the statically-sized \p input array, seeded with the specified \p prefix. The aggregate is returned. * * \tparam LENGTH [inferred] LengthT of \p input and \p output arrays * \tparam T [inferred] The data type to be scanned. 
* \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) */ template < int LENGTH, typename T, typename ScanOp> __device__ __forceinline__ T ThreadScanInclusive( T (&input)[LENGTH], ///< [in] Input array T (&output)[LENGTH], ///< [out] Output array (may be aliased to \p input) ScanOp scan_op, ///< [in] Binary scan operator T prefix, ///< [in] Prefix to seed scan with bool apply_prefix = true) ///< [in] Whether or not the calling thread should apply its prefix. (Handy for preventing thread-0 from applying a prefix.) { return ThreadScanInclusive((T*) input, (T*) output, scan_op, prefix, apply_prefix); } //@} end member group /** @} */ // end group UtilModule } // internal namespace } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/thread/thread_search.cuh000066400000000000000000000112751411340063500231300ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
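// A minimal sketch (not part of CUB) of the ThreadScanInclusive utilities above:
// each thread computes an inclusive prefix sum over a statically-sized register
// array. It assumes four items per thread and <cub/cub.cuh> on the include path.
__global__ void thread_scan_sketch(const int *d_in, int *d_out)
{
    int input[4], output[4];
    #pragma unroll
    for (int i = 0; i < 4; ++i)
        input[i] = d_in[threadIdx.x * 4 + i];

    // output[i] = input[0] + ... + input[i]; the per-thread aggregate is returned.
    int aggregate = cub::internal::ThreadScanInclusive(input, output, cub::Sum());

    #pragma unroll
    for (int i = 0; i < 4; ++i)
        d_out[threadIdx.x * 4 + i] = output[i];
    (void) aggregate;   // the aggregate could seed another scan via the prefix overloads
}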
* ******************************************************************************/ /** * \file * Thread utilities for sequential search */ #pragma once #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * Computes the begin offsets into A and B for the specific diagonal */ template < typename AIteratorT, typename BIteratorT, typename OffsetT, typename CoordinateT> __host__ __device__ __forceinline__ void MergePathSearch( OffsetT diagonal, AIteratorT a, BIteratorT b, OffsetT a_len, OffsetT b_len, CoordinateT& path_coordinate) { /// The value type of the input iterator typedef typename std::iterator_traits::value_type T; OffsetT split_min = CUB_MAX(diagonal - b_len, 0); OffsetT split_max = CUB_MIN(diagonal, a_len); while (split_min < split_max) { OffsetT split_pivot = (split_min + split_max) >> 1; if (a[split_pivot] <= b[diagonal - split_pivot - 1]) { // Move candidate split range up A, down B split_min = split_pivot + 1; } else { // Move candidate split range up B, down A split_max = split_pivot; } } path_coordinate.x = CUB_MIN(split_min, a_len); path_coordinate.y = diagonal - split_min; } /** * \brief Returns the offset of the first value within \p input which does not compare less than \p val */ template < typename InputIteratorT, typename OffsetT, typename T> __device__ __forceinline__ OffsetT LowerBound( InputIteratorT input, ///< [in] Input sequence OffsetT num_items, ///< [in] Input sequence length T val) ///< [in] Search key { OffsetT retval = 0; while (num_items > 0) { OffsetT half = num_items >> 1; if (input[retval + half] < val) { retval = retval + (half + 1); num_items = num_items - (half + 1); } else { num_items = half; } } return retval; } /** * \brief Returns the offset of the first value within \p input which compares greater than \p val */ template < typename InputIteratorT, typename OffsetT, typename T> __device__ __forceinline__ OffsetT UpperBound( InputIteratorT input, ///< [in] Input sequence OffsetT num_items, ///< [in] Input sequence length T val) ///< [in] Search key { OffsetT retval = 0; while (num_items > 0) { OffsetT half = num_items >> 1; if (val < input[retval + half]) { num_items = half; } else { retval = retval + (half + 1); num_items = num_items - (half + 1); } } return retval; } } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/thread/thread_store.cuh000066400000000000000000000430141411340063500230130ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. 
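// A minimal sketch (not part of CUB) of the device-side binary searches above:
// LowerBound/UpperBound return offsets into a sorted sequence. It assumes one
// query per thread and <cub/cub.cuh> on the include path.
__global__ void bound_sketch(const int *d_sorted, int num_items,
                             const int *d_queries, int *d_lo, int *d_hi)
{
    int q = d_queries[threadIdx.x];
    // Offset of the first element that does not compare less than q
    d_lo[threadIdx.x] = cub::LowerBound(d_sorted, num_items, q);
    // Offset of the first element that compares greater than q
    d_hi[threadIdx.x] = cub::UpperBound(d_sorted, num_items, q);
}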
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * Thread utilities for writing memory using PTX cache modifiers. */ #pragma once #include #include "../util_ptx.cuh" #include "../util_type.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup UtilIo * @{ */ //----------------------------------------------------------------------------- // Tags and constants //----------------------------------------------------------------------------- /** * \brief Enumeration of cache modifiers for memory store operations. */ enum CacheStoreModifier { STORE_DEFAULT, ///< Default (no modifier) STORE_WB, ///< Cache write-back all coherent levels STORE_CG, ///< Cache at global level STORE_CS, ///< Cache streaming (likely to be accessed once) STORE_WT, ///< Cache write-through (to system memory) STORE_VOLATILE, ///< Volatile shared (any memory space) }; /** * \name Thread I/O (cache modified) * @{ */ /** * \brief Thread utility for writing memory using cub::CacheStoreModifier cache modifiers. Can be used to store any data type. 
* * \par Example * \code * #include // or equivalently * * // 32-bit store using cache-global modifier: * int *d_out; * int val; * cub::ThreadStore(d_out + threadIdx.x, val); * * // 16-bit store using default modifier * short *d_out; * short val; * cub::ThreadStore(d_out + threadIdx.x, val); * * // 256-bit store using write-through modifier * double4 *d_out; * double4 val; * cub::ThreadStore(d_out + threadIdx.x, val); * * // 96-bit store using cache-streaming cache modifier * struct TestFoo { bool a; short b; }; * TestFoo *d_struct; * TestFoo val; * cub::ThreadStore(d_out + threadIdx.x, val); * \endcode * * \tparam MODIFIER [inferred] CacheStoreModifier enumeration * \tparam InputIteratorT [inferred] Output iterator type \iterator * \tparam T [inferred] Data type of output value */ template < CacheStoreModifier MODIFIER, typename OutputIteratorT, typename T> __device__ __forceinline__ void ThreadStore(OutputIteratorT itr, T val); //@} end member group #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document /// Helper structure for templated store iteration (inductive case) template struct IterateThreadStore { template static __device__ __forceinline__ void Store(T *ptr, T *vals) { ThreadStore(ptr + COUNT, vals[COUNT]); IterateThreadStore::template Store(ptr, vals); } template static __device__ __forceinline__ void Dereference(OutputIteratorT ptr, T *vals) { ptr[COUNT] = vals[COUNT]; IterateThreadStore::Dereference(ptr, vals); } }; /// Helper structure for templated store iteration (termination case) template struct IterateThreadStore { template static __device__ __forceinline__ void Store(T * /*ptr*/, T * /*vals*/) {} template static __device__ __forceinline__ void Dereference(OutputIteratorT /*ptr*/, T * /*vals*/) {} }; /** * Define a uint4 (16B) ThreadStore specialization for the given Cache load modifier */ #define _CUB_STORE_16(cub_modifier, ptx_modifier) \ template<> \ __device__ __forceinline__ void ThreadStore(uint4* ptr, uint4 val) \ { \ asm volatile ("st."#ptx_modifier".v4.u32 [%0], {%1, %2, %3, %4};" : : \ _CUB_ASM_PTR_(ptr), \ "r"(val.x), \ "r"(val.y), \ "r"(val.z), \ "r"(val.w)); \ } \ template<> \ __device__ __forceinline__ void ThreadStore(ulonglong2* ptr, ulonglong2 val) \ { \ asm volatile ("st."#ptx_modifier".v2.u64 [%0], {%1, %2};" : : \ _CUB_ASM_PTR_(ptr), \ "l"(val.x), \ "l"(val.y)); \ } /** * Define a uint2 (8B) ThreadStore specialization for the given Cache load modifier */ #define _CUB_STORE_8(cub_modifier, ptx_modifier) \ template<> \ __device__ __forceinline__ void ThreadStore(ushort4* ptr, ushort4 val) \ { \ asm volatile ("st."#ptx_modifier".v4.u16 [%0], {%1, %2, %3, %4};" : : \ _CUB_ASM_PTR_(ptr), \ "h"(val.x), \ "h"(val.y), \ "h"(val.z), \ "h"(val.w)); \ } \ template<> \ __device__ __forceinline__ void ThreadStore(uint2* ptr, uint2 val) \ { \ asm volatile ("st."#ptx_modifier".v2.u32 [%0], {%1, %2};" : : \ _CUB_ASM_PTR_(ptr), \ "r"(val.x), \ "r"(val.y)); \ } \ template<> \ __device__ __forceinline__ void ThreadStore(unsigned long long* ptr, unsigned long long val) \ { \ asm volatile ("st."#ptx_modifier".u64 [%0], %1;" : : \ _CUB_ASM_PTR_(ptr), \ "l"(val)); \ } /** * Define a unsigned int (4B) ThreadStore specialization for the given Cache load modifier */ #define _CUB_STORE_4(cub_modifier, ptx_modifier) \ template<> \ __device__ __forceinline__ void ThreadStore(unsigned int* ptr, unsigned int val) \ { \ asm volatile ("st."#ptx_modifier".u32 [%0], %1;" : : \ _CUB_ASM_PTR_(ptr), \ "r"(val)); \ } /** * Define a unsigned short (2B) ThreadStore specialization for the given 
Cache load modifier */ #define _CUB_STORE_2(cub_modifier, ptx_modifier) \ template<> \ __device__ __forceinline__ void ThreadStore(unsigned short* ptr, unsigned short val) \ { \ asm volatile ("st."#ptx_modifier".u16 [%0], %1;" : : \ _CUB_ASM_PTR_(ptr), \ "h"(val)); \ } /** * Define a unsigned char (1B) ThreadStore specialization for the given Cache load modifier */ #define _CUB_STORE_1(cub_modifier, ptx_modifier) \ template<> \ __device__ __forceinline__ void ThreadStore(unsigned char* ptr, unsigned char val) \ { \ asm volatile ( \ "{" \ " .reg .u8 datum;" \ " cvt.u8.u16 datum, %1;" \ " st."#ptx_modifier".u8 [%0], datum;" \ "}" : : \ _CUB_ASM_PTR_(ptr), \ "h"((unsigned short) val)); \ } /** * Define powers-of-two ThreadStore specializations for the given Cache load modifier */ #define _CUB_STORE_ALL(cub_modifier, ptx_modifier) \ _CUB_STORE_16(cub_modifier, ptx_modifier) \ _CUB_STORE_8(cub_modifier, ptx_modifier) \ _CUB_STORE_4(cub_modifier, ptx_modifier) \ _CUB_STORE_2(cub_modifier, ptx_modifier) \ _CUB_STORE_1(cub_modifier, ptx_modifier) \ /** * Define ThreadStore specializations for the various Cache load modifiers */ #if CUB_PTX_ARCH >= 200 _CUB_STORE_ALL(STORE_WB, wb) _CUB_STORE_ALL(STORE_CG, cg) _CUB_STORE_ALL(STORE_CS, cs) _CUB_STORE_ALL(STORE_WT, wt) #else _CUB_STORE_ALL(STORE_WB, global) _CUB_STORE_ALL(STORE_CG, global) _CUB_STORE_ALL(STORE_CS, global) _CUB_STORE_ALL(STORE_WT, volatile.global) #endif // Macro cleanup #undef _CUB_STORE_ALL #undef _CUB_STORE_1 #undef _CUB_STORE_2 #undef _CUB_STORE_4 #undef _CUB_STORE_8 #undef _CUB_STORE_16 /** * ThreadStore definition for STORE_DEFAULT modifier on iterator types */ template __device__ __forceinline__ void ThreadStore( OutputIteratorT itr, T val, Int2Type /*modifier*/, Int2Type /*is_pointer*/) { *itr = val; } /** * ThreadStore definition for STORE_DEFAULT modifier on pointer types */ template __device__ __forceinline__ void ThreadStore( T *ptr, T val, Int2Type /*modifier*/, Int2Type /*is_pointer*/) { *ptr = val; } /** * ThreadStore definition for STORE_VOLATILE modifier on primitive pointer types */ template __device__ __forceinline__ void ThreadStoreVolatilePtr( T *ptr, T val, Int2Type /*is_primitive*/) { *reinterpret_cast(ptr) = val; } /** * ThreadStore definition for STORE_VOLATILE modifier on non-primitive pointer types */ template __device__ __forceinline__ void ThreadStoreVolatilePtr( T *ptr, T val, Int2Type /*is_primitive*/) { // Create a temporary using shuffle-words, then store using volatile-words typedef typename UnitWord::VolatileWord VolatileWord; typedef typename UnitWord::ShuffleWord ShuffleWord; const int VOLATILE_MULTIPLE = sizeof(T) / sizeof(VolatileWord); const int SHUFFLE_MULTIPLE = sizeof(T) / sizeof(ShuffleWord); VolatileWord words[VOLATILE_MULTIPLE]; #pragma unroll for (int i = 0; i < SHUFFLE_MULTIPLE; ++i) reinterpret_cast(words)[i] = reinterpret_cast(&val)[i]; IterateThreadStore<0, VOLATILE_MULTIPLE>::template Dereference( reinterpret_cast(ptr), words); } /** * ThreadStore definition for STORE_VOLATILE modifier on pointer types */ template __device__ __forceinline__ void ThreadStore( T *ptr, T val, Int2Type /*modifier*/, Int2Type /*is_pointer*/) { ThreadStoreVolatilePtr(ptr, val, Int2Type::PRIMITIVE>()); } /** * ThreadStore definition for generic modifiers on pointer types */ template __device__ __forceinline__ void ThreadStore( T *ptr, T val, Int2Type /*modifier*/, Int2Type /*is_pointer*/) { // Create a temporary using shuffle-words, then store using device-words typedef typename UnitWord::DeviceWord 
DeviceWord; typedef typename UnitWord::ShuffleWord ShuffleWord; const int DEVICE_MULTIPLE = sizeof(T) / sizeof(DeviceWord); const int SHUFFLE_MULTIPLE = sizeof(T) / sizeof(ShuffleWord); DeviceWord words[DEVICE_MULTIPLE]; #pragma unroll for (int i = 0; i < SHUFFLE_MULTIPLE; ++i) reinterpret_cast(words)[i] = reinterpret_cast(&val)[i]; IterateThreadStore<0, DEVICE_MULTIPLE>::template Store( reinterpret_cast(ptr), words); } /** * ThreadStore definition for generic modifiers */ template __device__ __forceinline__ void ThreadStore(OutputIteratorT itr, T val) { ThreadStore( itr, val, Int2Type(), Int2Type::VALUE>()); } #endif // DOXYGEN_SHOULD_SKIP_THIS /** @} */ // end group UtilIo } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/util_allocator.cuh000066400000000000000000000700541411340063500221020ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /****************************************************************************** * Simple caching allocator for device memory allocations. The allocator is * thread-safe and capable of managing device allocations on multiple devices. ******************************************************************************/ #pragma once #include "util_namespace.cuh" #include "util_debug.cuh" #include #include #include "host/mutex.cuh" #include /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup UtilMgmt * @{ */ /****************************************************************************** * CachingDeviceAllocator (host use) ******************************************************************************/ /** * \brief A simple caching allocator for device memory allocations. 
* * \par Overview * The allocator is thread-safe and stream-safe and is capable of managing cached * device allocations on multiple devices. It behaves as follows: * * \par * - Allocations from the allocator are associated with an \p active_stream. Once freed, * the allocation becomes available immediately for reuse within the \p active_stream * with which it was associated with during allocation, and it becomes available for * reuse within other streams when all prior work submitted to \p active_stream has completed. * - Allocations are categorized and cached by bin size. A new allocation request of * a given size will only consider cached allocations within the corresponding bin. * - Bin limits progress geometrically in accordance with the growth factor * \p bin_growth provided during construction. Unused device allocations within * a larger bin cache are not reused for allocation requests that categorize to * smaller bin sizes. * - Allocation requests below (\p bin_growth ^ \p min_bin) are rounded up to * (\p bin_growth ^ \p min_bin). * - Allocations above (\p bin_growth ^ \p max_bin) are not rounded up to the nearest * bin and are simply freed when they are deallocated instead of being returned * to a bin-cache. * - %If the total storage of cached allocations on a given device will exceed * \p max_cached_bytes, allocations for that device are simply freed when they are * deallocated instead of being returned to their bin-cache. * * \par * For example, the default-constructed CachingDeviceAllocator is configured with: * - \p bin_growth = 8 * - \p min_bin = 3 * - \p max_bin = 7 * - \p max_cached_bytes = 6MB - 1B * * \par * which delineates five bin-sizes: 512B, 4KB, 32KB, 256KB, and 2MB * and sets a maximum of 6,291,455 cached bytes per device * */ struct CachingDeviceAllocator { //--------------------------------------------------------------------- // Constants //--------------------------------------------------------------------- /// Out-of-bounds bin static const unsigned int INVALID_BIN = (unsigned int) -1; /// Invalid size static const size_t INVALID_SIZE = (size_t) -1; #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document /// Invalid device ordinal static const int INVALID_DEVICE_ORDINAL = -1; //--------------------------------------------------------------------- // Type definitions and helper types //--------------------------------------------------------------------- /** * Descriptor for device memory allocations */ struct BlockDescriptor { void* d_ptr; // Device pointer size_t bytes; // Size of allocation in bytes unsigned int bin; // Bin enumeration int device; // device ordinal cudaStream_t associated_stream; // Associated associated_stream cudaEvent_t ready_event; // Signal when associated stream has run to the point at which this block was freed // Constructor (suitable for searching maps for a specific block, given its pointer and device) BlockDescriptor(void *d_ptr, int device) : d_ptr(d_ptr), bytes(0), bin(INVALID_BIN), device(device), associated_stream(0), ready_event(0) {} // Constructor (suitable for searching maps for a range of suitable blocks, given a device) BlockDescriptor(int device) : d_ptr(NULL), bytes(0), bin(INVALID_BIN), device(device), associated_stream(0), ready_event(0) {} // Comparison functor for comparing device pointers static bool PtrCompare(const BlockDescriptor &a, const BlockDescriptor &b) { if (a.device == b.device) return (a.d_ptr < b.d_ptr); else return (a.device < b.device); } // Comparison functor for comparing allocation sizes static bool 
SizeCompare(const BlockDescriptor &a, const BlockDescriptor &b) { if (a.device == b.device) return (a.bytes < b.bytes); else return (a.device < b.device); } }; /// BlockDescriptor comparator function interface typedef bool (*Compare)(const BlockDescriptor &, const BlockDescriptor &); class TotalBytes { public: size_t free; size_t live; TotalBytes() { free = live = 0; } }; /// Set type for cached blocks (ordered by size) typedef std::multiset CachedBlocks; /// Set type for live blocks (ordered by ptr) typedef std::multiset BusyBlocks; /// Map type of device ordinals to the number of cached bytes cached by each device typedef std::map GpuCachedBytes; //--------------------------------------------------------------------- // Utility functions //--------------------------------------------------------------------- /** * Integer pow function for unsigned base and exponent */ static unsigned int IntPow( unsigned int base, unsigned int exp) { unsigned int retval = 1; while (exp > 0) { if (exp & 1) { retval = retval * base; // multiply the result by the current base } base = base * base; // square the base exp = exp >> 1; // divide the exponent in half } return retval; } /** * Round up to the nearest power-of */ void NearestPowerOf( unsigned int &power, size_t &rounded_bytes, unsigned int base, size_t value) { power = 0; rounded_bytes = 1; if (value * base < value) { // Overflow power = sizeof(size_t) * 8; rounded_bytes = size_t(0) - 1; return; } while (rounded_bytes < value) { rounded_bytes *= base; power++; } } //--------------------------------------------------------------------- // Fields //--------------------------------------------------------------------- cub::Mutex mutex; /// Mutex for thread-safety unsigned int bin_growth; /// Geometric growth factor for bin-sizes unsigned int min_bin; /// Minimum bin enumeration unsigned int max_bin; /// Maximum bin enumeration size_t min_bin_bytes; /// Minimum bin size size_t max_bin_bytes; /// Maximum bin size size_t max_cached_bytes; /// Maximum aggregate cached bytes per device const bool skip_cleanup; /// Whether or not to skip a call to FreeAllCached() when destructor is called. (The CUDA runtime may have already shut down for statically declared allocators) bool debug; /// Whether or not to print (de)allocation events to stdout GpuCachedBytes cached_bytes; /// Map of device ordinal to aggregate cached bytes on that device CachedBlocks cached_blocks; /// Set of cached device allocations available for reuse BusyBlocks live_blocks; /// Set of live device allocations currently in use #endif // DOXYGEN_SHOULD_SKIP_THIS //--------------------------------------------------------------------- // Methods //--------------------------------------------------------------------- /** * \brief Constructor. 
*/ CachingDeviceAllocator( unsigned int bin_growth, ///< Geometric growth factor for bin-sizes unsigned int min_bin = 1, ///< Minimum bin (default is bin_growth ^ 1) unsigned int max_bin = INVALID_BIN, ///< Maximum bin (default is no max bin) size_t max_cached_bytes = INVALID_SIZE, ///< Maximum aggregate cached bytes per device (default is no limit) bool skip_cleanup = false, ///< Whether or not to skip a call to \p FreeAllCached() when the destructor is called (default is to deallocate) bool debug = false) ///< Whether or not to print (de)allocation events to stdout (default is no stderr output) : bin_growth(bin_growth), min_bin(min_bin), max_bin(max_bin), min_bin_bytes(IntPow(bin_growth, min_bin)), max_bin_bytes(IntPow(bin_growth, max_bin)), max_cached_bytes(max_cached_bytes), skip_cleanup(skip_cleanup), debug(debug), cached_blocks(BlockDescriptor::SizeCompare), live_blocks(BlockDescriptor::PtrCompare) {} /** * \brief Default constructor. * * Configured with: * \par * - \p bin_growth = 8 * - \p min_bin = 3 * - \p max_bin = 7 * - \p max_cached_bytes = (\p bin_growth ^ \p max_bin) * 3) - 1 = 6,291,455 bytes * * which delineates five bin-sizes: 512B, 4KB, 32KB, 256KB, and 2MB and * sets a maximum of 6,291,455 cached bytes per device */ CachingDeviceAllocator( bool skip_cleanup = false, bool debug = false) : bin_growth(8), min_bin(3), max_bin(7), min_bin_bytes(IntPow(bin_growth, min_bin)), max_bin_bytes(IntPow(bin_growth, max_bin)), max_cached_bytes((max_bin_bytes * 3) - 1), skip_cleanup(skip_cleanup), debug(debug), cached_blocks(BlockDescriptor::SizeCompare), live_blocks(BlockDescriptor::PtrCompare) {} /** * \brief Sets the limit on the number bytes this allocator is allowed to cache per device. * * Changing the ceiling of cached bytes does not cause any allocations (in-use or * cached-in-reserve) to be freed. See \p FreeAllCached(). */ cudaError_t SetMaxCachedBytes( size_t max_cached_bytes) { // Lock mutex.Lock(); if (debug) _CubLog("Changing max_cached_bytes (%lld -> %lld)\n", (long long) this->max_cached_bytes, (long long) max_cached_bytes); this->max_cached_bytes = max_cached_bytes; // Unlock mutex.Unlock(); return cudaSuccess; } /** * \brief Provides a suitable allocation of device memory for the given size on the specified device. * * Once freed, the allocation becomes available immediately for reuse within the \p active_stream * with which it was associated with during allocation, and it becomes available for reuse within other * streams when all prior work submitted to \p active_stream has completed. */ cudaError_t DeviceAllocate( int device, ///< [in] Device on which to place the allocation void **d_ptr, ///< [out] Reference to pointer to the allocation size_t bytes, ///< [in] Minimum number of bytes for the allocation cudaStream_t active_stream = 0) ///< [in] The stream to be associated with this allocation { *d_ptr = NULL; int entrypoint_device = INVALID_DEVICE_ORDINAL; cudaError_t error = cudaSuccess; if (device == INVALID_DEVICE_ORDINAL) { if (CubDebug(error = cudaGetDevice(&entrypoint_device))) return error; device = entrypoint_device; } // Create a block descriptor for the requested allocation bool found = false; BlockDescriptor search_key(device); search_key.associated_stream = active_stream; NearestPowerOf(search_key.bin, search_key.bytes, bin_growth, bytes); if (search_key.bin > max_bin) { // Bin is greater than our maximum bin: allocate the request // exactly and give out-of-bounds bin. It will not be cached // for reuse when returned. 
search_key.bin = INVALID_BIN; search_key.bytes = bytes; } else { // Search for a suitable cached allocation: lock mutex.Lock(); if (search_key.bin < min_bin) { // Bin is less than minimum bin: round up search_key.bin = min_bin; search_key.bytes = min_bin_bytes; } // Iterate through the range of cached blocks on the same device in the same bin CachedBlocks::iterator block_itr = cached_blocks.lower_bound(search_key); while ((block_itr != cached_blocks.end()) && (block_itr->device == device) && (block_itr->bin == search_key.bin)) { // To prevent races with reusing blocks returned by the host but still // in use by the device, only consider cached blocks that are // either (from the active stream) or (from an idle stream) if ((active_stream == block_itr->associated_stream) || (cudaEventQuery(block_itr->ready_event) != cudaErrorNotReady)) { // Reuse existing cache block. Insert into live blocks. found = true; search_key = *block_itr; search_key.associated_stream = active_stream; live_blocks.insert(search_key); // Remove from free blocks cached_bytes[device].free -= search_key.bytes; cached_bytes[device].live += search_key.bytes; if (debug) _CubLog("\tDevice %d reused cached block at %p (%lld bytes) for stream %lld (previously associated with stream %lld).\n", device, search_key.d_ptr, (long long) search_key.bytes, (long long) search_key.associated_stream, (long long) block_itr->associated_stream); cached_blocks.erase(block_itr); break; } block_itr++; } // Done searching: unlock mutex.Unlock(); } // Allocate the block if necessary if (!found) { // Set runtime's current device to specified device (entrypoint may not be set) if (device != entrypoint_device) { if (CubDebug(error = cudaGetDevice(&entrypoint_device))) return error; if (CubDebug(error = cudaSetDevice(device))) return error; } // Attempt to allocate if (CubDebug(error = cudaMalloc(&search_key.d_ptr, search_key.bytes)) == cudaErrorMemoryAllocation) { // The allocation attempt failed: free all cached blocks on device and retry if (debug) _CubLog("\tDevice %d failed to allocate %lld bytes for stream %lld, retrying after freeing cached allocations", device, (long long) search_key.bytes, (long long) search_key.associated_stream); error = cudaSuccess; // Reset the error we will return cudaGetLastError(); // Reset CUDART's error // Lock mutex.Lock(); // Iterate the range of free blocks on the same device BlockDescriptor free_key(device); CachedBlocks::iterator block_itr = cached_blocks.lower_bound(free_key); while ((block_itr != cached_blocks.end()) && (block_itr->device == device)) { // No need to worry about synchronization with the device: cudaFree is // blocking and will synchronize across all kernels executing // on the current device // Free device memory and destroy stream event. 
if (CubDebug(error = cudaFree(block_itr->d_ptr))) break; if (CubDebug(error = cudaEventDestroy(block_itr->ready_event))) break; // Reduce balance and erase entry cached_bytes[device].free -= block_itr->bytes; if (debug) _CubLog("\tDevice %d freed %lld bytes.\n\t\t %lld available blocks cached (%lld bytes), %lld live blocks (%lld bytes) outstanding.\n", device, (long long) block_itr->bytes, (long long) cached_blocks.size(), (long long) cached_bytes[device].free, (long long) live_blocks.size(), (long long) cached_bytes[device].live); cached_blocks.erase(block_itr); block_itr++; } // Unlock mutex.Unlock(); // Return under error if (error) return error; // Try to allocate again if (CubDebug(error = cudaMalloc(&search_key.d_ptr, search_key.bytes))) return error; } // Create ready event if (CubDebug(error = cudaEventCreateWithFlags(&search_key.ready_event, cudaEventDisableTiming))) return error; // Insert into live blocks mutex.Lock(); live_blocks.insert(search_key); cached_bytes[device].live += search_key.bytes; mutex.Unlock(); if (debug) _CubLog("\tDevice %d allocated new device block at %p (%lld bytes associated with stream %lld).\n", device, search_key.d_ptr, (long long) search_key.bytes, (long long) search_key.associated_stream); // Attempt to revert back to previous device if necessary if ((entrypoint_device != INVALID_DEVICE_ORDINAL) && (entrypoint_device != device)) { if (CubDebug(error = cudaSetDevice(entrypoint_device))) return error; } } // Copy device pointer to output parameter *d_ptr = search_key.d_ptr; if (debug) _CubLog("\t\t%lld available blocks cached (%lld bytes), %lld live blocks outstanding(%lld bytes).\n", (long long) cached_blocks.size(), (long long) cached_bytes[device].free, (long long) live_blocks.size(), (long long) cached_bytes[device].live); return error; } /** * \brief Provides a suitable allocation of device memory for the given size on the current device. * * Once freed, the allocation becomes available immediately for reuse within the \p active_stream * with which it was associated with during allocation, and it becomes available for reuse within other * streams when all prior work submitted to \p active_stream has completed. */ cudaError_t DeviceAllocate( void **d_ptr, ///< [out] Reference to pointer to the allocation size_t bytes, ///< [in] Minimum number of bytes for the allocation cudaStream_t active_stream = 0) ///< [in] The stream to be associated with this allocation { return DeviceAllocate(INVALID_DEVICE_ORDINAL, d_ptr, bytes, active_stream); } /** * \brief Frees a live allocation of device memory on the specified device, returning it to the allocator. * * Once freed, the allocation becomes available immediately for reuse within the \p active_stream * with which it was associated with during allocation, and it becomes available for reuse within other * streams when all prior work submitted to \p active_stream has completed. 
*/ cudaError_t DeviceFree( int device, void* d_ptr) { int entrypoint_device = INVALID_DEVICE_ORDINAL; cudaError_t error = cudaSuccess; if (device == INVALID_DEVICE_ORDINAL) { if (CubDebug(error = cudaGetDevice(&entrypoint_device))) return error; device = entrypoint_device; } // Lock mutex.Lock(); // Find corresponding block descriptor bool recached = false; BlockDescriptor search_key(d_ptr, device); BusyBlocks::iterator block_itr = live_blocks.find(search_key); if (block_itr != live_blocks.end()) { // Remove from live blocks search_key = *block_itr; live_blocks.erase(block_itr); cached_bytes[device].live -= search_key.bytes; // Keep the returned allocation if bin is valid and we won't exceed the max cached threshold if ((search_key.bin != INVALID_BIN) && (cached_bytes[device].free + search_key.bytes <= max_cached_bytes)) { // Insert returned allocation into free blocks recached = true; cached_blocks.insert(search_key); cached_bytes[device].free += search_key.bytes; if (debug) _CubLog("\tDevice %d returned %lld bytes from associated stream %lld.\n\t\t %lld available blocks cached (%lld bytes), %lld live blocks outstanding. (%lld bytes)\n", device, (long long) search_key.bytes, (long long) search_key.associated_stream, (long long) cached_blocks.size(), (long long) cached_bytes[device].free, (long long) live_blocks.size(), (long long) cached_bytes[device].live); } } // Unlock mutex.Unlock(); // First set to specified device (entrypoint may not be set) if (device != entrypoint_device) { if (CubDebug(error = cudaGetDevice(&entrypoint_device))) return error; if (CubDebug(error = cudaSetDevice(device))) return error; } if (recached) { // Insert the ready event in the associated stream (must have current device set properly) if (CubDebug(error = cudaEventRecord(search_key.ready_event, search_key.associated_stream))) return error; } else { // Free the allocation from the runtime and cleanup the event. if (CubDebug(error = cudaFree(d_ptr))) return error; if (CubDebug(error = cudaEventDestroy(search_key.ready_event))) return error; if (debug) _CubLog("\tDevice %d freed %lld bytes from associated stream %lld.\n\t\t %lld available blocks cached (%lld bytes), %lld live blocks (%lld bytes) outstanding.\n", device, (long long) search_key.bytes, (long long) search_key.associated_stream, (long long) cached_blocks.size(), (long long) cached_bytes[device].free, (long long) live_blocks.size(), (long long) cached_bytes[device].live); } // Reset device if ((entrypoint_device != INVALID_DEVICE_ORDINAL) && (entrypoint_device != device)) { if (CubDebug(error = cudaSetDevice(entrypoint_device))) return error; } return error; } /** * \brief Frees a live allocation of device memory on the current device, returning it to the allocator. * * Once freed, the allocation becomes available immediately for reuse within the \p active_stream * with which it was associated with during allocation, and it becomes available for reuse within other * streams when all prior work submitted to \p active_stream has completed. 
*/ cudaError_t DeviceFree( void* d_ptr) { return DeviceFree(INVALID_DEVICE_ORDINAL, d_ptr); } /** * \brief Frees all cached device allocations on all devices */ cudaError_t FreeAllCached() { cudaError_t error = cudaSuccess; int entrypoint_device = INVALID_DEVICE_ORDINAL; int current_device = INVALID_DEVICE_ORDINAL; mutex.Lock(); while (!cached_blocks.empty()) { // Get first block CachedBlocks::iterator begin = cached_blocks.begin(); // Get entry-point device ordinal if necessary if (entrypoint_device == INVALID_DEVICE_ORDINAL) { if (CubDebug(error = cudaGetDevice(&entrypoint_device))) break; } // Set current device ordinal if necessary if (begin->device != current_device) { if (CubDebug(error = cudaSetDevice(begin->device))) break; current_device = begin->device; } // Free device memory if (CubDebug(error = cudaFree(begin->d_ptr))) break; if (CubDebug(error = cudaEventDestroy(begin->ready_event))) break; // Reduce balance and erase entry cached_bytes[current_device].free -= begin->bytes; if (debug) _CubLog("\tDevice %d freed %lld bytes.\n\t\t %lld available blocks cached (%lld bytes), %lld live blocks (%lld bytes) outstanding.\n", current_device, (long long) begin->bytes, (long long) cached_blocks.size(), (long long) cached_bytes[current_device].free, (long long) live_blocks.size(), (long long) cached_bytes[current_device].live); cached_blocks.erase(begin); } mutex.Unlock(); // Attempt to revert back to entry-point device if necessary if (entrypoint_device != INVALID_DEVICE_ORDINAL) { if (CubDebug(error = cudaSetDevice(entrypoint_device))) return error; } return error; } /** * \brief Destructor */ virtual ~CachingDeviceAllocator() { if (!skip_cleanup) FreeAllCached(); } }; /** @} */ // end group UtilMgmt } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/util_arch.cuh000066400000000000000000000151601411340063500210340ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
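// A minimal sketch (not part of CUB) of host-side use of the CachingDeviceAllocator
// defined above; error checking is elided and a valid stream is assumed.
void allocator_sketch(cudaStream_t stream)
{
    cub::CachingDeviceAllocator allocator;           // default bins: 512B, 4KB, 32KB, 256KB, 2MB
    void *d_scratch = NULL;
    // Request ~1MB associated with `stream`; the allocation is rounded up to the 2MB bin.
    allocator.DeviceAllocate(&d_scratch, 1 << 20, stream);
    // ... launch work on `stream` that uses d_scratch ...
    // Return the block to the bin cache; it becomes immediately reusable on `stream`.
    allocator.DeviceFree(d_scratch);
}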
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * Static architectural properties by SM version. */ #pragma once #include "util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document #if (__CUDACC_VER_MAJOR__ >= 9) && !defined(CUB_USE_COOPERATIVE_GROUPS) #define CUB_USE_COOPERATIVE_GROUPS #endif /// CUB_PTX_ARCH reflects the PTX version targeted by the active compiler pass (or zero during the host pass). #ifndef CUB_PTX_ARCH #ifndef __CUDA_ARCH__ #define CUB_PTX_ARCH 0 #else #define CUB_PTX_ARCH __CUDA_ARCH__ #endif #endif /// Whether or not the source targeted by the active compiler pass is allowed to invoke device kernels or methods from the CUDA runtime API. #ifndef CUB_RUNTIME_FUNCTION #if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__>= 350 && defined(__CUDACC_RDC__)) #define CUB_RUNTIME_ENABLED #define CUB_RUNTIME_FUNCTION __host__ __device__ #else #define CUB_RUNTIME_FUNCTION __host__ #endif #endif /// Number of threads per warp #ifndef CUB_LOG_WARP_THREADS #define CUB_LOG_WARP_THREADS(arch) \ (5) #define CUB_WARP_THREADS(arch) \ (1 << CUB_LOG_WARP_THREADS(arch)) #define CUB_PTX_WARP_THREADS CUB_WARP_THREADS(CUB_PTX_ARCH) #define CUB_PTX_LOG_WARP_THREADS CUB_LOG_WARP_THREADS(CUB_PTX_ARCH) #endif /// Number of smem banks #ifndef CUB_LOG_SMEM_BANKS #define CUB_LOG_SMEM_BANKS(arch) \ ((arch >= 200) ? \ (5) : \ (4)) #define CUB_SMEM_BANKS(arch) \ (1 << CUB_LOG_SMEM_BANKS(arch)) #define CUB_PTX_LOG_SMEM_BANKS CUB_LOG_SMEM_BANKS(CUB_PTX_ARCH) #define CUB_PTX_SMEM_BANKS CUB_SMEM_BANKS(CUB_PTX_ARCH) #endif /// Oversubscription factor #ifndef CUB_SUBSCRIPTION_FACTOR #define CUB_SUBSCRIPTION_FACTOR(arch) \ ((arch >= 300) ? \ (5) : \ ((arch >= 200) ? \ (3) : \ (10))) #define CUB_PTX_SUBSCRIPTION_FACTOR CUB_SUBSCRIPTION_FACTOR(CUB_PTX_ARCH) #endif /// Prefer padding overhead vs X-way conflicts greater than this threshold #ifndef CUB_PREFER_CONFLICT_OVER_PADDING #define CUB_PREFER_CONFLICT_OVER_PADDING(arch) \ ((arch >= 300) ? \ (1) : \ (4)) #define CUB_PTX_PREFER_CONFLICT_OVER_PADDING CUB_PREFER_CONFLICT_OVER_PADDING(CUB_PTX_ARCH) #endif /// Scale down the number of warps to keep same amount of "tile" storage as the nominal configuration for 4B data. Minimum of two warps. #ifndef CUB_BLOCK_THREADS #define CUB_BLOCK_THREADS(NOMINAL_4B_BLOCK_THREADS, T, PTX_ARCH) \ (CUB_MIN( \ NOMINAL_4B_BLOCK_THREADS * 2, \ CUB_WARP_THREADS(PTX_ARCH) * CUB_MAX( \ (NOMINAL_4B_BLOCK_THREADS / CUB_WARP_THREADS(PTX_ARCH)) * 3 / 4, \ (NOMINAL_4B_BLOCK_THREADS / CUB_WARP_THREADS(PTX_ARCH)) * 4 / sizeof(T)))) #endif /// Scale up/down number of items per thread to keep the same amount of "tile" storage as the nominal configuration for 4B data. 
Minimum 1 item per thread #ifndef CUB_ITEMS_PER_THREAD #define CUB_ITEMS_PER_THREAD(NOMINAL_4B_ITEMS_PER_THREAD, NOMINAL_4B_BLOCK_THREADS, T, PTX_ARCH) \ (CUB_MIN( \ NOMINAL_4B_ITEMS_PER_THREAD * 2, \ CUB_MAX( \ 1, \ (NOMINAL_4B_ITEMS_PER_THREAD * NOMINAL_4B_BLOCK_THREADS * 4 / sizeof(T)) / CUB_BLOCK_THREADS(NOMINAL_4B_BLOCK_THREADS, T, PTX_ARCH)))) #endif /// Define both nominal threads-per-block and items-per-thread #ifndef CUB_NOMINAL_CONFIG #define CUB_NOMINAL_CONFIG(NOMINAL_4B_BLOCK_THREADS, NOMINAL_4B_ITEMS_PER_THREAD, T) \ CUB_BLOCK_THREADS(NOMINAL_4B_BLOCK_THREADS, T, 200), \ CUB_ITEMS_PER_THREAD(NOMINAL_4B_ITEMS_PER_THREAD, NOMINAL_4B_BLOCK_THREADS, T, 200) #endif #endif // Do not document } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/util_debug.cuh000066400000000000000000000117051411340063500212060ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * Error and event logging routines. * * The following macros definitions are supported: * - \p CUB_LOG. Simple event messages are printed to \p stdout. */ #pragma once #include #include "util_namespace.cuh" #include "util_arch.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup UtilMgmt * @{ */ /// CUB error reporting macro (prints error messages to stderr) #if (defined(DEBUG) || defined(_DEBUG)) && !defined(CUB_STDERR) #define CUB_STDERR #endif /** * \brief %If \p CUB_STDERR is defined and \p error is not \p cudaSuccess, the corresponding error message is printed to \p stderr (or \p stdout in device code) along with the supplied source context. * * \return The CUDA error. 
*/ __host__ __device__ __forceinline__ cudaError_t Debug( cudaError_t error, const char* filename, int line) { (void)filename; (void)line; #ifdef CUB_STDERR if (error) { #if (CUB_PTX_ARCH == 0) fprintf(stderr, "CUDA error %d [%s, %d]: %s\n", error, filename, line, cudaGetErrorString(error)); fflush(stderr); #elif (CUB_PTX_ARCH >= 200) printf("CUDA error %d [block (%d,%d,%d) thread (%d,%d,%d), %s, %d]\n", error, blockIdx.z, blockIdx.y, blockIdx.x, threadIdx.z, threadIdx.y, threadIdx.x, filename, line); #endif } #endif return error; } /** * \brief Debug macro */ #ifndef CubDebug #define CubDebug(e) cub::Debug((cudaError_t) (e), __FILE__, __LINE__) #endif /** * \brief Debug macro with exit */ #ifndef CubDebugExit #define CubDebugExit(e) if (cub::Debug((cudaError_t) (e), __FILE__, __LINE__)) { exit(1); } #endif /** * \brief Log macro for printf statements. */ #if !defined(_CubLog) #if !(defined(__clang__) && defined(__CUDA__)) #if (CUB_PTX_ARCH == 0) #define _CubLog(format, ...) printf(format,__VA_ARGS__); #elif (CUB_PTX_ARCH >= 200) #define _CubLog(format, ...) printf("[block (%d,%d,%d), thread (%d,%d,%d)]: " format, blockIdx.z, blockIdx.y, blockIdx.x, threadIdx.z, threadIdx.y, threadIdx.x, __VA_ARGS__); #endif #else // XXX shameless hack for clang around variadic printf... // Compilies w/o supplying -std=c++11 but shows warning, // so we sielence them :) #pragma clang diagnostic ignored "-Wc++11-extensions" #pragma clang diagnostic ignored "-Wunnamed-type-template-args" template inline __host__ __device__ void va_printf(char const* format, Args const&... args) { #ifdef __CUDA_ARCH__ printf(format, blockIdx.z, blockIdx.y, blockIdx.x, threadIdx.z, threadIdx.y, threadIdx.x, args...); #else printf(format, args...); #endif } #ifndef __CUDA_ARCH__ #define _CubLog(format, ...) va_printf(format,__VA_ARGS__); #else #define _CubLog(format, ...) va_printf("[block (%d,%d,%d), thread (%d,%d,%d)]: " format, __VA_ARGS__); #endif #endif #endif /** @} */ // end group UtilMgmt } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/util_device.cuh000066400000000000000000000246421411340063500213630ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * Properties of a given CUDA device and the corresponding PTX bundle */ #pragma once #include "util_type.cuh" #include "util_arch.cuh" #include "util_debug.cuh" #include "util_namespace.cuh" #include "util_macro.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup UtilMgmt * @{ */ #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document /** * Alias temporaries to externally-allocated device storage (or simply return the amount of storage needed). */ template __host__ __device__ __forceinline__ cudaError_t AliasTemporaries( void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Size in bytes of \t d_temp_storage allocation void* (&allocations)[ALLOCATIONS], ///< [in,out] Pointers to device allocations needed size_t (&allocation_sizes)[ALLOCATIONS]) ///< [in] Sizes in bytes of device allocations needed { const int ALIGN_BYTES = 256; const int ALIGN_MASK = ~(ALIGN_BYTES - 1); // Compute exclusive prefix sum over allocation requests size_t allocation_offsets[ALLOCATIONS]; size_t bytes_needed = 0; for (int i = 0; i < ALLOCATIONS; ++i) { size_t allocation_bytes = (allocation_sizes[i] + ALIGN_BYTES - 1) & ALIGN_MASK; allocation_offsets[i] = bytes_needed; bytes_needed += allocation_bytes; } bytes_needed += ALIGN_BYTES - 1; // Check if the caller is simply requesting the size of the storage allocation if (!d_temp_storage) { temp_storage_bytes = bytes_needed; return cudaSuccess; } // Check if enough storage provided if (temp_storage_bytes < bytes_needed) { return CubDebug(cudaErrorInvalidValue); } // Alias d_temp_storage = (void *) ((size_t(d_temp_storage) + ALIGN_BYTES - 1) & ALIGN_MASK); for (int i = 0; i < ALLOCATIONS; ++i) { allocations[i] = static_cast(d_temp_storage) + allocation_offsets[i]; } return cudaSuccess; } /** * Empty kernel for querying PTX manifest metadata (e.g., version) for the current device */ template __global__ void EmptyKernel(void) { } #endif // DOXYGEN_SHOULD_SKIP_THIS /** * \brief Retrieves the PTX version that will be used on the current device (major * 100 + minor * 10) */ CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t PtxVersion(int &ptx_version) { struct Dummy { /// Type definition of the EmptyKernel kernel entry point typedef void (*EmptyKernelPtr)(); /// Force EmptyKernel to be generated if this class is used CUB_RUNTIME_FUNCTION __forceinline__ EmptyKernelPtr Empty() { return EmptyKernel; } }; #ifndef CUB_RUNTIME_ENABLED (void)ptx_version; // CUDA API calls not supported from this device return cudaErrorInvalidConfiguration; #elif (CUB_PTX_ARCH > 0) ptx_version = CUB_PTX_ARCH; return cudaSuccess; #else cudaError_t error = cudaSuccess; do { cudaFuncAttributes empty_kernel_attrs; if (CubDebug(error = cudaFuncGetAttributes(&empty_kernel_attrs, EmptyKernel))) break; 
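// cudaFuncAttributes::ptxVersion encodes the targeted virtual architecture as (major * 10 + minor),
// so scaling by 10 below yields CUB's (major * 100 + minor * 10) convention (e.g. compute_35 -> 350).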
ptx_version = empty_kernel_attrs.ptxVersion * 10; } while (0); return error; #endif } /** * \brief Retrieves the SM version (major * 100 + minor * 10) */ CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t SmVersion(int &sm_version, int device_ordinal) { #ifndef CUB_RUNTIME_ENABLED (void)sm_version; (void)device_ordinal; // CUDA API calls not supported from this device return cudaErrorInvalidConfiguration; #else cudaError_t error = cudaSuccess; do { // Fill in SM version int major, minor; if (CubDebug(error = cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, device_ordinal))) break; if (CubDebug(error = cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, device_ordinal))) break; sm_version = major * 100 + minor * 10; } while (0); return error; #endif } #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document /** * Synchronize the stream if specified */ CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t SyncStream(cudaStream_t stream) { #if (CUB_PTX_ARCH == 0) return cudaStreamSynchronize(stream); #else (void)stream; // Device can't yet sync on a specific stream return cudaDeviceSynchronize(); #endif } /** * \brief Computes maximum SM occupancy in thread blocks for executing the given kernel function pointer \p kernel_ptr on the current device with \p block_threads per thread block. * * \par Snippet * The code snippet below illustrates the use of the MaxSmOccupancy function. * \par * \code * #include // or equivalently * * template * __global__ void ExampleKernel() * { * // Allocate shared memory for BlockScan * __shared__ volatile T buffer[4096]; * * ... * } * * ... * * // Determine SM occupancy for ExampleKernel specialized for unsigned char * int max_sm_occupancy; * MaxSmOccupancy(max_sm_occupancy, ExampleKernel, 64); * * // max_sm_occupancy <-- 4 on SM10 * // max_sm_occupancy <-- 8 on SM20 * // max_sm_occupancy <-- 12 on SM35 * * \endcode * */ template CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t MaxSmOccupancy( int &max_sm_occupancy, ///< [out] maximum number of thread blocks that can reside on a single SM KernelPtr kernel_ptr, ///< [in] Kernel pointer for which to compute SM occupancy int block_threads, ///< [in] Number of threads per thread block int dynamic_smem_bytes = 0) { #ifndef CUB_RUNTIME_ENABLED (void)dynamic_smem_bytes; (void)block_threads; (void)kernel_ptr; (void)max_sm_occupancy; // CUDA API calls not supported from this device return CubDebug(cudaErrorInvalidConfiguration); #else return cudaOccupancyMaxActiveBlocksPerMultiprocessor ( &max_sm_occupancy, kernel_ptr, block_threads, dynamic_smem_bytes); #endif // CUB_RUNTIME_ENABLED } /****************************************************************************** * Policy management ******************************************************************************/ /** * Kernel dispatch configuration */ struct KernelConfig { int block_threads; int items_per_thread; int tile_size; int sm_occupancy; CUB_RUNTIME_FUNCTION __forceinline__ KernelConfig() : block_threads(0), items_per_thread(0), tile_size(0), sm_occupancy(0) {} template CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t Init(KernelPtrT kernel_ptr) { block_threads = AgentPolicyT::BLOCK_THREADS; items_per_thread = AgentPolicyT::ITEMS_PER_THREAD; tile_size = block_threads * items_per_thread; cudaError_t retval = MaxSmOccupancy(sm_occupancy, kernel_ptr, block_threads); return retval; } }; /// Helper for dispatching into a policy chain template struct ChainedPolicy { /// The policy for the active compiler pass typedef typename If<(CUB_PTX_ARCH < 
PTX_VERSION), typename PrevPolicyT::ActivePolicy, PolicyT>::Type ActivePolicy; /// Specializes and dispatches op in accordance to the first policy in the chain of adequate PTX version template CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t Invoke(int ptx_version, FunctorT &op) { if (ptx_version < PTX_VERSION) { return PrevPolicyT::Invoke(ptx_version, op); } return op.template Invoke(); } }; /// Helper for dispatching into a policy chain (end-of-chain specialization) template struct ChainedPolicy { /// The policy for the active compiler pass typedef PolicyT ActivePolicy; /// Specializes and dispatches op in accordance to the first policy in the chain of adequate PTX version template CUB_RUNTIME_FUNCTION __forceinline__ static cudaError_t Invoke(int /*ptx_version*/, FunctorT &op) { return op.template Invoke(); } }; #endif // Do not document /** @} */ // end group UtilMgmt } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/util_macro.cuh000066400000000000000000000071761411340063500212300ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /****************************************************************************** * Common C/C++ macro utilities ******************************************************************************/ #pragma once #include "util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup UtilModule * @{ */ #ifndef CUB_ALIGN #if defined(_WIN32) || defined(_WIN64) /// Align struct #define CUB_ALIGN(bytes) __declspec(align(32)) #else /// Align struct #define CUB_ALIGN(bytes) __attribute__((aligned(bytes))) #endif #endif #ifndef CUB_MAX /// Select maximum(a, b) #define CUB_MAX(a, b) (((b) > (a)) ? 
(b) : (a)) #endif #ifndef CUB_MIN /// Select minimum(a, b) #define CUB_MIN(a, b) (((b) < (a)) ? (b) : (a)) #endif #ifndef CUB_QUOTIENT_FLOOR /// Quotient of x/y rounded down to nearest integer #define CUB_QUOTIENT_FLOOR(x, y) ((x) / (y)) #endif #ifndef CUB_QUOTIENT_CEILING /// Quotient of x/y rounded up to nearest integer #define CUB_QUOTIENT_CEILING(x, y) (((x) + (y) - 1) / (y)) #endif #ifndef CUB_ROUND_UP_NEAREST /// x rounded up to the nearest multiple of y #define CUB_ROUND_UP_NEAREST(x, y) ((((x) + (y) - 1) / (y)) * y) #endif #ifndef CUB_ROUND_DOWN_NEAREST /// x rounded down to the nearest multiple of y #define CUB_ROUND_DOWN_NEAREST(x, y) (((x) / (y)) * y) #endif #ifndef CUB_STATIC_ASSERT #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document #define CUB_CAT_(a, b) a ## b #define CUB_CAT(a, b) CUB_CAT_(a, b) #endif // DOXYGEN_SHOULD_SKIP_THIS /// Static assert #define CUB_STATIC_ASSERT(cond, msg) typedef int CUB_CAT(cub_static_assert, __LINE__)[(cond) ? 1 : -1] #endif /** @} */ // end group UtilModule } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/util_namespace.cuh000066400000000000000000000040641411340063500220540ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * Place-holder for prefixing the cub namespace */ #pragma once // For example: //#define CUB_NS_PREFIX namespace thrust{ namespace detail { //#define CUB_NS_POSTFIX } } #ifndef CUB_NS_PREFIX #define CUB_NS_PREFIX #endif #ifndef CUB_NS_POSTFIX #define CUB_NS_POSTFIX #endif relion-3.1.3/src/gpu_utils/cub/util_ptx.cuh000066400000000000000000000511061411340063500207320ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. 
All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * PTX intrinsics */ #pragma once #include "util_type.cuh" #include "util_arch.cuh" #include "util_namespace.cuh" #include "util_debug.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup UtilPtx * @{ */ /****************************************************************************** * PTX helper macros ******************************************************************************/ #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document /** * Register modifier for pointer-types (for inlining PTX assembly) */ #if defined(_WIN64) || defined(__LP64__) #define __CUB_LP64__ 1 // 64-bit register modifier for inlined asm #define _CUB_ASM_PTR_ "l" #define _CUB_ASM_PTR_SIZE_ "u64" #else #define __CUB_LP64__ 0 // 32-bit register modifier for inlined asm #define _CUB_ASM_PTR_ "r" #define _CUB_ASM_PTR_SIZE_ "u32" #endif #endif // DOXYGEN_SHOULD_SKIP_THIS /****************************************************************************** * Inlined PTX intrinsics ******************************************************************************/ /** * \brief Shift-right then add. Returns (\p x >> \p shift) + \p addend. */ __device__ __forceinline__ unsigned int SHR_ADD( unsigned int x, unsigned int shift, unsigned int addend) { unsigned int ret; #if CUB_PTX_ARCH >= 200 asm ("vshr.u32.u32.u32.clamp.add %0, %1, %2, %3;" : "=r"(ret) : "r"(x), "r"(shift), "r"(addend)); #else ret = (x >> shift) + addend; #endif return ret; } /** * \brief Shift-left then add. Returns (\p x << \p shift) + \p addend. */ __device__ __forceinline__ unsigned int SHL_ADD( unsigned int x, unsigned int shift, unsigned int addend) { unsigned int ret; #if CUB_PTX_ARCH >= 200 asm ("vshl.u32.u32.u32.clamp.add %0, %1, %2, %3;" : "=r"(ret) : "r"(x), "r"(shift), "r"(addend)); #else ret = (x << shift) + addend; #endif return ret; } #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document /** * Bitfield-extract. 
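 * A shift-and-mask sketch of the semantics: BFE(source, bit_start, num_bits) returns
 * (source >> bit_start) & ((1 << num_bits) - 1), e.g. BFE(0xFF00u, 8, 4) yields 0xF.
 * SM20+ devices use the PTX \p bfe instruction instead of the explicit shift and mask.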
*/ template __device__ __forceinline__ unsigned int BFE( UnsignedBits source, unsigned int bit_start, unsigned int num_bits, Int2Type /*byte_len*/) { unsigned int bits; #if CUB_PTX_ARCH >= 200 asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(bits) : "r"((unsigned int) source), "r"(bit_start), "r"(num_bits)); #else const unsigned int MASK = (1 << num_bits) - 1; bits = (source >> bit_start) & MASK; #endif return bits; } /** * Bitfield-extract for 64-bit types. */ template __device__ __forceinline__ unsigned int BFE( UnsignedBits source, unsigned int bit_start, unsigned int num_bits, Int2Type<8> /*byte_len*/) { const unsigned long long MASK = (1ull << num_bits) - 1; return (source >> bit_start) & MASK; } #endif // DOXYGEN_SHOULD_SKIP_THIS /** * \brief Bitfield-extract. Extracts \p num_bits from \p source starting at bit-offset \p bit_start. The input \p source may be an 8b, 16b, 32b, or 64b unsigned integer type. */ template __device__ __forceinline__ unsigned int BFE( UnsignedBits source, unsigned int bit_start, unsigned int num_bits) { return BFE(source, bit_start, num_bits, Int2Type()); } /** * \brief Bitfield insert. Inserts the \p num_bits least significant bits of \p y into \p x at bit-offset \p bit_start. */ __device__ __forceinline__ void BFI( unsigned int &ret, unsigned int x, unsigned int y, unsigned int bit_start, unsigned int num_bits) { #if CUB_PTX_ARCH >= 200 asm ("bfi.b32 %0, %1, %2, %3, %4;" : "=r"(ret) : "r"(y), "r"(x), "r"(bit_start), "r"(num_bits)); #else x <<= bit_start; unsigned int MASK_X = ((1 << num_bits) - 1) << bit_start; unsigned int MASK_Y = ~MASK_X; ret = (y & MASK_Y) | (x & MASK_X); #endif } /** * \brief Three-operand add. Returns \p x + \p y + \p z. */ __device__ __forceinline__ unsigned int IADD3(unsigned int x, unsigned int y, unsigned int z) { #if CUB_PTX_ARCH >= 200 asm ("vadd.u32.u32.u32.add %0, %1, %2, %3;" : "=r"(x) : "r"(x), "r"(y), "r"(z)); #else x = x + y + z; #endif return x; } /** * \brief Byte-permute. Pick four arbitrary bytes from two 32-bit registers, and reassemble them into a 32-bit destination register. For SM2.0 or later. * * \par * The bytes in the two source registers \p a and \p b are numbered from 0 to 7: * {\p b, \p a} = {{b7, b6, b5, b4}, {b3, b2, b1, b0}}. For each of the four bytes * {b3, b2, b1, b0} selected in the return value, a 4-bit selector is defined within * the four lower "nibbles" of \p index: {\p index } = {n7, n6, n5, n4, n3, n2, n1, n0} * * \par Snippet * The code snippet below illustrates byte-permute. * \par * \code * #include * * __global__ void ExampleKernel(...) * { * int a = 0x03020100; * int b = 0x07060504; * int index = 0x00007531; * * int selected = PRMT(a, b, index); // 0x07050301 * * \endcode * */ __device__ __forceinline__ int PRMT(unsigned int a, unsigned int b, unsigned int index) { int ret; asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(ret) : "r"(a), "r"(b), "r"(index)); return ret; } #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document /** * Sync-threads barrier. 
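 * Wraps the PTX instruction "bar.sync 1, count": the calling threads wait at named
 * barrier 1 until \p count threads have arrived.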
*/ __device__ __forceinline__ void BAR(int count) { asm volatile("bar.sync 1, %0;" : : "r"(count)); } /** * CTA barrier */ __device__ __forceinline__ void CTA_SYNC() { __syncthreads(); } /** * CTA barrier with predicate */ __device__ __forceinline__ int CTA_SYNC_AND(int p) { return __syncthreads_and(p); } /** * Warp barrier */ __device__ __forceinline__ void WARP_SYNC(unsigned int member_mask) { #ifdef CUB_USE_COOPERATIVE_GROUPS __syncwarp(member_mask); #endif } /** * Warp any */ __device__ __forceinline__ int WARP_ANY(int predicate, unsigned int member_mask) { #ifdef CUB_USE_COOPERATIVE_GROUPS return __any_sync(member_mask, predicate); #else return ::__any(predicate); #endif } /** * Warp any */ __device__ __forceinline__ int WARP_ALL(int predicate, unsigned int member_mask) { #ifdef CUB_USE_COOPERATIVE_GROUPS return __all_sync(member_mask, predicate); #else return ::__all(predicate); #endif } /** * Warp ballot */ __device__ __forceinline__ int WARP_BALLOT(int predicate, unsigned int member_mask) { #ifdef CUB_USE_COOPERATIVE_GROUPS return __ballot_sync(member_mask, predicate); #else return __ballot(predicate); #endif } /** * Warp synchronous shfl_up */ __device__ __forceinline__ unsigned int SHFL_UP_SYNC(unsigned int word, int src_offset, int first_lane, unsigned int member_mask) { #ifdef CUB_USE_COOPERATIVE_GROUPS asm volatile("shfl.sync.up.b32 %0, %1, %2, %3, %4;" : "=r"(word) : "r"(word), "r"(src_offset), "r"(first_lane), "r"(member_mask)); #else asm volatile("shfl.up.b32 %0, %1, %2, %3;" : "=r"(word) : "r"(word), "r"(src_offset), "r"(first_lane)); #endif return word; } /** * Warp synchronous shfl_down */ __device__ __forceinline__ unsigned int SHFL_DOWN_SYNC(unsigned int word, int src_offset, int last_lane, unsigned int member_mask) { #ifdef CUB_USE_COOPERATIVE_GROUPS asm volatile("shfl.sync.down.b32 %0, %1, %2, %3, %4;" : "=r"(word) : "r"(word), "r"(src_offset), "r"(last_lane), "r"(member_mask)); #else asm volatile("shfl.down.b32 %0, %1, %2, %3;" : "=r"(word) : "r"(word), "r"(src_offset), "r"(last_lane)); #endif return word; } /** * Warp synchronous shfl_idx */ __device__ __forceinline__ unsigned int SHFL_IDX_SYNC(unsigned int word, int src_lane, int last_lane, unsigned int member_mask) { #ifdef CUB_USE_COOPERATIVE_GROUPS asm volatile("shfl.sync.idx.b32 %0, %1, %2, %3, %4;" : "=r"(word) : "r"(word), "r"(src_lane), "r"(last_lane), "r"(member_mask)); #else asm volatile("shfl.idx.b32 %0, %1, %2, %3;" : "=r"(word) : "r"(word), "r"(src_lane), "r"(last_lane)); #endif return word; } /** * Floating point multiply. (Mantissa LSB rounds towards zero.) */ __device__ __forceinline__ float FMUL_RZ(float a, float b) { float d; asm ("mul.rz.f32 %0, %1, %2;" : "=f"(d) : "f"(a), "f"(b)); return d; } /** * Floating point multiply-add. (Mantissa LSB rounds towards zero.) */ __device__ __forceinline__ float FFMA_RZ(float a, float b, float c) { float d; asm ("fma.rz.f32 %0, %1, %2, %3;" : "=f"(d) : "f"(a), "f"(b), "f"(c)); return d; } #endif // DOXYGEN_SHOULD_SKIP_THIS /** * \brief Terminates the calling thread */ __device__ __forceinline__ void ThreadExit() { asm volatile("exit;"); } /** * \brief Abort execution and generate an interrupt to the host CPU */ __device__ __forceinline__ void ThreadTrap() { asm volatile("trap;"); } /** * \brief Returns the row-major linear thread identifier for a multidimensional thread block */ __device__ __forceinline__ int RowMajorTid(int block_dim_x, int block_dim_y, int block_dim_z) { return ((block_dim_z == 1) ? 
0 : (threadIdx.z * block_dim_x * block_dim_y)) + ((block_dim_y == 1) ? 0 : (threadIdx.y * block_dim_x)) + threadIdx.x; } /** * \brief Returns the warp lane ID of the calling thread */ __device__ __forceinline__ unsigned int LaneId() { unsigned int ret; asm ("mov.u32 %0, %%laneid;" : "=r"(ret) ); return ret; } /** * \brief Returns the warp ID of the calling thread. Warp ID is guaranteed to be unique among warps, but may not correspond to a zero-based ranking within the thread block. */ __device__ __forceinline__ unsigned int WarpId() { unsigned int ret; asm ("mov.u32 %0, %%warpid;" : "=r"(ret) ); return ret; } /** * \brief Returns the warp lane mask of all lanes less than the calling thread */ __device__ __forceinline__ unsigned int LaneMaskLt() { unsigned int ret; asm ("mov.u32 %0, %%lanemask_lt;" : "=r"(ret) ); return ret; } /** * \brief Returns the warp lane mask of all lanes less than or equal to the calling thread */ __device__ __forceinline__ unsigned int LaneMaskLe() { unsigned int ret; asm ("mov.u32 %0, %%lanemask_le;" : "=r"(ret) ); return ret; } /** * \brief Returns the warp lane mask of all lanes greater than the calling thread */ __device__ __forceinline__ unsigned int LaneMaskGt() { unsigned int ret; asm ("mov.u32 %0, %%lanemask_gt;" : "=r"(ret) ); return ret; } /** * \brief Returns the warp lane mask of all lanes greater than or equal to the calling thread */ __device__ __forceinline__ unsigned int LaneMaskGe() { unsigned int ret; asm ("mov.u32 %0, %%lanemask_ge;" : "=r"(ret) ); return ret; } /** @} */ // end group UtilPtx /** * \brief Shuffle-up for any data type. Each warp-lanei obtains the value \p input contributed by warp-lanei-src_offset. For thread lanes \e i < src_offset, the thread's own \p input is returned to the thread. ![](shfl_up_logo.png) * \ingroup WarpModule * * \par * - Available only for SM3.0 or newer * * \par Snippet * The code snippet below illustrates each thread obtaining a \p double value from the * predecessor of its predecessor. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Obtain one input item per thread * double thread_data = ... * * // Obtain item from two ranks below * double peer_data = ShuffleUp(thread_data, 2, 0, 0xffffffff); * * \endcode * \par * Suppose the set of input \p thread_data across the first warp of threads is {1.0, 2.0, 3.0, 4.0, 5.0, ..., 32.0}. * The corresponding output \p peer_data will be {1.0, 2.0, 1.0, 2.0, 3.0, ..., 30.0}. * */ template __device__ __forceinline__ T ShuffleUp( T input, ///< [in] The value to broadcast int src_offset, ///< [in] The relative down-offset of the peer to read from int first_lane, ///< [in] Index of first lane in segment (typically 0) unsigned int member_mask) ///< [in] 32-bit mask of participating warp lanes { typedef typename UnitWord::ShuffleWord ShuffleWord; const int WORDS = (sizeof(T) + sizeof(ShuffleWord) - 1) / sizeof(ShuffleWord); T output; ShuffleWord *output_alias = reinterpret_cast(&output); ShuffleWord *input_alias = reinterpret_cast(&input); unsigned int shuffle_word; shuffle_word = SHFL_UP_SYNC((unsigned int)input_alias[0], src_offset, first_lane, member_mask); output_alias[0] = shuffle_word; #pragma unroll for (int WORD = 1; WORD < WORDS; ++WORD) { shuffle_word = SHFL_UP_SYNC((unsigned int)input_alias[WORD], src_offset, first_lane, member_mask); output_alias[WORD] = shuffle_word; } return output; } /** * \brief Shuffle-down for any data type. Each warp-lanei obtains the value \p input contributed by warp-lanei+src_offset. 
For thread lanes \e i >= WARP_THREADS, the thread's own \p input is returned to the thread. ![](shfl_down_logo.png) * \ingroup WarpModule * * \par * - Available only for SM3.0 or newer * * \par Snippet * The code snippet below illustrates each thread obtaining a \p double value from the * successor of its successor. * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Obtain one input item per thread * double thread_data = ... * * // Obtain item from two ranks below * double peer_data = ShuffleDown(thread_data, 2, 31, 0xffffffff); * * \endcode * \par * Suppose the set of input \p thread_data across the first warp of threads is {1.0, 2.0, 3.0, 4.0, 5.0, ..., 32.0}. * The corresponding output \p peer_data will be {3.0, 4.0, 5.0, 6.0, 7.0, ..., 32.0}. * */ template __device__ __forceinline__ T ShuffleDown( T input, ///< [in] The value to broadcast int src_offset, ///< [in] The relative up-offset of the peer to read from int last_lane, ///< [in] Index of first lane in segment (typically 31) unsigned int member_mask) ///< [in] 32-bit mask of participating warp lanes { typedef typename UnitWord::ShuffleWord ShuffleWord; const int WORDS = (sizeof(T) + sizeof(ShuffleWord) - 1) / sizeof(ShuffleWord); T output; ShuffleWord *output_alias = reinterpret_cast(&output); ShuffleWord *input_alias = reinterpret_cast(&input); unsigned int shuffle_word; shuffle_word = SHFL_DOWN_SYNC((unsigned int)input_alias[0], src_offset, last_lane, member_mask); output_alias[0] = shuffle_word; #pragma unroll for (int WORD = 1; WORD < WORDS; ++WORD) { shuffle_word = SHFL_DOWN_SYNC((unsigned int)input_alias[WORD], src_offset, last_lane, member_mask); output_alias[WORD] = shuffle_word; } return output; } /** * \brief Shuffle-broadcast for any data type. Each warp-lanei obtains the value \p input * contributed by warp-lanesrc_lane. For \p src_lane < 0 or \p src_lane >= WARP_THREADS, * then the thread's own \p input is returned to the thread. ![](shfl_broadcast_logo.png) * * \ingroup WarpModule * * \par * - Available only for SM3.0 or newer * * \par Snippet * The code snippet below illustrates each thread obtaining a \p double value from warp-lane0. * * \par * \code * #include // or equivalently * * __global__ void ExampleKernel(...) * { * // Obtain one input item per thread * double thread_data = ... * * // Obtain item from thread 0 * double peer_data = ShuffleIndex(thread_data, 0, 32, 0xffffffff); * * \endcode * \par * Suppose the set of input \p thread_data across the first warp of threads is {1.0, 2.0, 3.0, 4.0, 5.0, ..., 32.0}. * The corresponding output \p peer_data will be {1.0, 1.0, 1.0, 1.0, 1.0, ..., 1.0}. 
* */ template __device__ __forceinline__ T ShuffleIndex( T input, ///< [in] The value to broadcast int src_lane, ///< [in] Which warp lane is to do the broadcasting int logical_warp_threads, ///< [in] Number of threads per logical warp unsigned int member_mask) ///< [in] 32-bit mask of participating warp lanes { typedef typename UnitWord::ShuffleWord ShuffleWord; const int WORDS = (sizeof(T) + sizeof(ShuffleWord) - 1) / sizeof(ShuffleWord); T output; ShuffleWord *output_alias = reinterpret_cast(&output); ShuffleWord *input_alias = reinterpret_cast(&input); unsigned int shuffle_word; shuffle_word = SHFL_IDX_SYNC((unsigned int)input_alias[0], src_lane, logical_warp_threads - 1, member_mask); output_alias[0] = shuffle_word; #pragma unroll for (int WORD = 1; WORD < WORDS; ++WORD) { shuffle_word = SHFL_IDX_SYNC((unsigned int)input_alias[WORD], src_lane, logical_warp_threads - 1, member_mask); output_alias[WORD] = shuffle_word; } return output; } /** * Compute a 32b mask of threads having the same least-significant * LABEL_BITS of \p label as the calling thread. */ template inline __device__ unsigned int MatchAny(unsigned int label) { unsigned int retval; // Extract masks of common threads for each bit #pragma unroll for (int BIT = 0; BIT < LABEL_BITS; ++BIT) { unsigned int mask; unsigned int current_bit = 1 << BIT; asm ("{\n" " .reg .pred p;\n" " and.b32 %0, %1, %2;" " setp.eq.u32 p, %0, %2;\n" #ifdef CUB_USE_COOPERATIVE_GROUPS " vote.ballot.sync.b32 %0, p, 0xffffffff;\n" #else " vote.ballot.b32 %0, p;\n" #endif " @!p not.b32 %0, %0;\n" "}\n" : "=r"(mask) : "r"(label), "r"(current_bit)); // Remove peers who differ retval = (BIT == 0) ? mask : retval & mask; } return retval; // // VOLTA match // unsigned int retval; // asm ("{\n" // " match.any.sync.b32 %0, %1, 0xffffffff;\n" // "}\n" : "=r"(retval) : "r"(label)); // return retval; } } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/util_type.cuh000066400000000000000000001150321411340063500210770ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * Common type manipulation (metaprogramming) utilities */ #pragma once #include #include #include #include "util_macro.cuh" #include "util_arch.cuh" #include "util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup UtilModule * @{ */ /****************************************************************************** * Type equality ******************************************************************************/ /** * \brief Type selection (IF ? ThenType : ElseType) */ template struct If { /// Conditional type result typedef ThenType Type; // true }; #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document template struct If { typedef ElseType Type; // false }; #endif // DOXYGEN_SHOULD_SKIP_THIS /****************************************************************************** * Conditional types ******************************************************************************/ /** * \brief Type equality test */ template struct Equals { enum { VALUE = 0, NEGATE = 1 }; }; #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document template struct Equals { enum { VALUE = 1, NEGATE = 0 }; }; #endif // DOXYGEN_SHOULD_SKIP_THIS /****************************************************************************** * Static math ******************************************************************************/ /** * \brief Statically determine log2(N), rounded up. * * For example: * Log2<8>::VALUE // 3 * Log2<3>::VALUE // 2 */ template struct Log2 { /// Static logarithm value enum { VALUE = Log2> 1), COUNT + 1>::VALUE }; // Inductive case }; #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document template struct Log2 { enum {VALUE = (1 << (COUNT - 1) < N) ? // Base case COUNT : COUNT - 1 }; }; #endif // DOXYGEN_SHOULD_SKIP_THIS /** * \brief Statically determine if N is a power-of-two */ template struct PowerOfTwo { enum { VALUE = ((N & (N - 1)) == 0) }; }; /****************************************************************************** * Pointer vs. iterator detection ******************************************************************************/ /** * \brief Pointer vs. 
iterator */ template struct IsPointer { enum { VALUE = 0 }; }; #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document template struct IsPointer { enum { VALUE = 1 }; }; #endif // DOXYGEN_SHOULD_SKIP_THIS /****************************************************************************** * Qualifier detection ******************************************************************************/ /** * \brief Volatile modifier test */ template struct IsVolatile { enum { VALUE = 0 }; }; #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document template struct IsVolatile { enum { VALUE = 1 }; }; #endif // DOXYGEN_SHOULD_SKIP_THIS /****************************************************************************** * Qualifier removal ******************************************************************************/ /** * \brief Removes \p const and \p volatile qualifiers from type \p Tp. * * For example: * typename RemoveQualifiers::Type // int; */ template struct RemoveQualifiers { /// Type without \p const and \p volatile qualifiers typedef Up Type; }; #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document template struct RemoveQualifiers { typedef Up Type; }; template struct RemoveQualifiers { typedef Up Type; }; template struct RemoveQualifiers { typedef Up Type; }; /****************************************************************************** * Marker types ******************************************************************************/ /** * \brief A simple "NULL" marker type */ struct NullType { #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document template __host__ __device__ __forceinline__ NullType& operator =(const T&) { return *this; } __host__ __device__ __forceinline__ bool operator ==(const NullType&) { return true; } __host__ __device__ __forceinline__ bool operator !=(const NullType&) { return false; } #endif // DOXYGEN_SHOULD_SKIP_THIS }; /** * \brief Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static call dispatch based on constant integral values) */ template struct Int2Type { enum {VALUE = A}; }; #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document /****************************************************************************** * Size and alignment ******************************************************************************/ /// Structure alignment template struct AlignBytes { struct Pad { T val; char byte; }; enum { /// The "true CUDA" alignment of T in bytes ALIGN_BYTES = sizeof(Pad) - sizeof(T) }; /// The "truly aligned" type typedef T Type; }; // Specializations where host C++ compilers (e.g., 32-bit Windows) may disagree // with device C++ compilers (EDG) on types passed as template parameters through // kernel functions #define __CUB_ALIGN_BYTES(t, b) \ template <> struct AlignBytes \ { enum { ALIGN_BYTES = b }; typedef __align__(b) t Type; }; __CUB_ALIGN_BYTES(short4, 8) __CUB_ALIGN_BYTES(ushort4, 8) __CUB_ALIGN_BYTES(int2, 8) __CUB_ALIGN_BYTES(uint2, 8) __CUB_ALIGN_BYTES(long long, 8) __CUB_ALIGN_BYTES(unsigned long long, 8) __CUB_ALIGN_BYTES(float2, 8) __CUB_ALIGN_BYTES(double, 8) #ifdef _WIN32 __CUB_ALIGN_BYTES(long2, 8) __CUB_ALIGN_BYTES(ulong2, 8) #else __CUB_ALIGN_BYTES(long2, 16) __CUB_ALIGN_BYTES(ulong2, 16) #endif __CUB_ALIGN_BYTES(int4, 16) __CUB_ALIGN_BYTES(uint4, 16) __CUB_ALIGN_BYTES(float4, 16) __CUB_ALIGN_BYTES(long4, 16) __CUB_ALIGN_BYTES(ulong4, 16) __CUB_ALIGN_BYTES(longlong2, 16) __CUB_ALIGN_BYTES(ulonglong2, 16) __CUB_ALIGN_BYTES(double2, 16) __CUB_ALIGN_BYTES(longlong4, 16) __CUB_ALIGN_BYTES(ulonglong4, 16) 
__CUB_ALIGN_BYTES(double4, 16) template struct AlignBytes : AlignBytes {}; template struct AlignBytes : AlignBytes {}; template struct AlignBytes : AlignBytes {}; /// Unit-words of data movement template struct UnitWord { enum { ALIGN_BYTES = AlignBytes::ALIGN_BYTES }; template struct IsMultiple { enum { UNIT_ALIGN_BYTES = AlignBytes::ALIGN_BYTES, IS_MULTIPLE = (sizeof(T) % sizeof(Unit) == 0) && (ALIGN_BYTES % UNIT_ALIGN_BYTES == 0) }; }; /// Biggest shuffle word that T is a whole multiple of and is not larger than the alignment of T typedef typename If::IS_MULTIPLE, unsigned int, typename If::IS_MULTIPLE, unsigned short, unsigned char>::Type>::Type ShuffleWord; /// Biggest volatile word that T is a whole multiple of and is not larger than the alignment of T typedef typename If::IS_MULTIPLE, unsigned long long, ShuffleWord>::Type VolatileWord; /// Biggest memory-access word that T is a whole multiple of and is not larger than the alignment of T typedef typename If::IS_MULTIPLE, ulonglong2, VolatileWord>::Type DeviceWord; /// Biggest texture reference word that T is a whole multiple of and is not larger than the alignment of T typedef typename If::IS_MULTIPLE, uint4, typename If::IS_MULTIPLE, uint2, ShuffleWord>::Type>::Type TextureWord; }; // float2 specialization workaround (for SM10-SM13) template <> struct UnitWord { typedef int ShuffleWord; #if (CUB_PTX_ARCH > 0) && (CUB_PTX_ARCH <= 130) typedef float VolatileWord; typedef uint2 DeviceWord; #else typedef unsigned long long VolatileWord; typedef unsigned long long DeviceWord; #endif typedef float2 TextureWord; }; // float4 specialization workaround (for SM10-SM13) template <> struct UnitWord { typedef int ShuffleWord; #if (CUB_PTX_ARCH > 0) && (CUB_PTX_ARCH <= 130) typedef float VolatileWord; typedef uint4 DeviceWord; #else typedef unsigned long long VolatileWord; typedef ulonglong2 DeviceWord; #endif typedef float4 TextureWord; }; // char2 specialization workaround (for SM10-SM13) template <> struct UnitWord { typedef unsigned short ShuffleWord; #if (CUB_PTX_ARCH > 0) && (CUB_PTX_ARCH <= 130) typedef unsigned short VolatileWord; typedef short DeviceWord; #else typedef unsigned short VolatileWord; typedef unsigned short DeviceWord; #endif typedef unsigned short TextureWord; }; template struct UnitWord : UnitWord {}; template struct UnitWord : UnitWord {}; template struct UnitWord : UnitWord {}; #endif // DOXYGEN_SHOULD_SKIP_THIS /****************************************************************************** * Vector type inference utilities. ******************************************************************************/ /** * \brief Exposes a member typedef \p Type that names the corresponding CUDA vector type if one exists. Otherwise \p Type refers to the CubVector structure itself, which will wrap the corresponding \p x, \p y, etc. vector fields. 
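 *
 * \par Snippet
 * A minimal usage sketch, assuming the built-in \p int expansion generated below:
 * \par
 * \code
 * cub::CubVector<int, 2> a, b;
 * a.x = 1;  a.y = 2;
 * b.x = 10; b.y = 20;
 *
 * cub::CubVector<int, 2>::Type raw = a;   // ::Type aliases the CUDA built-in int2
 * cub::CubVector<int, 2> c = a + b;       // component-wise add: c.x == 11, c.y == 22
 * \endcode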
*/ template struct CubVector; #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document enum { /// The maximum number of elements in CUDA vector types MAX_VEC_ELEMENTS = 4, }; /** * Generic vector-1 type */ template struct CubVector { T x; typedef T BaseType; typedef CubVector Type; }; /** * Generic vector-2 type */ template struct CubVector { T x; T y; typedef T BaseType; typedef CubVector Type; }; /** * Generic vector-3 type */ template struct CubVector { T x; T y; T z; typedef T BaseType; typedef CubVector Type; }; /** * Generic vector-4 type */ template struct CubVector { T x; T y; T z; T w; typedef T BaseType; typedef CubVector Type; }; /** * Macro for expanding partially-specialized built-in vector types */ #define CUB_DEFINE_VECTOR_TYPE(base_type,short_type) \ \ template<> struct CubVector : short_type##1 \ { \ typedef base_type BaseType; \ typedef short_type##1 Type; \ __host__ __device__ __forceinline__ CubVector operator+(const CubVector &other) const { \ CubVector retval; \ retval.x = x + other.x; \ return retval; \ } \ __host__ __device__ __forceinline__ CubVector operator-(const CubVector &other) const { \ CubVector retval; \ retval.x = x - other.x; \ return retval; \ } \ }; \ \ template<> struct CubVector : short_type##2 \ { \ typedef base_type BaseType; \ typedef short_type##2 Type; \ __host__ __device__ __forceinline__ CubVector operator+(const CubVector &other) const { \ CubVector retval; \ retval.x = x + other.x; \ retval.y = y + other.y; \ return retval; \ } \ __host__ __device__ __forceinline__ CubVector operator-(const CubVector &other) const { \ CubVector retval; \ retval.x = x - other.x; \ retval.y = y - other.y; \ return retval; \ } \ }; \ \ template<> struct CubVector : short_type##3 \ { \ typedef base_type BaseType; \ typedef short_type##3 Type; \ __host__ __device__ __forceinline__ CubVector operator+(const CubVector &other) const { \ CubVector retval; \ retval.x = x + other.x; \ retval.y = y + other.y; \ retval.z = z + other.z; \ return retval; \ } \ __host__ __device__ __forceinline__ CubVector operator-(const CubVector &other) const { \ CubVector retval; \ retval.x = x - other.x; \ retval.y = y - other.y; \ retval.z = z - other.z; \ return retval; \ } \ }; \ \ template<> struct CubVector : short_type##4 \ { \ typedef base_type BaseType; \ typedef short_type##4 Type; \ __host__ __device__ __forceinline__ CubVector operator+(const CubVector &other) const { \ CubVector retval; \ retval.x = x + other.x; \ retval.y = y + other.y; \ retval.z = z + other.z; \ retval.w = w + other.w; \ return retval; \ } \ __host__ __device__ __forceinline__ CubVector operator-(const CubVector &other) const { \ CubVector retval; \ retval.x = x - other.x; \ retval.y = y - other.y; \ retval.z = z - other.z; \ retval.w = w - other.w; \ return retval; \ } \ }; // Expand CUDA vector types for built-in primitives CUB_DEFINE_VECTOR_TYPE(char, char) CUB_DEFINE_VECTOR_TYPE(signed char, char) CUB_DEFINE_VECTOR_TYPE(short, short) CUB_DEFINE_VECTOR_TYPE(int, int) CUB_DEFINE_VECTOR_TYPE(long, long) CUB_DEFINE_VECTOR_TYPE(long long, longlong) CUB_DEFINE_VECTOR_TYPE(unsigned char, uchar) CUB_DEFINE_VECTOR_TYPE(unsigned short, ushort) CUB_DEFINE_VECTOR_TYPE(unsigned int, uint) CUB_DEFINE_VECTOR_TYPE(unsigned long, ulong) CUB_DEFINE_VECTOR_TYPE(unsigned long long, ulonglong) CUB_DEFINE_VECTOR_TYPE(float, float) CUB_DEFINE_VECTOR_TYPE(double, double) CUB_DEFINE_VECTOR_TYPE(bool, uchar) // Undefine macros #undef CUB_DEFINE_VECTOR_TYPE #endif // DOXYGEN_SHOULD_SKIP_THIS 
/****************************************************************************** * Wrapper types ******************************************************************************/ /** * \brief A storage-backing wrapper that allows types with non-trivial constructors to be aliased in unions */ template struct Uninitialized { /// Biggest memory-access word that T is a whole multiple of and is not larger than the alignment of T typedef typename UnitWord::DeviceWord DeviceWord; enum { WORDS = sizeof(T) / sizeof(DeviceWord) }; /// Backing storage DeviceWord storage[WORDS]; /// Alias __host__ __device__ __forceinline__ T& Alias() { return reinterpret_cast(*this); } }; /** * \brief A key identifier paired with a corresponding value */ template < typename _Key, typename _Value #if defined(_WIN32) && !defined(_WIN64) , bool KeyIsLT = (AlignBytes<_Key>::ALIGN_BYTES < AlignBytes<_Value>::ALIGN_BYTES) , bool ValIsLT = (AlignBytes<_Value>::ALIGN_BYTES < AlignBytes<_Key>::ALIGN_BYTES) #endif // #if defined(_WIN32) && !defined(_WIN64) > struct KeyValuePair { typedef _Key Key; ///< Key data type typedef _Value Value; ///< Value data type Key key; ///< Item key Value value; ///< Item value /// Constructor __host__ __device__ __forceinline__ KeyValuePair() {} /// Constructor __host__ __device__ __forceinline__ KeyValuePair(Key const& key, Value const& value) : key(key), value(value) {} /// Inequality operator __host__ __device__ __forceinline__ bool operator !=(const KeyValuePair &b) { return (value != b.value) || (key != b.key); } }; #if defined(_WIN32) && !defined(_WIN64) /** * Win32 won't do 16B alignment. This can present two problems for * should-be-16B-aligned (but actually 8B aligned) built-in and intrinsics members: * 1) If a smaller-aligned item were to be listed first, the host compiler places the * should-be-16B item at too early an offset (and disagrees with device compiler) * 2) Or, if a smaller-aligned item lists second, the host compiler gets the size * of the struct wrong (and disagrees with device compiler) * * So we put the larger-should-be-aligned item first, and explicitly pad the * end of the struct */ /// Smaller key specialization template struct KeyValuePair { typedef K Key; typedef V Value; typedef char Pad[AlignBytes::ALIGN_BYTES - AlignBytes::ALIGN_BYTES]; Value value; // Value has larger would-be alignment and goes first Key key; Pad pad; /// Constructor __host__ __device__ __forceinline__ KeyValuePair() {} /// Constructor __host__ __device__ __forceinline__ KeyValuePair(Key const& key, Value const& value) : key(key), value(value) {} /// Inequality operator __host__ __device__ __forceinline__ bool operator !=(const KeyValuePair &b) { return (value != b.value) || (key != b.key); } }; /// Smaller value specialization template struct KeyValuePair { typedef K Key; typedef V Value; typedef char Pad[AlignBytes::ALIGN_BYTES - AlignBytes::ALIGN_BYTES]; Key key; // Key has larger would-be alignment and goes first Value value; Pad pad; /// Constructor __host__ __device__ __forceinline__ KeyValuePair() {} /// Constructor __host__ __device__ __forceinline__ KeyValuePair(Key const& key, Value const& value) : key(key), value(value) {} /// Inequality operator __host__ __device__ __forceinline__ bool operator !=(const KeyValuePair &b) { return (value != b.value) || (key != b.key); } }; #endif // #if defined(_WIN32) && !defined(_WIN64) #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document /** * \brief A wrapper for passing simple static arrays as kernel parameters */ template struct ArrayWrapper { /// 
Statically-sized array of type \p T T array[COUNT]; /// Constructor __host__ __device__ __forceinline__ ArrayWrapper() {} }; #endif // DOXYGEN_SHOULD_SKIP_THIS /** * \brief Double-buffer storage wrapper for multi-pass stream transformations that require more than one storage array for streaming intermediate results back and forth. * * Many multi-pass computations require a pair of "ping-pong" storage * buffers (e.g., one for reading from and the other for writing to, and then * vice-versa for the subsequent pass). This structure wraps a set of device * buffers and a "selector" member to track which is "current". */ template struct DoubleBuffer { /// Pair of device buffer pointers T *d_buffers[2]; /// Selector into \p d_buffers (i.e., the active/valid buffer) int selector; /// \brief Constructor __host__ __device__ __forceinline__ DoubleBuffer() { selector = 0; d_buffers[0] = NULL; d_buffers[1] = NULL; } /// \brief Constructor __host__ __device__ __forceinline__ DoubleBuffer( T *d_current, ///< The currently valid buffer T *d_alternate) ///< Alternate storage buffer of the same size as \p d_current { selector = 0; d_buffers[0] = d_current; d_buffers[1] = d_alternate; } /// \brief Return pointer to the currently valid buffer __host__ __device__ __forceinline__ T* Current() { return d_buffers[selector]; } /// \brief Return pointer to the currently invalid buffer __host__ __device__ __forceinline__ T* Alternate() { return d_buffers[selector ^ 1]; } }; /****************************************************************************** * Typedef-detection ******************************************************************************/ /** * \brief Defines a structure \p detector_name that is templated on type \p T. The \p detector_name struct exposes a constant member \p VALUE indicating whether or not parameter \p T exposes a nested type \p nested_type_name */ #define CUB_DEFINE_DETECT_NESTED_TYPE(detector_name, nested_type_name) \ template \ struct detector_name \ { \ template \ static char& test(typename C::nested_type_name*); \ template \ static int& test(...); \ enum \ { \ VALUE = sizeof(test(0)) < sizeof(int) \ }; \ }; /****************************************************************************** * Simple enable-if (similar to Boost) ******************************************************************************/ /** * \brief Simple enable-if (similar to Boost) */ template struct EnableIf { /// Enable-if type for SFINAE dummy variables typedef T Type; }; template struct EnableIf {}; /****************************************************************************** * Typedef-detection ******************************************************************************/ /** * \brief Determine whether or not BinaryOp's functor is of the form bool operator()(const T& a, const T&b) or bool operator()(const T& a, const T&b, unsigned int idx) */ template struct BinaryOpHasIdxParam { private: /* template struct SFINAE1 {}; template struct SFINAE2 {}; template struct SFINAE3 {}; template struct SFINAE4 {}; */ template struct SFINAE5 {}; template struct SFINAE6 {}; template struct SFINAE7 {}; template struct SFINAE8 {}; /* template static char Test(SFINAE1 *); template static char Test(SFINAE2 *); template static char Test(SFINAE3 *); template static char Test(SFINAE4 *); */ template static char Test(SFINAE5 *); template static char Test(SFINAE6 *); template static char Test(SFINAE7 *); template static char Test(SFINAE8 *); template static int Test(...); public: /// Whether the functor BinaryOp has a third 
unsigned int index param static const bool HAS_PARAM = sizeof(Test(NULL)) == sizeof(char); }; /****************************************************************************** * Simple type traits utilities. * * For example: * Traits::CATEGORY // SIGNED_INTEGER * Traits::NULL_TYPE // true * Traits::CATEGORY // NOT_A_NUMBER * Traits::PRIMITIVE; // false * ******************************************************************************/ /** * \brief Basic type traits categories */ enum Category { NOT_A_NUMBER, SIGNED_INTEGER, UNSIGNED_INTEGER, FLOATING_POINT }; /** * \brief Basic type traits */ template struct BaseTraits { /// Category static const Category CATEGORY = _CATEGORY; enum { PRIMITIVE = _PRIMITIVE, NULL_TYPE = _NULL_TYPE, }; }; /** * Basic type traits (unsigned primitive specialization) */ template struct BaseTraits { typedef _UnsignedBits UnsignedBits; static const Category CATEGORY = UNSIGNED_INTEGER; static const UnsignedBits LOWEST_KEY = UnsignedBits(0); static const UnsignedBits MAX_KEY = UnsignedBits(-1); enum { PRIMITIVE = true, NULL_TYPE = false, }; static __device__ __forceinline__ UnsignedBits TwiddleIn(UnsignedBits key) { return key; } static __device__ __forceinline__ UnsignedBits TwiddleOut(UnsignedBits key) { return key; } static __host__ __device__ __forceinline__ T Max() { UnsignedBits retval = MAX_KEY; return reinterpret_cast(retval); } static __host__ __device__ __forceinline__ T Lowest() { UnsignedBits retval = LOWEST_KEY; return reinterpret_cast(retval); } }; /** * Basic type traits (signed primitive specialization) */ template struct BaseTraits { typedef _UnsignedBits UnsignedBits; static const Category CATEGORY = SIGNED_INTEGER; static const UnsignedBits HIGH_BIT = UnsignedBits(1) << ((sizeof(UnsignedBits) * 8) - 1); static const UnsignedBits LOWEST_KEY = HIGH_BIT; static const UnsignedBits MAX_KEY = UnsignedBits(-1) ^ HIGH_BIT; enum { PRIMITIVE = true, NULL_TYPE = false, }; static __device__ __forceinline__ UnsignedBits TwiddleIn(UnsignedBits key) { return key ^ HIGH_BIT; }; static __device__ __forceinline__ UnsignedBits TwiddleOut(UnsignedBits key) { return key ^ HIGH_BIT; }; static __host__ __device__ __forceinline__ T Max() { UnsignedBits retval = MAX_KEY; return reinterpret_cast(retval); } static __host__ __device__ __forceinline__ T Lowest() { UnsignedBits retval = LOWEST_KEY; return reinterpret_cast(retval); } }; template struct FpLimits; template <> struct FpLimits { static __host__ __device__ __forceinline__ float Max() { return FLT_MAX; } static __host__ __device__ __forceinline__ float Lowest() { return FLT_MAX * float(-1); } }; template <> struct FpLimits { static __host__ __device__ __forceinline__ double Max() { return DBL_MAX; } static __host__ __device__ __forceinline__ double Lowest() { return DBL_MAX * double(-1); } }; /** * Basic type traits (fp primitive specialization) */ template struct BaseTraits { typedef _UnsignedBits UnsignedBits; static const Category CATEGORY = FLOATING_POINT; static const UnsignedBits HIGH_BIT = UnsignedBits(1) << ((sizeof(UnsignedBits) * 8) - 1); static const UnsignedBits LOWEST_KEY = UnsignedBits(-1); static const UnsignedBits MAX_KEY = UnsignedBits(-1) ^ HIGH_BIT; enum { PRIMITIVE = true, NULL_TYPE = false, }; static __device__ __forceinline__ UnsignedBits TwiddleIn(UnsignedBits key) { UnsignedBits mask = (key & HIGH_BIT) ? UnsignedBits(-1) : HIGH_BIT; return key ^ mask; }; static __device__ __forceinline__ UnsignedBits TwiddleOut(UnsignedBits key) { UnsignedBits mask = (key & HIGH_BIT) ? 
HIGH_BIT : UnsignedBits(-1); return key ^ mask; }; static __host__ __device__ __forceinline__ T Max() { return FpLimits::Max(); } static __host__ __device__ __forceinline__ T Lowest() { return FpLimits::Lowest(); } }; /** * \brief Numeric type traits */ template struct NumericTraits : BaseTraits {}; template <> struct NumericTraits : BaseTraits {}; template <> struct NumericTraits : BaseTraits<(std::numeric_limits::is_signed) ? SIGNED_INTEGER : UNSIGNED_INTEGER, true, false, unsigned char, char> {}; template <> struct NumericTraits : BaseTraits {}; template <> struct NumericTraits : BaseTraits {}; template <> struct NumericTraits : BaseTraits {}; template <> struct NumericTraits : BaseTraits {}; template <> struct NumericTraits : BaseTraits {}; template <> struct NumericTraits : BaseTraits {}; template <> struct NumericTraits : BaseTraits {}; template <> struct NumericTraits : BaseTraits {}; template <> struct NumericTraits : BaseTraits {}; template <> struct NumericTraits : BaseTraits {}; template <> struct NumericTraits : BaseTraits {}; template <> struct NumericTraits : BaseTraits {}; template <> struct NumericTraits : BaseTraits::VolatileWord, bool> {}; /** * \brief Type traits */ template struct Traits : NumericTraits::Type> {}; #endif // DOXYGEN_SHOULD_SKIP_THIS /** @} */ // end group UtilModule } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/warp/000077500000000000000000000000001411340063500173275ustar00rootroot00000000000000relion-3.1.3/src/gpu_utils/cub/warp/specializations/000077500000000000000000000000001411340063500225305ustar00rootroot00000000000000relion-3.1.3/src/gpu_utils/cub/warp/specializations/warp_reduce_shfl.cuh000066400000000000000000000515621411340063500265560ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ******************************************************************************/ /** * \file * cub::WarpReduceShfl provides SHFL-based variants of parallel reduction of items partitioned across a CUDA thread warp. */ #pragma once #include "../../thread/thread_operators.cuh" #include "../../util_ptx.cuh" #include "../../util_type.cuh" #include "../../util_macro.cuh" #include "../../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief WarpReduceShfl provides SHFL-based variants of parallel reduction of items partitioned across a CUDA thread warp. * * LOGICAL_WARP_THREADS must be a power-of-two */ template < typename T, ///< Data type being reduced int LOGICAL_WARP_THREADS, ///< Number of threads per logical warp int PTX_ARCH> ///< The PTX compute capability for which to to specialize this collective struct WarpReduceShfl { //--------------------------------------------------------------------- // Constants and type definitions //--------------------------------------------------------------------- enum { /// Whether the logical warp size and the PTX warp size coincide IS_ARCH_WARP = (LOGICAL_WARP_THREADS == CUB_WARP_THREADS(PTX_ARCH)), /// The number of warp reduction steps STEPS = Log2::VALUE, /// Number of logical warps in a PTX warp LOGICAL_WARPS = CUB_WARP_THREADS(PTX_ARCH) / LOGICAL_WARP_THREADS, }; template struct IsInteger { enum { ///Whether the data type is a small (32b or less) integer for which we can use a single SFHL instruction per exchange IS_SMALL_UNSIGNED = (Traits::CATEGORY == UNSIGNED_INTEGER) && (sizeof(S) <= sizeof(unsigned int)) }; }; // Creates a mask where the last thread in each logical warp is set template struct LastLaneMask { enum { BASE_MASK = 1 << (LOGICAL_WARP_THREADS - 1), MASK = (LastLaneMask::MASK << LOGICAL_WARP_THREADS) | BASE_MASK, }; }; // Creates a mask where the last thread in each logical warp is set template struct LastLaneMask { enum { MASK = 1 << (LOGICAL_WARP_THREADS - 1), }; }; /// Shared memory storage layout type typedef NullType TempStorage; //--------------------------------------------------------------------- // Thread fields //--------------------------------------------------------------------- int lane_id; int member_mask; //--------------------------------------------------------------------- // Construction //--------------------------------------------------------------------- /// Constructor __device__ __forceinline__ WarpReduceShfl( TempStorage &/*temp_storage*/) : lane_id(LaneId()), member_mask(IS_ARCH_WARP ? 0xffffffff : (0xffffffff >> (32 - LOGICAL_WARP_THREADS)) << (LaneId() / LOGICAL_WARP_THREADS)) {} //--------------------------------------------------------------------- // Reduction steps //--------------------------------------------------------------------- /// Reduction (specialized for summation across uint32 types) __device__ __forceinline__ unsigned int ReduceStep( unsigned int input, ///< [in] Calling thread's input item. 
cub::Sum /*reduction_op*/, ///< [in] Binary reduction operator int last_lane, ///< [in] Index of last lane in segment int offset) ///< [in] Up-offset to pull from { unsigned int output; // Use predicate set from SHFL to guard against invalid peers #ifdef CUB_USE_COOPERATIVE_GROUPS asm volatile( "{" " .reg .u32 r0;" " .reg .pred p;" " shfl.sync.down.b32 r0|p, %1, %2, %3, %5;" " @p add.u32 r0, r0, %4;" " mov.u32 %0, r0;" "}" : "=r"(output) : "r"(input), "r"(offset), "r"(last_lane), "r"(input), "r"(member_mask)); #else asm volatile( "{" " .reg .u32 r0;" " .reg .pred p;" " shfl.down.b32 r0|p, %1, %2, %3;" " @p add.u32 r0, r0, %4;" " mov.u32 %0, r0;" "}" : "=r"(output) : "r"(input), "r"(offset), "r"(last_lane), "r"(input)); #endif return output; } /// Reduction (specialized for summation across fp32 types) __device__ __forceinline__ float ReduceStep( float input, ///< [in] Calling thread's input item. cub::Sum /*reduction_op*/, ///< [in] Binary reduction operator int last_lane, ///< [in] Index of last lane in segment int offset) ///< [in] Up-offset to pull from { float output; // Use predicate set from SHFL to guard against invalid peers #ifdef CUB_USE_COOPERATIVE_GROUPS asm volatile( "{" " .reg .f32 r0;" " .reg .pred p;" " shfl.sync.down.b32 r0|p, %1, %2, %3, %5;" " @p add.f32 r0, r0, %4;" " mov.f32 %0, r0;" "}" : "=f"(output) : "f"(input), "r"(offset), "r"(last_lane), "f"(input), "r"(member_mask)); #else asm volatile( "{" " .reg .f32 r0;" " .reg .pred p;" " shfl.down.b32 r0|p, %1, %2, %3;" " @p add.f32 r0, r0, %4;" " mov.f32 %0, r0;" "}" : "=f"(output) : "f"(input), "r"(offset), "r"(last_lane), "f"(input)); #endif return output; } /// Reduction (specialized for summation across unsigned long long types) __device__ __forceinline__ unsigned long long ReduceStep( unsigned long long input, ///< [in] Calling thread's input item. cub::Sum /*reduction_op*/, ///< [in] Binary reduction operator int last_lane, ///< [in] Index of last lane in segment int offset) ///< [in] Up-offset to pull from { unsigned long long output; #ifdef CUB_USE_COOPERATIVE_GROUPS asm volatile( "{" " .reg .u32 lo;" " .reg .u32 hi;" " .reg .pred p;" " mov.b64 {lo, hi}, %1;" " shfl.sync.down.b32 lo|p, lo, %2, %3, %4;" " shfl.sync.down.b32 hi|p, hi, %2, %3, %4;" " mov.b64 %0, {lo, hi};" " @p add.u64 %0, %0, %1;" "}" : "=l"(output) : "l"(input), "r"(offset), "r"(last_lane), "r"(member_mask)); #else asm volatile( "{" " .reg .u32 lo;" " .reg .u32 hi;" " .reg .pred p;" " mov.b64 {lo, hi}, %1;" " shfl.down.b32 lo|p, lo, %2, %3;" " shfl.down.b32 hi|p, hi, %2, %3;" " mov.b64 %0, {lo, hi};" " @p add.u64 %0, %0, %1;" "}" : "=l"(output) : "l"(input), "r"(offset), "r"(last_lane)); #endif return output; } /// Reduction (specialized for summation across long long types) __device__ __forceinline__ long long ReduceStep( long long input, ///< [in] Calling thread's input item. 
cub::Sum /*reduction_op*/, ///< [in] Binary reduction operator int last_lane, ///< [in] Index of last lane in segment int offset) ///< [in] Up-offset to pull from { long long output; // Use predicate set from SHFL to guard against invalid peers #ifdef CUB_USE_COOPERATIVE_GROUPS asm volatile( "{" " .reg .u32 lo;" " .reg .u32 hi;" " .reg .pred p;" " mov.b64 {lo, hi}, %1;" " shfl.sync.down.b32 lo|p, lo, %2, %3, %4;" " shfl.sync.down.b32 hi|p, hi, %2, %3, %4;" " mov.b64 %0, {lo, hi};" " @p add.s64 %0, %0, %1;" "}" : "=l"(output) : "l"(input), "r"(offset), "r"(last_lane), "r"(member_mask)); #else asm volatile( "{" " .reg .u32 lo;" " .reg .u32 hi;" " .reg .pred p;" " mov.b64 {lo, hi}, %1;" " shfl.down.b32 lo|p, lo, %2, %3;" " shfl.down.b32 hi|p, hi, %2, %3;" " mov.b64 %0, {lo, hi};" " @p add.s64 %0, %0, %1;" "}" : "=l"(output) : "l"(input), "r"(offset), "r"(last_lane)); #endif return output; } /// Reduction (specialized for summation across double types) __device__ __forceinline__ double ReduceStep( double input, ///< [in] Calling thread's input item. cub::Sum /*reduction_op*/, ///< [in] Binary reduction operator int last_lane, ///< [in] Index of last lane in segment int offset) ///< [in] Up-offset to pull from { double output; // Use predicate set from SHFL to guard against invalid peers #ifdef CUB_USE_COOPERATIVE_GROUPS asm volatile( "{" " .reg .u32 lo;" " .reg .u32 hi;" " .reg .pred p;" " .reg .f64 r0;" " mov.b64 %0, %1;" " mov.b64 {lo, hi}, %1;" " shfl.sync.down.b32 lo|p, lo, %2, %3, %4;" " shfl.sync.down.b32 hi|p, hi, %2, %3, %4;" " mov.b64 r0, {lo, hi};" " @p add.f64 %0, %0, r0;" "}" : "=d"(output) : "d"(input), "r"(offset), "r"(last_lane), "r"(member_mask)); #else asm volatile( "{" " .reg .u32 lo;" " .reg .u32 hi;" " .reg .pred p;" " .reg .f64 r0;" " mov.b64 %0, %1;" " mov.b64 {lo, hi}, %1;" " shfl.down.b32 lo|p, lo, %2, %3;" " shfl.down.b32 hi|p, hi, %2, %3;" " mov.b64 r0, {lo, hi};" " @p add.f64 %0, %0, r0;" "}" : "=d"(output) : "d"(input), "r"(offset), "r"(last_lane)); #endif return output; } /// Reduction (specialized for swizzled ReduceByKeyOp across KeyValuePair types) template __device__ __forceinline__ KeyValuePair ReduceStep( KeyValuePair input, ///< [in] Calling thread's input item. SwizzleScanOp > /*reduction_op*/, ///< [in] Binary reduction operator int last_lane, ///< [in] Index of last lane in segment int offset) ///< [in] Up-offset to pull from { KeyValuePair output; KeyT other_key = ShuffleDown(input.key, offset, last_lane, member_mask); output.key = input.key; output.value = ReduceStep( input.value, cub::Sum(), last_lane, offset, Int2Type::IS_SMALL_UNSIGNED>()); if (input.key != other_key) output.value = input.value; return output; } /// Reduction (specialized for swizzled ReduceBySegmentOp across KeyValuePair types) template __device__ __forceinline__ KeyValuePair ReduceStep( KeyValuePair input, ///< [in] Calling thread's input item. SwizzleScanOp > /*reduction_op*/, ///< [in] Binary reduction operator int last_lane, ///< [in] Index of last lane in segment int offset) ///< [in] Up-offset to pull from { KeyValuePair output; output.value = ReduceStep(input.value, cub::Sum(), last_lane, offset, Int2Type::IS_SMALL_UNSIGNED>()); output.key = ReduceStep(input.key, cub::Sum(), last_lane, offset, Int2Type::IS_SMALL_UNSIGNED>()); if (input.key > 0) output.value = input.value; return output; } /// Reduction step (generic) template __device__ __forceinline__ _T ReduceStep( _T input, ///< [in] Calling thread's input item. 
ReductionOp reduction_op, ///< [in] Binary reduction operator int last_lane, ///< [in] Index of last lane in segment int offset) ///< [in] Up-offset to pull from { _T output = input; _T temp = ShuffleDown(output, offset, last_lane, member_mask); // Perform reduction op if valid if (offset + lane_id <= last_lane) output = reduction_op(input, temp); return output; } /// Reduction step (specialized for small unsigned integers size 32b or less) template __device__ __forceinline__ _T ReduceStep( _T input, ///< [in] Calling thread's input item. ReductionOp reduction_op, ///< [in] Binary reduction operator int last_lane, ///< [in] Index of last lane in segment int offset, ///< [in] Up-offset to pull from Int2Type /*is_small_unsigned*/) ///< [in] Marker type indicating whether T is a small unsigned integer { return ReduceStep(input, reduction_op, last_lane, offset); } /// Reduction step (specialized for types other than small unsigned integers size 32b or less) template __device__ __forceinline__ _T ReduceStep( _T input, ///< [in] Calling thread's input item. ReductionOp reduction_op, ///< [in] Binary reduction operator int last_lane, ///< [in] Index of last lane in segment int offset, ///< [in] Up-offset to pull from Int2Type /*is_small_unsigned*/) ///< [in] Marker type indicating whether T is a small unsigned integer { return ReduceStep(input, reduction_op, last_lane, offset); } //--------------------------------------------------------------------- // Templated inclusive scan iteration //--------------------------------------------------------------------- template __device__ __forceinline__ void ReduceStep( T& input, ///< [in] Calling thread's input item. ReductionOp reduction_op, ///< [in] Binary reduction operator int last_lane, ///< [in] Index of last lane in segment Int2Type /*step*/) { input = ReduceStep(input, reduction_op, last_lane, 1 << STEP, Int2Type::IS_SMALL_UNSIGNED>()); ReduceStep(input, reduction_op, last_lane, Int2Type()); } template __device__ __forceinline__ void ReduceStep( T& /*input*/, ///< [in] Calling thread's input item. ReductionOp /*reduction_op*/, ///< [in] Binary reduction operator int /*last_lane*/, ///< [in] Index of last lane in segment Int2Type /*step*/) {} //--------------------------------------------------------------------- // Reduction operations //--------------------------------------------------------------------- /// Reduction template < bool ALL_LANES_VALID, ///< Whether all lanes in each warp are contributing a valid fold of items int FOLDED_ITEMS_PER_LANE, ///< Number of items folded into each lane typename ReductionOp> __device__ __forceinline__ T Reduce( T input, ///< [in] Calling thread's input int folded_items_per_warp, ///< [in] Total number of valid items folded into each logical warp ReductionOp reduction_op) ///< [in] Binary reduction operator { // Get the last thread in the logical warp int first_warp_thread = 0; int last_warp_thread = LOGICAL_WARP_THREADS - 1; if (!IS_ARCH_WARP) { first_warp_thread = lane_id & (~(LOGICAL_WARP_THREADS - 1)); last_warp_thread |= lane_id; } // Common case is FOLDED_ITEMS_PER_LANE = 1 (or a multiple of 32) int lanes_with_valid_data = (folded_items_per_warp - 1) / FOLDED_ITEMS_PER_LANE; // Get the last valid lane int last_lane = (ALL_LANES_VALID) ? 
last_warp_thread : CUB_MIN(last_warp_thread, first_warp_thread + lanes_with_valid_data); T output = input; // // Iterate reduction steps // #pragma unroll // for (int STEP = 0; STEP < STEPS; STEP++) // { // output = ReduceStep(output, reduction_op, last_lane, 1 << STEP, Int2Type::IS_SMALL_UNSIGNED>()); // } // Template-iterate reduction steps ReduceStep(output, reduction_op, last_lane, Int2Type<0>()); return output; } /// Segmented reduction template < bool HEAD_SEGMENTED, ///< Whether flags indicate a segment-head or a segment-tail typename FlagT, typename ReductionOp> __device__ __forceinline__ T SegmentedReduce( T input, ///< [in] Calling thread's input FlagT flag, ///< [in] Whether or not the current lane is a segment head/tail ReductionOp reduction_op) ///< [in] Binary reduction operator { // Get the start flags for each thread in the warp. int warp_flags = WARP_BALLOT(flag, member_mask); if (HEAD_SEGMENTED) warp_flags >>= 1; // Mask in the last lanes of each logical warp warp_flags |= LastLaneMask<1, LOGICAL_WARPS>::MASK; // Mask out the bits below the current thread warp_flags &= LaneMaskGe(); // Find the next set flag int last_lane = __clz(__brev(warp_flags)); T output = input; // // Iterate reduction steps // #pragma unroll // for (int STEP = 0; STEP < STEPS; STEP++) // { // output = ReduceStep(output, reduction_op, last_lane, 1 << STEP, Int2Type::IS_SMALL_UNSIGNED>()); // } // Template-iterate reduction steps ReduceStep(output, reduction_op, last_lane, Int2Type<0>()); return output; } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/warp/specializations/warp_reduce_smem.cuh000066400000000000000000000345351411340063500265640ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ******************************************************************************/ /** * \file * cub::WarpReduceSmem provides smem-based variants of parallel reduction of items partitioned across a CUDA thread warp. */ #pragma once #include "../../thread/thread_operators.cuh" #include "../../thread/thread_load.cuh" #include "../../thread/thread_store.cuh" #include "../../util_type.cuh" #include "../../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief WarpReduceSmem provides smem-based variants of parallel reduction of items partitioned across a CUDA thread warp. */ template < typename T, ///< Data type being reduced int LOGICAL_WARP_THREADS, ///< Number of threads per logical warp int PTX_ARCH> ///< The PTX compute capability for which to to specialize this collective struct WarpReduceSmem { /****************************************************************************** * Constants and type definitions ******************************************************************************/ enum { /// Whether the logical warp size and the PTX warp size coincide IS_ARCH_WARP = (LOGICAL_WARP_THREADS == CUB_WARP_THREADS(PTX_ARCH)), /// Whether the logical warp size is a power-of-two IS_POW_OF_TWO = PowerOfTwo::VALUE, /// The number of warp scan steps STEPS = Log2::VALUE, /// The number of threads in half a warp HALF_WARP_THREADS = 1 << (STEPS - 1), /// The number of shared memory elements per warp WARP_SMEM_ELEMENTS = LOGICAL_WARP_THREADS + HALF_WARP_THREADS, /// FlagT status (when not using ballot) UNSET = 0x0, // Is initially unset SET = 0x1, // Is initially set SEEN = 0x2, // Has seen another head flag from a successor peer }; /// Shared memory flag type typedef unsigned char SmemFlag; /// Shared memory storage layout type (1.5 warps-worth of elements for each warp) struct _TempStorage { T reduce[WARP_SMEM_ELEMENTS]; SmemFlag flags[WARP_SMEM_ELEMENTS]; }; // Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; /****************************************************************************** * Thread fields ******************************************************************************/ _TempStorage &temp_storage; unsigned int lane_id; unsigned int member_mask; /****************************************************************************** * Construction ******************************************************************************/ /// Constructor __device__ __forceinline__ WarpReduceSmem( TempStorage &temp_storage) : temp_storage(temp_storage.Alias()), lane_id(IS_ARCH_WARP ? LaneId() : LaneId() % LOGICAL_WARP_THREADS), member_mask(!IS_POW_OF_TWO ? 
(0xffffffff >> (32 - LOGICAL_WARP_THREADS)) : // non-power-of-two subwarps cannot be tiled (0xffffffff >> (32 - LOGICAL_WARP_THREADS)) << (LaneId() / LOGICAL_WARP_THREADS)) {} /****************************************************************************** * Utility methods ******************************************************************************/ //--------------------------------------------------------------------- // Regular reduction //--------------------------------------------------------------------- /** * Reduction step */ template < bool ALL_LANES_VALID, ///< Whether all lanes in each warp are contributing a valid fold of items int FOLDED_ITEMS_PER_LANE, ///< Number of items folded into each lane typename ReductionOp, int STEP> __device__ __forceinline__ T ReduceStep( T input, ///< [in] Calling thread's input int folded_items_per_warp, ///< [in] Total number of valid items folded into each logical warp ReductionOp reduction_op, ///< [in] Reduction operator Int2Type /*step*/) { const int OFFSET = 1 << STEP; // Share input through buffer ThreadStore(&temp_storage.reduce[lane_id], input); WARP_SYNC(member_mask); // Update input if peer_addend is in range if ((ALL_LANES_VALID && IS_POW_OF_TWO) || ((lane_id + OFFSET) * FOLDED_ITEMS_PER_LANE < folded_items_per_warp)) { T peer_addend = ThreadLoad(&temp_storage.reduce[lane_id + OFFSET]); input = reduction_op(input, peer_addend); } WARP_SYNC(member_mask); return ReduceStep(input, folded_items_per_warp, reduction_op, Int2Type()); } /** * Reduction step (terminate) */ template < bool ALL_LANES_VALID, ///< Whether all lanes in each warp are contributing a valid fold of items int FOLDED_ITEMS_PER_LANE, ///< Number of items folded into each lane typename ReductionOp> __device__ __forceinline__ T ReduceStep( T input, ///< [in] Calling thread's input int /*folded_items_per_warp*/, ///< [in] Total number of valid items folded into each logical warp ReductionOp /*reduction_op*/, ///< [in] Reduction operator Int2Type /*step*/) { return input; } //--------------------------------------------------------------------- // Segmented reduction //--------------------------------------------------------------------- /** * Ballot-based segmented reduce */ template < bool HEAD_SEGMENTED, ///< Whether flags indicate a segment-head or a segment-tail typename FlagT, typename ReductionOp> __device__ __forceinline__ T SegmentedReduce( T input, ///< [in] Calling thread's input FlagT flag, ///< [in] Whether or not the current lane is a segment head/tail ReductionOp reduction_op, ///< [in] Reduction operator Int2Type /*has_ballot*/) ///< [in] Marker type for whether the target arch has ballot functionality { // Get the start flags for each thread in the warp. int warp_flags = WARP_BALLOT(flag, member_mask); if (!HEAD_SEGMENTED) warp_flags <<= 1; // Keep bits above the current thread. 
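        // Each bit of warp_flags corresponds to one lane; keeping only the bits from
        // lanes above this one lets the __brev/__clz pair further below locate the
        // nearest flag past this lane, i.e. the exclusive end of this lane's segment.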
warp_flags &= LaneMaskGt(); // Accommodate packing of multiple logical warps in a single physical warp if (!IS_ARCH_WARP) { warp_flags >>= (LaneId() / LOGICAL_WARP_THREADS) * LOGICAL_WARP_THREADS; } // Find next flag int next_flag = __clz(__brev(warp_flags)); // Clip the next segment at the warp boundary if necessary if (LOGICAL_WARP_THREADS != 32) next_flag = CUB_MIN(next_flag, LOGICAL_WARP_THREADS); #pragma unroll for (int STEP = 0; STEP < STEPS; STEP++) { const int OFFSET = 1 << STEP; // Share input into buffer ThreadStore(&temp_storage.reduce[lane_id], input); WARP_SYNC(member_mask); // Update input if peer_addend is in range if (OFFSET + lane_id < next_flag) { T peer_addend = ThreadLoad(&temp_storage.reduce[lane_id + OFFSET]); input = reduction_op(input, peer_addend); } WARP_SYNC(member_mask); } return input; } /** * Smem-based segmented reduce */ template < bool HEAD_SEGMENTED, ///< Whether flags indicate a segment-head or a segment-tail typename FlagT, typename ReductionOp> __device__ __forceinline__ T SegmentedReduce( T input, ///< [in] Calling thread's input FlagT flag, ///< [in] Whether or not the current lane is a segment head/tail ReductionOp reduction_op, ///< [in] Reduction operator Int2Type /*has_ballot*/) ///< [in] Marker type for whether the target arch has ballot functionality { enum { UNSET = 0x0, // Is initially unset SET = 0x1, // Is initially set SEEN = 0x2, // Has seen another head flag from a successor peer }; // Alias flags onto shared data storage volatile SmemFlag *flag_storage = temp_storage.flags; SmemFlag flag_status = (flag) ? SET : UNSET; for (int STEP = 0; STEP < STEPS; STEP++) { const int OFFSET = 1 << STEP; // Share input through buffer ThreadStore(&temp_storage.reduce[lane_id], input); WARP_SYNC(member_mask); // Get peer from buffer T peer_addend = ThreadLoad(&temp_storage.reduce[lane_id + OFFSET]); WARP_SYNC(member_mask); // Share flag through buffer flag_storage[lane_id] = flag_status; // Get peer flag from buffer SmemFlag peer_flag_status = flag_storage[lane_id + OFFSET]; // Update input if peer was in range if (lane_id < LOGICAL_WARP_THREADS - OFFSET) { if (HEAD_SEGMENTED) { // Head-segmented if ((flag_status & SEEN) == 0) { // Has not seen a more distant head flag if (peer_flag_status & SET) { // Has now seen a head flag flag_status |= SEEN; } else { // Peer is not a head flag: grab its count input = reduction_op(input, peer_addend); } // Update seen status to include that of peer flag_status |= (peer_flag_status & SEEN); } } else { // Tail-segmented. 
Simply propagate flag status if (!flag_status) { input = reduction_op(input, peer_addend); flag_status |= peer_flag_status; } } } } return input; } /****************************************************************************** * Interface ******************************************************************************/ /** * Reduction */ template < bool ALL_LANES_VALID, ///< Whether all lanes in each warp are contributing a valid fold of items int FOLDED_ITEMS_PER_LANE, ///< Number of items folded into each lane typename ReductionOp> __device__ __forceinline__ T Reduce( T input, ///< [in] Calling thread's input int folded_items_per_warp, ///< [in] Total number of valid items folded into each logical warp ReductionOp reduction_op) ///< [in] Reduction operator { return ReduceStep(input, folded_items_per_warp, reduction_op, Int2Type<0>()); } /** * Segmented reduction */ template < bool HEAD_SEGMENTED, ///< Whether flags indicate a segment-head or a segment-tail typename FlagT, typename ReductionOp> __device__ __forceinline__ T SegmentedReduce( T input, ///< [in] Calling thread's input FlagT flag, ///< [in] Whether or not the current lane is a segment head/tail ReductionOp reduction_op) ///< [in] Reduction operator { return SegmentedReduce(input, flag, reduction_op, Int2Type<(PTX_ARCH >= 200)>()); } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/warp/specializations/warp_scan_shfl.cuh000066400000000000000000000634331411340063500262330ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::WarpScanShfl provides SHFL-based variants of parallel prefix scan of items partitioned across a CUDA thread warp. 
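 *
 * \par
 * In an inclusive prefix scan, lane i of a logical warp receives the combination of
 * the inputs from lanes 0 through i. For a prefix sum in which every lane of a
 * 32-thread warp contributes 1, lane 0 therefore ends up with 1, lane 1 with 2, and
 * lane 31 with 32.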
*/ #pragma once #include "../../thread/thread_operators.cuh" #include "../../util_type.cuh" #include "../../util_ptx.cuh" #include "../../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief WarpScanShfl provides SHFL-based variants of parallel prefix scan of items partitioned across a CUDA thread warp. */ template < typename T, ///< Data type being scanned int LOGICAL_WARP_THREADS, ///< Number of threads per logical warp int PTX_ARCH> ///< The PTX compute capability for which to to specialize this collective struct WarpScanShfl { //--------------------------------------------------------------------- // Constants and type definitions //--------------------------------------------------------------------- enum { /// Whether the logical warp size and the PTX warp size coincide IS_ARCH_WARP = (LOGICAL_WARP_THREADS == CUB_WARP_THREADS(PTX_ARCH)), /// The number of warp scan steps STEPS = Log2::VALUE, /// The 5-bit SHFL mask for logically splitting warps into sub-segments starts 8-bits up SHFL_C = ((0xFFFFFFFFU << STEPS) & 31) << 8, }; template struct IntegerTraits { enum { ///Whether the data type is a small (32b or less) integer for which we can use a single SFHL instruction per exchange IS_SMALL_UNSIGNED = (Traits::CATEGORY == UNSIGNED_INTEGER) && (sizeof(S) <= sizeof(unsigned int)) }; }; /// Shared memory storage layout type struct TempStorage {}; //--------------------------------------------------------------------- // Thread fields //--------------------------------------------------------------------- unsigned int lane_id; unsigned int member_mask; //--------------------------------------------------------------------- // Construction //--------------------------------------------------------------------- /// Constructor __device__ __forceinline__ WarpScanShfl( TempStorage &/*temp_storage*/) : lane_id(IS_ARCH_WARP ? LaneId() : LaneId() % LOGICAL_WARP_THREADS), member_mask(IS_ARCH_WARP ? 0xffffffff : (0xffffffff >> (32 - LOGICAL_WARP_THREADS)) << (LaneId() / LOGICAL_WARP_THREADS)) {} //--------------------------------------------------------------------- // Inclusive scan steps //--------------------------------------------------------------------- /// Inclusive prefix scan step (specialized for summation across int32 types) __device__ __forceinline__ int InclusiveScanStep( int input, ///< [in] Calling thread's input item. cub::Sum /*scan_op*/, ///< [in] Binary scan operator int first_lane, ///< [in] Index of first lane in segment int offset) ///< [in] Up-offset to pull from { int output; int shfl_c = first_lane | SHFL_C; // Shuffle control (mask and first-lane) // Use predicate set from SHFL to guard against invalid peers #ifdef CUB_USE_COOPERATIVE_GROUPS asm volatile( "{" " .reg .s32 r0;" " .reg .pred p;" " shfl.sync.up.b32 r0|p, %1, %2, %3, %5;" " @p add.s32 r0, r0, %4;" " mov.s32 %0, r0;" "}" : "=r"(output) : "r"(input), "r"(offset), "r"(shfl_c), "r"(input), "r"(member_mask)); #else asm volatile( "{" " .reg .s32 r0;" " .reg .pred p;" " shfl.up.b32 r0|p, %1, %2, %3;" " @p add.s32 r0, r0, %4;" " mov.s32 %0, r0;" "}" : "=r"(output) : "r"(input), "r"(offset), "r"(shfl_c), "r"(input)); #endif return output; } /// Inclusive prefix scan step (specialized for summation across uint32 types) __device__ __forceinline__ unsigned int InclusiveScanStep( unsigned int input, ///< [in] Calling thread's input item. 
cub::Sum /*scan_op*/, ///< [in] Binary scan operator int first_lane, ///< [in] Index of first lane in segment int offset) ///< [in] Up-offset to pull from { unsigned int output; int shfl_c = first_lane | SHFL_C; // Shuffle control (mask and first-lane) // Use predicate set from SHFL to guard against invalid peers #ifdef CUB_USE_COOPERATIVE_GROUPS asm volatile( "{" " .reg .u32 r0;" " .reg .pred p;" " shfl.sync.up.b32 r0|p, %1, %2, %3, %5;" " @p add.u32 r0, r0, %4;" " mov.u32 %0, r0;" "}" : "=r"(output) : "r"(input), "r"(offset), "r"(shfl_c), "r"(input), "r"(member_mask)); #else asm volatile( "{" " .reg .u32 r0;" " .reg .pred p;" " shfl.up.b32 r0|p, %1, %2, %3;" " @p add.u32 r0, r0, %4;" " mov.u32 %0, r0;" "}" : "=r"(output) : "r"(input), "r"(offset), "r"(shfl_c), "r"(input)); #endif return output; } /// Inclusive prefix scan step (specialized for summation across fp32 types) __device__ __forceinline__ float InclusiveScanStep( float input, ///< [in] Calling thread's input item. cub::Sum /*scan_op*/, ///< [in] Binary scan operator int first_lane, ///< [in] Index of first lane in segment int offset) ///< [in] Up-offset to pull from { float output; int shfl_c = first_lane | SHFL_C; // Shuffle control (mask and first-lane) // Use predicate set from SHFL to guard against invalid peers #ifdef CUB_USE_COOPERATIVE_GROUPS asm volatile( "{" " .reg .f32 r0;" " .reg .pred p;" " shfl.sync.up.b32 r0|p, %1, %2, %3, %5;" " @p add.f32 r0, r0, %4;" " mov.f32 %0, r0;" "}" : "=f"(output) : "f"(input), "r"(offset), "r"(shfl_c), "f"(input), "r"(member_mask)); #else asm volatile( "{" " .reg .f32 r0;" " .reg .pred p;" " shfl.up.b32 r0|p, %1, %2, %3;" " @p add.f32 r0, r0, %4;" " mov.f32 %0, r0;" "}" : "=f"(output) : "f"(input), "r"(offset), "r"(shfl_c), "f"(input)); #endif return output; } /// Inclusive prefix scan step (specialized for summation across unsigned long long types) __device__ __forceinline__ unsigned long long InclusiveScanStep( unsigned long long input, ///< [in] Calling thread's input item. cub::Sum /*scan_op*/, ///< [in] Binary scan operator int first_lane, ///< [in] Index of first lane in segment int offset) ///< [in] Up-offset to pull from { unsigned long long output; int shfl_c = first_lane | SHFL_C; // Shuffle control (mask and first-lane) // Use predicate set from SHFL to guard against invalid peers #ifdef CUB_USE_COOPERATIVE_GROUPS asm volatile( "{" " .reg .u64 r0;" " .reg .u32 lo;" " .reg .u32 hi;" " .reg .pred p;" " mov.b64 {lo, hi}, %1;" " shfl.sync.up.b32 lo|p, lo, %2, %3, %5;" " shfl.sync.up.b32 hi|p, hi, %2, %3, %5;" " mov.b64 r0, {lo, hi};" " @p add.u64 r0, r0, %4;" " mov.u64 %0, r0;" "}" : "=l"(output) : "l"(input), "r"(offset), "r"(shfl_c), "l"(input), "r"(member_mask)); #else asm volatile( "{" " .reg .u64 r0;" " .reg .u32 lo;" " .reg .u32 hi;" " .reg .pred p;" " mov.b64 {lo, hi}, %1;" " shfl.up.b32 lo|p, lo, %2, %3;" " shfl.up.b32 hi|p, hi, %2, %3;" " mov.b64 r0, {lo, hi};" " @p add.u64 r0, r0, %4;" " mov.u64 %0, r0;" "}" : "=l"(output) : "l"(input), "r"(offset), "r"(shfl_c), "l"(input)); #endif return output; } /// Inclusive prefix scan step (specialized for summation across long long types) __device__ __forceinline__ long long InclusiveScanStep( long long input, ///< [in] Calling thread's input item. 
cub::Sum /*scan_op*/, ///< [in] Binary scan operator int first_lane, ///< [in] Index of first lane in segment int offset) ///< [in] Up-offset to pull from { long long output; int shfl_c = first_lane | SHFL_C; // Shuffle control (mask and first-lane) // Use predicate set from SHFL to guard against invalid peers #ifdef CUB_USE_COOPERATIVE_GROUPS asm volatile( "{" " .reg .s64 r0;" " .reg .u32 lo;" " .reg .u32 hi;" " .reg .pred p;" " mov.b64 {lo, hi}, %1;" " shfl.sync.up.b32 lo|p, lo, %2, %3, %5;" " shfl.sync.up.b32 hi|p, hi, %2, %3, %5;" " mov.b64 r0, {lo, hi};" " @p add.s64 r0, r0, %4;" " mov.s64 %0, r0;" "}" : "=l"(output) : "l"(input), "r"(offset), "r"(shfl_c), "l"(input), "r"(member_mask)); #else asm volatile( "{" " .reg .s64 r0;" " .reg .u32 lo;" " .reg .u32 hi;" " .reg .pred p;" " mov.b64 {lo, hi}, %1;" " shfl.up.b32 lo|p, lo, %2, %3;" " shfl.up.b32 hi|p, hi, %2, %3;" " mov.b64 r0, {lo, hi};" " @p add.s64 r0, r0, %4;" " mov.s64 %0, r0;" "}" : "=l"(output) : "l"(input), "r"(offset), "r"(shfl_c), "l"(input)); #endif return output; } /// Inclusive prefix scan step (specialized for summation across fp64 types) __device__ __forceinline__ double InclusiveScanStep( double input, ///< [in] Calling thread's input item. cub::Sum /*scan_op*/, ///< [in] Binary scan operator int first_lane, ///< [in] Index of first lane in segment int offset) ///< [in] Up-offset to pull from { double output; int shfl_c = first_lane | SHFL_C; // Shuffle control (mask and first-lane) // Use predicate set from SHFL to guard against invalid peers #ifdef CUB_USE_COOPERATIVE_GROUPS asm volatile( "{" " .reg .u32 lo;" " .reg .u32 hi;" " .reg .pred p;" " .reg .f64 r0;" " mov.b64 %0, %1;" " mov.b64 {lo, hi}, %1;" " shfl.sync.up.b32 lo|p, lo, %2, %3, %4;" " shfl.sync.up.b32 hi|p, hi, %2, %3, %4;" " mov.b64 r0, {lo, hi};" " @p add.f64 %0, %0, r0;" "}" : "=d"(output) : "d"(input), "r"(offset), "r"(shfl_c), "r"(member_mask)); #else asm volatile( "{" " .reg .u32 lo;" " .reg .u32 hi;" " .reg .pred p;" " .reg .f64 r0;" " mov.b64 %0, %1;" " mov.b64 {lo, hi}, %1;" " shfl.up.b32 lo|p, lo, %2, %3;" " shfl.up.b32 hi|p, hi, %2, %3;" " mov.b64 r0, {lo, hi};" " @p add.f64 %0, %0, r0;" "}" : "=d"(output) : "d"(input), "r"(offset), "r"(shfl_c)); #endif return output; } /* /// Inclusive prefix scan (specialized for ReduceBySegmentOp across KeyValuePair types) template __device__ __forceinline__ KeyValuePairInclusiveScanStep( KeyValuePair input, ///< [in] Calling thread's input item. ReduceBySegmentOp scan_op, ///< [in] Binary scan operator int first_lane, ///< [in] Index of first lane in segment int offset) ///< [in] Up-offset to pull from { KeyValuePair output; output.value = InclusiveScanStep(input.value, cub::Sum(), first_lane, offset, Int2Type::IS_SMALL_UNSIGNED>()); output.key = InclusiveScanStep(input.key, cub::Sum(), first_lane, offset, Int2Type::IS_SMALL_UNSIGNED>()); if (input.key > 0) output.value = input.value; return output; } */ /// Inclusive prefix scan step (generic) template __device__ __forceinline__ _T InclusiveScanStep( _T input, ///< [in] Calling thread's input item. 
ScanOpT scan_op, ///< [in] Binary scan operator int first_lane, ///< [in] Index of first lane in segment int offset) ///< [in] Up-offset to pull from { _T temp = ShuffleUp(input, offset, first_lane, member_mask); // Perform scan op if from a valid peer _T output = scan_op(temp, input); if (static_cast(lane_id) < first_lane + offset) output = input; return output; } /// Inclusive prefix scan step (specialized for small integers size 32b or less) template __device__ __forceinline__ _T InclusiveScanStep( _T input, ///< [in] Calling thread's input item. ScanOpT scan_op, ///< [in] Binary scan operator int first_lane, ///< [in] Index of first lane in segment int offset, ///< [in] Up-offset to pull from Int2Type /*is_small_unsigned*/) ///< [in] Marker type indicating whether T is a small integer { return InclusiveScanStep(input, scan_op, first_lane, offset); } /// Inclusive prefix scan step (specialized for types other than small integers size 32b or less) template __device__ __forceinline__ _T InclusiveScanStep( _T input, ///< [in] Calling thread's input item. ScanOpT scan_op, ///< [in] Binary scan operator int first_lane, ///< [in] Index of first lane in segment int offset, ///< [in] Up-offset to pull from Int2Type /*is_small_unsigned*/) ///< [in] Marker type indicating whether T is a small integer { return InclusiveScanStep(input, scan_op, first_lane, offset); } //--------------------------------------------------------------------- // Templated inclusive scan iteration //--------------------------------------------------------------------- template __device__ __forceinline__ void InclusiveScanStep( _T& input, ///< [in] Calling thread's input item. ScanOp scan_op, ///< [in] Binary scan operator int first_lane, ///< [in] Index of first lane in segment Int2Type /*step*/) ///< [in] Marker type indicating scan step { input = InclusiveScanStep(input, scan_op, first_lane, 1 << STEP, Int2Type::IS_SMALL_UNSIGNED>()); InclusiveScanStep(input, scan_op, first_lane, Int2Type()); } template __device__ __forceinline__ void InclusiveScanStep( _T& /*input*/, ///< [in] Calling thread's input item. ScanOp /*scan_op*/, ///< [in] Binary scan operator int /*first_lane*/, ///< [in] Index of first lane in segment Int2Type /*step*/) ///< [in] Marker type indicating scan step {} /****************************************************************************** * Interface ******************************************************************************/ //--------------------------------------------------------------------- // Broadcast //--------------------------------------------------------------------- /// Broadcast __device__ __forceinline__ T Broadcast( T input, ///< [in] The value to broadcast int src_lane) ///< [in] Which warp lane is to do the broadcasting { return ShuffleIndex(input, src_lane, LOGICAL_WARP_THREADS, member_mask); } //--------------------------------------------------------------------- // Inclusive operations //--------------------------------------------------------------------- /// Inclusive scan template __device__ __forceinline__ void InclusiveScan( _T input, ///< [in] Calling thread's input item. _T &inclusive_output, ///< [out] Calling thread's output item. May be aliased with \p input. 
ScanOpT scan_op) ///< [in] Binary scan operator { inclusive_output = input; // Iterate scan steps int segment_first_lane = 0; // Iterate scan steps // InclusiveScanStep(inclusive_output, scan_op, segment_first_lane, Int2Type<0>()); // Iterate scan steps #pragma unroll for (int STEP = 0; STEP < STEPS; STEP++) { inclusive_output = InclusiveScanStep( inclusive_output, scan_op, segment_first_lane, (1 << STEP), Int2Type::IS_SMALL_UNSIGNED>()); } } /// Inclusive scan, specialized for reduce-value-by-key template __device__ __forceinline__ void InclusiveScan( KeyValuePair input, ///< [in] Calling thread's input item. KeyValuePair &inclusive_output, ///< [out] Calling thread's output item. May be aliased with \p input. ReduceByKeyOp scan_op) ///< [in] Binary scan operator { inclusive_output = input; KeyT pred_key = ShuffleUp(inclusive_output.key, 1, 0, member_mask); unsigned int ballot = WARP_BALLOT((pred_key != inclusive_output.key), member_mask); // Mask away all lanes greater than ours ballot = ballot & LaneMaskLe(); // Find index of first set bit int segment_first_lane = CUB_MAX(0, 31 - __clz(ballot)); // Iterate scan steps // InclusiveScanStep(inclusive_output.value, scan_op.op, segment_first_lane, Int2Type<0>()); // Iterate scan steps #pragma unroll for (int STEP = 0; STEP < STEPS; STEP++) { inclusive_output.value = InclusiveScanStep( inclusive_output.value, scan_op.op, segment_first_lane, (1 << STEP), Int2Type::IS_SMALL_UNSIGNED>()); } } /// Inclusive scan with aggregate template __device__ __forceinline__ void InclusiveScan( T input, ///< [in] Calling thread's input item. T &inclusive_output, ///< [out] Calling thread's output item. May be aliased with \p input. ScanOpT scan_op, ///< [in] Binary scan operator T &warp_aggregate) ///< [out] Warp-wide aggregate reduction of input items. 
{ InclusiveScan(input, inclusive_output, scan_op); // Grab aggregate from last warp lane warp_aggregate = ShuffleIndex(inclusive_output, LOGICAL_WARP_THREADS - 1, LOGICAL_WARP_THREADS, member_mask); } //--------------------------------------------------------------------- // Get exclusive from inclusive //--------------------------------------------------------------------- /// Update inclusive and exclusive using input and inclusive template __device__ __forceinline__ void Update( T /*input*/, ///< [in] T &inclusive, ///< [in, out] T &exclusive, ///< [out] ScanOpT /*scan_op*/, ///< [in] IsIntegerT /*is_integer*/) ///< [in] { // initial value unknown exclusive = ShuffleUp(inclusive, 1, 0, member_mask); } /// Update inclusive and exclusive using input and inclusive (specialized for summation of integer types) __device__ __forceinline__ void Update( T input, T &inclusive, T &exclusive, cub::Sum /*scan_op*/, Int2Type /*is_integer*/) { // initial value presumed 0 exclusive = inclusive - input; } /// Update inclusive and exclusive using initial value using input, inclusive, and initial value template __device__ __forceinline__ void Update ( T /*input*/, T &inclusive, T &exclusive, ScanOpT scan_op, T initial_value, IsIntegerT /*is_integer*/) { inclusive = scan_op(initial_value, inclusive); exclusive = ShuffleUp(inclusive, 1, 0, member_mask); if (lane_id == 0) exclusive = initial_value; } /// Update inclusive and exclusive using initial value using input and inclusive (specialized for summation of integer types) __device__ __forceinline__ void Update ( T input, T &inclusive, T &exclusive, cub::Sum scan_op, T initial_value, Int2Type /*is_integer*/) { inclusive = scan_op(initial_value, inclusive); exclusive = inclusive - input; } /// Update inclusive, exclusive, and warp aggregate using input and inclusive template __device__ __forceinline__ void Update ( T input, T &inclusive, T &exclusive, T &warp_aggregate, ScanOpT scan_op, IsIntegerT is_integer) { warp_aggregate = ShuffleIndex(inclusive, LOGICAL_WARP_THREADS - 1, LOGICAL_WARP_THREADS, member_mask); Update(input, inclusive, exclusive, scan_op, is_integer); } /// Update inclusive, exclusive, and warp aggregate using input, inclusive, and initial value template __device__ __forceinline__ void Update ( T input, T &inclusive, T &exclusive, T &warp_aggregate, ScanOpT scan_op, T initial_value, IsIntegerT is_integer) { warp_aggregate = ShuffleIndex(inclusive, LOGICAL_WARP_THREADS - 1, LOGICAL_WARP_THREADS, member_mask); Update(input, inclusive, exclusive, scan_op, initial_value, is_integer); } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/warp/specializations/warp_scan_smem.cuh000066400000000000000000000374271411340063500262440ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * cub::WarpScanSmem provides smem-based variants of parallel prefix scan of items partitioned across a CUDA thread warp. */ #pragma once #include "../../thread/thread_operators.cuh" #include "../../thread/thread_load.cuh" #include "../../thread/thread_store.cuh" #include "../../util_type.cuh" #include "../../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \brief WarpScanSmem provides smem-based variants of parallel prefix scan of items partitioned across a CUDA thread warp. */ template < typename T, ///< Data type being scanned int LOGICAL_WARP_THREADS, ///< Number of threads per logical warp int PTX_ARCH> ///< The PTX compute capability for which to to specialize this collective struct WarpScanSmem { /****************************************************************************** * Constants and type definitions ******************************************************************************/ enum { /// Whether the logical warp size and the PTX warp size coincide IS_ARCH_WARP = (LOGICAL_WARP_THREADS == CUB_WARP_THREADS(PTX_ARCH)), /// Whether the logical warp size is a power-of-two IS_POW_OF_TWO = PowerOfTwo::VALUE, /// The number of warp scan steps STEPS = Log2::VALUE, /// The number of threads in half a warp HALF_WARP_THREADS = 1 << (STEPS - 1), /// The number of shared memory elements per warp WARP_SMEM_ELEMENTS = LOGICAL_WARP_THREADS + HALF_WARP_THREADS, }; /// Storage cell type (workaround for SM1x compiler bugs with custom-ops like Max() on signed chars) typedef typename If<((Equals::VALUE || Equals::VALUE) && (PTX_ARCH < 200)), int, T>::Type CellT; /// Shared memory storage layout type (1.5 warps-worth of elements for each warp) typedef CellT _TempStorage[WARP_SMEM_ELEMENTS]; // Alias wrapper allowing storage to be unioned struct TempStorage : Uninitialized<_TempStorage> {}; /****************************************************************************** * Thread fields ******************************************************************************/ _TempStorage &temp_storage; unsigned int lane_id; unsigned int member_mask; /****************************************************************************** * Construction ******************************************************************************/ /// Constructor __device__ __forceinline__ WarpScanSmem( TempStorage &temp_storage) : temp_storage(temp_storage.Alias()), lane_id(IS_ARCH_WARP ? 
LaneId() : LaneId() % LOGICAL_WARP_THREADS), member_mask(!IS_POW_OF_TWO ? (0xffffffff >> (32 - LOGICAL_WARP_THREADS)) : // non-power-of-two subwarps cannot be tiled (0xffffffff >> (32 - LOGICAL_WARP_THREADS)) << (LaneId() / LOGICAL_WARP_THREADS)) {} /****************************************************************************** * Utility methods ******************************************************************************/ /// Basic inclusive scan iteration (template unrolled, inductive-case specialization) template < bool HAS_IDENTITY, int STEP, typename ScanOp> __device__ __forceinline__ void ScanStep( T &partial, ScanOp scan_op, Int2Type /*step*/) { const int OFFSET = 1 << STEP; // Share partial into buffer ThreadStore(&temp_storage[HALF_WARP_THREADS + lane_id], (CellT) partial); WARP_SYNC(member_mask); // Update partial if addend is in range if (HAS_IDENTITY || (lane_id >= OFFSET)) { T addend = (T) ThreadLoad(&temp_storage[HALF_WARP_THREADS + lane_id - OFFSET]); partial = scan_op(addend, partial); } WARP_SYNC(member_mask); ScanStep(partial, scan_op, Int2Type()); } /// Basic inclusive scan iteration(template unrolled, base-case specialization) template < bool HAS_IDENTITY, typename ScanOp> __device__ __forceinline__ void ScanStep( T &/*partial*/, ScanOp /*scan_op*/, Int2Type /*step*/) {} /// Inclusive prefix scan (specialized for summation across primitive types) __device__ __forceinline__ void InclusiveScan( T input, ///< [in] Calling thread's input item. T &output, ///< [out] Calling thread's output item. May be aliased with \p input. Sum scan_op, ///< [in] Binary scan operator Int2Type /*is_primitive*/) ///< [in] Marker type indicating whether T is primitive type { T identity = 0; ThreadStore(&temp_storage[lane_id], (CellT) identity); WARP_SYNC(member_mask); // Iterate scan steps output = input; ScanStep(output, scan_op, Int2Type<0>()); } /// Inclusive prefix scan template __device__ __forceinline__ void InclusiveScan( T input, ///< [in] Calling thread's input item. T &output, ///< [out] Calling thread's output item. May be aliased with \p input. ScanOp scan_op, ///< [in] Binary scan operator Int2Type /*is_primitive*/) ///< [in] Marker type indicating whether T is primitive type { // Iterate scan steps output = input; ScanStep(output, scan_op, Int2Type<0>()); } /****************************************************************************** * Interface ******************************************************************************/ //--------------------------------------------------------------------- // Broadcast //--------------------------------------------------------------------- /// Broadcast __device__ __forceinline__ T Broadcast( T input, ///< [in] The value to broadcast unsigned int src_lane) ///< [in] Which warp lane is to do the broadcasting { if (lane_id == src_lane) { ThreadStore(temp_storage, (CellT) input); } WARP_SYNC(member_mask); return (T)ThreadLoad(temp_storage); } //--------------------------------------------------------------------- // Inclusive operations //--------------------------------------------------------------------- /// Inclusive scan template __device__ __forceinline__ void InclusiveScan( T input, ///< [in] Calling thread's input item. T &inclusive_output, ///< [out] Calling thread's output item. May be aliased with \p input. 
ScanOp scan_op) ///< [in] Binary scan operator { InclusiveScan(input, inclusive_output, scan_op, Int2Type::PRIMITIVE>()); } /// Inclusive scan with aggregate template __device__ __forceinline__ void InclusiveScan( T input, ///< [in] Calling thread's input item. T &inclusive_output, ///< [out] Calling thread's output item. May be aliased with \p input. ScanOp scan_op, ///< [in] Binary scan operator T &warp_aggregate) ///< [out] Warp-wide aggregate reduction of input items. { InclusiveScan(input, inclusive_output, scan_op); // Retrieve aggregate ThreadStore(&temp_storage[HALF_WARP_THREADS + lane_id], (CellT) inclusive_output); WARP_SYNC(member_mask); warp_aggregate = (T) ThreadLoad(&temp_storage[WARP_SMEM_ELEMENTS - 1]); WARP_SYNC(member_mask); } //--------------------------------------------------------------------- // Get exclusive from inclusive //--------------------------------------------------------------------- /// Update inclusive and exclusive using input and inclusive template __device__ __forceinline__ void Update( T /*input*/, ///< [in] T &inclusive, ///< [in, out] T &exclusive, ///< [out] ScanOpT /*scan_op*/, ///< [in] IsIntegerT /*is_integer*/) ///< [in] { // initial value unknown ThreadStore(&temp_storage[HALF_WARP_THREADS + lane_id], (CellT) inclusive); WARP_SYNC(member_mask); exclusive = (T) ThreadLoad(&temp_storage[HALF_WARP_THREADS + lane_id - 1]); } /// Update inclusive and exclusive using input and inclusive (specialized for summation of integer types) __device__ __forceinline__ void Update( T input, T &inclusive, T &exclusive, cub::Sum /*scan_op*/, Int2Type /*is_integer*/) { // initial value presumed 0 exclusive = inclusive - input; } /// Update inclusive and exclusive using initial value using input, inclusive, and initial value template __device__ __forceinline__ void Update ( T /*input*/, T &inclusive, T &exclusive, ScanOpT scan_op, T initial_value, IsIntegerT /*is_integer*/) { inclusive = scan_op(initial_value, inclusive); ThreadStore(&temp_storage[HALF_WARP_THREADS + lane_id], (CellT) inclusive); WARP_SYNC(member_mask); exclusive = (T) ThreadLoad(&temp_storage[HALF_WARP_THREADS + lane_id - 1]); if (lane_id == 0) exclusive = initial_value; } /// Update inclusive and exclusive using initial value using input and inclusive (specialized for summation of integer types) __device__ __forceinline__ void Update ( T input, T &inclusive, T &exclusive, cub::Sum scan_op, T initial_value, Int2Type /*is_integer*/) { inclusive = scan_op(initial_value, inclusive); exclusive = inclusive - input; } /// Update inclusive, exclusive, and warp aggregate using input and inclusive template __device__ __forceinline__ void Update ( T /*input*/, T &inclusive, T &exclusive, T &warp_aggregate, ScanOpT /*scan_op*/, IsIntegerT /*is_integer*/) { // Initial value presumed to be unknown or identity (either way our padding is correct) ThreadStore(&temp_storage[HALF_WARP_THREADS + lane_id], (CellT) inclusive); WARP_SYNC(member_mask); exclusive = (T) ThreadLoad(&temp_storage[HALF_WARP_THREADS + lane_id - 1]); warp_aggregate = (T) ThreadLoad(&temp_storage[WARP_SMEM_ELEMENTS - 1]); } /// Update inclusive, exclusive, and warp aggregate using input and inclusive (specialized for summation of integer types) __device__ __forceinline__ void Update ( T input, T &inclusive, T &exclusive, T &warp_aggregate, cub::Sum /*scan_o*/, Int2Type /*is_integer*/) { // Initial value presumed to be unknown or identity (either way our padding is correct) ThreadStore(&temp_storage[HALF_WARP_THREADS + lane_id], (CellT) 
inclusive); WARP_SYNC(member_mask); warp_aggregate = (T) ThreadLoad(&temp_storage[WARP_SMEM_ELEMENTS - 1]); exclusive = inclusive - input; } /// Update inclusive, exclusive, and warp aggregate using input, inclusive, and initial value template __device__ __forceinline__ void Update ( T /*input*/, T &inclusive, T &exclusive, T &warp_aggregate, ScanOpT scan_op, T initial_value, IsIntegerT /*is_integer*/) { // Broadcast warp aggregate ThreadStore(&temp_storage[HALF_WARP_THREADS + lane_id], (CellT) inclusive); WARP_SYNC(member_mask); warp_aggregate = (T) ThreadLoad(&temp_storage[WARP_SMEM_ELEMENTS - 1]); WARP_SYNC(member_mask); // Update inclusive with initial value inclusive = scan_op(initial_value, inclusive); // Get exclusive from exclusive ThreadStore(&temp_storage[HALF_WARP_THREADS + lane_id - 1], (CellT) inclusive); WARP_SYNC(member_mask); exclusive = (T) ThreadLoad(&temp_storage[HALF_WARP_THREADS + lane_id - 2]); if (lane_id == 0) exclusive = initial_value; } }; } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/warp/warp_reduce.cuh000066400000000000000000000607261411340063500223430ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * The cub::WarpReduce class provides [collective](index.html#sec0) methods for computing a parallel reduction of items partitioned across a CUDA thread warp. 
*/ #pragma once #include "specializations/warp_reduce_shfl.cuh" #include "specializations/warp_reduce_smem.cuh" #include "../thread/thread_operators.cuh" #include "../util_arch.cuh" #include "../util_type.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup WarpModule * @{ */ /** * \brief The WarpReduce class provides [collective](index.html#sec0) methods for computing a parallel reduction of items partitioned across a CUDA thread warp. ![](warp_reduce_logo.png) * * \tparam T The reduction input/output element type * \tparam LOGICAL_WARP_THREADS [optional] The number of threads per "logical" warp (may be less than the number of hardware warp threads). Default is the warp size of the targeted CUDA compute-capability (e.g., 32 threads for SM20). * \tparam PTX_ARCH [optional] \ptxversion * * \par Overview * - A reduction (or fold) * uses a binary combining operator to compute a single aggregate from a list of input elements. * - Supports "logical" warps smaller than the physical warp size (e.g., logical warps of 8 threads) * - The number of entrant threads must be an multiple of \p LOGICAL_WARP_THREADS * * \par Performance Considerations * - Uses special instructions when applicable (e.g., warp \p SHFL instructions) * - Uses synchronization-free communication between warp lanes when applicable * - Incurs zero bank conflicts for most types * - Computation is slightly more efficient (i.e., having lower instruction overhead) for: * - Summation (vs. generic reduction) * - The architecture's warp size is a whole multiple of \p LOGICAL_WARP_THREADS * * \par Simple Examples * \warpcollective{WarpReduce} * \par * The code snippet below illustrates four concurrent warp sum reductions within a block of * 128 threads (one per each of the 32-thread warps). * \par * \code * #include * * __global__ void ExampleKernel(...) * { * // Specialize WarpReduce for type int * typedef cub::WarpReduce WarpReduce; * * // Allocate WarpReduce shared memory for 4 warps * __shared__ typename WarpReduce::TempStorage temp_storage[4]; * * // Obtain one input item per thread * int thread_data = ... * * // Return the warp-wide sums to each lane0 (threads 0, 32, 64, and 96) * int warp_id = threadIdx.x / 32; * int aggregate = WarpReduce(temp_storage[warp_id]).Sum(thread_data); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is {0, 1, 2, 3, ..., 127}. * The corresponding output \p aggregate in threads 0, 32, 64, and 96 will \p 496, \p 1520, * \p 2544, and \p 3568, respectively (and is undefined in other threads). * * \par * The code snippet below illustrates a single warp sum reduction within a block of * 128 threads. * \par * \code * #include * * __global__ void ExampleKernel(...) * { * // Specialize WarpReduce for type int * typedef cub::WarpReduce WarpReduce; * * // Allocate WarpReduce shared memory for one warp * __shared__ typename WarpReduce::TempStorage temp_storage; * ... * * // Only the first warp performs a reduction * if (threadIdx.x < 32) * { * // Obtain one input item per thread * int thread_data = ... * * // Return the warp-wide sum to lane0 * int aggregate = WarpReduce(temp_storage).Sum(thread_data); * * \endcode * \par * Suppose the set of input \p thread_data across the warp of threads is {0, 1, 2, 3, ..., 31}. * The corresponding output \p aggregate in thread0 will be \p 496 (and is undefined in other threads). 
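 *
 * \par
 * A minimal, self-contained sketch (added to this copy for illustration; it is not part
 * of the original CUB documentation) of how the four-warp sum example above might be
 * completed into a runnable kernel. The kernel name and the \p d_in / \p d_warp_sums
 * buffers are hypothetical; a 128-thread block (four full 32-thread warps) is assumed.
 * \par
 * \code
 * #include <cub/cub.cuh>
 *
 * __global__ void WarpSumsKernel(const int *d_in, int *d_warp_sums)
 * {
 *     // Specialize WarpReduce for type int (default 32-thread logical warps)
 *     typedef cub::WarpReduce<int> WarpReduce;
 *
 *     // Allocate WarpReduce shared memory for 4 warps
 *     __shared__ typename WarpReduce::TempStorage temp_storage[4];
 *
 *     // Obtain one input item per thread
 *     int thread_data = d_in[blockIdx.x * blockDim.x + threadIdx.x];
 *
 *     // Warp-wide sum; the result is only valid in each warp's lane0
 *     int warp_id   = threadIdx.x / 32;
 *     int aggregate = WarpReduce(temp_storage[warp_id]).Sum(thread_data);
 *
 *     // Lane0 of each warp writes its aggregate to global memory
 *     if ((threadIdx.x % 32) == 0)
 *         d_warp_sums[blockIdx.x * 4 + warp_id] = aggregate;
 * }
 * \endcode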
* */ template < typename T, int LOGICAL_WARP_THREADS = CUB_PTX_WARP_THREADS, int PTX_ARCH = CUB_PTX_ARCH> class WarpReduce { private: /****************************************************************************** * Constants and type definitions ******************************************************************************/ enum { /// Whether the logical warp size and the PTX warp size coincide IS_ARCH_WARP = (LOGICAL_WARP_THREADS == CUB_WARP_THREADS(PTX_ARCH)), /// Whether the logical warp size is a power-of-two IS_POW_OF_TWO = PowerOfTwo::VALUE, }; public: #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document /// Internal specialization. Use SHFL-based reduction if (architecture is >= SM30) and (LOGICAL_WARP_THREADS is a power-of-two) typedef typename If<(PTX_ARCH >= 300) && (IS_POW_OF_TWO), WarpReduceShfl, WarpReduceSmem >::Type InternalWarpReduce; #endif // DOXYGEN_SHOULD_SKIP_THIS private: /// Shared memory storage layout type for WarpReduce typedef typename InternalWarpReduce::TempStorage _TempStorage; /****************************************************************************** * Thread fields ******************************************************************************/ /// Shared storage reference _TempStorage &temp_storage; /****************************************************************************** * Utility methods ******************************************************************************/ public: /// \smemstorage{WarpReduce} struct TempStorage : Uninitialized<_TempStorage> {}; /******************************************************************//** * \name Collective constructors *********************************************************************/ //@{ /** * \brief Collective constructor using the specified memory allocation as temporary storage. Logical warp and lane identifiers are constructed from threadIdx.x. */ __device__ __forceinline__ WarpReduce( TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage : temp_storage(temp_storage.Alias()) {} //@} end member group /******************************************************************//** * \name Summation reductions *********************************************************************/ //@{ /** * \brief Computes a warp-wide sum in the calling warp. The output is valid in warp lane0. * * \smemreuse * * \par Snippet * The code snippet below illustrates four concurrent warp sum reductions within a block of * 128 threads (one per each of the 32-thread warps). * \par * \code * #include * * __global__ void ExampleKernel(...) * { * // Specialize WarpReduce for type int * typedef cub::WarpReduce WarpReduce; * * // Allocate WarpReduce shared memory for 4 warps * __shared__ typename WarpReduce::TempStorage temp_storage[4]; * * // Obtain one input item per thread * int thread_data = ... * * // Return the warp-wide sums to each lane0 * int warp_id = threadIdx.x / 32; * int aggregate = WarpReduce(temp_storage[warp_id]).Sum(thread_data); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is {0, 1, 2, 3, ..., 127}. * The corresponding output \p aggregate in threads 0, 32, 64, and 96 will \p 496, \p 1520, * \p 2544, and \p 3568, respectively (and is undefined in other threads). * */ __device__ __forceinline__ T Sum( T input) ///< [in] Calling thread's input { return InternalWarpReduce(temp_storage).template Reduce(input, LOGICAL_WARP_THREADS, cub::Sum()); } /** * \brief Computes a partially-full warp-wide sum in the calling warp. 
The output is valid in warp lane0. * * All threads across the calling warp must agree on the same value for \p valid_items. Otherwise the result is undefined. * * \smemreuse * * \par Snippet * The code snippet below illustrates a sum reduction within a single, partially-full * block of 32 threads (one warp). * \par * \code * #include * * __global__ void ExampleKernel(int *d_data, int valid_items) * { * // Specialize WarpReduce for type int * typedef cub::WarpReduce WarpReduce; * * // Allocate WarpReduce shared memory for one warp * __shared__ typename WarpReduce::TempStorage temp_storage; * * // Obtain one input item per thread if in range * int thread_data; * if (threadIdx.x < valid_items) * thread_data = d_data[threadIdx.x]; * * // Return the warp-wide sums to each lane0 * int aggregate = WarpReduce(temp_storage).Sum( * thread_data, valid_items); * * \endcode * \par * Suppose the input \p d_data is {0, 1, 2, 3, 4, ... and \p valid_items * is \p 4. The corresponding output \p aggregate in thread0 is \p 6 (and is * undefined in other threads). * */ __device__ __forceinline__ T Sum( T input, ///< [in] Calling thread's input int valid_items) ///< [in] Total number of valid items in the calling thread's logical warp (may be less than \p LOGICAL_WARP_THREADS) { // Determine if we don't need bounds checking return InternalWarpReduce(temp_storage).template Reduce(input, valid_items, cub::Sum()); } /** * \brief Computes a segmented sum in the calling warp where segments are defined by head-flags. The sum of each segment is returned to the first lane in that segment (which always includes lane0). * * \smemreuse * * \par Snippet * The code snippet below illustrates a head-segmented warp sum * reduction within a block of 32 threads (one warp). * \par * \code * #include * * __global__ void ExampleKernel(...) * { * // Specialize WarpReduce for type int * typedef cub::WarpReduce WarpReduce; * * // Allocate WarpReduce shared memory for one warp * __shared__ typename WarpReduce::TempStorage temp_storage; * * // Obtain one input item and flag per thread * int thread_data = ... * int head_flag = ... * * // Return the warp-wide sums to each lane0 * int aggregate = WarpReduce(temp_storage).HeadSegmentedSum( * thread_data, head_flag); * * \endcode * \par * Suppose the set of input \p thread_data and \p head_flag across the block of threads * is {0, 1, 2, 3, ..., 31 and is {1, 0, 0, 0, 1, 0, 0, 0, ..., 1, 0, 0, 0, * respectively. The corresponding output \p aggregate in threads 0, 4, 8, etc. will be * \p 6, \p 22, \p 38, etc. (and is undefined in other threads). * * \tparam ReductionOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b) * */ template < typename FlagT> __device__ __forceinline__ T HeadSegmentedSum( T input, ///< [in] Calling thread's input FlagT head_flag) ///< [in] Head flag denoting whether or not \p input is the start of a new segment { return HeadSegmentedReduce(input, head_flag, cub::Sum()); } /** * \brief Computes a segmented sum in the calling warp where segments are defined by tail-flags. The sum of each segment is returned to the first lane in that segment (which always includes lane0). * * \smemreuse * * \par Snippet * The code snippet below illustrates a tail-segmented warp sum * reduction within a block of 32 threads (one warp). * \par * \code * #include * * __global__ void ExampleKernel(...) 
* { * // Specialize WarpReduce for type int * typedef cub::WarpReduce WarpReduce; * * // Allocate WarpReduce shared memory for one warp * __shared__ typename WarpReduce::TempStorage temp_storage; * * // Obtain one input item and flag per thread * int thread_data = ... * int tail_flag = ... * * // Return the warp-wide sums to each lane0 * int aggregate = WarpReduce(temp_storage).TailSegmentedSum( * thread_data, tail_flag); * * \endcode * \par * Suppose the set of input \p thread_data and \p tail_flag across the block of threads * is {0, 1, 2, 3, ..., 31 and is {0, 0, 0, 1, 0, 0, 0, 1, ..., 0, 0, 0, 1, * respectively. The corresponding output \p aggregate in threads 0, 4, 8, etc. will be * \p 6, \p 22, \p 38, etc. (and is undefined in other threads). * * \tparam ReductionOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b) */ template < typename FlagT> __device__ __forceinline__ T TailSegmentedSum( T input, ///< [in] Calling thread's input FlagT tail_flag) ///< [in] Head flag denoting whether or not \p input is the start of a new segment { return TailSegmentedReduce(input, tail_flag, cub::Sum()); } //@} end member group /******************************************************************//** * \name Generic reductions *********************************************************************/ //@{ /** * \brief Computes a warp-wide reduction in the calling warp using the specified binary reduction functor. The output is valid in warp lane0. * * Supports non-commutative reduction operators * * \smemreuse * * \par Snippet * The code snippet below illustrates four concurrent warp max reductions within a block of * 128 threads (one per each of the 32-thread warps). * \par * \code * #include * * __global__ void ExampleKernel(...) * { * // Specialize WarpReduce for type int * typedef cub::WarpReduce WarpReduce; * * // Allocate WarpReduce shared memory for 4 warps * __shared__ typename WarpReduce::TempStorage temp_storage[4]; * * // Obtain one input item per thread * int thread_data = ... * * // Return the warp-wide reductions to each lane0 * int warp_id = threadIdx.x / 32; * int aggregate = WarpReduce(temp_storage[warp_id]).Reduce( * thread_data, cub::Max()); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is {0, 1, 2, 3, ..., 127}. * The corresponding output \p aggregate in threads 0, 32, 64, and 96 will \p 31, \p 63, * \p 95, and \p 127, respectively (and is undefined in other threads). * * \tparam ReductionOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b) */ template __device__ __forceinline__ T Reduce( T input, ///< [in] Calling thread's input ReductionOp reduction_op) ///< [in] Binary reduction operator { return InternalWarpReduce(temp_storage).template Reduce(input, LOGICAL_WARP_THREADS, reduction_op); } /** * \brief Computes a partially-full warp-wide reduction in the calling warp using the specified binary reduction functor. The output is valid in warp lane0. * * All threads across the calling warp must agree on the same value for \p valid_items. Otherwise the result is undefined. * * Supports non-commutative reduction operators * * \smemreuse * * \par Snippet * The code snippet below illustrates a max reduction within a single, partially-full * block of 32 threads (one warp). 
* \par * \code * #include * * __global__ void ExampleKernel(int *d_data, int valid_items) * { * // Specialize WarpReduce for type int * typedef cub::WarpReduce WarpReduce; * * // Allocate WarpReduce shared memory for one warp * __shared__ typename WarpReduce::TempStorage temp_storage; * * // Obtain one input item per thread if in range * int thread_data; * if (threadIdx.x < valid_items) * thread_data = d_data[threadIdx.x]; * * // Return the warp-wide reductions to each lane0 * int aggregate = WarpReduce(temp_storage).Reduce( * thread_data, cub::Max(), valid_items); * * \endcode * \par * Suppose the input \p d_data is {0, 1, 2, 3, 4, ... and \p valid_items * is \p 4. The corresponding output \p aggregate in thread0 is \p 3 (and is * undefined in other threads). * * \tparam ReductionOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b) */ template __device__ __forceinline__ T Reduce( T input, ///< [in] Calling thread's input ReductionOp reduction_op, ///< [in] Binary reduction operator int valid_items) ///< [in] Total number of valid items in the calling thread's logical warp (may be less than \p LOGICAL_WARP_THREADS) { return InternalWarpReduce(temp_storage).template Reduce(input, valid_items, reduction_op); } /** * \brief Computes a segmented reduction in the calling warp where segments are defined by head-flags. The reduction of each segment is returned to the first lane in that segment (which always includes lane0). * * Supports non-commutative reduction operators * * \smemreuse * * \par Snippet * The code snippet below illustrates a head-segmented warp max * reduction within a block of 32 threads (one warp). * \par * \code * #include * * __global__ void ExampleKernel(...) * { * // Specialize WarpReduce for type int * typedef cub::WarpReduce WarpReduce; * * // Allocate WarpReduce shared memory for one warp * __shared__ typename WarpReduce::TempStorage temp_storage; * * // Obtain one input item and flag per thread * int thread_data = ... * int head_flag = ... * * // Return the warp-wide reductions to each lane0 * int aggregate = WarpReduce(temp_storage).HeadSegmentedReduce( * thread_data, head_flag, cub::Max()); * * \endcode * \par * Suppose the set of input \p thread_data and \p head_flag across the block of threads * is {0, 1, 2, 3, ..., 31 and is {1, 0, 0, 0, 1, 0, 0, 0, ..., 1, 0, 0, 0, * respectively. The corresponding output \p aggregate in threads 0, 4, 8, etc. will be * \p 3, \p 7, \p 11, etc. (and is undefined in other threads). * * \tparam ReductionOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b) */ template < typename ReductionOp, typename FlagT> __device__ __forceinline__ T HeadSegmentedReduce( T input, ///< [in] Calling thread's input FlagT head_flag, ///< [in] Head flag denoting whether or not \p input is the start of a new segment ReductionOp reduction_op) ///< [in] Reduction operator { return InternalWarpReduce(temp_storage).template SegmentedReduce(input, head_flag, reduction_op); } /** * \brief Computes a segmented reduction in the calling warp where segments are defined by tail-flags. The reduction of each segment is returned to the first lane in that segment (which always includes lane0). * * Supports non-commutative reduction operators * * \smemreuse * * \par Snippet * The code snippet below illustrates a tail-segmented warp max * reduction within a block of 32 threads (one warp). * \par * \code * #include * * __global__ void ExampleKernel(...) 
* { * // Specialize WarpReduce for type int * typedef cub::WarpReduce WarpReduce; * * // Allocate WarpReduce shared memory for one warp * __shared__ typename WarpReduce::TempStorage temp_storage; * * // Obtain one input item and flag per thread * int thread_data = ... * int tail_flag = ... * * // Return the warp-wide reductions to each lane0 * int aggregate = WarpReduce(temp_storage).TailSegmentedReduce( * thread_data, tail_flag, cub::Max()); * * \endcode * \par * Suppose the set of input \p thread_data and \p tail_flag across the block of threads * is {0, 1, 2, 3, ..., 31 and is {0, 0, 0, 1, 0, 0, 0, 1, ..., 0, 0, 0, 1, * respectively. The corresponding output \p aggregate in threads 0, 4, 8, etc. will be * \p 3, \p 7, \p 11, etc. (and is undefined in other threads). * * \tparam ReductionOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b) */ template < typename ReductionOp, typename FlagT> __device__ __forceinline__ T TailSegmentedReduce( T input, ///< [in] Calling thread's input FlagT tail_flag, ///< [in] Tail flag denoting whether or not \p input is the end of the current segment ReductionOp reduction_op) ///< [in] Reduction operator { return InternalWarpReduce(temp_storage).template SegmentedReduce(input, tail_flag, reduction_op); } //@} end member group }; /** @} */ // end group WarpModule } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cub/warp/warp_scan.cuh000066400000000000000000001140031411340063500220040ustar00rootroot00000000000000/****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the NVIDIA CORPORATION nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ******************************************************************************/ /** * \file * The cub::WarpScan class provides [collective](index.html#sec0) methods for computing a parallel prefix scan of items partitioned across a CUDA thread warp. 
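 *
 * (Illustrative note added to this copy, not part of the original header: for the inputs
 * {1, 1, 1, ..., 1} across a 32-thread warp, an inclusive prefix sum produces
 * {1, 2, 3, ..., 32}, whereas an exclusive prefix sum seeded with the identity 0
 * produces {0, 1, 2, ..., 31}.)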
*/ #pragma once #include "specializations/warp_scan_shfl.cuh" #include "specializations/warp_scan_smem.cuh" #include "../thread/thread_operators.cuh" #include "../util_arch.cuh" #include "../util_type.cuh" #include "../util_namespace.cuh" /// Optional outer namespace(s) CUB_NS_PREFIX /// CUB namespace namespace cub { /** * \addtogroup WarpModule * @{ */ /** * \brief The WarpScan class provides [collective](index.html#sec0) methods for computing a parallel prefix scan of items partitioned across a CUDA thread warp. ![](warp_scan_logo.png) * * \tparam T The scan input/output element type * \tparam LOGICAL_WARP_THREADS [optional] The number of threads per "logical" warp (may be less than the number of hardware warp threads). Default is the warp size associated with the CUDA Compute Capability targeted by the compiler (e.g., 32 threads for SM20). * \tparam PTX_ARCH [optional] \ptxversion * * \par Overview * - Given a list of input elements and a binary reduction operator, a [prefix scan](http://en.wikipedia.org/wiki/Prefix_sum) * produces an output list where each element is computed to be the reduction * of the elements occurring earlier in the input list. Prefix sum * connotes a prefix scan with the addition operator. The term \em inclusive indicates * that the ith output reduction incorporates the ith input. * The term \em exclusive indicates the ith input is not incorporated into * the ith output reduction. * - Supports non-commutative scan operators * - Supports "logical" warps smaller than the physical warp size (e.g., a logical warp of 8 threads) * - The number of entrant threads must be an multiple of \p LOGICAL_WARP_THREADS * * \par Performance Considerations * - Uses special instructions when applicable (e.g., warp \p SHFL) * - Uses synchronization-free communication between warp lanes when applicable * - Incurs zero bank conflicts for most types * - Computation is slightly more efficient (i.e., having lower instruction overhead) for: * - Summation (vs. generic scan) * - The architecture's warp size is a whole multiple of \p LOGICAL_WARP_THREADS * * \par Simple Examples * \warpcollective{WarpScan} * \par * The code snippet below illustrates four concurrent warp prefix sums within a block of * 128 threads (one per each of the 32-thread warps). * \par * \code * #include * * __global__ void ExampleKernel(...) * { * // Specialize WarpScan for type int * typedef cub::WarpScan WarpScan; * * // Allocate WarpScan shared memory for 4 warps * __shared__ typename WarpScan::TempStorage temp_storage[4]; * * // Obtain one input item per thread * int thread_data = ... * * // Compute warp-wide prefix sums * int warp_id = threadIdx.x / 32; * WarpScan(temp_storage[warp_id]).ExclusiveSum(thread_data, thread_data); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is {1, 1, 1, 1, ...}. * The corresponding output \p thread_data in each of the four warps of threads will be * 0, 1, 2, 3, ..., 31}. * * \par * The code snippet below illustrates a single warp prefix sum within a block of * 128 threads. * \par * \code * #include * * __global__ void ExampleKernel(...) * { * // Specialize WarpScan for type int * typedef cub::WarpScan WarpScan; * * // Allocate WarpScan shared memory for one warp * __shared__ typename WarpScan::TempStorage temp_storage; * ... * * // Only the first warp performs a prefix sum * if (threadIdx.x < 32) * { * // Obtain one input item per thread * int thread_data = ... 
* * // Compute warp-wide prefix sums * WarpScan(temp_storage).ExclusiveSum(thread_data, thread_data); * * \endcode * \par * Suppose the set of input \p thread_data across the warp of threads is {1, 1, 1, 1, ...}. * The corresponding output \p thread_data will be {0, 1, 2, 3, ..., 31}. * */ template < typename T, int LOGICAL_WARP_THREADS = CUB_PTX_WARP_THREADS, int PTX_ARCH = CUB_PTX_ARCH> class WarpScan { private: /****************************************************************************** * Constants and type definitions ******************************************************************************/ enum { /// Whether the logical warp size and the PTX warp size coincide IS_ARCH_WARP = (LOGICAL_WARP_THREADS == CUB_WARP_THREADS(PTX_ARCH)), /// Whether the logical warp size is a power-of-two IS_POW_OF_TWO = ((LOGICAL_WARP_THREADS & (LOGICAL_WARP_THREADS - 1)) == 0), /// Whether the data type is an integer (which has fully-associative addition) IS_INTEGER = ((Traits::CATEGORY == SIGNED_INTEGER) || (Traits::CATEGORY == UNSIGNED_INTEGER)) }; /// Internal specialization. Use SHFL-based scan if (architecture is >= SM30) and (LOGICAL_WARP_THREADS is a power-of-two) typedef typename If<(PTX_ARCH >= 300) && (IS_POW_OF_TWO), WarpScanShfl, WarpScanSmem >::Type InternalWarpScan; /// Shared memory storage layout type for WarpScan typedef typename InternalWarpScan::TempStorage _TempStorage; /****************************************************************************** * Thread fields ******************************************************************************/ /// Shared storage reference _TempStorage &temp_storage; unsigned int lane_id; /****************************************************************************** * Public types ******************************************************************************/ public: /// \smemstorage{WarpScan} struct TempStorage : Uninitialized<_TempStorage> {}; /******************************************************************//** * \name Collective constructors *********************************************************************/ //@{ /** * \brief Collective constructor using the specified memory allocation as temporary storage. Logical warp and lane identifiers are constructed from threadIdx.x. */ __device__ __forceinline__ WarpScan( TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage : temp_storage(temp_storage.Alias()), lane_id(IS_ARCH_WARP ? LaneId() : LaneId() % LOGICAL_WARP_THREADS) {} //@} end member group /******************************************************************//** * \name Inclusive prefix sums *********************************************************************/ //@{ /** * \brief Computes an inclusive prefix sum across the calling warp. * * \par * - \smemreuse * * \par Snippet * The code snippet below illustrates four concurrent warp-wide inclusive prefix sums within a block of * 128 threads (one per each of the 32-thread warps). * \par * \code * #include * * __global__ void ExampleKernel(...) * { * // Specialize WarpScan for type int * typedef cub::WarpScan WarpScan; * * // Allocate WarpScan shared memory for 4 warps * __shared__ typename WarpScan::TempStorage temp_storage[4]; * * // Obtain one input item per thread * int thread_data = ... 
* * // Compute inclusive warp-wide prefix sums * int warp_id = threadIdx.x / 32; * WarpScan(temp_storage[warp_id]).InclusiveSum(thread_data, thread_data); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is {1, 1, 1, 1, ...}. * The corresponding output \p thread_data in each of the four warps of threads will be * 1, 2, 3, ..., 32}. */ __device__ __forceinline__ void InclusiveSum( T input, ///< [in] Calling thread's input item. T &inclusive_output) ///< [out] Calling thread's output item. May be aliased with \p input. { InclusiveScan(input, inclusive_output, cub::Sum()); } /** * \brief Computes an inclusive prefix sum across the calling warp. Also provides every thread with the warp-wide \p warp_aggregate of all inputs. * * \par * - \smemreuse * * \par Snippet * The code snippet below illustrates four concurrent warp-wide inclusive prefix sums within a block of * 128 threads (one per each of the 32-thread warps). * \par * \code * #include * * __global__ void ExampleKernel(...) * { * // Specialize WarpScan for type int * typedef cub::WarpScan WarpScan; * * // Allocate WarpScan shared memory for 4 warps * __shared__ typename WarpScan::TempStorage temp_storage[4]; * * // Obtain one input item per thread * int thread_data = ... * * // Compute inclusive warp-wide prefix sums * int warp_aggregate; * int warp_id = threadIdx.x / 32; * WarpScan(temp_storage[warp_id]).InclusiveSum(thread_data, thread_data, warp_aggregate); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is {1, 1, 1, 1, ...}. * The corresponding output \p thread_data in each of the four warps of threads will be * 1, 2, 3, ..., 32}. Furthermore, \p warp_aggregate for all threads in all warps will be \p 32. */ __device__ __forceinline__ void InclusiveSum( T input, ///< [in] Calling thread's input item. T &inclusive_output, ///< [out] Calling thread's output item. May be aliased with \p input. T &warp_aggregate) ///< [out] Warp-wide aggregate reduction of input items. { InclusiveScan(input, inclusive_output, cub::Sum(), warp_aggregate); } //@} end member group /******************************************************************//** * \name Exclusive prefix sums *********************************************************************/ //@{ /** * \brief Computes an exclusive prefix sum across the calling warp. The value of 0 is applied as the initial value, and is assigned to \p exclusive_output in thread0. * * \par * - \identityzero * - \smemreuse * * \par Snippet * The code snippet below illustrates four concurrent warp-wide exclusive prefix sums within a block of * 128 threads (one per each of the 32-thread warps). * \par * \code * #include * * __global__ void ExampleKernel(...) * { * // Specialize WarpScan for type int * typedef cub::WarpScan WarpScan; * * // Allocate WarpScan shared memory for 4 warps * __shared__ typename WarpScan::TempStorage temp_storage[4]; * * // Obtain one input item per thread * int thread_data = ... * * // Compute exclusive warp-wide prefix sums * int warp_id = threadIdx.x / 32; * WarpScan(temp_storage[warp_id]).ExclusiveSum(thread_data, thread_data); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is {1, 1, 1, 1, ...}. * The corresponding output \p thread_data in each of the four warps of threads will be * 0, 1, 2, ..., 31}. * */ __device__ __forceinline__ void ExclusiveSum( T input, ///< [in] Calling thread's input item. T &exclusive_output) ///< [out] Calling thread's output item. 
May be aliased with \p input. { T initial_value = 0; ExclusiveScan(input, exclusive_output, initial_value, cub::Sum()); } /** * \brief Computes an exclusive prefix sum across the calling warp. The value of 0 is applied as the initial value, and is assigned to \p exclusive_output in thread0. Also provides every thread with the warp-wide \p warp_aggregate of all inputs. * * \par * - \identityzero * - \smemreuse * * \par Snippet * The code snippet below illustrates four concurrent warp-wide exclusive prefix sums within a block of * 128 threads (one per each of the 32-thread warps). * \par * \code * #include * * __global__ void ExampleKernel(...) * { * // Specialize WarpScan for type int * typedef cub::WarpScan WarpScan; * * // Allocate WarpScan shared memory for 4 warps * __shared__ typename WarpScan::TempStorage temp_storage[4]; * * // Obtain one input item per thread * int thread_data = ... * * // Compute exclusive warp-wide prefix sums * int warp_aggregate; * int warp_id = threadIdx.x / 32; * WarpScan(temp_storage[warp_id]).ExclusiveSum(thread_data, thread_data, warp_aggregate); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is {1, 1, 1, 1, ...}. * The corresponding output \p thread_data in each of the four warps of threads will be * 0, 1, 2, ..., 31}. Furthermore, \p warp_aggregate for all threads in all warps will be \p 32. */ __device__ __forceinline__ void ExclusiveSum( T input, ///< [in] Calling thread's input item. T &exclusive_output, ///< [out] Calling thread's output item. May be aliased with \p input. T &warp_aggregate) ///< [out] Warp-wide aggregate reduction of input items. { T initial_value = 0; ExclusiveScan(input, exclusive_output, initial_value, cub::Sum(), warp_aggregate); } //@} end member group /******************************************************************//** * \name Inclusive prefix scans *********************************************************************/ //@{ /** * \brief Computes an inclusive prefix scan using the specified binary scan functor across the calling warp. * * \par * - \smemreuse * * \par Snippet * The code snippet below illustrates four concurrent warp-wide inclusive prefix max scans within a block of * 128 threads (one per each of the 32-thread warps). * \par * \code * #include * * __global__ void ExampleKernel(...) * { * // Specialize WarpScan for type int * typedef cub::WarpScan WarpScan; * * // Allocate WarpScan shared memory for 4 warps * __shared__ typename WarpScan::TempStorage temp_storage[4]; * * // Obtain one input item per thread * int thread_data = ... * * // Compute inclusive warp-wide prefix max scans * int warp_id = threadIdx.x / 32; * WarpScan(temp_storage[warp_id]).InclusiveScan(thread_data, thread_data, cub::Max()); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is {0, -1, 2, -3, ..., 126, -127}. * The corresponding output \p thread_data in the first warp would be * 0, 0, 2, 2, ..., 30, 30, the output for the second warp would be 32, 32, 34, 34, ..., 62, 62, etc. * * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) */ template __device__ __forceinline__ void InclusiveScan( T input, ///< [in] Calling thread's input item. T &inclusive_output, ///< [out] Calling thread's output item. May be aliased with \p input. 
ScanOp scan_op) ///< [in] Binary scan operator { InternalWarpScan(temp_storage).InclusiveScan(input, inclusive_output, scan_op); } /** * \brief Computes an inclusive prefix scan using the specified binary scan functor across the calling warp. Also provides every thread with the warp-wide \p warp_aggregate of all inputs. * * \par * - \smemreuse * * \par Snippet * The code snippet below illustrates four concurrent warp-wide inclusive prefix max scans within a block of * 128 threads (one per each of the 32-thread warps). * \par * \code * #include * * __global__ void ExampleKernel(...) * { * // Specialize WarpScan for type int * typedef cub::WarpScan WarpScan; * * // Allocate WarpScan shared memory for 4 warps * __shared__ typename WarpScan::TempStorage temp_storage[4]; * * // Obtain one input item per thread * int thread_data = ... * * // Compute inclusive warp-wide prefix max scans * int warp_aggregate; * int warp_id = threadIdx.x / 32; * WarpScan(temp_storage[warp_id]).InclusiveScan( * thread_data, thread_data, cub::Max(), warp_aggregate); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is {0, -1, 2, -3, ..., 126, -127}. * The corresponding output \p thread_data in the first warp would be * 0, 0, 2, 2, ..., 30, 30, the output for the second warp would be 32, 32, 34, 34, ..., 62, 62, etc. * Furthermore, \p warp_aggregate would be assigned \p 30 for threads in the first warp, \p 62 for threads * in the second warp, etc. * * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) */ template __device__ __forceinline__ void InclusiveScan( T input, ///< [in] Calling thread's input item. T &inclusive_output, ///< [out] Calling thread's output item. May be aliased with \p input. ScanOp scan_op, ///< [in] Binary scan operator T &warp_aggregate) ///< [out] Warp-wide aggregate reduction of input items. { InternalWarpScan(temp_storage).InclusiveScan(input, inclusive_output, scan_op, warp_aggregate); } //@} end member group /******************************************************************//** * \name Exclusive prefix scans *********************************************************************/ //@{ /** * \brief Computes an exclusive prefix scan using the specified binary scan functor across the calling warp. Because no initial value is supplied, the \p output computed for warp-lane0 is undefined. * * \par * - \smemreuse * * \par Snippet * The code snippet below illustrates four concurrent warp-wide exclusive prefix max scans within a block of * 128 threads (one per each of the 32-thread warps). * \par * \code * #include * * __global__ void ExampleKernel(...) * { * // Specialize WarpScan for type int * typedef cub::WarpScan WarpScan; * * // Allocate WarpScan shared memory for 4 warps * __shared__ typename WarpScan::TempStorage temp_storage[4]; * * // Obtain one input item per thread * int thread_data = ... * * // Compute exclusive warp-wide prefix max scans * int warp_id = threadIdx.x / 32; * WarpScan(temp_storage[warp_id]).ExclusiveScan(thread_data, thread_data, cub::Max()); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is {0, -1, 2, -3, ..., 126, -127}. * The corresponding output \p thread_data in the first warp would be * ?, 0, 0, 2, ..., 28, 30, the output for the second warp would be ?, 32, 32, 34, ..., 60, 62, etc. * (The output \p thread_data in warp lane0 is undefined.) 
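 *
 * \par
 * (Illustrative note added to this copy, not part of the original CUB documentation:
 * if a well-defined value is required in warp-lane0, either use the overload below that
 * accepts an \p initial_value, or overwrite lane0 explicitly after the call. The lines
 * below reuse the names from the snippet above and assume a max-scan:)
 * \par
 * \code
 * WarpScan(temp_storage[warp_id]).ExclusiveScan(thread_data, thread_data, cub::Max());
 * if ((threadIdx.x % 32) == 0)
 *     thread_data = INT_MIN;   // assign lane0 the identity of the max operation
 * \endcode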
* * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) */ template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input item. T &exclusive_output, ///< [out] Calling thread's output item. May be aliased with \p input. ScanOp scan_op) ///< [in] Binary scan operator { InternalWarpScan internal(temp_storage); T inclusive_output; internal.InclusiveScan(input, inclusive_output, scan_op); internal.Update( input, inclusive_output, exclusive_output, scan_op, Int2Type()); } /** * \brief Computes an exclusive prefix scan using the specified binary scan functor across the calling warp. * * \par * - \smemreuse * * \par Snippet * The code snippet below illustrates four concurrent warp-wide exclusive prefix max scans within a block of * 128 threads (one per each of the 32-thread warps). * \par * \code * #include * * __global__ void ExampleKernel(...) * { * // Specialize WarpScan for type int * typedef cub::WarpScan WarpScan; * * // Allocate WarpScan shared memory for 4 warps * __shared__ typename WarpScan::TempStorage temp_storage[4]; * * // Obtain one input item per thread * int thread_data = ... * * // Compute exclusive warp-wide prefix max scans * int warp_id = threadIdx.x / 32; * WarpScan(temp_storage[warp_id]).ExclusiveScan(thread_data, thread_data, INT_MIN, cub::Max()); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is {0, -1, 2, -3, ..., 126, -127}. * The corresponding output \p thread_data in the first warp would be * INT_MIN, 0, 0, 2, ..., 28, 30, the output for the second warp would be 30, 32, 32, 34, ..., 60, 62, etc. * * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) */ template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input item. T &exclusive_output, ///< [out] Calling thread's output item. May be aliased with \p input. T initial_value, ///< [in] Initial value to seed the exclusive scan ScanOp scan_op) ///< [in] Binary scan operator { InternalWarpScan internal(temp_storage); T inclusive_output; internal.InclusiveScan(input, inclusive_output, scan_op); internal.Update( input, inclusive_output, exclusive_output, scan_op, initial_value, Int2Type()); } /** * \brief Computes an exclusive prefix scan using the specified binary scan functor across the calling warp. Because no initial value is supplied, the \p output computed for warp-lane0 is undefined. Also provides every thread with the warp-wide \p warp_aggregate of all inputs. * * \par * - \smemreuse * * \par Snippet * The code snippet below illustrates four concurrent warp-wide exclusive prefix max scans within a block of * 128 threads (one per each of the 32-thread warps). * \par * \code * #include * * __global__ void ExampleKernel(...) * { * // Specialize WarpScan for type int * typedef cub::WarpScan WarpScan; * * // Allocate WarpScan shared memory for 4 warps * __shared__ typename WarpScan::TempStorage temp_storage[4]; * * // Obtain one input item per thread * int thread_data = ... * * // Compute exclusive warp-wide prefix max scans * int warp_aggregate; * int warp_id = threadIdx.x / 32; * WarpScan(temp_storage[warp_id]).ExclusiveScan(thread_data, thread_data, cub::Max(), warp_aggregate); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is {0, -1, 2, -3, ..., 126, -127}. 
* The corresponding output \p thread_data in the first warp would be * ?, 0, 0, 2, ..., 28, 30, the output for the second warp would be ?, 32, 32, 34, ..., 60, 62, etc. * (The output \p thread_data in warp lane0 is undefined.) Furthermore, \p warp_aggregate would be assigned \p 30 for threads in the first warp, \p 62 for threads * in the second warp, etc. * * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) */ template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input item. T &exclusive_output, ///< [out] Calling thread's output item. May be aliased with \p input. ScanOp scan_op, ///< [in] Binary scan operator T &warp_aggregate) ///< [out] Warp-wide aggregate reduction of input items. { InternalWarpScan internal(temp_storage); T inclusive_output; internal.InclusiveScan(input, inclusive_output, scan_op); internal.Update( input, inclusive_output, exclusive_output, warp_aggregate, scan_op, Int2Type()); } /** * \brief Computes an exclusive prefix scan using the specified binary scan functor across the calling warp. Also provides every thread with the warp-wide \p warp_aggregate of all inputs. * * \par * - \smemreuse * * \par Snippet * The code snippet below illustrates four concurrent warp-wide exclusive prefix max scans within a block of * 128 threads (one per each of the 32-thread warps). * \par * \code * #include * * __global__ void ExampleKernel(...) * { * // Specialize WarpScan for type int * typedef cub::WarpScan WarpScan; * * // Allocate WarpScan shared memory for 4 warps * __shared__ typename WarpScan::TempStorage temp_storage[4]; * * // Obtain one input item per thread * int thread_data = ... * * // Compute exclusive warp-wide prefix max scans * int warp_aggregate; * int warp_id = threadIdx.x / 32; * WarpScan(temp_storage[warp_id]).ExclusiveScan(thread_data, thread_data, INT_MIN, cub::Max(), warp_aggregate); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is {0, -1, 2, -3, ..., 126, -127}. * The corresponding output \p thread_data in the first warp would be * INT_MIN, 0, 0, 2, ..., 28, 30, the output for the second warp would be 30, 32, 32, 34, ..., 60, 62, etc. * Furthermore, \p warp_aggregate would be assigned \p 30 for threads in the first warp, \p 62 for threads * in the second warp, etc. * * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) */ template __device__ __forceinline__ void ExclusiveScan( T input, ///< [in] Calling thread's input item. T &exclusive_output, ///< [out] Calling thread's output item. May be aliased with \p input. T initial_value, ///< [in] Initial value to seed the exclusive scan ScanOp scan_op, ///< [in] Binary scan operator T &warp_aggregate) ///< [out] Warp-wide aggregate reduction of input items. { InternalWarpScan internal(temp_storage); T inclusive_output; internal.InclusiveScan(input, inclusive_output, scan_op); internal.Update( input, inclusive_output, exclusive_output, warp_aggregate, scan_op, initial_value, Int2Type()); } //@} end member group /******************************************************************//** * \name Combination (inclusive & exclusive) prefix scans *********************************************************************/ //@{ /** * \brief Computes both inclusive and exclusive prefix scans using the specified binary scan functor across the calling warp. 
Because no initial value is supplied, the \p exclusive_output computed for warp-lane0 is undefined. * * \par * - \smemreuse * * \par Snippet * The code snippet below illustrates four concurrent warp-wide exclusive prefix max scans within a block of * 128 threads (one per each of the 32-thread warps). * \par * \code * #include * * __global__ void ExampleKernel(...) * { * // Specialize WarpScan for type int * typedef cub::WarpScan WarpScan; * * // Allocate WarpScan shared memory for 4 warps * __shared__ typename WarpScan::TempStorage temp_storage[4]; * * // Obtain one input item per thread * int thread_data = ... * * // Compute exclusive warp-wide prefix max scans * int inclusive_partial, exclusive_partial; * WarpScan(temp_storage[warp_id]).Scan(thread_data, inclusive_partial, exclusive_partial, cub::Max()); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is {0, -1, 2, -3, ..., 126, -127}. * The corresponding output \p inclusive_partial in the first warp would be * 0, 0, 2, 2, ..., 30, 30, the output for the second warp would be 32, 32, 34, 34, ..., 62, 62, etc. * The corresponding output \p exclusive_partial in the first warp would be * ?, 0, 0, 2, ..., 28, 30, the output for the second warp would be ?, 32, 32, 34, ..., 60, 62, etc. * (The output \p thread_data in warp lane0 is undefined.) * * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) */ template __device__ __forceinline__ void Scan( T input, ///< [in] Calling thread's input item. T &inclusive_output, ///< [out] Calling thread's inclusive-scan output item. T &exclusive_output, ///< [out] Calling thread's exclusive-scan output item. ScanOp scan_op) ///< [in] Binary scan operator { InternalWarpScan internal(temp_storage); internal.InclusiveScan(input, inclusive_output, scan_op); internal.Update( input, inclusive_output, exclusive_output, scan_op, Int2Type()); } /** * \brief Computes both inclusive and exclusive prefix scans using the specified binary scan functor across the calling warp. * * \par * - \smemreuse * * \par Snippet * The code snippet below illustrates four concurrent warp-wide prefix max scans within a block of * 128 threads (one per each of the 32-thread warps). * \par * \code * #include * * __global__ void ExampleKernel(...) * { * // Specialize WarpScan for type int * typedef cub::WarpScan WarpScan; * * // Allocate WarpScan shared memory for 4 warps * __shared__ typename WarpScan::TempStorage temp_storage[4]; * * // Obtain one input item per thread * int thread_data = ... * * // Compute inclusive warp-wide prefix max scans * int warp_id = threadIdx.x / 32; * int inclusive_partial, exclusive_partial; * WarpScan(temp_storage[warp_id]).Scan(thread_data, inclusive_partial, exclusive_partial, INT_MIN, cub::Max()); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is {0, -1, 2, -3, ..., 126, -127}. * The corresponding output \p inclusive_partial in the first warp would be * 0, 0, 2, 2, ..., 30, 30, the output for the second warp would be 32, 32, 34, 34, ..., 62, 62, etc. * The corresponding output \p exclusive_partial in the first warp would be * INT_MIN, 0, 0, 2, ..., 28, 30, the output for the second warp would be 30, 32, 32, 34, ..., 60, 62, etc. * * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) */ template __device__ __forceinline__ void Scan( T input, ///< [in] Calling thread's input item. 
T &inclusive_output, ///< [out] Calling thread's inclusive-scan output item. T &exclusive_output, ///< [out] Calling thread's exclusive-scan output item. T initial_value, ///< [in] Initial value to seed the exclusive scan ScanOp scan_op) ///< [in] Binary scan operator { InternalWarpScan internal(temp_storage); internal.InclusiveScan(input, inclusive_output, scan_op); internal.Update( input, inclusive_output, exclusive_output, scan_op, initial_value, Int2Type()); } //@} end member group /******************************************************************//** * \name Data exchange *********************************************************************/ //@{ /** * \brief Broadcast the value \p input from warp-lanesrc_lane to all lanes in the warp * * \par * - \smemreuse * * \par Snippet * The code snippet below illustrates the warp-wide broadcasts of values from * lanes0 in each of four warps to all other threads in those warps. * \par * \code * #include * * __global__ void ExampleKernel(...) * { * // Specialize WarpScan for type int * typedef cub::WarpScan WarpScan; * * // Allocate WarpScan shared memory for 4 warps * __shared__ typename WarpScan::TempStorage temp_storage[4]; * * // Obtain one input item per thread * int thread_data = ... * * // Broadcast from lane0 in each warp to all other threads in the warp * int warp_id = threadIdx.x / 32; * thread_data = WarpScan(temp_storage[warp_id]).Broadcast(thread_data, 0); * * \endcode * \par * Suppose the set of input \p thread_data across the block of threads is {0, 1, 2, 3, ..., 127}. * The corresponding output \p thread_data will be * {0, 0, ..., 0} in warp0, * {32, 32, ..., 32} in warp1, * {64, 64, ..., 64} in warp2, etc. */ __device__ __forceinline__ T Broadcast( T input, ///< [in] The value to broadcast unsigned int src_lane) ///< [in] Which warp lane is to do the broadcasting { return InternalWarpScan(temp_storage).Broadcast(input, src_lane); } //@} end member group }; /** @} */ // end group WarpModule } // CUB namespace CUB_NS_POSTFIX // Optional outer namespace(s) relion-3.1.3/src/gpu_utils/cuda_autopicker.cu000066400000000000000000001023111411340063500212760ustar00rootroot00000000000000#include #include #include #include #include #include #include #include #include #include "src/gpu_utils/cuda_autopicker.h" #include "src/gpu_utils/cuda_mem_utils.h" #include "src/gpu_utils/cuda_projector.h" #include "src/gpu_utils/cuda_settings.h" #include "src/gpu_utils/cuda_benchmark_utils.h" #include "src/gpu_utils/cuda_helper_functions.cuh" #include "src/gpu_utils/cuda_fft.h" #include "src/macros.h" #include "src/error.h" #ifdef CUDA_FORCESTL #include "src/gpu_utils/cuda_utils_stl.cuh" #else #include "src/gpu_utils/cuda_utils_cub.cuh" #endif AutoPickerCuda::AutoPickerCuda(AutoPicker *basePicker, int dev_id, const char * timing_fnm) : node(NULL), basePckr(basePicker), allocator(new CudaCustomAllocator(0, 1)), micTransformer(0, allocator), cudaTransformer1(0, allocator), #ifdef TIMING_FILES timer(timing_fnm), #endif cudaTransformer2(0, allocator) { cudaProjectors.resize(basePckr->Mrefs.size()); have_warned_batching=false; /*====================================================== DEVICE SETTINGS ======================================================*/ device_id = dev_id; int devCount; HANDLE_ERROR(cudaGetDeviceCount(&devCount)); if(dev_id >= devCount) { //std::cerr << " using device_id=" << dev_id << " (device no. 
" << dev_id+1 << ") which is higher than the available number of devices=" << devCount << std::endl; CRITICAL(ERR_GPUID); } else HANDLE_ERROR(cudaSetDevice(dev_id)); }; AutoPickerCuda::AutoPickerCuda(AutoPickerMpi *basePicker, int dev_id, const char * timing_fnm) : basePckr(basePicker), allocator(new CudaCustomAllocator(0, 1)), micTransformer(0, allocator), cudaTransformer1(0, allocator), #ifdef TIMING_FILES timer(timing_fnm), #endif cudaTransformer2(0, allocator) { node = basePicker->getNode(); basePicker->verb = (node->isMaster()) ? 1 : 0; cudaProjectors.resize(basePckr->Mrefs.size()); have_warned_batching=false; /*====================================================== DEVICE SETTINGS ======================================================*/ device_id = dev_id; int devCount; HANDLE_ERROR(cudaGetDeviceCount(&devCount)); if(dev_id >= devCount) { //std::cerr << " using device_id=" << dev_id << " (device no. " << dev_id+1 << ") which is higher than the available number of devices=" << devCount << std::endl; CRITICAL(ERR_GPUID); } else HANDLE_ERROR(cudaSetDevice(dev_id)); }; void AutoPickerCuda::run() { long int my_first_micrograph, my_last_micrograph, my_nr_micrographs; if(node!=NULL) { // Each node does part of the work divide_equally(basePckr->fn_micrographs.size(), node->size, node->rank, my_first_micrograph, my_last_micrograph); } else { my_first_micrograph = 0; my_last_micrograph = basePckr->fn_micrographs.size() - 1; } my_nr_micrographs = my_last_micrograph - my_first_micrograph + 1; int barstep; if (basePckr->verb > 0) { std::cout << " Autopicking ..." << std::endl; init_progress_bar(my_nr_micrographs); barstep = XMIPP_MAX(1, my_nr_micrographs / 60); } if (!basePckr->do_read_fom_maps) { CTIC(timer,"setupProjectors"); for (int iref = 0; iref < (basePckr->Mrefs.size()); iref++) { cudaProjectors[iref].setMdlDim( basePckr->PPref[iref].data.xdim, basePckr->PPref[iref].data.ydim, basePckr->PPref[iref].data.zdim, basePckr->PPref[iref].data.yinit, basePckr->PPref[iref].data.zinit, basePckr->PPref[iref].r_max, basePckr->PPref[iref].padding_factor); cudaProjectors[iref].initMdl(&(basePckr->PPref[iref].data.data[0])); } CTOC(timer,"setupProjectors"); } FileName fn_olddir=""; for (long int imic = my_first_micrograph; imic <= my_last_micrograph; imic++) { if (basePckr->verb > 0 && imic % barstep == 0) progress_bar(imic); // Check new-style outputdirectory exists and make it if not! 
FileName fn_dir = basePckr->getOutputRootName(basePckr->fn_micrographs[imic]); fn_dir = fn_dir.beforeLastOf("/"); if (fn_dir != fn_olddir) { // Make a Particles directory int res = system(("mkdir -p " + fn_dir).c_str()); fn_olddir = fn_dir; } #ifdef TIMING basePckr->timer.tic(basePckr->TIMING_A5); #endif autoPickOneMicrograph(basePckr->fn_micrographs[imic], imic); } #ifdef TIMING basePckr->timer.toc(basePckr->TIMING_A5); #endif if (basePckr->verb > 0) progress_bar(my_nr_micrographs); cudaDeviceReset(); } void AutoPickerCuda::calculateStddevAndMeanUnderMask(CudaGlobalPtr< CUDACOMPLEX > &d_Fmic, CudaGlobalPtr< CUDACOMPLEX > &d_Fmic2, CudaGlobalPtr< CUDACOMPLEX > &d_Fmsk, int nr_nonzero_pixels_mask, CudaGlobalPtr< XFLOAT > &d_Mstddev, CudaGlobalPtr< XFLOAT > &d_Mmean, size_t x, size_t y, size_t mic_size, size_t workSize) { cudaTransformer2.setSize(workSize,workSize,1); deviceInitValue(d_Mstddev, (XFLOAT)0.); RFLOAT normfft = (RFLOAT)(mic_size * mic_size) / (RFLOAT)nr_nonzero_pixels_mask; CudaGlobalPtr< CUDACOMPLEX > d_Fcov(d_Fmic.getAllocator()); d_Fcov.device_alloc(d_Fmic.getSize()); CTIC(timer,"PRE-multi_0"); int Bsize( (int) ceilf(( float)d_Fmic.size/(float)BLOCK_SIZE)); cuda_kernel_convol_B<<>>( ~d_Fmic, ~d_Fmsk, ~d_Fcov, d_Fmic.getSize()); LAUNCH_HANDLE_ERROR(cudaGetLastError()); CTOC(timer,"PRE-multi_0"); CTIC(timer,"PRE-window_0"); windowFourierTransform2( d_Fcov, cudaTransformer2.fouriers, x, y, 1, workSize/2+1, workSize, 1); CTOC(timer,"PRE-window_0"); CTIC(timer,"PRE-Transform_0"); cudaTransformer2.backward(); CTOC(timer,"PRE-Transform_0"); Bsize = ( (int) ceilf(( float)cudaTransformer2.reals.size/(float)BLOCK_SIZE)); cuda_kernel_multi<<>>( cudaTransformer2.reals.d_ptr, cudaTransformer2.reals.d_ptr, (XFLOAT) normfft, cudaTransformer2.reals.size); LAUNCH_HANDLE_ERROR(cudaGetLastError()); CTIC(timer,"PRE-multi_1"); cuda_kernel_multi<<>>( cudaTransformer2.reals.d_ptr, cudaTransformer2.reals.d_ptr, d_Mstddev.d_ptr, (XFLOAT) -1, cudaTransformer2.reals.size); LAUNCH_HANDLE_ERROR(cudaGetLastError()); CTOC(timer,"PRE-multi_1"); CTIC(timer,"PRE-CenterFFT_0"); runCenterFFT(cudaTransformer2.reals, (int)cudaTransformer2.xSize, (int)cudaTransformer2.ySize, false, 1); CTOC(timer,"PRE-CenterFFT_0"); cudaTransformer2.reals.cp_on_device(d_Mmean); //TODO remove the need for this CTIC(timer,"PRE-multi_2"); Bsize = ( (int) ceilf(( float)d_Fmsk.size/(float)BLOCK_SIZE)); cuda_kernel_convol_A<<>>( ~d_Fmsk, ~d_Fmic2, ~d_Fcov, d_Fmsk.size); LAUNCH_HANDLE_ERROR(cudaGetLastError()); CTOC(timer,"PRE-multi_2"); CTIC(timer,"PRE-window_1"); windowFourierTransform2( d_Fcov, cudaTransformer2.fouriers, x, y, 1, workSize/2+1, workSize, 1); CTOC(timer,"PRE-window_1"); CTIC(timer,"PRE-Transform_1"); cudaTransformer2.backward(); CTOC(timer,"PRE-Transform_1"); CTIC(timer,"PRE-multi_3"); Bsize = ( (int) ceilf(( float)d_Mstddev.size/(float)BLOCK_SIZE)); cuda_kernel_finalizeMstddev<<>>( d_Mstddev.d_ptr, cudaTransformer2.reals.d_ptr, normfft, d_Mstddev.size); LAUNCH_HANDLE_ERROR(cudaGetLastError()); CTOC(timer,"PRE-multi_3"); CTIC(timer,"PRE-CenterFFT_1"); runCenterFFT(d_Mstddev, (int)workSize, (int)workSize, false, 1); CTOC(timer,"PRE-CenterFFT_1"); } void AutoPickerCuda::autoPickOneMicrograph(FileName &fn_mic, long int imic) { Image Imic; MultidimArray Faux, Faux2, Fmic; MultidimArray Maux, Mstddev, Mccf_best, Mpsi_best, Fctf, Mccf_best_combined; MultidimArray Mclass_best_combined; CudaGlobalPtr d_Mccf_best(basePckr->workSize*basePckr->workSize, allocator); CudaGlobalPtr d_Mpsi_best(basePckr->workSize*basePckr->workSize, 
allocator); d_Mccf_best.device_alloc(); d_Mpsi_best.device_alloc(); // Always use the same random seed init_random_generator(basePckr->random_seed + imic); RFLOAT sum_ref_under_circ_mask, sum_ref2_under_circ_mask; int my_skip_side = basePckr->autopick_skip_side + basePckr->particle_size/2; CTF ctf; int Npsi = 360 / basePckr->psi_sampling; int min_distance_pix = ROUND(basePckr->min_particle_distance / basePckr->angpix); XFLOAT scale = (XFLOAT)basePckr->workSize / (XFLOAT)basePckr->micrograph_size; // Read in the micrograph #ifdef TIMING basePckr->timer.tic(basePckr->TIMING_A6); #endif CTIC(timer,"readMicrograph"); Imic.read(fn_mic); CTOC(timer,"readMicrograph"); CTIC(timer,"setXmippOrigin_0"); Imic().setXmippOrigin(); CTOC(timer,"setXmippOrigin_0"); #ifdef TIMING basePckr->timer.toc(basePckr->TIMING_A6); #endif // Let's just check the square size again.... RFLOAT my_size, my_xsize, my_ysize; my_xsize = XSIZE(Imic()); my_ysize = YSIZE(Imic()); my_size = (my_xsize != my_ysize) ? XMIPP_MAX(my_xsize, my_ysize) : my_xsize; if (my_size != basePckr->micrograph_size || my_xsize != basePckr->micrograph_xsize || my_ysize != basePckr->micrograph_ysize) { Imic().printShape(); std::cerr << " micrograph_size= " << basePckr->micrograph_size << " micrograph_xsize= " << basePckr->micrograph_xsize << " micrograph_ysize= " << basePckr->micrograph_ysize << std::endl; REPORT_ERROR("AutoPicker::autoPickOneMicrograph ERROR: No differently sized micrographs are allowed in one run, sorry you will have to run separately for each size..."); } if(!basePckr->do_read_fom_maps) { CTIC(timer,"setSize_micTr"); micTransformer.setSize(basePckr->micrograph_size, basePckr->micrograph_size, 1,1); CTOC(timer,"setSize_micTr"); CTIC(timer,"setSize_cudaTr"); cudaTransformer1.setSize(basePckr->workSize,basePckr->workSize, 1, Npsi, FFTW_BACKWARD); CTOC(timer,"setSize_cudaTr"); } HANDLE_ERROR(cudaDeviceSynchronize()); if(cudaTransformer1.batchSize.size()>1 && !have_warned_batching) { have_warned_batching = true; std::cerr << std::endl << "*-----------------------------WARNING------------------------------------------------*"<< std::endl; std::cerr << "With the current settings the GPU memory is imposing a soft limit on your performace," << std::endl; std::cerr << "since one or more micrographs has to use (at least " << cudaTransformer1.batchSize.size() << ") batches of orientations to "<< std::endl; std::cerr << "achieve the total requested " << Npsi << " orientations. Consider using" << std::endl; std::cerr << "\t higher --ang" << std::endl; std::cerr << "\t harder --shrink" << std::endl; std::cerr << "\t higher --lowpass with --shrink 0" << std::endl; std::cerr << "*------------------------------------------------------------------------------------*"<< std::endl; } // Set mean to zero and stddev to 1 to prevent numerical problems with one-sweep stddev calculations.... 
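// The block below first computes the micrograph statistics, then replaces
// outliers (pixels further than outlier_removal_zscore standard deviations away
// from the mean) by the mean value, and finally rescales every pixel to
// (x - avg0) / stddev0, so the micrograph ends up with mean ~0 and stddev ~1.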
RFLOAT avg0, stddev0, minval0, maxval0; #ifdef TIMING basePckr->timer.tic(basePckr->TIMING_A7); #endif CTIC(timer,"computeStats"); Imic().computeStats(avg0, stddev0, minval0, maxval0); CTOC(timer,"computeStats"); #ifdef TIMING basePckr->timer.toc(basePckr->TIMING_A7); #endif CTIC(timer,"middlePassFilter"); FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Imic()) { // Remove pixel values that are too far away from the mean if ( ABS(DIRECT_MULTIDIM_ELEM(Imic(), n) - avg0) / stddev0 > basePckr->outlier_removal_zscore) DIRECT_MULTIDIM_ELEM(Imic(), n) = avg0; DIRECT_MULTIDIM_ELEM(Imic(), n) = (DIRECT_MULTIDIM_ELEM(Imic(), n) - avg0) / stddev0; } CTOC(timer,"middlePassFilter"); if (basePckr->micrograph_xsize !=basePckr->micrograph_ysize) { CTIC(timer,"rewindow"); // Window non-square micrographs to be a square with the largest side rewindow(Imic, basePckr->micrograph_size); CTOC(timer,"rewindow"); CTIC(timer,"gaussNoiseOutside"); // Fill region outside the original window with white Gaussian noise to prevent all-zeros in Mstddev FOR_ALL_ELEMENTS_IN_ARRAY2D(Imic()) { if (i < FIRST_XMIPP_INDEX(basePckr->micrograph_ysize) || i > LAST_XMIPP_INDEX(basePckr->micrograph_ysize) || j < FIRST_XMIPP_INDEX(basePckr->micrograph_xsize) || j > LAST_XMIPP_INDEX(basePckr->micrograph_xsize) ) A2D_ELEM(Imic(), i, j) = rnd_gaus(0.,1.); } CTOC(timer,"gaussNoiseOutside"); } #ifdef TIMING basePckr->timer.tic(basePckr->TIMING_A8); #endif CTIC(timer,"CTFread"); // Read in the CTF information if needed if (basePckr->do_ctf) { // Search for this micrograph in the metadata table FOR_ALL_OBJECTS_IN_METADATA_TABLE(basePckr->MDmic) { FileName fn_tmp; basePckr->MDmic.getValue(EMDL_MICROGRAPH_NAME, fn_tmp); if (fn_tmp==fn_mic) { ctf.read(basePckr->MDmic, basePckr->MDmic); Fctf.resize(basePckr->workSize,basePckr->workSize/2+1); ctf.getFftwImage(Fctf, basePckr->micrograph_size, basePckr->micrograph_size, basePckr->angpix, false, false, basePckr->intact_ctf_first_peak, true); break; } } } CTOC(timer,"CTFread"); #ifdef TIMING basePckr->timer.toc(basePckr->TIMING_A8); #endif #ifdef TIMING basePckr->timer.tic(basePckr->TIMING_A9); #endif CTIC(timer,"mccfResize"); Mccf_best.resize(basePckr->workSize,basePckr->workSize); CTOC(timer,"mccfResize"); CTIC(timer,"mpsiResize"); Mpsi_best.resize(basePckr->workSize,basePckr->workSize); CTOC(timer,"mpsiResize"); #ifdef TIMING basePckr->timer.toc(basePckr->TIMING_A9); #endif CudaGlobalPtr< CUDACOMPLEX > d_Fmic(allocator); CudaGlobalPtr d_Mmean(allocator); CudaGlobalPtr d_Mstddev(allocator); #ifdef TIMING basePckr->timer.tic(basePckr->TIMING_B1); #endif RFLOAT normfft = (RFLOAT)(basePckr->micrograph_size*basePckr->micrograph_size) / (RFLOAT)basePckr->nr_pixels_circular_mask;; if (basePckr->do_read_fom_maps) { CTIC(timer,"readFromFomMaps_0"); FileName fn_tmp=basePckr->getOutputRootName(fn_mic)+"_"+basePckr->fn_out+"_stddevNoise.spi"; Image It; It.read(fn_tmp); Mstddev = It(); CTOC(timer,"readFromFomMaps_0"); } else { /* * Squared difference FOM: * Sum ( (X-mu)/sig - A )^2 = * = Sum((X-mu)/sig)^2 - 2 Sum (A*(X-mu)/sig) + Sum(A)^2 * = (1/sig^2)*Sum(X^2) - (2*mu/sig^2)*Sum(X) + (mu^2/sig^2)*Sum(1) - (2/sig)*Sum(AX) + (2*mu/sig)*Sum(A) + Sum(A^2) * * However, the squared difference with an "empty" ie all-zero reference is: * Sum ( (X-mu)/sig)^2 * * The ratio of the probabilities thereby becomes: * P(ref) = 1/sqrt(2pi) * exp (( (X-mu)/sig - A )^2 / -2 ) // assuming sigma = 1! 
* P(zero) = 1/sqrt(2pi) * exp (( (X-mu)/sig )^2 / -2 ) * * P(ref)/P(zero) = exp(( (X-mu)/sig - A )^2 / -2) / exp ( ( (X-mu)/sig )^2 / -2) * = exp( (- (2/sig)*Sum(AX) + (2*mu/sig)*Sum(A) + Sum(A^2)) / - 2 ) * * Therefore, I do not need to calculate (X-mu)/sig beforehand!!! * */ CTIC(timer,"Imic_insert"); for(int i = 0; i< Imic().nzyxdim ; i++) micTransformer.reals[i] = (XFLOAT) Imic().data[i]; micTransformer.reals.cp_to_device(); CTOC(timer,"Imic_insert"); CTIC(timer,"runCenterFFT_0"); runCenterFFT(micTransformer.reals, micTransformer.xSize, micTransformer.ySize, true, 1); CTOC(timer,"runCenterFFT_0"); CTIC(timer,"FourierTransform_0"); micTransformer.forward(); int FMultiBsize = ( (int) ceilf(( float)micTransformer.fouriers.getSize()*2/(float)BLOCK_SIZE)); cuda_kernel_multi<<>>( (XFLOAT*)~micTransformer.fouriers, (XFLOAT)1/((XFLOAT)(micTransformer.reals.getSize())), micTransformer.fouriers.getSize()*2); LAUNCH_HANDLE_ERROR(cudaGetLastError()); CTOC(timer,"FourierTransform_0"); if (basePckr->highpass > 0.) { CTIC(timer,"highpass"); micTransformer.fouriers.streamSync(); lowPassFilterMapGPU( micTransformer.fouriers, (size_t)1, micTransformer.yFSize, micTransformer.xFSize, XSIZE(Imic()), basePckr->lowpass, basePckr->highpass, basePckr->angpix, 2, true); //false = lowpass, true=highpass micTransformer.fouriers.streamSync(); micTransformer.backward(); micTransformer.reals.streamSync(); CTOC(timer,"highpass"); } CTIC(timer,"F_cp"); CudaGlobalPtr< CUDACOMPLEX > Ftmp(allocator); Ftmp.setSize(micTransformer.fouriers.getSize()); Ftmp.device_alloc(); micTransformer.fouriers.cp_on_device(Ftmp); CTOC(timer,"F_cp"); // Also calculate the FFT of the squared micrograph CTIC(timer,"SquareImic"); cuda_kernel_square<<>>( ~micTransformer.reals, micTransformer.reals.getSize()); LAUNCH_HANDLE_ERROR(cudaGetLastError()); CTOC(timer,"SquareImic"); CTIC(timer,"FourierTransform_1"); micTransformer.forward(); cuda_kernel_multi<<>>( (XFLOAT*)~micTransformer.fouriers, (XFLOAT)1/((XFLOAT)(micTransformer.reals.getSize())), micTransformer.fouriers.getSize()*2); LAUNCH_HANDLE_ERROR(cudaGetLastError()); CTOC(timer,"FourierTransform_1"); // The following calculate mu and sig under the solvent area at every position in the micrograph CTIC(timer,"calculateStddevAndMeanUnderMask"); d_Mstddev.device_alloc(basePckr->workSize*basePckr->workSize); d_Mmean.device_alloc(basePckr->workSize*basePckr->workSize); //TODO Do this only once further up in scope CudaGlobalPtr< CUDACOMPLEX > d_Fmsk(basePckr->Finvmsk.nzyxdim, allocator); for(int i = 0; i< d_Fmsk.size ; i++) { d_Fmsk[i].x = basePckr->Finvmsk.data[i].real; d_Fmsk[i].y = basePckr->Finvmsk.data[i].imag; } d_Fmsk.put_on_device(); d_Fmsk.streamSync(); calculateStddevAndMeanUnderMask(Ftmp, micTransformer.fouriers, d_Fmsk, basePckr->nr_pixels_circular_invmask, d_Mstddev, d_Mmean, micTransformer.xFSize, micTransformer.yFSize, basePckr->micrograph_size, basePckr->workSize); //TODO remove this d_Mstddev.host_alloc(); d_Mstddev.cp_to_host(); d_Mstddev.streamSync(); Mstddev.resizeNoCp(1, basePckr->workSize, basePckr->workSize); //TODO put this in a kernel for(int i = 0; i < d_Mstddev.size ; i ++) { Mstddev.data[i] = d_Mstddev[i]; if (d_Mstddev[i] > (XFLOAT)1E-10) d_Mstddev[i] = 1 / d_Mstddev[i]; else d_Mstddev[i] = 1; } d_Mstddev.cp_to_device(); d_Mstddev.streamSync(); CTOC(timer,"calculateStddevAndMeanUnderMask"); // From now on use downsized Fmic, as the cross-correlation with the references can be done at lower resolution CTIC(timer,"windowFourierTransform_0"); 
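// windowFourierTransform2 crops the complex transform from the full micrograph
// grid (micrograph_size/2+1 x micrograph_size; only half of the x-axis is kept
// because the input is real) down to the working grid (workSize/2+1 x workSize).
// Cropping in Fourier space is equivalent to down-scaling the real-space image,
// which is what allows the reference cross-correlations to run at workSize.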
d_Fmic.setSize((basePckr->workSize/2+1)*(basePckr->workSize)); d_Fmic.device_alloc(); windowFourierTransform2( Ftmp, d_Fmic, basePckr->micrograph_size/2+1, basePckr->micrograph_size, 1, //Input dimensions basePckr->workSize/2+1, basePckr->workSize, 1 //Output dimensions ); CTOC(timer,"windowFourierTransform_0"); if (basePckr->do_write_fom_maps) { CTIC(timer,"writeToFomMaps"); // TMP output FileName fn_tmp=basePckr->getOutputRootName(fn_mic)+"_"+basePckr->fn_out+"_stddevNoise.spi"; Image It; It() = Mstddev; It.write(fn_tmp); CTOC(timer,"writeToFomMaps"); } }// end if do_read_fom_maps // Now start looking for the peaks of all references // Clear the output vector with all peaks CTIC(timer,"initPeaks"); std::vector peaks; peaks.clear(); CTOC(timer,"initPeaks"); #ifdef TIMING basePckr->timer.toc(basePckr->TIMING_B1); #endif if (basePckr->autopick_helical_segments) { if (basePckr->do_read_fom_maps) { FileName fn_tmp; Image It_float; Image It_int; fn_tmp = basePckr->getOutputRootName(fn_mic)+"_"+basePckr->fn_out+"_combinedCCF.spi"; It_float.read(fn_tmp); Mccf_best_combined = It_float(); fn_tmp = basePckr->getOutputRootName(fn_mic)+"_"+basePckr->fn_out+"_combinedCLASS.spi"; It_int.read(fn_tmp); Mclass_best_combined = It_int(); } else { Mccf_best_combined.clear(); Mccf_best_combined.resize(basePckr->workSize, basePckr->workSize); Mccf_best_combined.initConstant(-99.e99); Mclass_best_combined.clear(); Mclass_best_combined.resize(basePckr->workSize, basePckr->workSize); Mclass_best_combined.initConstant(-1); } } CudaGlobalPtr< XFLOAT > d_ctf(Fctf.nzyxdim, allocator); if(basePckr->do_ctf) { for(int i = 0; i< d_ctf.size ; i++) d_ctf[i]=Fctf.data[i]; d_ctf.put_on_device(); } for (int iref = 0; iref < basePckr->Mrefs.size(); iref++) { CTIC(timer,"OneReference"); RFLOAT expected_Pratio; // the expectedFOM for this (ctf-corrected) reference if (basePckr->do_read_fom_maps) { #ifdef TIMING basePckr->timer.tic(basePckr->TIMING_B2); #endif if (!basePckr->autopick_helical_segments) { CTIC(timer,"readFromFomMaps"); FileName fn_tmp; Image It; fn_tmp.compose(basePckr->getOutputRootName(fn_mic)+"_"+basePckr->fn_out+"_ref", iref,"_bestCCF.spi"); It.read(fn_tmp); Mccf_best = It(); It.MDMainHeader.getValue(EMDL_IMAGE_STATS_MAX, expected_Pratio); // Retrieve expected_Pratio from the header of the image fn_tmp.compose(basePckr->getOutputRootName(fn_mic)+"_"+basePckr->fn_out+"_ref", iref,"_bestPSI.spi"); It.read(fn_tmp); Mpsi_best = It(); CTOC(timer,"readFromFomMaps"); } #ifdef TIMING basePckr->timer.toc(basePckr->TIMING_B2); #endif } //end else if do_read_fom_maps else { #ifdef TIMING basePckr->timer.tic(basePckr->TIMING_B3); #endif CTIC(timer,"mccfInit"); deviceInitValue(d_Mccf_best, (XFLOAT)-LARGE_NUMBER); CTOC(timer,"mccfInit"); CudaProjectorKernel projKernel = CudaProjectorKernel::makeKernel( cudaProjectors[iref], (int)basePckr->workSize/2+1, (int)basePckr->workSize, 1, // Zdim, always 1 in autopicker. 
(int)basePckr->workSize/2+1 -1 ); int FauxStride = (basePckr->workSize/2+1)*basePckr->workSize; #ifdef TIMING basePckr->timer.tic(basePckr->TIMING_B4); #endif CTIC(timer,"SingleProjection"); dim3 blocks((int)ceilf((float)FauxStride/(float)BLOCK_SIZE),1); if(basePckr->do_ctf) { cuda_kernel_rotateAndCtf<<>>( ~cudaTransformer1.fouriers, ~d_ctf, 0, projKernel, 0 ); } else { cuda_kernel_rotateOnly<<>>( ~cudaTransformer1.fouriers, 0, projKernel, 0 ); } LAUNCH_HANDLE_ERROR(cudaGetLastError()); CTOC(timer,"SingleProjection"); #ifdef TIMING basePckr->timer.toc(basePckr->TIMING_B4); #endif /* * FIRST PSI WAS USED FOR PREP CALCS - THIS IS NOW A DEDICATED SECTION * ------------------------------------------------------------------- */ CTIC(timer,"PREP_CALCS"); #ifdef TIMING basePckr->timer.tic(basePckr->TIMING_B5); #endif // Sjors 20April2016: The calculation for sum_ref_under_circ_mask, etc below needs to be done on original micrograph_size! CTIC(timer,"windowFourierTransform_FP"); windowFourierTransform2(cudaTransformer1.fouriers, micTransformer.fouriers, basePckr->workSize/2+1, basePckr->workSize, 1, //Input dimensions basePckr->micrograph_size/2+1, basePckr->micrograph_size, 1 //Output dimensions ); CTOC(timer,"windowFourierTransform_FP"); CTIC(timer,"inverseFourierTransform_FP"); micTransformer.backward(); CTOC(timer,"inverseFourierTransform_FP"); CTIC(timer,"runCenterFFT_FP"); runCenterFFT(micTransformer.reals, (int)micTransformer.xSize, (int)micTransformer.ySize, false, 1); CTOC(timer,"runCenterFFT_FP"); micTransformer.reals.cp_to_host(); Maux.resizeNoCp(1,basePckr->micrograph_size, basePckr->micrograph_size); micTransformer.reals.streamSync(); for (int i = 0; i < micTransformer.reals.size ; i ++) Maux.data[i] = micTransformer.reals[i]; CTIC(timer,"setXmippOrigin_FP_0"); Maux.setXmippOrigin(); CTOC(timer,"setXmippOrigin_FP_0"); // TODO: check whether I need CenterFFT(Maux, false) // Sjors 20apr2016: checked, somehow not needed. sum_ref_under_circ_mask = 0.; sum_ref2_under_circ_mask = 0.; RFLOAT suma2 = 0.; RFLOAT sumn = 1.; MultidimArray Mctfref(basePckr->particle_size, basePckr->particle_size); CTIC(timer,"setXmippOrigin_FP_1"); Mctfref.setXmippOrigin(); CTOC(timer,"setXmippOrigin_FP_1"); CTIC(timer,"suma_FP"); FOR_ALL_ELEMENTS_IN_ARRAY2D(Mctfref) // only loop over smaller Mctfref, but take values from large Maux! { if (i*i + j*j < basePckr->particle_radius2) { suma2 += A2D_ELEM(Maux, i, j) * A2D_ELEM(Maux, i, j); suma2 += 2. * A2D_ELEM(Maux, i, j) * rnd_gaus(0., 1.); sum_ref_under_circ_mask += A2D_ELEM(Maux, i, j); sum_ref2_under_circ_mask += A2D_ELEM(Maux, i, j) * A2D_ELEM(Maux, i, j); sumn += 1.; } } sum_ref_under_circ_mask /= sumn; sum_ref2_under_circ_mask /= sumn; expected_Pratio = exp(suma2 / (2. 
* sumn)); CTOC(timer,"suma_FP"); CTOC(timer,"PREP_CALCS"); // for all batches CTIC(timer,"AllPsi"); int startPsi(0); for (int psiIter = 0; psiIter < cudaTransformer1.batchIters; psiIter++) // psi-batches for possible memory-limits { CTIC(timer,"Projection"); dim3 blocks((int)ceilf((float)FauxStride/(float)BLOCK_SIZE),cudaTransformer1.batchSize[psiIter]); if(basePckr->do_ctf) { cuda_kernel_rotateAndCtf<<>>( ~cudaTransformer1.fouriers, ~d_ctf, DEG2RAD(basePckr->psi_sampling), projKernel, startPsi ); } else { cuda_kernel_rotateOnly<<>>( ~cudaTransformer1.fouriers, DEG2RAD(basePckr->psi_sampling), projKernel, startPsi ); } LAUNCH_HANDLE_ERROR(cudaGetLastError()); CTOC(timer,"Projection"); // Now multiply template and micrograph to calculate the cross-correlation CTIC(timer,"convol"); dim3 blocks2( (int) ceilf(( float)FauxStride/(float)BLOCK_SIZE),cudaTransformer1.batchSize[psiIter]); cuda_kernel_batch_convol_A<<>>( cudaTransformer1.fouriers.d_ptr, d_Fmic.d_ptr, FauxStride); LAUNCH_HANDLE_ERROR(cudaGetLastError()); CTOC(timer,"convol"); CTIC(timer,"CudaInverseFourierTransform_1"); cudaTransformer1.backward(); HANDLE_ERROR(cudaDeviceSynchronize()); CTOC(timer,"CudaInverseFourierTransform_1"); CTIC(timer,"runCenterFFT_1"); runCenterFFT(cudaTransformer1.reals, (int)cudaTransformer1.xSize, (int)cudaTransformer1.ySize, false, cudaTransformer1.batchSize[psiIter]); CTOC(timer,"runCenterFFT_1"); // Calculate ratio of prabilities P(ref)/P(zero) // Keep track of the best values and their corresponding iref and psi // ------------------------------------------------------------------ // So now we already had precalculated: Mdiff2 = 1/sig*Sum(X^2) - 2/sig*Sum(X) + mu^2/sig*Sum(1) // Still to do (per reference): - 2/sig*Sum(AX) + 2*mu/sig*Sum(A) + Sum(A^2) CTIC(timer,"probRatio"); HANDLE_ERROR(cudaDeviceSynchronize()); dim3 PR_blocks(ceilf((float)(cudaTransformer1.reals.size/cudaTransformer1.batchSize[psiIter])/(float)PROBRATIO_BLOCK_SIZE)); cuda_kernel_probRatio<<>>( d_Mccf_best.d_ptr, d_Mpsi_best.d_ptr, cudaTransformer1.reals.d_ptr, d_Mmean.d_ptr, d_Mstddev.d_ptr, cudaTransformer1.reals.size/cudaTransformer1.batchSize[0], (XFLOAT) -2*normfft, (XFLOAT) 2*sum_ref_under_circ_mask, (XFLOAT) sum_ref2_under_circ_mask, (XFLOAT) expected_Pratio, cudaTransformer1.batchSize[psiIter], startPsi, Npsi ); LAUNCH_HANDLE_ERROR(cudaGetLastError()); startPsi += cudaTransformer1.batchSize[psiIter]; CTOC(timer,"probRatio"); } // end for psi-batches CTOC(timer,"AllPsi"); #ifdef TIMING basePckr->timer.toc(basePckr->TIMING_B6); #endif #ifdef TIMING basePckr->timer.tic(basePckr->TIMING_B7); #endif CTIC(timer,"output"); d_Mccf_best.cp_to_host(); d_Mpsi_best.cp_to_host(); d_Mccf_best.streamSync(); for (int i = 0; i < Mccf_best.nzyxdim; i ++) { Mccf_best.data[i] = d_Mccf_best[i]; Mpsi_best.data[i] = d_Mpsi_best[i]; } CTOC(timer,"output"); if (basePckr->do_write_fom_maps && !basePckr->autopick_helical_segments) { CTIC(timer,"writeFomMaps"); // TMP output FileName fn_tmp; Image It; It() = Mccf_best; // Store expected_Pratio in the header of the image.. 
It.MDMainHeader.setValue(EMDL_IMAGE_STATS_MAX, expected_Pratio); // Store expected_Pratio in the header of the image fn_tmp.compose(basePckr->getOutputRootName(fn_mic)+"_"+basePckr->fn_out+"_ref", iref,"_bestCCF.spi"); It.write(fn_tmp); It() = Mpsi_best; fn_tmp.compose(basePckr->getOutputRootName(fn_mic)+"_"+basePckr->fn_out+"_ref", iref,"_bestPSI.spi"); It.write(fn_tmp); CTOC(timer,"writeFomMaps"); } // end if do_write_fom_maps #ifdef TIMING basePckr->timer.toc(basePckr->TIMING_B7); #endif #ifdef TIMING basePckr->timer.toc(basePckr->TIMING_B3); #endif } // end if do_read_fom_maps //TODO FIX HELICAL SEGMENTS SUPPORT if (basePckr->autopick_helical_segments) { if (!basePckr->do_read_fom_maps) { // Combine Mccf_best and Mpsi_best from all refs FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Mccf_best) { RFLOAT new_ccf = DIRECT_MULTIDIM_ELEM(Mccf_best, n); RFLOAT old_ccf = DIRECT_MULTIDIM_ELEM(Mccf_best_combined, n); if (new_ccf > old_ccf) { DIRECT_MULTIDIM_ELEM(Mccf_best_combined, n) = new_ccf; DIRECT_MULTIDIM_ELEM(Mclass_best_combined, n) = iref; } } } } else { #ifdef TIMING basePckr->timer.tic(basePckr->TIMING_B8); #endif // Now that we have Mccf_best and Mpsi_best, get the peaks std::vector my_ref_peaks; CTIC(timer,"setXmippOriginX3"); Mstddev.setXmippOrigin(); Mccf_best.setXmippOrigin(); Mpsi_best.setXmippOrigin(); CTOC(timer,"setXmippOriginX3"); CTIC(timer,"peakSearch"); basePckr->peakSearch(Mccf_best, Mpsi_best, Mstddev, iref, my_skip_side, my_ref_peaks, scale); CTOC(timer,"peakSearch"); CTIC(timer,"peakPrune"); basePckr->prunePeakClusters(my_ref_peaks, min_distance_pix, scale); CTOC(timer,"peakPrune"); CTIC(timer,"peakInsert"); // append the peaks of this reference to all the other peaks peaks.insert(peaks.end(), my_ref_peaks.begin(), my_ref_peaks.end()); CTOC(timer,"peakInsert"); CTOC(timer,"OneReference"); #ifdef TIMING basePckr->timer.toc(basePckr->TIMING_B8); #endif } } // end for iref if (basePckr->autopick_helical_segments) { RFLOAT thres = basePckr->min_fraction_expected_Pratio; int peak_r_min = 1; std::vector ccf_peak_list; std::vector > tube_coord_list, tube_track_list; std::vector tube_len_list; MultidimArray Mccfplot; Mccf_best_combined.setXmippOrigin(); Mclass_best_combined.setXmippOrigin(); basePckr->pickCCFPeaks(Mccf_best_combined, Mclass_best_combined, thres, peak_r_min, (basePckr->particle_diameter / basePckr->angpix), ccf_peak_list, Mccfplot, my_skip_side, scale); basePckr->extractHelicalTubes(ccf_peak_list, tube_coord_list, tube_len_list, tube_track_list, (basePckr->particle_diameter / basePckr->angpix), basePckr->helical_tube_curvature_factor_max, (basePckr->min_particle_distance / basePckr->angpix), (basePckr->helical_tube_diameter / basePckr->angpix), scale); basePckr->exportHelicalTubes(Mccf_best_combined, Mccfplot, Mclass_best_combined, tube_coord_list, tube_track_list, tube_len_list, fn_mic, basePckr->fn_out, (basePckr->particle_diameter / basePckr->angpix), (basePckr->helical_tube_length_min / basePckr->angpix), my_skip_side, scale); if (basePckr->do_write_fom_maps) { FileName fn_tmp; Image It_float; Image It_int; It_float() = Mccf_best_combined; fn_tmp = basePckr->getOutputRootName(fn_mic) + "_" + basePckr->fn_out + "_combinedCCF.spi"; It_float.write(fn_tmp); It_int() = Mclass_best_combined; fn_tmp = basePckr->getOutputRootName(fn_mic) + + "_" + basePckr->fn_out + "_combinedCLASS.spi"; It_int.write(fn_tmp); } // end if do_write_fom_maps if (basePckr->do_write_fom_maps || basePckr->do_read_fom_maps) { FileName fn_tmp; Image It; It() = Mccfplot; fn_tmp = 
basePckr->getOutputRootName(fn_mic) + "_" + basePckr->fn_out + "_combinedPLOT.spi"; It.write(fn_tmp); } } else { #ifdef TIMING basePckr->timer.tic(basePckr->TIMING_B9); #endif //Now that we have done all references, prune the list again... CTIC(timer,"finalPeakPrune"); basePckr->prunePeakClusters(peaks, min_distance_pix, scale); CTOC(timer,"finalPeakPrune"); // And remove all too close neighbours basePckr->removeTooCloselyNeighbouringPeaks(peaks, min_distance_pix, scale); // Write out a STAR file with the coordinates MetaDataTable MDout; for (int ipeak =0; ipeak < peaks.size(); ipeak++) { MDout.addObject(); MDout.setValue(EMDL_IMAGE_COORD_X, (RFLOAT)(peaks[ipeak].x)/scale); MDout.setValue(EMDL_IMAGE_COORD_Y, (RFLOAT)(peaks[ipeak].y)/scale); MDout.setValue(EMDL_PARTICLE_CLASS, peaks[ipeak].ref + 1); // start counting at 1 MDout.setValue(EMDL_PARTICLE_AUTOPICK_FOM, peaks[ipeak].fom); MDout.setValue(EMDL_ORIENT_PSI, peaks[ipeak].psi); } FileName fn_tmp = basePckr->getOutputRootName(fn_mic) + "_" + basePckr->fn_out + ".star"; MDout.write(fn_tmp); #ifdef TIMING basePckr->timer.toc(basePckr->TIMING_B9); #endif } } relion-3.1.3/src/gpu_utils/cuda_autopicker.h000066400000000000000000000057671411340063500211370ustar00rootroot00000000000000#ifndef CUDA_AUTOPICKER_H_ #define CUDA_AUTOPICKER_H_ #include "src/mpi.h" #include "src/autopicker.h" #include "src/autopicker_mpi.h" #include "src/projector.h" #include "src/complex.h" #include "src/image.h" #include "src/gpu_utils/cuda_mem_utils.h" #include "src/gpu_utils/cuda_projector.h" #include "src/gpu_utils/cuda_settings.h" #include "src/gpu_utils/cuda_fft.h" #include "src/gpu_utils/cuda_benchmark_utils.h" #include #ifdef CUDA_DOUBLE_PRECISION #define XFLOAT double #else #define XFLOAT float #endif class AutoPickerCuda { private: MpiNode *node; public: AutoPicker *basePckr; CudaCustomAllocator *allocator; CudaFFT micTransformer; CudaFFT cudaTransformer1; CudaFFT cudaTransformer2; std::vector< CudaProjector > cudaProjectors; //Class streams ( for concurrent scheduling of class-specific kernels) std::vector< cudaStream_t > classStreams; int device_id; bool have_warned_batching; //MlDeviceBundle *devBundle; #ifdef TIMING_FILES relion_timer timer; #endif AutoPickerCuda(AutoPicker *basePicker, int dev_id, const char * timing_fnm); AutoPickerCuda(AutoPickerMpi *basePicker, int dev_id, const char * timing_fnm); void setupProjectors(); void run(); void autoPickOneMicrograph(FileName &fn_mic, long int imic); void calculateStddevAndMeanUnderMask(CudaGlobalPtr< CUDACOMPLEX > &d_Fmic, CudaGlobalPtr< CUDACOMPLEX > &d_Fmic2, CudaGlobalPtr< CUDACOMPLEX > &d_Fmsk, int nr_nonzero_pixels_mask, CudaGlobalPtr< XFLOAT > &d_Mstddev, CudaGlobalPtr< XFLOAT > &d_Mmean, size_t x, size_t y, size_t mic_size, size_t workSize); ~AutoPickerCuda() { for (int i = 0; i < classStreams.size(); i++) HANDLE_ERROR(cudaStreamDestroy(classStreams[i])); } //private: // // Uses Roseman2003 formulae to calculate stddev under the mask through FFTs // // The FFTs of the micrograph (Fmic), micrograph-squared (Fmic2) and the mask (Fmsk) need to be provided at downsize_mic // // The putput (Mstddev) will be at (binned) micrograph_size // void calculateStddevAndMeanUnderMask(const MultidimArray &Fmic, const MultidimArray &Fmic2, // MultidimArray &Fmsk, int nr_nonzero_pixels_mask, MultidimArray &Mstddev, MultidimArray &Mmean); // // // Peak search for all pixels above a given threshold in the map // void peakSearch(const MultidimArray &Mccf, const MultidimArray &Mpsi, const MultidimArray &Mstddev, int 
iref, int skip_side, std::vector &peaks); // // // Now prune the coordinates: within min_particle_distance: all peaks are the same cluster // // From each cluster, take the single peaks with the highest ccf // // If then, there is another peaks at a distance of at least min_particle_distance: take that one as well, and so forth... // void prunePeakClusters(std::vector &peaks, int min_distance); // // // // Only keep those peaks that are at the given distance apart from each other // void removeTooCloselyNeighbouringPeaks(std::vector &peaks, int min_distance); }; #endif /* CUDA_AUTOPICKER_H_ */ relion-3.1.3/src/gpu_utils/cuda_backprojector.cu000066400000000000000000000051771411340063500217740ustar00rootroot00000000000000#include #include #include "src/gpu_utils/cuda_settings.h" #include "src/gpu_utils/cuda_backprojector.h" #include "src/gpu_utils/cuda_device_utils.cuh" #include "src/gpu_utils/cuda_projector.cuh" size_t CudaBackprojector::setMdlDim( int xdim, int ydim, int zdim, int inity, int initz, int max_r, int paddingFactor) { if (xdim != mdlX || ydim != mdlY || zdim != mdlZ || inity != mdlInitY || initz != mdlInitZ || max_r != maxR || paddingFactor != padding_factor) { clear(); mdlX = xdim; mdlY = ydim; mdlZ = zdim; if (mdlZ < 1) mdlZ = 1; mdlXYZ = xdim*ydim*zdim; mdlInitY = inity; mdlInitZ = initz; maxR = max_r; maxR2 = max_r*max_r; padding_factor = paddingFactor; //Allocate space for model HANDLE_ERROR(cudaMalloc( (void**) &d_mdlReal, mdlXYZ * sizeof(XFLOAT))); HANDLE_ERROR(cudaMalloc( (void**) &d_mdlImag, mdlXYZ * sizeof(XFLOAT))); HANDLE_ERROR(cudaMalloc( (void**) &d_mdlWeight, mdlXYZ * sizeof(XFLOAT))); allocaton_size = mdlXYZ * sizeof(XFLOAT) * 3; } return allocaton_size; } void CudaBackprojector::initMdl() { #ifdef CUDA_DEBUG if (mdlXYZ == 0) { printf("Model dimensions must be set with setMdlDim before call to setupMdl."); CRITICAL(ERR_MDLDIM); } if (voxelCount != 0) { printf("DEBUG_ERROR: Duplicated call to model setup"); CRITICAL(ERR_MDLSET); } #endif //Initiate model with zeros DEBUG_HANDLE_ERROR(cudaMemset( d_mdlReal, 0, mdlXYZ * sizeof(XFLOAT))); DEBUG_HANDLE_ERROR(cudaMemset( d_mdlImag, 0, mdlXYZ * sizeof(XFLOAT))); DEBUG_HANDLE_ERROR(cudaMemset( d_mdlWeight, 0, mdlXYZ * sizeof(XFLOAT))); } void CudaBackprojector::getMdlData(XFLOAT *r, XFLOAT *i, XFLOAT * w) { DEBUG_HANDLE_ERROR(cudaStreamSynchronize(stream)); //Make sure to wait for remaining kernel executions DEBUG_HANDLE_ERROR(cudaMemcpyAsync( r, d_mdlReal, mdlXYZ * sizeof(XFLOAT), cudaMemcpyDeviceToHost, stream)); DEBUG_HANDLE_ERROR(cudaMemcpyAsync( i, d_mdlImag, mdlXYZ * sizeof(XFLOAT), cudaMemcpyDeviceToHost, stream)); DEBUG_HANDLE_ERROR(cudaMemcpyAsync( w, d_mdlWeight, mdlXYZ * sizeof(XFLOAT), cudaMemcpyDeviceToHost, stream)); DEBUG_HANDLE_ERROR(cudaStreamSynchronize(stream)); //Wait for copy } void CudaBackprojector::clear() { mdlX = 0; mdlY = 0; mdlZ = 0; mdlXYZ = 0; mdlInitY = 0; mdlInitZ = 0; maxR = 0; maxR2 = 0; padding_factor = 0; allocaton_size = 0; if (d_mdlReal != NULL) { DEBUG_HANDLE_ERROR(cudaFree(d_mdlReal)); DEBUG_HANDLE_ERROR(cudaFree(d_mdlImag)); DEBUG_HANDLE_ERROR(cudaFree(d_mdlWeight)); d_mdlReal = d_mdlImag = d_mdlWeight = NULL; } } CudaBackprojector::~CudaBackprojector() { clear(); } relion-3.1.3/src/gpu_utils/cuda_backprojector.h000066400000000000000000000026421411340063500216060ustar00rootroot00000000000000#ifndef CUDA_BACKPROJECTOR_H_ #define CUDA_BACKPROJECTOR_H_ #include #include "src/complex.h" #include "src/gpu_utils/cuda_settings.h" #include "src/gpu_utils/cuda_mem_utils.h" class 
CudaBackprojector { public: int mdlX, mdlY, mdlZ, mdlXYZ, mdlInitY, mdlInitZ, maxR, maxR2, padding_factor; size_t allocaton_size; XFLOAT *d_mdlReal, *d_mdlImag, *d_mdlWeight; cudaStream_t stream; public: CudaBackprojector(): mdlX(0), mdlY(0), mdlZ(0), mdlXYZ(0), mdlInitY(0), mdlInitZ(0), maxR(0), maxR2(0), padding_factor(0), allocaton_size(0), d_mdlReal(NULL), d_mdlImag(NULL), d_mdlWeight(NULL), stream(0) {} size_t setMdlDim( int xdim, int ydim, int zdim, int inity, int initz, int max_r, int paddingFactor); void initMdl(); void backproject( XFLOAT *d_imgs_nomask_real, XFLOAT *d_imgs_nomask_imag, XFLOAT *trans_x, XFLOAT *trans_y, XFLOAT *trans_z, XFLOAT* d_weights, XFLOAT* d_Minvsigma2s, XFLOAT* d_ctfs, unsigned long translation_num, XFLOAT significant_weight, XFLOAT weight_norm, XFLOAT *d_eulers, int imgX, int imgY, int imgZ, unsigned long imageCount, bool data_is_3D, cudaStream_t optStream); void getMdlData(XFLOAT *real, XFLOAT *imag, XFLOAT * weights); void setStream(cudaStream_t s) { stream = s; } cudaStream_t getStream() { return stream; } void clear(); ~CudaBackprojector(); }; #endif relion-3.1.3/src/gpu_utils/cuda_benchmark_utils.cu000066400000000000000000000064131411340063500223100ustar00rootroot00000000000000 #include "src/gpu_utils/cuda_benchmark_utils.h" //Non-concurrent benchmarking tools (only for Linux) #include #include #include #include #include "src/macros.h" #include "src/error.h" int relion_timer::cuda_benchmark_find_id(std::string id, std::vector v) { for (unsigned i = 0; i < v.size(); i++) if (v[i] == id) return i; return -1; } void relion_timer::cuda_cpu_tic(std::string id) { if (cuda_benchmark_find_id(id, cuda_cpu_benchmark_identifiers) == -1) { cuda_cpu_benchmark_identifiers.push_back(id); cuda_cpu_benchmark_start_times.push_back(clock()); } else { printf("DEBUG_ERROR: Provided identifier '%s' already exists in call to cuda_cpu_tic.\n", id.c_str()); CRITICAL(ERRCTIC); } } void relion_timer::cuda_cpu_toc(std::string id) { int idx = cuda_benchmark_find_id(id, cuda_cpu_benchmark_identifiers); if (idx == -1) { printf("DEBUG_ERROR: Provided identifier '%s' not found in call to cuda_cpu_toc.\n", id.c_str()); //exit( EXIT_FAILURE ); } else { clock_t t = clock() - cuda_cpu_benchmark_start_times[idx]; cuda_cpu_benchmark_identifiers.erase(cuda_cpu_benchmark_identifiers.begin()+idx); cuda_cpu_benchmark_start_times.erase(cuda_cpu_benchmark_start_times.begin()+idx); fprintf(cuda_cpu_benchmark_fPtr,"%06.2f ms ......", (float)t / CLOCKS_PER_SEC * 1000.); for (int i = 1; i < cuda_cpu_benchmark_identifiers.size(); i++) fprintf(cuda_cpu_benchmark_fPtr,"......"); fprintf(cuda_cpu_benchmark_fPtr," %s\n", id.c_str()); // printf(,"%s \t %.2f ms\n", id.c_str(), (float)t / CLOCKS_PER_SEC * 1000.); } } void relion_timer::cuda_gpu_tic(std::string id) { if (cuda_benchmark_find_id(id, cuda_gpu_benchmark_identifiers) == -1) { cudaEvent_t start, stop; cudaEventCreate(&start); cudaEventCreate(&stop); cudaEventRecord(start, 0); cuda_gpu_benchmark_identifiers.push_back(id); cuda_gpu_benchmark_start_times.push_back(start); cuda_gpu_benchmark_stop_times.push_back(stop); } else { printf("DEBUG_ERROR: Provided identifier '%s' already exists in call to cuda_gpu_tic.\n", id.c_str()); CRITICAL(ERRGTIC); } } void relion_timer::cuda_gpu_toc(std::string id) { int idx = cuda_benchmark_find_id(id, cuda_gpu_benchmark_identifiers); if (idx == -1) { printf("DEBUG_ERROR: Provided identifier '%s' not found in call to cuda_gpu_tac.\n", id.c_str()); CRITICAL(ERRGTOC); } else { 
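// Record (and wait for) the stop event for this identifier here; the elapsed
// time itself is only computed and written out later, in cuda_gpu_printtictoc(),
// via cudaEventElapsedTime() on the stored start/stop event pairs.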
cudaEventRecord(cuda_gpu_benchmark_stop_times[idx], 0); cudaEventSynchronize(cuda_gpu_benchmark_stop_times[idx]); } } void relion_timer::cuda_gpu_printtictoc() { if (cuda_gpu_benchmark_identifiers.size() == 0) { printf("DEBUG_ERROR: There were no identifiers found in the list, on call to cuda_gpu_toc.\n"); CRITICAL(ERRTPC); } else { float time; for (int idx = 0; idx < cuda_gpu_benchmark_identifiers.size(); idx ++) { cudaEventElapsedTime(&time, cuda_gpu_benchmark_start_times[idx], cuda_gpu_benchmark_stop_times[idx]); cudaEventDestroy(cuda_gpu_benchmark_start_times[idx]); cudaEventDestroy(cuda_gpu_benchmark_stop_times[idx]); fprintf(cuda_gpu_benchmark_fPtr,"%.2f ms \t %s\n", time, cuda_gpu_benchmark_identifiers[idx].c_str()); } cuda_gpu_benchmark_identifiers.clear(); cuda_gpu_benchmark_start_times.clear(); cuda_gpu_benchmark_stop_times.clear(); } } relion-3.1.3/src/gpu_utils/cuda_benchmark_utils.h000066400000000000000000000035721411340063500221330ustar00rootroot00000000000000 #ifndef CUDA_BENCHMARK_UTILS_H_ #define CUDA_BENCHMARK_UTILS_H_ //Non-concurrent benchmarking tools (only for Linux) #include #include #include #include #include #include #include #ifdef TIMING_FILES #define CTIC(timer,timing) (timer.cuda_cpu_tic(timing)) #define CTOC(timer,timing) (timer.cuda_cpu_toc(timing)) #define GTIC(timer,timing) (timer.cuda_gpu_tic(timing)) #define GTOC(timer,timing) (timer.cuda_gpu_toc(timing)) #define GATHERGPUTIMINGS(timer) (timer.cuda_gpu_printtictoc()) #elif defined CUDA_PROFILING #include #define CTIC(timer,timing) (nvtxRangePush(timing)) #define CTOC(timer,timing) (nvtxRangePop()) #define GTIC(timer,timing) #define GTOC(timer,timing) #define GATHERGPUTIMINGS(timer) #else #define CTIC(timer,timing) #define CTOC(timer,timing) #define GTIC(timer,timing) #define GTOC(timer,timing) #define GATHERGPUTIMINGS(timer) #endif class relion_timer { public: std::vector cuda_cpu_benchmark_identifiers; std::vector cuda_cpu_benchmark_start_times; FILE *cuda_cpu_benchmark_fPtr; std::vector cuda_gpu_benchmark_identifiers; std::vector cuda_gpu_benchmark_start_times; std::vector cuda_gpu_benchmark_stop_times; FILE *cuda_gpu_benchmark_fPtr; relion_timer(std::string fnm) { std::stringstream fnm_cpu, fnm_gpu; fnm_cpu << "output/" << fnm << "_cpu.dat"; cuda_cpu_benchmark_fPtr = fopen(fnm_cpu.str().c_str(),"a"); fnm_gpu << "output/" << fnm << "_gpu.dat"; cuda_gpu_benchmark_fPtr = fopen(fnm_gpu.str().c_str(),"a"); } int cuda_benchmark_find_id(std::string id, std::vector v); void cuda_cpu_tic(std::string id); void cuda_cpu_toc(std::string id); void cuda_gpu_tic(std::string id); void cuda_gpu_toc(std::string id); void cuda_gpu_printtictoc(); }; #endif /* CUDA_BENCHMARK_UTILS_H_ */ relion-3.1.3/src/gpu_utils/cuda_device_utils.cuh000066400000000000000000000067731411340063500217760ustar00rootroot00000000000000#ifndef CUDA_DEVICE_UTILS_CUH_ #define CUDA_DEVICE_UTILS_CUH_ #include #include "src/gpu_utils/cuda_settings.h" #ifdef CUDA_DOUBLE_PRECISION __device__ inline double cuda_atomic_add(double* address, double val) { unsigned long long int* address_as_ull = (unsigned long long int*)address; unsigned long long int old = *address_as_ull, assumed; do { assumed = old; old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val + __longlong_as_double(assumed))); } while (assumed != old); // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN) return __longlong_as_double(old); } #else __device__ inline void cuda_atomic_add(float* address, float value) { atomicAdd(address,value); } #endif /* * 
For the following functions always use fast, low-precision intrinsics */ template< typename T1, typename T2 > static inline __device__ int floorfracf(T1 a, T2 b) { // return __float2int_rd(__fdividef( (float)a, (float)b ) ); return (int)(a/b); } template< typename T1, typename T2 > static inline __device__ int ceilfracf(T1 a, T2 b) { // return __float2int_ru(__fdividef( (float)a, (float)b ) ); return (int)(a/b + 1); } static inline __device__ XFLOAT no_tex2D(XFLOAT* mdl, XFLOAT xp, XFLOAT yp, int mdlX, int mdlInitY) { int x0 = floorf(xp); XFLOAT fx = xp - x0; int x1 = x0 + 1; int y0 = floorf(yp); XFLOAT fy = yp - y0; y0 -= mdlInitY; int y1 = y0 + 1; //----------------------------- XFLOAT d00 = mdl[y0*mdlX+x0]; XFLOAT d01 = mdl[y0*mdlX+x1]; XFLOAT d10 = mdl[y1*mdlX+x0]; XFLOAT d11 = mdl[y1*mdlX+x1]; //----------------------------- XFLOAT dx0 = d00 + (d01 - d00)*fx; XFLOAT dx1 = d10 + (d11 - d10)*fx; //----------------------------- return dx0 + (dx1 - dx0)*fy; } static inline __device__ XFLOAT no_tex3D(XFLOAT* mdl, XFLOAT xp, XFLOAT yp, XFLOAT zp, int mdlX, int mdlXY, int mdlInitY, int mdlInitZ) { int x0 = floorf(xp); XFLOAT fx = xp - x0; int x1 = x0 + 1; int y0 = floorf(yp); XFLOAT fy = yp - y0; y0 -= mdlInitY; int y1 = y0 + 1; int z0 = floorf(zp); XFLOAT fz = zp - z0; z0 -= mdlInitZ; int z1 = z0 + 1; XFLOAT d000 = mdl[z0*mdlXY+y0*mdlX+x0]; XFLOAT d001 = mdl[z0*mdlXY+y0*mdlX+x1]; XFLOAT d010 = mdl[z0*mdlXY+y1*mdlX+x0]; XFLOAT d011 = mdl[z0*mdlXY+y1*mdlX+x1]; XFLOAT d100 = mdl[z1*mdlXY+y0*mdlX+x0]; XFLOAT d101 = mdl[z1*mdlXY+y0*mdlX+x1]; XFLOAT d110 = mdl[z1*mdlXY+y1*mdlX+x0]; XFLOAT d111 = mdl[z1*mdlXY+y1*mdlX+x1]; //----------------------------- XFLOAT dx00 = d000 + (d001 - d000)*fx; XFLOAT dx01 = d100 + (d101 - d100)*fx; XFLOAT dx10 = d010 + (d011 - d010)*fx; XFLOAT dx11 = d110 + (d111 - d110)*fx; //----------------------------- XFLOAT dxy0 = dx00 + (dx10 - dx00)*fy; XFLOAT dxy1 = dx01 + (dx11 - dx01)*fy; //----------------------------- return dxy0 + (dxy1 - dxy0)*fz; } __device__ __forceinline__ void translatePixel( int x, int y, XFLOAT tx, XFLOAT ty, XFLOAT &real, XFLOAT &imag, XFLOAT &tReal, XFLOAT &tImag) { XFLOAT s, c; #ifdef CUDA_DOUBLE_PRECISION sincos( x * tx + y * ty , &s, &c ); #else sincosf( x * tx + y * ty , &s, &c ); #endif tReal = c * real - s * imag; tImag = c * imag + s * real; } __device__ __forceinline__ void translatePixel( int x, int y, int z, XFLOAT tx, XFLOAT ty, XFLOAT tz, XFLOAT &real, XFLOAT &imag, XFLOAT &tReal, XFLOAT &tImag) { XFLOAT s, c; #ifdef CUDA_DOUBLE_PRECISION sincos( x * tx + y * ty + z * tz, &s, &c ); #else sincosf( x * tx + y * ty + z * tz, &s, &c ); #endif tReal = c * real - s * imag; tImag = c * imag + s * real; } #endif relion-3.1.3/src/gpu_utils/cuda_fft.h000066400000000000000000000207331411340063500175360ustar00rootroot00000000000000#ifndef CUDA_FFT_H_ #define CUDA_FFT_H_ #include "src/gpu_utils/cuda_settings.h" #include "src/gpu_utils/cuda_mem_utils.h" #include #include #ifdef DEBUG_CUDA #define HANDLE_CUFFT_ERROR( err ) (CufftHandleError( err, __FILE__, __LINE__ )) #else #define HANDLE_CUFFT_ERROR( err ) (err) //Do nothing #endif static void CufftHandleError( cufftResult err, const char *file, int line ) { if (err != CUFFT_SUCCESS) { fprintf(stderr, "Cufft error in file '%s' in line %i : %s.\n", __FILE__, __LINE__, "error" ); raise(SIGSEGV); } } class CudaFFT { bool planSet; public: #ifdef CUDA_DOUBLE_PRECISION CudaGlobalPtr reals; CudaGlobalPtr fouriers; #else CudaGlobalPtr reals; CudaGlobalPtr fouriers; #endif cufftHandle cufftPlanForward, 
cufftPlanBackward; int direction; int dimension, idist, odist, istride, ostride; int inembed[3]; int onembed[3]; size_t xSize,ySize,zSize,xFSize,yFSize,zFSize; std::vector< int > batchSize; CudaCustomAllocator *CFallocator; int batchSpace, batchIters, reqN; CudaFFT(cudaStream_t stream, CudaCustomAllocator *allocator, int transformDimension = 2): reals(stream, allocator), fouriers(stream, allocator), cufftPlanForward(0), cufftPlanBackward(0), direction(0), dimension((int)transformDimension), idist(0), odist(0), istride(1), ostride(1), planSet(false), xSize(0), ySize(0), zSize(0), xFSize(0), yFSize(0), zFSize(0), batchSize(1,1), reqN(1), CFallocator(allocator) {}; size_t estimate(int batch) { size_t needed(0); size_t biggness; #ifdef CUDA_DOUBLE_PRECISION if(direction<=0) { HANDLE_CUFFT_ERROR( cufftEstimateMany(dimension, inembed, inembed, istride, idist, onembed, ostride, odist, CUFFT_D2Z, batch, &biggness)); needed += biggness; } if(direction>=0) { HANDLE_CUFFT_ERROR( cufftEstimateMany(dimension, inembed, onembed, ostride, odist, inembed, istride, idist, CUFFT_Z2D, batch, &biggness)); needed += biggness; } #else if(direction<=0) { HANDLE_CUFFT_ERROR( cufftEstimateMany(dimension, inembed, inembed, istride, idist, onembed, ostride, odist, CUFFT_R2C, batch, &biggness)); needed += biggness; } if(direction>=0) { HANDLE_CUFFT_ERROR( cufftEstimateMany(dimension, inembed, onembed, ostride, odist, inembed, istride, idist, CUFFT_C2R, batch, &biggness)); needed += biggness; } #endif size_t res = needed + (size_t)odist*(size_t)batch*sizeof(XFLOAT)*(size_t)2 + (size_t)idist*(size_t)batch*sizeof(XFLOAT); return res; } void setSize(size_t x, size_t y, size_t z, int batch = 1, int setDirection = 0) { /* Optional direction input restricts transformer to * forwards or backwards tranformation only, * which reduces memory requirements, especially * for large batches of simulatanous transforms. * * FFTW_FORWARDS === -1 * FFTW_BACKWARDS === +1 * * The default direction is 0 === forwards AND backwards */ int checkDim; if(z>1) checkDim=3; else if(y>1) checkDim=2; else checkDim=1; if(checkDim != dimension) CRITICAL(ERRCUFFTDIM); if( !( (setDirection==-1)||(setDirection==0)||(setDirection==1) ) ) { std::cerr << "*ERROR : Setting a cuda transformer direction to non-defined value" << std::endl; CRITICAL(ERRCUFFTDIR); } direction = setDirection; if (x == xSize && y == ySize && z == zSize && batch == reqN && planSet) return; clear(); batchSize.resize(1); batchSize[0] = batch; reqN = batch; xSize = x; ySize = y; zSize = z; xFSize = x/2 + 1; yFSize = y; zFSize = z; idist = zSize*ySize*xSize; odist = zSize*ySize*(xSize/2+1); istride = 1; ostride = 1; if(dimension==3) { inembed[0] = zSize; inembed[1] = ySize; inembed[2] = xSize; onembed[0] = zFSize; onembed[1] = yFSize; onembed[2] = xFSize; } else if(dimension==2) { inembed[0] = ySize; inembed[1] = xSize; onembed[0] = yFSize; onembed[1] = xFSize; } else { inembed[0] = xSize; onembed[0] = xFSize; } size_t needed, avail, total; needed = estimate(batchSize[0]); DEBUG_HANDLE_ERROR(cudaMemGetInfo( &avail, &total )); // std::cout << std::endl << "needed = "; // printf("%15zu\n", needed); // std::cout << "avail = "; // printf("%15zu\n", avail); // Check if there is enough memory // // --- TO HOLD TEMPORARY DATA DURING TRANSFORMS --- // // If there isn't, find how many there ARE space for and loop through them in batches. 
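// Rough illustration of the batching logic below (numbers are only an example):
// if batch = 360 transforms are requested but the memory estimate only fits
// about 90 at a time, the while-loop settles on batchIters = 4 (batchSpace = 90);
// the safety margin then bumps this to batchIters = 5 with
// batchSpace = CEIL(360/5) = 72, and the last batchSize entry is corrected so
// that the batch sizes still sum to exactly the requested 360.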
if(needed>avail) { batchIters = 2; batchSpace = CEIL((double) batch / (double)batchIters); needed = estimate(batchSpace); while(needed>avail && batchSpace>1) { batchIters++; batchSpace = CEIL((double) batch / (double)batchIters); needed = estimate(batchSpace); } if(batchIters>1) { batchIters = (int)((float)batchIters*1.1 + 1); batchSpace = CEIL((double) batch / (double)batchIters); needed = estimate(batchSpace); } batchSize.assign(batchIters,batchSpace); // specify batchIters of batches, each with batchSpace orientations batchSize[batchIters-1] = batchSpace - (batchSpace*batchIters - batch); // set last to care for remainder. if(needed>avail) CRITICAL(ERRFFTMEMLIM); // std::cerr << std::endl << "NOTE: Having to use " << batchIters << " batches of orientations "; // std::cerr << "to achieve the total requested " << batch << " orientations" << std::endl; // std::cerr << "( this could affect performance, consider using " << std::endl; // std::cerr << "\t higher --ang" << std::endl; // std::cerr << "\t harder --shrink" << std::endl; // std::cerr << "\t higher --lopass with --shrink 0" << std::endl; } else { batchIters = 1; batchSpace = batch; } reals.setSize(idist*batchSize[0]); reals.device_alloc(); reals.host_alloc(); fouriers.setSize(odist*batchSize[0]); fouriers.device_alloc(); fouriers.host_alloc(); // DEBUG_HANDLE_ERROR(cudaMemGetInfo( &avail, &total )); // needed = estimate(batchSize[0], fudge); // std::cout << "after alloc: " << std::endl << std::endl << "needed = "; // printf("%15li\n", needed); // std::cout << "avail = "; // printf("%15li\n", avail); #ifdef CUDA_DOUBLE_PRECISION if(direction<=0) { HANDLE_CUFFT_ERROR( cufftPlanMany(&cufftPlanForward, dimension, inembed, inembed, istride, idist, onembed, ostride, odist, CUFFT_D2Z, batchSize[0])); HANDLE_CUFFT_ERROR( cufftSetStream(cufftPlanForward, fouriers.getStream())); } if(direction>=0) { HANDLE_CUFFT_ERROR( cufftPlanMany(&cufftPlanBackward, dimension, inembed, onembed, ostride, odist, inembed, istride, idist, CUFFT_Z2D, batchSize[0])); HANDLE_CUFFT_ERROR( cufftSetStream(cufftPlanBackward, reals.getStream())); } planSet = true; } void forward() { HANDLE_CUFFT_ERROR( cufftExecD2Z(cufftPlanForward, ~reals, ~fouriers) ); } void backward() { HANDLE_CUFFT_ERROR( cufftExecZ2D(cufftPlanBackward, ~fouriers, ~reals) ); } void backward(CudaGlobalPtr &dst) { HANDLE_CUFFT_ERROR( cufftExecZ2D(cufftPlanBackward, ~fouriers, ~dst) ); } #else if(direction<=0) { HANDLE_CUFFT_ERROR( cufftPlanMany(&cufftPlanForward, dimension, inembed, inembed, istride, idist, onembed, ostride, odist, CUFFT_R2C, batchSize[0])); HANDLE_CUFFT_ERROR( cufftSetStream(cufftPlanForward, fouriers.getStream())); } if(direction>=0) { HANDLE_CUFFT_ERROR( cufftPlanMany(&cufftPlanBackward, dimension, inembed, onembed, ostride, odist, inembed, istride, idist, CUFFT_C2R, batchSize[0])); HANDLE_CUFFT_ERROR( cufftSetStream(cufftPlanBackward, reals.getStream())); } planSet = true; } void forward() { if(direction==1) { std::cout << "trying to execute a forward plan for a cudaFFT transformer which is backwards-only" << std::endl; CRITICAL(ERRCUFFTDIRF); } HANDLE_CUFFT_ERROR( cufftExecR2C(cufftPlanForward, ~reals, ~fouriers) ); } void backward() { if(direction==-1) { std::cout << "trying to execute a backwards plan for a cudaFFT transformer which is forwards-only" << std::endl; CRITICAL(ERRCUFFTDIRR); } HANDLE_CUFFT_ERROR( cufftExecC2R(cufftPlanBackward, ~fouriers, ~reals) ); } #endif void clear() { if(planSet) { reals.free_if_set(); fouriers.free_if_set(); if(direction<=0) 
HANDLE_CUFFT_ERROR(cufftDestroy(cufftPlanForward)); if(direction>=0) HANDLE_CUFFT_ERROR(cufftDestroy(cufftPlanBackward)); planSet = false; } } ~CudaFFT() {clear();} }; #endif relion-3.1.3/src/gpu_utils/cuda_helper_functions.cu000066400000000000000000001171551411340063500225130ustar00rootroot00000000000000#include #include "src/gpu_utils/cuda_settings.h" #include "src/gpu_utils/cuda_helper_functions.cuh" #include "src/gpu_utils/cuda_kernels/BP.cuh" #include "src/macros.h" #include "src/error.h" long int makeJobsForDiff2Fine( OptimisationParamters &op, SamplingParameters &sp, long int orientation_num, long int translation_num, ProjectionParams &FineProjectionData, std::vector< long unsigned > &iover_transes, std::vector< long unsigned > &ihiddens, long int nr_over_orient, long int nr_over_trans, int ipart, IndexedDataArray &FPW, // FPW=FinePassWeights IndexedDataArrayMask &dataMask, int chunk) { long int w_base = dataMask.firstPos, w(0), k(0); // be on the safe side with the jobArrays: make them as large as they could possibly be // (this will be reduced at exit of this function) dataMask.setNumberOfJobs(orientation_num*translation_num); dataMask.setNumberOfWeights(orientation_num*translation_num); dataMask.jobOrigin.host_alloc(); dataMask.jobExtent.host_alloc(); dataMask.jobOrigin[k]=0; for (long unsigned i = 0; i < orientation_num; i++) { dataMask.jobExtent[k]=0; int tk=0; long int iover_rot = FineProjectionData.iover_rots[i]; for (long unsigned j = 0; j < translation_num; j++) { long int iover_trans = iover_transes[j]; long int ihidden = FineProjectionData.iorientclasses[i] * sp.nr_trans + ihiddens[j]; if(DIRECT_A2D_ELEM(op.Mcoarse_significant, ipart, ihidden)==1) { FPW.rot_id[w_base+w] = FineProjectionData.iorientclasses[i] % (sp.nr_dir*sp.nr_psi); // where to look for priors etc FPW.rot_idx[w_base+w] = i; // which rot for this significant task FPW.trans_idx[w_base+w] = j; // which trans - || - FPW.ihidden_overs[w_base+w]= (ihidden * nr_over_orient + iover_rot) * nr_over_trans + iover_trans; if(tk>=chunk) { tk=0; // reset counter k++; // use new element dataMask.jobOrigin[k]=w; dataMask.jobExtent[k]=0; // prepare next element for ++ incrementing } tk++; // increment limit-checker dataMask.jobExtent[k]++; // increment number of transes this job w++; } else if(tk!=0) // start a new one with the same rotidx - we expect transes to be sequential. 
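// (jobs only ever contain consecutive significant translations of a single
//  orientation, so a gap in the significance mask, like reaching the chunk
//  limit above, closes the current job and opens the next one)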
{ tk=0; // reset counter k++; // use new element dataMask.jobOrigin[k]=w; dataMask.jobExtent[k]=0; // prepare next element for ++ incrementing } } if(tk>0) // use new element (if tk==0) then we are currently on an element with no signif, so we should continue using this element { k++; dataMask.jobOrigin[k]=w; dataMask.jobExtent[k]=0; } } if(dataMask.jobExtent[k]!=0) // if we started putting somehting in last element, then the count is one higher than the index k+=1; dataMask.setNumberOfJobs(k); dataMask.setNumberOfWeights(w); // if(dataMask.weightNum>0) // { // dataMask.jobOrigin.device_alloc(); // dataMask.jobExtent.device_alloc(); // } return(w); } int makeJobsForCollect(IndexedDataArray &FPW, IndexedDataArrayMask &dataMask, unsigned long NewJobNum) // FPW=FinePassWeights { // reset the old (diff2Fine) job-definitions // dataMask.jobOrigin.free_host(); // dataMask.jobOrigin.free_device(); // dataMask.jobExtent.free_host(); // dataMask.jobExtent.free_device(); dataMask.setNumberOfJobs(NewJobNum); // dataMask.jobOrigin.host_alloc(); // dataMask.jobExtent.host_alloc(); long int jobid=0; dataMask.jobOrigin[jobid]=0; dataMask.jobExtent[jobid]=1; long int crot =FPW.rot_idx[jobid]; // set current rot for(long int n=1; n &corr_img, long int ipart, long int group_id) { // CC or not if((baseMLO->iter == 1 && baseMLO->do_firstiter_cc) || baseMLO->do_always_cc) for(int i = 0; i < corr_img.getSize(); i++) corr_img[i] = 1. / (op.local_sqrtXi2[ipart]*op.local_sqrtXi2[ipart]); else for(int i = 0; i < corr_img.getSize(); i++) corr_img[i] = *(op.local_Minvsigma2s[ipart].data + i ); // ctf-correction or not ( NOTE this is not were the difference metric is ctf-corrected, but // rather where we apply the additional correction to make the GPU-specific arithmetic equal // to the CPU method) if (baseMLO->do_ctf_correction && baseMLO->refs_are_ctf_corrected) for(int i = 0; i < corr_img.getSize(); i++) corr_img[i] *= DIRECT_MULTIDIM_ELEM(op.local_Fctfs[ipart], i)*DIRECT_MULTIDIM_ELEM(op.local_Fctfs[ipart], i); // scale-correction or not ( NOTE this is not were the difference metric is scale-corrected, but // rather where we apply the additional correction to make the GPU-specific arithmetic equal // to the CPU method) XFLOAT myscale = baseMLO->mymodel.scale_correction[group_id]; if (baseMLO->do_scale_correction) for(int i = 0; i < corr_img.getSize(); i++) corr_img[i] *= myscale * myscale; } void generateEulerMatrices( XFLOAT padding_factor, ProjectionParams &ProjectionData, XFLOAT *eulers, bool inverse) { RFLOAT alpha, beta, gamma; RFLOAT ca, sa, cb, sb, cg, sg; RFLOAT cc, cs, sc, ss; for (long int i = 0; i < ProjectionData.rots.size(); i++) { //TODO In a sense we're doing RAD2DEG just to do DEG2RAD here. //The only place the degree value is actually used is in the metadata assignment. 
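// The matrix filled in below is the usual RELION/XMIPP ZYZ Euler rotation
//   R = Rz(psi) * Ry(tilt) * Rz(rot)
// stored row-major in eulers[9*i + ...]; when 'inverse' is true its transpose is
// stored instead. cc, cs, sc and ss are just the precomputed products
// cos(tilt)*cos(rot), cos(tilt)*sin(rot), sin(tilt)*cos(rot) and sin(tilt)*sin(rot).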
alpha = DEG2RAD(ProjectionData.rots[i]); beta = DEG2RAD(ProjectionData.tilts[i]); gamma = DEG2RAD(ProjectionData.psis[i]); sincos(alpha, &sa, &ca); sincos(beta, &sb, &cb); sincos(gamma, &sg, &cg); cc = cb * ca; cs = cb * sa; sc = sb * ca; ss = sb * sa; if(inverse) { eulers[9 * i + 0] = ( cg * cc - sg * sa) ;// * padding_factor; //00 eulers[9 * i + 1] = (-sg * cc - cg * sa) ;// * padding_factor; //10 eulers[9 * i + 2] = ( sc ) ;// * padding_factor; //20 eulers[9 * i + 3] = ( cg * cs + sg * ca) ;// * padding_factor; //01 eulers[9 * i + 4] = (-sg * cs + cg * ca) ;// * padding_factor; //11 eulers[9 * i + 5] = ( ss ) ;// * padding_factor; //21 eulers[9 * i + 6] = (-cg * sb ) ;// * padding_factor; //02 eulers[9 * i + 7] = ( sg * sb ) ;// * padding_factor; //12 eulers[9 * i + 8] = ( cb ) ;// * padding_factor; //22 } else { eulers[9 * i + 0] = ( cg * cc - sg * sa) ;// * padding_factor; //00 eulers[9 * i + 1] = ( cg * cs + sg * ca) ;// * padding_factor; //01 eulers[9 * i + 2] = (-cg * sb ) ;// * padding_factor; //02 eulers[9 * i + 3] = (-sg * cc - cg * sa) ;// * padding_factor; //10 eulers[9 * i + 4] = (-sg * cs + cg * ca) ;// * padding_factor; //11 eulers[9 * i + 5] = ( sg * sb ) ;// * padding_factor; //12 eulers[9 * i + 6] = ( sc ) ;// * padding_factor; //20 eulers[9 * i + 7] = ( ss ) ;// * padding_factor; //21 eulers[9 * i + 8] = ( cb ) ;// * padding_factor; //22 } } } long unsigned generateProjectionSetupFine( OptimisationParamters &op, SamplingParameters &sp, MlOptimiser *baseMLO, unsigned iclass, ProjectionParams &ProjectionData) // FIXME : For coarse iteration this is **SLOW** HERE ARE SOME NOTES FOR PARALLELIZING IT (GPU OFFLOAD): /* * Since it is based on push_back, parallelizing sould be fine given som atomic opreation appends, * what takes time is looping through all this. The job-splitting in collect2jobs-preproccesing and * divideOrientationsIntoBlockjobs() relies on chunks of shared orientations being adjacent in * ProjectionData.rot_id (and thus also .rot_idx), but does not care which order those chunks appear * in. So as long as a parallelilsm and "atomic push_back" is organised to use an orientation as a * minimum unit, the job-splitting should be fine with the output. 
*/ { //Local variables std::vector< RFLOAT > oversampled_rot, oversampled_tilt, oversampled_psi; long int orientation_num = 0; for (long int idir = sp.idir_min, iorient = 0; idir <= sp.idir_max; idir++) { for (long int ipsi = sp.ipsi_min, ipart = 0; ipsi <= sp.ipsi_max; ipsi++, iorient++) { long int iorientclass = iclass * sp.nr_dir * sp.nr_psi + iorient; if (baseMLO->isSignificantAnyParticleAnyTranslation(iorientclass, sp.itrans_min, sp.itrans_max, op.Mcoarse_significant)) { // Now get the oversampled (rot, tilt, psi) triplets // This will be only the original (rot,tilt,psi) triplet in the first pass (sp.current_oversampling==0) baseMLO->sampling.getOrientations(idir, ipsi, sp.current_oversampling, oversampled_rot, oversampled_tilt, oversampled_psi, op.pointer_dir_nonzeroprior, op.directions_prior, op.pointer_psi_nonzeroprior, op.psi_prior); // Loop over all oversampled orientations (only a single one in the first pass) for (long int iover_rot = 0; iover_rot < sp.nr_oversampled_rot; iover_rot++, ipart++) { ProjectionData.pushBackAll( (long unsigned)iclass, oversampled_rot[iover_rot], oversampled_tilt[iover_rot], oversampled_psi[iover_rot], iorientclass, iover_rot ); orientation_num ++; } } } } ProjectionData.orientation_num[iclass]=orientation_num; return orientation_num; } void runWavgKernel( CudaProjectorKernel &projector, XFLOAT *eulers, XFLOAT *Fimg_real, XFLOAT *Fimg_imag, XFLOAT *trans_x, XFLOAT *trans_y, XFLOAT *trans_z, XFLOAT *sorted_weights, XFLOAT *ctfs, XFLOAT *wdiff2s_parts, XFLOAT *wdiff2s_AA, XFLOAT *wdiff2s_XA, OptimisationParamters &op, long unsigned orientation_num, long unsigned translation_num, unsigned image_size, long int ipart, int group_id, int exp_iclass, XFLOAT part_scale, bool refs_are_ctf_corrected, bool data_is_3D, cudaStream_t stream) { //We only want as many blocks as there are chunks of orientations to be treated //within the same block (this is done to reduce memory loads in the kernel). 
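	// One CUDA block is launched per orientation (block_dim = orientation_num below). The same
	// kernel launch is then repeated for six cases: {CTF-corrected reference, not corrected} x
	// {3D data, 3D reference with 2D images, 2D reference}, chosen via refs_are_ctf_corrected,
	// data_is_3D and projector.mdlZ.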
dim3 block_dim = orientation_num;//ceil((float)orientation_num/(float)REF_GROUP_SIZE); //cudaFuncSetCacheConfig(cuda_kernel_wavg_fast, cudaFuncCachePreferShared); if (refs_are_ctf_corrected) { if(data_is_3D) cuda_kernel_wavg<<>>( eulers, projector, image_size, orientation_num, Fimg_real, Fimg_imag, trans_x, trans_y, trans_z, sorted_weights, ctfs, wdiff2s_parts, wdiff2s_AA, wdiff2s_XA, translation_num, (XFLOAT) op.sum_weight[ipart], (XFLOAT) op.significant_weight[ipart], part_scale ); else if (projector.mdlZ!=0) cuda_kernel_wavg<<>>( eulers, projector, image_size, orientation_num, Fimg_real, Fimg_imag, trans_x, trans_y, trans_z, sorted_weights, ctfs, wdiff2s_parts, wdiff2s_AA, wdiff2s_XA, translation_num, (XFLOAT) op.sum_weight[ipart], (XFLOAT) op.significant_weight[ipart], part_scale ); else cuda_kernel_wavg<<>>( eulers, projector, image_size, orientation_num, Fimg_real, Fimg_imag, trans_x, trans_y, trans_z, sorted_weights, ctfs, wdiff2s_parts, wdiff2s_AA, wdiff2s_XA, translation_num, (XFLOAT) op.sum_weight[ipart], (XFLOAT) op.significant_weight[ipart], part_scale ); } else { if(data_is_3D) cuda_kernel_wavg<<>>( eulers, projector, image_size, orientation_num, Fimg_real, Fimg_imag, trans_x, trans_y, trans_z, sorted_weights, ctfs, wdiff2s_parts, wdiff2s_AA, wdiff2s_XA, translation_num, (XFLOAT) op.sum_weight[ipart], (XFLOAT) op.significant_weight[ipart], part_scale ); else if (projector.mdlZ!=0) cuda_kernel_wavg<<>>( eulers, projector, image_size, orientation_num, Fimg_real, Fimg_imag, trans_x, trans_y, trans_z, sorted_weights, ctfs, wdiff2s_parts, wdiff2s_AA, wdiff2s_XA, translation_num, (XFLOAT) op.sum_weight[ipart], (XFLOAT) op.significant_weight[ipart], part_scale ); else cuda_kernel_wavg<<>>( eulers, projector, image_size, orientation_num, Fimg_real, Fimg_imag, trans_x, trans_y, trans_z, sorted_weights, ctfs, wdiff2s_parts, wdiff2s_AA, wdiff2s_XA, translation_num, (XFLOAT) op.sum_weight[ipart], (XFLOAT) op.significant_weight[ipart], part_scale ); } LAUNCH_HANDLE_ERROR(cudaGetLastError()); } void runBackProjectKernel( CudaBackprojector &BP, CudaProjectorKernel &projector, XFLOAT *d_img_real, XFLOAT *d_img_imag, XFLOAT *trans_x, XFLOAT *trans_y, XFLOAT *trans_z, XFLOAT* d_weights, XFLOAT* d_Minvsigma2s, XFLOAT* d_ctfs, unsigned long translation_num, XFLOAT significant_weight, XFLOAT weight_norm, XFLOAT *d_eulers, int imgX, int imgY, int imgZ, unsigned long imageCount, bool data_is_3D, bool do_sgd, cudaStream_t optStream) { if(BP.mdlZ==1) { cuda_kernel_backproject2D<<>>( d_img_real, d_img_imag, trans_x, trans_y, d_weights, d_Minvsigma2s, d_ctfs, translation_num, significant_weight, weight_norm, d_eulers, BP.d_mdlReal, BP.d_mdlImag, BP.d_mdlWeight, BP.maxR, BP.maxR2, BP.padding_factor, imgX, imgY, imgX*imgY, BP.mdlX, BP.mdlInitY); LAUNCH_HANDLE_ERROR(cudaGetLastError()); } else { if(do_sgd) { if(data_is_3D) cuda_kernel_backprojectSGD<<>>( projector, d_img_real, d_img_imag, trans_x, trans_y, trans_z, d_weights, d_Minvsigma2s, d_ctfs, translation_num, significant_weight, weight_norm, d_eulers, BP.d_mdlReal, BP.d_mdlImag, BP.d_mdlWeight, BP.maxR, BP.maxR2, BP.padding_factor, imgX, imgY, imgZ, imgX*imgY*imgZ, BP.mdlX, BP.mdlY, BP.mdlInitY, BP.mdlInitZ); else cuda_kernel_backprojectSGD<<>>( projector, d_img_real, d_img_imag, trans_x, trans_y, trans_z, d_weights, d_Minvsigma2s, d_ctfs, translation_num, significant_weight, weight_norm, d_eulers, BP.d_mdlReal, BP.d_mdlImag, BP.d_mdlWeight, BP.maxR, BP.maxR2, BP.padding_factor, imgX, imgY, imgZ, imgX*imgY*imgZ, BP.mdlX, BP.mdlY, BP.mdlInitY, 
BP.mdlInitZ); } else { if(data_is_3D) cuda_kernel_backproject3D<<>>( d_img_real, d_img_imag, trans_x, trans_y, trans_z, d_weights, d_Minvsigma2s, d_ctfs, translation_num, significant_weight, weight_norm, d_eulers, BP.d_mdlReal, BP.d_mdlImag, BP.d_mdlWeight, BP.maxR, BP.maxR2, BP.padding_factor, imgX, imgY, imgZ, imgX*imgY*imgZ, BP.mdlX, BP.mdlY, BP.mdlInitY, BP.mdlInitZ); else cuda_kernel_backproject3D<<>>( d_img_real, d_img_imag, trans_x, trans_y, trans_z, d_weights, d_Minvsigma2s, d_ctfs, translation_num, significant_weight, weight_norm, d_eulers, BP.d_mdlReal, BP.d_mdlImag, BP.d_mdlWeight, BP.maxR, BP.maxR2, BP.padding_factor, imgX, imgY, imgZ, imgX*imgY*imgZ, BP.mdlX, BP.mdlY, BP.mdlInitY, BP.mdlInitZ); } LAUNCH_HANDLE_ERROR(cudaGetLastError()); } } __global__ void cuda_kernel_allweights_to_mweights( unsigned long * d_iorient, XFLOAT * d_allweights, XFLOAT * d_mweights, unsigned long orientation_num, unsigned long translation_num ) { size_t idx = blockIdx.x * WEIGHT_MAP_BLOCK_SIZE + threadIdx.x; if (idx < orientation_num*translation_num) d_mweights[d_iorient[idx/translation_num] * translation_num + idx%translation_num] = d_allweights[idx/translation_num * translation_num + idx%translation_num]; } void mapAllWeightsToMweights( unsigned long * d_iorient, //projectorPlan.iorientclasses XFLOAT * d_allweights, //allWeights XFLOAT * d_mweights, //Mweight unsigned long orientation_num, //projectorPlan.orientation_num unsigned long translation_num, //translation_num cudaStream_t stream ) { int grid_size = ceil((float)(orientation_num*translation_num)/(float)WEIGHT_MAP_BLOCK_SIZE); cuda_kernel_allweights_to_mweights<<< grid_size, WEIGHT_MAP_BLOCK_SIZE, 0, stream >>>( d_iorient, d_allweights, d_mweights, orientation_num, translation_num); LAUNCH_HANDLE_ERROR(cudaGetLastError()); } size_t findThresholdIdxInCumulativeSum(CudaGlobalPtr &data, XFLOAT threshold) { int grid_size = ceil((float)(data.getSize()-1)/(float)FIND_IN_CUMULATIVE_BLOCK_SIZE); if(grid_size==0) { return(0); } else { CudaGlobalPtr idx(1, data.getStream(), data.getAllocator()); idx[0] = 0; idx.put_on_device(); cuda_kernel_find_threshold_idx_in_cumulative<<< grid_size, FIND_IN_CUMULATIVE_BLOCK_SIZE, 0, data.getStream() >>>( ~data, threshold, data.getSize()-1, ~idx); idx.cp_to_host(); DEBUG_HANDLE_ERROR(cudaStreamSynchronize(data.getStream())); return idx[0]; } } void runDiff2KernelCoarse( CudaProjectorKernel &projector, XFLOAT *trans_x, XFLOAT *trans_y, XFLOAT *trans_z, XFLOAT *corr_img, XFLOAT *Fimg_real, XFLOAT *Fimg_imag, XFLOAT *d_eulers, XFLOAT *diff2s, XFLOAT local_sqrtXi2, long unsigned orientation_num, int translation_num, int image_size, cudaStream_t stream, bool do_CC, bool data_is_3D) { const int blocks3D = (data_is_3D? 
D2C_BLOCK_SIZE_DATA3D : D2C_BLOCK_SIZE_REF3D); if(!do_CC) { if(projector.mdlZ!=0) { #ifdef CUDA_DOUBLE_PRECISION if (translation_num > blocks3D*4) CRITICAL(ERR_TRANSLIM); #else if (translation_num > blocks3D*8) CRITICAL(ERR_TRANSLIM); #endif unsigned rest = orientation_num % blocks3D; long unsigned even_orientation_num = orientation_num - rest; if (translation_num <= blocks3D) { if (even_orientation_num != 0) { if(data_is_3D) cuda_kernel_diff2_coarse <<>>( d_eulers, trans_x, trans_y, trans_z, Fimg_real, Fimg_imag, projector, corr_img, diff2s, translation_num, image_size); else cuda_kernel_diff2_coarse <<>>( d_eulers, trans_x, trans_y, trans_z, Fimg_real, Fimg_imag, projector, corr_img, diff2s, translation_num, image_size); } if (rest != 0) { if(data_is_3D) cuda_kernel_diff2_coarse <<>>( &d_eulers[9*even_orientation_num], trans_x, trans_y, trans_z, Fimg_real, Fimg_imag, projector, corr_img, &diff2s[translation_num*even_orientation_num], translation_num, image_size); else cuda_kernel_diff2_coarse <<>>( &d_eulers[9*even_orientation_num], trans_x, trans_y, trans_z, Fimg_real, Fimg_imag, projector, corr_img, &diff2s[translation_num*even_orientation_num], translation_num, image_size); } } else if (translation_num <= blocks3D*2) { if (even_orientation_num != 0) { if(data_is_3D) cuda_kernel_diff2_coarse <<>>( d_eulers, trans_x, trans_y, trans_z, Fimg_real, Fimg_imag, projector, corr_img, diff2s, translation_num, image_size); else cuda_kernel_diff2_coarse <<>>( d_eulers, trans_x, trans_y, trans_z, Fimg_real, Fimg_imag, projector, corr_img, diff2s, translation_num, image_size); } if (rest != 0) { if(data_is_3D) cuda_kernel_diff2_coarse <<>>( &d_eulers[9*even_orientation_num], trans_x, trans_y, trans_z, Fimg_real, Fimg_imag, projector, corr_img, &diff2s[translation_num*even_orientation_num], translation_num, image_size); else cuda_kernel_diff2_coarse <<>>( &d_eulers[9*even_orientation_num], trans_x, trans_y, trans_z, Fimg_real, Fimg_imag, projector, corr_img, &diff2s[translation_num*even_orientation_num], translation_num, image_size); } } else if (translation_num <= blocks3D*4) { if (even_orientation_num != 0) { if(data_is_3D) cuda_kernel_diff2_coarse <<>>( d_eulers, trans_x, trans_y, trans_z, Fimg_real, Fimg_imag, projector, corr_img, diff2s, translation_num, image_size); else cuda_kernel_diff2_coarse <<>>( d_eulers, trans_x, trans_y, trans_z, Fimg_real, Fimg_imag, projector, corr_img, diff2s, translation_num, image_size); } if (rest != 0) { if(data_is_3D) cuda_kernel_diff2_coarse <<>>( &d_eulers[9*even_orientation_num], trans_x, trans_y, trans_z, Fimg_real, Fimg_imag, projector, corr_img, &diff2s[translation_num*even_orientation_num], translation_num, image_size); else cuda_kernel_diff2_coarse <<>>( &d_eulers[9*even_orientation_num], trans_x, trans_y, trans_z, Fimg_real, Fimg_imag, projector, corr_img, &diff2s[translation_num*even_orientation_num], translation_num, image_size); } } #ifndef CUDA_DOUBLE_PRECISION else { if (even_orientation_num != 0) { if(data_is_3D) cuda_kernel_diff2_coarse <<>>( d_eulers, trans_x, trans_y, trans_z, Fimg_real, Fimg_imag, projector, corr_img, diff2s, translation_num, image_size); else cuda_kernel_diff2_coarse <<>>( d_eulers, trans_x, trans_y, trans_z, Fimg_real, Fimg_imag, projector, corr_img, diff2s, translation_num, image_size); } if (rest != 0) { if(data_is_3D) cuda_kernel_diff2_coarse <<>>( &d_eulers[9*even_orientation_num], trans_x, trans_y, trans_z, Fimg_real, Fimg_imag, projector, corr_img, &diff2s[translation_num*even_orientation_num], translation_num, 
image_size); else cuda_kernel_diff2_coarse <<>>( &d_eulers[9*even_orientation_num], trans_x, trans_y, trans_z, Fimg_real, Fimg_imag, projector, corr_img, &diff2s[translation_num*even_orientation_num], translation_num, image_size); } } #endif } else { if (translation_num > D2C_BLOCK_SIZE_2D) { printf("Number of coarse translations larger than %d on the GPU not supported.\n", D2C_BLOCK_SIZE_2D); fflush(stdout); exit(1); } unsigned rest = orientation_num % D2C_EULERS_PER_BLOCK_2D; long unsigned even_orientation_num = orientation_num - rest; if (even_orientation_num != 0) { if(data_is_3D) cuda_kernel_diff2_coarse <<>>( d_eulers, trans_x, trans_y, trans_z, Fimg_real, Fimg_imag, projector, corr_img, diff2s, translation_num, image_size); else cuda_kernel_diff2_coarse <<>>( d_eulers, trans_x, trans_y, trans_z, Fimg_real, Fimg_imag, projector, corr_img, diff2s, translation_num, image_size); LAUNCH_HANDLE_ERROR(cudaGetLastError()); } if (rest != 0) { if(data_is_3D) cuda_kernel_diff2_coarse <<>>( &d_eulers[9*even_orientation_num], trans_x, trans_y, trans_z, Fimg_real, Fimg_imag, projector, corr_img, &diff2s[translation_num*even_orientation_num], translation_num, image_size); else cuda_kernel_diff2_coarse <<>>( &d_eulers[9*even_orientation_num], trans_x, trans_y, trans_z, Fimg_real, Fimg_imag, projector, corr_img, &diff2s[translation_num*even_orientation_num], translation_num, image_size); LAUNCH_HANDLE_ERROR(cudaGetLastError()); } } } else { dim3 CCblocks(orientation_num,translation_num); if(data_is_3D) cuda_kernel_diff2_CC_coarse <<>>( d_eulers, Fimg_real, Fimg_imag, trans_x, trans_y, trans_z, projector, corr_img, diff2s, translation_num, image_size, local_sqrtXi2); else if(projector.mdlZ!=0) cuda_kernel_diff2_CC_coarse <<>>( d_eulers, Fimg_real, Fimg_imag, trans_x, trans_y, trans_z, projector, corr_img, diff2s, translation_num, image_size, local_sqrtXi2); else cuda_kernel_diff2_CC_coarse <<>>( d_eulers, Fimg_real, Fimg_imag, trans_x, trans_y, trans_z, projector, corr_img, diff2s, translation_num, image_size, local_sqrtXi2); LAUNCH_HANDLE_ERROR(cudaGetLastError()); } } void runDiff2KernelFine( CudaProjectorKernel &projector, XFLOAT *corr_img, XFLOAT *Fimgs_real, XFLOAT *Fimgs_imag, XFLOAT *trans_x, XFLOAT *trans_y, XFLOAT *trans_z, XFLOAT *eulers, long unsigned *rot_id, long unsigned *rot_idx, long unsigned *trans_idx, long unsigned *job_idx, long unsigned *job_num, XFLOAT *diff2s, OptimisationParamters &op, MlOptimiser *baseMLO, long unsigned orientation_num, long unsigned translation_num, long unsigned significant_num, unsigned image_size, int ipart, int exp_iclass, cudaStream_t stream, long unsigned job_num_count, bool do_CC, bool data_is_3D) { dim3 block_dim = job_num_count; if(!do_CC) { if(data_is_3D) cuda_kernel_diff2_fine <<>>( eulers, Fimgs_real, Fimgs_imag, trans_x, trans_y, trans_z, projector, corr_img, // in these non-CC kernels this is effectively an adjusted MinvSigma2 diff2s, image_size, op.highres_Xi2_imgs[ipart] / 2., orientation_num, translation_num, job_num_count, //significant_num, rot_idx, trans_idx, job_idx, job_num); else if(projector.mdlZ!=0) cuda_kernel_diff2_fine <<>>( eulers, Fimgs_real, Fimgs_imag, trans_x, trans_y, trans_z, projector, corr_img, // in these non-CC kernels this is effectively an adjusted MinvSigma2 diff2s, image_size, op.highres_Xi2_imgs[ipart] / 2., orientation_num, translation_num, job_num_count, //significant_num, rot_idx, trans_idx, job_idx, job_num); else cuda_kernel_diff2_fine <<>>( eulers, Fimgs_real, Fimgs_imag, trans_x, trans_y, trans_z, projector, 
corr_img, // in these non-CC kernels this is effectively an adjusted MinvSigma2 diff2s, image_size, op.highres_Xi2_imgs[ipart] / 2., orientation_num, translation_num, job_num_count, //significant_num, rot_idx, trans_idx, job_idx, job_num); LAUNCH_HANDLE_ERROR(cudaGetLastError()); } else { if(data_is_3D) cuda_kernel_diff2_CC_fine <<>>( eulers, Fimgs_real, Fimgs_imag, trans_x, trans_y, trans_z, projector, corr_img, diff2s, image_size, op.highres_Xi2_imgs[ipart] / 2., (XFLOAT) op.local_sqrtXi2[ipart], orientation_num, translation_num, job_num_count, //significant_num, rot_idx, trans_idx, job_idx, job_num); else if(projector.mdlZ!=0) cuda_kernel_diff2_CC_fine <<>>( eulers, Fimgs_real, Fimgs_imag, trans_x, trans_y, trans_z, projector, corr_img, diff2s, image_size, op.highres_Xi2_imgs[ipart] / 2., (XFLOAT) op.local_sqrtXi2[ipart], orientation_num, translation_num, job_num_count, //significant_num, rot_idx, trans_idx, job_idx, job_num); else cuda_kernel_diff2_CC_fine <<>>( eulers, Fimgs_real, Fimgs_imag, trans_x, trans_y, trans_z, projector, corr_img, diff2s, image_size, op.highres_Xi2_imgs[ipart] / 2., (XFLOAT) op.local_sqrtXi2[ipart], orientation_num, translation_num, job_num_count, //significant_num, rot_idx, trans_idx, job_idx, job_num); LAUNCH_HANDLE_ERROR(cudaGetLastError()); } } void runCollect2jobs( dim3 grid_dim, XFLOAT * oo_otrans_x, // otrans-size -> make const XFLOAT * oo_otrans_y, // otrans-size -> make const XFLOAT * oo_otrans_z, // otrans-size -> make const XFLOAT * myp_oo_otrans_x2y2z2, // otrans-size -> make const XFLOAT * weights, XFLOAT significant_weight, XFLOAT sum_weight, unsigned long nr_trans, unsigned long nr_oversampled_trans, unsigned long nr_oversampled_rot, int oversamples, bool skip_rots, XFLOAT * p_weights, XFLOAT * p_thr_wsum_prior_offsetx_class, XFLOAT * p_thr_wsum_prior_offsety_class, XFLOAT * p_thr_wsum_prior_offsetz_class, XFLOAT * p_thr_wsum_sigma2_offset, size_t * rot_idx, size_t * trans_idx, size_t * jobOrigin, size_t * jobExtent, bool data_is_3D ) { if(data_is_3D) { size_t shared_buffer = sizeof(XFLOAT)*SUMW_BLOCK_SIZE*5; // x+y+z+myp+weights cuda_kernel_collect2jobs<<>>( oo_otrans_x, // otrans-size -> make const oo_otrans_y, // otrans-size -> make const oo_otrans_z, // otrans-size -> make const myp_oo_otrans_x2y2z2, // otrans-size -> make const weights, significant_weight, sum_weight, nr_trans, nr_oversampled_trans, nr_oversampled_rot, oversamples, skip_rots, p_weights, p_thr_wsum_prior_offsetx_class, p_thr_wsum_prior_offsety_class, p_thr_wsum_prior_offsetz_class, p_thr_wsum_sigma2_offset, rot_idx, trans_idx, jobOrigin, jobExtent); } else { size_t shared_buffer = sizeof(XFLOAT)*SUMW_BLOCK_SIZE*4; // x+y+myp+weights cuda_kernel_collect2jobs<<>>( oo_otrans_x, // otrans-size -> make const oo_otrans_y, // otrans-size -> make const oo_otrans_z, // otrans-size -> make const myp_oo_otrans_x2y2z2, // otrans-size -> make const weights, significant_weight, sum_weight, nr_trans, nr_oversampled_trans, nr_oversampled_rot, oversamples, skip_rots, p_weights, p_thr_wsum_prior_offsetx_class, p_thr_wsum_prior_offsety_class, p_thr_wsum_prior_offsetz_class, p_thr_wsum_sigma2_offset, rot_idx, trans_idx, jobOrigin, jobExtent); } } //void windowFourierTransform2( // XFLOAT *d_in_real, // XFLOAT *d_in_imag, // XFLOAT *d_out_real, // XFLOAT *d_out_imag, // unsigned iX, unsigned iY, unsigned iZ, //Input dimensions // unsigned oX, unsigned oY, unsigned oZ, //Output dimensions // cudaStream_t stream // ) //{ // if (iX > 1 && iY/2 + 1 != iX) // REPORT_ERROR("windowFourierTransform 
ERROR: the Fourier transform should be of an image with equal sizes in all dimensions!"); // // if (oY == iX) // REPORT_ERROR("windowFourierTransform ERROR: there is a one-to-one map between input and output!"); // // cudaMemInit( d_out_real, 0, (size_t) oX*oY*oZ, stream ); // cudaMemInit( d_out_imag, 0, (size_t) oX*oY*oZ, stream ); // // if (oY > iX) // { // long int max_r2 = (iX - 1) * (iX - 1); // // unsigned grid_dim = ceil((float)(iX*iY*iZ) / (float) WINDOW_FT_BLOCK_SIZE); // cuda_kernel_window_fourier_transform<<< grid_dim, WINDOW_FT_BLOCK_SIZE, 0, stream >>>( // d_in_real, // d_in_imag, // d_out_real, // d_out_imag, // iX, iY, iZ, iX * iY, //Input dimensions // oX, oY, oZ, oX * oY, //Output dimensions // iX*iY*iZ, // max_r2 ); // } // else // { // unsigned grid_dim = ceil((float)(oX*oY*oZ) / (float) WINDOW_FT_BLOCK_SIZE); // cuda_kernel_window_fourier_transform<<< grid_dim, WINDOW_FT_BLOCK_SIZE, 0, stream >>>( // d_in_real, // d_in_imag, // d_out_real, // d_out_imag, // iX, iY, iZ, iX * iY, //Input dimensions // oX, oY, oZ, oX * oY, //Output dimensions // oX*oY*oZ); // } //} void windowFourierTransform2( CudaGlobalPtr &d_in, CudaGlobalPtr &d_out, size_t iX, size_t iY, size_t iZ, //Input dimensions size_t oX, size_t oY, size_t oZ, //Output dimensions size_t Npsi, size_t pos, cudaStream_t stream) { if (iX > 1 && iY/2 + 1 != iX) REPORT_ERROR("windowFourierTransform ERROR: the Fourier transform should be of an image with equal sizes in all dimensions!"); // if (oX == iX) // REPORT_ERROR("windowFourierTransform ERROR: there is a one-to-one map between input and output!"); deviceInitComplexValue(d_out, (XFLOAT)0.); HANDLE_ERROR(cudaStreamSynchronize(d_out.getStream())); if(oX==iX) { HANDLE_ERROR(cudaStreamSynchronize(d_in.getStream())); cudaCpyDeviceToDevice(&d_in.d_ptr[pos], ~d_out, oX*oY*oZ*Npsi, d_out.getStream() ); return; } if (oX > iX) { long int max_r2 = (iX - 1) * (iX - 1); dim3 grid_dim(ceil((float)(iX*iY*iZ) / (float) WINDOW_FT_BLOCK_SIZE),Npsi); cuda_kernel_window_fourier_transform<<< grid_dim, WINDOW_FT_BLOCK_SIZE, 0, d_out.getStream() >>>( &d_in.d_ptr[pos], d_out.d_ptr, iX, iY, iZ, iX * iY, //Input dimensions oX, oY, oZ, oX * oY, //Output dimensions iX*iY*iZ, max_r2 ); LAUNCH_HANDLE_ERROR(cudaGetLastError()); } else { dim3 grid_dim(ceil((float)(oX*oY*oZ) / (float) WINDOW_FT_BLOCK_SIZE),Npsi); cuda_kernel_window_fourier_transform<<< grid_dim, WINDOW_FT_BLOCK_SIZE, 0, d_out.getStream() >>>( &d_in.d_ptr[pos], d_out.d_ptr, iX, iY, iZ, iX * iY, //Input dimensions oX, oY, oZ, oX * oY, //Output dimensions oX*oY*oZ); LAUNCH_HANDLE_ERROR(cudaGetLastError()); } } void selfApplyBeamTilt2(MultidimArray &Fimg, RFLOAT beamtilt_x, RFLOAT beamtilt_y, RFLOAT wavelength, RFLOAT Cs, RFLOAT angpix, int ori_size) { if (Fimg.getDim() != 2) REPORT_ERROR("applyBeamTilt can only be done on 2D Fourier Transforms!"); RFLOAT boxsize = angpix * ori_size; RFLOAT factor = 0.360 * Cs * 10000000 * wavelength * wavelength / (boxsize * boxsize * boxsize); for (unsigned n = 0 ; n < Fimg.yxdim; n ++) { unsigned i = n / Fimg.xdim; unsigned j = n % Fimg.xdim; unsigned jp = j; int ip = i < Fimg.xdim ? i : i - Fimg.ydim; RFLOAT delta_phase = factor * (ip * ip + jp * jp) * (ip * beamtilt_y + jp * beamtilt_x); RFLOAT realval = Fimg.data[i*Fimg.xdim+j].real; RFLOAT imagval = Fimg.data[i*Fimg.xdim+j].imag; RFLOAT mag = sqrt(realval * realval + imagval * imagval); RFLOAT phas = atan2(imagval, realval) + DEG2RAD(delta_phase); // apply phase shift! 
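		// Rebuild the complex pixel from its magnitude and the tilt-shifted phase:
		// (realval, imagval) <- mag * (cos(phas), sin(phas)), with delta_phase already converted
		// to radians and added to the original phase above.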
realval = mag * cos(phas); imagval = mag * sin(phas); Fimg.data[i*Fimg.xdim+j] = Complex(realval, imagval); } } relion-3.1.3/src/gpu_utils/cuda_helper_functions.cuh000066400000000000000000000454211411340063500226570ustar00rootroot00000000000000#ifndef CUDA_HELPER_FUNCTIONS_CUH_ #define CUDA_HELPER_FUNCTIONS_CUH_ #include "src/gpu_utils/cuda_ml_optimiser.h" #include "src/gpu_utils/cuda_projector.h" #include "src/gpu_utils/cuda_projector.cuh" #include "src/gpu_utils/cuda_benchmark_utils.h" #include "src/gpu_utils/cuda_mem_utils.h" #include "src/gpu_utils/cuda_kernels/helper.cuh" #include "src/gpu_utils/cuda_kernels/diff2.cuh" #include "src/gpu_utils/cuda_kernels/wavg.cuh" #include #include #include #include #include #include #include "src/complex.h" #include #include #include "src/parallel.h" #include /* * This assisting function goes over the orientations determined as significant for this image, and checks * which translations should be included in the list of those which differences will be calculated for. * * Any contiguous translations with a shared orientation are grouped together into a "job" which is supplied * to the difference kernel. If there are more contiguous translations than the specified "chunk" number, * these are split into separate jobs, to increase parallelism at the cost of redundant memory reads. */ long int makeJobsForDiff2Fine( OptimisationParamters &op, SamplingParameters &sp, long int orientation_num, long int translation_num, ProjectionParams &FineProjectionData, std::vector< long unsigned > &iover_transes, std::vector< long unsigned > &ihiddens, long int nr_over_orient, long int nr_over_trans, int ipart, IndexedDataArray &FPW, // FPW=FinePassWeights IndexedDataArrayMask &dataMask, int chunk); /* * This assisting function goes over the weight-array and groups all weights with shared * orientations into 'jobs' which are fed into the collect-kenrel, which reduces all translations * with computed differences into a reduced object to be back-projected. 
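 * Jobs are described by the jobOrigin/jobExtent arrays of the IndexedDataArrayMask:
 * jobOrigin[j] is the index of the first weight belonging to job j and jobExtent[j] the number
 * of consecutive weights (translations) that share its orientation.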
*/ int makeJobsForCollect(IndexedDataArray &FPW, IndexedDataArrayMask &dataMask, unsigned long NewJobNum); // FPW=FinePassWeights /* * Maps weights to a decoupled indexing of translations and orientations */ void mapWeights( unsigned long orientation_start, XFLOAT *mapped_weights, unsigned orientation_num, unsigned long idxArr_start, unsigned long idxArr_end, unsigned translation_num, XFLOAT *weights, long unsigned *rot_idx, long unsigned *trans_idx, unsigned long current_oversampling); void buildCorrImage(MlOptimiser *baseMLO, OptimisationParamters &op, CudaGlobalPtr &corr_img, long int ipart, long int group_id); void generateEulerMatrices( XFLOAT padding_factor, ProjectionParams &ProjectionData, XFLOAT *eulers, bool inverse); long unsigned generateProjectionSetupFine( OptimisationParamters &op, SamplingParameters &sp, MlOptimiser *baseMLO, unsigned iclass, ProjectionParams &ProjectionData); void runWavgKernel( CudaProjectorKernel &projector, XFLOAT *eulers, XFLOAT *Fimgs_real, XFLOAT *Fimgs_imag, XFLOAT *trans_x, XFLOAT *trans_y, XFLOAT *trans_z, XFLOAT *sorted_weights, XFLOAT *ctfs, XFLOAT *wdiff2s_parts, XFLOAT *wdiff2s_AA, XFLOAT *wdiff2s_XA, OptimisationParamters &op, long unsigned orientation_num, long unsigned translation_num, unsigned image_size, long int ipart, int group_id, int exp_iclass, XFLOAT part_scale, bool refs_are_ctf_corrected, bool data_is_3D, cudaStream_t stream); void runBackProjectKernel( CudaBackprojector &BP, CudaProjectorKernel &projector, XFLOAT *d_img_real, XFLOAT *d_img_imag, XFLOAT *trans_x, XFLOAT *trans_y, XFLOAT *trans_z, XFLOAT* d_weights, XFLOAT* d_Minvsigma2s, XFLOAT* d_ctfs, unsigned long translation_num, XFLOAT significant_weight, XFLOAT weight_norm, XFLOAT *d_eulers, int imgX, int imgY, int imgZ, unsigned long imageCount, bool data_is_3D, bool do_sgd, cudaStream_t optStream); #define INIT_VALUE_BLOCK_SIZE 512 template< typename T> __global__ void cuda_kernel_init_complex_value( T *data, XFLOAT value, size_t size) { size_t idx = blockIdx.x * INIT_VALUE_BLOCK_SIZE + threadIdx.x; if (idx < size) { data[idx].x = value; data[idx].y = value; } } template< typename T> __global__ void cuda_kernel_init_value( T *data, T value, size_t size) { size_t idx = blockIdx.x * INIT_VALUE_BLOCK_SIZE + threadIdx.x; if (idx < size) data[idx] = value; } template< typename T> void deviceInitComplexValue(CudaGlobalPtr &data, XFLOAT value) { int grid_size = ceil((float)(data.getSize())/(float)INIT_VALUE_BLOCK_SIZE); cuda_kernel_init_complex_value<<< grid_size, INIT_VALUE_BLOCK_SIZE, 0, data.getStream() >>>( ~data, value, data.getSize()); } template< typename T> void deviceInitValue(CudaGlobalPtr &data, T value) { int grid_size = ceil((float)data.getSize()/(float)INIT_VALUE_BLOCK_SIZE); cuda_kernel_init_value<<< grid_size, INIT_VALUE_BLOCK_SIZE, 0, data.getStream() >>>( ~data, value, data.getSize()); LAUNCH_HANDLE_ERROR(cudaGetLastError()); } template< typename T> void deviceInitValue(CudaGlobalPtr &data, T value, size_t Size) { int grid_size = ceil((float)Size/(float)INIT_VALUE_BLOCK_SIZE); cuda_kernel_init_value<<< grid_size, INIT_VALUE_BLOCK_SIZE, 0, data.getStream() >>>( ~data, value, Size); } #define WEIGHT_MAP_BLOCK_SIZE 512 __global__ void cuda_kernel_allweights_to_mweights( unsigned long * d_iorient, XFLOAT * d_allweights, XFLOAT * d_mweights, unsigned long orientation_num, unsigned long translation_num ); void mapAllWeightsToMweights( unsigned long * d_iorient, //projectorPlan.iorientclasses XFLOAT * d_allweights, //allWeights XFLOAT * d_mweights, //Mweight unsigned 
long orientation_num, //projectorPlan.orientation_num unsigned long translation_num, //translation_num cudaStream_t stream ); #define OVER_THRESHOLD_BLOCK_SIZE 512 template< typename T> __global__ void cuda_kernel_array_over_threshold( T *data, bool *passed, T threshold, size_t size) { size_t idx = blockIdx.x * OVER_THRESHOLD_BLOCK_SIZE + threadIdx.x; if (idx < size) { if (data[idx] >= threshold) passed[idx] = true; else passed[idx] = false; } } template< typename T> void arrayOverThreshold(CudaGlobalPtr &data, CudaGlobalPtr &passed, T threshold) { int grid_size = ceil((float)data.getSize()/(float)OVER_THRESHOLD_BLOCK_SIZE); cuda_kernel_array_over_threshold<<< grid_size, OVER_THRESHOLD_BLOCK_SIZE, 0, data.getStream() >>>( ~data, ~passed, threshold, data.getSize()); LAUNCH_HANDLE_ERROR(cudaGetLastError()); } #define FIND_IN_CUMULATIVE_BLOCK_SIZE 512 template< typename T> __global__ void cuda_kernel_find_threshold_idx_in_cumulative( T *data, T threshold, size_t size_m1, //data size minus 1 size_t *idx) { size_t i = blockIdx.x * FIND_IN_CUMULATIVE_BLOCK_SIZE + threadIdx.x; if (i < size_m1 && data[i] <= threshold && threshold < data[i+1]) idx[0] = i+1; } size_t findThresholdIdxInCumulativeSum(CudaGlobalPtr &data, XFLOAT threshold); void runDiff2KernelCoarse( CudaProjectorKernel &projector, XFLOAT *trans_x, XFLOAT *trans_y, XFLOAT *trans_z, XFLOAT *corr_img, XFLOAT *Fimg_real, XFLOAT *Fimg_imag, XFLOAT *d_eulers, XFLOAT *diff2s, XFLOAT local_sqrtXi2, long unsigned orientation_num, int translation_num, int image_size, cudaStream_t stream, bool do_CC, bool data_is_3D); void runDiff2KernelFine( CudaProjectorKernel &projector, XFLOAT *corr_img, XFLOAT *Fimgs_real, XFLOAT *Fimgs_imag, XFLOAT *trans_x, XFLOAT *trans_y, XFLOAT *trans_z, XFLOAT *eulers, long unsigned *rot_id, long unsigned *rot_idx, long unsigned *trans_idx, long unsigned *job_idx, long unsigned *job_num, XFLOAT *diff2s, OptimisationParamters &op, MlOptimiser *baseMLO, long unsigned orientation_num, long unsigned translation_num, long unsigned significant_num, unsigned image_size, int ipart, int exp_iclass, cudaStream_t stream, long unsigned job_num_count, bool do_CC, bool data_is_3D); #define WINDOW_FT_BLOCK_SIZE 128 template __global__ void cuda_kernel_window_fourier_transform( XFLOAT *g_in_real, XFLOAT *g_in_imag, XFLOAT *g_out_real, XFLOAT *g_out_imag, unsigned iX, unsigned iY, unsigned iZ, unsigned iYX, //Input dimensions unsigned oX, unsigned oY, unsigned oZ, unsigned oYX, //Output dimensions unsigned max_idx, unsigned max_r2 = 0 ) { unsigned n = threadIdx.x + WINDOW_FT_BLOCK_SIZE * blockIdx.x; long int image_offset = oX*oY*oZ*blockIdx.y; if (n >= max_idx) return; int k, i, kp, ip, jp; if (check_max_r2) { k = n / (iX * iY); i = (n % (iX * iY)) / iX; kp = k < iX ? k : k - iZ; ip = i < iX ? i : i - iY; jp = n % iX; if (kp*kp + ip*ip + jp*jp > max_r2) return; } else { k = n / (oX * oY); i = (n % (oX * oY)) / oX; kp = k < oX ? k : k - oZ; ip = i < oX ? i : i - oY; jp = n % oX; } g_out_real[(kp < 0 ? kp + oZ : kp) * oYX + (ip < 0 ? ip + oY : ip)*oX + jp + image_offset] = g_in_real[(kp < 0 ? kp + iZ : kp)*iYX + (ip < 0 ? ip + iY : ip)*iX + jp + image_offset]; g_out_imag[(kp < 0 ? kp + oZ : kp) * oYX + (ip < 0 ? ip + oY : ip)*oX + jp + image_offset] = g_in_imag[(kp < 0 ? kp + iZ : kp)*iYX + (ip < 0 ? 
ip + iY : ip)*iX + jp + image_offset]; } void runCollect2jobs( dim3 grid_dim, XFLOAT * oo_otrans_x, // otrans-size -> make const XFLOAT * oo_otrans_y, // otrans-size -> make const XFLOAT * oo_otrans_z, // otrans-size -> make const XFLOAT * myp_oo_otrans_x2y2z2, // otrans-size -> make const XFLOAT * weights, XFLOAT significant_weight, // TODO Put in const XFLOAT sum_weight, // TODO Put in const unsigned long nr_trans, unsigned long oversampled_trans, unsigned long oversampled_rot, int oversamples, bool skip_rots, XFLOAT * p_weights, XFLOAT * p_thr_wsum_prior_offsetx_class, XFLOAT * p_thr_wsum_prior_offsety_class, XFLOAT * p_thr_wsum_prior_offsetz_class, XFLOAT * p_thr_wsum_sigma2_offset, size_t * rot_idx, size_t * trans_idx, size_t * jobOrigin, size_t * jobExtent, bool data_is_3D ); void windowFourierTransform2( XFLOAT *d_in_real, XFLOAT *d_in_imag, XFLOAT *d_out_real, XFLOAT *d_out_imag, unsigned iX, unsigned iY, unsigned iZ, //Input dimensions unsigned oX, unsigned oY, unsigned oZ, //Output dimensions cudaStream_t stream = 0); #define WINDOW_FT_BLOCK_SIZE 128 template __global__ void cuda_kernel_window_fourier_transform( CUDACOMPLEX *g_in, CUDACOMPLEX *g_out, size_t iX, size_t iY, size_t iZ, size_t iYX, //Input dimensions size_t oX, size_t oY, size_t oZ, size_t oYX, //Output dimensions size_t max_idx, size_t max_r2 = 0 ) { size_t n = threadIdx.x + WINDOW_FT_BLOCK_SIZE * blockIdx.x; size_t oOFF = oX*oY*oZ*blockIdx.y; size_t iOFF = iX*iY*iZ*blockIdx.y; if (n >= max_idx) return; long int k, i, kp, ip, jp; if (check_max_r2) { k = n / (iX * iY); i = (n % (iX * iY)) / iX; kp = k < iX ? k : k - iZ; ip = i < iX ? i : i - iY; jp = n % iX; if (kp*kp + ip*ip + jp*jp > max_r2) return; } else { k = n / (oX * oY); i = (n % (oX * oY)) / oX; kp = k < oX ? k : k - oZ; ip = i < oX ? i : i - oY; jp = n % oX; } long int in_idx = (kp < 0 ? kp + iZ : kp) * iYX + (ip < 0 ? ip + iY : ip)*iX + jp; long int out_idx = (kp < 0 ? kp + oZ : kp) * oYX + (ip < 0 ? ip + oY : ip)*oX + jp; g_out[out_idx + oOFF] = g_in[in_idx + iOFF]; } void windowFourierTransform2( CudaGlobalPtr &d_in, CudaGlobalPtr &d_out, size_t iX, size_t iY, size_t iZ, //Input dimensions size_t oX, size_t oY, size_t oZ, //Output dimensions size_t Npsi = 1, size_t pos = 0, cudaStream_t stream = 0); void selfApplyBeamTilt2(MultidimArray &Fimg, RFLOAT beamtilt_x, RFLOAT beamtilt_y, RFLOAT wavelength, RFLOAT Cs, RFLOAT angpix, int ori_size); template void runCenterFFT(MultidimArray< T >& v, bool forward, CudaCustomAllocator *allocator) { CudaGlobalPtr img_in (v.nzyxdim, allocator); // with original data pointer // CudaGlobalPtr img_aux(v.nzyxdim, allocator); // temporary holder for (unsigned i = 0; i < v.nzyxdim; i ++) img_in[i] = (XFLOAT) v.data[i]; img_in.put_on_device(); // img_aux.device_alloc(); if ( v.getDim() == 1 ) { std::cerr << "CenterFFT on gpu reverts to cpu for dim!=2 (now dim=1)" < aux; int l, shift; l = XSIZE(v); aux.resize(l); shift = (int)(l / 2); if (!forward) shift = -shift; // Shift the input in an auxiliar vector for (int i = 0; i < l; i++) { int ip = i + shift; if (ip < 0) ip += l; else if (ip >= l) ip -= l; aux(ip) = DIRECT_A1D_ELEM(v, i); } // Copy the vector for (int i = 0; i < l; i++) DIRECT_A1D_ELEM(v, i) = DIRECT_A1D_ELEM(aux, i); } else if ( v.getDim() == 2 ) { // 2D //std::cerr << "CenterFFT on gpu with dim=2!" 
<>>(img_in.d_ptr, v.nzyxdim, XSIZE(v), YSIZE(v), xshift, yshift); LAUNCH_HANDLE_ERROR(cudaGetLastError()); img_in.cp_to_host(); // HANDLE_ERROR(cudaStreamSynchronize(0)); for (unsigned i = 0; i < v.nzyxdim; i ++) v.data[i] = (T) img_in[i]; } else if ( v.getDim() == 3 ) { std::cerr << "CenterFFT on gpu reverts to cpu for dim!=2 (now dim=3)" < aux; int l, shift; // Shift in the X direction l = XSIZE(v); aux.resize(l); shift = (int)(l / 2); if (!forward) shift = -shift; for (int k = 0; k < ZSIZE(v); k++) for (int i = 0; i < YSIZE(v); i++) { // Shift the input in an auxiliar vector for (int j = 0; j < l; j++) { int jp = j + shift; if (jp < 0) jp += l; else if (jp >= l) jp -= l; aux(jp) = DIRECT_A3D_ELEM(v, k, i, j); } // Copy the vector for (int j = 0; j < l; j++) DIRECT_A3D_ELEM(v, k, i, j) = DIRECT_A1D_ELEM(aux, j); } // Shift in the Y direction l = YSIZE(v); aux.resize(l); shift = (int)(l / 2); if (!forward) shift = -shift; for (int k = 0; k < ZSIZE(v); k++) for (int j = 0; j < XSIZE(v); j++) { // Shift the input in an auxiliar vector for (int i = 0; i < l; i++) { int ip = i + shift; if (ip < 0) ip += l; else if (ip >= l) ip -= l; aux(ip) = DIRECT_A3D_ELEM(v, k, i, j); } // Copy the vector for (int i = 0; i < l; i++) DIRECT_A3D_ELEM(v, k, i, j) = DIRECT_A1D_ELEM(aux, i); } // Shift in the Z direction l = ZSIZE(v); aux.resize(l); shift = (int)(l / 2); if (!forward) shift = -shift; for (int i = 0; i < YSIZE(v); i++) for (int j = 0; j < XSIZE(v); j++) { // Shift the input in an auxiliar vector for (int k = 0; k < l; k++) { int kp = k + shift; if (kp < 0) kp += l; else if (kp >= l) kp -= l; aux(kp) = DIRECT_A3D_ELEM(v, k, i, j); } // Copy the vector for (int k = 0; k < l; k++) DIRECT_A3D_ELEM(v, k, i, j) = DIRECT_A1D_ELEM(aux, k); } } else { v.printShape(); REPORT_ERROR("CenterFFT ERROR: Dimension should be 1, 2 or 3"); } } template void runCenterFFT( CudaGlobalPtr< T > &img_in, int xSize, int ySize, bool forward, int batchSize = 1) { // CudaGlobalPtr img_aux(img_in.h_ptr, img_in.size, allocator); // temporary holder // img_aux.device_alloc(); int xshift = (xSize / 2); int yshift = (ySize / 2); if (!forward) { xshift = -xshift; yshift = -yshift; } dim3 blocks(ceilf((float)((xSize*ySize)/(float)(2*CFTT_BLOCK_SIZE))),batchSize); cuda_kernel_centerFFT_2D<<>>( ~img_in, xSize*ySize, xSize, ySize, xshift, yshift); LAUNCH_HANDLE_ERROR(cudaGetLastError()); // HANDLE_ERROR(cudaStreamSynchronize(0)); // img_aux.cp_on_device(img_in.d_ptr); //update input image with centered kernel-output. } template void runCenterFFT( CudaGlobalPtr< T > &img_in, int xSize, int ySize, int zSize, bool forward, int batchSize = 1) { // CudaGlobalPtr img_aux(img_in.h_ptr, img_in.size, allocator); // temporary holder // img_aux.device_alloc(); if(zSize>1) { int xshift = (xSize / 2); int yshift = (ySize / 2); int zshift = (ySize / 2); if (!forward) { xshift = -xshift; yshift = -yshift; zshift = -zshift; } dim3 blocks(ceilf((float)((xSize*ySize*zSize)/(float)(2*CFTT_BLOCK_SIZE))),batchSize); cuda_kernel_centerFFT_3D<<>>( ~img_in, xSize*ySize*zSize, xSize, ySize, zSize, xshift, yshift, zshift); LAUNCH_HANDLE_ERROR(cudaGetLastError()); // HANDLE_ERROR(cudaStreamSynchronize(0)); // img_aux.cp_on_device(img_in.d_ptr); //update input image with centered kernel-output. 
} else { int xshift = (xSize / 2); int yshift = (ySize / 2); if (!forward) { xshift = -xshift; yshift = -yshift; } dim3 blocks(ceilf((float)((xSize*ySize)/(float)(2*CFTT_BLOCK_SIZE))),batchSize); cuda_kernel_centerFFT_2D<<>>( ~img_in, xSize*ySize, xSize, ySize, xshift, yshift); LAUNCH_HANDLE_ERROR(cudaGetLastError()); } } template void lowPassFilterMapGPU( CudaGlobalPtr< T > &img_in, size_t Zdim, size_t Ydim, size_t Xdim, long int ori_size, RFLOAT lowpass, RFLOAT highpass, RFLOAT angpix, int filter_edge_width, bool do_highpass) { // High or low? RFLOAT passLimit = (do_highpass ? highpass : lowpass); // Which resolution shell is the filter? int ires_filter = ROUND((ori_size * angpix)/passLimit); int filter_edge_halfwidth = filter_edge_width / 2; // Soft-edge: from 1 shell less to one shell more: XFLOAT edge_low = XMIPP_MAX(0., (ires_filter - filter_edge_halfwidth) / (RFLOAT)ori_size); // in 1/pix XFLOAT edge_high = XMIPP_MIN(Xdim, (ires_filter + filter_edge_halfwidth) / (RFLOAT)ori_size); // in 1/pix XFLOAT edge_width = edge_high - edge_low; dim3 blocks(ceilf( (float)(Xdim*Ydim*Zdim)/ (float)(CFTT_BLOCK_SIZE) ) ); if (do_highpass) { cuda_kernel_frequencyPass<<>>( ~img_in, ori_size, Xdim, Ydim, Zdim, edge_low, edge_width, edge_high, (XFLOAT)angpix, Xdim*Ydim*Zdim); } else { cuda_kernel_frequencyPass<<>>( ~img_in, ori_size, Xdim, Ydim, Zdim, edge_low, edge_width, edge_high, (XFLOAT)angpix, Xdim*Ydim*Zdim); } LAUNCH_HANDLE_ERROR(cudaGetLastError()); } #endif //CUDA_HELPER_FUNCTIONS_CUH_ relion-3.1.3/src/gpu_utils/cuda_kernels/000077500000000000000000000000001411340063500202445ustar00rootroot00000000000000relion-3.1.3/src/gpu_utils/cuda_kernels/BP.cuh000066400000000000000000000416541411340063500212600ustar00rootroot00000000000000#ifndef CUDA_BP_KERNELS_CUH_ #define CUDA_BP_KERNELS_CUH_ #include #include #include #include #include "src/gpu_utils/cuda_projector.cuh" #include "src/gpu_utils/cuda_backprojector.h" #include "src/gpu_utils/cuda_settings.h" #include "src/gpu_utils/cuda_device_utils.cuh" /* * BP KERNELS */ __global__ void cuda_kernel_backproject2D( XFLOAT *g_img_real, XFLOAT *g_img_imag, XFLOAT *g_trans_x, XFLOAT *g_trans_y, XFLOAT* g_weights, XFLOAT* g_Minvsigma2s, XFLOAT* g_ctfs, unsigned long translation_num, XFLOAT significant_weight, XFLOAT weight_norm, XFLOAT *g_eulers, XFLOAT *g_model_real, XFLOAT *g_model_imag, XFLOAT *g_model_weight, int max_r, int max_r2, XFLOAT padding_factor, unsigned img_x, unsigned img_y, unsigned img_xy, unsigned mdl_x, int mdl_inity) { unsigned tid = threadIdx.x; unsigned img = blockIdx.x; __shared__ XFLOAT s_eulers[4]; XFLOAT minvsigma2, ctf, img_real, img_imag, Fweight, real, imag, weight; if (tid == 0) s_eulers[0] = g_eulers[img*9+0] * padding_factor; else if (tid == 1) s_eulers[1] = g_eulers[img*9+1] * padding_factor; else if (tid == 2) s_eulers[2] = g_eulers[img*9+3] * padding_factor; else if (tid == 3) s_eulers[3] = g_eulers[img*9+4] * padding_factor; __syncthreads(); int pixel_pass_num(ceilf((float)img_xy/(float)BP_2D_BLOCK_SIZE)); for (unsigned pass = 0; pass < pixel_pass_num; pass++) { unsigned pixel = (pass * BP_2D_BLOCK_SIZE) + tid; if (pixel >= img_xy) continue; int x = pixel % img_x; int y = (int)floorf( (float)pixel / (float)img_x); // Don't search beyond square with side max_r if (y > max_r) { if (y >= img_y - max_r) y -= img_y; else continue; } if (x * x + y * y > max_r2) continue; //WAVG minvsigma2 = __ldg(&g_Minvsigma2s[pixel]); ctf = __ldg(&g_ctfs[pixel]); img_real = __ldg(&g_img_real[pixel]); img_imag = __ldg(&g_img_imag[pixel]); 
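		// For this pixel, accumulate over all significant translations a weighted sum of the
		// translated image value (real, imag) and of the CTF-weighted weights (Fweight); the sums
		// are then scattered onto the four neighbouring 2D model grid points by bilinear
		// interpolation using cuda_atomic_add.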
Fweight = (XFLOAT) 0.0; real = (XFLOAT) 0.0; imag = (XFLOAT) 0.0; XFLOAT temp_real, temp_imag; for (unsigned long itrans = 0; itrans < translation_num; itrans++) { weight = g_weights[img * translation_num + itrans]; if (weight >= significant_weight) { weight = (weight / weight_norm) * ctf * minvsigma2; Fweight += weight * ctf; translatePixel(x, y, g_trans_x[itrans], g_trans_y[itrans], img_real, img_imag, temp_real, temp_imag); real += temp_real * weight; imag += temp_imag * weight; } } if (Fweight > (XFLOAT) 0.0) { // Get logical coordinates in the 3D map XFLOAT xp = (s_eulers[0] * x + s_eulers[1] * y ); XFLOAT yp = (s_eulers[2] * x + s_eulers[3] * y ); // Only asymmetric half is stored if (xp < 0) { // Get complex conjugated hermitian symmetry pair xp = -xp; yp = -yp; imag = -imag; } int x0 = floorf(xp); XFLOAT fx = xp - x0; int x1 = x0 + 1; int y0 = floorf(yp); XFLOAT fy = yp - y0; y0 -= mdl_inity; int y1 = y0 + 1; XFLOAT mfx = (XFLOAT) 1.0 - fx; XFLOAT mfy = (XFLOAT) 1.0 - fy; XFLOAT dd00 = mfy * mfx; XFLOAT dd01 = mfy * fx; XFLOAT dd10 = fy * mfx; XFLOAT dd11 = fy * fx; cuda_atomic_add(&g_model_real [y0 * mdl_x + x0], dd00 * real); cuda_atomic_add(&g_model_imag [y0 * mdl_x + x0], dd00 * imag); cuda_atomic_add(&g_model_weight[y0 * mdl_x + x0], dd00 * Fweight); cuda_atomic_add(&g_model_real [y0 * mdl_x + x1], dd01 * real); cuda_atomic_add(&g_model_imag [y0 * mdl_x + x1], dd01 * imag); cuda_atomic_add(&g_model_weight[y0 * mdl_x + x1], dd01 * Fweight); cuda_atomic_add(&g_model_real [y1 * mdl_x + x0], dd10 * real); cuda_atomic_add(&g_model_imag [y1 * mdl_x + x0], dd10 * imag); cuda_atomic_add(&g_model_weight[y1 * mdl_x + x0], dd10 * Fweight); cuda_atomic_add(&g_model_real [y1 * mdl_x + x1], dd11 * real); cuda_atomic_add(&g_model_imag [y1 * mdl_x + x1], dd11 * imag); cuda_atomic_add(&g_model_weight[y1 * mdl_x + x1], dd11 * Fweight); } } } template < bool DATA3D > __global__ void cuda_kernel_backproject3D( XFLOAT *g_img_real, XFLOAT *g_img_imag, XFLOAT *g_trans_x, XFLOAT *g_trans_y, XFLOAT *g_trans_z, XFLOAT* g_weights, XFLOAT* g_Minvsigma2s, XFLOAT* g_ctfs, unsigned long translation_num, XFLOAT significant_weight, XFLOAT weight_norm, XFLOAT *g_eulers, XFLOAT *g_model_real, XFLOAT *g_model_imag, XFLOAT *g_model_weight, int max_r, int max_r2, XFLOAT padding_factor, unsigned img_x, unsigned img_y, unsigned img_z, unsigned img_xyz, unsigned mdl_x, unsigned mdl_y, int mdl_inity, int mdl_initz) { unsigned tid = threadIdx.x; unsigned img = blockIdx.x; __shared__ XFLOAT s_eulers[9]; XFLOAT minvsigma2, ctf, img_real, img_imag, Fweight, real, imag, weight; if (tid < 9) s_eulers[tid] = g_eulers[img*9+tid]; __syncthreads(); int pixel_pass_num(0); if(DATA3D) pixel_pass_num = (ceilf((float)img_xyz/(float)BP_DATA3D_BLOCK_SIZE)); else pixel_pass_num = (ceilf((float)img_xyz/(float)BP_REF3D_BLOCK_SIZE)); for (unsigned pass = 0; pass < pixel_pass_num; pass++) { unsigned pixel(0); if(DATA3D) pixel = (pass * BP_DATA3D_BLOCK_SIZE) + tid; else pixel = (pass * BP_REF3D_BLOCK_SIZE) + tid; if (pixel >= img_xyz) continue; int x,y,z,xy; if(DATA3D) { z = floorfracf(pixel, img_x*img_y); xy = pixel % (img_x*img_y); x = xy % img_x; y = floorfracf( xy, img_x); if (z > max_r) { if (z >= img_z - max_r) z = z - img_z; else continue; if(x==0) continue; } } else { x = pixel % img_x; y = floorfracf( pixel , img_x); } if (y > max_r) { if (y >= img_y - max_r) y = y - img_y; else continue; } if(DATA3D) if ( ( x * x + y * y + z * z ) > max_r2) continue; else if ( ( x * x + y * y ) > max_r2) continue; //WAVG minvsigma2 = 
__ldg(&g_Minvsigma2s[pixel]); ctf = __ldg(&g_ctfs[pixel]); img_real = __ldg(&g_img_real[pixel]); img_imag = __ldg(&g_img_imag[pixel]); Fweight = (XFLOAT) 0.0; real = (XFLOAT) 0.0; imag = (XFLOAT) 0.0; XFLOAT temp_real, temp_imag; for (unsigned long itrans = 0; itrans < translation_num; itrans++) { weight = g_weights[img * translation_num + itrans]; if (weight >= significant_weight) { weight = (weight / weight_norm) * ctf * minvsigma2; Fweight += weight * ctf; if(DATA3D) translatePixel(x, y, z, g_trans_x[itrans], g_trans_y[itrans], g_trans_z[itrans], img_real, img_imag, temp_real, temp_imag); else translatePixel(x, y, g_trans_x[itrans], g_trans_y[itrans], img_real, img_imag, temp_real, temp_imag); real += temp_real * weight; imag += temp_imag * weight; } } //BP if (Fweight > (XFLOAT) 0.0) { // Get logical coordinates in the 3D map XFLOAT xp,yp,zp; if(DATA3D) { xp = (s_eulers[0] * x + s_eulers[1] * y + s_eulers[2] * z) * padding_factor; yp = (s_eulers[3] * x + s_eulers[4] * y + s_eulers[5] * z) * padding_factor; zp = (s_eulers[6] * x + s_eulers[7] * y + s_eulers[8] * z) * padding_factor; } else { xp = (s_eulers[0] * x + s_eulers[1] * y ) * padding_factor; yp = (s_eulers[3] * x + s_eulers[4] * y ) * padding_factor; zp = (s_eulers[6] * x + s_eulers[7] * y ) * padding_factor; } // Only asymmetric half is stored if (xp < (XFLOAT) 0.0) { // Get complex conjugated hermitian symmetry pair xp = -xp; yp = -yp; zp = -zp; imag = -imag; } int x0 = floorf(xp); XFLOAT fx = xp - x0; int x1 = x0 + 1; int y0 = floorf(yp); XFLOAT fy = yp - y0; y0 -= mdl_inity; int y1 = y0 + 1; int z0 = floorf(zp); XFLOAT fz = zp - z0; z0 -= mdl_initz; int z1 = z0 + 1; XFLOAT mfx = (XFLOAT)1.0 - fx; XFLOAT mfy = (XFLOAT)1.0 - fy; XFLOAT mfz = (XFLOAT)1.0 - fz; XFLOAT dd000 = mfz * mfy * mfx; cuda_atomic_add(&g_model_real [z0 * mdl_x * mdl_y + y0 * mdl_x + x0], dd000 * real); cuda_atomic_add(&g_model_imag [z0 * mdl_x * mdl_y + y0 * mdl_x + x0], dd000 * imag); cuda_atomic_add(&g_model_weight[z0 * mdl_x * mdl_y + y0 * mdl_x + x0], dd000 * Fweight); XFLOAT dd001 = mfz * mfy * fx; cuda_atomic_add(&g_model_real [z0 * mdl_x * mdl_y + y0 * mdl_x + x1], dd001 * real); cuda_atomic_add(&g_model_imag [z0 * mdl_x * mdl_y + y0 * mdl_x + x1], dd001 * imag); cuda_atomic_add(&g_model_weight[z0 * mdl_x * mdl_y + y0 * mdl_x + x1], dd001 * Fweight); XFLOAT dd010 = mfz * fy * mfx; cuda_atomic_add(&g_model_real [z0 * mdl_x * mdl_y + y1 * mdl_x + x0], dd010 * real); cuda_atomic_add(&g_model_imag [z0 * mdl_x * mdl_y + y1 * mdl_x + x0], dd010 * imag); cuda_atomic_add(&g_model_weight[z0 * mdl_x * mdl_y + y1 * mdl_x + x0], dd010 * Fweight); XFLOAT dd011 = mfz * fy * fx; cuda_atomic_add(&g_model_real [z0 * mdl_x * mdl_y + y1 * mdl_x + x1], dd011 * real); cuda_atomic_add(&g_model_imag [z0 * mdl_x * mdl_y + y1 * mdl_x + x1], dd011 * imag); cuda_atomic_add(&g_model_weight[z0 * mdl_x * mdl_y + y1 * mdl_x + x1], dd011 * Fweight); XFLOAT dd100 = fz * mfy * mfx; cuda_atomic_add(&g_model_real [z1 * mdl_x * mdl_y + y0 * mdl_x + x0], dd100 * real); cuda_atomic_add(&g_model_imag [z1 * mdl_x * mdl_y + y0 * mdl_x + x0], dd100 * imag); cuda_atomic_add(&g_model_weight[z1 * mdl_x * mdl_y + y0 * mdl_x + x0], dd100 * Fweight); XFLOAT dd101 = fz * mfy * fx; cuda_atomic_add(&g_model_real [z1 * mdl_x * mdl_y + y0 * mdl_x + x1], dd101 * real); cuda_atomic_add(&g_model_imag [z1 * mdl_x * mdl_y + y0 * mdl_x + x1], dd101 * imag); cuda_atomic_add(&g_model_weight[z1 * mdl_x * mdl_y + y0 * mdl_x + x1], dd101 * Fweight); XFLOAT dd110 = fz * fy * mfx; cuda_atomic_add(&g_model_real 
[z1 * mdl_x * mdl_y + y1 * mdl_x + x0], dd110 * real); cuda_atomic_add(&g_model_imag [z1 * mdl_x * mdl_y + y1 * mdl_x + x0], dd110 * imag); cuda_atomic_add(&g_model_weight[z1 * mdl_x * mdl_y + y1 * mdl_x + x0], dd110 * Fweight); XFLOAT dd111 = fz * fy * fx; cuda_atomic_add(&g_model_real [z1 * mdl_x * mdl_y + y1 * mdl_x + x1], dd111 * real); cuda_atomic_add(&g_model_imag [z1 * mdl_x * mdl_y + y1 * mdl_x + x1], dd111 * imag); cuda_atomic_add(&g_model_weight[z1 * mdl_x * mdl_y + y1 * mdl_x + x1], dd111 * Fweight); } } } template < bool DATA3D > __global__ void cuda_kernel_backprojectSGD( CudaProjectorKernel projector, XFLOAT *g_img_real, XFLOAT *g_img_imag, XFLOAT *g_trans_x, XFLOAT *g_trans_y, XFLOAT *g_trans_z, XFLOAT* g_weights, XFLOAT* g_Minvsigma2s, XFLOAT* g_ctfs, unsigned long translation_num, XFLOAT significant_weight, XFLOAT weight_norm, XFLOAT *g_eulers, XFLOAT *g_model_real, XFLOAT *g_model_imag, XFLOAT *g_model_weight, int max_r, int max_r2, XFLOAT padding_factor, unsigned img_x, unsigned img_y, unsigned img_z, unsigned img_xyz, unsigned mdl_x, unsigned mdl_y, int mdl_inity, int mdl_initz) { unsigned tid = threadIdx.x; unsigned img = blockIdx.x; __shared__ XFLOAT s_eulers[9]; XFLOAT minvsigma2, ctf, img_real, img_imag, Fweight, real, imag, weight; if (tid < 9) s_eulers[tid] = g_eulers[img*9+tid]; __syncthreads(); int pixel_pass_num(0); if(DATA3D) pixel_pass_num = (ceilf((float)img_xyz/(float)BP_DATA3D_BLOCK_SIZE)); else pixel_pass_num = (ceilf((float)img_xyz/(float)BP_REF3D_BLOCK_SIZE)); for (unsigned pass = 0; pass < pixel_pass_num; pass++) { unsigned pixel(0); if(DATA3D) pixel = (pass * BP_DATA3D_BLOCK_SIZE) + tid; else pixel = (pass * BP_REF3D_BLOCK_SIZE) + tid; if (pixel >= img_xyz) continue; int x,y,z,xy; if(DATA3D) { z = floorfracf(pixel, img_x*img_y); xy = pixel % (img_x*img_y); x = xy % img_x; y = floorfracf( xy, img_x); if (z > max_r) { if (z >= img_z - max_r) z = z - img_z; else continue; if(x==0) continue; } } else { x = pixel % img_x; y = floorfracf( pixel , img_x); } if (y > max_r) { if (y >= img_y - max_r) y = y - img_y; else continue; } if(DATA3D) if ( ( x * x + y * y + z * z ) > max_r2) continue; else if ( ( x * x + y * y ) > max_r2) continue; XFLOAT ref_real = (XFLOAT) 0.0; XFLOAT ref_imag = (XFLOAT) 0.0; if(DATA3D) projector.project3Dmodel( x,y,z, s_eulers[0], s_eulers[1], s_eulers[2], s_eulers[3], s_eulers[4], s_eulers[5], s_eulers[6], s_eulers[7], s_eulers[8], ref_real, ref_imag); else projector.project3Dmodel( x,y, s_eulers[0], s_eulers[1], s_eulers[3], s_eulers[4], s_eulers[6], s_eulers[7], ref_real, ref_imag); //WAVG minvsigma2 = __ldg(&g_Minvsigma2s[pixel]); ctf = __ldg(&g_ctfs[pixel]); img_real = __ldg(&g_img_real[pixel]); img_imag = __ldg(&g_img_imag[pixel]); Fweight = (XFLOAT) 0.0; real = (XFLOAT) 0.0; imag = (XFLOAT) 0.0; ref_real *= ctf; ref_imag *= ctf; XFLOAT temp_real, temp_imag; for (unsigned long itrans = 0; itrans < translation_num; itrans++) { weight = g_weights[img * translation_num + itrans]; if (weight >= significant_weight) { weight = (weight / weight_norm) * ctf * minvsigma2; Fweight += weight * ctf; if(DATA3D) translatePixel(x, y, z, g_trans_x[itrans], g_trans_y[itrans], g_trans_z[itrans], img_real, img_imag, temp_real, temp_imag); else translatePixel(x, y, g_trans_x[itrans], g_trans_y[itrans], img_real, img_imag, temp_real, temp_imag); real += (temp_real-ref_real) * weight; imag += (temp_imag-ref_imag) * weight; } } //BP if (Fweight > (XFLOAT) 0.0) { // Get logical coordinates in the 3D map XFLOAT xp,yp,zp; if(DATA3D) { xp = (s_eulers[0] 
* x + s_eulers[1] * y + s_eulers[2] * z) * padding_factor; yp = (s_eulers[3] * x + s_eulers[4] * y + s_eulers[5] * z) * padding_factor; zp = (s_eulers[6] * x + s_eulers[7] * y + s_eulers[8] * z) * padding_factor; } else { xp = (s_eulers[0] * x + s_eulers[1] * y ) * padding_factor; yp = (s_eulers[3] * x + s_eulers[4] * y ) * padding_factor; zp = (s_eulers[6] * x + s_eulers[7] * y ) * padding_factor; } // Only asymmetric half is stored if (xp < (XFLOAT) 0.0) { // Get complex conjugated hermitian symmetry pair xp = -xp; yp = -yp; zp = -zp; imag = -imag; } int x0 = floorf(xp); XFLOAT fx = xp - x0; int x1 = x0 + 1; int y0 = floorf(yp); XFLOAT fy = yp - y0; y0 -= mdl_inity; int y1 = y0 + 1; int z0 = floorf(zp); XFLOAT fz = zp - z0; z0 -= mdl_initz; int z1 = z0 + 1; XFLOAT mfx = (XFLOAT)1.0 - fx; XFLOAT mfy = (XFLOAT)1.0 - fy; XFLOAT mfz = (XFLOAT)1.0 - fz; XFLOAT dd000 = mfz * mfy * mfx; cuda_atomic_add(&g_model_real [z0 * mdl_x * mdl_y + y0 * mdl_x + x0], dd000 * real); cuda_atomic_add(&g_model_imag [z0 * mdl_x * mdl_y + y0 * mdl_x + x0], dd000 * imag); cuda_atomic_add(&g_model_weight[z0 * mdl_x * mdl_y + y0 * mdl_x + x0], dd000 * Fweight); XFLOAT dd001 = mfz * mfy * fx; cuda_atomic_add(&g_model_real [z0 * mdl_x * mdl_y + y0 * mdl_x + x1], dd001 * real); cuda_atomic_add(&g_model_imag [z0 * mdl_x * mdl_y + y0 * mdl_x + x1], dd001 * imag); cuda_atomic_add(&g_model_weight[z0 * mdl_x * mdl_y + y0 * mdl_x + x1], dd001 * Fweight); XFLOAT dd010 = mfz * fy * mfx; cuda_atomic_add(&g_model_real [z0 * mdl_x * mdl_y + y1 * mdl_x + x0], dd010 * real); cuda_atomic_add(&g_model_imag [z0 * mdl_x * mdl_y + y1 * mdl_x + x0], dd010 * imag); cuda_atomic_add(&g_model_weight[z0 * mdl_x * mdl_y + y1 * mdl_x + x0], dd010 * Fweight); XFLOAT dd011 = mfz * fy * fx; cuda_atomic_add(&g_model_real [z0 * mdl_x * mdl_y + y1 * mdl_x + x1], dd011 * real); cuda_atomic_add(&g_model_imag [z0 * mdl_x * mdl_y + y1 * mdl_x + x1], dd011 * imag); cuda_atomic_add(&g_model_weight[z0 * mdl_x * mdl_y + y1 * mdl_x + x1], dd011 * Fweight); XFLOAT dd100 = fz * mfy * mfx; cuda_atomic_add(&g_model_real [z1 * mdl_x * mdl_y + y0 * mdl_x + x0], dd100 * real); cuda_atomic_add(&g_model_imag [z1 * mdl_x * mdl_y + y0 * mdl_x + x0], dd100 * imag); cuda_atomic_add(&g_model_weight[z1 * mdl_x * mdl_y + y0 * mdl_x + x0], dd100 * Fweight); XFLOAT dd101 = fz * mfy * fx; cuda_atomic_add(&g_model_real [z1 * mdl_x * mdl_y + y0 * mdl_x + x1], dd101 * real); cuda_atomic_add(&g_model_imag [z1 * mdl_x * mdl_y + y0 * mdl_x + x1], dd101 * imag); cuda_atomic_add(&g_model_weight[z1 * mdl_x * mdl_y + y0 * mdl_x + x1], dd101 * Fweight); XFLOAT dd110 = fz * fy * mfx; cuda_atomic_add(&g_model_real [z1 * mdl_x * mdl_y + y1 * mdl_x + x0], dd110 * real); cuda_atomic_add(&g_model_imag [z1 * mdl_x * mdl_y + y1 * mdl_x + x0], dd110 * imag); cuda_atomic_add(&g_model_weight[z1 * mdl_x * mdl_y + y1 * mdl_x + x0], dd110 * Fweight); XFLOAT dd111 = fz * fy * fx; cuda_atomic_add(&g_model_real [z1 * mdl_x * mdl_y + y1 * mdl_x + x1], dd111 * real); cuda_atomic_add(&g_model_imag [z1 * mdl_x * mdl_y + y1 * mdl_x + x1], dd111 * imag); cuda_atomic_add(&g_model_weight[z1 * mdl_x * mdl_y + y1 * mdl_x + x1], dd111 * Fweight); } } } #endif /* CUDA_PB_KERNELS_CUH_ */ relion-3.1.3/src/gpu_utils/cuda_kernels/diff2.cuh000066400000000000000000000422741411340063500217500ustar00rootroot00000000000000#ifndef CUDA_DIFF2_KERNELS_CUH_ #define CUDA_DIFF2_KERNELS_CUH_ #include #include #include #include #include "src/gpu_utils/cuda_projector.cuh" #include "src/gpu_utils/cuda_settings.h" #include 
"src/gpu_utils/cuda_device_utils.cuh" /* * DIFFERNECE-BASED KERNELS */ /* * Assuming block_sz % prefetch_fraction == 0 and prefetch_fraction < block_sz * Assuming block_sz % eulers_per_block == 0 * Assuming eulers_per_block * 3 < block_sz */ template __global__ void cuda_kernel_diff2_coarse( XFLOAT *g_eulers, XFLOAT *trans_x, XFLOAT *trans_y, XFLOAT *trans_z, XFLOAT *g_real, XFLOAT *g_imag, CudaProjectorKernel projector, XFLOAT *g_corr, XFLOAT *g_diff2s, int translation_num, int image_size ) { int tid = threadIdx.x; //Prefetch euler matrices __shared__ XFLOAT s_eulers[eulers_per_block * 9]; int max_block_pass_euler( ceilfracf(eulers_per_block*9, block_sz) * block_sz); for (int i = tid; i < max_block_pass_euler; i += block_sz) if (i < eulers_per_block * 9) s_eulers[i] = g_eulers[blockIdx.x * eulers_per_block * 9 + i]; //Setup variables __shared__ XFLOAT s_ref_real[block_sz/prefetch_fraction * eulers_per_block]; __shared__ XFLOAT s_ref_imag[block_sz/prefetch_fraction * eulers_per_block]; __shared__ XFLOAT s_real[block_sz]; __shared__ XFLOAT s_imag[block_sz]; __shared__ XFLOAT s_corr[block_sz]; XFLOAT diff2s[eulers_per_block] = {0.f}; XFLOAT tx = trans_x[tid%translation_num]; XFLOAT ty = trans_y[tid%translation_num]; XFLOAT tz = trans_z[tid%translation_num]; //Step through data int max_block_pass_pixel( ceilfracf(image_size,block_sz) * block_sz ); for (int init_pixel = 0; init_pixel < max_block_pass_pixel; init_pixel += block_sz/prefetch_fraction) { __syncthreads(); //Prefetch block-fraction-wise if(init_pixel + tid/prefetch_fraction < image_size) { int x,y,z,xy; if(DATA3D) { z = floorfracf(init_pixel + tid/prefetch_fraction, projector.imgX*projector.imgY); xy = (init_pixel + tid/prefetch_fraction) % (projector.imgX*projector.imgY); x = xy % projector.imgX; y = floorfracf( xy, projector.imgX); if (z > projector.maxR) z -= projector.imgZ; } else { x = ( init_pixel + tid/prefetch_fraction) % projector.imgX; y = floorfracf( init_pixel + tid/prefetch_fraction , projector.imgX); } if (y > projector.maxR) y -= projector.imgY; // #pragma unroll for (int i = tid%prefetch_fraction; i < eulers_per_block; i += prefetch_fraction) { if(DATA3D) // if DATA3D, then REF3D as well. 
projector.project3Dmodel( x,y,z, s_eulers[i*9 ], s_eulers[i*9+1], s_eulers[i*9+2], s_eulers[i*9+3], s_eulers[i*9+4], s_eulers[i*9+5], s_eulers[i*9+6], s_eulers[i*9+7], s_eulers[i*9+8], s_ref_real[eulers_per_block * (tid/prefetch_fraction) + i], s_ref_imag[eulers_per_block * (tid/prefetch_fraction) + i]); else if(REF3D) projector.project3Dmodel( x,y, s_eulers[i*9 ], s_eulers[i*9+1], s_eulers[i*9+3], s_eulers[i*9+4], s_eulers[i*9+6], s_eulers[i*9+7], s_ref_real[eulers_per_block * (tid/prefetch_fraction) + i], s_ref_imag[eulers_per_block * (tid/prefetch_fraction) + i]); else projector.project2Dmodel( x,y, s_eulers[i*9 ], s_eulers[i*9+1], s_eulers[i*9+3], s_eulers[i*9+4], s_ref_real[eulers_per_block * (tid/prefetch_fraction) + i], s_ref_imag[eulers_per_block * (tid/prefetch_fraction) + i]); } } //Prefetch block-wise if (init_pixel % block_sz == 0 && init_pixel + tid < image_size) { s_real[tid] = g_real[init_pixel + tid]; s_imag[tid] = g_imag[init_pixel + tid]; s_corr[tid] = g_corr[init_pixel + tid] / 2; } __syncthreads(); if (tid/translation_num < block_sz/translation_num) // NOTE int division A/B==C/B !=> A==C for (int i = tid / translation_num; i < block_sz/prefetch_fraction; i += block_sz/translation_num) { if((init_pixel + i) >= image_size) break; int x,y,z,xy; if(DATA3D) { z = floorfracf( init_pixel + i , projector.imgX*projector.imgY); //TODO optimize index extraction. xy = ( init_pixel + i ) % (projector.imgX*projector.imgY); x = xy % projector.imgX; y = floorfracf( xy, projector.imgX); if (z > projector.maxR) z -= projector.imgZ; } else { x = ( init_pixel + i ) % projector.imgX; y = floorfracf( init_pixel + i , projector.imgX); } if (y > projector.maxR) y -= projector.imgY; XFLOAT real, imag; if(DATA3D) translatePixel(x, y, z, tx, ty, tz, s_real[i + init_pixel % block_sz], s_imag[i + init_pixel % block_sz], real, imag); else translatePixel(x, y, tx, ty, s_real[i + init_pixel % block_sz], s_imag[i + init_pixel % block_sz], real, imag); #pragma unroll for (int j = 0; j < eulers_per_block; j ++) { XFLOAT diff_real = s_ref_real[eulers_per_block * i + j] - real; XFLOAT diff_imag = s_ref_imag[eulers_per_block * i + j] - imag; diff2s[j] += (diff_real * diff_real + diff_imag * diff_imag) * s_corr[i + init_pixel % block_sz]; } } } //Set global #pragma unroll for (int i = 0; i < eulers_per_block; i ++) cuda_atomic_add(&g_diff2s[(blockIdx.x * eulers_per_block + i) * translation_num + tid % translation_num], diff2s[i]); } template __global__ void cuda_kernel_diff2_fine( XFLOAT *g_eulers, XFLOAT *g_imgs_real, XFLOAT *g_imgs_imag, XFLOAT *trans_x, XFLOAT *trans_y, XFLOAT *trans_z, CudaProjectorKernel projector, XFLOAT *g_corr_img, XFLOAT *g_diff2s, unsigned image_size, XFLOAT sum_init, unsigned long orientation_num, unsigned long translation_num, unsigned long todo_blocks, unsigned long *d_rot_idx, unsigned long *d_trans_idx, unsigned long *d_job_idx, unsigned long *d_job_num ) { unsigned long bid = blockIdx.x; unsigned long tid = threadIdx.x; // // Specialize BlockReduce for a 1D block of 128 threads on type XFLOAT // typedef cub::BlockReduce BlockReduce; // // Allocate shared memory for BlockReduce // __shared__ typename BlockReduce::TempStorage temp_storage; unsigned long pixel; XFLOAT ref_real, ref_imag, shifted_real, shifted_imag, diff_real, diff_imag; __shared__ XFLOAT s[block_sz*chunk_sz]; //We MAY have to do up to chunk_sz translations in each block __shared__ XFLOAT s_outs[chunk_sz]; // inside the padded 2D orientation gri // if( bid < todo_blocks ) // we only need to make { unsigned 
trans_num = (unsigned)d_job_num[bid]; //how many transes we have for this rot for (int itrans=0; itrans projector.maxR) { if (z >= projector.imgZ - projector.maxR) z = z - projector.imgZ; else x = projector.maxR; } } else { x = pixel % projector.imgX; y = floorfracf( pixel , projector.imgX); } if (y > projector.maxR) { if (y >= projector.imgY - projector.maxR) y = y - projector.imgY; else x = projector.maxR; } if(DATA3D) projector.project3Dmodel( x,y,z, __ldg(&g_eulers[ix*9 ]), __ldg(&g_eulers[ix*9+1]), __ldg(&g_eulers[ix*9+2]), __ldg(&g_eulers[ix*9+3]), __ldg(&g_eulers[ix*9+4]), __ldg(&g_eulers[ix*9+5]), __ldg(&g_eulers[ix*9+6]), __ldg(&g_eulers[ix*9+7]), __ldg(&g_eulers[ix*9+8]), ref_real, ref_imag); else if(REF3D) projector.project3Dmodel( x,y, __ldg(&g_eulers[ix*9 ]), __ldg(&g_eulers[ix*9+1]), __ldg(&g_eulers[ix*9+3]), __ldg(&g_eulers[ix*9+4]), __ldg(&g_eulers[ix*9+6]), __ldg(&g_eulers[ix*9+7]), ref_real, ref_imag); else projector.project2Dmodel( x,y, __ldg(&g_eulers[ix*9 ]), __ldg(&g_eulers[ix*9+1]), __ldg(&g_eulers[ix*9+3]), __ldg(&g_eulers[ix*9+4]), ref_real, ref_imag); for (int itrans=0; itrans0; j/=2) { if(tid __global__ void cuda_kernel_diff2_CC_coarse( XFLOAT *g_eulers, XFLOAT *g_imgs_real, XFLOAT *g_imgs_imag, XFLOAT *g_trans_x, XFLOAT *g_trans_y, XFLOAT *g_trans_z, CudaProjectorKernel projector, XFLOAT *g_corr_img, XFLOAT *g_diff2s, unsigned translation_num, int image_size, XFLOAT exp_local_sqrtXi2 ) { int iorient = blockIdx.x; int itrans = blockIdx.y; int tid = threadIdx.x; __shared__ XFLOAT s_weight[block_sz]; s_weight[tid] = (XFLOAT)0.0; __shared__ XFLOAT s_norm[block_sz]; s_norm[tid] = (XFLOAT)0.0; XFLOAT real, imag, ref_real, ref_imag; XFLOAT e0,e1,e2,e3,e4,e5,e6,e7,e8; e0 = __ldg(&g_eulers[iorient*9 ]); e1 = __ldg(&g_eulers[iorient*9+1]); e2 = __ldg(&g_eulers[iorient*9+2]); e3 = __ldg(&g_eulers[iorient*9+3]); e4 = __ldg(&g_eulers[iorient*9+4]); e5 = __ldg(&g_eulers[iorient*9+5]); e6 = __ldg(&g_eulers[iorient*9+6]); e7 = __ldg(&g_eulers[iorient*9+7]); e8 = __ldg(&g_eulers[iorient*9+8]); __syncthreads(); unsigned pixel_pass_num( ceilfracf(image_size,block_sz) ); for (unsigned pass = 0; pass < pixel_pass_num; pass++) { unsigned pixel = (pass * block_sz) + tid; if(pixel < image_size) { int x,y,z,xy; if(DATA3D) { z = floorfracf(pixel, projector.imgX*projector.imgY); xy = pixel % (projector.imgX*projector.imgY); x = xy % projector.imgX; y = floorfracf( xy, projector.imgX); if (z > projector.maxR) { if (z >= projector.imgZ - projector.maxR) z = z - projector.imgZ; else x = projector.maxR; } } else { x = pixel % projector.imgX; y = floorfracf( pixel , projector.imgX); } if (y > projector.maxR) { if (y >= projector.imgY - projector.maxR) y = y - projector.imgY; else x = projector.maxR; } if(DATA3D) projector.project3Dmodel( x,y,z, e0,e1,e2,e3,e4,e5,e6,e7,e8, ref_real, ref_imag); else if(REF3D) projector.project3Dmodel( x,y, e0,e1,e3,e4,e6,e7, ref_real, ref_imag); else projector.project2Dmodel( x,y, e0,e1,e3,e4, ref_real, ref_imag); if(DATA3D) translatePixel(x, y, z, g_trans_x[itrans], g_trans_y[itrans], g_trans_z[itrans], g_imgs_real[pixel], g_imgs_imag[pixel], real, imag); else translatePixel(x, y, g_trans_x[itrans], g_trans_y[itrans], g_imgs_real[pixel], g_imgs_imag[pixel], real, imag); s_weight[tid] += (ref_real * real + ref_imag * imag) * __ldg(&g_corr_img[pixel]); s_norm[tid] += (ref_real * ref_real + ref_imag * ref_imag ) * __ldg(&g_corr_img[pixel]); } __syncthreads(); } for(int j=(block_sz/2); j>0; j/=2) { if(tid __global__ void cuda_kernel_diff2_CC_fine( XFLOAT *g_eulers, 
XFLOAT *g_imgs_real, XFLOAT *g_imgs_imag, XFLOAT *g_trans_x, XFLOAT *g_trans_y, XFLOAT *g_trans_z, CudaProjectorKernel projector, XFLOAT *g_corr_img, XFLOAT *g_diff2s, unsigned image_size, XFLOAT sum_init, XFLOAT exp_local_sqrtXi2, unsigned long orientation_num, unsigned long translation_num, unsigned long todo_blocks, unsigned long *d_rot_idx, unsigned long *d_trans_idx, unsigned long *d_job_idx, unsigned long *d_job_num ) { int bid = blockIdx.y * gridDim.x + blockIdx.x; int tid = threadIdx.x; // // Specialize BlockReduce for a 1D block of 128 threads on type XFLOAT // typedef cub::BlockReduce BlockReduce; // // Allocate shared memory for BlockReduce // __shared__ typename BlockReduce::TempStorage temp_storage; int pixel; XFLOAT ref_real, ref_imag, shifted_real, shifted_imag; __shared__ XFLOAT s[block_sz*chunk_sz]; //We MAY have to do up to chunk_sz translations in each block __shared__ XFLOAT s_cc[block_sz*chunk_sz]; __shared__ XFLOAT s_outs[chunk_sz]; if( bid < todo_blocks ) // we only need to make { unsigned trans_num = d_job_num[bid]; //how many transes we have for this rot for (int itrans=0; itrans projector.maxR) { if (z >= projector.imgZ - projector.maxR) z = z - projector.imgZ; else x = projector.maxR; } } else { x = pixel % projector.imgX; y = floorfracf( pixel , projector.imgX); } if (y > projector.maxR) { if (y >= projector.imgY - projector.maxR) y = y - projector.imgY; else x = projector.maxR; } if(DATA3D) projector.project3Dmodel( x,y,z, __ldg(&g_eulers[ix*9 ]), __ldg(&g_eulers[ix*9+1]), __ldg(&g_eulers[ix*9+2]), __ldg(&g_eulers[ix*9+3]), __ldg(&g_eulers[ix*9+4]), __ldg(&g_eulers[ix*9+5]), __ldg(&g_eulers[ix*9+6]), __ldg(&g_eulers[ix*9+7]), __ldg(&g_eulers[ix*9+8]), ref_real, ref_imag); else if(REF3D) projector.project3Dmodel( x,y, __ldg(&g_eulers[ix*9 ]), __ldg(&g_eulers[ix*9+1]), __ldg(&g_eulers[ix*9+3]), __ldg(&g_eulers[ix*9+4]), __ldg(&g_eulers[ix*9+6]), __ldg(&g_eulers[ix*9+7]), ref_real, ref_imag); else projector.project2Dmodel( x,y, __ldg(&g_eulers[ix*9 ]), __ldg(&g_eulers[ix*9+1]), __ldg(&g_eulers[ix*9+3]), __ldg(&g_eulers[ix*9+4]), ref_real, ref_imag); for (int itrans=0; itrans0; j/=2) { if(tid>y and finite precision // f_itrans = iy % oversamples_trans; XFLOAT prior = g_pdf_orientation[ix] * g_pdf_offset[c_itrans]; // Same for all threads - TODO: should be done once for all trans through warp-parallel execution XFLOAT diff2 = g_weights[pos+itrans] - avg_diff2; // Different for all threads // next line because of numerical precision of exp-function #if defined(CUDA_DOUBLE_PRECISION) if (diff2 > 700.) s_weights[tid] = 0.; else s_weights[tid] = prior * exp(-diff2); #else if (diff2 > 88.) s_weights[tid] = 0.f; else s_weights[tid] = prior * expf(-diff2); #endif // TODO: use tabulated exp function? / Sjors TODO: exp, expf, or __exp in CUDA? 
/Bjorn // Store the weight g_weights[pos+itrans] = s_weights[tid]; // TODO put in shared mem } } } __global__ void cuda_kernel_softMaskOutsideMap( XFLOAT *vol, long int vol_size, long int xdim, long int ydim, long int zdim, long int xinit, long int yinit, long int zinit, bool do_Mnoise, XFLOAT radius, XFLOAT radius_p, XFLOAT cosine_width ) { int tid = threadIdx.x; // vol.setXmippOrigin(); // sets xinit=xdim , also for y z XFLOAT r, raisedcos; __shared__ XFLOAT img_pixels[SOFTMASK_BLOCK_SIZE]; __shared__ XFLOAT partial_sum[SOFTMASK_BLOCK_SIZE]; __shared__ XFLOAT partial_sum_bg[SOFTMASK_BLOCK_SIZE]; XFLOAT sum_bg_total = (XFLOAT)0.0; long int texel_pass_num = ceilfracf(vol_size,SOFTMASK_BLOCK_SIZE); int texel = tid; partial_sum[tid]=(XFLOAT)0.0; partial_sum_bg[tid]=(XFLOAT)0.0; if (do_Mnoise) { for (int pass = 0; pass < texel_pass_num; pass++, texel+=SOFTMASK_BLOCK_SIZE) // loop the available warps enough to complete all translations for this orientation { XFLOAT x,y,z; if(texel radius_p) { partial_sum[tid] += (XFLOAT)1.0; partial_sum_bg[tid] += img_pixels[tid]; } else { #if defined(CUDA_DOUBLE_PRECISION) raisedcos = 0.5 + 0.5 * cospi( (radius_p - r) / cosine_width ); #else raisedcos = 0.5f + 0.5f * cospif((radius_p - r) / cosine_width ); #endif partial_sum[tid] += raisedcos; partial_sum_bg[tid] += raisedcos * img_pixels[tid]; } } } } __syncthreads(); for(int j=(SOFTMASK_BLOCK_SIZE/2); j>0; j/=2) { if(tid radius_p) img_pixels[tid]=sum_bg_total; else { #if defined(CUDA_DOUBLE_PRECISION) raisedcos = 0.5 + 0.5 * cospi( (radius_p - r) / cosine_width ); #else raisedcos = 0.5f + 0.5f * cospif((radius_p - r) / cosine_width ); #endif img_pixels[tid]= img_pixels[tid]*(1-raisedcos) + sum_bg_total*raisedcos; } vol[texel]=img_pixels[tid]; } } } __global__ void cuda_kernel_softMaskBackgroundValue( XFLOAT *vol, long int vol_size, long int xdim, long int ydim, long int zdim, long int xinit, long int yinit, long int zinit, bool do_Mnoise, XFLOAT radius, XFLOAT radius_p, XFLOAT cosine_width, XFLOAT *g_sum, XFLOAT *g_sum_bg) { int tid = threadIdx.x; int bid = blockIdx.x; // vol.setXmippOrigin(); // sets xinit=xdim , also for y z XFLOAT r, raisedcos; int x,y,z; __shared__ XFLOAT img_pixels[SOFTMASK_BLOCK_SIZE]; __shared__ XFLOAT partial_sum[SOFTMASK_BLOCK_SIZE]; __shared__ XFLOAT partial_sum_bg[SOFTMASK_BLOCK_SIZE]; long int texel_pass_num = ceilfracf(vol_size,SOFTMASK_BLOCK_SIZE*gridDim.x); int texel = bid*SOFTMASK_BLOCK_SIZE*texel_pass_num + tid; partial_sum[tid]=(XFLOAT)0.0; partial_sum_bg[tid]=(XFLOAT)0.0; for (int pass = 0; pass < texel_pass_num; pass++, texel+=SOFTMASK_BLOCK_SIZE) // loop the available warps enough to complete all translations for this orientation { if(texel radius_p) { partial_sum[tid] += (XFLOAT)1.0; partial_sum_bg[tid] += img_pixels[tid]; } else { #if defined(CUDA_DOUBLE_PRECISION) raisedcos = 0.5 + 0.5 * cospi( (radius_p - r) / cosine_width ); #else raisedcos = 0.5f + 0.5f * cospif((radius_p - r) / cosine_width ); #endif partial_sum[tid] += raisedcos; partial_sum_bg[tid] += raisedcos * img_pixels[tid]; } } } cuda_atomic_add(&g_sum[tid] , partial_sum[tid]); cuda_atomic_add(&g_sum_bg[tid], partial_sum_bg[tid]); } __global__ void cuda_kernel_cosineFilter( XFLOAT *vol, long int vol_size, long int xdim, long int ydim, long int zdim, long int xinit, long int yinit, long int zinit, bool do_Mnoise, XFLOAT radius, XFLOAT radius_p, XFLOAT cosine_width, XFLOAT bg_value) { int tid = threadIdx.x; int bid = blockIdx.x; // vol.setXmippOrigin(); // sets xinit=xdim , also for y z XFLOAT r, raisedcos; 
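// The soft-mask kernels above and this cosine filter share the same raised-cosine
// edge profile: voxels with r < radius are left untouched, voxels with r > radius_p
// are replaced by the background value, and in between the blend weight is
// raisedcos = 0.5 + 0.5*cos(PI*(radius_p - r)/cosine_width), which rises from 0 at
// r = radius_p - cosine_width to 1 at r = radius_p (0.5 exactly halfway across the
// edge). Callers are expected to pass radius_p = radius + cosine_width so the two
// regimes meet smoothly; that convention is assumed here, not enforced by the kernel.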
int x,y,z; __shared__ XFLOAT img_pixels[SOFTMASK_BLOCK_SIZE]; long int texel_pass_num = ceilfracf(vol_size,SOFTMASK_BLOCK_SIZE*gridDim.x); int texel = bid*SOFTMASK_BLOCK_SIZE*texel_pass_num + tid; for (int pass = 0; pass < texel_pass_num; pass++, texel+=SOFTMASK_BLOCK_SIZE) // loop the available warps enough to complete all translations for this orientation { if(texel radius_p) img_pixels[tid]=bg_value; else { #if defined(CUDA_DOUBLE_PRECISION) raisedcos = 0.5 + 0.5 * cospi( (radius_p - r) / cosine_width ); #else raisedcos = 0.5f + 0.5f * cospif((radius_p - r) / cosine_width ); #endif img_pixels[tid]= img_pixels[tid]*(1-raisedcos) + bg_value*raisedcos; } vol[texel]=img_pixels[tid]; } } } __global__ void cuda_kernel_translate2D( XFLOAT * g_image_in, XFLOAT * g_image_out, int image_size, int xdim, int ydim, int dx, int dy) { int tid = threadIdx.x; int bid = blockIdx.x; int x,y,xp,yp; int pixel=tid + bid*BLOCK_SIZE; int new_pixel; if(pixel=0 && xp>=0 && yp=0 && new_pixel=0 && yp>=0 && xp>=0 && zp=0 && new_voxel= ydim) yp -= ydim; int xp = x + xshift; if (xp < 0) xp += xdim; else if (xp >= xdim) xp -= xdim; int n_pixel = yp*xdim + xp; buffer[tid] = img_in[image_offset + n_pixel]; img_in[image_offset + n_pixel] = img_in[image_offset + pixel]; img_in[image_offset + pixel] = buffer[tid]; } // } } __global__ void cuda_kernel_centerFFT_3D(XFLOAT *img_in, int image_size, int xdim, int ydim, int zdim, int xshift, int yshift, int zshift) { __shared__ XFLOAT buffer[CFTT_BLOCK_SIZE]; int tid = threadIdx.x; int pixel = threadIdx.x + blockIdx.x*CFTT_BLOCK_SIZE; long int image_offset = image_size*blockIdx.y; int xydim = xdim*ydim; if(pixel<(image_size/2)) { int z = floorf((XFLOAT)pixel/(XFLOAT)(xydim)); int xy = pixel % xydim; int y = floorf((XFLOAT)xy/(XFLOAT)xdim); int x = xy % xdim; int yp = y + yshift; if (yp < 0) yp += ydim; else if (yp >= ydim) yp -= ydim; int xp = x + xshift; if (xp < 0) xp += xdim; else if (xp >= xdim) xp -= xdim; int zp = z + zshift; if (zp < 0) zp += zdim; else if (zp >= zdim) zp -= zdim; int n_pixel = zp*xydim + yp*xdim + xp; buffer[tid] = img_in[image_offset + n_pixel]; img_in[image_offset + n_pixel] = img_in[image_offset + pixel]; img_in[image_offset + pixel] = buffer[tid]; } } __global__ void cuda_kernel_probRatio( XFLOAT *d_Mccf, XFLOAT *d_Mpsi, XFLOAT *d_Maux, XFLOAT *d_Mmean, XFLOAT *d_Mstddev, int image_size, XFLOAT normfft, XFLOAT sum_ref_under_circ_mask, XFLOAT sum_ref2_under_circ_mask, XFLOAT expected_Pratio, int NpsiThisBatch, int startPsi, int totalPsis) { /* PLAN TO: * * 1) Pre-filter * d_Mstddev[i] = 1 / (2*d_Mstddev[i]) ( if d_Mstddev[pixel] > 1E-10 ) * d_Mstddev[i] = 1 ( else ) * * 2) Set * sum_ref2_under_circ_mask /= 2. * * 3) Total expression becomes * diff2 = ( exp(k) - 1.f ) / (expected_Pratio - 1.f) * where * k = (normfft * d_Maux[pixel] + d_Mmean[pixel] * sum_ref_under_circ_mask)*d_Mstddev[i] + sum_ref2_under_circ_mask * */ int pixel = threadIdx.x + blockIdx.x*(int)PROBRATIO_BLOCK_SIZE; if(pixel (XFLOAT)1E-10) diff2 *= d_Mstddev[pixel]; diff2 += sum_ref2_under_circ_mask; #if defined(CUDA_DOUBLE_PRECISION) diff2 = exp(-diff2 / 2.); // exponentiate to reflect the Gaussian error model. sigma=1 after normalization, 0.4=1/sqrt(2pi) #else diff2 = expf(-diff2 / 2.f); #endif // Store fraction of (1 - probability-ratio) wrt (1 - expected Pratio) diff2 = (diff2 - (XFLOAT)1.0) / (expected_Pratio - (XFLOAT)1.0); if (diff2 > Kccf) { Kccf = diff2; Kpsi = (startPsi + psi)*(360/totalPsis); } } d_Mccf[pixel] = Kccf; if (Kpsi >= 0.) 
d_Mpsi[pixel] = Kpsi; } } __global__ void cuda_kernel_rotateOnly( CUDACOMPLEX *d_Faux, XFLOAT psi, CudaProjectorKernel projector, int startPsi ) { int proj = blockIdx.y; int image_size=projector.imgX*projector.imgY; int pixel = threadIdx.x + blockIdx.x*BLOCK_SIZE; if(pixel projector.maxR) { if (y >= projector.imgY - projector.maxR) y = y - projector.imgY; else x = projector.maxR; } XFLOAT sa, ca; sincos((proj+startPsi)*psi, &sa, &ca); CUDACOMPLEX val; projector.project2Dmodel( x,y, ca, -sa, sa, ca, val.x,val.y); long int out_pixel = proj*image_size + pixel; d_Faux[out_pixel].x =val.x; d_Faux[out_pixel].y =val.y; } } __global__ void cuda_kernel_rotateAndCtf( CUDACOMPLEX *d_Faux, XFLOAT *d_ctf, XFLOAT psi, CudaProjectorKernel projector, int startPsi ) { int proj = blockIdx.y; int image_size=projector.imgX*projector.imgY; int pixel = threadIdx.x + blockIdx.x*BLOCK_SIZE; if(pixel projector.maxR) { if (y >= projector.imgY - projector.maxR) y = y - projector.imgY; else x = projector.maxR; } XFLOAT sa, ca; sincos((proj+startPsi)*psi, &sa, &ca); CUDACOMPLEX val; projector.project2Dmodel( x,y, ca, -sa, sa, ca, val.x,val.y); long int out_pixel = proj*image_size + pixel; d_Faux[out_pixel].x =val.x*d_ctf[pixel]; d_Faux[out_pixel].y =val.y*d_ctf[pixel]; } } __global__ void cuda_kernel_convol_A( CUDACOMPLEX *d_A, CUDACOMPLEX *d_B, int image_size) { int pixel = threadIdx.x + blockIdx.x*BLOCK_SIZE; if(pixel 0) Mstddev[pixel] = sqrt(temp); else Mstddev[pixel] = 0; } } __global__ void cuda_kernel_square( XFLOAT *A, int image_size) { int pixel = threadIdx.x + blockIdx.x*BLOCK_SIZE; if(pixel #include #include #include #include "src/gpu_utils/cuda_settings.h" #include "src/gpu_utils/cuda_device_utils.cuh" #include "src/gpu_utils/cuda_projector.cuh" #include "src/gpu_utils/cuda_projector.h" #ifdef CUDA_DOUBLE_PRECISION #define FAILSAFE_PRIOR_MIN_LIM 1e-300 #else #define FAILSAFE_PRIOR_MIN_LIM 1e-30 #endif template __global__ void cuda_kernel_exponentiate_weights_coarse( XFLOAT *g_pdf_orientation, XFLOAT *g_pdf_offset, weights_t *g_Mweight, XFLOAT avg_diff2, XFLOAT min_diff2, int nr_coarse_orient, int nr_coarse_trans) { // blockid int bid = blockIdx.x; int cid = blockIdx.y; //threadid int tid = threadIdx.x; int pos, iorient = bid*SUMW_BLOCK_SIZE+tid; weights_t weight; if(iorient 700.) weight = 0.; else weight *= exp(-diff2); #else if (diff2 > 88.) weight = 0.; else weight *= expf(-diff2); #endif // TODO: use tabulated exp function? / Sjors TODO: exp, expf, or __exp in CUDA? 
/Bjorn } // Store the weight g_Mweight[pos] = weight; // TODO put in shared mem } } } template __global__ void cuda_kernel_collect2jobs( XFLOAT *g_oo_otrans_x, // otrans-size -> make const XFLOAT *g_oo_otrans_y, // otrans-size -> make const XFLOAT *g_oo_otrans_z, // otrans-size -> make const XFLOAT *g_myp_oo_otrans_x2y2z2, // otrans-size -> make const XFLOAT *g_i_weights, XFLOAT op_significant_weight, // TODO Put in const XFLOAT op_sum_weight, // TODO Put in const int coarse_trans, int oversamples_trans, int oversamples_orient, int oversamples, bool do_ignore_pdf_direction, XFLOAT *g_o_weights, XFLOAT *g_thr_wsum_prior_offsetx_class, XFLOAT *g_thr_wsum_prior_offsety_class, XFLOAT *g_thr_wsum_prior_offsetz_class, XFLOAT *g_thr_wsum_sigma2_offset, unsigned long *d_rot_idx, unsigned long *d_trans_idx, unsigned long *d_job_idx, unsigned long *d_job_num ) { // blockid int bid = blockIdx.x; //threadid int tid = threadIdx.x; extern __shared__ XFLOAT buffer[]; XFLOAT * s_o_weights = &buffer[ 0]; XFLOAT * s_thr_wsum_sigma2_offset = &buffer[ SUMW_BLOCK_SIZE]; XFLOAT * s_thr_wsum_prior_offsetx_class = &buffer[2*SUMW_BLOCK_SIZE]; XFLOAT * s_thr_wsum_prior_offsety_class = &buffer[3*SUMW_BLOCK_SIZE]; XFLOAT * s_thr_wsum_prior_offsetz_class(0); if(DATA3D) s_thr_wsum_prior_offsetz_class = &buffer[4*SUMW_BLOCK_SIZE]; s_o_weights[tid] = (XFLOAT)0.0; s_thr_wsum_sigma2_offset[tid] = (XFLOAT)0.0; s_thr_wsum_prior_offsetx_class[tid] = (XFLOAT)0.0; s_thr_wsum_prior_offsety_class[tid] = (XFLOAT)0.0; if(DATA3D) s_thr_wsum_prior_offsety_class[tid] = (XFLOAT)0.0; long int pos = d_job_idx[bid]; int job_size = d_job_num[bid]; pos += tid; // pos is updated to be thread-resolved int pass_num = ceilfracf(job_size,SUMW_BLOCK_SIZE); __syncthreads(); for (int pass = 0; pass < pass_num; pass++, pos+=SUMW_BLOCK_SIZE) // loop the available warps enough to complete all translations for this orientation { if ((pass*SUMW_BLOCK_SIZE+tid)= op_significant_weight ) //TODO Might be slow (divergent threads) weight /= op_sum_weight; else weight = (XFLOAT)0.0; s_o_weights[tid] += weight; s_thr_wsum_sigma2_offset[tid] += weight * g_myp_oo_otrans_x2y2z2[iy]; s_thr_wsum_prior_offsetx_class[tid] += weight * g_oo_otrans_x[iy]; s_thr_wsum_prior_offsety_class[tid] += weight * g_oo_otrans_y[iy]; if(DATA3D) s_thr_wsum_prior_offsetz_class[tid] += weight * g_oo_otrans_z[iy]; } } __syncthreads(); // Reduction of all treanslations this orientation for(int j=(SUMW_BLOCK_SIZE/2); j>0; j/=2) { if(tid __global__ void cuda_kernel_cast( T1 *IN, T2 *OUT, int size) { int pixel = threadIdx.x + blockIdx.x*BLOCK_SIZE; if(pixel __global__ void cuda_kernel_frequencyPass( CUDACOMPLEX *A, long int ori_size, size_t Xdim, size_t Ydim, size_t Zdim, XFLOAT edge_low, XFLOAT edge_width, XFLOAT edge_high, XFLOAT angpix, int image_size) { int texel = threadIdx.x + blockIdx.x*BLOCK_SIZE; int z = texel / (Xdim*Ydim); int xy = (texel - z*Xdim*Ydim); int y = xy / Xdim; int xp = xy - y*Xdim; int zp = ( z lows are dead { A[texel].x = 0.; A[texel].y = 0.; } else if (res < edge_high) //highpass => medium lows are almost dead { XFLOAT mul = 0.5 - 0.5 * cos( PI * (res-edge_low)/edge_width); A[texel].x *= mul; A[texel].y *= mul; } } else //lowpass { if (res > edge_high) //lowpass => highs are dead { A[texel].x = 0.; A[texel].y = 0.; } else if (res > edge_low) //lowpass => medium highs are almost dead { XFLOAT mul = 0.5 + 0.5 * cos( PI * (res-edge_low)/edge_width); A[texel].x *= mul; A[texel].y *= mul; } } } } template __global__ void cuda_kernel_powerClass( CUDACOMPLEX * g_image, 
XFLOAT * g_spectrum, int image_size, int spectrum_size, int xdim, int ydim, int zdim, int res_limit, XFLOAT * g_highres_Xi2) { int tid = threadIdx.x; int bid = blockIdx.x; XFLOAT normFaux; __shared__ XFLOAT s_highres_Xi2[POWERCLASS_BLOCK_SIZE]; s_highres_Xi2[tid] = (XFLOAT)0.; int x,y,xy,d; int xydim = xdim*ydim; int voxel=tid + bid*POWERCLASS_BLOCK_SIZE; bool coords_in_range(true); if(voxel0.f) && (ires=res_limit) s_highres_Xi2[tid] = normFaux; } } // Reduce the higres_Xi2-values for all threads. (I tried a straight atomic-write: for 128 threads it was ~3x slower) __syncthreads(); for(int j=(POWERCLASS_BLOCK_SIZE/2); j>0.f; j/=2) { if(tid #include #include #include #include "src/gpu_utils/cuda_projector.cuh" #include "src/gpu_utils/cuda_settings.h" #include "src/gpu_utils/cuda_device_utils.cuh" template __global__ void cuda_kernel_wavg( XFLOAT *g_eulers, CudaProjectorKernel projector, unsigned image_size, unsigned long orientation_num, XFLOAT *g_img_real, XFLOAT *g_img_imag, XFLOAT *g_trans_x, XFLOAT *g_trans_y, XFLOAT *g_trans_z, XFLOAT* g_weights, XFLOAT* g_ctfs, XFLOAT *g_wdiff2s_parts, XFLOAT *g_wdiff2s_AA, XFLOAT *g_wdiff2s_XA, unsigned long translation_num, XFLOAT weight_norm, XFLOAT significant_weight, XFLOAT part_scale) { XFLOAT ref_real, ref_imag, img_real, img_imag, trans_real, trans_imag; int bid = blockIdx.x; //block ID int tid = threadIdx.x; extern __shared__ XFLOAT buffer[]; unsigned pass_num(ceilfracf(image_size,block_sz)),pixel; XFLOAT * s_wdiff2s_parts = &buffer[0]; XFLOAT * s_sumXA = &buffer[block_sz]; XFLOAT * s_sumA2 = &buffer[2*block_sz]; XFLOAT * s_eulers = &buffer[3*block_sz]; if (tid < 9) s_eulers[tid] = g_eulers[bid*9+tid]; __syncthreads(); for (unsigned pass = 0; pass < pass_num; pass++) // finish a reference proj in each block { s_wdiff2s_parts[tid] = 0.0f; s_sumXA[tid] = 0.0f; s_sumA2[tid] = 0.0f; pixel = pass * block_sz + tid; if(pixel projector.maxR) { if (z >= projector.imgZ - projector.maxR) z = z - projector.imgZ; else x = projector.maxR; } } else { x = pixel % projector.imgX; y = floorfracf( pixel , projector.imgX); } if (y > projector.maxR) { if (y >= projector.imgY - projector.maxR) y = y - projector.imgY; else x = projector.maxR; } if(DATA3D) projector.project3Dmodel( x,y,z, s_eulers[0], s_eulers[1], s_eulers[2], s_eulers[3], s_eulers[4], s_eulers[5], s_eulers[6], s_eulers[7], s_eulers[8], ref_real, ref_imag); else if(REF3D) projector.project3Dmodel( x,y, s_eulers[0], s_eulers[1], s_eulers[3], s_eulers[4], s_eulers[6], s_eulers[7], ref_real, ref_imag); else projector.project2Dmodel( x,y, s_eulers[0], s_eulers[1], s_eulers[3], s_eulers[4], ref_real, ref_imag); if (REFCTF) { ref_real *= __ldg(&g_ctfs[pixel]); ref_imag *= __ldg(&g_ctfs[pixel]); } else { ref_real *= part_scale; ref_imag *= part_scale; } img_real = __ldg(&g_img_real[pixel]); img_imag = __ldg(&g_img_imag[pixel]); for (unsigned long itrans = 0; itrans < translation_num; itrans++) { XFLOAT weight = __ldg(&g_weights[bid * translation_num + itrans]); if (weight >= significant_weight) { weight /= weight_norm; if(DATA3D) translatePixel(x, y, z, g_trans_x[itrans], g_trans_y[itrans], g_trans_z[itrans], img_real, img_imag, trans_real, trans_imag); else translatePixel(x, y, g_trans_x[itrans], g_trans_y[itrans], img_real, img_imag, trans_real, trans_imag); XFLOAT diff_real = ref_real - trans_real; XFLOAT diff_imag = ref_imag - trans_imag; s_wdiff2s_parts[tid] += weight * (diff_real*diff_real + diff_imag*diff_imag); s_sumXA[tid] += weight * ( ref_real * trans_real + ref_imag * trans_imag); s_sumA2[tid] 
+= weight * ( ref_real*ref_real + ref_imag*ref_imag ); } } cuda_atomic_add(&g_wdiff2s_XA[pixel], s_sumXA[tid]); cuda_atomic_add(&g_wdiff2s_AA[pixel], s_sumA2[tid]); cuda_atomic_add(&g_wdiff2s_parts[pixel], s_wdiff2s_parts[tid]); } } } #endif /* CUDA_WAVG_KERNEL_CUH_ */ relion-3.1.3/src/gpu_utils/cuda_mem_utils.h000066400000000000000000001006131411340063500207510ustar00rootroot00000000000000#ifndef CUDA_DEVICE_MEM_UTILS_H_ #define CUDA_DEVICE_MEM_UTILS_H_ #ifdef CUDA #include "src/gpu_utils/cuda_settings.h" #include #endif #include #include #include #include #include #include #include #include #include "src/macros.h" #include "src/error.h" #include "src/parallel.h" #ifdef CUSTOM_ALLOCATOR_MEMGUARD #include #include #endif #ifdef DUMP_CUSTOM_ALLOCATOR_ACTIVITY #define CUSTOM_ALLOCATOR_REGION_NAME( name ) (fprintf(stderr, "\n%s", name)) #else #define CUSTOM_ALLOCATOR_REGION_NAME( name ) //Do nothing #endif #ifdef LAUNCH_CHECK #define LAUNCH_HANDLE_ERROR( err ) (LaunchHandleError( err, __FILE__, __LINE__ )) #define LAUNCH_PRIVATE_ERROR(func, status) { \ (status) = (func); \ LAUNCH_HANDLE_ERROR(status); \ } #else #define LAUNCH_HANDLE_ERROR( err ) (err) //Do nothing #define LAUNCH_PRIVATE_ERROR( err ) (err) //Do nothing #endif #ifdef DEBUG_CUDA #define DEBUG_HANDLE_ERROR( err ) (HandleError( err, __FILE__, __LINE__ )) #define DEBUG_PRIVATE_ERROR(func, status) { \ (status) = (func); \ DEBUG_HANDLE_ERROR(status); \ } #else #define DEBUG_HANDLE_ERROR( err ) (err) //Do nothing #define DEBUG_PRIVATE_ERROR( err ) (err) //Do nothing #endif #define HANDLE_ERROR( err ) (HandleError( err, __FILE__, __LINE__ )) #define PRIVATE_ERROR(func, status) { \ (status) = (func); \ HANDLE_ERROR(status); \ } static void HandleError( cudaError_t err, const char *file, int line ) { if (err != cudaSuccess) { fprintf(stderr, "ERROR: %s in %s at line %d (error-code %d)\n", cudaGetErrorString( err ), file, line, err ); fflush(stdout); raise(SIGSEGV); } //#ifdef DEBUG_CUDA // cudaError_t peek = cudaPeekAtLastError(); // if (peek != cudaSuccess) // { // printf( "DEBUG_ERROR: %s in %s at line %d (error-code %d)\n", // cudaGetErrorString( peek ), file, line, err ); // fflush(stdout); // raise(SIGSEGV); // } //#endif } #ifdef LAUNCH_CHECK static void LaunchHandleError( cudaError_t err, const char *file, int line ) { if (err != cudaSuccess) { printf( "KERNEL_ERROR: %s in %s at line %d (error-code %d)\n", cudaGetErrorString( err ), file, line, err ); fflush(stdout); CRITICAL(ERRGPUKERN); } } #endif /** * Print cuda device memory info */ static void cudaPrintMemInfo() { size_t free; size_t total; DEBUG_HANDLE_ERROR(cudaMemGetInfo( &free, &total )); float free_hr(free/(1024.*1024.)); float total_hr(total/(1024.*1024.)); printf( "free %.2fMiB, total %.2fMiB, used %.2fMiB\n", free_hr, total_hr, total_hr - free_hr); } template< typename T> static inline void cudaCpyHostToDevice( T *h_ptr, T *d_ptr, size_t size) { DEBUG_HANDLE_ERROR(cudaMemcpy( d_ptr, h_ptr, size * sizeof(T), cudaMemcpyHostToDevice)); }; template< typename T> static inline void cudaCpyHostToDevice( T *h_ptr, T *d_ptr, size_t size, cudaStream_t &stream) { DEBUG_HANDLE_ERROR(cudaMemcpyAsync( d_ptr, h_ptr, size * sizeof(T), cudaMemcpyHostToDevice, stream)); }; template< typename T> static inline void cudaCpyDeviceToHost( T *d_ptr, T *h_ptr, size_t size) { DEBUG_HANDLE_ERROR(cudaMemcpy( h_ptr, d_ptr, size * sizeof(T), cudaMemcpyDeviceToHost)); }; template< typename T> static inline void cudaCpyDeviceToHost( T *d_ptr, T *h_ptr, size_t size, cudaStream_t &stream) { 
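// Like the other copy wrappers in this header, this routes the transfer through
// DEBUG_HANDLE_ERROR so failures are reported with file and line when DEBUG_CUDA
// is defined. Note that cudaMemcpyAsync only overlaps with host execution when the
// host buffer is page-locked (e.g. allocated with cudaMallocHost or registered with
// cudaHostRegister); with ordinary pageable memory the copy may effectively be
// synchronous with respect to the host, so callers relying on overlap should pin
// their buffers.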
DEBUG_HANDLE_ERROR(cudaMemcpyAsync( h_ptr, d_ptr, size * sizeof(T), cudaMemcpyDeviceToHost, stream)); }; template< typename T> static inline void cudaCpyDeviceToDevice( T *src, T *des, size_t size, cudaStream_t &stream) { DEBUG_HANDLE_ERROR(cudaMemcpyAsync( des, src, size * sizeof(T), cudaMemcpyDeviceToDevice, stream)); }; template< typename T> static inline void cudaMemInit( T *ptr, T value, size_t size) { DEBUG_HANDLE_ERROR(cudaMemset( ptr, value, size * sizeof(T))); }; template< typename T> static inline void cudaMemInit( T *ptr, T value, size_t size, cudaStream_t &stream) { DEBUG_HANDLE_ERROR(cudaMemsetAsync( ptr, value, size * sizeof(T), stream)); }; class CudaCustomAllocator { typedef unsigned char BYTE; const static unsigned GUARD_SIZE = 4; const static BYTE GUARD_VALUE = 145; const static int ALLOC_RETRY = 500; public: class Alloc { friend class CudaCustomAllocator; private: Alloc *prev, *next; BYTE *ptr; size_t size; bool free; cudaEvent_t readyEvent; //Event record used for auto free bool freeWhenReady; #ifdef CUSTOM_ALLOCATOR_MEMGUARD BYTE *guardPtr; void *backtrace[20]; size_t backtraceSize; #endif Alloc(): prev(NULL), next(NULL), ptr(NULL), size(0), free(0), readyEvent(0), freeWhenReady(false) {} ~Alloc() { prev = NULL; next = NULL; ptr = NULL; if (readyEvent != 0) DEBUG_HANDLE_ERROR(cudaEventDestroy(readyEvent)); } public: inline BYTE *getPtr() { return ptr; } inline size_t getSize() { return size; } inline bool isFree() { return free; } inline cudaEvent_t getReadyEvent() { return readyEvent; } inline void markReadyEvent(cudaStream_t stream = 0) { //TODO add a debug warning if event already set DEBUG_HANDLE_ERROR(cudaEventCreate(&readyEvent)); DEBUG_HANDLE_ERROR(cudaEventRecord(readyEvent, stream)); } inline void doFreeWhenReady() { freeWhenReady = true; } }; private: Alloc *first; size_t totalSize; size_t alignmentSize; bool cache; pthread_mutex_t mutex; //Look for the first suited space inline Alloc *_getFirstSuitedFree(size_t size) { Alloc *a = first; //If not the last and too small or not free go to next allocation region while (a != NULL && ( a->size <= size || ! a->free ) ) a = a->next; return a; } //Free allocs with recorded ready events inline bool _syncReadyEvents() { bool somethingReady(false); Alloc *a = first; while (a != NULL) { if (! a->free && a->freeWhenReady && a->readyEvent != 0) { DEBUG_HANDLE_ERROR(cudaEventSynchronize(a->readyEvent)); somethingReady = true; } a = a->next; } return somethingReady; } //Free allocs with recorded ready events inline bool _freeReadyAllocs() { bool somethingFreed(false); Alloc *next = first; Alloc *curr; while (next != NULL) { curr = next; next = curr->next; if (! 
curr->free && curr->freeWhenReady && curr->readyEvent != 0) { cudaError_t e = cudaEventQuery(curr->readyEvent); if (e == cudaSuccess) { _free(curr); next = first; //List modified, restart somethingFreed = true; } else if (e != cudaErrorNotReady) { _printState(); HandleError( e, __FILE__, __LINE__ ); } } } return somethingFreed; } inline size_t _getTotalFreeSpace() { if (cache) { size_t total = 0; Alloc *a = first; while (a != NULL) { if (a->free) total += a->size; a = a->next; } return total; } else { size_t free, total; DEBUG_HANDLE_ERROR(cudaMemGetInfo( &free, &total )); return free; } } inline size_t _getTotalUsedSpace() { size_t total = 0; Alloc *a = first; while (a != NULL) { if (!a->free) total += a->size; a = a->next; } return total; } size_t _getNumberOfAllocs() { size_t total = 0; Alloc *a = first; while (a != NULL) { if (!a->free) total ++; a = a->next; } return total; } inline size_t _getLargestContinuousFreeSpace() { if (cache) { size_t largest = 0; Alloc *a = first; while (a != NULL) { if (a->free && a->size > largest) largest = a->size; a = a->next; } return largest; } else return _getTotalFreeSpace(); } inline void _printState() { size_t total = 0; Alloc *a = first; while (a != NULL) { total += a->size; if (a->free) printf("[%luB] ", (unsigned long) a->size); else if (a->freeWhenReady) printf("<%luB> ", (unsigned long) a->size); else printf("(%luB) ", (unsigned long) a->size); a = a->next; } printf("= %luB\n", (unsigned long) total); fflush(stdout); } inline void _free(Alloc* a) { // printf("free: %u ", a->size); // _printState(); #ifdef CUSTOM_ALLOCATOR_MEMGUARD size_t guardCount = a->size - (a->guardPtr - a->ptr); BYTE *guards = new BYTE[guardCount]; cudaStream_t stream = 0; cudaCpyDeviceToHost( a->guardPtr, guards, guardCount, stream); DEBUG_HANDLE_ERROR(cudaStreamSynchronize(stream)); for (int i = 0; i < guardCount; i ++) if (guards[i] != GUARD_VALUE) { fprintf (stderr, "ERROR: CORRUPTED BYTE GUARDS DETECTED\n"); char ** messages = backtrace_symbols(a->backtrace, a->backtraceSize); // skip first stack frame (points here) for (int i = 1; i < a->backtraceSize && messages != NULL; ++i) { char *mangled_name = 0, *offset_begin = 0, *offset_end = 0; // find parantheses and +address offset surrounding mangled name for (char *p = messages[i]; *p; ++p) { if (*p == '(') { mangled_name = p; } else if (*p == '+') { offset_begin = p; } else if (*p == ')') { offset_end = p; break; } } // if the line could be processed, attempt to demangle the symbol if (mangled_name && offset_begin && offset_end && mangled_name < offset_begin) { *mangled_name++ = '\0'; *offset_begin++ = '\0'; *offset_end++ = '\0'; int status; char * real_name = abi::__cxa_demangle(mangled_name, 0, 0, &status); // if demangling is successful, output the demangled function name if (status == 0) { std::cerr << "[bt]: (" << i << ") " << messages[i] << " : " << real_name << "+" << offset_begin << offset_end << std::endl; } // otherwise, output the mangled function name else { std::cerr << "[bt]: (" << i << ") " << messages[i] << " : " << mangled_name << "+" << offset_begin << offset_end << std::endl; } // free(real_name); } // otherwise, print the whole line else { std::cerr << "[bt]: (" << i << ") " << messages[i] << std::endl; } } std::cerr << std::endl; // free(messages); exit(EXIT_FAILURE); } delete[] guards; #endif a->free = true; if (cache) { //Previous neighbor is free, concatenate if ( a->prev != NULL && a->prev->free) { //Resize and set pointer a->size += a->prev->size; a->ptr = a->prev->ptr; //Fetch secondary 
neighbor Alloc *ppL = a->prev->prev; //Remove primary neighbor if (ppL == NULL) //If the previous is first in chain first = a; else ppL->next = a; delete a->prev; //Attach secondary neighbor a->prev = ppL; } //Next neighbor is free, concatenate if ( a->next != NULL && a->next->free) { //Resize and set pointer a->size += a->next->size; //Fetch secondary neighbor Alloc *nnL = a->next->next; //Remove primary neighbor if (nnL != NULL) nnL->prev = a; delete a->next; //Attach secondary neighbor a->next = nnL; } } else { DEBUG_HANDLE_ERROR(cudaFree( a->ptr )); a->ptr = NULL; if ( a->prev != NULL) a->prev->next = a->next; else first = a->next; //This is the first link if ( a->next != NULL) a->next->prev = a->prev; delete a; } }; void _setup() { first = new Alloc(); first->prev = NULL; first->next = NULL; first->size = totalSize; first->free = true; if (totalSize > 0) { HANDLE_ERROR(cudaMalloc( (void**) &(first->ptr), totalSize)); cache = true; } else cache = false; } void _clear() { if (first->ptr != NULL) DEBUG_HANDLE_ERROR(cudaFree( first->ptr )); first->ptr = NULL; Alloc *a = first, *nL; while (a != NULL) { nL = a->next; delete a; a = nL; } } public: CudaCustomAllocator(size_t size, size_t alignmentSize): totalSize(size), alignmentSize(alignmentSize), first(0), cache(true) { _setup(); int mutex_error = pthread_mutex_init(&mutex, NULL); if (mutex_error != 0) { printf("ERROR: Mutex could not be created for alloactor. CODE: %d.\n", mutex_error); fflush(stdout); CRITICAL(ERR_CAMUX); } } void resize(size_t size) { Lock ml(&mutex); _clear(); totalSize = size; _setup(); } inline Alloc* alloc(size_t requestedSize) { Lock ml(&mutex); _freeReadyAllocs(); // printf("alloc: %u ", size); // _printState(); size_t size = requestedSize; #ifdef CUSTOM_ALLOCATOR_MEMGUARD //Ad byte-guards size += alignmentSize * GUARD_SIZE; //Ad an integer multiple of alignment size as byte guard size #endif #ifdef DUMP_CUSTOM_ALLOCATOR_ACTIVITY fprintf(stderr, " %.4f", 100.*(float)size/(float)totalSize); #endif Alloc *newAlloc(NULL); if (cache) { size = alignmentSize*ceilf( (float)size / (float)alignmentSize) ; //To prevent miss-aligned memory Alloc *curAlloc = _getFirstSuitedFree(size); //If out of memory if (curAlloc == NULL) { #ifdef DEBUG_CUDA size_t spaceDiff = _getTotalFreeSpace(); #endif //Try to recover before throwing error for (int i = 0; i <= ALLOC_RETRY; i ++) { if (_syncReadyEvents() && _freeReadyAllocs()) { curAlloc = _getFirstSuitedFree(size); //Is there space now? if (curAlloc != NULL) break; //Success } else usleep(10000); // 10 ms, Order of magnitude of largest kernels } #ifdef DEBUG_CUDA spaceDiff = _getTotalFreeSpace() - spaceDiff; printf("DEBUG_INFO: Out of memory handled by waiting for unfinished tasks, which freed %lu B.\n", spaceDiff); #endif //Did we manage to recover? 
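// (The recovery loop above makes up to ALLOC_RETRY attempts: it synchronises any
// recorded ready-events, frees allocations that were marked freeWhenReady, and
// otherwise sleeps 10 ms before looking for a suitable free block again. Only when
// all attempts fail does the allocator print its state and abort with ERRCUDACAOOM.)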
if (curAlloc == NULL) { printf("ERROR: CudaCustomAllocator out of memory\n [requestedSpace: %lu B]\n [largestContinuousFreeSpace: %lu B]\n [totalFreeSpace: %lu B]\n", (unsigned long) size, (unsigned long) _getLargestContinuousFreeSpace(), (unsigned long) _getTotalFreeSpace()); _printState(); fflush(stdout); CRITICAL(ERRCUDACAOOM); } } if (curAlloc->size == size) { curAlloc->free = false; newAlloc = curAlloc; } else //Or curAlloc->size is smaller than size { //Setup new pointer newAlloc = new Alloc(); newAlloc->next = curAlloc; newAlloc->ptr = curAlloc->ptr; newAlloc->size = size; newAlloc->free = false; //Modify old pointer curAlloc->ptr = &(curAlloc->ptr[size]); curAlloc->size -= size; //Insert new allocation region into chain if(curAlloc->prev == NULL) //If the first allocation region first = newAlloc; else curAlloc->prev->next = newAlloc; newAlloc->prev = curAlloc->prev; newAlloc->next = curAlloc; curAlloc->prev = newAlloc; } } else { newAlloc = new Alloc(); newAlloc->size = size; newAlloc->free = false; DEBUG_HANDLE_ERROR(cudaMalloc( (void**) &(newAlloc->ptr), size)); //Just add to start by replacing first newAlloc->next = first; first->prev = newAlloc; first = newAlloc; } #ifdef CUSTOM_ALLOCATOR_MEMGUARD newAlloc->backtraceSize = backtrace(newAlloc->backtrace, 20); newAlloc->guardPtr = newAlloc->ptr + requestedSize; cudaStream_t stream = 0; cudaMemInit( newAlloc->guardPtr, GUARD_VALUE, size - requestedSize, stream); //TODO switch to specialized stream DEBUG_HANDLE_ERROR(cudaStreamSynchronize(stream)); #endif return newAlloc; }; ~CudaCustomAllocator() { { Lock ml(&mutex); _clear(); } pthread_mutex_destroy(&mutex); } //Thread-safe wrapper functions inline void free(Alloc* a) { Lock ml(&mutex); _free(a); } inline void syncReadyEvents() { Lock ml(&mutex); _syncReadyEvents(); } inline void freeReadyAllocs() { Lock ml(&mutex); _freeReadyAllocs(); } size_t getTotalFreeSpace() { Lock ml(&mutex); size_t size = _getTotalFreeSpace(); return size; } size_t getTotalUsedSpace() { Lock ml(&mutex); size_t size = _getTotalUsedSpace(); return size; } size_t getNumberOfAllocs() { Lock ml(&mutex); size_t size = _getNumberOfAllocs(); return size; } size_t getLargestContinuousFreeSpace() { Lock ml(&mutex); size_t size = _getLargestContinuousFreeSpace(); return size; } void printState() { Lock ml(&mutex); _printState(); } }; template class CudaGlobalPtr { CudaCustomAllocator *allocator; CudaCustomAllocator::Alloc *alloc; cudaStream_t stream; public: size_t size; //Size used when copying data from and to device T *h_ptr, *d_ptr; //Host and device pointers bool h_do_free, d_do_free; //True if host or device needs to be freed /*====================================================== CONSTRUCTORS WITH ALLOCATORS ======================================================*/ inline CudaGlobalPtr(CudaCustomAllocator *allocator): size(0), h_ptr(0), d_ptr(0), h_do_free(false), d_do_free(false), allocator(allocator), alloc(0), stream(cudaStreamPerThread) {}; inline CudaGlobalPtr(cudaStream_t stream, CudaCustomAllocator *allocator): size(0), h_ptr(0), d_ptr(0), h_do_free(false), d_do_free(false), allocator(allocator), alloc(0), stream(stream) {}; inline CudaGlobalPtr(size_t size, CudaCustomAllocator *allocator): size(size), h_ptr(new T[size]), d_ptr(0), h_do_free(true), d_do_free(false), allocator(allocator), alloc(0), stream(cudaStreamPerThread) {}; inline CudaGlobalPtr(size_t size, cudaStream_t stream, CudaCustomAllocator *allocator): size(size), h_ptr(new T[size]), d_ptr(0), h_do_free(true), d_do_free(false), 
allocator(allocator), alloc(0), stream(stream) {}; inline CudaGlobalPtr(T * h_start, size_t size, CudaCustomAllocator *allocator): size(size), h_ptr(h_start), d_ptr(0), h_do_free(false), d_do_free(false), allocator(allocator), alloc(0), stream(cudaStreamPerThread) {}; inline CudaGlobalPtr(T * h_start, size_t size, cudaStream_t stream, CudaCustomAllocator *allocator): size(size), h_ptr(h_start), d_ptr(0), h_do_free(false), d_do_free(false), allocator(allocator), alloc(0), stream(cudaStreamPerThread) {}; inline CudaGlobalPtr(T * h_start, T * d_start, size_t size, CudaCustomAllocator *allocator): size(size), h_ptr(h_start), d_ptr(d_start), h_do_free(false), d_do_free(false), allocator(allocator), alloc(0), stream(cudaStreamPerThread) {}; inline CudaGlobalPtr(T * h_start, T * d_start, size_t size, cudaStream_t stream, CudaCustomAllocator *allocator): size(size), h_ptr(h_start), d_ptr(d_start), h_do_free(false), d_do_free(false), allocator(allocator), alloc(0), stream(stream) {}; /*====================================================== CONSTRUCTORS WITHOUT ALLOCATORS ======================================================*/ inline CudaGlobalPtr(): size(0), h_ptr(0), d_ptr(0), h_do_free(false), d_do_free(false), allocator(0), alloc(0), stream(cudaStreamPerThread) {}; inline CudaGlobalPtr(cudaStream_t stream): size(0), h_ptr(0), d_ptr(0), h_do_free(false), d_do_free(false), allocator(0), alloc(0), stream(stream) {}; inline CudaGlobalPtr(size_t size): size(size), h_ptr(new T[size]), d_ptr(0), h_do_free(true), d_do_free(false), allocator(0), alloc(0), stream(cudaStreamPerThread) {}; inline CudaGlobalPtr(size_t size, cudaStream_t stream): size(size), h_ptr(new T[size]), d_ptr(0), h_do_free(true), d_do_free(false), allocator(0), alloc(0), stream(stream) {}; inline CudaGlobalPtr(T * h_start, size_t size): size(size), h_ptr(h_start), d_ptr(0), h_do_free(false), d_do_free(false), allocator(0), alloc(0), stream(0) {}; inline CudaGlobalPtr(T * h_start, size_t size, cudaStream_t stream): size(size), h_ptr(h_start), d_ptr(0), h_do_free(false), d_do_free(false), allocator(0), alloc(0), stream(cudaStreamPerThread) {}; inline CudaGlobalPtr(T * h_start, T * d_start, size_t size): size(size), h_ptr(h_start), d_ptr(d_start), h_do_free(false), d_do_free(false), allocator(0), alloc(0), stream(cudaStreamPerThread) {}; inline CudaGlobalPtr(T * h_start, T * d_start, size_t size, cudaStream_t stream): size(size), h_ptr(h_start), d_ptr(d_start), h_do_free(false), d_do_free(false), allocator(0), alloc(0), stream(stream) {}; /*====================================================== CONSTRUCTORS WITH OTHER GLOBAL POINTERS ======================================================*/ inline CudaGlobalPtr(const CudaGlobalPtr &ptr): size(ptr.size), h_ptr(ptr.h_ptr), d_ptr(ptr.d_ptr), h_do_free(false), d_do_free(false), allocator(ptr.allocator), alloc(0), stream(ptr.stream) {}; inline CudaGlobalPtr(const CudaGlobalPtr &ptr, size_t start_idx, size_t size): size(size), h_ptr(&ptr.h_ptr[start_idx]), d_ptr(&ptr.d_ptr[start_idx]), h_do_free(false), d_do_free(false), allocator(ptr.allocator), alloc(0), stream(ptr.stream) {}; /*====================================================== OTHER STUFF ======================================================*/ CudaCustomAllocator *getAllocator() {return allocator; }; cudaStream_t &getStream() {return stream; }; void setStream(cudaStream_t s) { stream = s; }; void setSize(size_t s) { size = s; }; size_t getSize() { return size; }; void setAllocator(CudaCustomAllocator *a) { free_device_if_set(); 
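// Release any device memory still held through the previous allocator before
// switching, so that no Alloc handle outlives the allocator that created it.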
allocator = a; }; void markReadyEvent() { #ifdef DEBUG_CUDA if (alloc == NULL) printf("DEBUG_WARNING: markReadyEvent called on null allocation.\n"); #endif alloc->markReadyEvent(stream); } void setDevPtr(T *ptr) { #ifdef DEBUG_CUDA if (d_do_free) printf("DEBUG_WARNING: Device pointer set without freeing the old one.\n"); #endif d_ptr = ptr; }; void setDevPtr(const CudaGlobalPtr &ptr) { #ifdef DEBUG_CUDA if (ptr.d_ptr == NULL) printf("DEBUG_WARNING: Device pointer is not set.\n"); #endif setHstPtr(ptr.d_ptr); }; void setHstPtr(T *ptr) { #ifdef DEBUG_CUDA if (h_do_free) printf("DEBUG_WARNING: Host pointer set without freeing the old one.\n"); #endif h_ptr = ptr; }; void setHstPtr(const CudaGlobalPtr &ptr) { #ifdef DEBUG_CUDA if (ptr.h_ptr == NULL) printf("DEBUG_WARNING: Host pointer is not set.\n"); #endif setHstPtr(ptr.h_ptr); }; /** * Allocate memory on device */ inline void device_alloc() { #ifdef DEBUG_CUDA if(size==0) printf("DEBUG_WARNING: device_alloc called with size == 0"); if (d_do_free) printf("DEBUG_WARNING: Device double allocation.\n"); #endif d_do_free = true; if (CustomAlloc) { alloc = allocator->alloc(size * sizeof(T)); d_ptr = (T*) alloc->getPtr(); } else DEBUG_HANDLE_ERROR(cudaMalloc( (void**) &d_ptr, size * sizeof(T))); } /** * Allocate memory on device with given size */ inline void device_alloc(size_t newSize) { size = newSize; device_alloc(); } /** * Allocate memory on host */ inline void host_alloc() { #ifdef DEBUG_CUDA if(size==0) printf("DEBUG_WARNING: device_alloc called with size == 0"); if (h_do_free) printf("DEBUG_WARNING: Host double allocation.\n"); #endif h_do_free = true; h_ptr = new T[size]; } /** * Allocate memory on host with given size */ inline void host_alloc(size_t newSize) { size = newSize; host_alloc(); } void resize_host(size_t newSize) { #ifdef DEBUG_CUDA if (size==0) printf("DEBUG_WARNING: Resizing from size zero (permitted).\n"); #endif T* newArr = new T[newSize]; memcpy( newArr, h_ptr, newSize * sizeof(T) ); size = newSize; #ifdef DEBUG_CUDA if (d_ptr!=NULL) printf("DEBUG_WARNING: Resizing host with present device allocation.\n"); #endif free_host(); setHstPtr(newArr); h_do_free=true; } /** * Initiate device memory with provided value */ inline void device_init(int value) { #ifdef DEBUG_CUDA if (d_ptr == 0) printf("DEBUG_WARNING: Memset requested before allocation in device_init().\n"); #endif cudaMemInit( d_ptr, value, size, stream); } /** * Copy a number (size) of bytes to device stored in the host pointer */ inline void cp_to_device() { #ifdef DEBUG_CUDA if (d_ptr == 0) printf("DEBUG_WARNING: cp_to_device() called before allocation.\n"); if (h_ptr == 0) printf("DEBUG_WARNING: NULL host pointer in cp_to_device().\n"); #endif cudaCpyHostToDevice(h_ptr, d_ptr, size, stream); } /** * Copy a number (size) of bytes to device stored in the provided host pointer */ inline void cp_to_device(T * hostPtr) { #ifdef DEBUG_CUDA if (hostPtr == NULL) printf("DEBUG_WARNING: Null-pointer given in cp_to_device(hostPtr).\n"); #endif h_ptr = hostPtr; cp_to_device(); } /** * Copy a number (size) of bytes from device pointer to the provided new device pointer */ inline void cp_on_device(T * dstDevPtr) { #ifdef DEBUG_CUDA if (dstDevPtr == NULL) printf("DEBUG_WARNING: Null-pointer given in cp_on_device(dstDevPtr).\n"); #endif cudaCpyDeviceToDevice(d_ptr, dstDevPtr, size, stream); } /** * Copy a number (size) of bytes from device pointer to the provided new device pointer */ inline void cp_on_device(CudaGlobalPtr &devPtr) { #ifdef DEBUG_CUDA if (devPtr.size == 0) 
printf("DEBUG_WARNING: Zero size on provided pointer in cp_on_device.\n"); #endif cp_on_device(devPtr.d_ptr); } /** * alloc and copy */ inline void put_on_device() { device_alloc(); cp_to_device(); } /** * alloc size and copy */ inline void put_on_device(size_t newSize) { size=newSize; device_alloc(); cp_to_device(); } /** * Copy a number (size) of bytes from device to the host pointer */ inline void cp_to_host() { #ifdef DEBUG_CUDA if (d_ptr == NULL) printf("DEBUG_WARNING: cp_to_host() called before device allocation.\n"); if (h_ptr == NULL) printf("DEBUG_WARNING: NULL host pointer in cp_to_host().\n"); #endif cudaCpyDeviceToHost(d_ptr, h_ptr, size, stream); } /** * Copy a number (thisSize) of bytes from device to the host pointer */ inline void cp_to_host(size_t thisSize) { #ifdef DEBUG_CUDA if (d_ptr == NULL) printf("DEBUG_WARNING: cp_to_host(thisSize) called before device allocation.\n"); if (h_ptr == NULL) printf("DEBUG_WARNING: NULL host pointer in cp_to_host(thisSize).\n"); #endif cudaCpyDeviceToHost(d_ptr, h_ptr, thisSize, stream); } /** * Copy a number (thisSize) of bytes from device to a specific host pointer */ inline void cp_to_host(T* hstPtr, size_t thisSize) { #ifdef DEBUG_CUDA if (d_ptr == NULL) printf("DEBUG_WARNING: cp_to_host(hstPtr, thisSize) called before device allocation.\n"); if (hstPtr == NULL) printf("DEBUG_WARNING: NULL host pointer in cp_to_host(hstPtr, thisSize).\n"); #endif cudaCpyDeviceToHost(d_ptr, hstPtr, thisSize, stream); } /** * Copy a number (size) of bytes from device to the host pointer */ inline void cp_to_host_on_stream(cudaStream_t s) { #ifdef DEBUG_CUDA if (d_ptr == NULL) printf("DEBUG_WARNING: cp_to_host_on_stream(s) called before device allocation.\n"); if (h_ptr == NULL) printf("DEBUG_WARNING: NULL host pointer in cp_to_host_on_stream(s).\n"); #endif cudaCpyDeviceToHost(d_ptr, h_ptr, size, s); } /** * Host data quick access */ inline T& operator[](size_t idx) { return h_ptr[idx]; }; /** * Host data quick access */ inline const T& operator[](size_t idx) const { return h_ptr[idx]; }; /** * Device data quick access */ inline T& operator()(size_t idx) { return d_ptr[idx]; }; /** * Device data quick access */ inline const T& operator()(size_t idx) const { return d_ptr[idx]; }; /** * Device pointer quick access */ inline T* operator~() { #ifdef DEBUG_CUDA if (d_ptr == 0) printf("DEBUG_WARNING: \"kernel cast\" on null pointer.\n"); #endif return d_ptr; }; inline void streamSync() { DEBUG_HANDLE_ERROR(cudaStreamSynchronize(stream)); } inline T getDeviceAt(size_t idx) { T value; cudaCpyDeviceToHost(&d_ptr[idx], &value, 1, stream); streamSync(); return value; } void dump_device_to_file(std::string fileName) { T *tmp = new T[size]; cudaCpyDeviceToHost(d_ptr, tmp, size, stream); std::ofstream f; f.open(fileName.c_str()); streamSync(); for (unsigned i = 0; i < size; i ++) f << tmp[i] << std::endl; f.close(); delete [] tmp; } void dump_host_to_file(std::string fileName) { std::ofstream f; f.open(fileName.c_str()); for (unsigned i = 0; i < size; i ++) f << h_ptr[i] << std::endl; f.close(); } /** * Delete device data */ inline void free_device() { #ifdef DEBUG_CUDA if (d_ptr == 0) printf("DEBUG_WARNING: Free device memory was called on NULL pointer in free_device().\n"); #endif d_do_free = false; if (CustomAlloc) { if (alloc->getReadyEvent() == 0) alloc->markReadyEvent(stream); alloc->doFreeWhenReady(); alloc = NULL; } else DEBUG_HANDLE_ERROR(cudaFree(d_ptr)); d_ptr = 0; } /** * Delete host data */ inline void free_host() { #ifdef DEBUG_CUDA if (h_ptr == 0) { 
printf("DEBUG_ERROR: free_host() called on NULL pointer.\n"); exit( EXIT_FAILURE ); } #endif h_do_free = false; delete [] h_ptr; h_ptr = 0; } inline void free_host_if_set() { if (h_do_free) free_host(); } inline void free_device_if_set() { if (d_do_free) free_device(); } /** * Delete both device and host data */ inline void free() { free_device(); free_host(); } inline void free_if_set() { free_host_if_set(); free_device_if_set(); } inline ~CudaGlobalPtr() { free_if_set(); } }; template class cudaStager { public: CudaGlobalPtr AllData; size_t size; // size of allocated host-space (AllData.size dictates the amount of memory copied to/from the device) /*====================================================== CONSTRUCTORS WITH ALLOCATORS ======================================================*/ inline cudaStager(CudaCustomAllocator *allocator): AllData(allocator), size(0) {}; inline cudaStager(CudaCustomAllocator *allocator, size_t newSize): AllData(newSize,allocator), size(newSize) { AllData.size=0; }; /*====================================================== CONSTRUCTORS WITHOUT ALLOCATORS ======================================================*/ inline cudaStager(): AllData(), size(0) {}; inline cudaStager(size_t newSize): AllData(newSize), size(newSize) { AllData.size=0; }; public: void prepare_host() { if(size==0) { printf("trying to host-alloc a stager with size=0"); CRITICAL(ERR_STAGEMEM); } size_t temp_size=AllData.size; AllData.size=size; if(AllData.h_ptr==NULL) AllData.host_alloc(); else printf("WARNING : host_alloc when host-ptr is non-null"); AllData.size=temp_size; } void prepare_host(size_t alloc_size) { if(size==0) { printf("trying to device-alloc a stager with size=0"); CRITICAL(ERR_STAGEMEM); } size_t temp_size=AllData.size; AllData.size=alloc_size; if(AllData.h_ptr==NULL) AllData.host_alloc(); else printf("WARNING : host_alloc when host-ptr is non-null"); AllData.size=temp_size; } void prepare_device() { if(size==0) { printf("trying to host-alloc a stager with size=0"); CRITICAL(ERR_STAGEMEM); } size_t temp_size=AllData.size; AllData.size=size; if(AllData.d_ptr==NULL) AllData.device_alloc(); else printf("WARNING : device_alloc when dev-ptr is non-null"); AllData.size=temp_size; } void prepare_device(size_t alloc_size) { if(size==0) { printf("trying to device-alloc a stager with size=0"); CRITICAL(ERR_STAGEMEM); } size_t temp_size=AllData.size; AllData.size=alloc_size; if(AllData.d_ptr==NULL) AllData.device_alloc(); else printf("WARNING : device_alloc when dev-ptr is non-null"); AllData.size=temp_size; } void prepare() { prepare_host(); prepare_device(); } void prepare(size_t alloc_size) { prepare_host(alloc_size); prepare_device(alloc_size); } void stage(CudaGlobalPtr &input) { if(AllData.size+input.size>size) { printf("trying to stage more than stager can fit"); printf(" (attempted to stage %lu addtionally to the allready staged %lu, and total host-allocated capacity is %lu ",input.size,AllData.size,size); exit( EXIT_FAILURE ); } for(size_t i=0 ; i #include #include #include #include #include #include #include "src/gpu_utils/cuda_projector.h" #include "src/gpu_utils/cuda_projector.cuh" #include "src/gpu_utils/cuda_projector_plan.h" #include "src/gpu_utils/cuda_benchmark_utils.h" #include "src/gpu_utils/cuda_ml_optimiser.h" #include "src/gpu_utils/cuda_kernels/helper.cuh" #include "src/gpu_utils/cuda_kernels/diff2.cuh" #include "src/gpu_utils/cuda_kernels/wavg.cuh" #include "src/gpu_utils/cuda_helper_functions.cuh" #include "src/gpu_utils/cuda_mem_utils.h" #include 
"src/complex.h" #include "src/helix.h" #include "src/error.h" #include #include #include "src/parallel.h" #include #include #ifdef CUDA_FORCESTL #include "src/gpu_utils/cuda_utils_stl.cuh" #else #include "src/gpu_utils/cuda_utils_cub.cuh" #endif static pthread_mutex_t global_mutex = PTHREAD_MUTEX_INITIALIZER; void getFourierTransformsAndCtfs(long int my_ori_particle, OptimisationParamters &op, SamplingParameters &sp, MlOptimiser *baseMLO, MlOptimiserCuda *cudaMLO ) { GTIC(cudaMLO->timer,"getFourierTransformsAndCtfs"); //cudaMLO->timer.cuda_gpu_tic("getFourierTransformsAndCtfs"); #ifdef TIMING if (op.my_ori_particle == baseMLO->exp_my_first_ori_particle) baseMLO->timer.tic(baseMLO->TIMING_ESP_FT); #endif //FourierTransformer transformer; CUSTOM_ALLOCATOR_REGION_NAME("GFTCTF"); for (int ipart = 0; ipart < baseMLO->mydata.ori_particles[my_ori_particle].particles_id.size(); ipart++) { CTIC(cudaMLO->timer,"init"); FileName fn_img; Image img, rec_img; MultidimArray Fimg; MultidimArray Faux(cudaMLO->transformer.fFourier,true); MultidimArray Fctf; // What is my particle_id? long int part_id = baseMLO->mydata.ori_particles[my_ori_particle].particles_id[ipart]; // Which group do I belong? int group_id =baseMLO->mydata.getGroupId(part_id); // Get the right line in the exp_fn_img strings (also exp_fn_recimg and exp_fn_ctfs) int istop = 0; for (long int ii = baseMLO->exp_my_first_ori_particle; ii < my_ori_particle; ii++) istop += baseMLO->mydata.ori_particles[ii].particles_id.size(); istop += ipart; if (!baseMLO->mydata.getImageNameOnScratch(part_id, fn_img)) { std::istringstream split(baseMLO->exp_fn_img); for (int i = 0; i <= istop; i++) getline(split, fn_img); } sp.current_img = fn_img; // Get the norm_correction RFLOAT normcorr = DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_NORM); // Get the optimal origin offsets from the previous iteration Matrix1D my_old_offset(2), my_prior(2); XX(my_old_offset) = DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_XOFF); YY(my_old_offset) = DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_YOFF); XX(my_prior) = DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_XOFF_PRIOR); YY(my_prior) = DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_YOFF_PRIOR); // Uninitialised priors were set to 999. if (XX(my_prior) > 998.99 && XX(my_prior) < 999.01) XX(my_prior) = 0.; if (YY(my_prior) > 998.99 && YY(my_prior) < 999.01) YY(my_prior) = 0.; if (cudaMLO->dataIs3D) { my_old_offset.resize(3); my_prior.resize(3); ZZ(my_old_offset) = DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_ZOFF); ZZ(my_prior) = DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_ZOFF_PRIOR); // Unitialised priors were set to 999. if (ZZ(my_prior) > 998.99 && ZZ(my_prior) < 999.01) ZZ(my_prior) = 0.; } CTOC(cudaMLO->timer,"init"); CTIC(cudaMLO->timer,"nonZeroProb"); if (baseMLO->mymodel.orientational_prior_mode != NOPRIOR && !(baseMLO->do_skip_align ||baseMLO-> do_skip_rotate)) { // First try if there are some fixed prior angles RFLOAT prior_rot = DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_ROT_PRIOR); RFLOAT prior_tilt = DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_TILT_PRIOR); RFLOAT prior_psi = DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_PSI_PRIOR); RFLOAT prior_psi_flip_ratio = (baseMLO->mymodel.nr_bodies > 1 ) ? 0. 
: DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_PSI_PRIOR_FLIP_RATIO); bool do_auto_refine_local_searches = (baseMLO->do_auto_refine) && (baseMLO->sampling.healpix_order >= baseMLO->autosampling_hporder_local_searches); bool do_classification_local_searches = (! baseMLO->do_auto_refine) && (baseMLO->mymodel.orientational_prior_mode == PRIOR_ROTTILT_PSI) && (baseMLO->mymodel.sigma2_rot > 0.) && (baseMLO->mymodel.sigma2_tilt > 0.) && (baseMLO->mymodel.sigma2_psi > 0.); bool do_local_angular_searches = (do_auto_refine_local_searches) || (do_classification_local_searches); // If there were no defined priors (i.e. their values were 999.), then use the "normal" angles if (prior_rot > 998.99 && prior_rot < 999.01) prior_rot = DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_ROT); if (prior_tilt > 998.99 && prior_tilt < 999.01) prior_tilt = DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_TILT); if ( (baseMLO->do_helical_refine) && (baseMLO->helical_keep_tilt_prior_fixed) && (do_local_angular_searches) ) prior_tilt = DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_TILT); if (prior_psi > 998.99 && prior_psi < 999.01) prior_psi = DIRECT_A2D_ELEM(baseMLO->exp_metadata,op. metadata_offset + ipart, METADATA_PSI); if (prior_psi_flip_ratio > 998.99 && prior_psi_flip_ratio < 999.01) prior_psi_flip_ratio = 0.5; ////////// How does this work now: each particle has a different sampling object?!!! // Select only those orientations that have non-zero prior probability if (baseMLO->do_helical_refine) { baseMLO->sampling.selectOrientationsWithNonZeroPriorProbabilityFor3DHelicalReconstruction(prior_rot, prior_tilt, prior_psi, sqrt(baseMLO->mymodel.sigma2_rot), sqrt(baseMLO->mymodel.sigma2_tilt), sqrt(baseMLO->mymodel.sigma2_psi), op.pointer_dir_nonzeroprior, op.directions_prior, op.pointer_psi_nonzeroprior, op.psi_prior, do_local_angular_searches, prior_psi_flip_ratio); } else { baseMLO->sampling.selectOrientationsWithNonZeroPriorProbability(prior_rot, prior_tilt, prior_psi, sqrt(baseMLO->mymodel.sigma2_rot), sqrt(baseMLO->mymodel.sigma2_tilt), sqrt(baseMLO->mymodel.sigma2_psi), op.pointer_dir_nonzeroprior, op.directions_prior, op.pointer_psi_nonzeroprior, op.psi_prior); } long int nr_orients = baseMLO->sampling.NrDirections(0, &op.pointer_dir_nonzeroprior) * baseMLO->sampling.NrPsiSamplings(0, &op.pointer_psi_nonzeroprior); if (nr_orients == 0) { std::cerr << " sampling.NrDirections()= " << baseMLO->sampling.NrDirections(0, &op.pointer_dir_nonzeroprior) << " sampling.NrPsiSamplings()= " << baseMLO->sampling.NrPsiSamplings(0, &op.pointer_psi_nonzeroprior) << std::endl; REPORT_ERROR("Zero orientations fall within the local angular search. 
Increase the sigma-value(s) on the orientations!"); } } CTOC(cudaMLO->timer,"nonZeroProb"); CTIC(cudaMLO->timer,"setXmippOrigin1"); // Get the image and recimg data if (baseMLO->do_parallel_disc_io) { // If all followers had preread images into RAM: get those now if (baseMLO->do_preread_images) { img().reshape(baseMLO->mydata.particles[part_id].img); CTIC(cudaMLO->timer,"ParaReadPrereadImages"); FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(baseMLO->mydata.particles[part_id].img) { DIRECT_MULTIDIM_ELEM(img(), n) = (RFLOAT)DIRECT_MULTIDIM_ELEM(baseMLO->mydata.particles[part_id].img, n); } CTOC(cudaMLO->timer,"ParaReadPrereadImages"); } else { if (cudaMLO->dataIs3D) { CTIC(cudaMLO->timer,"ParaRead3DImages"); img.read(fn_img); img().setXmippOrigin(); CTOC(cudaMLO->timer,"ParaRead3DImages"); } else { CTIC(cudaMLO->timer,"ParaRead2DImages"); img() = baseMLO->exp_imgs[istop]; CTOC(cudaMLO->timer,"ParaRead2DImages"); } } if (baseMLO->has_converged && baseMLO->do_use_reconstruct_images) { FileName fn_recimg; std::istringstream split2(baseMLO->exp_fn_recimg); // Get the right line in the exp_fn_img string for (int i = 0; i <= istop; i++) getline(split2, fn_recimg); rec_img.read(fn_recimg); rec_img().setXmippOrigin(); } } else { // Unpack the image from the imagedata if (cudaMLO->dataIs3D) { CTIC(cudaMLO->timer,"Read3DImages"); CTIC(cudaMLO->timer,"resize"); img().resize(baseMLO->mymodel.ori_size, baseMLO->mymodel.ori_size,baseMLO-> mymodel.ori_size); CTOC(cudaMLO->timer,"resize"); // Only allow a single image per call of this function!!! nr_pool needs to be set to 1!!!! // This will save memory, as we'll need to store all translated images in memory.... FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY3D(img()) { DIRECT_A3D_ELEM(img(), k, i, j) = DIRECT_A3D_ELEM(baseMLO->exp_imagedata, k, i, j); } img().setXmippOrigin(); if (baseMLO->has_converged && baseMLO->do_use_reconstruct_images) { rec_img().resize(baseMLO->mymodel.ori_size, baseMLO->mymodel.ori_size,baseMLO-> mymodel.ori_size); int offset = (baseMLO->do_ctf_correction) ? 2 * baseMLO->mymodel.ori_size : baseMLO->mymodel.ori_size; FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY3D(rec_img()) { DIRECT_A3D_ELEM(rec_img(), k, i, j) = DIRECT_A3D_ELEM(baseMLO->exp_imagedata, offset + k, i, j); } rec_img().setXmippOrigin(); } CTOC(cudaMLO->timer,"Read3DImages"); } else { CTIC(cudaMLO->timer,"Read2DImages"); img().resize(baseMLO->mymodel.ori_size, baseMLO->mymodel.ori_size); FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY2D(img()) { DIRECT_A2D_ELEM(img(), i, j) = DIRECT_A3D_ELEM(baseMLO->exp_imagedata, op.metadata_offset + ipart, i, j); } img().setXmippOrigin(); if (baseMLO->has_converged && baseMLO->do_use_reconstruct_images) { ////////////// TODO: think this through for no-threads here..... rec_img().resize(baseMLO->mymodel.ori_size, baseMLO->mymodel.ori_size); FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY2D(rec_img()) { DIRECT_A2D_ELEM(rec_img(), i, j) = DIRECT_A3D_ELEM(baseMLO->exp_imagedata, baseMLO->exp_nr_images + op.metadata_offset + ipart, i, j); } rec_img().setXmippOrigin(); } CTOC(cudaMLO->timer,"Read2DImages"); } } CTOC(cudaMLO->timer,"setXmippOrigin1"); CTIC(cudaMLO->timer,"selfTranslate"); /* FIXME : For some reason the device-allocation inside "selfTranslate" takes a much longer time than expected. * I tried moving it up and placing the size under a bunch of if()-cases, but this simply transferred the * allocation-cost to that region. 
/BjoernF,160129 */ // Apply (rounded) old offsets first my_old_offset.selfROUND(); int img_size = img.data.nzyxdim; CudaGlobalPtr d_img(img_size,0,cudaMLO->devBundle->allocator); CudaGlobalPtr temp(img_size,0,cudaMLO->devBundle->allocator); d_img.device_alloc(); temp.device_alloc(); d_img.device_init(0); for (int i=0; ido_norm_correction) { CTIC(cudaMLO->timer,"norm_corr"); cuda_kernel_multi<<>>( ~temp, (XFLOAT)(baseMLO->mymodel.avg_norm_correction / normcorr), img_size); LAUNCH_PRIVATE_ERROR(cudaGetLastError(),cudaMLO->errorStatus); temp.streamSync(); CTOC(cudaMLO->timer,"norm_corr"); } // Helical reconstruction: calculate old_offset in the system of coordinates of the helix, i.e. parallel & perpendicular, depending on psi-angle! // For helices do NOT apply old_offset along the direction of the helix!! Matrix1D my_old_offset_helix_coords; RFLOAT rot_deg = DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_ROT); RFLOAT tilt_deg = DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_TILT); RFLOAT psi_deg = DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_PSI); if ( (baseMLO->do_helical_refine) && (! baseMLO->ignore_helical_symmetry) ) { // Calculate my_old_offset_helix_coords from my_old_offset and psi angle transformCartesianAndHelicalCoords(my_old_offset, my_old_offset_helix_coords, rot_deg, tilt_deg, psi_deg, CART_TO_HELICAL_COORDS); // We do NOT want to accumulate the offsets in the direction along the helix (which is X in the helical coordinate system!) // However, when doing helical local searches, we accumulate offsets // Do NOT accumulate offsets in 3D classification of helices if ( (baseMLO->mymodel.ref_dim == 3) && (! baseMLO->do_skip_align) && (! baseMLO->do_skip_rotate) ) { // TODO: check whether the following lines make sense bool do_auto_refine_local_searches = (baseMLO->do_auto_refine) && (baseMLO->sampling.healpix_order >= baseMLO->autosampling_hporder_local_searches); bool do_classification_local_searches = (! baseMLO->do_auto_refine) && (baseMLO->mymodel.orientational_prior_mode == PRIOR_ROTTILT_PSI) && (baseMLO->mymodel.sigma2_rot > 0.) && (baseMLO->mymodel.sigma2_tilt > 0.) && (baseMLO->mymodel.sigma2_psi > 0.); bool do_local_angular_searches = (do_auto_refine_local_searches) || (do_classification_local_searches); if (!do_local_angular_searches) { if (! cudaMLO->dataIs3D) XX(my_old_offset_helix_coords) = 0.; else ZZ(my_old_offset_helix_coords) = 0.; } } // TODO: Now re-calculate the my_old_offset in the real (or image) system of coordinate (rotate -psi angle) transformCartesianAndHelicalCoords(my_old_offset_helix_coords, my_old_offset, rot_deg, tilt_deg, psi_deg, HELICAL_TO_CART_COORDS); } my_old_offset.selfROUND(); CTIC(cudaMLO->timer,"kernel_translate"); if(cudaMLO->dataIs3D) cuda_kernel_translate3D<<>>( ~temp, // translate from temp... ~d_img, // ... into d_img img_size, img.data.xdim, img.data.ydim, img.data.zdim, XX(my_old_offset), YY(my_old_offset), ZZ(my_old_offset)); else cuda_kernel_translate2D<<>>( ~temp, // translate from temp... ~d_img, // ... into d_img img_size, img.data.xdim, img.data.ydim, XX(my_old_offset), YY(my_old_offset)); LAUNCH_PRIVATE_ERROR(cudaGetLastError(),cudaMLO->errorStatus); CTOC(cudaMLO->timer,"kernel_translate"); if (baseMLO->has_converged && baseMLO->do_use_reconstruct_images) //rec_img is NOT norm_corrected in the CPU-code, so nor do we. { for (int i=0; idataIs3D) cuda_kernel_translate3D<<>>( ~temp, // translate from temp... ~d_img, // ... 
into d_img img_size, img.data.xdim, img.data.ydim, img.data.zdim, XX(my_old_offset), YY(my_old_offset), ZZ(my_old_offset)); else cuda_kernel_translate2D<<>>( ~temp, // translate from temp... ~d_img, // ... into d_img img_size, img.data.xdim, img.data.ydim, XX(my_old_offset), YY(my_old_offset)); LAUNCH_PRIVATE_ERROR(cudaGetLastError(),cudaMLO->errorStatus); } if ( (baseMLO->do_helical_refine) && (! baseMLO->ignore_helical_symmetry) ) { // Transform rounded Cartesian offsets to corresponding helical ones transformCartesianAndHelicalCoords(my_old_offset, my_old_offset_helix_coords, rot_deg, tilt_deg, psi_deg, CART_TO_HELICAL_COORDS); op.old_offset[ipart] = my_old_offset_helix_coords; } else { op.old_offset[ipart] = my_old_offset; // Not doing helical refinement. Rounded Cartesian offsets are stored. } // Also store priors on translations op.prior[ipart] = my_prior; CTOC(cudaMLO->timer,"selfTranslate"); CTIC(cudaMLO->timer,"calcFimg"); size_t current_size_x = baseMLO->mymodel.current_size / 2 + 1; size_t current_size_y = baseMLO->mymodel.current_size; size_t current_size_z = (cudaMLO->dataIs3D) ? baseMLO->mymodel.current_size : 1; cudaMLO->transformer1.setSize(img().xdim,img().ydim,img().zdim); //FIXME What is this? // deviceInitValue(cudaMLO->transformer1.reals, (XFLOAT)0.); // deviceInitComplexValue(cudaMLO->transformer1.fouriers, (XFLOAT)0.); // cudaMLO->transformer1.reals.streamSync(); // cudaMLO->transformer1.fouriers.streamSync(); d_img.cp_on_device(cudaMLO->transformer1.reals); runCenterFFT( cudaMLO->transformer1.reals, (int)cudaMLO->transformer1.xSize, (int)cudaMLO->transformer1.ySize, (int)cudaMLO->transformer1.zSize, false ); cudaMLO->transformer1.reals.streamSync(); cudaMLO->transformer1.forward(); cudaMLO->transformer1.fouriers.streamSync(); int FMultiBsize = ( (int) ceilf(( float)cudaMLO->transformer1.fouriers.getSize()*2/(float)BLOCK_SIZE)); cuda_kernel_multi<<transformer1.fouriers.getStream()>>>( (XFLOAT*)~cudaMLO->transformer1.fouriers, (XFLOAT)1/((XFLOAT)(cudaMLO->transformer1.reals.getSize())), cudaMLO->transformer1.fouriers.getSize()*2); LAUNCH_PRIVATE_ERROR(cudaGetLastError(),cudaMLO->errorStatus); CudaGlobalPtr d_Fimg(current_size_x * current_size_y * current_size_z, cudaMLO->devBundle->allocator); d_Fimg.device_alloc(); cudaMLO->transformer1.fouriers.streamSync(); windowFourierTransform2( cudaMLO->transformer1.fouriers, d_Fimg, cudaMLO->transformer1.xFSize,cudaMLO->transformer1.yFSize, cudaMLO->transformer1.zFSize, //Input dimensions current_size_x, current_size_y, current_size_z //Output dimensions ); CTOC(cudaMLO->timer,"calcFimg"); cudaMLO->transformer1.fouriers.streamSync(); CTIC(cudaMLO->timer,"cpFimg2Host"); d_Fimg.cp_to_host(); d_Fimg.streamSync(); Fimg.initZeros(current_size_z, current_size_y, current_size_x); for (int i = 0; i < Fimg.nzyxdim; i ++) { Fimg.data[i].real = (RFLOAT) d_Fimg[i].x; Fimg.data[i].imag = (RFLOAT) d_Fimg[i].y; } CTOC(cudaMLO->timer,"cpFimg2Host"); CTIC(cudaMLO->timer,"selfApplyBeamTilt"); // Here apply the beamtilt correction if necessary // This will only be used for reconstruction, not for alignment // But beamtilt only affects very high-resolution components anyway... // RFLOAT beamtilt_x = DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_BEAMTILT_X); RFLOAT beamtilt_y = DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_BEAMTILT_Y); RFLOAT Cs = DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_CTF_CS); RFLOAT V = 1000. 
* DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_CTF_VOLTAGE); RFLOAT lambda = 12.2643247 / sqrt(V * (1. + V * 0.978466e-6)); if (ABS(beamtilt_x) > 0. || ABS(beamtilt_y) > 0.) selfApplyBeamTilt(Fimg, beamtilt_x, beamtilt_y, lambda, Cs,baseMLO->mymodel.pixel_size, baseMLO->mymodel.ori_size); op.Fimgs_nomask.at(ipart) = Fimg; CTOC(cudaMLO->timer,"selfApplyBeamTilt"); CTIC(cudaMLO->timer,"zeroMask"); MultidimArray Mnoise; bool is_helical_segment = (baseMLO->do_helical_refine) || ((baseMLO->mymodel.ref_dim == 2) && (baseMLO->helical_tube_outer_diameter > 0.)); if (!baseMLO->do_zero_mask) { // Make a noisy background image with the same spectrum as the sigma2_noise // Different MPI-distributed subsets may otherwise have different instances of the random noise below, // because work is on an on-demand basis and therefore variable with the timing of distinct nodes... // Have the seed based on the part_id, so that each particle has a different instant of the noise if (baseMLO->do_realign_movies) init_random_generator(baseMLO->random_seed + part_id); else init_random_generator(baseMLO->random_seed + my_ori_particle); // This only serves for exact reproducibility tests with 1.3-code... // If we're doing running averages, then the sigma2_noise was already adjusted for the running averages. // Undo this adjustment here in order to get the right noise in the individual frames MultidimArray power_noise = baseMLO->sigma2_fudge * baseMLO->mymodel.sigma2_noise[group_id]; if (baseMLO->do_realign_movies) power_noise *= (2. * baseMLO->movie_frame_running_avg_side + 1.); // Create noisy image for outside the mask MultidimArray Fnoise; Mnoise.resize(img()); cudaMLO->transformer.setReal(Mnoise); cudaMLO->transformer.getFourierAlias(Fnoise); // Fill Fnoise with random numbers, use power spectrum of the noise for its variance FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM(Fnoise) { int ires = ROUND( sqrt( (RFLOAT)(kp * kp + ip * ip + jp * jp) ) ); if (ires >= 0 && ires < XSIZE(Fnoise)) { RFLOAT sigma = sqrt(DIRECT_A1D_ELEM(power_noise, ires)); DIRECT_A3D_ELEM(Fnoise, k, i, j).real = rnd_gaus(0., sigma); DIRECT_A3D_ELEM(Fnoise, k, i, j).imag = rnd_gaus(0., sigma); } else { DIRECT_A3D_ELEM(Fnoise, k, i, j) = 0.; } } // Back to real space Mnoise CTIC(cudaMLO->timer,"inverseFourierTransform"); cudaMLO->transformer.inverseFourierTransform(); CTOC(cudaMLO->timer,"inverseFourierTransform"); CTIC(cudaMLO->timer,"setXmippOrigin2"); Mnoise.setXmippOrigin(); CTOC(cudaMLO->timer,"setXmippOrigin2"); CTIC(cudaMLO->timer,"softMaskOutsideMap"); d_img.cp_to_host(); d_img.streamSync(); for (int i=0; iparticle_diameter / (2. * baseMLO->mymodel.pixel_size)), (baseMLO->helical_tube_outer_diameter / (2. * baseMLO->mymodel.pixel_size)), baseMLO->width_mask_edge, &Mnoise); } else softMaskOutsideMap(img(), baseMLO->particle_diameter / (2. * baseMLO->mymodel.pixel_size), (RFLOAT)baseMLO->width_mask_edge, &Mnoise); for (int i=0; itimer,"softMaskOutsideMap"); } else if (is_helical_segment) { d_img.cp_to_host(); d_img.streamSync(); for (int i=0; iparticle_diameter / (2. * baseMLO->mymodel.pixel_size)), (baseMLO->helical_tube_outer_diameter / (2. * baseMLO->mymodel.pixel_size)), baseMLO->width_mask_edge); for (int i=0; itimer,"softMaskOutsideMap"); XFLOAT cosine_width = baseMLO->width_mask_edge; XFLOAT radius = (XFLOAT)((RFLOAT)baseMLO->particle_diameter / (2. 
*baseMLO-> mymodel.pixel_size)); if (radius < 0) radius = ((RFLOAT)img.data.xdim)/2.; XFLOAT radius_p = radius + cosine_width; // dim3 block_dim = 1; //TODO // cuda_kernel_softMaskOutsideMap<<>>( ~d_img, // img().nzyxdim, // img.data.xdim, // img.data.ydim, // img.data.zdim, // img.data.xdim/2, // img.data.ydim/2, // img.data.zdim/2, //unused // true, // radius, // radius_p, // cosine_width); // LAUNCH_PRIVATE_ERROR(cudaGetLastError(),cudaMLO->errorStatus); XFLOAT sum_bg(0.); dim3 block_dim = 128; //TODO: set balanced (hardware-dep?) CudaGlobalPtr softMaskSum (SOFTMASK_BLOCK_SIZE,0,cudaMLO->devBundle->allocator); CudaGlobalPtr softMaskSum_bg(SOFTMASK_BLOCK_SIZE,0,cudaMLO->devBundle->allocator); softMaskSum.device_alloc(); softMaskSum_bg.device_alloc(); softMaskSum.device_init(0.f); softMaskSum_bg.device_init(0.f); cuda_kernel_softMaskBackgroundValue<<>>( ~d_img, img().nzyxdim, img.data.xdim, img.data.ydim, img.data.zdim, img.data.xdim/2, img.data.ydim/2, img.data.zdim/2, //unused true, radius, radius_p, cosine_width, ~softMaskSum, ~softMaskSum_bg); LAUNCH_PRIVATE_ERROR(cudaGetLastError(),cudaMLO->errorStatus); softMaskSum.streamSync(); sum_bg = (RFLOAT) getSumOnDevice(softMaskSum_bg) / (RFLOAT) getSumOnDevice(softMaskSum); softMaskSum.streamSync(); cuda_kernel_cosineFilter<<>>( ~d_img, img().nzyxdim, img.data.xdim, img.data.ydim, img.data.zdim, img.data.xdim/2, img.data.ydim/2, img.data.zdim/2, //unused true, radius, radius_p, cosine_width, sum_bg); LAUNCH_PRIVATE_ERROR(cudaGetLastError(),cudaMLO->errorStatus); // d_img.streamSync(); // d_img.cp_to_host(); DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaStreamPerThread)); // FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(img()) // { // img.data.data[n]=(RFLOAT)d_img[n]; // } CTOC(cudaMLO->timer,"softMaskOutsideMap"); } CTOC(cudaMLO->timer,"zeroMask"); CTIC(cudaMLO->timer,"setSize"); cudaMLO->transformer1.setSize(img().xdim,img().ydim,img().zdim); // deviceInitValue(cudaMLO->transformer1.reals, (XFLOAT)0.); // deviceInitComplexValue(cudaMLO->transformer1.fouriers, (XFLOAT)0.); // cudaMLO->transformer1.reals.streamSync(); // cudaMLO->transformer1.fouriers.streamSync(); CTOC(cudaMLO->timer,"setSize"); CTIC(cudaMLO->timer,"transform"); d_img.cp_on_device(cudaMLO->transformer1.reals); runCenterFFT( // runs on input GlobalPtr.stream cudaMLO->transformer1.reals, (int)cudaMLO->transformer1.xSize, (int)cudaMLO->transformer1.ySize, (int)cudaMLO->transformer1.zSize, false ); cudaMLO->transformer1.reals.streamSync(); cudaMLO->transformer1.forward(); cudaMLO->transformer1.fouriers.streamSync(); int FMultiBsize2 = ( (int) ceilf(( float)cudaMLO->transformer1.fouriers.getSize()*2/(float)BLOCK_SIZE)); cuda_kernel_multi<<transformer1.fouriers.getStream()>>>( (XFLOAT*)~cudaMLO->transformer1.fouriers, (XFLOAT)1/((XFLOAT)(cudaMLO->transformer1.reals.getSize())), cudaMLO->transformer1.fouriers.getSize()*2); LAUNCH_PRIVATE_ERROR(cudaGetLastError(),cudaMLO->errorStatus); CTOC(cudaMLO->timer,"transform"); cudaMLO->transformer1.fouriers.streamSync(); CTIC(cudaMLO->timer,"powerClass"); // Store the power_class spectrum of the whole image (to fill sigma2_noise between current_size and ori_size if (baseMLO->mymodel.current_size < baseMLO->mymodel.ori_size) { CudaGlobalPtr spectrumAndXi2((baseMLO->mymodel.ori_size/2+1)+1,0,cudaMLO->devBundle->allocator); // last +1 is the Xi2, to remove an expensive memcpy spectrumAndXi2.device_alloc(); spectrumAndXi2.device_init(0); spectrumAndXi2.streamSync(); dim3 gridSize = CEIL((float)(cudaMLO->transformer1.fouriers.getSize()) / 
(float)POWERCLASS_BLOCK_SIZE); if(cudaMLO->dataIs3D) cuda_kernel_powerClass<<>>( ~cudaMLO->transformer1.fouriers, ~spectrumAndXi2, cudaMLO->transformer1.fouriers.getSize(), spectrumAndXi2.getSize()-1, cudaMLO->transformer1.xFSize, cudaMLO->transformer1.yFSize, cudaMLO->transformer1.zFSize, (baseMLO->mymodel.current_size/2)+1, // note: NOT baseMLO->mymodel.ori_size/2+1 &spectrumAndXi2.d_ptr[spectrumAndXi2.getSize()-1]); // last element is the hihgres_Xi2 else cuda_kernel_powerClass<<>>( ~cudaMLO->transformer1.fouriers, ~spectrumAndXi2, cudaMLO->transformer1.fouriers.getSize(), spectrumAndXi2.getSize()-1, cudaMLO->transformer1.xFSize, cudaMLO->transformer1.yFSize, cudaMLO->transformer1.zFSize, (baseMLO->mymodel.current_size/2)+1, // note: NOT baseMLO->mymodel.ori_size/2+1 &spectrumAndXi2.d_ptr[spectrumAndXi2.getSize()-1]); // last element is the hihgres_Xi2 LAUNCH_PRIVATE_ERROR(cudaGetLastError(),cudaMLO->errorStatus); spectrumAndXi2.streamSync(); spectrumAndXi2.cp_to_host(); spectrumAndXi2.streamSync(); op.power_imgs.at(ipart).resize(baseMLO->mymodel.ori_size/2 + 1); for (int i = 0; i<(spectrumAndXi2.getSize()-1); i ++) op.power_imgs.at(ipart).data[i] = spectrumAndXi2[i]; op.highres_Xi2_imgs.at(ipart) = spectrumAndXi2[spectrumAndXi2.getSize()-1]; } else { op.highres_Xi2_imgs.at(ipart) = 0.; } CTOC(cudaMLO->timer,"powerClass"); // We never need any resolutions higher than current_size // So resize the Fourier transforms CTIC(cudaMLO->timer,"windowFourierTransform2"); //windowFourierTransform(Faux, Fimg, baseMLO->mymodel.current_size); cudaMLO->transformer1.fouriers.streamSync(); windowFourierTransform2( cudaMLO->transformer1.fouriers, d_Fimg, cudaMLO->transformer1.xFSize,cudaMLO->transformer1.yFSize, cudaMLO->transformer1.zFSize, //Input dimensions current_size_x, current_size_y, current_size_z, //Output dimensions 1, //Npsi 0, //pos cudaMLO->transformer1.fouriers.getStream() ); CTOC(cudaMLO->timer,"windowFourierTransform2"); // Also store its CTF CTIC(cudaMLO->timer,"ctfCorr"); CTIC(cudaMLO->timer,"cpFimg2Host_2"); d_Fimg.streamSync(); d_Fimg.cp_to_host(); d_Fimg.streamSync(); for (int i = 0; i < Fimg.nzyxdim; i ++) { Fimg.data[i].real = (RFLOAT) d_Fimg[i].x; Fimg.data[i].imag = (RFLOAT) d_Fimg[i].y; } CTOC(cudaMLO->timer,"cpFimg2Host_2"); Fctf.resize(Fimg); // Now calculate the actual CTF if (baseMLO->do_ctf_correction) { if (cudaMLO->dataIs3D) { Image Ictf; if (baseMLO->do_parallel_disc_io) { CTIC(cudaMLO->timer,"CTFRead3D_disk"); // Read CTF-image from disc FileName fn_ctf; if (!baseMLO->mydata.getImageNameOnScratch(part_id, fn_ctf, true)) { std::istringstream split(baseMLO->exp_fn_ctf); // Get the right line in the exp_fn_img string for (int i = 0; i <= istop; i++) getline(split, fn_ctf); } Ictf.read(fn_ctf); CTOC(cudaMLO->timer,"CTFRead3D_disk"); } else { CTIC(cudaMLO->timer,"CTFRead3D_array"); // Unpack the CTF-image from the exp_imagedata array Ictf().resize(baseMLO->mymodel.ori_size, baseMLO->mymodel.ori_size, baseMLO->mymodel.ori_size); FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY3D(Ictf()) { DIRECT_A3D_ELEM(Ictf(), k, i, j) = DIRECT_A3D_ELEM(baseMLO->exp_imagedata, baseMLO->mymodel.ori_size + k, i, j); } CTOC(cudaMLO->timer,"CTFRead3D_array"); } // Set the CTF-image in Fctf CTIC(cudaMLO->timer,"CTFSet3D_array"); Ictf().setXmippOrigin(); FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM(Fctf) { // Use negative kp,ip and jp indices, because the origin in the ctf_img lies half a pixel to the right of the actual center.... 
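					// A sketch of the index convention assumed here (illustrative values): with
					// ori_size = 256, setXmippOrigin() gives logical indices -128..127, so voxel 0
					// of Ictf() lies half a pixel to the right of the geometric centre of the box.
					// Mirroring to (-kp, -ip, -jp) therefore pairs each FFTW half-transform
					// frequency (kp, ip, jp) with the matching voxel of the centred CTF volume.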
DIRECT_A3D_ELEM(Fctf, k, i, j) = A3D_ELEM(Ictf(), -kp, -ip, -jp); } CTIC(cudaMLO->timer,"CTFSet3D_array"); } else { CTIC(cudaMLO->timer,"CTFRead2D"); CTF ctf; ctf.setValues(DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_CTF_DEFOCUS_U), DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_CTF_DEFOCUS_V), DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_CTF_DEFOCUS_ANGLE), DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_CTF_VOLTAGE), DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_CTF_CS), DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_CTF_Q0), DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_CTF_BFAC), 1., DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_CTF_PHASE_SHIFT)); ctf.getFftwImage(Fctf, baseMLO->mymodel.ori_size, baseMLO->mymodel.ori_size, baseMLO->mymodel.pixel_size, baseMLO->ctf_phase_flipped, baseMLO->only_flip_phases, baseMLO->intact_ctf_first_peak, true); CTIC(cudaMLO->timer,"CTFRead2D"); } } else { Fctf.initConstant(1.); } CTOC(cudaMLO->timer,"ctfCorr"); // Store Fimg and Fctf op.Fimgs.at(ipart) = Fimg; op.Fctfs.at(ipart) = Fctf; } // end loop ipart //cudaMLO->transformer.clear(); #ifdef TIMING if (op.my_ori_particle == baseMLO->exp_my_first_ori_particle) baseMLO->timer.toc(baseMLO->TIMING_ESP_FT); #endif GTOC(cudaMLO->timer,"getFourierTransformsAndCtfs"); GATHERGPUTIMINGS(cudaMLO->timer); } void getAllSquaredDifferencesCoarse( unsigned exp_ipass, OptimisationParamters &op, SamplingParameters &sp, MlOptimiser *baseMLO, MlOptimiserCuda *cudaMLO, CudaGlobalPtr &Mweight) { #ifdef TIMING if (op.my_ori_particle == baseMLO->exp_my_first_ori_particle) baseMLO->timer.tic(baseMLO->TIMING_ESP_DIFF1); #endif CUSTOM_ALLOCATOR_REGION_NAME("DIFF_COARSE"); CTIC(cudaMLO->timer,"diff_pre_gpu"); unsigned long weightsPerPart(baseMLO->mymodel.nr_classes * sp.nr_dir * sp.nr_psi * sp.nr_trans * sp.nr_oversampled_rot * sp.nr_oversampled_trans); std::vector > dummy; baseMLO->precalculateShiftedImagesCtfsAndInvSigma2s(false, op.my_ori_particle, sp.current_image_size, sp.current_oversampling, op.metadata_offset, // inserted SHWS 12112015 sp.itrans_min, sp.itrans_max, op.Fimgs, dummy, op.Fctfs, dummy, dummy, op.local_Fctfs, op.local_sqrtXi2, op.local_Minvsigma2s); unsigned image_size = op.local_Minvsigma2s[0].nzyxdim; CTOC(cudaMLO->timer,"diff_pre_gpu"); std::vector projectorPlans(0, cudaMLO->devBundle->allocator); //If particle specific sampling plan required if (cudaMLO->devBundle->generateProjectionPlanOnTheFly) { CTIC(cudaMLO->timer,"generateProjectionSetupCoarse"); projectorPlans.resize(baseMLO->mymodel.nr_classes, cudaMLO->devBundle->allocator); for (int iclass = sp.iclass_min; iclass <= sp.iclass_max; iclass++) { if (baseMLO->mymodel.pdf_class[iclass] > 0.) 
{ projectorPlans[iclass].setup( baseMLO->sampling, op.directions_prior, op.psi_prior, op.pointer_dir_nonzeroprior, op.pointer_psi_nonzeroprior, NULL, //Mcoarse_significant baseMLO->mymodel.pdf_class, baseMLO->mymodel.pdf_direction, sp.nr_dir, sp.nr_psi, sp.idir_min, sp.idir_max, sp.ipsi_min, sp.ipsi_max, sp.itrans_min, sp.itrans_max, 0, //current_oversampling 1, //nr_oversampled_rot iclass, true, //coarse !IS_NOT_INV, baseMLO->do_skip_align, baseMLO->do_skip_rotate, baseMLO->mymodel.orientational_prior_mode ); } } CTOC(cudaMLO->timer,"generateProjectionSetupCoarse"); } else projectorPlans = cudaMLO->devBundle->coarseProjectionPlans; // Loop only from sp.iclass_min to sp.iclass_max to deal with seed generation in first iteration size_t allWeights_size(0); for (int exp_iclass = sp.iclass_min; exp_iclass <= sp.iclass_max; exp_iclass++) allWeights_size += projectorPlans[exp_iclass].orientation_num * sp.nr_trans*sp.nr_oversampled_trans; CudaGlobalPtr allWeights(allWeights_size,cudaMLO->devBundle->allocator); allWeights.device_alloc(); long int allWeights_pos=0; bool do_CC = (baseMLO->iter == 1 && baseMLO->do_firstiter_cc) || baseMLO->do_always_cc; for (long int ipart = 0; ipart < sp.nr_particles; ipart++) { long int part_id = baseMLO->mydata.ori_particles[op.my_ori_particle].particles_id[ipart]; long int group_id = baseMLO->mydata.getGroupId(part_id); /*==================================== Generate Translations ======================================*/ CTIC(cudaMLO->timer,"translation_1"); long unsigned translation_num((sp.itrans_max - sp.itrans_min + 1) * sp.nr_oversampled_trans); CudaGlobalPtr trans_x(translation_num, cudaMLO->devBundle->allocator); CudaGlobalPtr trans_y(translation_num, cudaMLO->devBundle->allocator); CudaGlobalPtr trans_z(translation_num, cudaMLO->devBundle->allocator); CudaGlobalPtr Fimg_real(image_size, cudaMLO->devBundle->allocator); CudaGlobalPtr Fimg_imag(image_size, cudaMLO->devBundle->allocator); std::vector oversampled_translations_x, oversampled_translations_y, oversampled_translations_z; for (long int itrans = 0; itrans < translation_num; itrans++) { baseMLO->sampling.getTranslations(itrans, 0, oversampled_translations_x, oversampled_translations_y, oversampled_translations_z, (baseMLO->do_helical_refine) && (! baseMLO->ignore_helical_symmetry), baseMLO->helical_rise_initial / baseMLO->mymodel.pixel_size, baseMLO->helical_twist_initial); RFLOAT xshift = 0., yshift = 0., zshift = 0.; xshift = oversampled_translations_x[0]; yshift = oversampled_translations_y[0]; if (cudaMLO->dataIs3D) zshift = oversampled_translations_z[0]; if ( (baseMLO->do_helical_refine) && (! baseMLO->ignore_helical_symmetry) ) { RFLOAT rot_deg = DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_ROT); RFLOAT tilt_deg = DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_TILT); RFLOAT psi_deg = DIRECT_A2D_ELEM(baseMLO->exp_metadata,op.metadata_offset + ipart, METADATA_PSI); transformCartesianAndHelicalCoords(xshift, yshift, zshift, xshift, yshift, zshift, rot_deg, tilt_deg, psi_deg, (cudaMLO->dataIs3D) ? (3) : (2), HELICAL_TO_CART_COORDS); } trans_x[itrans] = -2 * PI * xshift / (double)baseMLO->mymodel.ori_size; trans_y[itrans] = -2 * PI * yshift / (double)baseMLO->mymodel.ori_size; trans_z[itrans] = -2 * PI * zshift / (double)baseMLO->mymodel.ori_size; } XFLOAT scale_correction = baseMLO->do_scale_correction ? 
baseMLO->mymodel.scale_correction[group_id] : 1; MultidimArray Fimg; windowFourierTransform(op.Fimgs[ipart], Fimg, sp.current_image_size); for (unsigned i = 0; i < image_size; i ++) { XFLOAT pixel_correction = 1.0/scale_correction; if (baseMLO->do_ctf_correction && baseMLO->refs_are_ctf_corrected) { // if ctf[i]==0, pix_corr[i] becomes NaN. // However, corr_img[i]==0, so pix-diff in kernel==0. // This is ok since originally, pix-diff==Img.real^2 + Img.imag^2, // which is ori-indep, and we subtract min_diff form ALL orients. if (op.local_Fctfs[ipart].data[i]!=0) pixel_correction /= op.local_Fctfs[ipart].data[i]; } Fimg_real[i] = Fimg.data[i].real * pixel_correction; Fimg_imag[i] = Fimg.data[i].imag * pixel_correction; } trans_x.put_on_device(); trans_y.put_on_device(); trans_z.put_on_device(); Fimg_real.put_on_device(); Fimg_imag.put_on_device(); CTOC(cudaMLO->timer,"translation_1"); // To speed up calculation, several image-corrections are grouped into a single pixel-wise "filter", or image-correciton CudaGlobalPtr corr_img(image_size, cudaMLO->devBundle->allocator); corr_img.device_alloc(); buildCorrImage(baseMLO,op,corr_img,ipart,group_id); corr_img.cp_to_device(); deviceInitValue(allWeights, (XFLOAT) (op.highres_Xi2_imgs[ipart] / 2.)); allWeights_pos = 0; for (int exp_iclass = sp.iclass_min; exp_iclass <= sp.iclass_max; exp_iclass++) DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaMLO->classStreams[exp_iclass])); DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaStreamPerThread)); for (int exp_iclass = sp.iclass_min; exp_iclass <= sp.iclass_max; exp_iclass++) { if ( projectorPlans[exp_iclass].orientation_num > 0 ) { /*==================================== Kernel Call ======================================*/ CudaProjectorKernel projKernel = CudaProjectorKernel::makeKernel( cudaMLO->devBundle->cudaProjectors[exp_iclass], op.local_Minvsigma2s[0].xdim, op.local_Minvsigma2s[0].ydim, op.local_Minvsigma2s[0].zdim, op.local_Minvsigma2s[0].xdim-1); runDiff2KernelCoarse( projKernel, ~trans_x, ~trans_y, ~trans_z, ~corr_img, ~Fimg_real, ~Fimg_imag, ~projectorPlans[exp_iclass].eulers, &allWeights(allWeights_pos), (XFLOAT) op.local_sqrtXi2[ipart], projectorPlans[exp_iclass].orientation_num, translation_num, image_size, cudaMLO->classStreams[exp_iclass], do_CC, cudaMLO->dataIs3D); mapAllWeightsToMweights( ~projectorPlans[exp_iclass].iorientclasses, &allWeights(allWeights_pos), &Mweight(ipart*weightsPerPart), projectorPlans[exp_iclass].orientation_num, translation_num, cudaMLO->classStreams[exp_iclass] ); /*==================================== Retrieve Results ======================================*/ allWeights_pos += projectorPlans[exp_iclass].orientation_num*translation_num; } } for (int exp_iclass = sp.iclass_min; exp_iclass <= sp.iclass_max; exp_iclass++) DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaMLO->classStreams[exp_iclass])); DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaStreamPerThread)); // does not appear to be NEEDED FOR NON-BLOCKING CLASS STREAMS in tests, but should be to sync against classStreams op.min_diff2[ipart] = getMinOnDevice(allWeights); op.avg_diff2[ipart] = (RFLOAT) getSumOnDevice(allWeights) / (RFLOAT) allWeights_size; } // end loop ipart #ifdef TIMING if (op.my_ori_particle == baseMLO->exp_my_first_ori_particle) baseMLO->timer.toc(baseMLO->TIMING_ESP_DIFF1); #endif } void getAllSquaredDifferencesFine(unsigned exp_ipass, OptimisationParamters &op, SamplingParameters &sp, MlOptimiser *baseMLO, MlOptimiserCuda *cudaMLO, std::vector &FinePassWeights, std::vector > &FPCMasks, std::vector 
&FineProjectionData, std::vector > &stagerD2) { #ifdef TIMING if (op.my_ori_particle == baseMLO->exp_my_first_ori_particle) baseMLO->timer.tic(baseMLO->TIMING_ESP_DIFF2); #endif CUSTOM_ALLOCATOR_REGION_NAME("DIFF_FINE"); CTIC(cudaMLO->timer,"diff_pre_gpu"); CTIC(cudaMLO->timer,"precalculateShiftedImagesCtfsAndInvSigma2s"); std::vector > dummy; baseMLO->precalculateShiftedImagesCtfsAndInvSigma2s(false, op.my_ori_particle, sp.current_image_size, sp.current_oversampling, op.metadata_offset, // inserted SHWS 12112015 sp.itrans_min, sp.itrans_max, op.Fimgs, dummy, op.Fctfs, dummy, dummy, op.local_Fctfs, op.local_sqrtXi2, op.local_Minvsigma2s); CTOC(cudaMLO->timer,"precalculateShiftedImagesCtfsAndInvSigma2s"); MultidimArray Fref; Fref.resize(op.local_Minvsigma2s[0]); unsigned image_size = op.local_Minvsigma2s[0].nzyxdim; CTOC(cudaMLO->timer,"diff_pre_gpu"); /*======================================================================================= Particle Iteration =========================================================================================*/ for (long int ipart = 0; ipart < sp.nr_particles; ipart++) { // Reset size without de-allocating: we will append everything significant within // the current allocation and then re-allocate the then determined (smaller) volume long int part_id = baseMLO->mydata.ori_particles[op.my_ori_particle].particles_id[ipart]; long int group_id = baseMLO->mydata.getGroupId(part_id); /*==================================== Generate Translations ======================================*/ CTIC(cudaMLO->timer,"translation_2"); long unsigned translation_num((sp.itrans_max - sp.itrans_min + 1) * sp.nr_oversampled_trans); CudaGlobalPtr Fimg_real(image_size, cudaMLO->devBundle->allocator); CudaGlobalPtr Fimg_imag(image_size, cudaMLO->devBundle->allocator); CudaGlobalPtr trans_x(translation_num, cudaMLO->devBundle->allocator); CudaGlobalPtr trans_y(translation_num, cudaMLO->devBundle->allocator); CudaGlobalPtr trans_z(translation_num, cudaMLO->devBundle->allocator); std::vector oversampled_translations_x, oversampled_translations_y, oversampled_translations_z; int j = 0; for (long int itrans = 0; itrans < (sp.itrans_max - sp.itrans_min + 1); itrans++) { baseMLO->sampling.getTranslations(itrans, baseMLO->adaptive_oversampling, oversampled_translations_x, oversampled_translations_y, oversampled_translations_z, (baseMLO->do_helical_refine) && (! baseMLO->ignore_helical_symmetry), baseMLO->helical_rise_initial / baseMLO->mymodel.pixel_size, baseMLO->helical_twist_initial); for (long int iover_trans = 0; iover_trans < oversampled_translations_x.size(); iover_trans++) { RFLOAT xshift = 0., yshift = 0., zshift = 0.; xshift = oversampled_translations_x[iover_trans]; yshift = oversampled_translations_y[iover_trans]; if (cudaMLO->dataIs3D) zshift = oversampled_translations_z[iover_trans]; if ( (baseMLO->do_helical_refine) && (! baseMLO->ignore_helical_symmetry) ) { RFLOAT rot_deg = DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_ROT); RFLOAT tilt_deg = DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_TILT); RFLOAT psi_deg = DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_PSI); transformCartesianAndHelicalCoords(xshift, yshift, zshift, xshift, yshift, zshift, rot_deg, tilt_deg, psi_deg, (cudaMLO->dataIs3D) ? 
(3) : (2), HELICAL_TO_CART_COORDS); } trans_x[j] = -2 * PI * xshift / (double)baseMLO->mymodel.ori_size; trans_y[j] = -2 * PI * yshift / (double)baseMLO->mymodel.ori_size; trans_z[j] = -2 * PI * zshift / (double)baseMLO->mymodel.ori_size; j ++; } } XFLOAT scale_correction = baseMLO->do_scale_correction ? baseMLO->mymodel.scale_correction[group_id] : 1; MultidimArray Fimg, Fimg_nomask; windowFourierTransform(op.Fimgs[ipart], Fimg, sp.current_image_size); for (unsigned i = 0; i < image_size; i ++) { XFLOAT pixel_correction = 1.0/scale_correction; if (baseMLO->do_ctf_correction && baseMLO->refs_are_ctf_corrected) { // if ctf[i]==0, pix_corr[i] becomes NaN. // However, corr_img[i]==0, so pix-diff in kernel==0. // This is ok since originally, pix-diff==Img.real^2 + Img.imag^2, // which is ori-indep, and we subtract min_diff form ALL orients. if (op.local_Fctfs[ipart].data[i]!=0) pixel_correction /= op.local_Fctfs[ipart].data[i]; } Fimg_real[i] = Fimg.data[i].real * pixel_correction; Fimg_imag[i] = Fimg.data[i].imag * pixel_correction; } CTOC(cudaMLO->timer,"translation_2"); CTIC(cudaMLO->timer,"kernel_init_1"); CudaGlobalPtr corr_img(image_size, cudaMLO->devBundle->allocator); corr_img.device_alloc(); buildCorrImage(baseMLO,op,corr_img,ipart,group_id); trans_x.put_on_device(); trans_y.put_on_device(); trans_z.put_on_device(); Fimg_real.put_on_device(); Fimg_imag.put_on_device(); corr_img.cp_to_device(); CTOC(cudaMLO->timer,"kernel_init_1"); std::vector< CudaGlobalPtr > eulers((sp.iclass_max-sp.iclass_min+1), cudaMLO->devBundle->allocator); cudaStager AllEulers(cudaMLO->devBundle->allocator,9*FineProjectionData[ipart].orientationNumAllClasses); AllEulers.prepare_device(); unsigned long newDataSize(0); for (int exp_iclass = sp.iclass_min; exp_iclass <= sp.iclass_max; exp_iclass++) { FPCMasks[ipart][exp_iclass].weightNum=0; if ((baseMLO->mymodel.pdf_class[exp_iclass] > 0.) && (FineProjectionData[ipart].class_entries[exp_iclass] > 0) ) { // use "slice" constructor with class-specific parameters to retrieve a temporary ProjectionParams with data for this class ProjectionParams thisClassProjectionData( FineProjectionData[ipart], FineProjectionData[ipart].class_idx[exp_iclass], FineProjectionData[ipart].class_idx[exp_iclass]+FineProjectionData[ipart].class_entries[exp_iclass]); // since we retrieved the ProjectionParams for *the whole* class the orientation_num is also equal. 
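				// Per-class fine-pass bookkeeping, in outline: slice out this class's
				// ProjectionParams, let makeJobsForDiff2Fine() significance-check the
				// translations and divide the surviving rot/trans pairs into jobs of at most
				// chunkSize weights (jobOrigin/jobExtent), and record the class's contiguous
				// window into the ipart-wide weight array in FPCMasks[ipart][exp_iclass]
				// (firstPos / weightNum / lastPos). Job indices and Euler matrices are then
				// staged and copied to the device in one bulk transfer further down.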
thisClassProjectionData.orientation_num[0] = FineProjectionData[ipart].class_entries[exp_iclass]; long unsigned orientation_num = thisClassProjectionData.orientation_num[0]; if(orientation_num==0) continue; CTIC(cudaMLO->timer,"pair_list_1"); long unsigned significant_num(0); long int nr_over_orient = baseMLO->sampling.oversamplingFactorOrientations(sp.current_oversampling); long int nr_over_trans = baseMLO->sampling.oversamplingFactorTranslations(sp.current_oversampling); // Prepare the mask of the weight-array for this class if (FPCMasks[ipart][exp_iclass].weightNum==0) FPCMasks[ipart][exp_iclass].firstPos = newDataSize; long unsigned ihidden(0); std::vector< long unsigned > iover_transes, ihiddens; for (long int itrans = sp.itrans_min; itrans <= sp.itrans_max; itrans++, ihidden++) { for (long int iover_trans = 0; iover_trans < sp.nr_oversampled_trans; iover_trans++) { ihiddens.push_back(ihidden); iover_transes.push_back(iover_trans); } } int chunkSize(0); if(cudaMLO->dataIs3D) chunkSize = D2F_CHUNK_DATA3D; else if(cudaMLO->refIs3D) chunkSize = D2F_CHUNK_DATA3D; else chunkSize = D2F_CHUNK_2D; // Do more significance checks on translations and create jobDivision significant_num = makeJobsForDiff2Fine( op, sp, // alot of different type inputs... orientation_num, translation_num, thisClassProjectionData, iover_transes, ihiddens, nr_over_orient, nr_over_trans, ipart, FinePassWeights[ipart], FPCMasks[ipart][exp_iclass], // ..and output into index-arrays mask... chunkSize); // ..based on a given maximum chunk-size // extend size by number of significants found this class newDataSize += significant_num; FPCMasks[ipart][exp_iclass].weightNum = significant_num; FPCMasks[ipart][exp_iclass].lastPos = FPCMasks[ipart][exp_iclass].firstPos + significant_num; CTOC(cudaMLO->timer,"pair_list_1"); CTIC(cudaMLO->timer,"IndexedArrayMemCp2"); // FPCMasks[ipart][exp_iclass].jobOrigin.cp_to_device(); // FPCMasks[ipart][exp_iclass].jobExtent.cp_to_device(); stagerD2[ipart].stage(FPCMasks[ipart][exp_iclass].jobOrigin); stagerD2[ipart].stage(FPCMasks[ipart][exp_iclass].jobExtent); CTOC(cudaMLO->timer,"IndexedArrayMemCp2"); CTIC(cudaMLO->timer,"generateEulerMatrices"); eulers[exp_iclass-sp.iclass_min].setSize(9*FineProjectionData[ipart].class_entries[exp_iclass]); eulers[exp_iclass-sp.iclass_min].host_alloc(); generateEulerMatrices( baseMLO->mymodel.PPref[exp_iclass].padding_factor, thisClassProjectionData, &(eulers[exp_iclass-sp.iclass_min])[0], !IS_NOT_INV); AllEulers.stage(eulers[exp_iclass-sp.iclass_min]); CTOC(cudaMLO->timer,"generateEulerMatrices"); } } // copy stagers to device stagerD2[ipart].cp_to_device(); AllEulers.cp_to_device(); FinePassWeights[ipart].rot_id.cp_to_device(); //FIXME this is not used FinePassWeights[ipart].rot_idx.cp_to_device(); FinePassWeights[ipart].trans_idx.cp_to_device(); for (int exp_iclass = sp.iclass_min; exp_iclass <= sp.iclass_max; exp_iclass++) DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaMLO->classStreams[exp_iclass])); DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaStreamPerThread)); for (int exp_iclass = sp.iclass_min; exp_iclass <= sp.iclass_max; exp_iclass++) { if ((baseMLO->mymodel.pdf_class[exp_iclass] > 0.) 
&& (FineProjectionData[ipart].class_entries[exp_iclass] > 0) ) { long unsigned orientation_num = FineProjectionData[ipart].class_entries[exp_iclass]; if(orientation_num==0) continue; long unsigned significant_num(FPCMasks[ipart][exp_iclass].weightNum); if(significant_num==0) continue; CTIC(cudaMLO->timer,"Diff2MakeKernel"); CudaProjectorKernel projKernel = CudaProjectorKernel::makeKernel( cudaMLO->devBundle->cudaProjectors[exp_iclass], op.local_Minvsigma2s[0].xdim, op.local_Minvsigma2s[0].ydim, op.local_Minvsigma2s[0].zdim, op.local_Minvsigma2s[0].xdim-1); CTOC(cudaMLO->timer,"Diff2MakeKernel"); // Use the constructed mask to construct a partial class-specific input IndexedDataArray thisClassFinePassWeights(FinePassWeights[ipart],FPCMasks[ipart][exp_iclass], cudaMLO->devBundle->allocator); CTIC(cudaMLO->timer,"Diff2CALL"); runDiff2KernelFine( projKernel, ~corr_img, ~Fimg_real, ~Fimg_imag, ~trans_x, ~trans_y, ~trans_z, ~eulers[exp_iclass-sp.iclass_min], ~thisClassFinePassWeights.rot_id, ~thisClassFinePassWeights.rot_idx, ~thisClassFinePassWeights.trans_idx, ~FPCMasks[ipart][exp_iclass].jobOrigin, ~FPCMasks[ipart][exp_iclass].jobExtent, ~thisClassFinePassWeights.weights, op, baseMLO, orientation_num, translation_num, significant_num, image_size, ipart, exp_iclass, cudaMLO->classStreams[exp_iclass], FPCMasks[ipart][exp_iclass].jobOrigin.getSize(), ((baseMLO->iter == 1 && baseMLO->do_firstiter_cc) || baseMLO->do_always_cc), cudaMLO->dataIs3D ); // DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaStreamPerThread)); CTOC(cudaMLO->timer,"Diff2CALL"); } // end if class significant } // end loop iclass for (int exp_iclass = sp.iclass_min; exp_iclass <= sp.iclass_max; exp_iclass++) DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaMLO->classStreams[exp_iclass])); DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaStreamPerThread)); FinePassWeights[ipart].setDataSize( newDataSize ); CTIC(cudaMLO->timer,"collect_data_1"); if(baseMLO->adaptive_oversampling!=0) { op.min_diff2[ipart] = (RFLOAT) getMinOnDevice(FinePassWeights[ipart].weights); op.avg_diff2[ipart] = (RFLOAT) getSumOnDevice(FinePassWeights[ipart].weights) / (RFLOAT) FinePassWeights[ipart].weights.size; } CTOC(cudaMLO->timer,"collect_data_1"); // std::cerr << " fine pass minweight = " << op.min_diff2[ipart] << std::endl; }// end loop ipart #ifdef TIMING if (op.my_ori_particle == baseMLO->exp_my_first_ori_particle) baseMLO->timer.toc(baseMLO->TIMING_ESP_DIFF2); #endif } template void convertAllSquaredDifferencesToWeights(unsigned exp_ipass, OptimisationParamters &op, SamplingParameters &sp, MlOptimiser *baseMLO, MlOptimiserCuda *cudaMLO, std::vector< IndexedDataArray> &PassWeights, std::vector< std::vector< IndexedDataArrayMask > > &FPCMasks, CudaGlobalPtr &Mweight, // FPCMasks = Fine-Pass Class-Masks bool failsafeMode = false) { #ifdef TIMING if (op.my_ori_particle == baseMLO->exp_my_first_ori_particle) { if (exp_ipass == 0) baseMLO->timer.tic(baseMLO->TIMING_ESP_WEIGHT1); else baseMLO->timer.tic(baseMLO->TIMING_ESP_WEIGHT2); } #endif // Ready the "prior-containers" for all classes (remake every ipart) CudaGlobalPtr pdf_orientation((sp.iclass_max-sp.iclass_min+1) * sp.nr_dir * sp.nr_psi, cudaMLO->devBundle->allocator); CudaGlobalPtr pdf_offset((sp.iclass_max-sp.iclass_min+1)*sp.nr_trans, cudaMLO->devBundle->allocator); RFLOAT pdf_orientation_mean(0); unsigned pdf_orientation_count(0); CUSTOM_ALLOCATOR_REGION_NAME("CASDTW_PDF"); pdf_orientation.device_alloc(); pdf_offset.device_alloc(); // pdf_orientation is ipart-independent, so we keep it above ipart scope 
CTIC(cudaMLO->timer,"get_orient_priors"); for (int exp_iclass = sp.iclass_min; exp_iclass <= sp.iclass_max; exp_iclass++) for (long int idir = sp.idir_min, iorientclass = (exp_iclass-sp.iclass_min) * sp.nr_dir * sp.nr_psi; idir <=sp.idir_max; idir++) for (long int ipsi = sp.ipsi_min; ipsi <= sp.ipsi_max; ipsi++, iorientclass++) { RFLOAT pdf(0); if (baseMLO->do_skip_align || baseMLO->do_skip_rotate) pdf = baseMLO->mymodel.pdf_class[exp_iclass]; else if (baseMLO->mymodel.orientational_prior_mode == NOPRIOR) pdf = DIRECT_MULTIDIM_ELEM(baseMLO->mymodel.pdf_direction[exp_iclass], idir); else pdf = op.directions_prior[idir] * op.psi_prior[ipsi]; pdf_orientation[iorientclass] = pdf; pdf_orientation_mean += pdf; pdf_orientation_count ++; } pdf_orientation_mean /= (RFLOAT) pdf_orientation_count; //If mean is non-zero bring all values closer to 1 to improve numerical accuracy //This factor is over all classes and is thus removed in the final normalization if (pdf_orientation_mean != 0.) for (int i = 0; i < pdf_orientation.getSize(); i ++) pdf_orientation[i] /= pdf_orientation_mean; pdf_orientation.cp_to_device(); CTOC(cudaMLO->timer,"get_orient_priors"); if(exp_ipass==0 || baseMLO->adaptive_oversampling!=0) { op.sum_weight.clear(); op.sum_weight.resize(sp.nr_particles, (RFLOAT)(sp.nr_particles)); op.max_weight.clear(); op.max_weight.resize(sp.nr_particles, (RFLOAT)-1); } if (exp_ipass==0) op.Mcoarse_significant.resizeNoCp(1,1,sp.nr_particles, XSIZE(op.Mweight)); XFLOAT my_significant_weight; op.significant_weight.clear(); op.significant_weight.resize(sp.nr_particles, 0.); // loop over all particles inside this ori_particle for (long int ipart = 0; ipart < sp.nr_particles; ipart++) { long int part_id = baseMLO->mydata.ori_particles[op.my_ori_particle].particles_id[ipart]; RFLOAT old_offset_z; RFLOAT old_offset_x = XX(op.old_offset[ipart]); RFLOAT old_offset_y = YY(op.old_offset[ipart]); if (cudaMLO->dataIs3D) old_offset_z = ZZ(op.old_offset[ipart]); if ((baseMLO->iter == 1 && baseMLO->do_firstiter_cc) || baseMLO->do_always_cc) { if(exp_ipass==0) { int nr_coarse_weights = (sp.iclass_max-sp.iclass_min+1)*sp.nr_particles * sp.nr_dir * sp.nr_psi * sp.nr_trans; PassWeights[ipart].weights.setDevPtr(&Mweight(ipart*nr_coarse_weights)); PassWeights[ipart].weights.setHstPtr(&Mweight[ipart*nr_coarse_weights]); PassWeights[ipart].weights.setSize(nr_coarse_weights); } PassWeights[ipart].weights.h_do_free=false; std::pair min_pair=getArgMinOnDevice(PassWeights[ipart].weights); PassWeights[ipart].weights.cp_to_host(); DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaStreamPerThread)); //Set all device-located weights to zero, and only the smallest one to 1. 
DEBUG_HANDLE_ERROR(cudaMemsetAsync(~(PassWeights[ipart].weights), 0.f, PassWeights[ipart].weights.getSize()*sizeof(XFLOAT),0)); XFLOAT unity=1; DEBUG_HANDLE_ERROR(cudaMemcpyAsync( &(PassWeights[ipart].weights(min_pair.first) ), &unity, sizeof(XFLOAT), cudaMemcpyHostToDevice, 0)); PassWeights[ipart].weights.cp_to_host(); DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaStreamPerThread)); my_significant_weight = 0.999; DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_NR_SIGN) = (RFLOAT) 1.; if (exp_ipass==0) // TODO better memset, 0 => false , 1 => true for (int ihidden = 0; ihidden < XSIZE(op.Mcoarse_significant); ihidden++) if (DIRECT_A2D_ELEM(op.Mweight, ipart, ihidden) >= my_significant_weight) DIRECT_A2D_ELEM(op.Mcoarse_significant, ipart, ihidden) = true; else DIRECT_A2D_ELEM(op.Mcoarse_significant, ipart, ihidden) = false; else { std::pair max_pair = getArgMaxOnDevice(PassWeights[ipart].weights); op.max_index[ipart].fineIdx = PassWeights[ipart].ihidden_overs[max_pair.first]; op.max_weight[ipart] = max_pair.second; } } else { long int sumRedSize=0; for (int exp_iclass = sp.iclass_min; exp_iclass <= sp.iclass_max; exp_iclass++) sumRedSize+= (exp_ipass==0) ? ceilf((float)(sp.nr_dir*sp.nr_psi)/(float)SUMW_BLOCK_SIZE) : ceil((float)FPCMasks[ipart][exp_iclass].jobNum / (float)SUMW_BLOCK_SIZE); // loop through making translational priors for all classes this ipart - then copy all at once - then loop through kernel calls ( TODO: group kernel calls into one big kernel) CTIC(cudaMLO->timer,"get_offset_priors"); double pdf_offset_mean(0); std::vector pdf_offset_t(pdf_offset.getSize()); unsigned pdf_offset_count(0); for (int exp_iclass = sp.iclass_min; exp_iclass <= sp.iclass_max; exp_iclass++) { /*========================================= Fetch+generate Translation data ===========================================*/ RFLOAT myprior_x, myprior_y, myprior_z; if (baseMLO->mymodel.ref_dim == 2) { myprior_x = XX(baseMLO->mymodel.prior_offset_class[exp_iclass]); myprior_y = YY(baseMLO->mymodel.prior_offset_class[exp_iclass]); } else { myprior_x = XX(op.prior[ipart]); myprior_y = YY(op.prior[ipart]); if (cudaMLO->dataIs3D) myprior_z = ZZ(op.prior[ipart]); } for (long int itrans = sp.itrans_min; itrans <= sp.itrans_max; itrans++) { RFLOAT mypriors_len2 = myprior_x * myprior_x + myprior_y * myprior_y; if (cudaMLO->dataIs3D) mypriors_len2 += myprior_z * myprior_z; // If it is doing helical refinement AND Cartesian vector myprior has a length > 0, transform the vector to its helical coordinates if ( (baseMLO->do_helical_refine) && (! baseMLO->ignore_helical_symmetry) && (mypriors_len2 > 0.00001) ) { RFLOAT rot_deg = DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_ROT); RFLOAT tilt_deg = DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_TILT); RFLOAT psi_deg = DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_PSI); transformCartesianAndHelicalCoords(myprior_x, myprior_y, myprior_z, myprior_x, myprior_y, myprior_z, rot_deg, tilt_deg, psi_deg, (cudaMLO->dataIs3D) ? (3) : (2), CART_TO_HELICAL_COORDS); } // (For helical refinement) Now offset, old_offset, sampling.translations and myprior are all in helical coordinates // To speed things up, only calculate pdf_offset at the coarse sampling. 
// That should not matter much, and that way one does not need to calculate all the OversampledTranslations double pdf(0); RFLOAT offset_x = old_offset_x + baseMLO->sampling.translations_x[itrans]; RFLOAT offset_y = old_offset_y + baseMLO->sampling.translations_y[itrans]; double tdiff2 = 0.; if ( (! baseMLO->do_helical_refine) || (baseMLO->ignore_helical_symmetry) || (cudaMLO->dataIs3D) ) tdiff2 += (offset_x - myprior_x) * (offset_x - myprior_x); tdiff2 += (offset_y - myprior_y) * (offset_y - myprior_y); if (cudaMLO->dataIs3D) { RFLOAT offset_z = old_offset_z + baseMLO->sampling.translations_z[itrans]; if ( (! baseMLO->do_helical_refine) || (baseMLO->ignore_helical_symmetry) ) tdiff2 += (offset_z - myprior_z) * (offset_z - myprior_z); } // P(offset|sigma2_offset) // This is the probability of the offset, given the model offset and variance. if (baseMLO->mymodel.sigma2_offset < 0.0001) pdf = ( tdiff2 > 0.) ? 0. : 1.; else pdf = exp ( tdiff2 / (-2. * baseMLO->mymodel.sigma2_offset) ) / ( 2. * PI * baseMLO->mymodel.sigma2_offset ); pdf_offset_t[(exp_iclass-sp.iclass_min)*sp.nr_trans + itrans] = pdf; pdf_offset_mean += pdf; pdf_offset_count ++; } } pdf_offset_mean /= (double) pdf_offset_count; //If mean is non-zero bring all values closer to 1 to improve numerical accuracy //This factor is over all classes and is thus removed in the final normalization if (pdf_offset_mean != 0.) for (int i = 0; i < pdf_offset.getSize(); i ++) pdf_offset[i] = pdf_offset_t[i] / pdf_offset_mean; pdf_offset.cp_to_device(); CTOC(cudaMLO->timer,"get_offset_priors"); CTIC(cudaMLO->timer,"sumweight1"); long int block_num; //Make sure most significant value is at least within single precision limit and some slack to distinguish peaks after prior multiplication XFLOAT local_norm = (XFLOAT)op.avg_diff2[ipart]; if (local_norm - op.min_diff2[ipart] > 50) local_norm = op.min_diff2[ipart] + 50; if(exp_ipass==0) { CudaGlobalPtr weights(Mweight.getAllocator()); weights.setSize(Mweight.getSize()); if (sizeof(weights_t) == sizeof(XFLOAT)) { weights.setHstPtr((weights_t*) Mweight.h_ptr); weights.setDevPtr((weights_t*) Mweight.d_ptr); weights.setAllocator(Mweight.getAllocator()); } else { weights.device_alloc(); block_num = ceilf((float)Mweight.getSize()/(float)BLOCK_SIZE); cuda_kernel_cast<<>> (~Mweight,~weights,Mweight.getSize()); } CudaGlobalPtr ipartMweight( weights, ipart * op.Mweight.xdim + sp.nr_dir * sp.nr_psi * sp.nr_trans * sp.iclass_min, (sp.iclass_max-sp.iclass_min+1) * sp.nr_dir * sp.nr_psi * sp.nr_trans); block_num = ceilf((float)(sp.nr_dir*sp.nr_psi)/(float)SUMW_BLOCK_SIZE); dim3 block_dim(block_num,sp.iclass_max-sp.iclass_min+1); if (failsafeMode) //Prevent zero prior products in fail-safe mode { cuda_kernel_exponentiate_weights_coarse <<>>( ~pdf_orientation, ~pdf_offset, ~ipartMweight, local_norm, (XFLOAT)op.min_diff2[ipart], sp.nr_dir*sp.nr_psi, sp.nr_trans); } else { cuda_kernel_exponentiate_weights_coarse <<>>( ~pdf_orientation, ~pdf_offset, ~ipartMweight, local_norm, (XFLOAT)op.min_diff2[ipart], sp.nr_dir*sp.nr_psi, sp.nr_trans); } CTIC(cudaMLO->timer,"sort"); DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaStreamPerThread)); long ipart_length = (sp.iclass_max-sp.iclass_min+1) * sp.nr_dir * sp.nr_psi * sp.nr_trans; if (ipart_length > 1) { //Wrap the current ipart data in a new pointer CudaGlobalPtr unsorted_ipart(weights, ipart * op.Mweight.xdim + sp.nr_dir * sp.nr_psi * sp.nr_trans * sp.iclass_min, ipart_length); CudaGlobalPtr filtered(unsorted_ipart.getSize(), cudaMLO->devBundle->allocator); 
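					// Significant-weight selection, in outline: filterOnDevice() keeps the
					// non-zero weights, sortOnDevice() sorts them in ascending order,
					// scanOnDevice() turns the sorted list into a running sum, and the
					// threshold kernel finds the first index whose running sum exceeds
					// (1 - adaptive_fraction) * sum_weight. Everything from that index
					// upwards is marked significant, i.e. the smallest set of largest
					// weights that covers adaptive_fraction of the total (e.g. 99.9% for
					// adaptive_fraction = 0.999), optionally capped by maximum_significants.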
CUSTOM_ALLOCATOR_REGION_NAME("CASDTW_SORTSUM"); filtered.device_alloc(); MoreThanCubOpt moreThanOpt(0.); size_t filteredSize = filterOnDevice(unsorted_ipart, filtered, moreThanOpt); if (filteredSize == 0) { if (failsafeMode) //Only print error if not managed to recover through fail-safe mode { std::cerr << std::endl; std::cerr << " fn_img= " << sp.current_img << std::endl; std::cerr << " ipart= " << ipart << " adaptive_fraction= " << baseMLO->adaptive_fraction << std::endl; std::cerr << " min_diff2= " << op.min_diff2[ipart] << std::endl; pdf_orientation.dump_device_to_file("error_dump_pdf_orientation"); pdf_offset.dump_device_to_file("error_dump_pdf_offset"); unsorted_ipart.dump_device_to_file("error_dump_filtered"); std::cerr << "Dumped data: error_dump_pdf_orientation, error_dump_pdf_orientation and error_dump_unsorted." << std::endl; } CRITICAL(ERRFILTEREDZERO); // "filteredSize == 0" } filtered.setSize(filteredSize); CudaGlobalPtr sorted(filteredSize, cudaMLO->devBundle->allocator); CudaGlobalPtr cumulative_sum(filteredSize, cudaMLO->devBundle->allocator); sorted.device_alloc(); cumulative_sum.device_alloc(); sortOnDevice(filtered, sorted); scanOnDevice(sorted, cumulative_sum); CTOC(cudaMLO->timer,"sort"); op.sum_weight[ipart] = cumulative_sum.getDeviceAt(cumulative_sum.getSize() - 1); long int my_nr_significant_coarse_samples; size_t thresholdIdx(0); int grid_size = ceil((float)(cumulative_sum.getSize()-1)/(float)FIND_IN_CUMULATIVE_BLOCK_SIZE); if(grid_size > 0) { CudaGlobalPtr idx(1, cumulative_sum.getStream(), cumulative_sum.getAllocator()); idx[0] = 0; idx.put_on_device(); cuda_kernel_find_threshold_idx_in_cumulative <<< grid_size, FIND_IN_CUMULATIVE_BLOCK_SIZE, 0, cumulative_sum.getStream() >>>( ~cumulative_sum, (1 - baseMLO->adaptive_fraction) * op.sum_weight[ipart], cumulative_sum.getSize()-1, ~idx); idx.cp_to_host(); DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cumulative_sum.getStream())); thresholdIdx = idx[0]; } my_nr_significant_coarse_samples = filteredSize - thresholdIdx; if (my_nr_significant_coarse_samples == 0) { if (failsafeMode) //Only print error if not managed to recover through fail-safe mode { std::cerr << std::endl; std::cerr << " fn_img= " << sp.current_img << std::endl; std::cerr << " ipart= " << ipart << " adaptive_fraction= " << baseMLO->adaptive_fraction << std::endl; std::cerr << " threshold= " << (1 - baseMLO->adaptive_fraction) * op.sum_weight[ipart] << " thresholdIdx= " << thresholdIdx << std::endl; std::cerr << " op.sum_weight[ipart]= " << op.sum_weight[ipart] << std::endl; std::cerr << " min_diff2= " << op.min_diff2[ipart] << std::endl; unsorted_ipart.dump_device_to_file("error_dump_unsorted"); filtered.dump_device_to_file("error_dump_filtered"); sorted.dump_device_to_file("error_dump_sorted"); cumulative_sum.dump_device_to_file("error_dump_cumulative_sum"); std::cerr << "Written error_dump_unsorted, error_dump_filtered, error_dump_sorted, and error_dump_cumulative_sum." 
<< std::endl; } CRITICAL(ERRNOSIGNIFS); // "my_nr_significant_coarse_samples == 0" } if (baseMLO->maximum_significants > 0 && my_nr_significant_coarse_samples > baseMLO->maximum_significants) { my_nr_significant_coarse_samples = baseMLO->maximum_significants; thresholdIdx = filteredSize - my_nr_significant_coarse_samples; } weights_t significant_weight = sorted.getDeviceAt(thresholdIdx); CTIC(cudaMLO->timer,"getArgMaxOnDevice"); std::pair max_pair = getArgMaxOnDevice(unsorted_ipart); CTOC(cudaMLO->timer,"getArgMaxOnDevice"); op.max_index[ipart].coarseIdx = max_pair.first; op.max_weight[ipart] = max_pair.second; // Store nr_significant_coarse_samples for this particle DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_NR_SIGN) = (RFLOAT) my_nr_significant_coarse_samples; CudaGlobalPtr Mcoarse_significant( &op.Mcoarse_significant.data[ipart * op.Mweight.xdim + sp.nr_dir * sp.nr_psi * sp.nr_trans * sp.iclass_min], (sp.iclass_max-sp.iclass_min+1) * sp.nr_dir * sp.nr_psi * sp.nr_trans, cudaMLO->devBundle->allocator); CUSTOM_ALLOCATOR_REGION_NAME("CASDTW_SIG"); Mcoarse_significant.device_alloc(); DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaStreamPerThread)); arrayOverThreshold(unsorted_ipart, Mcoarse_significant, significant_weight); Mcoarse_significant.cp_to_host(); DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaStreamPerThread)); } else if (ipart_length == 1) { op.Mcoarse_significant.data[ipart * op.Mweight.xdim + sp.nr_dir * sp.nr_psi * sp.nr_trans * sp.iclass_min] = 1; } else CRITICAL(ERRNEGLENGTH); } else { for (int exp_iclass = sp.iclass_min; exp_iclass <= sp.iclass_max; exp_iclass++) DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaMLO->classStreams[exp_iclass])); DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaStreamPerThread)); for (int exp_iclass = sp.iclass_min; exp_iclass <= sp.iclass_max; exp_iclass++) // TODO could use classStreams { if ((baseMLO->mymodel.pdf_class[exp_iclass] > 0.) && (FPCMasks[ipart][exp_iclass].weightNum > 0) ) { // Use the constructed mask to build a partial (class-specific) input // (until now, PassWeights has been an empty placeholder. 
We now create class-paritals pointing at it, and start to fill it with stuff) IndexedDataArray thisClassPassWeights(PassWeights[ipart],FPCMasks[ipart][exp_iclass], cudaMLO->devBundle->allocator); CudaGlobalPtr pdf_orientation_class(&(pdf_orientation[(exp_iclass-sp.iclass_min)*sp.nr_dir*sp.nr_psi]), &( pdf_orientation((exp_iclass-sp.iclass_min)*sp.nr_dir*sp.nr_psi) ), sp.nr_dir*sp.nr_psi); CudaGlobalPtr pdf_offset_class(&(pdf_offset[(exp_iclass-sp.iclass_min)*sp.nr_trans]), &( pdf_offset((exp_iclass-sp.iclass_min)*sp.nr_trans) ), sp.nr_trans); block_num = ceil((float)FPCMasks[ipart][exp_iclass].jobNum / (float)SUMW_BLOCK_SIZE); //thisClassPassWeights.rot_idx.getSize() / SUM_BLOCK_SIZE; dim3 block_dim(block_num); cuda_kernel_exponentiate_weights_fine<<classStreams[exp_iclass]>>>( ~pdf_orientation_class, ~pdf_offset_class, ~thisClassPassWeights.weights, (XFLOAT)local_norm, sp.nr_oversampled_rot, sp.nr_oversampled_trans, ~thisClassPassWeights.rot_id, ~thisClassPassWeights.trans_idx, ~FPCMasks[ipart][exp_iclass].jobOrigin, ~FPCMasks[ipart][exp_iclass].jobExtent, FPCMasks[ipart][exp_iclass].jobNum); LAUNCH_PRIVATE_ERROR(cudaGetLastError(),cudaMLO->errorStatus); } } for (int exp_iclass = sp.iclass_min; exp_iclass <= sp.iclass_max; exp_iclass++) DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaMLO->classStreams[exp_iclass])); DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaStreamPerThread)); PassWeights[ipart].weights.cp_to_host(); // note that the host-pointer is shared: we're copying to Mweight. CTIC(cudaMLO->timer,"sort"); DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaStreamPerThread)); size_t weightSize = PassWeights[ipart].weights.getSize(); CudaGlobalPtr sorted(weightSize, cudaMLO->devBundle->allocator); CudaGlobalPtr cumulative_sum(weightSize, cudaMLO->devBundle->allocator); CUSTOM_ALLOCATOR_REGION_NAME("CASDTW_FINE"); sorted.device_alloc(); cumulative_sum.device_alloc(); sortOnDevice(PassWeights[ipart].weights, sorted); scanOnDevice(sorted, cumulative_sum); CTOC(cudaMLO->timer,"sort"); if(baseMLO->adaptive_oversampling!=0) { op.sum_weight[ipart] = cumulative_sum.getDeviceAt(cumulative_sum.getSize() - 1); if (op.sum_weight[ipart]==0) { std::cerr << std::endl; std::cerr << " fn_img= " << sp.current_img << std::endl; std::cerr << " part_id= " << part_id << std::endl; std::cerr << " ipart= " << ipart << std::endl; std::cerr << " op.min_diff2[ipart]= " << op.min_diff2[ipart] << std::endl; int group_id = baseMLO->mydata.getGroupId(part_id); std::cerr << " group_id= " << group_id << std::endl; std::cerr << " ml_model.scale_correction[group_id]= " << baseMLO->mymodel.scale_correction[group_id] << std::endl; std::cerr << " exp_significant_weight[ipart]= " << op.significant_weight[ipart] << std::endl; std::cerr << " exp_max_weight[ipart]= " << op.max_weight[ipart] << std::endl; std::cerr << " ml_model.sigma2_noise[group_id]= " << baseMLO->mymodel.sigma2_noise[group_id] << std::endl; CRITICAL(ERRSUMWEIGHTZERO); //"op.sum_weight[ipart]==0" } size_t thresholdIdx = findThresholdIdxInCumulativeSum(cumulative_sum, (1 - baseMLO->adaptive_fraction) * op.sum_weight[ipart]); my_significant_weight = sorted.getDeviceAt(thresholdIdx); CTIC(cudaMLO->timer,"getArgMaxOnDevice"); std::pair max_pair = getArgMaxOnDevice(PassWeights[ipart].weights); CTOC(cudaMLO->timer,"getArgMaxOnDevice"); op.max_index[ipart].fineIdx = PassWeights[ipart].ihidden_overs[max_pair.first]; op.max_weight[ipart] = max_pair.second; } else { my_significant_weight = sorted.getDeviceAt(0); } } CTOC(cudaMLO->timer,"sumweight1"); } 
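// The weight at the adaptive_fraction threshold becomes this particle's significance cut-off.
// It is stored in op.significant_weight just below, and storeWeightedSums() later hands it,
// together with op.sum_weight, to the collect2 and backprojection kernels.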
op.significant_weight[ipart] = (RFLOAT) my_significant_weight; } // end loop ipart #ifdef TIMING if (op.my_ori_particle == baseMLO->exp_my_first_ori_particle) { if (exp_ipass == 0) baseMLO->timer.toc(baseMLO->TIMING_ESP_WEIGHT1); else baseMLO->timer.toc(baseMLO->TIMING_ESP_WEIGHT2); } #endif } void storeWeightedSums(OptimisationParamters &op, SamplingParameters &sp, MlOptimiser *baseMLO, MlOptimiserCuda *cudaMLO, std::vector &FinePassWeights, std::vector &ProjectionData, std::vector > &FPCMasks, std::vector > &stagerSWS) { #ifdef TIMING if (op.my_ori_particle == baseMLO->exp_my_first_ori_particle) baseMLO->timer.tic(baseMLO->TIMING_ESP_WSUM); #endif CTIC(cudaMLO->timer,"store_init"); int ibody(0); //Not supported yet // Re-do below because now also want unmasked images AND if (stricht_highres_exp >0.) then may need to resize std::vector > dummy; baseMLO->precalculateShiftedImagesCtfsAndInvSigma2s(false, op.my_ori_particle, sp.current_image_size, sp.current_oversampling, op.metadata_offset, // inserted SHWS 12112015 sp.itrans_min, sp.itrans_max, op.Fimgs, op.Fimgs_nomask, op.Fctfs, dummy, dummy, op.local_Fctfs, op.local_sqrtXi2, op.local_Minvsigma2s); // In doThreadPrecalculateShiftedImagesCtfsAndInvSigma2s() the origin of the op.local_Minvsigma2s was omitted. // Set those back here for (long int ipart = 0; ipart < sp.nr_particles; ipart++) { long int part_id = baseMLO->mydata.ori_particles[op.my_ori_particle].particles_id[ipart]; int group_id = baseMLO->mydata.getGroupId(part_id); DIRECT_MULTIDIM_ELEM(op.local_Minvsigma2s[ipart], 0) = 1. / (baseMLO->sigma2_fudge * DIRECT_A1D_ELEM(baseMLO->mymodel.sigma2_noise[group_id], 0)); } // For norm_correction and scale_correction of all particles of this ori_particle std::vector exp_wsum_norm_correction; std::vector > exp_wsum_scale_correction_XA, exp_wsum_scale_correction_AA; std::vector > thr_wsum_signal_product_spectra, thr_wsum_reference_power_spectra; exp_wsum_norm_correction.resize(sp.nr_particles, 0.); // For scale_correction if (baseMLO->do_scale_correction) { MultidimArray aux; aux.initZeros(baseMLO->mymodel.ori_size/2 + 1); exp_wsum_scale_correction_XA.resize(sp.nr_particles, aux); exp_wsum_scale_correction_AA.resize(sp.nr_particles, aux); thr_wsum_signal_product_spectra.resize(baseMLO->mymodel.nr_groups, aux); thr_wsum_reference_power_spectra.resize(baseMLO->mymodel.nr_groups, aux); } std::vector oversampled_translations_x, oversampled_translations_y, oversampled_translations_z; bool have_warned_small_scale = false; // Make local copies of weighted sums (except BPrefs, which are too big) // so that there are not too many mutex locks below std::vector > thr_wsum_sigma2_noise, thr_wsum_pdf_direction; std::vector thr_wsum_norm_correction, thr_sumw_group, thr_wsum_pdf_class, thr_wsum_prior_offsetx_class, thr_wsum_prior_offsety_class; RFLOAT thr_wsum_sigma2_offset; MultidimArray thr_metadata, zeroArray; // Wsum_sigma_noise2 is a 1D-spectrum for each group zeroArray.initZeros(baseMLO->mymodel.ori_size/2 + 1); thr_wsum_sigma2_noise.resize(baseMLO->mymodel.nr_groups, zeroArray); // wsum_pdf_direction is a 1D-array (of length sampling.NrDirections()) for each class zeroArray.initZeros(baseMLO->sampling.NrDirections()); thr_wsum_pdf_direction.resize(baseMLO->mymodel.nr_classes, zeroArray); // sumw_group is a RFLOAT for each group thr_sumw_group.resize(baseMLO->mymodel.nr_groups, 0.); // wsum_pdf_class is a RFLOAT for each class thr_wsum_pdf_class.resize(baseMLO->mymodel.nr_classes, 0.); if (baseMLO->mymodel.ref_dim == 2) { 
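// 2D references carry a per-class translational prior, so per-class weighted sums of the x and y
// offsets are accumulated as well; they end up in wsum_model.prior_offset_class further down.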
thr_wsum_prior_offsetx_class.resize(baseMLO->mymodel.nr_classes, 0.);
thr_wsum_prior_offsety_class.resize(baseMLO->mymodel.nr_classes, 0.);
}
// wsum_sigma2_offset is just a RFLOAT
thr_wsum_sigma2_offset = 0.;
unsigned image_size = op.Fimgs[0].nzyxdim;
CTOC(cudaMLO->timer,"store_init");
/*=======================================================================================
                            COLLECT 2 AND SET METADATA
=======================================================================================*/
CTIC(cudaMLO->timer,"collect_data_2");
int nr_transes = sp.nr_trans*sp.nr_oversampled_trans;
int nr_fake_classes = (sp.iclass_max-sp.iclass_min+1);
int oversamples = sp.nr_oversampled_trans * sp.nr_oversampled_rot;
std::vector block_nums(sp.nr_particles*nr_fake_classes);
for (long int ipart = 0; ipart < sp.nr_particles; ipart++)
{
	// Allocate space for all classes, so that we can pre-calculate data for all classes, copy in one operation, call kernels on all classes, and copy back in one operation
	CudaGlobalPtr oo_otrans_x(nr_fake_classes*nr_transes, cudaMLO->devBundle->allocator); // old_offset_oversampled_trans_x
	CudaGlobalPtr oo_otrans_y(nr_fake_classes*nr_transes, cudaMLO->devBundle->allocator);
	CudaGlobalPtr oo_otrans_z(nr_fake_classes*nr_transes, cudaMLO->devBundle->allocator);
	CudaGlobalPtr myp_oo_otrans_x2y2z2(nr_fake_classes*nr_transes, cudaMLO->devBundle->allocator); // my_prior_old_offs....x^2*y^2*z^2
	myp_oo_otrans_x2y2z2.device_alloc();
	int sumBlockNum =0;
	long int part_id = baseMLO->mydata.ori_particles[op.my_ori_particle].particles_id[ipart];
	int group_id = baseMLO->mydata.getGroupId(part_id);
	CTIC(cudaMLO->timer,"collect_data_2_pre_kernel");
	for (int exp_iclass = sp.iclass_min; exp_iclass <= sp.iclass_max; exp_iclass++)
	{
		int fake_class = exp_iclass-sp.iclass_min; // if we only have the third class to do, the third class will be the "first" we do, i.e. the "fake" first.
		if ((baseMLO->mymodel.pdf_class[exp_iclass] == 0.) || (ProjectionData[ipart].class_entries[exp_iclass] == 0) )
			continue;
		// Use the constructed mask to construct a partial class-specific input
		IndexedDataArray thisClassFinePassWeights(FinePassWeights[ipart],FPCMasks[ipart][exp_iclass], cudaMLO->devBundle->allocator);
		// Re-define the job-partition of the indexedArray of weights so that the collect-kernel can work with it.
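// makeJobsForCollect() below re-groups this class' significant weights into jobs (jobOrigin/jobExtent
// pairs, staged to the device via stagerSWS); block_nums records how many such jobs, i.e. how many
// CUDA blocks, the collect2 kernel will launch for this particle/class combination.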
block_nums[nr_fake_classes*ipart + fake_class] = makeJobsForCollect(thisClassFinePassWeights, FPCMasks[ipart][exp_iclass], ProjectionData[ipart].orientation_num[exp_iclass]); stagerSWS[ipart].stage(FPCMasks[ipart][exp_iclass].jobOrigin); stagerSWS[ipart].stage(FPCMasks[ipart][exp_iclass].jobExtent); sumBlockNum+=block_nums[nr_fake_classes*ipart + fake_class]; RFLOAT myprior_x, myprior_y, myprior_z; RFLOAT old_offset_x = XX(op.old_offset[ipart]); RFLOAT old_offset_y = YY(op.old_offset[ipart]); RFLOAT old_offset_z; if (baseMLO->mymodel.ref_dim == 2) { myprior_x = XX(baseMLO->mymodel.prior_offset_class[exp_iclass]); myprior_y = YY(baseMLO->mymodel.prior_offset_class[exp_iclass]); } else { myprior_x = XX(op.prior[ipart]); myprior_y = YY(op.prior[ipart]); if (cudaMLO->dataIs3D) { myprior_z = ZZ(op.prior[ipart]); old_offset_z = ZZ(op.old_offset[ipart]); } } /*====================================================== COLLECT 2 ======================================================*/ //Pregenerate oversampled translation objects for kernel-call for (long int itrans = 0, iitrans = 0; itrans < sp.nr_trans; itrans++) { baseMLO->sampling.getTranslations(itrans, baseMLO->adaptive_oversampling, oversampled_translations_x, oversampled_translations_y, oversampled_translations_z, (baseMLO->do_helical_refine) && (! baseMLO->ignore_helical_symmetry), baseMLO->helical_rise_initial / baseMLO->mymodel.pixel_size, baseMLO->helical_twist_initial); for (long int iover_trans = 0; iover_trans < sp.nr_oversampled_trans; iover_trans++, iitrans++) { oo_otrans_x[fake_class*nr_transes+iitrans] = old_offset_x + oversampled_translations_x[iover_trans]; oo_otrans_y[fake_class*nr_transes+iitrans] = old_offset_y + oversampled_translations_y[iover_trans]; if (cudaMLO->dataIs3D) oo_otrans_z[fake_class*nr_transes+iitrans] = old_offset_z + oversampled_translations_z[iover_trans]; // Calculate the vector length of myprior RFLOAT mypriors_len2 = myprior_x * myprior_x + myprior_y * myprior_y; if (cudaMLO->dataIs3D) mypriors_len2 += myprior_z * myprior_z; // If it is doing helical refinement AND Cartesian vector myprior has a length > 0, transform the vector to its helical coordinates if ( (baseMLO->do_helical_refine) && (! baseMLO->ignore_helical_symmetry) && (mypriors_len2 > 0.00001) ) { RFLOAT rot_deg = DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_ROT); RFLOAT tilt_deg = DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_TILT); RFLOAT psi_deg = DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_PSI); transformCartesianAndHelicalCoords(myprior_x, myprior_y, myprior_z, myprior_x, myprior_y, myprior_z, rot_deg, tilt_deg, psi_deg, (cudaMLO->dataIs3D) ? (3) : (2), CART_TO_HELICAL_COORDS); } // TODO: Feb20,2017 - Shaoda does not understand what you are doing here ... ??? // Please check whether the following is compatible with 3D reconstructions of 2D helical segments AND 3D helical subtomograms ??? // Preliminary tests show that the code from Shaoda gave worse reconstructions of VipA/VipB (EMPIAR-10019) // While TMV (EMPIAR-10020) and BtubAB subtomo results are not affected // ========= OLD =========== if ( (! 
baseMLO->do_helical_refine) || (baseMLO->ignore_helical_symmetry) ) RFLOAT diffx = myprior_x - oo_otrans_x[fake_class*nr_transes+iitrans]; RFLOAT diffx = myprior_x - oo_otrans_x[fake_class*nr_transes+iitrans]; RFLOAT diffy = myprior_y - oo_otrans_y[fake_class*nr_transes+iitrans]; RFLOAT diffz = 0; if (cudaMLO->dataIs3D) diffz = myprior_z - (old_offset_z + oversampled_translations_z[iover_trans]); // ======= SHAODA ========== //RFLOAT diffx = 0.; //if ( (! baseMLO->do_helical_refine) || (baseMLO->ignore_helical_symmetry) || (cudaMLO->dataIs3D) ) // diffx = myprior_x - oo_otrans_x[fake_class*nr_transes+iitrans]; //RFLOAT diffy = myprior_y - oo_otrans_y[fake_class*nr_transes+iitrans]; //RFLOAT diffz = 0; //if (cudaMLO->dataIs3D) //{ // if ( (! baseMLO->do_helical_refine) || (baseMLO->ignore_helical_symmetry) ) // diffz = myprior_z - (old_offset_z + oversampled_translations_z[iover_trans]); //} myp_oo_otrans_x2y2z2[fake_class*nr_transes+iitrans] = diffx*diffx + diffy*diffy + diffz*diffz; } } } stagerSWS[ipart].cp_to_device(); oo_otrans_x.put_on_device(); oo_otrans_y.put_on_device(); oo_otrans_z.put_on_device(); myp_oo_otrans_x2y2z2.cp_to_device(); DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaStreamPerThread)); CudaGlobalPtr p_weights(sumBlockNum, cudaMLO->devBundle->allocator); CudaGlobalPtr p_thr_wsum_prior_offsetx_class(sumBlockNum, cudaMLO->devBundle->allocator); CudaGlobalPtr p_thr_wsum_prior_offsety_class(sumBlockNum, cudaMLO->devBundle->allocator); CudaGlobalPtr p_thr_wsum_prior_offsetz_class(sumBlockNum, cudaMLO->devBundle->allocator); CudaGlobalPtr p_thr_wsum_sigma2_offset(sumBlockNum, cudaMLO->devBundle->allocator); p_weights.device_alloc(); p_thr_wsum_prior_offsetx_class.device_alloc(); p_thr_wsum_prior_offsety_class.device_alloc(); if (cudaMLO->dataIs3D) p_thr_wsum_prior_offsetz_class.device_alloc(); else p_thr_wsum_prior_offsetz_class.d_ptr = p_thr_wsum_prior_offsety_class.d_ptr; p_thr_wsum_sigma2_offset.device_alloc(); CTOC(cudaMLO->timer,"collect_data_2_pre_kernel"); int partial_pos=0; for (int exp_iclass = sp.iclass_min; exp_iclass <= sp.iclass_max; exp_iclass++) { int fake_class = exp_iclass-sp.iclass_min; // if we only have the third class to do, the third class will be the "first" we do, i.e. the "fake" first. if ((baseMLO->mymodel.pdf_class[exp_iclass] == 0.) 
|| (ProjectionData[ipart].class_entries[exp_iclass] == 0) ) continue; // Use the constructed mask to construct a partial class-specific input IndexedDataArray thisClassFinePassWeights(FinePassWeights[ipart],FPCMasks[ipart][exp_iclass], cudaMLO->devBundle->allocator); int cpos=fake_class*nr_transes; int block_num = block_nums[nr_fake_classes*ipart + fake_class]; dim3 grid_dim_collect2 = block_num; runCollect2jobs(grid_dim_collect2, &(oo_otrans_x(cpos) ), // otrans-size -> make const &(oo_otrans_y(cpos) ), // otrans-size -> make const &(oo_otrans_z(cpos) ), // otrans-size -> make const &(myp_oo_otrans_x2y2z2(cpos) ), // otrans-size -> make const ~thisClassFinePassWeights.weights, (XFLOAT)op.significant_weight[ipart], (XFLOAT)op.sum_weight[ipart], sp.nr_trans, sp.nr_oversampled_trans, sp.nr_oversampled_rot, oversamples, (baseMLO->do_skip_align || baseMLO->do_skip_rotate ), &p_weights(partial_pos), &p_thr_wsum_prior_offsetx_class(partial_pos), &p_thr_wsum_prior_offsety_class(partial_pos), &p_thr_wsum_prior_offsetz_class(partial_pos), &p_thr_wsum_sigma2_offset(partial_pos), ~thisClassFinePassWeights.rot_idx, ~thisClassFinePassWeights.trans_idx, ~FPCMasks[ipart][exp_iclass].jobOrigin, ~FPCMasks[ipart][exp_iclass].jobExtent, cudaMLO->dataIs3D); LAUNCH_PRIVATE_ERROR(cudaGetLastError(),cudaMLO->errorStatus); partial_pos+=block_num; } CTIC(cudaMLO->timer,"collect_data_2_post_kernel"); p_weights.cp_to_host(); p_thr_wsum_sigma2_offset.cp_to_host(); p_thr_wsum_prior_offsetx_class.cp_to_host(); p_thr_wsum_prior_offsety_class.cp_to_host(); if (cudaMLO->dataIs3D) p_thr_wsum_prior_offsetz_class.cp_to_host(); DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaStreamPerThread)); int iorient = 0; partial_pos=0; for (int exp_iclass = sp.iclass_min; exp_iclass <= sp.iclass_max; exp_iclass++) { int fake_class = exp_iclass-sp.iclass_min; // if we only have the third class to do, the third class will be the "first" we do, i.e. the "fake" first. if ((baseMLO->mymodel.pdf_class[exp_iclass] == 0.) 
|| (ProjectionData[ipart].class_entries[exp_iclass] == 0) ) continue; int block_num = block_nums[nr_fake_classes*ipart + fake_class]; for (long int n = partial_pos; n < partial_pos+block_num; n++) { iorient= FinePassWeights[ipart].rot_id[FPCMasks[ipart][exp_iclass].jobOrigin[n-partial_pos]+FPCMasks[ipart][exp_iclass].firstPos]; long int mydir, idir=floor(iorient/sp.nr_psi); if (baseMLO->mymodel.orientational_prior_mode == NOPRIOR) mydir = idir; else mydir = op.pointer_dir_nonzeroprior[idir]; // store partials according to indices of the relevant dimension DIRECT_MULTIDIM_ELEM(thr_wsum_pdf_direction[exp_iclass], mydir) += p_weights[n]; thr_sumw_group[group_id] += p_weights[n]; thr_wsum_pdf_class[exp_iclass] += p_weights[n]; thr_wsum_sigma2_offset += p_thr_wsum_sigma2_offset[n]; if (baseMLO->mymodel.ref_dim == 2) { thr_wsum_prior_offsetx_class[exp_iclass] += p_thr_wsum_prior_offsetx_class[n]; thr_wsum_prior_offsety_class[exp_iclass] += p_thr_wsum_prior_offsety_class[n]; } } partial_pos+=block_num; } // end loop iclass CTOC(cudaMLO->timer,"collect_data_2_post_kernel"); } // end loop ipart /*====================================================== SET METADATA ======================================================*/ std::vector< RFLOAT> oversampled_rot, oversampled_tilt, oversampled_psi; for (long int ipart = 0; ipart < sp.nr_particles; ipart++) { CTIC(cudaMLO->timer,"setMetadata"); // CTIC(cudaMLO->timer,"getArgMaxOnDevice"); // std::pair max_pair = getArgMaxOnDevice(FinePassWeights[ipart].weights); // CTOC(cudaMLO->timer,"getArgMaxOnDevice"); // op.max_index.fineIdx = FinePassWeights[ipart].ihidden_overs[max_pair.first]; // op.max_weight[ipart] = max_pair.second; //std::cerr << "max val = " << op.max_weight[ipart] << std::endl; //std::cerr << "max index = " << max_index.fineIdx << std::endl; if(baseMLO->adaptive_oversampling!=0) op.max_index[ipart].fineIndexToFineIndices(sp); // set partial indices corresponding to the found max_index, to be used below else op.max_index[ipart].coarseIndexToCoarseIndices(sp); baseMLO->sampling.getTranslations(op.max_index[ipart].itrans, baseMLO->adaptive_oversampling, oversampled_translations_x, oversampled_translations_y, oversampled_translations_z, (baseMLO->do_helical_refine) && (! baseMLO->ignore_helical_symmetry), baseMLO->helical_rise_initial / baseMLO->mymodel.pixel_size, baseMLO->helical_twist_initial); //TODO We already have rot, tilt and psi don't calculated them again if(baseMLO->do_skip_align || baseMLO->do_skip_rotate) baseMLO->sampling.getOrientations(sp.idir_min, sp.ipsi_min, baseMLO->adaptive_oversampling, oversampled_rot, oversampled_tilt, oversampled_psi, op.pointer_dir_nonzeroprior, op.directions_prior, op.pointer_psi_nonzeroprior, op.psi_prior); else baseMLO->sampling.getOrientations(op.max_index[ipart].idir, op.max_index[ipart].ipsi, baseMLO->adaptive_oversampling, oversampled_rot, oversampled_tilt, oversampled_psi, op.pointer_dir_nonzeroprior, op.directions_prior, op.pointer_psi_nonzeroprior, op.psi_prior); baseMLO->sampling.getOrientations(op.max_index[ipart].idir, op.max_index[ipart].ipsi, baseMLO->adaptive_oversampling, oversampled_rot, oversampled_tilt, oversampled_psi, op.pointer_dir_nonzeroprior, op.directions_prior, op.pointer_psi_nonzeroprior, op.psi_prior); RFLOAT rot = oversampled_rot[op.max_index[ipart].ioverrot]; RFLOAT tilt = oversampled_tilt[op.max_index[ipart].ioverrot]; RFLOAT psi = oversampled_psi[op.max_index[ipart].ioverrot]; int icol_rot = (baseMLO->mymodel.nr_bodies == 1) ? 
METADATA_ROT : 0 + METADATA_LINE_LENGTH_BEFORE_BODIES + (ibody) * METADATA_NR_BODY_PARAMS; int icol_tilt = (baseMLO->mymodel.nr_bodies == 1) ? METADATA_TILT : 1 + METADATA_LINE_LENGTH_BEFORE_BODIES + (ibody) * METADATA_NR_BODY_PARAMS; int icol_psi = (baseMLO->mymodel.nr_bodies == 1) ? METADATA_PSI : 2 + METADATA_LINE_LENGTH_BEFORE_BODIES + (ibody) * METADATA_NR_BODY_PARAMS; int icol_xoff = (baseMLO->mymodel.nr_bodies == 1) ? METADATA_XOFF : 3 + METADATA_LINE_LENGTH_BEFORE_BODIES + (ibody) * METADATA_NR_BODY_PARAMS; int icol_yoff = (baseMLO->mymodel.nr_bodies == 1) ? METADATA_YOFF : 4 + METADATA_LINE_LENGTH_BEFORE_BODIES + (ibody) * METADATA_NR_BODY_PARAMS; int icol_zoff = (baseMLO->mymodel.nr_bodies == 1) ? METADATA_ZOFF : 5 + METADATA_LINE_LENGTH_BEFORE_BODIES + (ibody) * METADATA_NR_BODY_PARAMS; RFLOAT old_rot = DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, icol_rot); DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, icol_rot) = rot; RFLOAT old_tilt = DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, icol_tilt); DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, icol_tilt) = tilt; RFLOAT old_psi = DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, icol_psi); DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, icol_psi) = psi; Matrix1D shifts(2); //21may2015 if (baseMLO->mymodel.nr_bodies == 1) { // include old_offsets for normal refinement (i.e. non multi-body) XX(shifts) = XX(op.old_offset[ipart]) + oversampled_translations_x[op.max_index[ipart].iovertrans]; YY(shifts) = YY(op.old_offset[ipart]) + oversampled_translations_y[op.max_index[ipart].iovertrans]; } else { // For multi-body refinements, only store 'residual' translations XX(shifts) = oversampled_translations_x[op.max_index[ipart].iovertrans]; YY(shifts) = oversampled_translations_y[op.max_index[ipart].iovertrans]; } if (cudaMLO->dataIs3D) { shifts.resize(3); if (baseMLO->mymodel.nr_bodies == 1) ZZ(shifts) = ZZ(op.old_offset[ipart]) + oversampled_translations_z[op.max_index[ipart].iovertrans]; else ZZ(shifts) = oversampled_translations_z[op.max_index[ipart].iovertrans]; } // Use oldpsi-angle to rotate back the XX(exp_old_offset[ipart]) + oversampled_translations_x[iover_trans] and if ( (baseMLO->do_helical_refine) && (! 
baseMLO->ignore_helical_symmetry) ) transformCartesianAndHelicalCoords(shifts, shifts, old_rot, old_tilt, old_psi, HELICAL_TO_CART_COORDS); DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, icol_xoff) = XX(shifts); DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, icol_yoff) = YY(shifts); if (cudaMLO->dataIs3D) DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, icol_zoff) = ZZ(shifts); if (ibody == 0) { DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_CLASS) = (RFLOAT)op.max_index[ipart].iclass + 1; RFLOAT pmax = op.max_weight[ipart]/op.sum_weight[ipart]; if(pmax>1) //maximum normalised probability weight is (unreasonably) larger than unity CRITICAL("Relion is finding a normalised probability greater than 1"); DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_PMAX) = pmax; } CTOC(cudaMLO->timer,"setMetadata"); } CTOC(cudaMLO->timer,"collect_data_2"); /*======================================================================================= MAXIMIZATION =======================================================================================*/ CTIC(cudaMLO->timer,"maximization"); for (long int ipart = 0; ipart < sp.nr_particles; ipart++) { long int part_id = baseMLO->mydata.ori_particles[op.my_ori_particle].particles_id[ipart]; int group_id = baseMLO->mydata.getGroupId(part_id); /*====================================================== TRANSLATIONS ======================================================*/ long unsigned translation_num((sp.itrans_max - sp.itrans_min + 1) * sp.nr_oversampled_trans); CudaGlobalPtr trans_x(translation_num, cudaMLO->devBundle->allocator); CudaGlobalPtr trans_y(translation_num, cudaMLO->devBundle->allocator); CudaGlobalPtr trans_z(translation_num, cudaMLO->devBundle->allocator); int j = 0; for (long int itrans = 0; itrans < (sp.itrans_max - sp.itrans_min + 1); itrans++) { baseMLO->sampling.getTranslations(itrans, baseMLO->adaptive_oversampling, oversampled_translations_x, oversampled_translations_y, oversampled_translations_z, (baseMLO->do_helical_refine) && (! baseMLO->ignore_helical_symmetry), baseMLO->helical_rise_initial / baseMLO->mymodel.pixel_size, baseMLO->helical_twist_initial); //TODO Called multiple time to generate same list, reuse the same list for (long int iover_trans = 0; iover_trans < oversampled_translations_x.size(); iover_trans++) { RFLOAT xshift = 0., yshift = 0., zshift = 0.; xshift = oversampled_translations_x[iover_trans]; yshift = oversampled_translations_y[iover_trans]; if (cudaMLO->dataIs3D) zshift = oversampled_translations_z[iover_trans]; if ( (baseMLO->do_helical_refine) && (! baseMLO->ignore_helical_symmetry) ) { RFLOAT rot_deg = DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_ROT); RFLOAT tilt_deg = DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_TILT); RFLOAT psi_deg = DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_PSI); transformCartesianAndHelicalCoords(xshift, yshift, zshift, xshift, yshift, zshift, rot_deg, tilt_deg, psi_deg, (cudaMLO->dataIs3D) ? 
(3) : (2), HELICAL_TO_CART_COORDS); } trans_x[j] = -2 * PI * xshift / (double)baseMLO->mymodel.ori_size; trans_y[j] = -2 * PI * yshift / (double)baseMLO->mymodel.ori_size; trans_z[j] = -2 * PI * zshift / (double)baseMLO->mymodel.ori_size; j ++; } } trans_x.put_on_device(); trans_y.put_on_device(); trans_z.put_on_device(); /*====================================================== IMAGES ======================================================*/ CUSTOM_ALLOCATOR_REGION_NAME("TRANS_3"); CTIC(cudaMLO->timer,"translation_3"); CudaGlobalPtr Fimgs_real(image_size, cudaMLO->devBundle->allocator); CudaGlobalPtr Fimgs_imag(image_size, cudaMLO->devBundle->allocator); CudaGlobalPtr Fimgs_nomask_real(image_size, cudaMLO->devBundle->allocator); CudaGlobalPtr Fimgs_nomask_imag(image_size, cudaMLO->devBundle->allocator); MultidimArray Fimg, Fimg_nonmask; windowFourierTransform(op.Fimgs[ipart], Fimg, sp.current_image_size); windowFourierTransform(op.Fimgs_nomask[ipart], Fimg_nonmask, sp.current_image_size); for (unsigned i = 0; i < image_size; i ++) { Fimgs_real[i] = Fimg.data[i].real; Fimgs_imag[i] = Fimg.data[i].imag; Fimgs_nomask_real[i] = Fimg_nonmask.data[i].real; Fimgs_nomask_imag[i] = Fimg_nonmask.data[i].imag; } Fimgs_real.put_on_device(); Fimgs_imag.put_on_device(); Fimgs_nomask_real.put_on_device(); Fimgs_nomask_imag.put_on_device(); CTOC(cudaMLO->timer,"translation_3"); /*====================================================== SCALE ======================================================*/ XFLOAT part_scale(1.); if (baseMLO->do_scale_correction) { part_scale = baseMLO->mymodel.scale_correction[group_id]; if (part_scale > 10000.) { std::cerr << " rlnMicrographScaleCorrection= " << part_scale << " group= " << group_id + 1 << std::endl; CRITICAL(ERRHIGHSCALE); } else if (part_scale < 0.001) { if (!have_warned_small_scale) { std::cout << " WARNING: ignoring group " << group_id + 1 << " with very small or negative scale (" << part_scale << "); Use larger groups for more stable scale estimates." 
<< std::endl; have_warned_small_scale = true; } part_scale = 0.001; } } CudaGlobalPtr ctfs(image_size, cudaMLO->devBundle->allocator); if (baseMLO->do_ctf_correction) { for (unsigned i = 0; i < image_size; i++) ctfs[i] = (XFLOAT) op.local_Fctfs[ipart].data[i] * part_scale; } else //TODO should be handled by memset for (unsigned i = 0; i < image_size; i++) ctfs[i] = part_scale; ctfs.put_on_device(); /*====================================================== MINVSIGMA ======================================================*/ CudaGlobalPtr Minvsigma2s(image_size, cudaMLO->devBundle->allocator); if (baseMLO->do_map) for (unsigned i = 0; i < image_size; i++) Minvsigma2s[i] = op.local_Minvsigma2s[ipart].data[i]; else for (unsigned i = 0; i < image_size; i++) Minvsigma2s[i] = 1; Minvsigma2s.put_on_device(); /*====================================================== CLASS LOOP ======================================================*/ CUSTOM_ALLOCATOR_REGION_NAME("wdiff2s"); CudaGlobalPtr wdiff2s_AA(baseMLO->mymodel.nr_classes*image_size, 0, cudaMLO->devBundle->allocator); CudaGlobalPtr wdiff2s_XA(baseMLO->mymodel.nr_classes*image_size, 0, cudaMLO->devBundle->allocator); CudaGlobalPtr wdiff2s_sum(image_size, 0, cudaMLO->devBundle->allocator); wdiff2s_AA.device_alloc(); wdiff2s_AA.device_init(0.f); wdiff2s_XA.device_alloc(); wdiff2s_XA.device_init(0.f); unsigned long AAXA_pos=0; wdiff2s_sum.device_alloc(); wdiff2s_sum.device_init(0.f); CUSTOM_ALLOCATOR_REGION_NAME("BP_data"); // Loop from iclass_min to iclass_max to deal with seed generation in first iteration CudaGlobalPtr sorted_weights(ProjectionData[ipart].orientationNumAllClasses * translation_num, 0, cudaMLO->devBundle->allocator); std::vector > eulers(baseMLO->mymodel.nr_classes, cudaMLO->devBundle->allocator); int classPos = 0; for (int exp_iclass = sp.iclass_min; exp_iclass <= sp.iclass_max; exp_iclass++) DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaMLO->classStreams[exp_iclass])); DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaStreamPerThread)); for (int exp_iclass = sp.iclass_min; exp_iclass <= sp.iclass_max; exp_iclass++) { if((baseMLO->mymodel.pdf_class[exp_iclass] == 0.) 
|| (ProjectionData[ipart].class_entries[exp_iclass] == 0)) continue; // Use the constructed mask to construct a partial class-specific input IndexedDataArray thisClassFinePassWeights(FinePassWeights[ipart],FPCMasks[ipart][exp_iclass], cudaMLO->devBundle->allocator); CTIC(cudaMLO->timer,"thisClassProjectionSetupCoarse"); // use "slice" constructor with class-specific parameters to retrieve a temporary ProjectionParams with data for this class ProjectionParams thisClassProjectionData( ProjectionData[ipart], ProjectionData[ipart].class_idx[exp_iclass], ProjectionData[ipart].class_idx[exp_iclass]+ProjectionData[ipart].class_entries[exp_iclass]); thisClassProjectionData.orientation_num[0] = ProjectionData[ipart].orientation_num[exp_iclass]; CTOC(cudaMLO->timer,"thisClassProjectionSetupCoarse"); long unsigned orientation_num(thisClassProjectionData.orientation_num[0]); /*====================================================== PROJECTIONS ======================================================*/ eulers[exp_iclass].setSize(orientation_num * 9); eulers[exp_iclass].setStream(cudaMLO->classStreams[exp_iclass]); eulers[exp_iclass].host_alloc(); CTIC(cudaMLO->timer,"generateEulerMatricesProjector"); generateEulerMatrices( baseMLO->mymodel.PPref[exp_iclass].padding_factor, thisClassProjectionData, &eulers[exp_iclass][0], !IS_NOT_INV); eulers[exp_iclass].device_alloc(); eulers[exp_iclass].cp_to_device(); CTOC(cudaMLO->timer,"generateEulerMatricesProjector"); /*====================================================== MAP WEIGHTS ======================================================*/ CTIC(cudaMLO->timer,"pre_wavg_map"); for (long unsigned i = 0; i < orientation_num*translation_num; i++) sorted_weights[classPos+i] = -999.; for (long unsigned i = 0; i < thisClassFinePassWeights.weights.getSize(); i++) sorted_weights[classPos+(thisClassFinePassWeights.rot_idx[i]) * translation_num + thisClassFinePassWeights.trans_idx[i] ] = thisClassFinePassWeights.weights[i]; classPos+=orientation_num*translation_num; CTOC(cudaMLO->timer,"pre_wavg_map"); } sorted_weights.put_on_device(); // These syncs are necessary (for multiple ranks on the same GPU), and (assumed) low-cost. for (int exp_iclass = sp.iclass_min; exp_iclass <= sp.iclass_max; exp_iclass++) DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaMLO->classStreams[exp_iclass])); DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaStreamPerThread)); classPos = 0; for (int exp_iclass = sp.iclass_min; exp_iclass <= sp.iclass_max; exp_iclass++) { if((baseMLO->mymodel.pdf_class[exp_iclass] == 0.) 
|| (ProjectionData[ipart].class_entries[exp_iclass] == 0)) continue; /*====================================================== KERNEL CALL ======================================================*/ long unsigned orientation_num(ProjectionData[ipart].orientation_num[exp_iclass]); CudaProjectorKernel projKernel = CudaProjectorKernel::makeKernel( cudaMLO->devBundle->cudaProjectors[exp_iclass], op.local_Minvsigma2s[0].xdim, op.local_Minvsigma2s[0].ydim, op.local_Minvsigma2s[0].zdim, op.local_Minvsigma2s[0].xdim-1); runWavgKernel( projKernel, ~eulers[exp_iclass], ~Fimgs_real, ~Fimgs_imag, ~trans_x, ~trans_y, ~trans_z, &sorted_weights.d_ptr[classPos], ~ctfs, ~wdiff2s_sum, &wdiff2s_AA(AAXA_pos), &wdiff2s_XA(AAXA_pos), op, orientation_num, translation_num, image_size, ipart, group_id, exp_iclass, part_scale, baseMLO->refs_are_ctf_corrected, cudaMLO->dataIs3D, cudaMLO->classStreams[exp_iclass]); /*====================================================== BACKPROJECTION ======================================================*/ #ifdef TIMING if (op.my_ori_particle == baseMLO->exp_my_first_ori_particle) baseMLO->timer.tic(baseMLO->TIMING_WSUM_BACKPROJ); #endif CTIC(cudaMLO->timer,"backproject"); runBackProjectKernel( cudaMLO->devBundle->cudaBackprojectors[exp_iclass], projKernel, ~Fimgs_nomask_real, ~Fimgs_nomask_imag, ~trans_x, ~trans_y, ~trans_z, &sorted_weights.d_ptr[classPos], ~Minvsigma2s, ~ctfs, translation_num, (XFLOAT) op.significant_weight[ipart], (XFLOAT) op.sum_weight[ipart], ~eulers[exp_iclass], op.local_Minvsigma2s[0].xdim, op.local_Minvsigma2s[0].ydim, op.local_Minvsigma2s[0].zdim, orientation_num, cudaMLO->dataIs3D, baseMLO->do_sgd, cudaMLO->classStreams[exp_iclass]); CTOC(cudaMLO->timer,"backproject"); #ifdef TIMING if (op.my_ori_particle == baseMLO->exp_my_first_ori_particle) baseMLO->timer.toc(baseMLO->TIMING_WSUM_BACKPROJ); #endif //Update indices AAXA_pos += image_size; classPos += orientation_num*translation_num; } // end loop iclass CUSTOM_ALLOCATOR_REGION_NAME("UNSET"); // NOTE: We've never seen that this sync is necessary, but it is needed in principle, and // its absence in other parts of the code has caused issues. It is also very low-cost. for (int exp_iclass = sp.iclass_min; exp_iclass <= sp.iclass_max; exp_iclass++) DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaMLO->classStreams[exp_iclass])); DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaStreamPerThread)); wdiff2s_AA.cp_to_host(); wdiff2s_XA.cp_to_host(); wdiff2s_sum.cp_to_host(); DEBUG_HANDLE_ERROR(cudaStreamSynchronize(cudaStreamPerThread)); AAXA_pos=0; for (int exp_iclass = sp.iclass_min; exp_iclass <= sp.iclass_max; exp_iclass++) { if((baseMLO->mymodel.pdf_class[exp_iclass] == 0.) || (ProjectionData[ipart].class_entries[exp_iclass] == 0)) continue; for (long int j = 0; j < image_size; j++) { int ires = DIRECT_MULTIDIM_ELEM(baseMLO->Mresol_fine, j); if (ires > -1 && baseMLO->do_scale_correction && DIRECT_A1D_ELEM(baseMLO->mymodel.data_vs_prior_class[exp_iclass], ires) > 3.) 
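// Only resolution shells where the reference is considered reliable (data_vs_prior > 3) are used:
// the XA (image x reference) and AA (reference power) sums returned by the wavg kernel feed the
// per-group wsum_signal_product_spectra and wsum_reference_power_spectra below.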
{ DIRECT_A1D_ELEM(exp_wsum_scale_correction_AA[ipart], ires) += wdiff2s_AA[AAXA_pos+j]; DIRECT_A1D_ELEM(exp_wsum_scale_correction_XA[ipart], ires) += wdiff2s_XA[AAXA_pos+j]; } } AAXA_pos += image_size; } // end loop iclass for (long int j = 0; j < image_size; j++) { int ires = DIRECT_MULTIDIM_ELEM(baseMLO->Mresol_fine, j); if (ires > -1) { thr_wsum_sigma2_noise[group_id].data[ires] += (RFLOAT) wdiff2s_sum[j]; exp_wsum_norm_correction[ipart] += (RFLOAT) wdiff2s_sum[j]; //TODO could be gpu-reduced } } } // end loop ipart CTOC(cudaMLO->timer,"maximization"); CTIC(cudaMLO->timer,"store_post_gpu"); // Extend norm_correction and sigma2_noise estimation to higher resolutions for all particles // Also calculate dLL for each particle and store in metadata // loop over all particles inside this ori_particle RFLOAT thr_avg_norm_correction = 0.; RFLOAT thr_sum_dLL = 0., thr_sum_Pmax = 0.; for (long int ipart = 0; ipart < sp.nr_particles; ipart++) { long int part_id = baseMLO->mydata.ori_particles[op.my_ori_particle].particles_id[ipart]; int group_id = baseMLO->mydata.getGroupId(part_id); // If the current images were smaller than the original size, fill the rest of wsum_model.sigma2_noise with the power_class spectrum of the images for (int ires = baseMLO->mymodel.current_size/2 + 1; ires < baseMLO->mymodel.ori_size/2 + 1; ires++) { DIRECT_A1D_ELEM(thr_wsum_sigma2_noise[group_id], ires) += DIRECT_A1D_ELEM(op.power_imgs[ipart], ires); // Also extend the weighted sum of the norm_correction exp_wsum_norm_correction[ipart] += DIRECT_A1D_ELEM(op.power_imgs[ipart], ires); } // Store norm_correction // Multiply by old value because the old norm_correction term was already applied to the image if (baseMLO->do_norm_correction) { RFLOAT old_norm_correction = DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_NORM); old_norm_correction /= baseMLO->mymodel.avg_norm_correction; // The factor two below is because exp_wsum_norm_correctiom is similar to sigma2_noise, which is the variance for the real/imag components // The variance of the total image (on which one normalizes) is twice this value! RFLOAT normcorr = old_norm_correction * sqrt(exp_wsum_norm_correction[ipart] * 2.); thr_avg_norm_correction += normcorr; // Now set the new norm_correction in the relevant position of exp_metadata DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_NORM) = normcorr; // Print warning for strange norm-correction values if (!((baseMLO->iter == 1 && baseMLO->do_firstiter_cc) || baseMLO->do_always_cc) && DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_NORM) > 10.) { std::cout << " WARNING: norm_correction= "<< DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_NORM) << " for particle " << part_id << " in group " << group_id + 1 << "; Are your groups large enough? Or is the reference on the correct greyscale?" 
<< std::endl; } } // Store weighted sums for scale_correction if (baseMLO->do_scale_correction) { // Divide XA by the old scale_correction and AA by the square of that, because was incorporated into Fctf exp_wsum_scale_correction_XA[ipart] /= baseMLO->mymodel.scale_correction[group_id]; exp_wsum_scale_correction_AA[ipart] /= baseMLO->mymodel.scale_correction[group_id] * baseMLO->mymodel.scale_correction[group_id]; thr_wsum_signal_product_spectra[group_id] += exp_wsum_scale_correction_XA[ipart]; thr_wsum_reference_power_spectra[group_id] += exp_wsum_scale_correction_AA[ipart]; } // Calculate DLL for each particle RFLOAT logsigma2 = 0.; FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(baseMLO->Mresol_fine) { int ires = DIRECT_MULTIDIM_ELEM(baseMLO->Mresol_fine, n); // Note there is no sqrt in the normalisation term because of the 2-dimensionality of the complex-plane // Also exclude origin from logsigma2, as this will not be considered in the P-calculations if (ires > 0) logsigma2 += log( 2. * PI * DIRECT_A1D_ELEM(baseMLO->mymodel.sigma2_noise[group_id], ires)); } RFLOAT dLL; XFLOAT local_norm = (XFLOAT)op.avg_diff2[ipart]; if (local_norm - op.min_diff2[ipart] > 50) local_norm = op.min_diff2[ipart] + 50; if ((baseMLO->iter==1 && baseMLO->do_firstiter_cc) || baseMLO->do_always_cc) dLL = -op.min_diff2[ipart]; else dLL = log(op.sum_weight[ipart]) - local_norm - logsigma2; // Store dLL of each image in the output array, and keep track of total sum DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_DLL) = dLL; thr_sum_dLL += dLL; // Also store sum of Pmax thr_sum_Pmax += DIRECT_A2D_ELEM(baseMLO->exp_metadata, op.metadata_offset + ipart, METADATA_PMAX); } // Now, inside a global_mutex, update the other weighted sums among all threads if (!baseMLO->do_skip_maximization) { pthread_mutex_lock(&global_mutex); for (int n = 0; n < baseMLO->mymodel.nr_groups; n++) { baseMLO->wsum_model.sigma2_noise[n] += thr_wsum_sigma2_noise[n]; baseMLO->wsum_model.sumw_group[n] += thr_sumw_group[n]; if (baseMLO->do_scale_correction) { baseMLO->wsum_model.wsum_signal_product_spectra[n] += thr_wsum_signal_product_spectra[n]; baseMLO->wsum_model.wsum_reference_power_spectra[n] += thr_wsum_reference_power_spectra[n]; } } for (int n = 0; n < baseMLO->mymodel.nr_classes; n++) { baseMLO->wsum_model.pdf_class[n] += thr_wsum_pdf_class[n]; if (baseMLO->mymodel.ref_dim == 2) { XX(baseMLO->wsum_model.prior_offset_class[n]) += thr_wsum_prior_offsetx_class[n]; YY(baseMLO->wsum_model.prior_offset_class[n]) += thr_wsum_prior_offsety_class[n]; } if (!(baseMLO->do_skip_align || baseMLO->do_skip_rotate) ) baseMLO->wsum_model.pdf_direction[n] += thr_wsum_pdf_direction[n]; } baseMLO->wsum_model.sigma2_offset += thr_wsum_sigma2_offset; if (baseMLO->do_norm_correction) baseMLO->wsum_model.avg_norm_correction += thr_avg_norm_correction; baseMLO->wsum_model.LL += thr_sum_dLL; baseMLO->wsum_model.ave_Pmax += thr_sum_Pmax; pthread_mutex_unlock(&global_mutex); } // end if !do_skip_maximization CTOC(cudaMLO->timer,"store_post_gpu"); #ifdef TIMING if (op.my_ori_particle == baseMLO->exp_my_first_ori_particle) baseMLO->timer.toc(baseMLO->TIMING_ESP_WSUM); #endif } size_t MlDeviceBundle::checkFixedSizedObjects(int shares) { int devCount; size_t BoxLimit; HANDLE_ERROR(cudaGetDeviceCount(&devCount)); if(device_id >= devCount) CRITICAL(ERR_GPUID); HANDLE_ERROR(cudaSetDevice(device_id)); size_t free(0), total(0); DEBUG_HANDLE_ERROR(cudaMemGetInfo( &free, &total )); float margin(1.05); BoxLimit = 
pow(free/(margin*2.5*sizeof(XFLOAT)*((float)shares)),(1/3.0)) / (2.0); size_t BytesNeeded = ((float)shares)*margin*2.5*sizeof(XFLOAT)*pow((baseMLO->mymodel.ori_size*2),3); return(BoxLimit); } void MlDeviceBundle::setupFixedSizedObjects() { unsigned nr_classes = baseMLO->mymodel.nr_classes; int devCount; HANDLE_ERROR(cudaGetDeviceCount(&devCount)); if(device_id >= devCount) { //std::cerr << " using device_id=" << device_id << " (device no. " << device_id+1 << ") which is higher than the available number of devices=" << devCount << std::endl; CRITICAL(ERR_GPUID); } else HANDLE_ERROR(cudaSetDevice(device_id)); //Can we pre-generate projector plan and corresponding euler matrices for all particles if (baseMLO->do_skip_align || baseMLO->do_skip_rotate || baseMLO->do_auto_refine || baseMLO->mymodel.orientational_prior_mode != NOPRIOR) generateProjectionPlanOnTheFly = true; else generateProjectionPlanOnTheFly = false; // clear() called on std::vector appears to set size=0, even if we have an explicit // destructor for each member, so we need to set the size to what is was before cudaProjectors.resize(nr_classes); cudaBackprojectors.resize(nr_classes); /*====================================================== PROJECTOR AND BACKPROJECTOR ======================================================*/ //Loop over classes for (int iclass = 0; iclass < nr_classes; iclass++) { cudaProjectors[iclass].setMdlDim( baseMLO->mymodel.PPref[iclass].data.xdim, baseMLO->mymodel.PPref[iclass].data.ydim, baseMLO->mymodel.PPref[iclass].data.zdim, baseMLO->mymodel.PPref[iclass].data.yinit, baseMLO->mymodel.PPref[iclass].data.zinit, baseMLO->mymodel.PPref[iclass].r_max, baseMLO->mymodel.PPref[iclass].padding_factor); cudaProjectors[iclass].initMdl(baseMLO->mymodel.PPref[iclass].data.data); cudaBackprojectors[iclass].setMdlDim( baseMLO->wsum_model.BPref[iclass].data.xdim, baseMLO->wsum_model.BPref[iclass].data.ydim, baseMLO->wsum_model.BPref[iclass].data.zdim, baseMLO->wsum_model.BPref[iclass].data.yinit, baseMLO->wsum_model.BPref[iclass].data.zinit, baseMLO->wsum_model.BPref[iclass].r_max, baseMLO->wsum_model.BPref[iclass].padding_factor); cudaBackprojectors[iclass].initMdl(); } /*====================================================== CUSTOM ALLOCATOR ======================================================*/ int memAlignmentSize; cudaDeviceGetAttribute ( &memAlignmentSize, cudaDevAttrTextureAlignment, device_id ); allocator = new CudaCustomAllocator(0, memAlignmentSize); } void MlDeviceBundle::setupTunableSizedObjects(size_t allocationSize) { unsigned nr_classes = baseMLO->mymodel.nr_classes; int devCount; HANDLE_ERROR(cudaGetDeviceCount(&devCount)); if(device_id >= devCount) { //std::cerr << " using device_id=" << device_id << " (device no. 
" << device_id+1 << ") which is higher than the available number of devices=" << devCount << std::endl; CRITICAL(ERR_GPUID); } else HANDLE_ERROR(cudaSetDevice(device_id)); /*====================================================== CUSTOM ALLOCATOR ======================================================*/ #ifdef DEBUG_CUDA printf("DEBUG: Total GPU allocation size set to %zu MB on device id %d.\n", allocationSize / (1000*1000), device_id); #endif #ifndef CUDA_NO_CUSTOM_ALLOCATION allocator->resize(allocationSize); #endif /*====================================================== PROJECTION PLAN ======================================================*/ coarseProjectionPlans.resize(nr_classes, allocator); for (int iclass = 0; iclass < nr_classes; iclass++) { //If doing predefined projector plan at all and is this class significant if (!generateProjectionPlanOnTheFly && baseMLO->mymodel.pdf_class[iclass] > 0.) { std::vector exp_pointer_dir_nonzeroprior; std::vector exp_pointer_psi_nonzeroprior; std::vector exp_directions_prior; std::vector exp_psi_prior; long unsigned itrans_max = baseMLO->sampling.NrTranslationalSamplings() - 1; long unsigned nr_idir = baseMLO->sampling.NrDirections(0, &exp_pointer_dir_nonzeroprior); long unsigned nr_ipsi = baseMLO->sampling.NrPsiSamplings(0, &exp_pointer_psi_nonzeroprior ); coarseProjectionPlans[iclass].setup( baseMLO->sampling, exp_directions_prior, exp_psi_prior, exp_pointer_dir_nonzeroprior, exp_pointer_psi_nonzeroprior, NULL, //Mcoarse_significant baseMLO->mymodel.pdf_class, baseMLO->mymodel.pdf_direction, nr_idir, nr_ipsi, 0, //idir_min nr_idir - 1, //idir_max 0, //ipsi_min nr_ipsi - 1, //ipsi_max 0, //itrans_min itrans_max, 0, //current_oversampling 1, //nr_oversampled_rot iclass, true, //coarse !IS_NOT_INV, baseMLO->do_skip_align, baseMLO->do_skip_rotate, baseMLO->mymodel.orientational_prior_mode ); } } }; void MlOptimiserCuda::resetData() { int devCount; HANDLE_ERROR(cudaGetDeviceCount(&devCount)); if(device_id >= devCount) { //std::cerr << " using device_id=" << device_id << " (device no. " << device_id+1 << ") which is higher than the available number of devices=" << devCount << std::endl; CRITICAL(ERR_GPUID); } else HANDLE_ERROR(cudaSetDevice(device_id)); unsigned nr_classes = baseMLO->mymodel.nr_classes; classStreams.resize(nr_classes, 0); for (int i = 0; i < nr_classes; i++) HANDLE_ERROR(cudaStreamCreate(&classStreams[i])); //HANDLE_ERROR(cudaStreamCreateWithFlags(&classStreams[i],cudaStreamNonBlocking)); transformer1.clear(); transformer2.clear(); failsafe_attempts = 0; }; void MlOptimiserCuda::doThreadExpectationSomeParticles(int thread_id) { #ifdef TIMING // Only time one thread if (thread_id == 0) baseMLO->timer.tic(baseMLO->TIMING_ESP_THR); #endif // CTOC(cudaMLO->timer,"interParticle"); int devCount; HANDLE_ERROR(cudaGetDeviceCount(&devCount)); if(device_id >= devCount) { //std::cerr << " using device_id=" << device_id << " (device no. 
" << device_id+1 << ") which is higher than the available number of devices=" << devCount << std::endl; CRITICAL(ERR_GPUID); } else DEBUG_HANDLE_ERROR(cudaSetDevice(device_id)); //std::cerr << " calling on device " << device_id << std::endl; //put mweight allocation here size_t first_ipart = 0, last_ipart = 0; while (baseMLO->exp_ipart_ThreadTaskDistributor->getTasks(first_ipart, last_ipart)) { CTIC(timer,"oneTask"); for (long unsigned ipart = first_ipart; ipart <= last_ipart; ipart++) { CTIC(timer,"oneParticle"); #ifdef TIMING // Only time one thread if (thread_id == 0) baseMLO->timer.tic(baseMLO->TIMING_ESP_DIFF2_A); #endif unsigned my_ori_particle = baseMLO->exp_my_first_ori_particle + ipart; SamplingParameters sp; sp.nr_particles = baseMLO->mydata.ori_particles[my_ori_particle].particles_id.size(); OptimisationParamters op(sp.nr_particles, my_ori_particle); // In the first iteration, multiple seeds will be generated // A single random class is selected for each pool of images, and one does not marginalise over the orientations // The optimal orientation is based on signal-product (rather than the signal-intensity sensitive Gaussian) // If do_firstiter_cc, then first perform a single iteration with K=1 and cross-correlation criteria, afterwards // Decide which classes to integrate over (for random class assignment in 1st iteration) sp.iclass_min = 0; sp.iclass_max = baseMLO->mymodel.nr_classes - 1; // low-pass filter again and generate the seeds if (baseMLO->do_generate_seeds) { if (baseMLO->do_firstiter_cc && baseMLO->iter == 1) { // In first (CC) iter, use a single reference (and CC) sp.iclass_min = sp.iclass_max = 0; } else if ( (baseMLO->do_firstiter_cc && baseMLO->iter == 2) || (!baseMLO->do_firstiter_cc && baseMLO->iter == 1)) { // In second CC iter, or first iter without CC: generate the seeds // Now select a single random class // exp_part_id is already in randomized order (controlled by -seed) // WARNING: USING SAME iclass_min AND iclass_max FOR SomeParticles!! // Make sure random division is always the same with the same seed long int idx = my_ori_particle - baseMLO->exp_my_first_ori_particle; if (idx >= baseMLO->exp_random_class_some_particles.size()) REPORT_ERROR("BUG: expectationOneParticle idx>random_class_some_particles.size()"); sp.iclass_min = sp.iclass_max = baseMLO->exp_random_class_some_particles[idx]; } } // Global exp_metadata array has metadata of all ori_particles. Where does my_ori_particle start? 
for (long int iori = baseMLO->exp_my_first_ori_particle; iori <= baseMLO->exp_my_last_ori_particle; iori++) { if (iori == my_ori_particle) break; op.metadata_offset += baseMLO->mydata.ori_particles[iori].particles_id.size(); } #ifdef TIMING // Only time one thread if (thread_id == 0) baseMLO->timer.toc(baseMLO->TIMING_ESP_DIFF2_A); #endif CTIC(timer,"getFourierTransformsAndCtfs"); getFourierTransformsAndCtfs(my_ori_particle, op, sp, baseMLO, this); CTOC(timer,"getFourierTransformsAndCtfs"); if (baseMLO->do_realign_movies && baseMLO->movie_frame_running_avg_side > 0) { baseMLO->calculateRunningAveragesOfMovieFrames(my_ori_particle, op.Fimgs, op.power_imgs, op.highres_Xi2_imgs); } // To deal with skipped alignments/rotations if (baseMLO->do_skip_align) { sp.itrans_min = sp.itrans_max = sp.idir_min = sp.idir_max = sp.ipsi_min = sp.ipsi_max = my_ori_particle - baseMLO->exp_my_first_ori_particle; } else { sp.itrans_min = 0; sp.itrans_max = baseMLO->sampling.NrTranslationalSamplings() - 1; if (baseMLO->do_skip_rotate) { sp.idir_min = sp.idir_max = sp.ipsi_min = sp.ipsi_max = my_ori_particle - baseMLO->exp_my_first_ori_particle; } else { sp.idir_min = sp.ipsi_min = 0; sp.idir_max = baseMLO->sampling.NrDirections(0, &op.pointer_dir_nonzeroprior) - 1; sp.ipsi_max = baseMLO->sampling.NrPsiSamplings(0, &op.pointer_psi_nonzeroprior ) - 1; } } // Initialise significant weight to minus one, so that all coarse sampling points will be handled in the first pass op.significant_weight.resize(sp.nr_particles, -1.); // Only perform a second pass when using adaptive oversampling //int nr_sampling_passes = (baseMLO->adaptive_oversampling > 0) ? 2 : 1; // But on the gpu the data-structures are different between passes, so we need to make a symbolic pass to set the weights up for storeWS int nr_sampling_passes = 2; /// -- This is a iframe-indexed vector, each entry of which is a dense data-array. These are replacements to using // Mweight in the sparse (Fine-sampled) pass, coarse is unused but created empty input for convert ( FIXME ) std::vector CoarsePassWeights(1, devBundle->allocator) ,FinePassWeights(sp.nr_particles, devBundle->allocator); // -- This is a iframe-indexed vector, each entry of which is a class-indexed vector of masks, one for each // class in FinePassWeights std::vector < std::vector > FinePassClassMasks(sp.nr_particles, std::vector (baseMLO->mymodel.nr_classes, devBundle->allocator)); // -- This is a iframe-indexed vector, each entry of which is parameters used in the projection-operations *after* the // coarse pass, declared here to keep scope to storeWS std::vector < ProjectionParams > FineProjectionData(sp.nr_particles, baseMLO->mymodel.nr_classes); std::vector < cudaStager > stagerD2(sp.nr_particles,devBundle->allocator), stagerSWS(sp.nr_particles,devBundle->allocator); for (int ipass = 0; ipass < nr_sampling_passes; ipass++) { CTIC(timer,"weightPass"); #ifdef TIMING // Only time one thread if (thread_id == 0) baseMLO->timer.tic(baseMLO->TIMING_ESP_DIFF2_B); #endif if (baseMLO->strict_highres_exp > 0.) // Use smaller images in both passes and keep a maximum on coarse_size, just like in FREALIGN sp.current_image_size = baseMLO->coarse_size; else if (baseMLO->adaptive_oversampling > 0) // Use smaller images in the first pass, larger ones in the second pass sp.current_image_size = (ipass == 0) ? 
baseMLO->coarse_size : baseMLO->mymodel.current_size; else sp.current_image_size = baseMLO->mymodel.current_size; // Use coarse sampling in the first pass, oversampled one the second pass sp.current_oversampling = (ipass == 0) ? 0 : baseMLO->adaptive_oversampling; sp.nr_dir = (baseMLO->do_skip_align || baseMLO->do_skip_rotate) ? 1 : baseMLO->sampling.NrDirections(0, &op.pointer_dir_nonzeroprior); sp.nr_psi = (baseMLO->do_skip_align || baseMLO->do_skip_rotate) ? 1 : baseMLO->sampling.NrPsiSamplings(0, &op.pointer_psi_nonzeroprior); sp.nr_trans = (baseMLO->do_skip_align) ? 1 : baseMLO->sampling.NrTranslationalSamplings(); sp.nr_oversampled_rot = baseMLO->sampling.oversamplingFactorOrientations(sp.current_oversampling); sp.nr_oversampled_trans = baseMLO->sampling.oversamplingFactorTranslations(sp.current_oversampling); #ifdef TIMING // Only time one thread if (thread_id == 0) baseMLO->timer.toc(baseMLO->TIMING_ESP_DIFF2_B); #endif op.min_diff2.resize(sp.nr_particles, 0); op.avg_diff2.resize(sp.nr_particles, 0); if (ipass == 0) { unsigned long weightsPerPart(baseMLO->mymodel.nr_classes * sp.nr_dir * sp.nr_psi * sp.nr_trans * sp.nr_oversampled_rot * sp.nr_oversampled_trans); op.Mweight.resizeNoCp(1,1,sp.nr_particles, weightsPerPart); CudaGlobalPtr Mweight(devBundle->allocator); Mweight.setSize(sp.nr_particles * weightsPerPart); Mweight.setHstPtr(op.Mweight.data); Mweight.device_alloc(); deviceInitValue(Mweight, -999.); Mweight.streamSync(); CTIC(timer,"getAllSquaredDifferencesCoarse"); getAllSquaredDifferencesCoarse(ipass, op, sp, baseMLO, this, Mweight); CTOC(timer,"getAllSquaredDifferencesCoarse"); try { CTIC(timer,"convertAllSquaredDifferencesToWeightsCoarse"); convertAllSquaredDifferencesToWeights(ipass, op, sp, baseMLO, this, CoarsePassWeights, FinePassClassMasks, Mweight); CTOC(timer,"convertAllSquaredDifferencesToWeightsCoarse"); } catch (RelionError XE) { getAllSquaredDifferencesCoarse(ipass, op, sp, baseMLO, this, Mweight); #ifndef CUDA_DOUBLE_PRECISION try { convertAllSquaredDifferencesToWeights(ipass, op, sp, baseMLO, this, CoarsePassWeights, FinePassClassMasks, Mweight); } catch (RelionError XE) #endif { if (failsafe_attempts > baseMLO->failsafe_threshold) CRITICAL(ERRNUMFAILSAFE); //Rerun in fail-safe mode convertAllSquaredDifferencesToWeights(ipass, op, sp, baseMLO, this, CoarsePassWeights, FinePassClassMasks, Mweight, true); if (failsafe_attempts <= 10) { std::cerr << std::endl << "WARNING: Exception (" << XE.msg << ") handled by switching to fail-safe mode." << std::endl; if (failsafe_attempts == 10) std::cerr << "NOTE: No more fail-safe warnings will be issued." << std::endl; } failsafe_attempts ++; } } } else { #ifdef TIMING // Only time one thread if (thread_id == 0) baseMLO->timer.tic(baseMLO->TIMING_ESP_DIFF2_D); #endif // // -- go through all classes and generate projectionsetups for all classes - to be used in getASDF and storeWS below -- // // the reason to do this globally is subtle - we want the orientation_num of all classes to estimate a largest possible // // weight-array, which would be insanely much larger than necessary if we had to assume the worst. 
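// Added note (sketch): the upper bound used below for each particle's fine-pass
// weight array is
//
//   orientationNumAllClasses * sp.nr_trans * sp.nr_oversampled_trans
//
// i.e. only the orientations that survived the coarse significance test, combined
// with all (oversampled) translations; the significance checks of the fine pass
// then reduce the portion that is actually used.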
for (long int iframe = 0; iframe < sp.nr_particles; iframe++) { FineProjectionData[iframe].orientationNumAllClasses = 0; for (int exp_iclass = sp.iclass_min; exp_iclass <= sp.iclass_max; exp_iclass++) { if(exp_iclass>0) FineProjectionData[iframe].class_idx[exp_iclass] = FineProjectionData[iframe].rots.size(); FineProjectionData[iframe].class_entries[exp_iclass] = 0; CTIC(timer,"generateProjectionSetup"); FineProjectionData[iframe].orientationNumAllClasses += generateProjectionSetupFine( op, sp, baseMLO, exp_iclass, FineProjectionData[iframe]); CTOC(timer,"generateProjectionSetup"); } //set a maximum possible size for all weights (to be reduced by significance-checks) FinePassWeights[iframe].setDataSize(FineProjectionData[iframe].orientationNumAllClasses*sp.nr_trans*sp.nr_oversampled_trans); FinePassWeights[iframe].dual_alloc_all(); stagerD2[iframe].size= 2*(FineProjectionData[iframe].orientationNumAllClasses*sp.nr_trans*sp.nr_oversampled_trans); stagerD2[iframe].prepare(); } #ifdef TIMING // Only time one thread if (thread_id == 0) baseMLO->timer.toc(baseMLO->TIMING_ESP_DIFF2_D); #endif // printf("Allocator used space before 'getAllSquaredDifferencesFine': %.2f MiB\n", (float)devBundle->allocator->getTotalUsedSpace()/(1024.*1024.)); CTIC(timer,"getAllSquaredDifferencesFine"); getAllSquaredDifferencesFine(ipass, op, sp, baseMLO, this, FinePassWeights, FinePassClassMasks, FineProjectionData, stagerD2); CTOC(timer,"getAllSquaredDifferencesFine"); FinePassWeights[0].weights.cp_to_host(); CudaGlobalPtr Mweight(devBundle->allocator); //DUMMY CTIC(timer,"convertAllSquaredDifferencesToWeightsFine"); convertAllSquaredDifferencesToWeights(ipass, op, sp, baseMLO, this, FinePassWeights, FinePassClassMasks, Mweight); CTOC(timer,"convertAllSquaredDifferencesToWeightsFine"); } CTOC(timer,"weightPass"); } #ifdef TIMING // Only time one thread if (thread_id == 0) baseMLO->timer.tic(baseMLO->TIMING_ESP_DIFF2_E); #endif // For the reconstruction step use mymodel.current_size! 
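// Added note: the difference passes above may have used the smaller coarse_size
// images (see the ipass loop); the weighted sums that are accumulated for the
// reconstruction must be taken at the full current resolution, hence
// mymodel.current_size is restored here before storeWeightedSums.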
sp.current_image_size = baseMLO->mymodel.current_size; for (long int iframe = 0; iframe < sp.nr_particles; iframe++) { stagerSWS[iframe].size= 2*(FineProjectionData[iframe].orientationNumAllClasses); stagerSWS[iframe].prepare(); } #ifdef TIMING // Only time one thread if (thread_id == 0) baseMLO->timer.toc(baseMLO->TIMING_ESP_DIFF2_E); #endif CTIC(timer,"storeWeightedSums"); storeWeightedSums(op, sp, baseMLO, this, FinePassWeights, FineProjectionData, FinePassClassMasks, stagerSWS); CTOC(timer,"storeWeightedSums"); CTOC(timer,"oneParticle"); } CTOC(timer,"oneTask"); } // CTIC(cudaMLO->timer,"interParticle"); // exit(0); #ifdef TIMING // Only time one thread if (thread_id == 0) baseMLO->timer.toc(baseMLO->TIMING_ESP_THR); #endif } relion-3.1.3/src/gpu_utils/cuda_ml_optimiser.h000066400000000000000000000336701411340063500214660ustar00rootroot00000000000000#ifndef CUDA_ML_OPTIMISER_H_ #define CUDA_ML_OPTIMISER_H_ #include "src/mpi.h" #include "src/ml_optimiser.h" #include "src/gpu_utils/cuda_mem_utils.h" #include "src/gpu_utils/cuda_projector_plan.h" #include "src/gpu_utils/cuda_projector.h" #include "src/gpu_utils/cuda_backprojector.h" #include "src/gpu_utils/cuda_fft.h" #include "src/gpu_utils/cuda_benchmark_utils.h" #include //#include #ifdef CUDA_DOUBLE_PRECISION #define XFLOAT double #else #define XFLOAT float #endif //#ifdef DEBUG_CUDA //#define HANDLE_CUFFT_ERROR( err ) (CufftHandleError( err, __FILE__, __LINE__ )) //#else //#define HANDLE_CUFFT_ERROR( err ) (err) //Do nothing //#endif //static void CufftHandleError( cufftResult err, const char *file, int line ) //{ // if (err != CUFFT_SUCCESS) // { // fprintf(stderr, "Cufft error in file '%s' in line %i : %s.\n", // __FILE__, __LINE__, "error" ); // raise(SIGSEGV); // } //} class SamplingParameters { public: unsigned long nr_dir, nr_psi, nr_trans, nr_oversampled_rot, nr_oversampled_trans, nr_particles, current_oversampling, current_image_size, iclass_min, iclass_max, idir_min, idir_max, ipsi_min, ipsi_max, itrans_min, itrans_max; std::string current_img; SamplingParameters(): nr_dir(0), nr_psi(0), nr_trans(0), nr_oversampled_rot(0), nr_oversampled_trans(0), nr_particles(0), current_oversampling(0), current_image_size(0), iclass_min(0), iclass_max(0), idir_min(0), idir_max(0), ipsi_min(0), ipsi_max(0), itrans_min(0), itrans_max(0), current_img() {}; }; class Indices { public: int fineIdx, coarseIdx, iclass, idir, ipsi, itrans, ioverrot, iovertrans; Indices(): fineIdx(0), coarseIdx(0), iclass(0), idir(0), ipsi(0), itrans(0), ioverrot(0), iovertrans(0) {}; void fineIndexToFineIndices(SamplingParameters sp) // converts an "ihidden_over" (finely sampled) index to partial indices (and coarse index) { int oversamples = sp.nr_oversampled_rot*sp.nr_oversampled_trans; int t_idx = fineIdx; iclass = floor( t_idx / ( sp.nr_dir * sp.nr_psi * sp.nr_trans * oversamples )); t_idx -= iclass * ( sp.nr_dir * sp.nr_psi * sp.nr_trans * oversamples ); idir = floor( t_idx / ( sp.nr_psi * sp.nr_trans * oversamples )); t_idx -= idir * ( sp.nr_psi * sp.nr_trans * oversamples ); ipsi = floor( t_idx / ( sp.nr_trans * oversamples )); t_idx -= ipsi * ( sp.nr_trans * oversamples ); itrans = floor( t_idx / oversamples ); t_idx -= itrans * oversamples ; ioverrot = floor( t_idx / sp.nr_oversampled_trans ); t_idx -= ioverrot * sp.nr_oversampled_trans ; iovertrans = t_idx ; coarseIdx = sp.nr_trans * sp.nr_psi * idir + sp.nr_trans * ipsi + itrans; } void fineIndicesToFineIndex(SamplingParameters sp) // converts partial indices to an "ihidden_over" (finely sampled) index 
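// Added note: this is the inverse of fineIndexToFineIndices above. The packed
// index follows a mixed-radix layout, equivalent to
//
//   fineIdx = ((((iclass * nr_dir + idir) * nr_psi + ipsi) * nr_trans + itrans)
//               * nr_oversampled_rot + ioverrot) * nr_oversampled_trans + iovertrans;
//
// The body below accumulates exactly these terms, using
// oversamples = nr_oversampled_rot * nr_oversampled_trans.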
// FIXME Untested { int oversamples = sp.nr_oversampled_rot*sp.nr_oversampled_trans; int idx = 0; idx += iclass * sp.nr_dir * sp.nr_psi * sp.nr_trans * oversamples; idx += idir * sp.nr_psi * sp.nr_trans * oversamples; idx += ipsi * sp.nr_trans * oversamples; idx += itrans * oversamples; idx += ioverrot * sp.nr_oversampled_trans; idx += iovertrans; fineIdx = idx; } void coarseIndexToCoarseIndices(SamplingParameters sp) // converts an "ihidden" (coarsely sampled) index to coarse partial indices // FIXME Untested { int t_idx = coarseIdx; iclass = floor( t_idx / ( sp.nr_dir * sp.nr_psi * sp.nr_trans)); t_idx -= iclass * ( sp.nr_dir * sp.nr_psi * sp.nr_trans); idir = floor( t_idx / ( sp.nr_psi * sp.nr_trans )); t_idx -= idir * ( sp.nr_psi * sp.nr_trans ); ipsi = floor( t_idx / ( sp.nr_trans )); t_idx -= ipsi * ( sp.nr_trans ); itrans = t_idx ; ioverrot = 0; iovertrans = 0; } void coarseIndicesToCoarseIndex(SamplingParameters sp) // converts coarse partial indices to an "ihidden" (coarsely sampled) index // FIXME Untested { int idx = 0; idx += idir * sp.nr_psi * sp.nr_trans; idx += ipsi * sp.nr_trans; idx += itrans; coarseIdx = idx; } }; class OptimisationParamters { public: unsigned metadata_offset; unsigned long my_ori_particle; std::vector > Fimgs, Fimgs_nomask, local_Fimgs_shifted, local_Fimgs_shifted_nomask; std::vector > Fctfs, local_Fctfs, local_Minvsigma2s; std::vector pointer_dir_nonzeroprior, pointer_psi_nonzeroprior; std::vector directions_prior, psi_prior, local_sqrtXi2; std::vector highres_Xi2_imgs, min_diff2, avg_diff2; MultidimArray Mcoarse_significant; // And from storeWeightedSums std::vector sum_weight, significant_weight, max_weight; std::vector > old_offset, prior; std::vector > power_imgs; MultidimArray Mweight; std::vector max_index; OptimisationParamters (unsigned nr_particles, unsigned long my_ori_particle): metadata_offset(0), my_ori_particle(my_ori_particle) { power_imgs.resize(nr_particles); highres_Xi2_imgs.resize(nr_particles); Fimgs.resize(nr_particles); Fimgs_nomask.resize(nr_particles); Fctfs.resize(nr_particles); old_offset.resize(nr_particles); prior.resize(nr_particles); max_index.resize(nr_particles); }; }; class IndexedDataArrayMask { public: // indexes of job partition // every element in jobOrigin is a reference to point to a position in a IndexedDataArray.weights array where that job starts RELATIVE to firstPos // every element in jobExtent specifies the number of weights for that job CudaGlobalPtr jobOrigin, jobExtent; size_t firstPos, lastPos; // positions in indexedDataArray data and index arrays to slice out size_t weightNum, jobNum; // number of weights and jobs this class inline IndexedDataArrayMask(CudaCustomAllocator *allocator): jobOrigin(allocator), jobExtent(allocator), firstPos(), lastPos(), weightNum(), jobNum() {}; public: void setNumberOfJobs(size_t newSize) { jobNum=newSize; jobOrigin.setSize(newSize); jobExtent.setSize(newSize); } void setNumberOfWeights(size_t newSize) { weightNum=newSize; } inline ~IndexedDataArrayMask() { // jobOrigin.free_host(); // jobExtent.free_host(); }; }; class IndexedDataArray { public: //actual data CudaGlobalPtr weights; // indexes with same length as data // -- basic indices --------------------------------- // rot_id = id of rot = which of all POSSIBLE orientations this weight signifies // rot_idx = index of rot = which in the sequence of the determined significant orientations this weight signifies // trans_id = id of trans = which of all POSSIBLE translations this weight signifies // -- special indices 
--------------------------------- // ihidden_overs = mapping to MWeight-based indexing for compatibility CudaGlobalPtr rot_id, rot_idx, trans_idx, ihidden_overs; inline IndexedDataArray(CudaCustomAllocator *allocator): weights(allocator), rot_id(allocator), rot_idx(allocator), trans_idx(allocator), ihidden_overs(allocator) {}; // constructor which takes a parent IndexedDataArray and a mask to create a child inline IndexedDataArray(IndexedDataArray &parent, IndexedDataArrayMask &mask, CudaCustomAllocator *allocator): weights( &(parent.weights.h_ptr[mask.firstPos]) ,&(parent.weights.d_ptr[mask.firstPos]) ,mask.weightNum, allocator), rot_id( &(parent.rot_id.h_ptr[mask.firstPos]) ,&(parent.rot_id.d_ptr[mask.firstPos]) ,mask.weightNum, allocator), rot_idx( &(parent.rot_idx.h_ptr[mask.firstPos]) ,&(parent.rot_idx.d_ptr[mask.firstPos]) ,mask.weightNum, allocator), trans_idx( &(parent.trans_idx.h_ptr[mask.firstPos]) ,&(parent.trans_idx.d_ptr[mask.firstPos]) ,mask.weightNum, allocator), ihidden_overs( &(parent.ihidden_overs.h_ptr[mask.firstPos]),&(parent.ihidden_overs.d_ptr[mask.firstPos]) ,mask.weightNum, allocator) { weights.d_do_free=false; rot_id.d_do_free=false; rot_idx.d_do_free=false; trans_idx.d_do_free=false; ihidden_overs.d_do_free=false; weights.h_do_free=false; rot_id.h_do_free=false; rot_idx.h_do_free=false; trans_idx.h_do_free=false; ihidden_overs.h_do_free=false; }; public: void setDataSize(size_t newSize) { weights.setSize(newSize); rot_id.setSize(newSize); rot_idx.setSize(newSize); trans_idx.setSize(newSize); ihidden_overs.setSize(newSize); } void resize_host_all(size_t newSize) { weights.resize_host(newSize); rot_id.resize_host(newSize); rot_idx.resize_host(newSize); trans_idx.resize_host(newSize); ihidden_overs.resize_host(newSize); } void host_alloc_all() { weights.host_alloc(); rot_id.host_alloc(); rot_idx.host_alloc(); trans_idx.host_alloc(); ihidden_overs.host_alloc(); } void device_alloc_all() { weights.device_alloc(); rot_id.device_alloc(); rot_idx.device_alloc(); trans_idx.device_alloc(); ihidden_overs.device_alloc(); } void dual_alloc_all() { host_alloc_all(); device_alloc_all(); } }; class ProjectionParams { public: std::vector< size_t > orientation_num; // the number of significant orientation for each class size_t orientationNumAllClasses; // sum of the above std::vector< RFLOAT > rots, tilts, psis; std::vector< size_t > iorientclasses, iover_rots; // These are arrays which detial the number of entries in each class, and where each class starts. // NOTE: There is no information about which class each class_idx refers to, there is only // a distinction between different classes. 
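// Added example (illustrative values only): with three classes holding 4, 0 and 2
// significant orientations respectively,
//   class_idx     = {0, 4, 4}
//   class_entries = {4, 0, 2}
// so class n occupies elements [class_idx[n], class_idx[n] + class_entries[n])
// of the flat rots/tilts/psis arrays declared above.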
std::vector< size_t > class_entries, class_idx; inline ProjectionParams(): rots(), tilts(), psis(), iorientclasses(), iover_rots(), class_entries(), class_idx(), orientation_num(), orientationNumAllClasses(0) {}; inline ProjectionParams(size_t classes): rots(), tilts(), psis(), iorientclasses(), iover_rots(), class_entries(classes), class_idx(classes), orientation_num(classes), orientationNumAllClasses(0) { class_idx[0]=0; class_entries[0]=0; }; // constructor that slices out a part of a parent ProjectionParams, assumed to contain a single (partial or entire) class inline ProjectionParams(ProjectionParams &parent, size_t start, size_t end): rots( parent.rots.begin() +start, parent.rots.begin() +end), tilts( parent.tilts.begin() +start, parent.tilts.begin() +end), psis( parent.psis.begin() +start, parent.psis.begin() +end), iorientclasses( parent.iorientclasses.begin() +start, parent.iorientclasses.begin() +end), iover_rots( parent.iover_rots.begin() +start, parent.iover_rots.begin() +end), orientation_num(1), orientationNumAllClasses(0), class_entries(1,end-start), class_idx(1,0) // NOTE: this is NOT the class, but rather where in these partial PrjParams to start, which is @ 0. {}; public: // Appends new values into the projection parameters for later use. // class_idx is used as such: // the n:th class (beginning with 0:th) // begins @ element class_idx[n] // ends @ element class_idx[n]+class_entries[n] void pushBackAll(size_t iclass, RFLOAT NEWrot,RFLOAT NEWtilt ,RFLOAT NEWpsi, size_t NEWiorientclasses,size_t NEWiover_rots) { // incremement the counter for this class class_entries[iclass]++; // and push a new entry rots.push_back(NEWrot); tilts.push_back(NEWtilt); psis.push_back(NEWpsi); iorientclasses.push_back(NEWiorientclasses); iover_rots.push_back(NEWiover_rots); } }; /* * Bundle of device-objects */ class MlDeviceBundle { public: //The CUDA accelerated projector set std::vector< CudaProjector > cudaProjectors; //The CUDA accelerated back-projector set std::vector< CudaBackprojector > cudaBackprojectors; //Used for precalculations of projection setup CudaCustomAllocator *allocator; //Used for precalculations of projection setup bool generateProjectionPlanOnTheFly; std::vector< CudaProjectorPlan > coarseProjectionPlans; MlOptimiser *baseMLO; int device_id; int rank_shared_count; bool haveWarnedRefinementMem; MlDeviceBundle(MlOptimiser *baseMLOptimiser): baseMLO(baseMLOptimiser), generateProjectionPlanOnTheFly(false), rank_shared_count(1), device_id(-1), haveWarnedRefinementMem(false), allocator(NULL) {}; void setDevice(int did) { device_id = did; } size_t checkFixedSizedObjects(int shares); void setupFixedSizedObjects(); void setupTunableSizedObjects(size_t allocationSize); void syncAllBackprojects() { DEBUG_HANDLE_ERROR(cudaDeviceSynchronize()); } ~MlDeviceBundle() { cudaProjectors.clear(); cudaBackprojectors.clear(); coarseProjectionPlans.clear(); //Delete this lastly delete allocator; HANDLE_ERROR(cudaSetDevice(device_id)); HANDLE_ERROR(cudaDeviceReset()); } }; class MlOptimiserCuda { public: // transformer as holder for reuse of fftw_plans FourierTransformer transformer; //Class streams ( for concurrent scheduling of class-specific kernels) std::vector< cudaStream_t > classStreams; cudaError_t errorStatus; CudaFFT transformer1; CudaFFT transformer2; MlOptimiser *baseMLO; bool refIs3D; bool dataIs3D; int device_id; unsigned failsafe_attempts; MlDeviceBundle *devBundle; #ifdef TIMING_FILES relion_timer timer; #endif MlOptimiserCuda(MlOptimiser *baseMLOptimiser, MlDeviceBundle* 
bundle, const char * timing_fnm) : baseMLO(baseMLOptimiser), transformer1(cudaStreamPerThread, bundle->allocator, baseMLOptimiser->mymodel.data_dim), transformer2(cudaStreamPerThread, bundle->allocator, baseMLOptimiser->mymodel.data_dim), refIs3D(baseMLO->mymodel.ref_dim == 3), dataIs3D(baseMLO->mymodel.data_dim == 3), devBundle(bundle), device_id(bundle->device_id), #ifdef TIMING_FILES timer(timing_fnm), #endif errorStatus((cudaError_t)0), failsafe_attempts(0) {}; void resetData(); void doThreadExpectationSomeParticles(int thread_id); ~MlOptimiserCuda() { for (int i = 0; i < classStreams.size(); i++) if (classStreams[i] != NULL) HANDLE_ERROR(cudaStreamDestroy(classStreams[i])); } }; #endif relion-3.1.3/src/gpu_utils/cuda_projector.cu000066400000000000000000000212061411340063500211420ustar00rootroot00000000000000#include "src/gpu_utils/cuda_projector.h" #include bool CudaProjector::setMdlDim( int xdim, int ydim, int zdim, int inity, int initz, int maxr, int paddingFactor) { if(zdim == 1) zdim = 0; if (xdim == mdlX && ydim == mdlY && zdim == mdlZ && inity == mdlInitY && initz == mdlInitZ && maxr == mdlMaxR && paddingFactor == padding_factor) return false; clear(); mdlX = xdim; mdlY = ydim; mdlZ = zdim; if(zdim == 0) mdlXYZ = xdim*ydim; else mdlXYZ = xdim*ydim*zdim; mdlInitY = inity; mdlInitZ = initz; mdlMaxR = maxr; padding_factor = paddingFactor; #ifndef CUDA_NO_TEXTURES #if(COMPLEXTEXTURE) mdlComplex = new cudaTextureObject_t(); // create channel to describe data type (bits,bits,bits,bits,type) cudaChannelFormatDesc desc; desc = cudaCreateChannelDesc(32, 32, 0, 0, cudaChannelFormatKindFloat); struct cudaResourceDesc resDesc_complex; struct cudaTextureDesc texDesc; // -- Zero all data in objects handlers memset(&resDesc_complex, 0, sizeof(cudaResourceDesc)); memset(&texDesc, 0, sizeof(cudaTextureDesc)); if(mdlZ!=0) // 3D model { texArrayComplex = new cudaArray_t(); // -- make extents for automatic pitch:ing (aligment) of allocated 3D arrays cudaExtent volumeSize = make_cudaExtent(mdlX, mdlY, mdlZ); // -- Allocate and copy data using very celver CUDA memcpy-functions HANDLE_ERROR(cudaMalloc3DArray(texArrayComplex, &desc, volumeSize)); // -- Descriptors of the channel(s) in the texture(s) resDesc_complex.res.array.array = *texArrayComplex; resDesc_complex.resType = cudaResourceTypeArray; } else // 2D model { HANDLE_ERROR(cudaMallocPitch(&texArrayComplex2D, &pitch2D, sizeof(CUDACOMPLEX)*mdlX,mdlY)); // -- Descriptors of the channel(s) in the texture(s) resDesc_complex.resType = cudaResourceTypePitch2D; resDesc_complex.res.pitch2D.devPtr = texArrayComplex2D; resDesc_complex.res.pitch2D.pitchInBytes = pitch2D; resDesc_complex.res.pitch2D.width = mdlX; resDesc_complex.res.pitch2D.height = mdlY; resDesc_complex.res.pitch2D.desc = desc; } // -- Decriptors of the texture(s) and methods used for reading it(them) -- texDesc.filterMode = cudaFilterModeLinear; texDesc.readMode = cudaReadModeElementType; texDesc.normalizedCoords = false; for(int n=0; n<3; n++) texDesc.addressMode[n]=cudaAddressModeClamp; // -- Create texture object(s) HANDLE_ERROR(cudaCreateTextureObject(mdlComplex, &resDesc_complex, &texDesc, NULL)); #else mdlReal = new cudaTextureObject_t(); mdlImag = new cudaTextureObject_t(); // create channel to describe data type (bits,bits,bits,bits,type) cudaChannelFormatDesc desc; desc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat); struct cudaResourceDesc resDesc_real, resDesc_imag; struct cudaTextureDesc texDesc; // -- Zero all data in objects handlers memset(&resDesc_real, 
0, sizeof(cudaResourceDesc)); memset(&resDesc_imag, 0, sizeof(cudaResourceDesc)); memset(&texDesc, 0, sizeof(cudaTextureDesc)); if(mdlZ!=0) // 3D model { texArrayReal = new cudaArray_t(); texArrayImag = new cudaArray_t(); // -- make extents for automatic pitch:ing (aligment) of allocated 3D arrays cudaExtent volumeSize = make_cudaExtent(mdlX, mdlY, mdlZ); // -- Allocate and copy data using very clever CUDA memcpy-functions HANDLE_ERROR(cudaMalloc3DArray(texArrayReal, &desc, volumeSize)); HANDLE_ERROR(cudaMalloc3DArray(texArrayImag, &desc, volumeSize)); // -- Descriptors of the channel(s) in the texture(s) resDesc_real.res.array.array = *texArrayReal; resDesc_imag.res.array.array = *texArrayImag; resDesc_real.resType = cudaResourceTypeArray; resDesc_imag.resType = cudaResourceTypeArray; } else // 2D model { HANDLE_ERROR(cudaMallocPitch(&texArrayReal2D, &pitch2D, sizeof(XFLOAT)*mdlX,mdlY)); HANDLE_ERROR(cudaMallocPitch(&texArrayImag2D, &pitch2D, sizeof(XFLOAT)*mdlX,mdlY)); // -- Descriptors of the channel(s) in the texture(s) resDesc_real.resType = cudaResourceTypePitch2D; resDesc_real.res.pitch2D.devPtr = texArrayReal2D; resDesc_real.res.pitch2D.pitchInBytes = pitch2D; resDesc_real.res.pitch2D.width = mdlX; resDesc_real.res.pitch2D.height = mdlY; resDesc_real.res.pitch2D.desc = desc; // ------------------------------------------------- resDesc_imag.resType = cudaResourceTypePitch2D; resDesc_imag.res.pitch2D.devPtr = texArrayImag2D; resDesc_imag.res.pitch2D.pitchInBytes = pitch2D; resDesc_imag.res.pitch2D.width = mdlX; resDesc_imag.res.pitch2D.height = mdlY; resDesc_imag.res.pitch2D.desc = desc; } // -- Decriptors of the texture(s) and methods used for reading it(them) -- texDesc.filterMode = cudaFilterModeLinear; texDesc.readMode = cudaReadModeElementType; texDesc.normalizedCoords = false; for(int n=0; n<3; n++) texDesc.addressMode[n]=cudaAddressModeClamp; // -- Create texture object(s) HANDLE_ERROR(cudaCreateTextureObject(mdlReal, &resDesc_real, &texDesc, NULL)); HANDLE_ERROR(cudaCreateTextureObject(mdlImag, &resDesc_imag, &texDesc, NULL)); #endif #else DEBUG_HANDLE_ERROR(cudaMalloc( (void**) &mdlReal, mdlXYZ * sizeof(XFLOAT))); DEBUG_HANDLE_ERROR(cudaMalloc( (void**) &mdlImag, mdlXYZ * sizeof(XFLOAT))); #endif return true; } #if(!COMPLEXTEXTURE) void CudaProjector::initMdl(XFLOAT *real, XFLOAT *imag) { #ifdef CUDA_DEBUG if (mdlXYZ == 0) { printf("DEBUG_ERROR: Model dimensions must be set with setMdlDim before call to setMdlData."); CRITICAL(ERR_MDLDIM); } if (mdlReal != 0) { printf("DEBUG_ERROR: Duplicated call to setMdlData."); CRITICAL(ERR_MDLSET); } #endif #ifndef CUDA_NO_TEXTURES if(mdlZ!=0) // 3D model { // -- make extents for automatic pitching (aligment) of allocated 3D arrays cudaMemcpy3DParms copyParams = {0}; copyParams.extent = make_cudaExtent(mdlX, mdlY, mdlZ); copyParams.kind = cudaMemcpyHostToDevice; // -- Copy data copyParams.dstArray = *texArrayReal; copyParams.srcPtr = make_cudaPitchedPtr(real, mdlX * sizeof(XFLOAT), mdlY, mdlZ); DEBUG_HANDLE_ERROR(cudaMemcpy3D(©Params)); copyParams.dstArray = *texArrayImag; copyParams.srcPtr = make_cudaPitchedPtr(imag, mdlX * sizeof(XFLOAT), mdlY, mdlZ); DEBUG_HANDLE_ERROR(cudaMemcpy3D(©Params)); } else // 2D model { DEBUG_HANDLE_ERROR(cudaMemcpy2D(texArrayReal2D, pitch2D, real, sizeof(XFLOAT) * mdlX, sizeof(XFLOAT) * mdlX, mdlY, cudaMemcpyHostToDevice)); DEBUG_HANDLE_ERROR(cudaMemcpy2D(texArrayImag2D, pitch2D, imag, sizeof(XFLOAT) * mdlX, sizeof(XFLOAT) * mdlX, mdlY, cudaMemcpyHostToDevice)); } #else DEBUG_HANDLE_ERROR(cudaMemcpy( 
mdlReal, real, mdlXYZ * sizeof(XFLOAT), cudaMemcpyHostToDevice)); DEBUG_HANDLE_ERROR(cudaMemcpy( mdlImag, imag, mdlXYZ * sizeof(XFLOAT), cudaMemcpyHostToDevice)); #endif } #endif void CudaProjector::initMdl(Complex *data) { #if(COMPLEXTEXTURE) if(mdlZ!=0) // 3D model { // -- make extents for automatic pitching (aligment) of allocated 3D arrays cudaMemcpy3DParms copyParams = {0}; copyParams.extent = make_cudaExtent(mdlX, mdlY, mdlZ); copyParams.kind = cudaMemcpyHostToDevice; // -- Copy data copyParams.dstArray = *texArrayComplex; copyParams.srcPtr = make_cudaPitchedPtr(data, mdlX * sizeof(CUDACOMPLEX), mdlY, mdlZ); DEBUG_HANDLE_ERROR(cudaMemcpy3D(©Params)); } else // 2D model { DEBUG_HANDLE_ERROR(cudaMemcpy2D(texArrayComplex2D, pitch2D, data, sizeof(CUDACOMPLEX) * mdlX, sizeof(CUDACOMPLEX) * mdlX, mdlY, cudaMemcpyHostToDevice)); } #else XFLOAT *tmpReal = new XFLOAT[mdlXYZ]; XFLOAT *tmpImag = new XFLOAT[mdlXYZ]; for (unsigned long i = 0; i < mdlXYZ; i ++) { tmpReal[i] = (XFLOAT) data[i].real; tmpImag[i] = (XFLOAT) data[i].imag; } initMdl(tmpReal, tmpImag); delete [] tmpReal; delete [] tmpImag; #endif } #if(COMPLEXTEXTURE) void CudaProjector::clear() { if (mdlComplex != 0) { cudaDestroyTextureObject(*mdlComplex); delete mdlComplex; if(mdlZ!=0) //3D case { cudaFreeArray(*texArrayComplex); delete texArrayComplex; } else //2D case cudaFree(texArrayComplex2D); texArrayComplex= 0; mdlComplex = 0; } } #else void CudaProjector::clear() { mdlX = 0; mdlY = 0; mdlZ = 0; mdlXYZ = 0; mdlInitY = 0; mdlInitZ = 0; mdlMaxR = 0; padding_factor = 0; allocaton_size = 0; if (mdlReal != 0) { #ifndef CUDA_NO_TEXTURES cudaDestroyTextureObject(*mdlReal); cudaDestroyTextureObject(*mdlImag); delete mdlReal; delete mdlImag; if(mdlZ!=0) //3D case { cudaFreeArray(*texArrayReal); cudaFreeArray(*texArrayImag); delete texArrayReal; delete texArrayImag; } else //2D case { HANDLE_ERROR(cudaFree(texArrayReal2D)); HANDLE_ERROR(cudaFree(texArrayImag2D)); } texArrayReal = 0; texArrayImag = 0; #else cudaFree(mdlReal); cudaFree(mdlImag); #endif mdlReal = 0; mdlImag = 0; } } #endif relion-3.1.3/src/gpu_utils/cuda_projector.cuh000066400000000000000000000171041411340063500213140ustar00rootroot00000000000000#ifndef CUDA_PROJECTOR_CUH_ #define CUDA_PROJECTOR_CUH_ #include #include "src/gpu_utils/cuda_projector.h" #include "src/gpu_utils/cuda_device_utils.cuh" #ifndef CUDA_NO_TEXTURES #define PROJECTOR_PTR_TYPE cudaTextureObject_t #else #define PROJECTOR_PTR_TYPE XFLOAT * #endif class CudaProjectorKernel { public: int mdlX, mdlXY, mdlZ, imgX, imgY, imgZ, mdlInitY, mdlInitZ, padding_factor, maxR, maxR2; PROJECTOR_PTR_TYPE mdlReal; PROJECTOR_PTR_TYPE mdlImag; PROJECTOR_PTR_TYPE mdlComplex; CudaProjectorKernel( int mdlX, int mdlY, int mdlZ, int imgX, int imgY, int imgZ, int mdlInitY, int mdlInitZ, int padding_factor, int maxR, PROJECTOR_PTR_TYPE mdlComplex ): mdlX(mdlX), mdlXY(mdlX*mdlY), mdlZ(mdlZ), imgX(imgX), imgY(imgY), imgZ(imgZ), mdlInitY(mdlInitY), mdlInitZ(mdlInitZ), padding_factor(padding_factor), maxR(maxR), maxR2(maxR*maxR), mdlComplex(mdlComplex) {}; CudaProjectorKernel( int mdlX, int mdlY, int mdlZ, int imgX, int imgY, int imgZ, int mdlInitY, int mdlInitZ, int padding_factor, int maxR, PROJECTOR_PTR_TYPE mdlReal, PROJECTOR_PTR_TYPE mdlImag ): mdlX(mdlX), mdlXY(mdlX*mdlY), mdlZ(mdlZ), imgX(imgX), imgY(imgY), imgZ(imgZ), mdlInitY(mdlInitY), mdlInitZ(mdlInitZ), padding_factor(padding_factor), maxR(maxR), maxR2(maxR*maxR), mdlReal(mdlReal), mdlImag(mdlImag) {}; __device__ __forceinline__ void project3Dmodel( int x, int y, int z, 
XFLOAT e0, XFLOAT e1, XFLOAT e2, XFLOAT e3, XFLOAT e4, XFLOAT e5, XFLOAT e6, XFLOAT e7, XFLOAT e8, XFLOAT &real, XFLOAT &imag) { int r2; r2 = x*x + y*y + z*z; if (r2 <= maxR2) { XFLOAT xp = (e0 * x + e1 * y + e2 * z ) * padding_factor; XFLOAT yp = (e3 * x + e4 * y + e5 * z ) * padding_factor; XFLOAT zp = (e6 * x + e7 * y + e8 * z ) * padding_factor; #ifdef CUDA_NO_TEXTURES if (xp < 0) { // Get complex conjugated hermitian symmetry pair xp = -xp; yp = -yp; zp = -zp; real = no_tex3D(mdlReal, xp, yp, zp, mdlX, mdlXY, mdlInitY, mdlInitZ); imag = - no_tex3D(mdlImag, xp, yp, zp, mdlX, mdlXY, mdlInitY, mdlInitZ); } else { real = no_tex3D(mdlReal, xp, yp, zp, mdlX, mdlXY, mdlInitY, mdlInitZ); imag = no_tex3D(mdlImag, xp, yp, zp, mdlX, mdlXY, mdlInitY, mdlInitZ); } #else #if(!COMPLEXTEXTURE) if (xp < 0) { // Get complex conjugated hermitian symmetry pair xp = -xp; yp = -yp; zp = -zp; yp -= mdlInitY; zp -= mdlInitZ; real = tex3D(mdlReal, xp + (XFLOAT)0.5, yp + (XFLOAT)0.5, zp + (XFLOAT)0.5); imag = - tex3D(mdlImag, xp + (XFLOAT)0.5, yp + (XFLOAT)0.5, zp + (XFLOAT)0.5); } else { yp -= mdlInitY; zp -= mdlInitZ; real = tex3D(mdlReal, xp + (XFLOAT)0.5, yp + (XFLOAT)0.5, zp + (XFLOAT)0.5); imag = tex3D(mdlImag, xp + (XFLOAT)0.5, yp + (XFLOAT)0.5, zp + (XFLOAT)0.5); } #else CUDACOMPLEX val; if (xp < 0) { // Get complex conjugated hermitian symmetry pair xp = -xp; yp = -yp; zp = -zp; yp -= mdlInitY; zp -= mdlInitZ; val = tex3D(mdlComplex, xp + (XFLOAT)0.5, yp + (XFLOAT)0.5, zp + (XFLOAT)0.5); val.y = -val.y; } else { yp -= mdlInitY; zp -= mdlInitZ; val = tex3D(mdlComplex, xp + (XFLOAT)0.5, yp + (XFLOAT)0.5, zp + (XFLOAT)0.5); } real=val.x; imag=val.y; #endif #endif } else { real = 0.0f; imag = 0.0f; } } __device__ __forceinline__ void project3Dmodel( int x, int y, XFLOAT e0, XFLOAT e1, XFLOAT e3, XFLOAT e4, XFLOAT e6, XFLOAT e7, XFLOAT &real, XFLOAT &imag) { int r2; r2 = x*x + y*y; if (r2 <= maxR2) { XFLOAT xp = (e0 * x + e1 * y ) * padding_factor; XFLOAT yp = (e3 * x + e4 * y ) * padding_factor; XFLOAT zp = (e6 * x + e7 * y ) * padding_factor; #ifdef CUDA_NO_TEXTURES if (xp < 0) { // Get complex conjugated hermitian symmetry pair xp = -xp; yp = -yp; zp = -zp; real = no_tex3D(mdlReal, xp, yp, zp, mdlX, mdlXY, mdlInitY, mdlInitZ); imag = - no_tex3D(mdlImag, xp, yp, zp, mdlX, mdlXY, mdlInitY, mdlInitZ); } else { real = no_tex3D(mdlReal, xp, yp, zp, mdlX, mdlXY, mdlInitY, mdlInitZ); imag = no_tex3D(mdlImag, xp, yp, zp, mdlX, mdlXY, mdlInitY, mdlInitZ); } #else #if(!COMPLEXTEXTURE) if (xp < 0) { // Get complex conjugated hermitian symmetry pair xp = -xp; yp = -yp; zp = -zp; yp -= mdlInitY; zp -= mdlInitZ; real = tex3D(mdlReal, xp + (XFLOAT)0.5, yp + (XFLOAT)0.5, zp + (XFLOAT)0.5); imag = - tex3D(mdlImag, xp + (XFLOAT)0.5, yp + (XFLOAT)0.5, zp + (XFLOAT)0.5); } else { yp -= mdlInitY; zp -= mdlInitZ; real = tex3D(mdlReal, xp + (XFLOAT)0.5, yp + (XFLOAT)0.5, zp + (XFLOAT)0.5); imag = tex3D(mdlImag, xp + (XFLOAT)0.5, yp + (XFLOAT)0.5, zp + (XFLOAT)0.5); } #else CUDACOMPLEX val; if (xp < 0) { // Get complex conjugated hermitian symmetry pair xp = -xp; yp = -yp; zp = -zp; yp -= mdlInitY; zp -= mdlInitZ; val = tex3D(mdlComplex, xp + (XFLOAT)0.5, yp + (XFLOAT)0.5, zp + (XFLOAT)0.5); val.y = -val.y; } else { yp -= mdlInitY; zp -= mdlInitZ; val = tex3D(mdlComplex, xp + (XFLOAT)0.5, yp + (XFLOAT)0.5, zp + (XFLOAT)0.5); } real=val.x; imag=val.y; #endif #endif } else { real = 0.0f; imag = 0.0f; } } __device__ __forceinline__ void project2Dmodel( int x, int y, XFLOAT e0, XFLOAT e1, XFLOAT e3, XFLOAT e4, XFLOAT &real, XFLOAT 
&imag) { int r2; r2 = x*x + y*y; if (r2 <= maxR2) { XFLOAT xp = (e0 * x + e1 * y ) * padding_factor; XFLOAT yp = (e3 * x + e4 * y ) * padding_factor; #ifdef CUDA_NO_TEXTURES if (xp < 0) { // Get complex conjugated hermitian symmetry pair xp = -xp; yp = -yp; real = no_tex2D(mdlReal, xp, yp, mdlX, mdlInitY); imag = - no_tex2D(mdlImag, xp, yp, mdlX, mdlInitY); } else { real = no_tex2D(mdlReal, xp, yp, mdlX, mdlInitY); imag = no_tex2D(mdlImag, xp, yp, mdlX, mdlInitY); } #else #if(!COMPLEXTEXTURE) if (xp < 0) { // Get complex conjugated hermitian symmetry pair xp = -xp; yp = -yp; yp -= mdlInitY; real = tex2D(mdlReal, xp + (XFLOAT)0.5, yp + (XFLOAT)0.5); imag = - tex2D(mdlImag, xp + (XFLOAT)0.5, yp + (XFLOAT)0.5); } else { yp -= mdlInitY; real = tex2D(mdlReal, xp + (XFLOAT)0.5, yp + (XFLOAT)0.5); imag = tex2D(mdlImag, xp + (XFLOAT)0.5, yp + (XFLOAT)0.5); } #else CUDACOMPLEX val; if (xp < 0) { // Get complex conjugated hermitian symmetry pair xp = -xp; yp = -yp; yp -= mdlInitY; val = tex2D(mdlComplex, xp + (XFLOAT)0.5, yp + (XFLOAT)0.5); val.y = -val.y; } else { yp -= mdlInitY; val = tex2D(mdlComplex, xp + (XFLOAT)0.5, yp + (XFLOAT)0.5); } real=val.x; imag=val.y; #endif #endif } else { real=(XFLOAT)0; imag=(XFLOAT)0; } } static CudaProjectorKernel makeKernel(CudaProjector &p, int imgX, int imgY, int imgZ, int imgMaxR) { int maxR = p.mdlMaxR >= imgMaxR ? imgMaxR : p.mdlMaxR; CudaProjectorKernel k( p.mdlX, p.mdlY, p.mdlZ, imgX, imgY, imgZ, p.mdlInitY, p.mdlInitZ, p.padding_factor, maxR, #if(COMPLEXTEXTURE) *p.mdlComplex #else #ifndef CUDA_NO_TEXTURES *p.mdlReal, *p.mdlImag #else p.mdlReal, p.mdlImag #endif #endif ); return k; } }; #endif relion-3.1.3/src/gpu_utils/cuda_projector.h000066400000000000000000000025711411340063500207660ustar00rootroot00000000000000#ifndef CUDA_PROJECTOR_H_ #define CUDA_PROJECTOR_H_ #include "src/complex.h" #include "src/gpu_utils/cuda_settings.h" #include "src/gpu_utils/cuda_mem_utils.h" #include class CudaProjector { friend class CudaProjectorKernel; int mdlX, mdlY, mdlZ, mdlXYZ, mdlMaxR, mdlInitY, mdlInitZ, padding_factor; size_t allocaton_size; #ifndef CUDA_NO_TEXTURES #if(COMPLEXTEXTURE) XFLOAT *texArrayComplex2D; cudaArray_t *texArrayComplex; cudaTextureObject_t *mdlComplex; #else XFLOAT *texArrayReal2D, *texArrayImag2D; cudaArray_t *texArrayReal, *texArrayImag; cudaTextureObject_t *mdlReal, *mdlImag; #endif size_t pitch2D; #else XFLOAT *mdlReal, *mdlImag; #endif public: CudaProjector(): mdlX(0), mdlY(0), mdlZ(0), mdlXYZ(0), mdlMaxR(0), mdlInitY(0), mdlInitZ(0), padding_factor(0), allocaton_size(0) { #ifndef CUDA_NO_TEXTURES #if(COMPLEXTEXTURE) texArrayComplex2D = 0; texArrayComplex = 0; mdlComplex = 0; #else texArrayReal2D = 0; texArrayImag2D = 0; texArrayReal = 0; texArrayImag = 0; mdlReal = 0; mdlImag = 0; #endif pitch2D = 0; #else mdlReal = 0; mdlImag = 0; #endif } bool setMdlDim( int xdim, int ydim, int zdim, int inity, int initz, int maxr, int paddingFactor); void initMdl(XFLOAT *real, XFLOAT *imag); void initMdl(Complex *data); void clear(); ~CudaProjector() { clear(); }; }; #endif relion-3.1.3/src/gpu_utils/cuda_projector_plan.cu000066400000000000000000000306371411340063500221640ustar00rootroot00000000000000#include "src/gpu_utils/cuda_projector_plan.h" #include "src/time.h" #include //#define PP_TIMING #ifdef PP_TIMING Timer timer; int TIMING_TOP = timer.setNew("setup"); int TIMING_SAMPLING = timer.setNew(" sampling"); int TIMING_PRIOR = timer.setNew(" prior"); int TIMING_PROC_CALC = timer.setNew(" procCalc"); int TIMING_PROC = timer.setNew(" proc"); int 
TIMING_GEN = timer.setNew(" genOri"); int TIMING_PERTURB = timer.setNew(" perturb"); int TIMING_EULERS = timer.setNew(" eulers"); #define TIMING_TIC(id) timer.tic(id) #define TIMING_TOC(id) timer.toc(id) #else #define TIMING_TIC(id) #define TIMING_TOC(id) #endif void getOrientations(HealpixSampling &sampling, long int idir, long int ipsi, int oversampling_order, std::vector &my_rot, std::vector &my_tilt, std::vector &my_psi, std::vector &pointer_dir_nonzeroprior, std::vector &directions_prior, std::vector &pointer_psi_nonzeroprior, std::vector &psi_prior) { my_rot.clear(); my_tilt.clear(); my_psi.clear(); long int my_idir, my_ipsi; if (sampling.orientational_prior_mode == NOPRIOR) { my_idir = idir; my_ipsi = ipsi; } else { my_idir = pointer_dir_nonzeroprior[idir]; my_ipsi = pointer_psi_nonzeroprior[ipsi]; } if (oversampling_order == 0) { my_rot.push_back(sampling.rot_angles[my_idir]); my_tilt.push_back(sampling.tilt_angles[my_idir]); my_psi.push_back(sampling.psi_angles[my_ipsi]); } else if (!sampling.is_3D) { // for 2D sampling, only push back oversampled psi rotations sampling.pushbackOversampledPsiAngles(my_ipsi, oversampling_order, 0., 0., my_rot, my_tilt, my_psi); } else { // Set up oversampled grid for 3D sampling Healpix_Base HealPixOver(oversampling_order + sampling.healpix_order, NEST); int fact = HealPixOver.Nside()/sampling.healpix_base.Nside(); int x, y, face; RFLOAT rot, tilt; // Get x, y and face for the original, coarse grid long int ipix = sampling.directions_ipix[my_idir]; sampling.healpix_base.nest2xyf(ipix, x, y, face); // Loop over the oversampled Healpix pixels on the fine grid for (int j = fact * y; j < fact * (y+1); ++j) { for (int i = fact * x; i < fact * (x+1); ++i) { long int overpix = HealPixOver.xyf2nest(i, j, face); // this one always has to be double (also for SINGLE_PRECISION CALCULATIONS) for call to external library double zz, phi; HealPixOver.pix2ang_z_phi(overpix, zz, phi); rot = RAD2DEG(phi); tilt = ACOSD(zz); // The geometrical considerations about the symmetry below require that rot = [-180,180] and tilt [0,180] sampling.checkDirection(rot, tilt); sampling.pushbackOversampledPsiAngles(my_ipsi, oversampling_order, rot, tilt, my_rot, my_tilt, my_psi); } } } } template __global__ void cuda_kernel_make_eulers_2D( XFLOAT *alphas, XFLOAT *eulers, unsigned orientation_num) { unsigned oid = blockIdx.x * BLOCK_SIZE + threadIdx.x; //Orientation id if (oid >= orientation_num) return; XFLOAT ca, sa; XFLOAT a = alphas[oid] * (XFLOAT)PI / (XFLOAT)180.0; #ifdef CUDA_DOUBLE_PRECISION sincos(a, &sa, &ca); #else sincosf(a, &sa, &ca); #endif if(!invert) { eulers[9 * oid + 0] = ca;//00 eulers[9 * oid + 1] = sa;//01 eulers[9 * oid + 2] = 0 ;//02 eulers[9 * oid + 3] =-sa;//10 eulers[9 * oid + 4] = ca;//11 eulers[9 * oid + 5] = 0 ;//12 eulers[9 * oid + 6] = 0 ;//20 eulers[9 * oid + 7] = 0 ;//21 eulers[9 * oid + 8] = 1 ;//22 } else { eulers[9 * oid + 0] = ca;//00 eulers[9 * oid + 1] =-sa;//10 eulers[9 * oid + 2] = 0 ;//20 eulers[9 * oid + 3] = sa;//01 eulers[9 * oid + 4] = ca;//11 eulers[9 * oid + 5] = 0 ;//21 eulers[9 * oid + 6] = 0 ;//02 eulers[9 * oid + 7] = 0 ;//12 eulers[9 * oid + 8] = 1 ;//22 } } template __global__ void cuda_kernel_make_eulers_3D( XFLOAT *alphas, XFLOAT *betas, XFLOAT *gammas, XFLOAT *eulers, unsigned orientation_num, XFLOAT *R) { XFLOAT a(0.f),b(0.f),g(0.f), A[9],B[9]; XFLOAT ca, sa, cb, sb, cg, sg, cc, cs, sc, ss; unsigned oid = blockIdx.x * BLOCK_SIZE + threadIdx.x; //Orientation id if (oid >= orientation_num) return; for (int i = 0; i < 9; i ++) 
B[i] = (XFLOAT) 0.f; a = alphas[oid] * (XFLOAT)PI / (XFLOAT)180.0; b = betas[oid] * (XFLOAT)PI / (XFLOAT)180.0; g = gammas[oid] * (XFLOAT)PI / (XFLOAT)180.0; #ifdef CUDA_DOUBLE_PRECISION sincos(a, &sa, &ca); sincos(b, &sb, &cb); sincos(g, &sg, &cg); #else sincosf(a, &sa, &ca); sincosf(b, &sb, &cb); sincosf(g, &sg, &cg); #endif cc = cb * ca; cs = cb * sa; sc = sb * ca; ss = sb * sa; A[0] = ( cg * cc - sg * sa);//00 A[1] = ( cg * cs + sg * ca);//01 A[2] = (-cg * sb ) ;//02 A[3] = (-sg * cc - cg * sa);//10 A[4] = (-sg * cs + cg * ca);//11 A[5] = ( sg * sb ) ;//12 A[6] = ( sc ) ;//20 A[7] = ( ss ) ;//21 A[8] = ( cb ) ;//22 if (perturb) for (int i = 0; i < 3; i++) for (int j = 0; j < 3; j++) for (int k = 0; k < 3; k++) B[i * 3 + j] += A[i * 3 + k] * R[k * 3 + j]; else for (int i = 0; i < 9; i++) B[i] = A[i]; if(invert) { eulers[9 * oid + 0] = B[0];//00 eulers[9 * oid + 1] = B[3];//01 eulers[9 * oid + 2] = B[6];//02 eulers[9 * oid + 3] = B[1];//10 eulers[9 * oid + 4] = B[4];//11 eulers[9 * oid + 5] = B[7];//12 eulers[9 * oid + 6] = B[2];//20 eulers[9 * oid + 7] = B[5];//21 eulers[9 * oid + 8] = B[8];//22 } else { eulers[9 * oid + 0] = B[0];//00 eulers[9 * oid + 1] = B[1];//10 eulers[9 * oid + 2] = B[2];//20 eulers[9 * oid + 3] = B[3];//01 eulers[9 * oid + 4] = B[4];//11 eulers[9 * oid + 5] = B[5];//21 eulers[9 * oid + 6] = B[6];//02 eulers[9 * oid + 7] = B[7];//12 eulers[9 * oid + 8] = B[8];//22 } } void CudaProjectorPlan::setup( HealpixSampling &sampling, std::vector &directions_prior, std::vector &psi_prior, std::vector &pointer_dir_nonzeroprior, std::vector &pointer_psi_nonzeroprior, MultidimArray *Mcoarse_significant, std::vector &pdf_class, std::vector > &pdf_direction, unsigned long nr_dir, unsigned long nr_psi, unsigned long idir_min, unsigned long idir_max, unsigned long ipsi_min, unsigned long ipsi_max, unsigned long itrans_min, unsigned long itrans_max, unsigned long current_oversampling, unsigned long nr_oversampled_rot, unsigned iclass, bool coarse, bool inverseMatrix, bool do_skip_align, bool do_skip_rotate, int orientational_prior_mode) { TIMING_TIC(TIMING_TOP); std::vector< RFLOAT > oversampled_rot, oversampled_tilt, oversampled_psi; CudaGlobalPtr alphas(nr_dir * nr_psi * nr_oversampled_rot * 9, eulers.getAllocator()); CudaGlobalPtr betas (nr_dir * nr_psi * nr_oversampled_rot * 9, eulers.getAllocator()); CudaGlobalPtr gammas(nr_dir * nr_psi * nr_oversampled_rot * 9, eulers.getAllocator()); CudaGlobalPtr perturb(9, eulers.getAllocator()); eulers.free_if_set(); eulers.setSize(nr_dir * nr_psi * nr_oversampled_rot * 9); eulers.host_alloc(); iorientclasses.free_if_set(); iorientclasses.setSize(nr_dir * nr_psi * nr_oversampled_rot); iorientclasses.host_alloc(); orientation_num = 0; Matrix2D R(3,3); RFLOAT myperturb(0.); if (ABS(sampling.random_perturbation) > 0.) 
{ myperturb = sampling.random_perturbation * sampling.getAngularSampling(); if (sampling.is_3D) { Euler_angles2matrix(myperturb, myperturb, myperturb, R); for (int i = 0; i < 9; i ++) perturb[i] = (XFLOAT) R.mdata[i]; perturb.put_on_device(); } } TIMING_TIC(TIMING_SAMPLING); for (long int idir = idir_min, iorient = 0; idir <= idir_max; idir++) { for (long int ipsi = ipsi_min, ipart = 0; ipsi <= ipsi_max; ipsi++, iorient++) { long int iorientclass = iclass * nr_dir * nr_psi + iorient; TIMING_TIC(TIMING_PRIOR); // Get prior for this direction and skip calculation if prior==0 RFLOAT pdf_orientation; if (do_skip_align || do_skip_rotate) { pdf_orientation = pdf_class[iclass]; } else if (orientational_prior_mode == NOPRIOR) { pdf_orientation = DIRECT_MULTIDIM_ELEM(pdf_direction[iclass], idir); } else { pdf_orientation = directions_prior[idir] * psi_prior[ipsi]; } TIMING_TOC(TIMING_PRIOR); // In the first pass, always proceed // In the second pass, check whether one of the translations for this orientation of any of the particles had a significant weight in the first pass // if so, proceed with projecting the reference in that direction bool do_proceed(false); TIMING_TIC(TIMING_PROC_CALC); if (coarse && pdf_orientation > 0.) do_proceed = true; else if (pdf_orientation > 0.) { long int nr_trans = itrans_max - itrans_min + 1; for (long int ipart = 0; ipart < YSIZE(*Mcoarse_significant); ipart++) { long int ihidden = iorient * nr_trans; for (long int itrans = itrans_min; itrans <= itrans_max; itrans++, ihidden++) { if (DIRECT_A2D_ELEM(*Mcoarse_significant, ipart, ihidden)) { do_proceed = true; break; } } } } TIMING_TOC(TIMING_PROC_CALC); TIMING_TIC(TIMING_PROC); if (do_proceed) { // Now get the oversampled (rot, tilt, psi) triplets // This will be only the original (rot,tilt,psi) triplet in the first pass (sp.current_oversampling==0) TIMING_TIC(TIMING_GEN); getOrientations(sampling, idir, ipsi, current_oversampling, oversampled_rot, oversampled_tilt, oversampled_psi, pointer_dir_nonzeroprior, directions_prior, pointer_psi_nonzeroprior, psi_prior); TIMING_TOC(TIMING_GEN); // Loop over all oversampled orientations (only a single one in the first pass) for (long int iover_rot = 0; iover_rot < nr_oversampled_rot; iover_rot++, ipart++) { if (sampling.is_3D) { alphas[orientation_num] = oversampled_rot[iover_rot]; betas[orientation_num] = oversampled_tilt[iover_rot]; gammas[orientation_num] = oversampled_psi[iover_rot]; } else { alphas[orientation_num] = oversampled_psi[iover_rot] + myperturb; } iorientclasses[orientation_num] = iorientclass; orientation_num ++; } } TIMING_TOC(TIMING_PROC); } } TIMING_TOC(TIMING_SAMPLING); iorientclasses.setSize(orientation_num); iorientclasses.put_on_device(); eulers.setSize(orientation_num * 9); eulers.device_alloc(); alphas.setSize(orientation_num); alphas.put_on_device(); if(sampling.is_3D) { betas.setSize(orientation_num); betas.put_on_device(); gammas.setSize(orientation_num); gammas.put_on_device(); } int grid_size = ceil((float)orientation_num/(float)BLOCK_SIZE); if(inverseMatrix) if(sampling.is_3D) if (ABS(sampling.random_perturbation) > 0.) cuda_kernel_make_eulers_3D<<>>( ~alphas, ~betas, ~gammas, ~eulers, orientation_num, ~perturb); else cuda_kernel_make_eulers_3D<<>>( ~alphas, ~betas, ~gammas, ~eulers, orientation_num, NULL); else cuda_kernel_make_eulers_2D<<>>( ~alphas, ~eulers, orientation_num); else if(sampling.is_3D) if (ABS(sampling.random_perturbation) > 0.) 
cuda_kernel_make_eulers_3D<<>>( ~alphas, ~betas, ~gammas, ~eulers, orientation_num, ~perturb); else cuda_kernel_make_eulers_3D<<>>( ~alphas, ~betas, ~gammas, ~eulers, orientation_num, NULL); else cuda_kernel_make_eulers_2D<<>>( ~alphas, ~eulers, orientation_num); TIMING_TOC(TIMING_TOP); } void CudaProjectorPlan::printTo(std::ostream &os) // print { os << "orientation_num = " << orientation_num << std::endl; os << "iorientclasses.size = " << iorientclasses.getSize() << std::endl; os << std::endl << "iorientclasses\tiover_rots\teulers" << std::endl; for (int i = 0; i < iorientclasses.getSize(); i ++) { os << iorientclasses[i] << "\t\t" << "\t"; for (int j = 0; j < 9; j++) os << eulers[i * 9 + j] << "\t"; os << std::endl; } } void CudaProjectorPlan::clear() { orientation_num = 0; iorientclasses.free_if_set(); iorientclasses.setSize(0); eulers.free_if_set(); eulers.setSize(0); #ifdef PP_TIMING timer.printTimes(false); #endif } relion-3.1.3/src/gpu_utils/cuda_projector_plan.h000066400000000000000000000030151411340063500217720ustar00rootroot00000000000000#ifndef CUDA_PROJECTOR_PLAN_H_ #define CUDA_PROJECTOR_PLAN_H_ #include #include "src/gpu_utils/cuda_settings.h" #include "src/gpu_utils/cuda_mem_utils.h" #include "src/healpix_sampling.h" #include #include class CudaProjectorPlan { public: CudaGlobalPtr< long unsigned> iorientclasses; CudaGlobalPtr eulers; long unsigned orientation_num; CudaProjectorPlan(CudaCustomAllocator *allocator): iorientclasses(allocator), eulers(allocator), orientation_num(0) {}; //Copy constructor CudaProjectorPlan( const CudaProjectorPlan& other ): iorientclasses(other.iorientclasses), eulers(other.eulers), orientation_num(other.orientation_num) {}; void setup( HealpixSampling &sampling, std::vector &directions_prior, std::vector &psi_prior, std::vector &pointer_dir_nonzeroprior, std::vector &pointer_psi_nonzeroprior, MultidimArray *Mcoarse_significant, std::vector &pdf_class, std::vector > &pdf_direction, unsigned long nr_dir, unsigned long nr_psi, unsigned long nr_oversampled_rot, unsigned long idir_min, unsigned long idir_max, unsigned long ipsi_min, unsigned long ipsi_max, unsigned long itrans_min, unsigned long itrans_max, unsigned long current_oversampling, unsigned iclass, bool coarse, bool inverseMatrix, bool do_skip_align, bool do_skip_rotate, int orientational_prior_mode); void printTo(std::ostream &os); // print void clear(); }; #endif relion-3.1.3/src/gpu_utils/cuda_settings.h000066400000000000000000000044201411340063500206120ustar00rootroot00000000000000#ifndef CUDA_SETTINGS_H_ #define CUDA_SETTINGS_H_ // Required compute capability #define CUDA_CC_MAJOR 3 #define CUDA_CC_MINOR 5 #define COMPLEXTEXTURE false #define LAUNCH_CHECK #define CUDA_BENCHMARK_OLD true #ifdef CUDA_DOUBLE_PRECISION #define XFLOAT double #define CUDACOMPLEX double2 #else #define XFLOAT float #define CUDACOMPLEX float2 #endif #ifdef RELION_SINGLE_PRECISION #define RFLOAT float #else #define RFLOAT double #endif // GENERAL ----------------------------- #define MAX_RESOL_SHARED_MEM 32 #define BLOCK_SIZE 128 // ------------------------------------- // COARSE DIFF ------------------------- #define D2C_BLOCK_SIZE_2D 512 #define D2C_EULERS_PER_BLOCK_2D 4 #define D2C_BLOCK_SIZE_REF3D 128 #define D2C_EULERS_PER_BLOCK_REF3D 16 #define D2C_BLOCK_SIZE_DATA3D 64 #define D2C_EULERS_PER_BLOCK_DATA3D 32 // ------------------------------------- // FINE DIFF --------------------------- #define D2F_BLOCK_SIZE_2D 256 #define D2F_CHUNK_2D 7 #define D2F_BLOCK_SIZE_REF3D 256 #define D2F_CHUNK_REF3D 7 
#define D2F_BLOCK_SIZE_DATA3D 512 #define D2F_CHUNK_DATA3D 4 // ------------------------------------- // WAVG -------------------------------- #define WAVG_BLOCK_SIZE_DATA3D 512 #define WAVG_BLOCK_SIZE 256 // ------------------------------------- // MISC -------------------------------- #define SUMW_BLOCK_SIZE 32 #define SOFTMASK_BLOCK_SIZE 128 #define CFTT_BLOCK_SIZE 128 #define PROBRATIO_BLOCK_SIZE 128 #define POWERCLASS_BLOCK_SIZE 128 #define PROJDIFF_CHUNK_SIZE 14 // ------------------------------------- #define BACKPROJECTION4_BLOCK_SIZE 64 #define BACKPROJECTION4_GROUP_SIZE 16 #define BACKPROJECTION4_PREFETCH_COUNT 3 #define BP_2D_BLOCK_SIZE 128 #define BP_REF3D_BLOCK_SIZE 128 #define BP_DATA3D_BLOCK_SIZE 640 #define REF_GROUP_SIZE 3 // -- Number of references to be treated per block -- // This applies to wavg and reduces global memory // accesses roughly proportionally, but scales shared // memory usage by allocating // ( 6*REF_GROUP_SIZE + 4 ) * BLOCK_SIZE XFLOATS. // DEPRECATED #define NR_CLASS_MUTEXES 5 //The approximate minimum amount of memory each process occupies on a device (in MBs) #define GPU_THREAD_MEMORY_OVERHEAD_MB 200 #endif /* CUDA_SETTINGS_H_ */ relion-3.1.3/src/gpu_utils/cuda_skunks.cuh000066400000000000000000000064231411340063500206250ustar00rootroot00000000000000#ifndef CUDA_SKUNKS_CUH_ #define CUDA_SKUNKS_CUH_ #include "src/projector.h" #include "src/multidim_array.h" #include "src/fftw.h" void computeFourierTransformMap(Projector *P, MultidimArray &vol_in, MultidimArray &power_spectrum, int current_size = -1, int nr_threads = 1, bool do_gridding = true) { MultidimArray Mpad; MultidimArray Faux; FourierTransformer transformer; RFLOAT normfft; // Size of padded real-space volume int padoridim = P->padding_factor * P->ori_size; // Initialize data array of the oversampled transform P->ref_dim = vol_in.getDim(); // Make Mpad switch (P->ref_dim) { case 2: Mpad.initZeros(padoridim, padoridim); normfft = (RFLOAT)(P->padding_factor * P->padding_factor); break; case 3: Mpad.initZeros(padoridim, padoridim, padoridim); if (P->data_dim ==3) normfft = (RFLOAT)(P->padding_factor * P->padding_factor * P->padding_factor); else normfft = (RFLOAT)(P->padding_factor * P->padding_factor * P->padding_factor * P->ori_size); break; default: REPORT_ERROR("Projector::computeFourierTransformMap%%ERROR: Dimension of the data array should be 2 or 3"); } // First do a gridding pre-correction on the real-space map: // Divide by the inverse Fourier transform of the interpolator in Fourier-space // 10feb11: at least in 2D case, this seems to be the wrong thing to do!!! // TODO: check what is best for subtomo! 
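// Added overview (summary of the steps below): the map is gridding-corrected,
// copied into the zero-padded array Mpad, recentred with CenterFFT and
// Fourier-transformed into Faux; every voxel with
// r2 <= (r_max * padding_factor)^2 is then copied into P->data scaled by normfft,
// while the radial power spectrum and its counter are accumulated and finally
// normalised.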
if (do_gridding)// && data_dim != 3) P->griddingCorrect(vol_in); // Pad translated map with zeros vol_in.setXmippOrigin(); Mpad.setXmippOrigin(); FOR_ALL_ELEMENTS_IN_ARRAY3D(vol_in) // This will also work for 2D A3D_ELEM(Mpad, k, i, j) = A3D_ELEM(vol_in, k, i, j); // Translate padded map to put origin of FT in the center CenterFFT(Mpad, true); // Calculate the oversampled Fourier transform transformer.FourierTransform(Mpad, Faux, false); // Free memory: Mpad no longer needed Mpad.clear(); // Resize data array to the right size and initialise to zero P->initZeros(current_size); // Fill data only for those points with distance to origin less than max_r // (other points will be zero because of initZeros() call above // Also calculate radial power spectrum power_spectrum.initZeros(P->ori_size / 2 + 1); MultidimArray counter(power_spectrum); counter.initZeros(); int max_r2 = P->r_max * P->r_max * P->padding_factor * P->padding_factor; FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM(Faux) // This will also work for 2D { int r2 = kp*kp + ip*ip + jp*jp; // The Fourier Transforms are all "normalised" for 2D transforms of size = ori_size x ori_size if (r2 <= max_r2) { // Set data array A3D_ELEM(P->data, kp, ip, jp) = DIRECT_A3D_ELEM(Faux, k, i, j) * normfft; // Calculate power spectrum int ires = ROUND( sqrt((RFLOAT)r2) / P->padding_factor ); // Factor two because of two-dimensionality of the complex plane DIRECT_A1D_ELEM(power_spectrum, ires) += norm(A3D_ELEM(P->data, kp, ip, jp)) / 2.; DIRECT_A1D_ELEM(counter, ires) += 1.; } } // Calculate radial average of power spectrum FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY1D(power_spectrum) { if (DIRECT_A1D_ELEM(counter, i) < 1.) DIRECT_A1D_ELEM(power_spectrum, i) = 0.; else DIRECT_A1D_ELEM(power_spectrum, i) /= DIRECT_A1D_ELEM(counter, i); } } #endif //CUDA_SKUNKS_CUH_ relion-3.1.3/src/gpu_utils/cuda_utils_cub.cuh000066400000000000000000000252061411340063500213000ustar00rootroot00000000000000#ifndef CUDA_UTILS_CUB_CUH_ #define CUDA_UTILS_CUB_CUH_ #include #include "src/gpu_utils/cuda_settings.h" #include "src/gpu_utils/cuda_mem_utils.h" #include #include #include // Because thrust uses CUB, thrust defines CubLog and CUB tries to redefine it, // resulting in warnings. This avoids those warnings. 
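// Added note: for device compilation targeting compute capability <= 5.2, an
// already-defined CubLog is #undef'ed before the bundled CUB headers are included;
// for newer architectures the macro is left alone on purpose so that the
// redefinition warning still surfaces (see the comment on the #if below).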
#if(defined(CubLog) && defined(__CUDA_ARCH__) && (__CUDA_ARCH__<= 520)) // Intentionally force a warning for new arch
	#undef CubLog
#endif

#include "src/gpu_utils/cub/device/device_radix_sort.cuh"
#include "src/gpu_utils/cub/device/device_reduce.cuh"
#include "src/gpu_utils/cub/device/device_scan.cuh"
#include "src/gpu_utils/cub/device/device_select.cuh"

template <typename T>
static std::pair<int, T> getArgMaxOnDevice(CudaGlobalPtr<T> &ptr)
{
#ifdef DEBUG_CUDA
	if (ptr.size == 0) printf("DEBUG_WARNING: getArgMaxOnDevice called with pointer of zero size.\n");
	if (ptr.d_ptr == NULL) printf("DEBUG_WARNING: getArgMaxOnDevice called with null device pointer.\n");
	if (ptr.getAllocator() == NULL) printf("DEBUG_WARNING: getArgMaxOnDevice called with null allocator.\n");
#endif
	CudaGlobalPtr<cub::KeyValuePair<int, T> > max_pair(1, ptr.getStream(), ptr.getAllocator());
	max_pair.device_alloc();
	size_t temp_storage_size = 0;
	DEBUG_HANDLE_ERROR(cub::DeviceReduce::ArgMax( NULL, temp_storage_size, ~ptr, ~max_pair, ptr.size));
	if (temp_storage_size == 0) temp_storage_size = 1;
	CudaCustomAllocator::Alloc* alloc = ptr.getAllocator()->alloc(temp_storage_size);
	DEBUG_HANDLE_ERROR(cub::DeviceReduce::ArgMax( alloc->getPtr(), temp_storage_size, ~ptr, ~max_pair, ptr.size, ptr.getStream()));
	max_pair.cp_to_host();
	ptr.streamSync();
	ptr.getAllocator()->free(alloc);
	std::pair<int, T> pair;
	pair.first = max_pair[0].key;
	pair.second = max_pair[0].value;
	return pair;
}

template <typename T>
static std::pair<int, T> getArgMinOnDevice(CudaGlobalPtr<T> &ptr)
{
#ifdef DEBUG_CUDA
	if (ptr.size == 0) printf("DEBUG_WARNING: getArgMinOnDevice called with pointer of zero size.\n");
	if (ptr.d_ptr == NULL) printf("DEBUG_WARNING: getArgMinOnDevice called with null device pointer.\n");
	if (ptr.getAllocator() == NULL) printf("DEBUG_WARNING: getArgMinOnDevice called with null allocator.\n");
#endif
	CudaGlobalPtr<cub::KeyValuePair<int, T> > min_pair(1, ptr.getStream(), ptr.getAllocator());
	min_pair.device_alloc();
	size_t temp_storage_size = 0;
	DEBUG_HANDLE_ERROR(cub::DeviceReduce::ArgMin( NULL, temp_storage_size, ~ptr, ~min_pair, ptr.size));
	if (temp_storage_size == 0) temp_storage_size = 1;
	CudaCustomAllocator::Alloc* alloc = ptr.getAllocator()->alloc(temp_storage_size);
	DEBUG_HANDLE_ERROR(cub::DeviceReduce::ArgMin( alloc->getPtr(), temp_storage_size, ~ptr, ~min_pair, ptr.size, ptr.getStream()));
	min_pair.cp_to_host();
	ptr.streamSync();
	ptr.getAllocator()->free(alloc);
	std::pair<int, T> pair;
	pair.first = min_pair[0].key;
	pair.second = min_pair[0].value;
	return pair;
}

template <typename T>
static T getMaxOnDevice(CudaGlobalPtr<T> &ptr)
{
#ifdef DEBUG_CUDA
	if (ptr.size == 0) printf("DEBUG_ERROR: getMaxOnDevice called with pointer of zero size.\n");
	if (ptr.d_ptr == NULL) printf("DEBUG_ERROR: getMaxOnDevice called with null device pointer.\n");
	if (ptr.getAllocator() == NULL) printf("DEBUG_ERROR: getMaxOnDevice called with null allocator.\n");
#endif
	CudaGlobalPtr<T> max_val(1, ptr.getStream(), ptr.getAllocator());
	max_val.device_alloc();
	size_t temp_storage_size = 0;
	DEBUG_HANDLE_ERROR(cub::DeviceReduce::Max( NULL, temp_storage_size, ~ptr, ~max_val, ptr.size));
	if (temp_storage_size == 0) temp_storage_size = 1;
	CudaCustomAllocator::Alloc* alloc = ptr.getAllocator()->alloc(temp_storage_size);
	DEBUG_HANDLE_ERROR(cub::DeviceReduce::Max( alloc->getPtr(), temp_storage_size, ~ptr, ~max_val, ptr.size, ptr.getStream()));
	max_val.cp_to_host();
	ptr.streamSync();
	ptr.getAllocator()->free(alloc);
	return max_val[0];
}

template <typename T>
static T getMinOnDevice(CudaGlobalPtr<T> &ptr)
{
#ifdef DEBUG_CUDA
	if (ptr.size == 0) printf("DEBUG_ERROR: getMinOnDevice called with pointer of zero size.\n");
	if (ptr.d_ptr == NULL) printf("DEBUG_ERROR: getMinOnDevice called with null device pointer.\n");
	if (ptr.getAllocator() == NULL) printf("DEBUG_ERROR: getMinOnDevice called with null allocator.\n");
#endif
	CudaGlobalPtr<T> min_val(1, ptr.getStream(), ptr.getAllocator());
	min_val.device_alloc();
	size_t temp_storage_size = 0;
	DEBUG_HANDLE_ERROR(cub::DeviceReduce::Min( NULL, temp_storage_size, ~ptr, ~min_val, ptr.size));
	if (temp_storage_size == 0) temp_storage_size = 1;
	CudaCustomAllocator::Alloc* alloc = ptr.getAllocator()->alloc(temp_storage_size);
	DEBUG_HANDLE_ERROR(cub::DeviceReduce::Min( alloc->getPtr(), temp_storage_size, ~ptr, ~min_val, ptr.size, ptr.getStream()));
	min_val.cp_to_host();
	ptr.streamSync();
	ptr.getAllocator()->free(alloc);
	return min_val[0];
}

template <typename T>
static T getSumOnDevice(CudaGlobalPtr<T> &ptr)
{
#ifdef DEBUG_CUDA
	if (ptr.size == 0) printf("DEBUG_ERROR: getSumOnDevice called with pointer of zero size.\n");
	if (ptr.d_ptr == NULL) printf("DEBUG_ERROR: getSumOnDevice called with null device pointer.\n");
	if (ptr.getAllocator() == NULL) printf("DEBUG_ERROR: getSumOnDevice called with null allocator.\n");
#endif
	CudaGlobalPtr<T> val(1, ptr.getStream(), ptr.getAllocator());
	val.device_alloc();
	size_t temp_storage_size = 0;
	DEBUG_HANDLE_ERROR(cub::DeviceReduce::Sum( NULL, temp_storage_size, ~ptr, ~val, ptr.size));
	if (temp_storage_size == 0) temp_storage_size = 1;
	CudaCustomAllocator::Alloc* alloc = ptr.getAllocator()->alloc(temp_storage_size);
	DEBUG_HANDLE_ERROR(cub::DeviceReduce::Sum( alloc->getPtr(), temp_storage_size, ~ptr, ~val, ptr.size, ptr.getStream()));
	val.cp_to_host();
	ptr.streamSync();
	ptr.getAllocator()->free(alloc);
	return val[0];
}

template <typename T>
static void sortOnDevice(CudaGlobalPtr<T> &in, CudaGlobalPtr<T> &out)
{
#ifdef DEBUG_CUDA
	if (in.size == 0 || out.size == 0) printf("DEBUG_ERROR: sortOnDevice called with pointer of zero size.\n");
	if (in.d_ptr == NULL || out.d_ptr == NULL) printf("DEBUG_ERROR: sortOnDevice called with null device pointer.\n");
	if (in.getAllocator() == NULL) printf("DEBUG_ERROR: sortOnDevice called with null allocator.\n");
#endif
	size_t temp_storage_size = 0;
	cudaStream_t stream = in.getStream();
	DEBUG_HANDLE_ERROR(cub::DeviceRadixSort::SortKeys( NULL, temp_storage_size, ~in, ~out, in.size));
	if (temp_storage_size == 0) temp_storage_size = 1;
	CudaCustomAllocator::Alloc* alloc = in.getAllocator()->alloc(temp_storage_size);
	DEBUG_HANDLE_ERROR(cub::DeviceRadixSort::SortKeys( alloc->getPtr(), temp_storage_size, ~in, ~out, in.size, 0, sizeof(T) * 8, stream));
	alloc->markReadyEvent(stream);
	alloc->doFreeWhenReady();
}

template <typename T>
static void sortDescendingOnDevice(CudaGlobalPtr<T> &in, CudaGlobalPtr<T> &out)
{
#ifdef DEBUG_CUDA
	if (in.size == 0 || out.size == 0) printf("DEBUG_ERROR: sortDescendingOnDevice called with pointer of zero size.\n");
	if (in.d_ptr == NULL || out.d_ptr == NULL) printf("DEBUG_ERROR: sortDescendingOnDevice called with null device pointer.\n");
	if (in.getAllocator() == NULL) printf("DEBUG_ERROR: sortDescendingOnDevice called with null allocator.\n");
#endif
	size_t temp_storage_size = 0;
	cudaStream_t stream = in.getStream();
	DEBUG_HANDLE_ERROR(cub::DeviceRadixSort::SortKeysDescending( NULL, temp_storage_size, ~in, ~out, in.size));
	if (temp_storage_size == 0) temp_storage_size = 1;
	CudaCustomAllocator::Alloc* alloc = in.getAllocator()->alloc(temp_storage_size);
	DEBUG_HANDLE_ERROR(cub::DeviceRadixSort::SortKeysDescending( alloc->getPtr(), temp_storage_size, ~in, ~out, in.size, 0, sizeof(T) * 8, stream));
	alloc->markReadyEvent(stream);
	alloc->doFreeWhenReady();
}

class AllocatorThrustWrapper
{
public:
	// just allocate bytes
	typedef char value_type;
	std::vector<CudaCustomAllocator::Alloc*> allocs;
	CudaCustomAllocator *allocator;

	AllocatorThrustWrapper(CudaCustomAllocator *allocator):
		allocator(allocator)
	{}

	~AllocatorThrustWrapper()
	{
		for (int i = 0; i < allocs.size(); i ++)
			allocator->free(allocs[i]);
	}

	char* allocate(std::ptrdiff_t num_bytes)
	{
		CudaCustomAllocator::Alloc* alloc = allocator->alloc(num_bytes);
		allocs.push_back(alloc);
		return (char*) alloc->getPtr();
	}

	void deallocate(char* ptr, size_t n)
	{
		//TODO fix this (works fine without it though) /Dari
	}
};

template <typename T>
struct MoreThanCubOpt
{
	T compare;
	MoreThanCubOpt(T compare) : compare(compare) {}
	__device__ __forceinline__
	bool operator()(const T &a) const
	{
		return (a > compare);
	}
};

template <typename T, typename SelectOp>
static int filterOnDevice(CudaGlobalPtr<T> &in, CudaGlobalPtr<T> &out, SelectOp select_op)
{
#ifdef DEBUG_CUDA
	if (in.size == 0 || out.size == 0) printf("DEBUG_ERROR: filterOnDevice called with pointer of zero size.\n");
	if (in.d_ptr == NULL || out.d_ptr == NULL) printf("DEBUG_ERROR: filterOnDevice called with null device pointer.\n");
	if (in.getAllocator() == NULL) printf("DEBUG_ERROR: filterOnDevice called with null allocator.\n");
#endif
	size_t temp_storage_size = 0;
	cudaStream_t stream = in.getStream();
	CudaGlobalPtr<int> num_selected_out(1, stream, in.getAllocator());
	num_selected_out.device_alloc();
	DEBUG_HANDLE_ERROR(cub::DeviceSelect::If(NULL, temp_storage_size, ~in, ~out, ~num_selected_out, in.size, select_op, stream));
	if (temp_storage_size == 0) temp_storage_size = 1;
	CudaCustomAllocator::Alloc* alloc = in.getAllocator()->alloc(temp_storage_size);
	DEBUG_HANDLE_ERROR(cub::DeviceSelect::If(alloc->getPtr(), temp_storage_size, ~in, ~out, ~num_selected_out, in.size, select_op, stream));
	num_selected_out.cp_to_host();
	DEBUG_HANDLE_ERROR(cudaStreamSynchronize(stream));
	in.getAllocator()->free(alloc);
	return num_selected_out[0];
}

template <typename T>
static void scanOnDevice(CudaGlobalPtr<T> &in, CudaGlobalPtr<T> &out)
{
#ifdef DEBUG_CUDA
	if (in.size == 0 || out.size == 0) printf("DEBUG_ERROR: scanOnDevice called with pointer of zero size.\n");
	if (in.d_ptr == NULL || out.d_ptr == NULL) printf("DEBUG_ERROR: scanOnDevice called with null device pointer.\n");
	if (in.getAllocator() == NULL) printf("DEBUG_ERROR: scanOnDevice called with null allocator.\n");
#endif
	size_t temp_storage_size = 0;
	cudaStream_t stream = in.getStream();
	DEBUG_HANDLE_ERROR(cub::DeviceScan::InclusiveSum( NULL, temp_storage_size, ~in, ~out, in.size));
	if (temp_storage_size == 0) temp_storage_size = 1;
	CudaCustomAllocator::Alloc* alloc = in.getAllocator()->alloc(temp_storage_size);
	DEBUG_HANDLE_ERROR(cub::DeviceScan::InclusiveSum( alloc->getPtr(), temp_storage_size, ~in, ~out, in.size, stream));
	alloc->markReadyEvent(stream);
	alloc->doFreeWhenReady();
}

#endif
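// ---------------------------------------------------------------------------
// Illustrative usage sketch (not part of the original header): how the
// CUB-backed helpers above might be combined from host code. The function
// name, the buffer size and the thresholding rule are hypothetical; XFLOAT is
// the precision typedef from cuda_settings.h, and the stream and allocator
// are assumed to have been set up elsewhere in the calling code.
static void exampleDeviceReductionUsage(cudaStream_t stream, CudaCustomAllocator *allocator)
{
	CudaGlobalPtr<XFLOAT> weights(1000, stream, allocator);
	weights.device_alloc();
	// ... fill ~weights on the device, e.g. with per-orientation weights ...

	// Index and value of the largest element, found without copying the whole
	// array back to the host.
	std::pair<int, XFLOAT> best = getArgMaxOnDevice(weights);

	// Total weight, e.g. for later normalisation.
	XFLOAT total = getSumOnDevice(weights);

	// Compact all weights above the mean into a second device array and
	// retrieve how many elements passed the selection.
	MoreThanCubOpt<XFLOAT> aboveMean(total / (XFLOAT)weights.size);
	CudaGlobalPtr<XFLOAT> selected(weights.size, stream, allocator);
	selected.device_alloc();
	int num_selected = filterOnDevice(weights, selected, aboveMean);
}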
relion-3.1.3/src/gpu_utils/cuda_utils_stl.cuh
#ifndef CUDA_UTILS_STL_CUH_
#define CUDA_UTILS_STL_CUH_

#include <cuda_runtime.h>
#include "src/gpu_utils/cuda_settings.h"
#include "src/gpu_utils/cuda_mem_utils.h"
#include <stdio.h>
#include <utility>
#include <algorithm>
#include <iterator>
#include "src/gpu_utils/cub/device/device_radix_sort.cuh"
#include "src/gpu_utils/cub/device/device_reduce.cuh"

template <typename T>
static T getMaxOnDevice(CudaGlobalPtr<T> &ptr)
{
#ifdef DEBUG_CUDA
	if (ptr.size == 0) printf("DEBUG_ERROR: getMaxOnDevice called with pointer of zero size.\n");
	if (ptr.d_ptr == NULL) printf("DEBUG_ERROR: getMaxOnDevice called with null device pointer.\n");
	if (ptr.getAllocator() == NULL) printf("DEBUG_ERROR: getMaxOnDevice called with null allocator.\n");
#endif
	ptr.cp_to_host();
	DEBUG_HANDLE_ERROR(cudaStreamSynchronize(0));
	return (T)*std::max_element(ptr.h_ptr, ptr.h_ptr + ptr.size);
}

template <typename T>
static std::pair<int, T> getArgMaxOnDevice(CudaGlobalPtr<T> &ptr)
{
#ifdef DEBUG_CUDA
	if (ptr.size == 0) printf("DEBUG_WARNING: getArgMaxOnDevice called with pointer of zero size.\n");
	if (ptr.d_ptr == NULL) printf("DEBUG_WARNING: getArgMaxOnDevice called with null device pointer.\n");
	if (ptr.getAllocator() == NULL) printf("DEBUG_WARNING: getArgMaxOnDevice called with null allocator.\n");
#endif
	std::pair<int, T> max_pair;
	ptr.cp_to_host();
	DEBUG_HANDLE_ERROR(cudaStreamSynchronize(0));
	max_pair.first = std::distance(ptr.h_ptr, std::max_element(ptr.h_ptr, ptr.h_ptr + ptr.size));
	max_pair.second = ptr.h_ptr[max_pair.first];
	return max_pair;
}

template <typename T>
static T getMinOnDevice(CudaGlobalPtr<T> &ptr)
{
#ifdef DEBUG_CUDA
	if (ptr.size == 0) printf("DEBUG_ERROR: getMinOnDevice called with pointer of zero size.\n");
	if (ptr.d_ptr == NULL) printf("DEBUG_ERROR: getMinOnDevice called with null device pointer.\n");
	if (ptr.getAllocator() == NULL) printf("DEBUG_ERROR: getMinOnDevice called with null allocator.\n");
#endif
	ptr.cp_to_host();
	DEBUG_HANDLE_ERROR(cudaStreamSynchronize(0));
	return (T)*std::min_element(ptr.h_ptr, ptr.h_ptr + ptr.size);
}

template <typename T>
static std::pair<int, T> getArgMinOnDevice(CudaGlobalPtr<T> &ptr)
{
#ifdef DEBUG_CUDA
	if (ptr.size == 0) printf("DEBUG_WARNING: getArgMinOnDevice called with pointer of zero size.\n");
	if (ptr.d_ptr == NULL) printf("DEBUG_WARNING: getArgMinOnDevice called with null device pointer.\n");
	if (ptr.getAllocator() == NULL) printf("DEBUG_WARNING: getArgMinOnDevice called with null allocator.\n");
#endif
	std::pair<int, T> min_pair;
	ptr.cp_to_host();
	DEBUG_HANDLE_ERROR(cudaStreamSynchronize(0));
	min_pair.first = std::distance(ptr.h_ptr, std::min_element(ptr.h_ptr, ptr.h_ptr + ptr.size));
	min_pair.second = ptr.h_ptr[min_pair.first];
	return min_pair;
}

template <typename T>
static T getSumOnDevice(CudaGlobalPtr<T> &ptr)
{
#ifdef DEBUG_CUDA
	if (ptr.size == 0) printf("DEBUG_ERROR: getSumOnDevice called with pointer of zero size.\n");
	if (ptr.d_ptr == NULL) printf("DEBUG_ERROR: getSumOnDevice called with null device pointer.\n");
	if (ptr.getAllocator() == NULL) printf("DEBUG_ERROR: getSumOnDevice called with null allocator.\n");
#endif
	ptr.cp_to_host();
	DEBUG_HANDLE_ERROR(cudaStreamSynchronize(0));
	T sum(0);
	for (long int i = 0; i < ptr.size; i++)
		sum += ptr.h_ptr[i];
	return sum;
}

template <typename T>
static void sortOnDevice(CudaGlobalPtr<T> &in, CudaGlobalPtr<T> &out)
{
#ifdef DEBUG_CUDA
	if (in.size == 0 || out.size == 0) printf("DEBUG_ERROR: sortOnDevice called with pointer of zero size.\n");
	if (in.d_ptr == NULL || out.d_ptr == NULL) printf("DEBUG_ERROR: sortOnDevice called with null device pointer.\n");
	if (in.getAllocator() == NULL) printf("DEBUG_ERROR: sortOnDevice called with null allocator.\n");
#endif
	in.cp_to_host();
	DEBUG_HANDLE_ERROR(cudaStreamSynchronize(0));
	std::sort(in.h_ptr, in.h_ptr + in.size);
	for (long int i = 0; i < in.size; i++)
		out.h_ptr[i] = in.h_ptr[i];
	out.cp_to_device();
}

#endif
relion-3.1.3/src/gpu_utils/cuda_utils_thrust.cuh
#ifndef CUDA_UTILS_THRUST_CUH_
#define CUDA_UTILS_THRUST_CUH_

#include <cuda_runtime.h>
#include "src/gpu_utils/cuda_settings.h"
#include "src/gpu_utils/cuda_mem_utils.h"
#include <utility>
#include <thrust/device_ptr.h>
#include <thrust/distance.h>
#include <thrust/extrema.h>
#include <thrust/reduce.h>
#include <thrust/sort.h>

template <typename T>
static T getMaxOnDevice(CudaGlobalPtr<T> &ptr)
{
	thrust::device_ptr<T> dp = thrust::device_pointer_cast(~ptr);
	thrust::device_ptr<T> pos = thrust::max_element(dp, dp + ptr.size);
	unsigned int pos_index = thrust::distance(dp, pos);
	T max_val;
	DEBUG_HANDLE_ERROR(cudaMemcpy(&max_val, &ptr.d_ptr[pos_index], sizeof(T), cudaMemcpyDeviceToHost));
	return max_val;
}
template <typename T>
static std::pair<int, T> getArgMaxOnDevice(CudaGlobalPtr<T> &ptr)
{
	std::pair<int, T> pair;
	thrust::device_ptr<T> dp = thrust::device_pointer_cast(~ptr);
	thrust::device_ptr<T> pos = thrust::max_element(dp, dp + ptr.size);
	pair.first = thrust::distance(dp, pos);
	DEBUG_HANDLE_ERROR(cudaMemcpy( &pair.second, &ptr.d_ptr[pair.first], sizeof(T), cudaMemcpyDeviceToHost));
	return pair;
}

template <typename T>
static T getMinOnDevice(CudaGlobalPtr<T> &ptr)
{
	thrust::device_ptr<T> dp = thrust::device_pointer_cast(~ptr);
	thrust::device_ptr<T> pos = thrust::min_element(dp, dp + ptr.size);
	unsigned int pos_index = thrust::distance(dp, pos);
	T min_val;
	DEBUG_HANDLE_ERROR(cudaMemcpy(&min_val, &ptr.d_ptr[pos_index], sizeof(T), cudaMemcpyDeviceToHost));
	return min_val;
}

template <typename T>
static std::pair<int, T> getArgMinOnDevice(CudaGlobalPtr<T> &ptr)
{
	std::pair<int, T> pair;
	thrust::device_ptr<T> dp = thrust::device_pointer_cast(~ptr);
	thrust::device_ptr<T> pos = thrust::min_element(dp, dp + ptr.size);
	pair.first = thrust::distance(dp, pos);
	DEBUG_HANDLE_ERROR(cudaMemcpy( &pair.second, &ptr.d_ptr[pair.first], sizeof(T), cudaMemcpyDeviceToHost));
	return pair;
}

template <typename T>
static T getSumOnDevice(CudaGlobalPtr<T> &ptr)
{
	thrust::device_ptr<T> dp = thrust::device_pointer_cast(~ptr);
	return thrust::reduce(dp, dp + ptr.size);
}

template <typename T>
static void sortOnDevice(CudaGlobalPtr<T> &in, CudaGlobalPtr<T> &out)
{
	//TODO Actually do sorting only on device instead of copying back and forth
	//Copy from "in" to "out" on device and do sorting there
//	DEBUG_HANDLE_ERROR(cudaMemcpy( ~out, ~in, in.size * sizeof(T), cudaMemcpyDeviceToDevice));
//	thrust::device_ptr<T> dp = thrust::device_pointer_cast(~out);
//	thrust::device_vector<T> dv(dp, dp + in.size);
//	thrust::sort(dv.begin(), dv.end() );

	T *h_vec = new T[in.size];
	DEBUG_HANDLE_ERROR(cudaMemcpy( h_vec, ~in, in.size * sizeof(T), cudaMemcpyDeviceToHost));
	thrust::sort(h_vec, h_vec + in.size);
	DEBUG_HANDLE_ERROR(cudaMemcpy( ~out, h_vec, in.size * sizeof(T), cudaMemcpyHostToDevice));
	delete [] h_vec;
}

#endif
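// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original header): one way the TODO in
// sortOnDevice() above could be resolved, sorting directly in device memory
// with Thrust's device execution policy instead of bouncing through a host
// buffer. The function name is hypothetical and "out" is assumed to be
// device-allocated with at least in.size elements.
#include <thrust/execution_policy.h>

template <typename T>
static void sortOnDeviceNoHostCopy(CudaGlobalPtr<T> &in, CudaGlobalPtr<T> &out)
{
	// Duplicate the unsorted data device-to-device so "in" is left untouched.
	DEBUG_HANDLE_ERROR(cudaMemcpy( ~out, ~in, in.size * sizeof(T), cudaMemcpyDeviceToDevice));
	// With thrust::device the raw pointers are treated as device pointers and
	// the sort runs on the GPU; no host round trip is needed.
	thrust::sort(thrust::device, ~out, ~out + in.size);
	DEBUG_HANDLE_ERROR(cudaStreamSynchronize(0));
}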
c #A672C1", "*. c #A97694", "=. c #A07B8F", "-. c #8C8288", ";. c #848585", ">. c #C97291", ",. c #E169A8", "'. c #967CD1", "). c #8183CA", "!. c #9C7DC0", "~. c #BB799E", "{. c #C078A1", "]. c #B77C9D", "^. c #B47BB1", "/. c #B27E9D", "(. c #A98397", "_. c #BD7BB1", ":. c #8C8D90", "<. c #958C92", "[. c #8D8F8C", "}. c #8487FF", "|. c #B184A0", "1. c #A48B9A", "2. c #C87FAA", "3. c #C381A6", "4. c #B782C0", "5. c #9E8E95", "6. c #BF84A5", "7. c #BB86A5", "8. c #8C94A8", "9. c #9090CC", "0. c #919598", "a. c #B18BA0", "b. c #949693", "c. c #9D9399", "d. c #E47CB4", "e. c #8E8EFF", "f. c #AB8CDD", "g. c #DA84B5", "h. c #D586B5", "i. c #D088B3", "j. c #CC8AB0", "k. c #C88CAE", "l. c #C48EAE", "m. c #BF90AC", "n. c #C88DBD", "o. c #BA94A8", "p. c #AD99A4", "q. c #9D9EA2", "r. c #9D9F9C", "s. c #CD9291", "t. c #E688BC", "u. c #9D9DCB", "v. c #B69AAB", "w. c #A89FA5", "x. c #F78A93", "y. c #DD8EBC", "z. c #D890BB", "A. c #E48DBE", "B. c #D393B6", "C. c #D293BC", "D. c #CC96B5", "E. c #C799B5", "F. c #C39CB1", "G. c #B5A1AD", "H. c #A5A7A5", "I. c #A0A1FF", "J. c #E696C4", "K. c #E198C4", "L. c #DB9BBE", "M. c #DB9BC4", "N. c #EE96C8", "O. c #CAA3B8", "P. c #C3A6B8", "Q. c #ADAEAC", "R. c #BEA9B4", "S. c #D8A2C1", "T. c #D4A4C1", "U. c #EC9ECD", "V. c #ED9FC7", "W. c #BAB1B8", "X. c #E3A3CA", "Y. c #D2ABC1", "Z. c #B2B2DA", "`. c #CBAEC0", " + c #C5B1BC", ".+ c #B6B7B5", "++ c #F9A1D3", "@+ c #BCB4CC", "#+ c #F9A5CE", "$+ c #F5A7D5", "%+ c #EAADD3", "&+ c #E3B0CF", "*+ c #EFACD6", "=+ c #D9B5C9", "-+ c #C0BCD1", ";+ c #D4B7C8", ">+ c #F5ABDE", ",+ c #BEBFBC", "'+ c #CABBC3", ")+ c #C0BEC3", "!+ c #D0B9C7", "~+ c #BDBDE0", "{+ c #E1B6CE", "]+ c #C5BDE7", "^+ c #BDBEFC", "/+ c #C1BFF1", "(+ c #F2B5DC", "_+ c #EDB9D9", ":+ c #E2BED2", "<+ c #F9B6E3", "[+ c #D2C4CC", "}+ c #C7C8C5", "|+ c #DBC1D1", "1+ c #C9C8CC", "2+ c #FABEE6", "3+ c #F5C1D9", "4+ c #F6C3E4", "5+ c #E3CAD9", "6+ c #D8CED5", "7+ c #D2D0D4", "8+ c #DECDD5", "9+ c #D0D2CF", "0+ c #F2C8E1", "a+ c #DFCDEE", "b+ c #F0CBE0", "c+ c #ECD2E1", "d+ c #E0D6DD", "e+ c #D8DAD7", "f+ c #E7D5DD", "g+ c #DAD9DD", "h+ c #FFD0F1", "i+ c #E0DFD6", "j+ c #F8D8EC", "k+ c #F9DBD9", "l+ c #E0E2DF", "m+ c #F0DDE5", "n+ c #E9DFE6", "o+ c #E3E1E6", "p+ c #FFDADB", "q+ c #E7E1E0", "r+ c #F5DFEF", "s+ c #F8E5EE", "t+ c #F2E7EE", "u+ c #E9EBE8", "v+ c #ECEAEF", "w+ c #EFEAE8", "x+ c #FDECF5", "y+ c #FAF0F6", "z+ c #F5F2F7", "A+ c #F2F4F1", "B+ c #F8F3F1", "C+ c #F8F8EE", "D+ c #FFF6FD", "E+ c #F8FAF7", "F+ c #FCFAFE", "G+ c #FFFAF9", "H+ c #FEFFFC", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+v+n+y+F+H+H+H+H+H+H+G+s+s+t+E+H+n+x+F+H+H+H+H+H+H+H+H+H+H+H+E+n+x+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+z+t+n+y+F+H+H+H+F+z+s+s+v+H+H+H+E+n+x+F+H+H+H+H+H+H+H+H+H+H+H+E+n+F+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+z+v+m+y+H+H+G+y+s+t+B+H+H+H+H+H+B+m+y+F+H+H+H+H+H+H+H+H+H+H+H+B+m+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+z+H+o+s+z+F+s+s+v+F+H+H+H+H+H+H+H+v+s+y+H+H+H+H+H+H+H+H+H+H+H+H+v+m+s+s+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+z+H+H+n+s+s+s+z+H+H+H+H+H+H+H+H+H+E+n+x+F+H+H+H+H+H+H+H+H+H+H+H+F+s+s+m+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+z+H+H+B+d+m+z+H+H+H+H+H+H+H+H+H+H+H+z+m+x+F+H+H+H+H+H+H+H+H+F+y+x+s+m+s+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+G+H+H+t+d+d+t+H+H+H+H+H+H+H+H+H+H+H+H+v+m+D+F+H+H+H+H+E+z+x+x+x+m+n+f+m+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+z+s+v+o+f+t+H+H+H+H+H+H+H+H+H+H+H+H+v+s+F+H+H+H+y+x+x+s+r+m+n+v+n+d+s+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+G+s+t+H+z+f+f+t+H+F+F+F+H+H+H+G+G+G+G+F+n+x+F+y+s+x+m+m+s+m+m+F+n+t+B+d+y+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+t+m+z+y+m+m+8+c+m+y+y+y+x+x+y+x+y+x+x+x+s+r+x+s+t+t+t+r+t+n+m+H+f+z+H+v+m+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+n+t+s+m+m+s+r+c+r+m+m+n+n+n+n+n+n+t+t+t+t+n+s+t+B+x+r+y+z+m+H+v+n+G+H+H+n+s+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+n+s+r+m+r+r+c+n+o+5+s+G+G+F+F+G+G+F+F+H+H+H+G+d+x+x+s+z+H+v+n+H+n+t+G+H+H+B+m+F+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+z+m+r+r+s+m+n+G+H+E+f+j+y+H+H+H+H+H+H+H+H+H+H+H+z+m+s+v+H+F+n+F+H+n+t+H+H+H+H+v+s+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+s+r+r+r+m+v+B+G+G+G+E+m+c+j+F+H+H+H+H+H+H+H+H+H+H+H+m+m+z+H+v+m+H+H+n+z+H+H+H+H+z+n+s+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+t+r+r+f+t+z+H+H+H+H+F+G+F+n+c+5+r+F+H+H+H+H+H+H+H+H+H+H+f+m+n+G+n+t+H+o+t+F+H+H+G+y+y+n+n+G+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+t+8+5+m+t+G+H+H+H+H+H+H+F+G+n+c+o+5+s+H+H+H+H+H+H+H+H+H+z+m+m+d+n+s+H+H+n+t+H+F+y+x+s+t+H+q+o+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+t+r+f+[+c+y+H+H+H+H+H+H+H+H+G+t+5+F+6+c+y+F+H+H+H+H+H+H+H+B+m+F+q+n+t+H+H+t+z+z+s+s+t+H+H+H+G+o+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+z+c+m+z+6+8+c+H+H+H+H+H+H+H+H+G+t+c+y+y+c+j+s+y+G+H+H+H+H+H+v+r+H+H+n+t+H+G+t+s+s+t+y+H+H+H+H+H+v+v+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+m+f+G+z+t+6+5+s+H+H+H+H+F+z+y+x+s+r+c+c+m+c+j+c+r+s+y+F+H+H+o+r+H+H+n+t+F+t+t+n+t+G+H+H+H+H+H+H+F+o+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+m+f+z+H+t+y+z+[+5+y+G+t+s+x+x+r+c+r+m+v+G+G+d+j+c+6+5+j+r+s+F+d+r+H+H+n+t+t+z+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+m+m+z+H+z+t+y+E+v+'+c+r+x+r+5+f+n+f+s+z+H+H+E+G+6+j+y+E+g+8+c+r+r+r+H+H+n+t+G+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+m+c+v+H+H+v+t+F+H+z+5+j+r+n+B+H+F+m+r+t+H+H+H+H+G+z+5+j+F+H+H+w+n+8+m+H+H+t+F+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+s+c+o+H+H+B+y+z+H+E+m+5+8+r+y+H+F+s+r+m+H+H+H+H+H+E+F+v+5+r+F+G+F+H+o+c+s+y+t+F+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+t+c+t+H+H+F+z+y+y+H+n+c+d+z+8+r+y+t+r+c+H+H+H+H+H+H+H+F+F+d+c+s+F+G+G+E+[+r+z+s+F+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+o+c+n+E+H+H+z+D+v+v+n+c+8+H+E+o+5+r+m+m+G+H+H+H+H+H+H+H+F+F+z+8+j+y+H+H+G+6+m+H+x+F+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+d+8+m+F+H+G+y+z+H+v+c+c+B+H+H+z+8+j+r+v+z+v+v+z+F+G+F+F+F+F+B+g+5+j+F+G+y+d+5+t+s+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+d+8+8+s+H+y+y+G+H+f+5+t+G+y+s+c+5+5+s+a+~+/+/+/+~+n+F+G+H+H+F+z+d+5+r+y+y+B+[+c+s+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+g+f+6+5+m+y+F+H+m+c+n+n+c+c+5+8+z+d+5+r+/+/+/+/+/+/+a+F+H+H+H+z+v+8+j+j+s+y+8+5+s+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+g+m+z+6+8+c+z+n+c+c+c+c+5+d+z+F+v+~+-+r+a+^+^+^+/+/+/+n+H+H+H+H+n+j+b+b+j+j+j+j+m+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+g+m+F+o+[+[+5+c+c+c+f+o+u+z+F+F+1+~+]+-+s+a+^+^+^+^+/+/+n+F+F+s+c+c+8+5+5+d+d+5+5+x+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+o+m+z+y+d+[+5+5+n+v+E+H+H+B+F+z+-+~+]+]+|+r+/+^+^+^+^+/+/+r+r+c+c+o+v+8+b+r+E+c+5+5+s+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+v+m+v+x+z+6+5+n+F+H+H+H+H+F+F+z+-+~+]+/+]+5+a+/+^+^+^+/+b+j+c+m+z+H+z+n+[+b+m+8+c+'+5+s+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+v+m+t+x+H+B+8+j+G+H+H+H+F+F+H+z+-+~+~+]+/+~+m+a+^+^+]+j+b+-+]+n+H+H+H+v+n+!+c+|+5+g+'+5+m+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+m+s+y+H+B+[+c+r+H+H+F+F+F+H+H+)+-+~+~+]+/+|+r+/+a+j+b+]+^+/+/+v+H+H+E+o+f+|+b+b+z+o+[+5+m+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+G+G+G+G+F+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+m+s+F+H+E+6+[+j+x+H+F+F+H+H+H+o+-+-+~+]+/+]+5+j+c+]+/+^+^+^+/+/+v+H+H+E+n+[+b+b+y+H+v+[+5+s+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+G+F+s+b+%+%+_+s+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+m+t+F+H+E+d+8+8+j+y+F+F+H+H+H+H+1+-+-+~+]+]+;+j+a+^+^+^+^+^+^+^+/+v+H+H+z+d+|+b+m+H+H+F+g+8+f+t+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+G+y+3+d.U U U U ,.s+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+n+y+F+H+H+d+f+w+5+r+F+H+H+H+H+H+F+1+-+-+~+~+|+!+j+]+^+^+^+^+^+^+^+/+z+F+m+5+b+:+j+z+H+G+H+[+'+8+c+z+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+F+3+d.,.,.d.,.,.d.s+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+n+m+z+F+H+o+5+F+n+j+y+H+H+H+H+H+H+A+1+-+@+-+5+Z.|+j+/+^+^+^+^+^+^+/+~+c+:+5+5+'+b+b+j+r+s+f+c+6+[+|+f+z+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+F+X.d.(+s+y+x+j+%+s+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+g+f+t+F+v+8+z+z+f+r+F+H+H+H+H+H+H+z+)+-+|+|+]+~+c+a+^+^+^+^+^+/+a+b+|+8+v+d+b+!+:+5+5+|+:+b+j+c+5+c+c+m+t+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+F+q+7+7+q+g+o+z+H+H+E+F+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+G+F+F+F+F+F+F+F+F+F+F+F+F+F+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+F+X.d.r+H+H+H+H+H+H+H+H+H+H+H+H+H+H+z+B+G+G+G+G+G+G+G+G+F+B+F+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+z+v+q+g+7+}+q+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+g+d+m+n+8+v+y+v+c+r+G+H+H+H+H+H+H+z+'+5+-+~+Z.@+j+]+^+^+]+b+b+|+6+o+o+n+b+|+m+[+:+t+F+v+[+f+8+5+|+|+c+c+c+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+v+W.J A R :.;.;.q.}+o+)+7+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+y+b+3+_+_+_+_+_+_+_+_+_+b+s+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+3+,.3+H+H+H+H+H+H+H+H+H+H+H+H+H+H+z+)+W.q.r.q.H.H.H.H.q.W.)+v+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+7+:.i } = > q.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+g+6+8+8+n+D+G+g+r+x+H+G+G+H+G+z+t+8+5+-+@+~+Z.|+j+]+|+b+|+8+A+A+A+d+b+5+G+n+d+;+b+G+z+8+y+H+H+H+v+o+'+!+c+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+q+R > w.H+H+H+H+q+R } t 4 w.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+F+j+%+d.U U U U U U d.X.b+y+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+s+d.A.y+H+H+H+H+H+H+H+H+H+H+H+H+H+H+G+1+r.i $ $ $ $ $ $ } b.1+B+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+F+.+d = = ( 
q.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+o+6+8+f+f+f+f+[+r+m+f+f+c+5+|+|+|+5+5+5+5+5+!+5+b+:+~+a+E+E+A+m+b+5+o+E+E+t+8+:+j+E+d+m+H+H+H+H+H+o+ +!+c+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+B+A $ w.H+H+H+H+H+H+G+:.= > W.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+x+d.,.,.,.,.,.j+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+G+H+H+H+H+H+H+H+F+(+U _+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+q+t = > > = 4 7+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+g+> = ( q.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+v+d+6+f+d+d+6+8+8+8+f+6+d+o+o+'+8+g+W.@+@+@+@+c+a+a+A+A+B+c+b+5+w+u+u+E+z+t+'+:+s+g+f+H+H+H+H+H+B+[+'+;+c+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+q.= } G+H+H+H+H+H+H+H+w+t = Q.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+G+}+[.Q.v+H+H+H+H+H+H+_+,.,.,.,.J.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+j+%+_+y+H+H+H+H+H+s+d.d.j+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Q.$ > > $ b.G+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+E+d = ( q.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+v+d+f+s+y+y+t+8+m+z+E+H+H+H+[+m+F+o+@+@+Z.Z.@+b+t+u+w+c+b+f+B+A+A+u+u+E+t+t+ +b+f+f+F+H+H+H+H+E+6+5+6+`.m+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+z+} = t H+H+H+H+H+H+H+H+H+Q.( Q.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+g+d @ > Q.H+H+H+H+H+H+0+,.,.,.,._+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+D+d.U U %+H+H+H+H+H+0+U t.s+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+,+> = > $ 
w.G+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+E+t = ( q.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+v+d+f+t+z+v+d+c+B+H+H+H+H+[+m+G+H+o+@+@+Z.~+5+j+o+5+b+f+A+E+A+A+A+u+u+A+m+t+`.b+f+B+H+F+G+G+H+o+|+t+[+;+m+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+W.> = J H+H+H+H+H+H+H+H+H+q+t H.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+1+> = @ c.H+H+H+H+H+H+b+,.,.,.,._+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+x+U ,.,.t.H+H+H+H+D+X.L V.y+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+}+} = > $ W.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+G+t = ( q.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+B+d+f+y+H+B+f+m+F+H+H+H+[+m+G+H+H+g+@+@+1+1+5+5+b+f+u+A+A+E+A+A+A+u+u+t+m+[+:+j+c+r+r+j+j+j+c+c+j+b+j+j+F+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+:.> > } H+H+H+H+H+H+H+H+H+H+c.w.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+w+;.( t 7+H+H+H+H+H+H+b+,.,.,.,.3+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+G+V.,.d.b+H+H+H+H+s+d.U V.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+1+} = > $ ,+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+E+t = ( q.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+d+m+y+H+w+c+s+H+H+H+6+f+F+F+F+D+[+)+}+}+'+b+5+u+u+A+A+A+E+A+A+u+u+o+d+|+3+j+b+|+;+|+|+5+c+8+|+5+5+b+f+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+R > > > w.H+H+H+H+H+H+H+H+H+)+)+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+w+}+o+G+H+H+H+H+H+H+b+,.,.,.,._+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+y+j+j+G+H+H+H+H+j+,.U 
%+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+}+} = > $ ,+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+E+t = ( q.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+7+m+y+G+g+c+y+F+D+6+f+y+y+t+y+z+7+,+1+|+ +5+d+u+u+A+A+A+E+E+A+A+o+b+0+|+;+r+t+o+6+[+8+f+6+[+f+c+|+5+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+:.> > > } W.H+H+H+H+H+H+H+H+B+B+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+b+,.,.,.,._+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+0+U ,._+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+}+} = > $ ,+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+E+t = ( q.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+g+n+s+s+8+r+y+D+d+f+v+B+H+H+H+H+}+'+|+1+!+5+o+u+u+A+A+A+E+A+d+0+0+[+t+8+:+r+G+H+z+g+6+8+'+|+b+ +|+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+)+> > > = } :.B+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+b+,.,.,.,.3+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+F+_+U ,.3+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+}+} = > $ )+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+E+t = ( q.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+o+n+f+f+f+m+B+o+f+F+H+H+H+H+H+z+|+[+}+)+;+5+o+u+u+A+A+z+c+0+:+6+l+g+s+ +c+y+F+H+H+H+v+6+b+r+ +|+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+G+} > > > > > d q.B+H+H+H+H+H+H+H+H+H+H+H+H+H+7+:.R <.R 
;.W.v+H+G+g+,+Q.c.q.o+H+H+H+H+H+H+b+,.,.,.,._+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+j+0+3+X.V.r+H+H+H+F+X.U d.b+H+H+H+H+H+H+H+H+H+H+H+s+V.%+j+H+H+H+H+H+}+} = > $ ,+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+B+.+;.J -.;.:.)+G+H+H+H+H+H+E+t = ( w.H+q+w.<.J t <.7+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+v+d+6+m+y+o+8+F+H+H+H+H+H+8+5+1+,+}+W.:+f+l+u+u+n+c+3+c+v+A+u+l+o+d+!+j+F+H+H+H+H+5+j+m+'+|+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+)+> > > > > > ( 4 c.z+H+H+H+H+H+H+H+G+H+g+A } w.w+w+q+J > 4 q.o+J ( ( > ( ,+H+H+H+H+H+H+b+,.,.,.,.3+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+V.,.,.,.,.3+H+H+y+3+d.U ,.X.b+0+0+0+0+0+0+j+F+H+H+G+s+t.,.V.F+H+H+H+}+} = > $ )+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+G+Q.} > d )+v+z+H.} } w.H+H+H+H+E+t = ( q.)+b.v+u+w+Q.4 > J q+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+o+6+f+f+8+F+H+H+H+H+o+|+f+E+}+,+,+ +:+g+l+8+0+:+d+A+E+A+A+u+l+o+6+|+r+H+H+H+v+b+5+5+'+|+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+W.> = > > > > > > 4 W.G+H+H+H+H+H+H+W.} $ .+E+H+H+H+q+> = } .+F+q.= = > .+H+H+H+H+H+H+b+,.,.,.,.3+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+F+%+,.,.,.0+H+s+d.,.,.,.,.,.,.,.,.,.,.,.,.X.F+H+H+H+H+s+,.,.A.G+H+H+}+} = > = ,+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+7+} = = H.H+H+H+H+R > > q.F+H+H+E+t = > d [.G+H+H+H+H+,+} = } 7+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+g+6+f+s+z+H+H+H+[+5+z+H+B+,+,+.+;+:+|+0+|+o+A+u+A+A+E+A+u+l+g+'+c+r+H+H+5+j+d+[+'+!+t+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+7+d 
= > > > > > > > J w+H+H+H+H+)+} = J B+H+H+H+H+A+A = > :.H+q+i = > .+H+H+H+H+H+H+b+,.,.,.,.3+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+F+,.,.,.3+y+d.U U ,.,.,.U U U U ,.,.,.,.s+H+H+H+H+H+F+X.U ,.%+H+H+}+} = > = ,+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+7+} $ i g+H+H+H+H+1+( = } G+H+H+E+t = = 4 q+H+H+H+H+H+H+r.> = } o+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+f+r+m+s+y+n+5+f+H+H+v+m+,+)+ +0+0+[+e+l+u+u+A+E+H+H+A+u+l+g+!+j+y+n+j+c+E+6+'+!+m+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+F+W.} = > > > > > = t q+H+H+w+d = = 7+G+H+H+H+H+H+q+} 4 7+H+q+J = > .+H+H+H+H+H+H+b+,.,.,.,.3+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+,.,.,.3+H+m+s+0+,.U d.0+s+m+s+V.,.U V.F+H+H+H+H+H+H+j+U ,.t.G+H+}+} = > = ,+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+7+q.7+H+H+H+H+H+z+} = = E+H+H+E+t = > t H+H+H+H+H+H+H+B+i = = ;.A+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+n+c+n+n+f+5+5+t+H+H+t+x+o+|+0+;+c+d+e+l+u+A+E+H+H+H+E+u+l+l+7+;+r+5+b+s+D+d+ +!+c+H+H+H+G+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+B+q.> = > > > > $ R z+H+:.> = t A+H+H+H+H+H+H+H+H+H+H+H+o+J = > .+H+H+H+H+H+H+b+,.,.,.,.3+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+,.,.,.3+H+H+H+r+,.U V.H+H+H+H+N.U U V.j+x+y+s+s+s+s+b+U ,.d.j+H+}+} = > $ )+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+z+4 = = E+H+H+E+t = > [.H+H+H+H+H+H+H+H+b.> = > o+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+B+c+y+E+n+|+8+s+x+z+s+c+b+3+'+W.;+j+q+u+A+A+H+H+H+E+A+u+l+l+l+[+|+j+5+t+t+v+'+ 
+5+G+z+H+o+t+v+F+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+w+R > > > > = > ,+B+i = = r.E+H+H+H+H+H+H+H+H+H+H+H+w+J $ > .+H+H+H+H+H+H+b+,.,.,.,.3+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+,.,.,.0+H+H+H+j+U U X.H+H+H+m+d.,.,.,.,.,.,.,.,.,.,.,.,.d.V.D+H+}+} = > $ )+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+1+} > = E+H+H+E+t = ( q.H+H+H+H+H+H+H+H+}+} = $ )+E+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+f+c+F+f+5+B+o+f+m+5+0+|+v+l+,+[+5+c+u+A+E+H+H+E+A+u+u+l+l+l+q+|+0+8+d+m+m+6+5+c+H+H+H+F+o+n+v+t+t+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+F+W.> = > > $ :.W.4 = $ )+G+H+H+H+H+H+H+H+H+H+H+H+w+J $ > .+H+H+H+H+H+H+b+,.,.,.,._+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+,.,.,.3+H+H+H+b+U ,._+H+H+H+3+,.,.U j+H+H+H+H+H+H+H+H+H+H+H+H+H+}+} = > $ ,+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+G+A > > = E+H+H+E+t = ( W.H+H+H+H+H+H+H+H+w+d = = <.G+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+7+[+f+|+t+H+F+n+b+0+|+m+t+)+9+e+'+j+s+E+H+H+H+A+A+u+l+l+l+o+5+0+`.;+v+o+n+[+j+5+s+s+F+F+n+n+t+z+o+t+t+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+E+H+H+H+H+H+H+H+H+H+H+H+Q.> > > = t ;.} = $ g+H+H+H+H+H+H+H+H+H+H+H+H+w+J $ > .+H+H+H+H+H+H+b+,.,.,.,._+H+H+H+H+H+H+H+H+H+H+H+H+H+H+G+H+H+H+,.,.,._+H+H+F+3+U ,.3+H+H+H+_+,.,.,.s+H+H+H+H+H+H+H+H+H+H+H+H+H+}+} = > $ )+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+F+H+H+H+H+G+B+q+R c.( > = E+H+H+E+t = ( .+H+H+H+H+H+H+H+H+F+i = = :.E+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+v+'+ 
+8+G+z+j+0+|+c+z+v+}+}+e+l+o+|+j+y+H+H+E+A+u+l+e+l+6+b+b+8+)+P.f+H+H+8+j+5+t+t+t+s+s+s+s+x+x+s+s+y+y+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+1+z+H+H+H+H+H+H+H+H+H+H+w+J = > = d J ( = $ e+H+H+H+H+H+H+H+H+H+H+H+H+w+J $ > .+H+H+H+H+H+H+b+,.,.,.,._+H+H+H+H+H+H+H+H+H+H+H+H+H+H+s+F+H+H+,.,.,._+H+H+F+%+U ,.0+H+H+H+%+,.,.U s+H+H+H+H+H+H+H+H+H+H+H+H+H+}+} = > $ ,+H+H+H+H+H+H+H+H+H+H+H+H+H+A+g+z+H+E+v+1+b.t R o+z+} = = E+H+H+E+t = ( .+H+H+H+H+H+H+H+H+H+i = = ;.E+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+'+w.W.c+b+3+f+n+s+F+e+}+e+l+u+A+g+:+r+F+E+A+u+l+e+e+6+b+b+g+u+o+W.;+y+z+j+c+8+E+E+F+G+B+B+o+s+x+z+v+t+s+t+t+t+t+t+t+z+t+n+t+z+z+G+F+z+o+t+z+F+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+:.)+F+H+H+H+H+H+H+H+H+H+H+W.$ > = d R } = $ )+H+H+H+H+H+H+H+H+H+H+H+H+w+J $ > W.H+H+H+H+H+H+b+,.,.,.,.%+H+H+H+H+H+H+H+H+H+H+H+H+H+y+J.y+H+H+,.,.,._+H+H+D+V.U d.j+H+H+H+_+,.,.U b+G+H+H+H+H+H+H+H+H+H+H+H+H+}+| = > $ W.H+H+H+H+H+H+H+H+H+H+H+H+H+q+;.q+w+q.} $ 4 )+H+H+v+| > = E+H+H+E+t = ( Q.H+H+H+H+H+H+H+H+G+i = = [.E+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+5+|+;+=+;+v+H+t+s+q+,+9+l+u+u+A+H+6+b+s+z+u+u+l+9+6+0+b+o+u+u+u+g+R.5+n+j+|+f+E+H+H+H+H+G+F+n+s+F+H+G+m+H+H+H+H+H+H+H+H+o+n+z+H+H+H+H+v+v+o+o+n+t+t+G+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+c.J B+H+H+H+H+H+H+H+H+H+H+1+( = = t w.4 = = c.E+H+H+H+H+H+H+H+H+H+H+H+w+J $ > .+H+H+H+H+H+H+b+,.,.,.,.X.H+H+H+H+H+H+H+H+H+H+H+H+H+0+J.y+H+H+,.,.,._+H+H+y+V.U d.j+H+H+H+0+,.,.U V.F+H+H+H+H+H+H+H+H+H+H+H+H+}+} = > $ w.G+H+H+H+H+H+H+H+H+H+H+H+H+7+t 7+J $ = t o+H+H+H+v+} > = E+H+H+E+t = ( q.H+H+H+H+H+H+H+H+o+d = $ W.F+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+c+c+j+!+G.f+H+H+t+m+,+9+e+l+u+A+E+H+H+!+j+t+u+l+7+[+0+5+7+l+u+u+u+u+)+ +|+b+'+f+E+H+H+H+H+H+G+F+n+x+H+G+m+H+H+H+H+H+H+H+H+H+o+n+G+H+H+H+H+z+H+H+H+o+n+n+t+t+H+H+H+H+H+H+H+H+H+H+H+H+H+q.> q+H+H+H+H+H+H+H+H+H+H+1+( = $ ;.g+t = = d v+H+H+H+H+H+H+H+H+H+H+H+w+J = > W.H+H+H+H+H+H+b+,.,.,.,.V.H+H+H+H+H+H+H+H+H+H+H+H+H+J.A.D+H+H+,.,.,._+H+H+y+t.U t.s+H+H+H+s+d.U ,.,.s+H+H+H+H+H+H+H+H+H+H+G+H+}+} = > $ q.G+H+H+H+H+H+H+H+H+H+H+H+F+r.4 w.$ = } ,+H+H+H+H+q+} > = E+H+H+E+t = > [.H+H+H+H+H+H+H+H+}+} = = q+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+d+G+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+F+x+r+b+5+c+n+[+p.P.n+H+n+)+}+e+l+u+A+E+H+H+H+o+|+j+o+7+|+0+|+7+e+l+l+u+u+u+u+W.;+5+'+f+H+H+H+H+H+H+H+F+v+s+y+B+m+H+H+H+H+H+H+H+H+H+H+o+d+H+H+H+H+G+y+H+H+H+F+s+y+t+s+t+H+H+H+H+H+H+H+H+H+H+H+q.> ;.G+H+H+H+H+H+H+H+H+H+.+= = > Q.H+[.> > $ )+H+H+H+H+H+H+H+H+H+G+H+w+J = > W.H+H+H+H+H+H+0+,.,.,.,.d.H+H+H+H+H+H+H+H+H+H+H+H+j+,.J.D+H+H+,.,.,.3+H+H+x+d.U A.y+H+H+H+H+V.,.,.U V.F+H+H+H+H+H+H+H+H+y+s+H+,+( = > = ;.B+H+H+H+H+H+H+H+H+H+H+H+q+} } R = = J w+H+H+H+H+}+( > = z+H+H+E+t = > R H+H+H+H+H+H+H+H+<.> $ J A+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+ +m 7.%+_+5+t+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+s+r+j+b+|+c+m+E+H+E+[+5.`.t+}+}+e+l+u+u+A+H+H+H+E+A+[+b+c+|+0+|+}+e+e+l+l+l+u+u+u+[+|+[+'+f+H+H+H+H+H+H+H+H+E+o+s+y+m+H+H+H+H+H+H+H+H+H+H+H+o+f+F+H+H+H+H+s+m+t+H+H+H+v+s+m+y+H+H+H+H+H+H+H+H+H+H+q.> > 1+H+H+H+H+H+H+H+H+B+;.$ = J w+H+q+i = = d w+H+H+H+H+H+H+H+o+o+H+q+A = $ Q.H+H+H+H+H+H+_+,.,.,.,.,.b+H+H+H+H+H+H+H+H+H+H+y+A.U V.F+H+H+,.,.,._+H+H+s+,.U A.F+H+H+H+H+s+d.,.,.,.3+G+H+H+H+H+H+H+y+%+b+H+Q.= > > = } 7+H+H+H+H+H+H+H+H+H+H+B+;.$ 4 R = = S u+H+H+H+H+c.> > = g+H+H+H+t = = i H+H+H+H+H+H+H+u+i $ ( 
7+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Y.v v H 2.y.U.++U.&+b+t+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+d+c+=+;+;+=+=+|+=+;+5+5+5+|+P.Y.=+'+6+7+l+u+A+E+H+H+E+A+u+u+ +j+0+!+1+}+9+e+e+l+l+u+u+8+j+|+ +'+f+E+H+H+H+H+H+H+H+H+F+d+s+m+H+H+H+H+H+H+H+H+H+H+G+y+F+s+s+m+t+G+v+H+H+H+H+H+H+m+n+n+x+H+H+H+H+H+H+H+H+H+q.> $ 4 7+H+H+H+H+H+H+H+)+( $ 4 }+H+H+H+)+} = $ S q+H+H+H+H+H+q+q.z+H+7+} = $ b.H+H+H+H+H+y+t.,.,.,.,.,.d.j+H+H+H+H+H+H+H+G+m+J.,.U V.F+H+x+,.,.,.J.H+H+s+U U J.H+H+H+H+H+H+0+d.U ,.,._+x+H+H+H+H+r+V.V.H+w+J = > > > = J 7+H+H+H+H+H+H+H+H+7+;.= = t w.$ = i q+H+H+H+w+4 = > = )+H+H+7+t = = } 1+H+H+H+H+H+H+[.= } w.E+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+G+O.h.n M #.M B v H ].j.y.y.6.|+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+6+F._+4+4+4+4+0+0+0+0+0+0+0+0+0+0+h+4+4+_+_+0+b+c+r+s+m+t+n+o+'+j+|+)+,+}+9+9+e+e+l+l+8+j+c+7+W.'+f+u+E+H+H+H+H+H+H+H+F+B+m+r+H+H+H+H+H+H+H+H+F+s+F+y+t+m+s+H+H+H+H+H+H+H+H+H+H+m+m+g+s+s+H+H+H+H+H+H+H+H+q.J R 4 4 Q.v+H+H+H+B+Q.} $ i }+H+H+H+H+H+)+A > $ t Q.7+q+7+Q.q.)+F+}+A $ $ $ } Q.B+F+s+0+A.U U U U U U U d.%+3+b+j+j+0+_+V.d.U U L V.s+b+A.U U U ,.%+r+r+U ,.V.H+H+H+H+H+H+H+b+A.,.U U A._+3+_+X.t.y.!+,+J $ $ = = = = $ 4 [.Q.}+7+7+,+Q.;.d $ $ $ t w+i $ = ;.q+w+q+g+b.t = = i Q.H.-.t = ( } t 7+H+H+H+u+[.( d Q.F+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+5+L.2.i.].z.M.%+$+i.3./.a.a.P n M E.f+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+t+O.T._+{+5+5+|+|+;+m+f+d+d+d+n+6+Y.4+0+|+[+|+5+|+:+:+{+{+{+0+b+0+b+P.!+,+}+}+9+9+e+e+8+j+c+q+u+)+'+8+v+w+E+H+H+H+H+H+H+H+F+v+m+F+H+H+H+H+H+F+s+z+F+H+H+H+o+m+D+H+H+H+H+H+H+H+G+z+d+s+o+n+s+t+H+H+H+H+H+H+H+7+w+B+7+q.R b.q.q.:.J 4 d b.q+H+H+H+H+H+H+H+q+H.J } } t ;.c.w.}+H+o+W.:.;.:.:.;.w.o+y+3+V.J.J.V.V.V.V.V.V.J.A.V.%+%+%+X.V.A.J.J.V.N.b+j+_+V.V.V.V.V.V.0+0+U ,.%+H+H+H+H+H+H+H+H+s+3+V.d.,.,.d.d.A._+t+.+b.S ;.;.;.;.;.;.;.S R c.Q.Q.Q.H.[.J S ;.;.;.W.H+q+[.i d R 
H.1+F+H+q+r.t } t :.}+t } )+}+H.H.)+)+W.b.S r.q+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+5+X.z.{.i.>+U.~.J.*.J.#.M 2.J.$+z.#.B n $.F.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+z+P.&+;+Y.4+n+H+s+r+y+H+H+H+H+H+H+)+_+=+=+j+q+A+E+z+B+v+o+o+[+'+;+j+=+[+`.'+}+}+}+9+e+6+j+f+o+u+u+d+W.8+r+r+s+x+y+y+F+G+H+H+H+z+m+r+F+F+H+x+s+z+H+H+H+H+H+H+H+o+m+H+H+H+H+H+H+H+H+H+d+s+v+o+m+s+t+F+H+H+H+H+H+H+H+H+H+H+q+,+W.)+W.Q.,+q+G+H+H+H+H+H+H+H+H+H+H+w+7+,+W.,+7+G+H+H+z+B+G+F+F+F+F+B+B+F+x+x+y+y+y+y+y+y+y+y+y+y+x+x+x+x+x+y+y+y+y+y+y+G+D+y+F+F+F+F+F+D+y+3+U ,._+H+H+H+H+H+H+H+H+H+H+y+m+j+b+b+j+y+H+H+v+u+A+A+A+A+A+A+A+A+A+A+A+u+u+u+u+A+A+A+A+A+A+F+H+H+G+q+}+}+7+z+H+H+H+H+q+1+7+w+H+W.1+H+H+H+q+7+1+}+}+w+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+m+L.y.2.2.>+>+3.O =.h.i.J.a.S.O.1.O {.J.U.J.{.~.~.|+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+|+Y.=+z+6+&+4+n+r+m+H+H+H+H+H+H+9+R.4+8+'+0+j+E+H+H+E+A+u+l+e+}+|+b+;+n+'+|+7+}+}+9+6+j+c+q+l+q+n+j+f+c+f+d+8+c+r+r+r+r+s+s+t+t+s+y+H+s+n+H+H+H+H+H+H+H+H+H+H+H+d+s+H+H+H+H+H+H+H+H+o+m+z+z+v+m+s+t+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+F+(+U ,.3+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+G+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+m+S.h.y.i.U.>+j.].B.8+H+o.J.A.8+Y._+c+H+1.n (.k.z.y.y.h.T.t+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+5+Y.&+n+H+H+'+_+b+s+H+H+H+H+H+H+u+,+Y.4+g+u+ +0+s+H+E+A+u+u+e+9+'+b+!+`.m+7+`.c+e+}+6+j+c+g+e+o+f+j+m+o+6+r+z+A+u+v+o+n+n+m+m+m+r+x+n+m+s+q+w+B+H+H+H+H+H+H+H+H+H+H+m+s+H+H+H+H+H+H+H+H+f+y+E+y+o+m+n+t+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+F+X.U 
d.b+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+6+F.i.h.K.U.K.D.$.M.X.f+H+H+6+B.J.n+v+T.4+m+ +H z+q+'+R.F.6.y.L.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+m+Y.&+f+H+H+H+v+O._+m+H+H+H+H+H+E+,+.+{+3+u+A+w+Y.j+z+A+u+u+l+9+}+|+b+f+G.5+H+'+;+n+d+j+5+7+e+g+m+r+n+u+u+o+c+s+A+A+E+E+E+E+E+B+n+x+v+A+o+s+m+k+k+k+k+k+m+m+m+w+w+w+G+q+m+x+H+H+H+H+H+H+z+m+x+D+y+y+d+s+n+t+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+D+V.U d.j+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+m H K.>+K.m.8+Y.*+k.d+H+H+H+n+M.>+&+H+[+%+0+P.H z+H+H+H+z+n+j.2.;+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+d+:+Y.;+E+H+H+B+r+|+Y.4+F+H+H+H+H+}+,+ +4+5+u+A+H+n+{+j+u+u+l+e+,+g+0+5+d+R.|+F+A+R.|+j+5+}+9+d+r+c+g+l+u+u+u+g+r+t+A+E+E+E+E+E+t+x+v+E+E+E+n+s+p+k+k+k+k+k+k+k+k+k+k+k+k+8+m+s+H+H+H+z+D+D+G+m+m+s+r+s+s+s+n+y+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+y+t.U d.x+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+E.~.>+U.M 7 o+0+>+l.q+H+H+H+H+5+y.i.>+c+H+F.4+7.M 
z+H+H+H+H+G+j.2.~.n+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+z+5+Y.Y.v+H+H+F+r+j+z+[+&+b+H+H+H+}+,+9+;+4+o+A+E+H+E+8+0+m+l+e+}+u+|+j+d+d+'+;+m+G+d+;+5+F+w+f+r+f+e+l+l+l+u+u+u+d+r+z+H+E+E+E+z+x+t+H+E+E+E+w+f+s+p+k+k+k+k+k+k+k+k+k+k+k+k+8+m+k+m+x+y+r+s+s+E+z+v+d+f+f+s+r+m+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+y+d.U t.G+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+|+~.#.2.B k _ m C.T.G+H+H+H+H+F+{+y.1.z.(+z+A+m.E.M d+H+H+H+H+G+#.].#.m.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+5+;+{+[+F+H+F+r+j+z+H+z+`._+m+H+l+.+}+7+{+_+u+E+H+H+H+q+;+0+g+}+9+d+0+c+z+o+f+R.5+f+c+c+ +v+r+j+d+9+e+e+l+l+u+u+u+q+c+r+F+E+E+z+s+s+E+E+E+E+E+y+k+r+p+k+k+k+k+p+p+p+p+p+p+p+p+k+f+x+s+m+n+s+s+F+H+H+H+v+z+o+d+s+s+s+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+x+,.,.V.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+n+3.h.k i.$./.Z B |.s+H+H+H+H+H+F+B.B.F+o.J.b+H+6+*.M d+H+H+H+H+G+C &+E ~.;+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+c+;+&+=+v+H+G+r+j+t+H+H+H+o+O.4+d+,+}+e+[+_+5+E+H+H+H+A+u+[+:+b+}+A+|+j+t+G+E+n+ +!+c+r+[+w.8+r+z+u+e+e+e+l+l+l+u+u+u+g+m+y+E+A+s+x+z+E+E+E+E+E+s+p+f+s+p+p+k+k+k+k+p+p+p+p+p+k+m+r+s+m+k+t+s+z+H+H+H+z+y+G+E+o+d+s+m+s+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+x+U ,._+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+7.h./.d+C 2.*.L.K.D.0+G+H+H+H+H+n+g.{+G+v+l.$+=+4+*+M.{+H+H+H+H+G+C _+g+v k.z+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+7+p.{+Y.8+H+B+r+j+n+H+H+H+H+H+[+S.:+,+9+e+[+&+n+H+H+H+A+u+u+l+R.0+r+d+j+c+t+t+y+s+[+;+j+c+f+W.[+z+H+H+E+l+e+e+l+l+u+u+u+w+d+m+B+n+x+t+A+E+E+E+E+E+k+k+k+f+r+p+p+p+p+p+p+p+p+k+m+r+m+s+f+s+m+s+y+H+H+H+H+z+z+H+H+z+d+m+s+x+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+G+b+U d.m+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+F.2.3.d+H+v.2.M $+D.;+_+:+H+H+H+H+Y.A.5+H+H+P.z.U.M.o.m.<+5+H+H+H+G+W _+H+p.M Y.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+'+O.Y.|+z+G+r+j+d+H+H+H+H+H+H+H+G._+:+6+g+|+_+G+H+H+E+A+u+l+e+,+R.0+:+0+r+x+r+r+c+'+;+f+w+v+6+[+f+H+H+H+E+l+l+l+l+l+u+u+A+w+f+m+x+s+A+A+A+A+A+A+A+k+k+k+f+f+s+k+k+k+k+p+p+k+r+m+k+k+k+k+f+x+t+E+H+H+H+B+z+G+H+H+H+z+d+s+y+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+F+(+U A.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+|+{.i. 
+H+H+v+$.3.U.8+w+T.4+m+H+H+H+j.U.m+c+X.X.3.>+Y.H+g+S.2+t+H+E+G+W _+F+F+P ~.m+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+ +O._+!+F+r+j+d+H+H+H+H+H+H+H+e+v.&+_+%+&+_+3+z+G+E+A+u+l+e+}+[+5+=+h+|+n+n+v+B+w+;+b+y+G+H+F+7+8+s+F+H+H+E+u+l+l+l+u+v+v+o+d+r+s+w+u+u+u+u+A+A+A+k+k+k+k+f+r+p+k+k+k+k+m+r+m+k+p+p+k+m+r+m+s+H+H+H+H+z+z+H+H+H+H+H+t+s+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+D+A.,.0+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+d+6.h.o.E+H+H+B+(.2.l.H+H+6+S._+m+r+c+X.*+%+>+*+$+C.>+$+_+0+O.M.S.;+Y.|+M S.5+f+G.H m.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+R.F.Y._+|+j+n+H+H+H+H+H+H+H+u+G.&+O.&+&+O._+&+_+b+m+n+l+e+9+,+m+t+!+_+5+E+H+H+v+|+:+j+n+n+t+z+z+7+f+m+y+G+H+H+A+q+o+n+m+m+f+f+f+r+v+u+u+u+u+u+u+w+k+m+k+k+k+f+s+k+k+k+r+m+k+k+k+k+k+m+m+k+8+m+y+H+H+G+z+F+H+H+H+H+t+s+F+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+s+U A.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+E.h.|.E+H+H+H+B+` #.k.|+c+c+B._+0+4+(+4+{.#.#.k.l.7.|.z.$+h.J.J.U.y.i.~.#.3.$.].].v ~.;+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+v.O.v._+3+m+H+H+H+H+H+H+H+H+'+&+Y.7+ +_+&+0+[+R.F.&+4+3+|+'+l+H+E+:+{+{+t+H+B+5+|+b+m+t+z+o+d+t+n+6+8+f+s+y+z+t+m+m+f+n+t+w+o+o+f+s+q+l+l+l+l+u+q+f+q+f+f+f+f+f+s+m+r+f+f+k+k+f+m+r+c+k+k+f+d+m+F+H+F+z+F+H+H+H+t+x+s+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+F+X.,.r+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+!+2.].z+H+H+H+H+y+X.*.i.M._+h+4+X.3.~.h+j. +B+F+F+F+F+H+(.U.j.O O $.i.J.++++6.p.Z ` G ~.3.5+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+v+o.;+!+P.4+c+F+H+H+H+H+H+H+d+&+O.}+e+1+F.4+c+E+z+v+'+G.O._+_+0+c+t+&+;+P.:+z+f+b+!+j+t+t+F+H+F+o+d+6+d+f+d+m+m+f+n+v+G+H+H+H+H+H+v+r+y+A+u+u+l+l+q+q+q+q+q+q+q+8+r+r+f+f+f+f+f+m+r+f+f+k+k+B+v+f+s+G+z+G+H+H+H+y+x+s+F+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+F+j+,._+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+n+{.3.6+H+H+H+H+H+Y.*+z.>+h+M.7.E *.k.%+X.6+H+v+v+z+F+H+H+v+].U.k.v+g+*.*.H >+h.D.F+H+H+*.i.6.t+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+g+O.[+|+!+Y.4+r+G+H+H+H+H+v+Y.T.W.1+|+{+&+{+j+z+E+A+u+u+}+W. +=+_+_+4+x+W.P.:+|+f+|+j+z+t+z+H+H+H+H+w+d+d+n+6+t+E+H+H+H+H+H+H+H+H+H+d+r+F+H+H+H+H+G+B+w+w+w+q+d+m+f+s+q+d+d+f+r+m+d+k+f+q+B+F+z+d+r+y+z+F+H+H+z+s+s+t+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+F+F+G+H+G+D+j+d.%+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+s+r+5+!+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+v+k.i.!+H+H+H+H+H+!+J.U.z.m.$+{+E+E+w+T.(+l.Z.N s 1 s ).~+q+_+j.i.*+y+H+!+L.z.%+=.W B.t+H+p.i.~.E.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+[+Y.|+|+5+!+&+_+y+H+H+H+z+;+T.p. 
+{+&+=+[+ +{+r+E+E+A+u+l+e+E+w+d+Y.4+:+v+G.=+c+o+b+m+H+v+z+H+H+H+H+H+z+s+t+d+z+H+H+H+H+H+H+H+H+H+H+F+m+s+G+H+G+F+F+F+F+H+H+z+m+s+v+n+s+n+m+r+n+q+q+q+w+B+F+G+F+B+f+s+z+G+H+z+s+s+t+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+G+s+_+_+0+b+0+V.d.%+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+f+b+b+0+h+4+/.M |.n+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+(.~.o.H+H+H+H+t+_+$+J.m.f+z+/.<+c+H+m+%+].1 ; , , , , , ; 5 M.;+P.U.(+B+H+/.>+E.A+1.M B.m+q+#.3.#.Y.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+g+1.v.=+=+|+Y._+b+F+H+H+`.&+O.{+&+O. +1+e+q+`.b+t+E+A+u+u+e+e+E+H+Y.&+O.|+|+G.|+|+b+z+H+v+y+H+H+H+H+H+H+o+m+n+z+H+H+H+H+H+H+H+H+H+H+F+v+m+y+H+G+G+H+H+H+H+B+m+m+F+F+v+m+s+m+z+E+G+E+E+G+F+F+H+G+F+o+f+s+H+y+x+s+t+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+b+d.U U L U ,.d.b+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+G+c+_+&+4+h+h+%+i.$.k E H #.~.5+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Y.v n P o+H+H+|+U.$+~.v.H+H+H+d+j.(+z+C.&+1 ; , ! { { { { ! , ] ~+H+V >+0+d+*+K.L.r+H+ +M 2.f+V j.V 3.`.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+v.P.p.F.Y.O.Y.4+c+t+!+&+&+_+O.p.W.,+}+e+l+[+{+b+A+E+A+u+u+e+l+v+Y.b+R.Y.:+'+P.:+b+H+H+z+z+F+H+H+H+H+H+H+d+n+F+H+H+H+H+H+H+H+H+H+H+H+F+d+r+z+F+F+H+H+H+B+m+m+F+z+m+m+m+f+z+H+H+H+H+H+G+F+F+H+F+F+F+d+s+t+x+s+n+z+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+F+V.d.d.d.t.V.3+G+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+t+c+&+&+(+4+_+X.j.C v m Z )+H+'+v H #.{.=+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+:+X.B v 7 B ;+L.++z.7.k.A+H+H+H+E+O.*+B.4+u ; , ! { : a : : { ! 
, ; ).7+].$+B.>+a.L.%+y+H+W.M 2.*.2.F.=.3.f+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+ +;+|+ +p.O.o.&+4+|+&+&+{+`.5+q.H..+,+9+e+l+ +0+j+A+E+A+u+l+9+7+_+c+5+G.{+5+'+=+m+H+H+z+z+G+H+H+H+H+H+H+v+m+y+H+H+H+H+H+H+H+H+H+H+F+F+z+f+x+G+H+H+H+F+m+m+t+m+m+m+n+v+d+m+G+H+H+H+F+F+G+F+F+G+F+B+z+f+s+s+v+t+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+y+m+j+s+D+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+P $.S.S.l.3.W C v P v.'+w+H+H+H+d+n E H ~.{.S.n+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+{+*+].k v E ~.N.i.m.l.X.8+H+H+H+H+H+o+L.4+n.; , ! { : a T T a : { ! , , s @.2.>+C.B+!+M._+H+H+'+V 2.2.Y.[+V k.n+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+d+O.:+j+H+7+R.1.&+&+Y.5+o+t+r+6+r.H..+}+9+e+e+P.b+m+E+A+A+u+l+R._+:+5+o+ +=+!+3+f+G+F+B+z+F+H+H+H+H+H+H+B+d+s+F+H+H+z+z+y+t+t+t+s+m+m+m+5+b+j+r+r+t+s+m+r+m+m+v+B+H+G+v+f+t+H+H+H+F+F+G+F+F+F+z+t+s+m+f+v+t+G+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+d+n k _ ) _ H P +d+v+H+H+H+H+H+H+A+n a.W.v ~.{.B.|+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+{+2+].B n 3.$+y.v b =.X.Y.H+H+H+H+H+H+|+*+U.h.9 { { : o }.I.I.o : : { { ! , ; n.$+k.H+E+O.$+b+H+H+o+P {.3.|+G.O E.;+;+|+f+n+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+z+P.:+5+H+H+z+o+O.&+|+H+G+m+r+G+e+b.H..+}+9+e+7+Y.j+t+E+A+A+u+ +4+|+n+G+v+G.=+3+f+B+z+z+v+z+z+F+F+G+H+H+E+d+m+s+F+!+X.2+4+2+2+2+2+(+(+<+(+(+*+*+S.S.r+m+t+z+G+H+H+H+F+F+d+m+z+H+G+F+z+z+z+z+t+s+m+m+o+7+n+y+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+F.v (.O.O.v #.;+H+H+H+H+H+H+m+c+&+_+_+U.K.j.2.~.#.i.O.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+{+2+]. 
+C.2+>+j.'+v+p 7 O s+H+H+H+H+H+5+$+M.8 z.&.{ : o }.I.I.T a a a : : { ! +.2+#.2.Y.H+q+l.<+m+H+H+v+V z.U.++J.z.3.3.].].m B Y.z+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+'+=+b+H+H+H+H+6+T.0+s+t+r+y+H+E+,+r.Q.,+}+9+e+'+{+j+z+E+A+u+O.4+7+E+H+H+o+R.{+f+H+H+E+F+w+v+y+z+y+F+F+G+n+n+t+F.B.j.K././././.*.V P P E E ` v.p 6.{+z+H+H+H+H+H+H+H+G+F+6+s+F+F+B+B+z+t+s+s+t+H+H+v+g+g+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+$.n H &+X.v.].2.c+m+c+&+&+%+_+_+4+&+X.4+z.K.U.++y.i.h.D.t+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+=+*+C.M.<+%+].z.<+y+H+&+a.C.b+H+H+H+H+5+U.M.1 ! u V.Q o e.I.e.T a a a a a : { &.4+&.; D i.=+H+[+L.(+z+G+n+K.C.O O k.z.J.J.J.i.~.M ~.~.l.{+c+s+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+[+Y.:+H+H+H+H+q+1.&+b+c+y+G+H+H+A+Q.r.Q.,+}+9+e+ +0+m+E+E+l+{+_+9+u+H+H+H+[+P.Y.5+F+H+H+H+E+z+v+t+t+y+F+n+d+E.U.6.m C.B.a.(.v.v.G.'+8+f+s+m+s+Z E k.5+s+F+H+F+F+G+F+F+F+o+f+y+H+G+y+s+s+t+H+H+H+H+H+t+B+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+6+3.k ].&+S.j+&+%+*+%+X.S.T.T.T.O.!+[+[+S.&+a.D.y.#.#.i.h.z.&+|+m+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+t+=+M.*+X.S.E.C G.P.J._+:+4+D.E.$+c+H+H+5+>+M.1 ! { { y y.f.I.e.o a : { : a a o '.2++.! 
{ ] v j.|+H+'+M._+{+3.<+m.!+g+u+6+'+P.F.D.i.>+J.y.J.y.J.K.X.&+;+n+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+g+;+:+H+H+H+H+B+p.O._+c+y+H+H+H+H+l+r.r..+,+9+e+7+Y.b+n+E+g+_+:+l+9+u+H+E+!+P.'+P.;+t+H+H+H+H+F+F+w+d+5+O.B.z.7.O.Z E K.=+n+t+t+t+t+v+v+z+w+g+R.m O S.m+m+y+F+F+F+F+G+G+v+d+m+t+s+m+t+F+H+H+F+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+F.~.C.2+(+K.j.7.m.*.~.F.n+o+v+A+H+H+H+H+`._+&+K.i.h.v n H M {.y.K.L.=+d+H+H+H+H+H+H+H+H+H+H+H+f+0+4+<+z.m.|+n+'+C |+B+m.U.&+B.v+6+M.*+y+`.<+B.] ! { : a s ~.J.'.o : : { { : : a &.(+K a : { { ; M 3.w o+T.X.].*+S.w+H+H+H+H+H+H+A+v+[+i.*+P B $.J.2.J.J.j.l.O.f+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+!+;+E+H+H+H+z+ +F.Y.4+r+F+H+H+H+H+9+b.H..+}+9+e+[+{+j+v+[+_+5+u+l+9+A+v+;+5+E+7+G.P.|+F+H+H+E+f+=+T.7.B.L.`.[+n+n+F O V.m+G+G+G+H+H+H+G+F+v+7+E B ].=+v+m+s+y+H+F+z+y+t+n+m+m+t+F+H+F+F+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+7.i.K.*+m.[+g+F+H+[+W C.n+H+H+H+H+H+H+H+H+/.h+U.#.P g+P.W v M P *.i.J.J.L.=+G+H+H+H+H+H+H+5+&+4+(+i.H 6+H+H+H+'+C [+H+n+i.y.[+H+H+P.*+D *+B.] { { : o T I. .i.h.o : { ! ! { { &.2+%.o a a : : { 1 #._.; !.m.j.<+[+H+H+H+H+H+H+H+H+H+H+v.U.M.P H J.1.P $.z.U.z.j.m.f+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+ +=+|+z+H+H+H+[+O.R.&+0+t+H+H+H+H+H+}+b.H..+}+9+l+G.0+r+|+_+n+A+u+e+9+o+`.t+H+H+B+c.v.Y.5+b+&+X.S.7.K.M.S.{+3+{+:+v.p 3.&+t+G+H+G+F+F+F+D+z+g+` *.B j.f+v+n+s+t+t+s+s+n+n+g+g+H+F+F+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+8+z.y.H M.%+z+H+H+H+H+ +{.j.n+H+H+H+H+F+t+T.{.K.%+P.H+H+w+o.H $.E.A+'+v./.i.y.y.X.*+*+X.X.*+(+X.~.*.6.d+H+H+H+H+ +C n+F+Y.k.U.{+H+H+v+g $+C.] 
{ { : o }.I.I.T 5 z.&., ! , , ! u 2+4., a a a a a : { 5 2.+.; 8 <+3.n+H+H+H+H+H+H+H+H+H+H+t+$.>+D.*.J.!+F.|.W.a.k.K.K.i.D.c+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+ +`.=+5+z+H+7+O.'+P._+b+F+H+H+H+H+E+Q.r.Q.,+}+9+e+v.b+{+_+b+c+:+:+:+{+&+0+0+0+b+0+o.p Y.0+&+=.|.K.B.m.&+&+&+_+_+(+(.m z.&+f+s+t+t+y+y+t+t+t+W.*.1.V B.m+t+n+t+n+n+t+z+t+z+w+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+t+X.i.M m.Y.*+&+H+H+H+H+E+v.2.k.v+H+H+t+{+z.2.i.<+2.z.t+H+H+g+/.v ~.5+H+H+A+[+7.(+j.D.S.S.C.(+C.) ) v [+H+H+H+H+H+v.$.v+=+2+O.k.>+c+~+w <+n.] { : a o e.I.e.T a : u y.Q , ; ; u 2+^.9 : : : a a a a : { 8 i.u 4.X.=.D.t+H+H+H+H+H+H+H+z+c+X.k.C.4+$.J.D.l.!+H+H+g+'+F.l.i.h.L.c+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+g+'+O.=+c+6+P.`.5+Y._+r+G+H+H+H+H+v+r.r.Q.,+}+1+R.F.&+&+{+{+{+:+:+:+:+:+:+|+5+T.S.q m a.o.l.z.l.o.F.o.(.(.(.(.=.a.p V z.l.F.Y.;+5+f+t+B+E+q+P Y.E B.{+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+t+L.h.n $.z+v+L.2+;+H+H+H+H+A+v.i.L.n+{+K.h./.a.(+M.v.h.X.v+H+H+o+B H l.F+H+H+f+2+M.d+v+v+w+R.3.4+H 0 k /.n+H+H+H+H+(./.Y.4+S.o+ +K.M.D *+j.] { : a T e.I.e.o a : { , y J.5 % u (+^.; ! { { : : a a a a a ! 
g i.K.o.H.5.O.F+H+H+H+H+H+n+&+X.E.n+O.>+z.z.l.7.z+H+H+H+H+E+o+6+v.j.&+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+g+R.O.=+P.Y.c+5+{+4+s+H+H+H+y+r+!+p.G.`.=+Y.F.o.T.&+;+w+o+o+q+7+}+q+z+v+;+X.T.p.3 E B.K.:+o+v+v+o+o+o+o+n+n+n+[+P ].K.G.1.=.a.o.F.O.:+0+{+4+4+_+$+{+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+n+B.y.M n 6+H+H+'+*+(+t+H+H+H+H+H+(.i.y.y.$.R.z+_+>+ +H+a.y.L.z+H+H+v+k #.!+H+G+_+$+`.H+H+n+S.J.{.M.(+n+R.v $.n+H+H+H+V M &+%+8+H+E+=.>+<+k.9 { : a T I.I.}.o : : { ! , ] #.J.8 (+_.; , , ! ! { { : : a a a a u $.y.'+}+.+b.G.H+H+H+H+5+%+L.`.G+H+w+k.>+i.L.$+m+H+H+H+H+H+H+H+w.j.&+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+'+v.v.Y.=+f+ +_+0+t+n+;+{+Y.&+&+Y.O.o.1.o.1.a.&+{+A+E+A+u+u+e+}+H+|+X.T.G+B+Z k.$+`.H+H+H+H+H+H+H+H+H+H+H+E+ +m i.%+v+|+7.S._+h+h+_+C.6.O K.C.3.&+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+n+B.y.$.6.*.t+H+H+H+|.<+{+H+H+H+H+H+n+C.K.*.w+H+5+*+7.H+H+A+|.J.&+G+H+G+R.H #.y+c+*+D.E+t+Y.h.i.7.'+F.(+_+H+p.B M [+H+H+O 6.&+P.H+A+r.=.$+K.p.l+T a T I.I.}.o : { { ! , ; ; 5 i.$+n.& % ; ; , , ! { { : : a 9 _.V.i.M.X.7+9+,+H.H.u+v+{+*+S.6+H+H+H+H+v.>+%+m.S.%+F+H+H+H+H+H+H+=.z.{+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+w+W.P.O.O.F.F._+Y.Y.Y.Y.P.R.'+w.[.H.w.O.R.1.o.&+f+A+E+A+u+u+7+1.X.{+F+H+H+R.V U._+H+H+H+H+H+H+H+H+H+H+H+H+q+v.B J.(+4+4+_+D.m.P.5.G =.V $+O.` 3.S.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+n+L.h.M &+l.#.j.|+H+H+z+k.(+m+H+H+H+o+F.;+m.D.6+o+%+M.n+H+H+H+w+/.y.:+m+_+J.3.2.B.&+M.|+{+z.2.m. 
+A+H+A+7.2+c+H+R.k M !+A+C 6.k.r+8+b.(.$+L.3.j.n+F+I.I.I.T a : { { ! , ; % & g K.2.v - & % % ; , , ! ! { ! Q y.y.u @+F.>+&+e+9+}+.+r.P.L.=+o+H+H+H+H+v+S.>+&+c+ +X.0+H+H+H+H+H+o+V z.:+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+'+p.O.P.R.Y.Y.!+7+w+E+H+H+E+r.b.1.Y.)+}+p.E.:+w+A+E+A+g+O.%+Y.v+H+H+v+a.*.V $+m+H+H+H+H+H+H+H+H+H+H+H+E+a.j.i.U.Y.8+g+A+H+H+H+R.=.].$+|.F.P $.L.s+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+v+B.y./.&+%+'+'+M i.Y.B+H+8+K.&+G+H+,+q.q.H.H.w.r.o.>+a.)+l+A+H+H+o+].y.C.A.l.d+R.{.(+j.K.2.*.8+w+H+H+H+H+n+j._+y+H+'+k M a.6.i.$+T.R a.<+B.[+G.i.L.w+F+I.T a : { ! ! , ; % & D >+B.` b v - # & % ; ; , ! 9 4.V.%.9 }.H+B+/.>+:+l+e+9+,+Q.c.e+H+H+H+H+v+E.*+E.a.4+m+o.%+m+H+H+H+H+ +*.K.5+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+d+`.{+|+6+{+0+F+H+H+H+H+H+l+c.F.O.,+}+9+p.S.c+A+E+o+Y.m.S.1+u+H+H+[+].|+p.].(+z+H+H+H+H+H+H+H+H+H+H+!+B.i.G 3.(+e+E+H+H+H+B+a.V U.&+w.E /.V O L.t+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+j.y.H k D.O.H+H+7+V 2.l.z+B+T.<+=+r.b..+,+}+9+}+W.$+M.w.H.r.b.b.Q.)+|.2.y.o.H+H+d+*+M.2.$.R.E+H+H+H+H+H+H+H+;+X.{+H+H+'+k M k.a.G ;.v.>+B.7+u+A+(.y.L.E+r+a : { ! ! 
, ; % & D >+l.v+G+p.n H ' # # & % ; u J.J.u , { o+u+E+g+j.<+8+u+l+9+}+.+H.,+E+H+n+D.M.U.d+n+D._+d+k._+z+H+H+E+o.O K.5+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+g+P.{+|+!+4+5+H+H+H+H+H+H+'+O.p.Q.,+9+7+a.S.o+6+S.&+(.%+7+9+E+F+(.l.z+z+G z.3+H+H+H+H+H+H+E+e+.+1.z.2.<.H.` U.E..+e+H+H+6+|.*.>+5+H+o+5.E E V k.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+k.J.v _ _ 7 Y.H+H+H+w+=.3.{.f+d+X.X.<..+}+e+l+l+e+C.(+o.9+}+,+,+.+Q.r.b.` i.z.6+u+M.(+~.{.;+H+H+H+H+H+H+H+H+H+v+E.*+c+H+z+|.M &+W.[.v.>+L.6+u+E+H+u+|.y.{+C+/+{ ! , , ; % # K >+D.o+H+H+H+5.n *.` - # # +.#+{.1 ! ! u.e+l+u+E+R.z.4+v+u+l+e+9+,+Q.Q.6+L.L.K.T.H+E+`._+_+!+M.c+H+H+g+m.*.U.f+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+ +{+;+=+4+m+H+H+H+H+E+ +O.b.H..+,+9+W.m.Y.S.&+8+p.L.|+e+9+[+].|+F+G+[+V U.c+A+l+}+r.[.J J G i.j.5..+}+,+|.>+!+,+9+H+G.|.j.U.E+H+H+H+7+Z 0 ].l.t+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+!+#.i.7 P E *.2.`.E+H+H+E+R.#.i.;+(.(+S.,+e+u+A+E+=+<+k.9+l+e+e+9+9+9+}+}+.+(.h.6.(.2+6.` (.6.'+E+H+H+H+H+H+H+H+H+8+X.(+f+&+*+m.1.[.o.>+S.[+u+E+H+H+H+v+*.y.;+i+N ! 
, ; & # ^.$+T.o+H+H+H+H+H+5.k /.G.l h.y.g & ; ; N }+9+e+e+l+u+F.K.b+E+A+u+l+e+}+.+q.F.3.*+n+H+H+u+E.4+5+O.X.y+H+[+7.O U.n+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+'+=+Y.{+0+y+H+H+H+v+O.;+Q.[.H..+}+9+p.S.&+d+H+1+l.:+l+9+=.k.;+8+[+'+Z 6.V.<.S S ;.;.;.` z.k.c.Q.,+9+l+W.~.$+6+.+e+(./.K.{+H+H+H+H+H+H+W.m ].D.5+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+l.M y.k.X.5+d+*.2.P.H+H+H+E+6+O *.;.].2+Y.l+E+H+n+(+K.[+E+A+u+u+l+l+l+e+e+e+}+a.h.X.z.w.Q.H.q.q.H.H..+}+l+H+H+H+H+H+o.2+X.M.a.;.b.F.<+B.6+A+H+H+H+H+H+H+[+~.z.'+W.] ; & - n.U.Y.B+H+H+H+H+H+H+H+'+b *.U.#.l # & % 5 q..+,+}+9+e+l+7+l.>+5+H+E+u+l+e+9+}+5.U.T.n+t+t+t+P.C.0+O.L.&+E+P.7.$.U.v+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+[+;+O.4+c+F+H+H+d+F.5+f+b.b.Q.,+}+p.(.S.f+A+u+*._+u+w.7.a.c.c.:.[.[.F z.L.:.[.[.;.=.z.6.-.r..+}+e+u+E+G.i._+7+.+1.6.U.t+H+H+H+H+H+H+7+E m 3.6.Y.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+8+{.(.#.h.F.B+H+n+/.2.c+H+H+H+H+v+F H.w.z.&+n+H+G+b+>+O.H+H+H+H+E+E+A+A+u+u+l+l+g+$.U.E.9+9+}+}+,+,+.+Q.H.r.b.H.Q.e+u+o+~.2+V S r.v.>+k.g+A+H+H+H+H+H+E+u+e+G.2.z.w.w & - z.>+Y.G+H+H+H+H+H+H+B+|+j.#.n M [+$.g # ' J [.r.Q..+,+9+9+l+ +M.*+t+H+H+A+o+!+T.&+2+>+>+<+<+<+>+>+>+$+z.K.Y.a.a.~.U.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+[+Y.Y.4+m+E+t+ +O.r+r+ +[.r.Q.W.o.a.m._+w+E+1.S.w.=.k.<.b.b.b.b.[.b.E K.o.[.[.P K.7.;.[.H.,+9+e+u+A+A+P U.|+1+W.6.*+F+H+H+H+H+H+H+G.7.m k 3.*.D.m+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+/.~.;+$.i.;+H+H+H+6+].2.5+H+H+H+H.b..+1+a.*+{+c+5+X.B.z+H+H+H+H+H+H+H+H+H+E+E+z+Y.2+M.A.!+l+e+e+e+9+9+}+}+,+,+.+Q.r.b.:.Z K.T.w.P.>+S.d+E+H+H+H+H+H+E+u+e+9+,+` y.3.j ' K.>+m.z+H+H+H+H+G+y+=+i.h.j.a.` ~.3.F.J.P d d A ;.b.H.Q.,+}+9+e+1.$+&+t+c+&+X.4+4+j+j.].*.*.|.F.F.o.m.J.y.y.y.h.E.i.L.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+R.P.{+0+j+r+F.=+8+v+H+,+[.r.p.m.'+5.D.|+A+R.B.1.l.a.b.b.b.b.b.b.b.:.O K.5.P J.7.;.;.;.H.,+9+e+l+u+A+o+/.$+6+W.y.b+H+H+H+H+H+H+A+a.7.g+m O $.B 6.5+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+!+{.|.S.S.#.D.z+H+H+H+[+$.2.5+H+Q.[.G.!+:+&+(+$+N.U.K.`.u+A+A+E+E+E+H+H+H+H+H+E+:+2+T.G.i.J.|+A+u+u+l+l+e+e+e+9+9+}+}+,+,+.+a.*+S.$+S.6+E+H+H+H+H+H+A+l+e+9+,+.+b.E y.].X.U.~.X.z+H+H+B+f+T.i.2.l.P.6+H+H+1.~.j.z.3.O.[.d i J [.r.Q..+,+9+}+|.<+(+%+&+Y.k._+S.1+9+.+.+H+H+H+H+v+7.J.V v ~.i.J.L.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+ +v.0+0+|+F.m+H+H+H+E+H.:.1.F.,+1+1.S.f+W./.7.l.c.r.b.b.b.b.b.b.[.R 3.6.y.7.R ;.;.;.r..+}+9+e+l+u+A+ +z.%+a.U.8+H+H+H+H+H+H+7+|.O.y+P.V X.$.$.{.Y.n+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+].3.m.2+F.a.2.f+H+H+H+H+d+M j.P.=.Y.0+(+K.l.o.j.N._+{.o.e+l+l+l+u+u+A+A+A+E+F+:+4+6.n+H+!+2.X.t+H+E+E+A+A+u+u+l+l+l+e+e+9+9+'+C.2+C.|+t+v+z+w+A+H+A+l+e+9+,+Q.b.S 4 V $+i.M j.J.r+B+;+X.z.{.|.8+v+H+H+H+H+F+v.W 3.E 6.;+}+t 4 A ;.b.H.Q.,+9+*.C.(+;+6+d+&+X.d+u+e+,+b.A+H+H+H+H+[+i.U.m E ].>+O.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+ +O.h+P.Y.v+H+H+H+G+c+c.m.G..+}+W.m.S.q.P 7.a.r.r.b.b.b.b.b.[.[.[.E M.S.S ;.;.S S 
b.H..+}+9+e+l+u+u+1.U.K.%+W.9+H+H+H+H+n+F.k._+h+4+*+B.J.J.J.J.X.m+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+`.i.M (+7.z+8+{.m.H+H+H+H+E+;+i.4+h+*+3.a.'+F+E+E.<+i.H #.R.9+e+e+e+e+l+l+l+u+;+4+j.7+E+H+H+v.h.L.F+H+H+H+H+H+E+E+A+A+u+u+u+[+B.N.(+<+$+$+$+$+$+U.K.C.X.L.S.E.m.(.E P.L.&+_ _ 7 v ].z.y.~.V '+n+H+H+H+H+H+H+H+B+=.{.a.E k.f+H+H.( i J [.r.Q.1.i.$.S.&+l+E.(+|+H+u+e+}+r.l+H+H+H+H+H+G.J.j.m M.i.d+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+G.{+O.|+F+H+H+G+r+j+1.E.w.Q.,+}+p.D.O.` 7.m.1.c.b.b.b.b.[.[.[.;.E ].h+1.S S S S ;.b.Q.,+}+9+e+l+6+o.~.>+{+9+.+q+f+{+&+4+h+4+M.k.F.'+E $.J.a.V ~.&+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+F+k.~.l.%+8+H+v+7.{.v+F+s+{+X.4+_+B.*.5.9+A+H+G+c+4+C.O #.#.*.,+}+}+}+9+9+9+e+!+%+L.W.u+u+u+u+A+(.A.{+F+H+H+H+H+H+H+H+H+H+B+=+A.++J.z.$+z.].6.k.j.C.z.K.z.K.K.J.U.U.$+>+(+j.*.q ) _ v #.M j.U.N.N.A.K.X.X.%+&+{+:+|.H 2.v.V l.n+H+e+t 4 A ;.-.3.3.c.a.4+E.(+{+z+E+l+9+,+b.l+H+H+H+H+H+ +H U.i.>+*.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+5.Y.:+F+H+G+c+j+8+o.F.[.r.Q.,+Q.` L.P E P /.7.|.1.<.:.[.[.[.R *.E j.0+R J J A A ;.q.W.W.R.m.3.2.i.3.~.*+Y.S.%+4+4+_+j.#.P.o+H+H+H+'+B {.4+H+m L.t+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+;+i.O 4+Y.G+H+H+;+i.Y._+*+&+T.*.` q.9+l+u+E+H+c+(+B.W.5.H #.{.1..+.+.+,+,+}+ +X.&+R.9+e+e+e+e+l+7+7.A.=+A+A+E+E+E+H+H+H+m+S.N.U.J.l.[+7.$+|+u+l+9+)+,+W.c.-.F e Z +7.4+B.$.;+_+z.v |.R.;+;+O.F.O.T.T.S.L.B.C.z.K.j.H M i.o.|.l.n+y+&+-.i d P 2.P 
w.(.C.h+_+t+H+E+l+9+.+[.l+H+H+H+H+d+B.a.7.$+i.o.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+g+1.Y.c+F+m+b+s+'+E.8+Q.[.r.Q.Q.:.(.=.a.:.R ` P *./.|.(.5.-.G ].R p X.O.J A t q ` (.7.2.{.3.|.o.R.1+P y.*+{+T.o. +n+1.m.t+H+H+H+H+H+G.v h.j+Z 3.j+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+G+D.M {+L.o+H+H+H+=+$+4+B.O.8+w.;.Q.}+e+l+E+H+|+4+M.'+l+9+5.v ~.$.:.r.r.H.Q.G._+%+p.,+}+}+}+9+9+9+e+W.3.J.;+l+l+u+u+A+E+|+K.N.++h.F.o+H+g+j.$+f+u+e+9+}+.+H.;.d b.E+t+_+X.v ].K.g./.h.{+H+H+H+H+E+A+A+B+w+o+o+n+d+n+R.b k H j.(.*.7.E.L.;+R.G 2.2.6.3.{.C X.0+y+H+A+l+9+Q.;.E+H+H+H+G+O.6.t+!+N.n 6+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+p.v.{+j+j+m+E+o.T.z+A+b.[.H.H.r.Z |.E.b.b.[.;.R F E O *./.B 6.=.G V _+(.=./.3.i.~.O =.p.W.,+9+u+R.~.#.y.O.9+,+}+A+=.Y.B+H+H+H+H+H+w+(.H U.v.v &+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+5+{.|.X.[+H+H+H+Y.J.{.2+&+z+u+;.H.,+9+l+A+H+|+4+X. 
+l+e+}+,+<.v ~.P S ;.;.5._+*+G H.Q.Q..+.+,+,+,+}+}+w.2.K.'+e+e+e+7+D.++J.++{.'+A+E+E+H+8+i.M.7+9+,+Q.r.;.i Q.E+v+E.(+V k #.#.8+!+g.c+H+H+H+H+H+H+H+H+H+H+H+H+H+H+w+=.O 7 H 3.~.~.~.~.3.3.3.i.~.H P 5.1+(.(+c+G+u+e+}+Q.[.y+n+8+5+F.k.|+H+l.J.O H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+[+1.O.{+5+H+6+E.;+H+H+u+;.b.r.r.-.|.(+p.[.[.[.;.;.S J q m V *.].].$.6.z.j.$.E e e S [.H..+}+9+)+].i.p./.J.6+e+,+c.7.5+H+H+H+H+H+H+H+d+E ~.J.k j.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+6.#.M.F.H+H+z+E.A.~.W.6.4+{+[.r.,+9+l+u+H+f+_+%+`.u+e+9+,+Q.b.i v ~.E d q S.*+E ;.;.[.b.b.r.H.H.Q.Q..+,+` y.B.'+1+'+B.N.J.J.7.1+l+l+u+u+A+E+O.U.D.,+Q.b.S e G.G+y+k.%+k.y.~.C 8+H+F.J.t+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+ +].m.G.2.B B H B G M i.m F r.,+9+l+A+/.4+s+u+e+}+H.Z k.l.l.k.7.D.v+d+y.z.o.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+v+1.1.T.|+H+ +E.t+H+H+H+;.b.r.b.Z |.l.(+:.;.;.;.S S S S J m E q e e m L.E.i A J d t ;.b.Q.,+W.|.i.P.A+p.2.U.g+9+.+F.n+H+H+H+H+H+H+H+H+R.B i.i.{.c+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+'+W *.7.E+H+z+S.A.l.d+H+v+k.%+p..+}+e+u+H+t+_+%+`.A+l+9+}+Q.b.A J p.M ~.E 7.2+O 3 d d i t J S ;.[.[.b.r.r.H.=.y.E.F.y.N.N.i.o.)+9+9+e+e+l+E+H+o+l.>+O.H.S J 1.(+:+M.M.%+h.j.P.H Y.E+3.X.v+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+E+P 6.!+D.J.L.*+X.E.h.E.[.S H.,+9+l+E+8+L._+g+e+,+b.=.R.'+v.D./.k.t+F.>+#.!+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+R.v.o.&+|+O.Y.H+H+H+H+S b.H.Q.Z m.` S.&+;.S S S S J A A p /.R [.[.R B 4+5+E+H+b.4 A ;.b.H.(.2.a.l+u+A+` h.&+q+9+.+q+H+H+H+H+H+H+H+H+E+(.H y.h.S.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+k H 7 
*.[+t+L.y.|.o+H+H+H+R.X.X.W.e+u+H+t+&+X.`.A+l+e+}+.+r.S i l+z+V #.#.C.$.j.*.)+B E J d } } d t A J R Z =.$.J.N.J.N.].p..+,+,+,+}+}+l+E+H+H+6+3.$+R.[..+t+D.*+z.4+{.E.d+B+V m.d+h.&+E+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+7+O E.R.K.J.O.m.h.7.q+[.;.Q.}+e+u+E+F+O._+{+9+,+[.S.c+w+T.B.!+7.F.K.K.M f+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+'+Y.1.O.F.m.n+H+H+H+H+;.b.H.,+` o.;.` (+F.J J A J ;.r.W.Z *.d+z+d+Y.5.*.4+x+E+`.q d J [.` 2.m.1+e+l+u+u+B J.5+e+}+,+A+H+H+H+H+H+H+H+H+o+V M J.y.G+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+`.n O m H M z.y.=.G+H+H+H+e+R (.4+S.l+E+t+%+2+F.w+l+e+}+,+H.;.t }+H+H+ +M 3.H 7 n =.H+Y.U.|+E+u+}+;.0 E $.3.i.2.#.2.<+y.P :.b.r.H.H.Q..+}+l+A+H+H+E+ +h.X.w.b.Y.U.C.h+j.R.F+H+B+v.M a.N.=+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+p.].|+V U.T.B.#.S.g+J [.Q.}+e+u+H+H+v+k.h+P..+;.l.%+E.B.8+B+F.3.>+$.7.n+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+6+O.:+` o.O.F+H+H+H+u+;.b.Q..+P O.1+.+a.(+p.H.Q.A+E+E+H+'+H m+H+ +|+B+F k.0+O.T.T.F i F ~.6.w.,+9+e+l+u+7+M J.o+e+}+W.=+;+=+`.`.;+5+|+8+v.H ~.J.|+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+z+B.v M [+v.y.y.(.z+H+H+H+E+;.H.W.B.4+Y.v+&+4+l.u+u+e+9+,+Q.[.A [.H+H+F+|+k.].W.0 _ 7 k v.j.~.|+F.6.{.2.3.].*.P ` [ 0 M.T.q A J S ;.[.b.Q.9+e+u+E+H+H+u+o.U.E.;.K.J.%+%+`.H+H+H+H+R.M M U.8+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+B+V 7.6.3.i.2.a.*+E.J b..+}+e+A+H+H+H+ +M.0+w.b.R.L.L.!+H+E+R.i.K.H E.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+o+`.:+[+a.O.;+z+G+H+u+;.b.Q.w.(.O.9+e+)+D._+1+,+B+H+H+H+g+B |+g+Y.f+H+q+E %+(+F.4+ +d $.i.Z H..+}+9+e+l+u+G.i.L.l+e+,+v.m.m.7.7.7.7.7.6.k./.*.K.X.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+|+>+v H o.J.i.V H F.v+H+E+q.b..+9+ +M.(+&+(+E.u+A+l+9+,+Q.b.J J E+H+H+c+_+B.O.j+B.v 7 7 _ n #.W #.#.#.#.*.P.8+F+H+d+*.H 2+|.J A d d t [..+}+e+l+A+H+H+A+g+m.$+a.` K._+]._+m+H+H+H+[+O #.K.w+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+'+*.B.M 2.T.A+F.m.A r.,+9+l+A+H+H+H+A+a.%+O.1.%+D.D.t+H+H+|+U.$.*.O.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+!+=+v+` a.o.o.=+c+d+;.r.Q.q.|.`.9+e+e+R.%+=+.+;+m+G+H+o+=.O.R.:+z+H+v+P./._+E.4+n+|.i.G S b.H..+}+9+e+l+l+(.A.{+l+9+.+W.g+'+R.v.o.a.D.X.(+&+_+U.:+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+G+%+z.k #.y.{. +g+P C |.[+,+[.Q.}+e+u+`.*+_+O.B+A+l+9+}+.+r.S i [+m+b+E._+L.$.M.j.|.P P m e G P.p.(.V ].2.h.J.U.$+(+X.i.H 6._+5+u+9+b.t [.Q.,+9+l+u+H+H+E+u+'+j.>+(.(+&+[+C.4+z+H+H+v+*.$.D.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+v+E.$.i.M z.y+z+Z J H.,+9+l+E+H+H+E+u+1+|._+D.=+'+7.=+H+B+Y.U.B ].!+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+'+:+o+a.o.=+o.a.a.F.o.p.W.<.m. 
+e+e+e+}+o.(+R.'+`.|+m+w+1./.=+f+H+f+_+&+` k.4+_+P.i.o.} A ;.b.Q.,+}+9+e+u+7+H U.5+l+)+G.:+0+j+4+(+M.S.O.o.H y.i.j.m+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+5+U.O #.J.6.d+H+H+F+1.H V ;.Q.}+9+l+E+S.h+i.7.o+u+e+}+,+H.;.d |.h+h+_+%+(+O v.[+z+G+B+B+B+B+B+G+G+G+G+F+o+[+R.a./.M 2.2.v #.S.%+n+H+u+t ;.r.,+}+e+u+E+H+H+u+l+p.K.$+&+f+H+P.*+b+H+H+H+(.#.v.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+t+7.i.{.M 2.D.m+;.S Q.,+9+l+E+H+H+E+l+9+W.D.%+F+E+P.k.n+z+z.j.B k.d+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+ +3+[+a.8+Y.j+;+'+p.(.m.E.` m.R.'+[+[+[+w.D.4+f+g+R.;+|+1./.Y.F+5+_+&+d+[+P %+&+2.E.G+;.4 A ;.r.Q.,+}+9+l+u+W.#.U.S._+0+4+%+j.E.!+n+F+B+(.O $+v #.L.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+M.C.o.i.U.&+G+H+H+H+H+5.R H.,+9+l+v+{+h+j.E ~.M '+9+,+Q.[.t m M j.*.L.4+#.P.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+G.2.v #.M 4+z.B.c+F A b.Q.}+9+l+A+H+H+E+u+7+k.<+&+z+H+z+E.%+m+H+H+Y.J./.z+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+!+{.2. 
[Embedded XPM image: raw pixel-string rows of a bitmap bundled in the RELION source tree; no caption or other recoverable text in this span.]
8 $+<+&.; ] #.U.y.2.M V g+H+H+H+R.M M v {.m n |.3.2.U.L.n+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+[+;+y+n+|+R.O.`.v.=+z+[+:+j+r+5+j+t+H+H+H+'+a.t+H+H+w+O.<+O.v.=.*.|.o.m.m.m./.6./.7././.$+B.<.Q.,+}+9+e+l+m+x.Y B.$+V.r ].$+m.D.M.L.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+(.#.M '+H+o+` i ;.b.H.Q.Q.Q.Q.(.N.L.G.X.M.W.9+9+9+9+e+e+e+e+l+l+l+q+u+l+m.4+|+L.>+!+H+H+H+H+H+H+H+H+H+H+H+H+7+k.>+O.=+&+%+*+L.4+0+#.E.o+m.i.P.L.2.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+o+(.6.d+H+H+H+H+H+E+o.2.m.m.h.e d i t A J S ;.[.[.Z ].H i.y.V ;.;.J F 7.5+A+a.l.H.3 3.j.b.H.1.i.h.1 ! , #.U.^.<+u %.i.#.M v n M 5+5+:+{+].K.M.C.X.3.H M z.U.&+v+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+ +5+c+j+z+7+G.O.Y.=+|+F.0+:+:+f+B+H+H+H+g+|.m+H+H+H+6+D.(+!+1.7.{+u+v+A+'+E.O.'+6+6+[+/.<+5.b.Q.,+}+g+s.Y x.U.#+Y z z r k._+d+1.B.=+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+'+C j.;+H+H+!+/.d A S [.[.b.b.b.(.N.C.>+(..+,+,+,+}+}+}+}+9+9+9+e+e+e+l+[+S.(+>+B.l+u+A+A+A+E+E+H+H+H+v+|+T.&+4+h+0+M.l.O.8+T.4+D.$.#.*.i.|.w+L.W H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+'+*.7.v+H+H+H+H+[+3.$.n+[+i.$.} 4 d i A J S ;.Z 3.#.3.i.P [.[.[.;.S F a.d+7.=.d A P y.a.5.i.h.G.9.! ] y.y.K K.<+K.U.X.*+>+<+>+>+>+>+$+$+J.y.h.{.v M J.J.Y.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+6+;+j+c+m+r+5+v.P.P.v.a.o.O.O.Y.=+=+;+|+6+a.5+H+H+H+8+o.X.&+1.7.Y.H+A+!+_+3+t+H+H+H+H+'+j.S.:.r.Q.,+s.X #+x...........z r K.j+1+*.L.f+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+o+n &+X.o+H+E+j.K.F d d i t A J Z ~.$+i.[.r.r.H.H.Q.Q..+.+,+,+,+}+}+}+9+9+P._+>+'+e+e+l+l+l+q+u+8+=+L.&+h+h+%+j./.(+m.o+H+5+_+S.C._+H ~.#.o+g+B.L.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+p.].E.z+H+H+z+m.{. 
+H+A+|.J.p } 4 d i A J F 2.i.v #.*.[.b.b.b.[.[.S J 1.B 4 A ;.R ~.2.3.i.G.}+}+u.g <+<+<+>+*+$+K.z.i.2.#.#.H a.O #.*.d+g+F+H+w+1.].5+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+F+!+5+r+r+r+5+v.=+t+B+H+o+v.F.v.v.o.F.O.F.a.O.;+m+8+:+5+v.%+o.=.T.B+=+_+=+B+H+H+H+H+H+H+p.$+o.:.r.a.z.$+Y ............z z O K.D+P B.T.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+(.#.4+&+E+H+8+A.&+H+u+'+Z E ~.h.3.j.J.P A J S ;.[.b.b.r.H.H.Q.Q..+.+,+)+T.*+h+E.}+1+[+[+;+T.M.(+_+_+S.l.v.w.Q.b.*.(+c+n+L.&+n+'+X.7.M ~.8+o+B.D.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+1.#.O.G+H+[+W |.F+H+H+'+2.B.R } } 4 i G i.i.#.~.k ~.Z b.b.b.b.[.[.S A 3 t ;.[.w./.z.z.v.}+}+}+,+1.~.{.M 8 l [+v.#.|.W.B M H o.X.L.n+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+6+;+|+n+B+H+p.|+G+H+H+H+6+Y.{+m+A+q+6+ +R.F.F.m.T.T.c+f+E.%+E l.T._+5+H+H+H+H+H+H+H+H+B+m.*+1.|.U.V.Y z z ............z c 3.X.W.].j.|+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+G.H j.4+5+H+H+E.z.=+Y.L.i.{.#.F.[+[+3.J.` [.S A i d t A S ;.[.[.b.r.q.D.<+a.k.4+O.=+_+%+*+S.T.O.P. +)+9+}+.+r.J c.&+&+S._+|+H+H+7.i.V #.6. 
+z.E.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+(.#.;+v+/.~.d+H+H+H+H+(.y.=+Z ( } p h.3.M i.V q M |.[.b.b.b.b.[.[.d i S Z m.K.U.C.%+!+}+}+G.l.J.i.(.I j o+v+].~.t+H+'+B M 6._+m+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+6+;+c+F+H+R.=+z+H+H+H+H+ +Y.:+t+H+H+H+H+u+G.o.a.E.S.T.F.%+|.|.{+m+H+H+H+H+H+H+H+H+H+E+R.C.*+V.X c f z z ..............z r K._+E j.k.m+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+6+O #.X.&+d+[+m.h.z.2.#.(.j.~.E+H+H+[+2.y.!+u+u+l+}+Q.[.A d 4 d t J B.2+/.(.X.h+(+M.7.a.p.w.W.W.,+,+}+,+.+r.S J H+;+_+h+O.E+H+O.y.W l.p.#.$.h.E.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+v+(.3.P {.o.n+f+|+:+{+{+M z._+(.m y.].B i.$.F S k ~.<.[.b.b.b.b.A d F /.*+$+j.G.W.k.&+R.a.J.A.|.c.:.J 9+H+(.{.5+H+H+H+p.n ].5+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+d+`.5+G+[+`.t+H+H+H+H+A+R.{+5+F+H+H+H+d+:+5+6+1.(.m.E.a.S.P =+H+H+H+H+H+H+H+H+H+H+F+R.V _+S.~ c 6 f z z ..............z H U.*.].7.S.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+6+B ].%+3.h.i.~.M v.H+f+K.E.H+H+H+H+[+h.h.n+H+H+H+H+H+H+=+z.E.Z 7.2+y.%+4+X.$.J.7.;.[.b.b.r.r.H.Q.Q.H.b.S J E+v+S.>+i.l.o+F.J.k.a.{.n+=.#.2.F.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+[+V $.j.&+4+h+h+4+_+S.V h.E./.y.].7 2.3.p A S q M =.:.[.b.b.S [ V K.>+z.1..+,+,+1.&+K.N.j.1.r.[.S ,+H+ +{.T.B+H+H+v+L.4+7.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+6+`.5+g+O.t+H+H+H+H+H+w+v.{+f+F+H+n+:+=+=+{+{+Y.p.1.(.m.P O.n+G+G+G+F+H+H+H+H+H+B+1.B *.4+H c c f f z z ..............f ].K.].6.|.E.n+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+p.3.~.H B (.K.L.A+5+y.`.H+H+H+H+H+!+y.B.t+H+H+H+B+v+D.y._+X.2+<+_+&+Y.w.[.|.++=.i A A A 
S ;.;.;.S t ;.A+n+L.$+Y.=.{.~.h.D.v+[+3.m.A+*.~.`.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+R.M $.6.T.|+!+d+o+u+z+7.#.y.D.Z $.i.p d t A J k #.:.[.[.<.E z.>+M.(.H.,+,+}+)+v.K.J.3.q.Q.b.S <.H+o+M j.d+H+H+z+T.4+2.`.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+6+;+;+v.f+H+H+H+H+E+F+6+O.{+f+5+Y.{+{+:+|+d+H+H+H+[+1.(.|.7.l.T.{+5+c+m+y+y+z+z+1.*.a.k.{+r c 6 f z z x ..............c j.C.z.P =.7.;+z+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+d+1.m b n H {.O.:+g.6+H+H+H+H+H+E+F.y.L.5+5+:+0+0+&+*+X.*+S.{+5+o+H+H+H+v+k.++=.,+Q.b.q q t S p v.d+f+X.U.Y.H+o+H #.{.d+H+w+7.2.d+T.i.[+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+o+k H H 7.5+H+H+H+H+F+1.v /.v+m.h.m ( 4 d t A q #.P -.|.M.>+C.a.c.Q..+,+}+,+o.J.M.$.J.v.b.S G 0+t+(.i.;+H+H+H+O.(+j.#.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+6+`.G.|+n+n+m+c+5+5+|+P.{+{+{+|+f+n+H+H+H+H+H+H+H+H+g+'+v.(./.D.L.L.X.&+_+_+0+=.*.0+O.S.D.~ c 6 f z z x ............z r K.z.B.R.p.V m.5+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+v+W.m b v H 6.2.H+H+H+H+H+H+H+E+a.J.(+2+*+K.i.3.k.U.6.M.d+H+H+H+H+H+H+d+z.$+5+H+g+M.D.g+A+!+M.T.*+J.F.H+o+L.h.O {.k.v+H+v.i.7.(+#.o+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+B ].V v ~.m.n+H+H+z+P n V P.2.O.q+4 ( 4 d i t v ~.X.>+j.` <.r.Q..+,+}+W.l.>+B.W.<.~.z.Z t n M.*.2.7.H+H+H+O.(+B.#.m.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+7+p.=+:+=+=+Y.Y.Y.Y.Y.P.p. 
+H+H+H+H+H+H+H+H+H+H+H+H+H+H+o+ +(.*.l.l.m.O.`.O.P /.{+;+o._+r ~ c 6 f z z x ............f #.$+$+X.T.P.E |.O.n+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+'+p n i.$.H |.O.;+8+n+F+H+E+H y.#.'+6+P.z.>+o.5+_+S.E+H+H+H+H+H+H+ +y.U.d+T.>+`.H+H+G+|.U.y.8+H+t+B.J.O.H+v.2.k.t+g+#.C.%+$.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+1.].D.8+E #.].!+H+[+C k H k j.m+H+u+d ( } d i B >+K.P J [.r.Q..+,+,+W.k.>+6.W..+Q.Z 2.3.n+[+B M {.v+H+H+ +*+&+B W 8+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+g+'+'+'+[+f+f+d+d+d+d+7+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+g+p.P |.7.F.!+=.m.F+H+o+E.4+/ c c f f z x x ........z r j.K.$+].z.U.K.6./.k.;+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+d+a./.#.M M W h.h.h.B.L.H ~.y.m.m+M.U.F.H+E+{+4+=+H+H+H+H+H+H+H+P.J.B.X.M.E+H+G+|+U.K.B.n+d+j.y.O.H+H+H+v.~.~.d+|.2+~.|.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+p.$.j.{+H+p.O W F.V 3.E v 7 {.|+H+H+)+[ ( } p 3.j.4+` ;.r.H..+,+,+R.C.$+m.,+,+Q.H.[.m h.&+v+|.v ~.Y.H+f+X.4+P.V ].H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+W.1.a./.B 6.5+z+H+ +&+T.~ c 6 f z z x ......>.6.C.i.>.*+{+[+o.D.L.K.z.j.D.m+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+o+'+v.|.H M #.~.2.2.H M $.y.z.K.#.&+&+:+O.M._+5+n+d+n+n+n+m+m+/.J.z.F.f+8+{+U.j.*.%+S.j.i.;+H+H+H+H+H+R.~.].X.M.M O.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+7+H 6.L.m+H+7+(.v #.E.W.7 v M D.G+H+z+1.3 [ $.3.p C.&+c.H.Q.,+.+v.z.U.F.)+,+Q.H.b.J q.a.y.].2.B v 2.!+T.%+|+[+C 
;+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+o+'+E V *.m.=+8+(._+(.~ c 6 f z z x x ..Y >.X z f M <+s+H+w+6+!+O.D.i.L.m+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+F+o+R.=.v M #.v ~.i.{.y.#.$.#.2.g.J.N.U.j.$+$+$+$+$+$+$+K.i.U.j.K.j.>+i.U.U.>+$+i.!+H+H+H+H+H+H+H+R.#.z.n n !+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+O z.L.S.H+H+z+'+v 3.|+7 M M {.z+H+o+Y.V B i.p 4 p *+T.Q..+,+o.U.J.R.,+,+.+H.b.S [.H+6+$.J.T.[+m #.k.(+:+E+=.#.n+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+p.*.|.=.B /././._+B r X 6 f z z x x ..........z /.2+F+H+H+H+B+v+R.z.m+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+ +=.n ~.M (.(.2.#.(.g+6+ +o.H ~.7.D.D.l.m.m.|.m.$.i.U.i.>+l.F.F.F.O.].B.d+H+H+H+H+H+H+t+M.J.v _ _ d+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+P B.V.X.5+H+H+H+ +M ~.7 $.H {.m.&+0+j+m i.O } } 4 V <+R.)+a.U.z.R.}+,+.+Q.r.;.A A+z+E.#.~.K.f+1.H 2+3+F+B+v l.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+7+(.l.m+w+p.P B k.X.z.j.c f f z x x x ..Y x.....Y M.0+H+H+H+H+H+p.K.F+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+v+v.~.{.F.|.2.#.!+H+H+H+R.W |+H+H+H+H+H+H+o+6.$.J.$+7.v+H+H+H+H+G.2.|+H+H+H+H+H+5+h.U.*.j.i.R.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+R.O U.C.X.z+H+F+t+a.~.v #.i.#.j.0+{+a.~.3.[+[.( } q 
D.*+/.U.C.W.}+,+.+Q.r.[.;.r.W.Y.h.T.G.2.%+o.2+M.E.H+p.M ;+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+p.l.{+H+H+H+1+a.C.*./ ~ 6 f f z x x ..x.p+x...z C >+c+H+H+H+H+P *+F+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+[+/.{.6.H {.~.f+H+H+o+C O.G+H+H+H+o+5+S.i.2.7.].].!+H+H+H+H+z+#.S.z+H+H+B+=+U.K.o.j.2.!+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+6+H ].$+L.O.c+j+0+&+H ~.n z.$.2.8+w+|.i.B 6+|+E [ p $.(+>+D.'+9+9+9+9+9+9+}+9+,+b./.T.G+H+v.{.$+&+v 3.m+E 3.d+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+'+|.D.F+H+H+H+B+E.2+k ~ c 6 f z z x ..Y x.Y ..x c 3.<+t+H+H+A+B (+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+B+v.W {.v ~.3.[+H+G+V /.z+H+n+Y.B.y.{.$.#.6+[+$.2.`.H+H+H+H+F.2.f+H+z+T.U.L.R.j.2.[+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+B+P $.].B.].7.l.l.O.R.B v M M 2.Y.P.i.k m |.3.~.3.i.2.#.i.l.1+e+l+u+E+H+E+u+e+}+Q.c.o+H+o+E.h.z.O k M O M l.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+` D.f+H+H+H+H+d+B._+/ ~ c 6 f z x x ......x z Y E K.4+G+H+7+O 4+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+ +].i.M #.{.[+H+P.M !+m.i.y.~.o.7+F.2.d+H+ 
+$.2.O.H+H+H+d+{.T.F+L.++D.d+~.h.!+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+6+p.(.V *.].3.3.].H v n H #.$.3.~.v #.i.y.i.#.P e J B h.F.1+e+u+E+H+H+A+l+9+,+r.}+8+j.++L.m n v v k ~.`.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+'+|.=+H+H+H+H+H+p.X.l.* ~ 6 f z z x x x x x f s.q+E $+j+H+w.~.4+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+7+|.{.#.~.{.|+P.#.h.z.k.P.o+H+H+`.2.8+H+H+W./.{.O.z+H+z+F.~.M.U.T.[+~.y.`.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+o+[+G.1.(.|.m k H n #.i.i.i.2.].o.[+A+A+J i S H h.F.9+l+u+H+H+E+u+l+9+W.(.y.U.T.B+E+Z k v v ~.o+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+` l.Y.f+y+F+H+H+(.%+m / ~ 6 f f z z z z z 6 8+H+)+$.<+D+Z i.0+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+G+P.$.M #.2.h.3.#.8+w+H+H+H+H+!+i.P.H+F+H+o+|.W E.v+H+Y.z.J.=+6+k.h.;+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+o+6+` M v 3.3.~.H 7 E n+B+H+H+9+d J R $.h.`.e+l+A+H+H+E+u+ +D.N.j.5+H+H+H+H+p.k H #.v+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+7+a.7.B.&+b+m+z+v+m.k./ ~ ^ 6 f f f f f 2 s.H+H+H+p.j.h+p K.j+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+6+=.b H H H n 6+A+H+H+H+H+'+i.F.H+H+H+H+v+a.{.l.`.$+z.~.8+L.h.!+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+v+=.H v H 2.2.#.~.j.k.|.F. +F d S -.#.z. +l+u+E+H+A+O.K.N.m.<.l+H+H+H+H+H+p.k H B+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+o+1.|.D.D.&+_+{+(.C.].* / ~ c 6 6 6 ^ s.G+H+H+H+w+a.*+/.U.r+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+o+R.m k H y.z.B.Y.8+z+6+j.E.H+H+F+n+8+o.v 2.z.i.B ~.~.y.;+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+6+E H M ~.y.2.y.N.++++++J.} A [.G ~.C.'+l+A+o+D.U.z.R.}+Q.b.H+H+H+H+t+S.(+#.G+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+p.*.l.o.o.S.|.B $+k.7 / / ~ ~ E q+H+H+H+H+H+H+)+/.<+X.s+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+'+1.l.B.K.y.A.h.3.2.#.m.6.W {.{.2.#.v ~.k b b M =+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+G.=.B n 2.i.#.a.F.E.D.P 4 J r.G {.B.6+'+i.N.B.g+u+9+,+H.,+H+H+z+O._+$.#.B+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+[+=.7.|+6+1.V ].$+m.{+o+q+E+H+H+H+H+H+F+v+t+!+V 
z.(+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+v+6+|+Y.T.B.j.#.$.$.*.*.*.V |.m.a.1.1.` p.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+[+v.E v 2.B.:+f+[+R.0 G Z 1.V 2.k.y.K.=+E+H+u+e+9+.+r.u+H+P.%+{+B #.B+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+o+1.*.O.v./.` k.*+S.D+H+H+H+H+v+8+;+Y.D.j.2.k.F.|+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+z+t+n+o+o+o+v+t+v+G+G+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+v+'+P v 2.j.3.3.3.~.3.3.~.H h.i.'+A+H+H+E+u+e+}+H.,+'+X._+d+P #.v+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+ +V V /.'+G M.X.s+H+n+!+E.D.L.j.3.6.6.!+t+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+o+1.k H M H m 0 G G Z O z.Y.u+E+H+H+A+l+9+,+c.k.0+|+u+P 
#.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+1+E /.k.7.V $+E.l.B.B.B.7.*.B.%+&+z+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+g+a.3.i.k.q A [.Q.q.O i.;+u+H+H+E+u+e+}+o.4+{+H+o+V #.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+F+p.E V *.6.B.B.O.!+'+O.%+_+5+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+6+F.~.h.q J b.Q.c.M i.[+E+H+H+A+l+[+%+T.)+H+o+V $.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+6+P.|.V m a.v+|+%+%+b+z+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+d+F.v d ;.r..+p.~.i.d+H+H+H+u+5+&+W.b.u+u+V $.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+6+(.m P E.*+{+t+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+o+d A [.H.,+1.3.B.g+H+H+F+b+;+}+Q.<.a.H #.F.!+f+g+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+v+ +q p.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+W.p (.c+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Q.d S r.Q.,+P 2.B.v+H+F+j+l+9+,+q.|.O #.#.#.W W M /.=.p.[+t+G+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+z+f+`./.W #.~.n 
5+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+w+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+A t [.H.,+,+(.2.{+x+r+E+u+e+}+Q.<.'+m H *.v.o.o.*.b n W #.7.O.`.8+d+n+H+H+H+H+H+H+H+H+H+H+H+H+H+H+n+5+Y.E.3.W 2.C.X.$+&+$.K.s+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+,+d J b.Q.,+1+=.2.&+F+H+A+l+e+}+r.9+o+V {.O.d+F.S.%+J.J.++++N.N.J.k./.7.L.X.X.&+{+=+|+8+n+t+5+{+S.B.i.2.#.{.M.(+U.T.f+v+'+i.L.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+e i ;.r..+}+)+6.U.n+H+H+u+l+9+,+[.E+g+H 2.%+h+%+$.J.~.n M m.F.m.v M 
~.#.#.i.y.++++++++i._ m.h+%+6.=.O z.2+$+D.n+F+H+H+z+o.J.&+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+` } A [.H.,+9+p.J.%+A+H+E+u+e+)+w.v.L.(+4+M.].d+u+$.J.P v |.z+F+v+R.E H $.#.M M H M #.k ) n H M l.K.<+(+T.d+H+H+H+H+H+H+q+/.++5+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+1.m 4 S b.Q.,+}+|.++:+H+H+6+;+Y.X.*+&+:+|+P 3.=+f+(.2.B.B v /.l.6.~.{.M v B V O M #.#.B _ H i.X.$+&+:+n+H+H+H+H+H+H+H+H+H+[+~.U.s+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+p.M 0 i 
;.r..+}+W.3.$+;+T._+2+X.T.G.c.7+!+*.H 3.3.3.C y.{.b H ~.a.P. +[+n+n+o+g+ +=.v H ~.C.<+X.|+v+H+H+H+H+H+H+H+H+H+H+H+H+ +2.%+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+[+H P i J b.Q.,+9+1.J.2+4+M.O.6+7+}+H.(.y.i.#.H a.G.'+3.h.v+o.*+j+G+H+H+H+H+H+H+H+B+a.y.J.J.h+h.%+G+H+H+H+H+H+H+H+H+H+H+H+H+G+v.A.{+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+q+v ].G i S r..+}+P i.>+Y.B+H+A+l+9+,+c.1.6+B l.z+H+H+P.y.{+v+D.*+t+H+H+H+H+H+z+;+i.$+z.{.K.L.o.A.b+H+H+H+H+H+H+H+H+H+H+H+H+H+v+7.N.c+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+b 3.O.d A [.H.1.2.6.L.%+z+H+E+u+e+}+H.,+g+v Y.H+H+H+o+7.J.8+[+X.(+z+H+H+A+5+S.A.X.].2.J.M J.|+l.N.c+H+H+H+H+H+H+H+H+H+H+H+H+H+6+j.J.t+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+6+k.B.t+Q.4 J Z 2.~.'+o.*+&+H+H+A+l+9+,+r.G $.|+H+H+H+H+ +i.X.B+F.(+0+H+d+S.U.J.T.|.K.J.6. 
+{.%+'+2.$+m+H+H+H+H+H+H+H+H+H+H+H+H+o+P i.%+G+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+l.h+|+H+H+t e ~.2.w.}+,+6.2+5+H+E+u+e+9+.+;.].d+H+H+H+H+H+=.J.:+H+*.4+/.K.$+B.'+E.U.J.#.|+H+=.y.c+p.y.%+z+H+H+H+H+H+H+H+H+o+ +7.2.y.2.L.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+p.7.C.o+H+H+)+H i.1.Q.,+9+R.C.2+y+H+A+l+e+}+H.5.z+H+H+H+H+H+o+#.X.t+R.2.U.K.Y.d+k.U.i.#.D.z+H+v+#.z.z+o.A.0+G+H+H+E+z+n+;+7.{.h.z.k.F.6+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+v+p n n H [+o+|.i.E S 
r.Q.}+e+(.U.0+G+H+u+l+9+,+r.l+H+H+H+H+H+H+ +2.B.J.J.C.Y.!+z.U.E.H 3.8+H+H+F+R.h.&+A+7.J.c+d+5+Y.D.{.2.j.T.;+6+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+ +n k B m v V h.~.0 t [.H.,+}+7+7.*+m+H+E+u+e+}+Q.Q.E+H+H+H+H+H+v.h.>+L.m+F.3.J.U.5+v.{.F.H+H+H+H+v+|.A.5+d+j.K.j.i.{.6.O.f+t+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+v 3.7 B W.E i.#.Y.q.d S b.Q.,+7+'+C.X.G+H+A+l+9+,+r.e+H+H+H+v+/.J.i.M.*+o+7.$+h.!+7+$.~.F+H+H+H+H+H+ +i.K.d+E.*+i.F.d+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+1.#.U.7 v =.3.3.3.{+H+A i ;.r..+}+7+o.<+{+H+E+u+e+9+.+Q.H+F+8+j.N.L.6+o.<+2.U.k.M._+a.{.8+H+H+H+H+H+H+H+=.J.B._+k.$+j+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+g+M {.*+` k #.B.6+o.i.:+e+d A [.H.,+9+)+k.<+c+H+A+l+e+}+b.v+Y.h.y.{+z+d+E.J.<+&+z+o.X.#.O.H+H+H+H+H+H+H+H+w+M J.&+'+~.$+z+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+(.~.i.B./.M z.5+H+w+(.i.5+r.4 S r.Q.,+e+R.K.4+t+H+A+l+}+-.l.A.L.m+g+F.z.g.l.B.0+y+6+M i.c+H+H+H+H+H+H+H+H+v+M j.&+H+ 
+h._+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+R.~.M J.K.M #.J.b+H+H+g+/.z.4 } t [.H..+}+e+=.$+0+G+A+e+p.z.N.D.o+F.i.J.j.5+G+v.%+=+$.2.M S.G+H+H+H+H+H+H+E+7.E.#.2+5+E+|.N.c+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+a.{.H $+$.2./.j.U.y+H+H+6+p 4 A t S b.Q.,+9+e+].<+f+g+o.y.y.].!+3.y.z.O.v+B+z+t+m.2+i.O.v.2.5+H+H+H+H+H+H+O.S.v 2.L._+t+d+i.$+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+'+#.$.j.z.M 3.d+!+A.{+H+H+9+4 t ;.b.b.r.Q.,+}+9+ +M.S.6.N.k.V 
C.h.y.{.m.O.E.7./.7./.S.%+t+g+].i.v+H+H+H+H+8+D.V i.Y.P.&+4+v+P.J._+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+V 3.].U.$.{.|+H+v+D.U.f+A+J i S [.r.Q..+,+,+}+}+}+m.U.y.M ].y.h.i.#.C $.].$.7.*.H %+/._+_+G+p.i.T.H+H+H+z+m./.2.D.E+E+|.4+b+7+3.$+c+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+E+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+G.W i.#.3.#.E.z+H+H+'+K.M.Q.4 A [.r.H..+,+,+}+}+}+1.h.J.0+h.A.l. 
+~.X.v+G+G+H+d+M B.0+t+*.h+b+E+O K.f+H+H+a.k.#.~.z+H+H+o+].4+M.2+(+5+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+u+q.d ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( 4 ;.l+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+o+7 #.a.$.M j.n+H+H+H+H+a.++E i ;.b.H.Q.,+,+}+}+)+P y.y.i.J.y.Y.H+H+v.h.c+H+F+'+~.].h+c+H+g+j.4+x+'+{.&+G+ +C.*.2.f+H+F+t+;+].L._+S.f+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+e+A ( | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | ( 4 9+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+o+P _ k v H i.;+H+H+H+H+H+'+~.J.Z [.r.Q..+,+}+}+W.|.J.N.y.k.P i.&+z+H+v+7.i.z+!+3.{.4+{+E+H+H+!+X.0+H+=.i.Y.k.|.2.;+B+5+{+M.$+_+|.%+b+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+A+;.( | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | i E+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+o.#._ E E 
{.~.R.o+H+H+n+Y.G 0 A.l.r.H..+,+}+}+w.6.J.J.3.1.;.b.O.g.&+H+H+!+h.l.i.{._+&+G+H+H+H+B+E._+:+w+$.K.V {.7.O.&+%+U.S.c+y+o+D.4+:+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+}+i | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | d i | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | ( }+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+ +2./.) k O H #.~.~.*.=././.} t V N.p.Q.,+,+w.|.i.J.3.1.H.b.;.A+v+/.A.|+H+o+E M ~.B.%+y+H+H+H+H+H+d+C.4+f+P.v ~.2.C.2+*+D.f+G+H+H+H+'+M.0+t+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+.+4 | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Q.}+4 | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | r.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+n+#.3.F+) v 2.!+1.k H $.#.#.M B G ` i.B.W.p./.h.N.y.a.W..+r.;.}+H+H+6+j.A.8+v.n k O (+c+H+H+H+H+H+H+H+ +%+_+R.H i.D.(+T.d+H+H+H+H+H+H+H+a.(+0+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Q.| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | e+H+4 | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 
0.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+o.y.|+H+7 ~.T.G+H+(.7.7.G.P n M #.M #.J.~.h.y.J.i.v.,+.+H.[.H.E+H+H+H+R.i.y.{.7._ *.:+F+H+H+H+H+H+E+n+|+]._+$.2.2.B.n+H+H+H+H+H+H+H+H+H+A+|.4+j+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Q.| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 9+E+4 | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 0.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+2.K.;+q+v i.5+H+ +_+T.(+0+/ / R Z ` E {.*+~.i.i. +,+.+Q.b.[.l+H+H+H+H+u+B y.K.m._ 7 '+H+H+H+H+6+=+T.0+_+B.$.j.M 2.z.c+H+H+H+H+H+H+H+H+H+v+ +6.4+z+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Q.| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 9+E+4 | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 0.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+ +U.2.O.P #.{.{+6+%+0+t+E.7 ~ ^ G r.Q..+1.X.M.i. 
+,+,+Q.r.;.,+H+H+H+H+A+(.z.H y.X.P _ +H+o+`.B._+h+_+C.=+n+` C.M.#.i.S.z+H+H+H+w+6+R.o./.3.i.2.X.b+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Q.| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 9+E+4 | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 0.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+7.>+V v ~.].|.i.B.b+5+H+w.* ~ ^ 2 w..+.+=.i.y.m.,+,+.+H.[.` m+H+H+H+E+v.i.B.'+$.N.V ) =.D.M.4+4+M.E.8+v+d+f+m #.2+6.2.2.Y.`.|.].W {.2.i.j.k./.o.P.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Q.| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 9+E+4 | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 0.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+7+K.X.p.H {.8+[+{.B.|+H+q+* ~ ^ 2 2 f p.a.y.j.7.A.P..+Q.b.S ].&+B+H+H+;+i.S.d+E.D.3.y.7 *.S.D.~.M ].6.~.W W W n M 3.(+#.~.{.{.3.k.E.F.P.!+6+v+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Q.| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 9+E+4 | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 
0.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+P.$+=+'+W F.H+ +].h.8+H+F * ^ ^ 2 f h X y.l.'+W.6.J.p.r.;.}+o.z.c+H+P.i.S.d+D.2+D.].y.{.W W C ]././.|.F.O.`.!+R.B v ~.j.#.~.7.n+o+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Q.| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 9+E+4 | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 0.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+S.J.d+O {.F.d+%+E.i.B.,+* ~ ^ 2 f 2 r y.>.Y R.9+p.z.J.-.H.E+v+#.&+o.z.L.v+o.4+].z.N.l.'+6+o+H+H+H+H+H+H+H+H+H+H+[+*.H K.X.{.h.t+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Q.| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 9+E+4 | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 0.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+d+K.&+p.{.$.|.D.0+5+V y.7 ~ ^ 2 2 2 C J.>.h x h Y s.B J.B.g+H+H+v.#.i.L.v+!+].~.N.y.o.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+W.O M $+3.i.S.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Q.| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 9+E+4 | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 
| | | | | | | | | | | | | | | | | | | | | | | 0.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+|+J.v.#.l.(.$.0+&+&+E.#.>.~ ^ 2 6 W y.X h x x h h h 2 C J._+G+z+1._ H v+[+].y.y.3.;+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+'+B ~.>+#.z.5+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Q.| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 9+E+4 | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 0.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+D.K.H 2.8+R.j.S.&+5+1.7 z.r 2 6 W z.X h x x h h h f 2 P m.A.;+o.2.7 m F.z.y.h.k.f+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+g+=.j.X.#.L.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Q.| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 9+E+4 | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 0.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+g+n H {.E.H+'+C.|+H+w+* ~ #.>.c {.y.X h x x h h h f 2 c B+d+k.J.i.l.=.2.U.h.h.E.G+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+o+=.*+K.h.;+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Q.| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 9+E+4 | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 0.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+W._ M ~.o+H+'+z.=+H+Z * ~ r 3.3.y.X h x x x h h h 2 ^ +H+v+a.h.y.*.y.J.2.z.{+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+v+/.<+~.*.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Q.| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 9+E+4 | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 0.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+v.b ~.#.|+H+[+B.{+7+* ~ ^ c #.U.X h x x x h h h 2 ^ s.H+d+E.2.$.N.J.#.#.K.5+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+'+j.*+#.;+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Q.| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 9+E+4 | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 0.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+=.M i.H 2.;+7+B.T.5.~ ^ 6 r ~.U.y.h x x h h h f 2 c q+[+k.i.3.J.B.k 7 =.n+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+ +z.K.$.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Q.| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 9+E+4 | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 0.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+b #.h.a.v 3.o.k.D./ ^ ^ 6 M W C >+>.h x h h h 2 ^ R.6+B.3.3.++L. +W 6.n+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+o.>+3.O.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Q.| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 9+E+4 | | | | | | | | | | | | | | | 4 i | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 4 t 4 | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 0.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+7+v ].].y.[+O ~.3.j./ ^ 2 r 3.Y f #.>+Y h h h f ^ r |+k.3.i.$+{+[+#.B.8+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+g+7.<+~.z+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Q.| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 9+E+4 | | | | ;.9+A ( | | | | | | ( q.H+d | | | | | | | | | | | | | | | | | | ( r.q.4 | | | | | | | | | | | | | | | | | | | | | | | | | | t E+H+,+( | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 0.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+g+H ].H y.&+o+|.3.]./ ^ c $.>.h x f j.K.f h f 2 ~ $.$.i.J.U.|+q+*.i.;+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+'+C.*+O.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Q.| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 9+E+4 | | | | 0.A+S ( | | i S J d ( 
q.H+J S i | | d S J 4 | d i i i d t S t 4 | e+l+A d 4 t S t | 4 i i i t i d | d i 4 | | | | d S A 4 4 .+E+r.S ( | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 0.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+p.C M M J.8+B+*.j./ ~ v 2.Y h x x Y M.y.f 2 ^ S.m 3.N.S.t+A+o.2.=+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+v.K.%+t+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Q.| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 9+E+4 | | | | b.A+S ( ( J l+H+H+l+d q.E+H+E+A+t i u+H+E+}+d q.H+E+u+9+A+H+H+r.0.A+H+H+q.:.H+E+E+[.i l+H+E+E+H+H.( .+u+A | | | A A+E+H+,+t H+H+u+0.( | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 0.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+G.M H i.j.8+1.z.C / ~.X h x x x h X <+X ^ r *.i.U.Y.z+H+1.2.O.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+(.K.Y.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Q.| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 9+E+4 | | | | b.A+S ( | i b.Q.9+H+A r.H+b.S H+.+r.E+;.0.E+0.b.H+.+;.[.r.Q.u+u+t e+l+J i E+9+A 9+u+t e+u+q.:.}+e+;.e+.+d | | ( 9+u+J Q.H+S ,+A+A d | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 0.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+1.#.H y.{./.3.~.M ~.c f h h h h f {.>+r P J.U.H '+H+ +{.D.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+k 2.c+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Q.| | | | | | | 4 d d d | | | | | | | 4 d d 4 | | 4 d d d d d 4 4 | | | | | | | | | | d i i i d | | | 
| | | | | 9+E+4 | | | | b.A+S ( ( J l+A+u+H+A r.H+4 ( u+9+,+u+> t u+.+r.l+t ( q.A+u+A+w+d 9+e+| i H+0.( b.A+;.e+,+> ( :.u+l+u+S ( | | ( A+,+( t H+q.Q.u+| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 0.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+1.H 2.i.2.3.M ~.r ^ 6 2 h h h f r z.>+A.%+ +) G. +~.j.t+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+6+].E.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Q.| | | | | | | S Q..+Q.4 | | | | | d r..+Q.J ( ( S Q..+.+.+Q.H.0.J d | | | | | | d S Q.}+e+9+.+:.t 4 | | | | | 9+E+4 | | | | b.E+0.t i .+B+0.0.H+A r.H+J A E+Q.H.E+t ;.A+r.q.l+t d 9+l+S 1+w+d 9+l+t i E+,+t ,+u+J e+,+( | t 9+H+,+i | | | ( e+l+t [.H+;.Q.u+4 | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 0.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+1.C h.F.B M ~.H / ~ ~ r r >.>.2.{.y.{.(.g+_ P 6.i.8+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+!+f+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Q.| | | | | | ( .+H+H+H+S | | | | | S H+H+H+Q.( ( Q.H+H+H+H+H+H+H+H+Q.d | | | | A ,+H+H+H+H+H+H+H+l+i | | | | | 9+E+4 | | | | [.H+u+u+q+,+l+e+u+H+A b.A+e+w+l+t A u+u+l+9+t r.e+t | Q.u+e+A+l+d H.u+e+:.Q.A+9+B+Q.d 9+.+( | ( Q.H+:.| | | | | :.u+e+B+9+d Q.l+| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 0.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+1.{.O 7 {.v H M #.2.i.i.2.#.V !+[+m v H 7 /.(+c+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Q.| | | | | | ( ,+H+H+H+,+( | | | 4 H.H+H+H+Q.| ( 
H.E+G+9+9+9+9+u+H+H+H.( | | A e+H+H+l+}+.+,+9+u+H+A | | | | | 9+E+4 | | | | t H.q.H.r.A :..+q.r.d A q.Q.Q.S ( | S Q.H.A ( J :.d ( i q.Q.q.:.4 d q..+J 4 q..+0.4 4 :.S ( | 4 ,+e+t ( | | | | ( :..+H.i | S 0.| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 0.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+B+(.M $.2.2.i.i.i.~.#.M M M #.M |.(.k n v 7.Y.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Q.| | | | | | ( }+H+H+H+A+4 | | | t l+H+H+H+.+| ( H.E+B+A 4 4 d ;.u+H+A+| | d 9+H+E+,+J | ( ( d ;..+i | | | | | 9+E+4 | | | | | ( ( ( ( ( ( ( ( ( | | ( ( ( ( | | ( ( ( ( | ( ( | | | ( ( ( ( | | ( ( | | ( ( ( | | ( ( | ( A 9+q.4 | | | | | | ( ( ( | | ( ( | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 0.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+v+1.6.6.].].].i.K.U.U.U.U.J.A.A.A.A.$.E o.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Q.| | | | | | ( 9+H+G+H+H+:.| | | S H+H+E+H+,+d ( H.E+B+t | | | ( .+H+E+t | :.H+H+,+4 ( | | | | ( 4 | | | | | | 9+E+4 | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | i S i | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 0.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+d+d+n+m+m+m+m+m+m+m+m+f+5+c+c+c+f+n+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Q.| 
| | | | | ( l+H+e+E+H+9+( | d Q.H+u+l+H+,+d ( H.E+B+t | | | ( ,+H+E+i d 9+H+l+A ( | | | | | | | | | | | | | 9+E+4 | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | ( | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 0.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Q.| | | | | | ( u+H+}+}+H+A+i | t u+H+9+.+H+}+i ( H.E+B+i ( ( 4 S u+H+u+| i E+H+}+( | | | | | | | | | | | | | | 9+E+4 | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | ( | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | ( | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 0.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+u+u+u+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+B+u+u+E+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+E+u+u+A+H+H+H+H+H+H+H+H+G+u+A+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+E+u+u+A+H+H+u+A+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Q.| | | | | | | A+H+,+[.H+E+0.| ;.H+A+H.H.H+9+t ( H.G+E+}+.+.+,+u+H+H+b.( t H+E+Q.( | | | | | | | | | | | | | | 9+E+4 | | | | | | | | | | | | | | | | | | t t | | | | | | | | | | | | | | | | | | A d | | | | | | | | | | | | | | | | | ( ( | | 4 | | | | | | | d A 4 | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 0.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+A+J ( J H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Q.( d l+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+9+i > ,+E+H+H+H+H+H+H+H+}+} Q.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+o+( 4 .+H+}+( i 9+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Q.| | | | | | d A+H+.+d E+H+9+| .+H+l+J H.H+e+A ( H.G+H+H+H+H+H+H+A+H.4 | t H+G+Q.( | | | | | | | | | | | | | | 9+E+4 | | | | S 9+:.( i Q..+i | | ( ( | | .+,+| | | ( ( | | | ( ( | | | | | | | d e+:.| | ( ( | | | | | | | | | ( 0.9+9+9+H.4 t 9+;.| | ( ( | | S 9+J ( | ( ( | | | | ( ( | | ( | | | ( | | | | | | | | | | | | | | | | | | 0.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+u+( . > H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+[.. + 9+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+,+@ . r.E+H+H+H+H+H+H+A+S + ,+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+7+. . b.H+Q.. + ,+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Q.| | | | | | t A+H+Q.| }+H+u+J u+H+}+| q.H+l+J ( H.E+H+u+u+A+H+H+u+d | | i G+H+}+( | | | | | | | | | | | | | | 9+E+4 | | | | 0.H+9+d [.H+u+A ( i ;.A ( | 9+e+( ( t ;.A ( | | J S i 4 t d | t i i E+r.| A ;.S 4 | d t d t d | | ( ,+H+,+9+E+;.t u+;.| 4 S S 4 | :.E+S ( i ;.A | | ( t ;.J t d A | | 4 A 4 | | | | | | | | | | | | | | | | | 0.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+u+( . ( H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+[.. + e+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+,+$ . r.E+H+H+H+H+H+H+u+> = H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+7+. . b.H+u+H.Q.A+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Q.| | | | | | A A+H+q.| ;.H+E+}+H+A+q.( q.H+l+S ( H.E+B+S t t l+H+E+.+( | d 9+H+l+A ( | | | | | | | | | | | | | 9+E+4 | | | | :.H+E+H.,+H+l+t S e+E+A+H.| 9+e+| 0.u+u+l+:.4 Q.A+E+9+q.9+S | 9+H.t E+r.d u+A+u+9+i J u+u+u+0.( | ( ,+w+:.H.H+;.A A+;.4 9+A+A+1+d ;.A+S J l+E+A+r.| 0.l+G+u+}+A u+0.| 0.l+i | | | | | | | | | | | | | | | | | 0.H+H+H+H+H+H+H+H+H+H+H+H+H+H+9+H.H.A+H+u+H..+H+H+g+r.Q.l+r.Q.e+H+u+.+.+u+H+g+Q.H.l+Q.H.e+H+G+Q.Q.v+H+u+H.Q.l+H+H+H+v+.+Q.A+Q.J 0.E+H+H.J b.E+H+E+9+r.}+v+:.Q.H+H+A+Q.S J 0.E+H+H+H+H+H+u+( . ( H+u+Q..+l+.+J 0.A+H+H.J 0.E+H+H+[.. + 7+H.J b.u+H+H+H+H+H+H+H+H+}+:.A :.7+H+u+q.;.A b.l+H+H+A+,+H.o+}+S :.u+H+.+J ;.u+H+H+H+H+H+H+H+u+H.S J 0.l+H+H+H+H+v+H.S S q.H+H+H+H+H+A+H.H.9+H+z+,+r.}+H+}+$ . r.H+l+H.H.)+H+H+g+. A H+9+r.9+o+;.,+H+H+e+0.J :.l+H+H+7+. . 
b.H+l+r.Q.u+H+H+A+H.J S Q.H+H+H+u+H.H.H+.+S [.e+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Q.| | | | | | S A+H+0.| d E+H+E+H+l+t ( 0.H+u+;.> H.E+B+t ( | t B+H+E+r.( 4 :.H+H+,+4 ( | | | | | ( | | | | | | 9+E+4 | | | | 0.E+l+E+l+w+e+t }+l+S ,+E+d 9+e+4 A+l+b.l+9+;.A+Q.S q.H.A+:.| u+,+t E+r.4 H..+9+E+:.S H+,+:.A | | ( ,+H+A+E+H+;.A H+[.b.H+b.[.E+r.S A+:..+l+;.,+u+;.e+9+J 9+l+4 l+l+| u+e+| | | | | | | | | | | | | | | | | | 0.H+H+H+H+H+H+H+H+H+H+H+H+H+H+Q.= . 9+H+b.@ @ E+E+A + 4 l+= + :.H+Q.$ $ ,+H+J + ( u+= + J H+u++ @ r.H+.+. ( }+H+H+H+9+( @ } + . + i d . . + d H+E+r.. } t . S H+1+( + . . $ A+H+H+H+H+H+u+( . ( H+9+( = ( + . + A } + . + i H+H+[.. . > . . + | 9+H+H+H+H+H+F+A @ . . . Q.H+d . . . + > )+H+o+i + ( $ . . 4 i + . + > E+H+H+H+H+H+u+A . . . . ( }+H+H+.+> + . . > H+H+H+H+H+e+@ @ [.H+l+d . S E+}+$ . r.F+} + $ g+H+E+r.. Q.H+b.. 4 i . ;.H+H.= . . . $ 1+H+7+. . b.H+Q.. > ,+H+}+( + . . + d u+H+l+@ + t + . + > e+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Q.| | | | | ( ;.A+H+:.| | ,+H+H+H+}+( ( :.H+B+;.( H.E+B+t | | | S H+H+A+;.( i 9+H+E+}+t ( ( ( ( S ,+i | | | | | 9+E+4 | | | | 0.l+.+H+,+,+e+S l+Q.| ;.H+J 9+e+t H+H+E+H+l+H.u+J ( | ;.A+:.| l+.+t E+r.A u+E+u+E+0.S A+S | | | | ( ,+u+b.S l+e+S E+[..+E+| d u+,+S A+0.9+,+( 0.w+H.u+0.( ,+l+4 0.H+.+E+:.( | | | | | | | | | | | | | | | | | 0.H+H+H+H+H+H+H+H+H+H+H+H+H+H+l+$ . b.H+4 . . )+E+$ . S A+4 . i H+;.. . r.H+( . S H+4 . > H+.+. . i H+;.. | E+H+H+H+7+= . + A $ . . + J = . + ,+H+r.. . + + b.l+| . + A [.b.E+H+H+H+H+H+u+( . ( H+1+> . + i > . . @ A $ . . g+H+[.. . $ [.i . . A H+H+H+H+H+J + . 4 :.S o+H+q.q.9+)+$ . d H+o+d . . d ( . . . A > . . r.H+H+H+H+H+A+.+0.g+Q.> . J F+e+> . + S [.r.H+H+H+H+H+e+. . ;.H+e+} . A E+}+$ . Q.S . . ,+H+H+A+4 @ A+H+0.. . . + r.9+> . ;.9+d . > H+7+. . b.H+Q.. $ ,+v+d . = q.S + . S E+o+. . . t d . . A H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Q.| | | | | ( 0.E+H+S | | S H+H+A+0.( ( ;.H+G+:.> H.E+B+t | | | | b.H+H+u+t ( A u+H+H+A+e+.+,+l+A+G+t | | | | | 9+E+4 | | | | b.l+;.r.J Q.l+A e+}+| r.H+t 9+e+d H+,+d A :.0.G+;.( t 0.u+:.4 u+.+t E+b.H.H+t J H+0.S A+J ( | | | ( ,+l+i | 9+A+S E+[.Q.E+i d A+.+S A+0.}+}+| q.B+r.l+.+t 9+l+4 4 B+E+u+| | | | | | | | | | | | | | | | | | | 0.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+4 . A A+@ + . S A+. + }+H+0.. 4 e+i . . J 7+= . }+H+q.. . E+J . + = u+d . S H+H+H+H+7+= . S A+.++ . ;.E+9+. + :.H+q.. . S A+H+0.+ . q.A+E+E+H+H.;.;.;.q.A+( . ( H+9+> . A H+r.@ . 0.E+Q.+ . H.H+[.. + Q.H+g+d . ( e+H+H+H+l+> . 4 l+F+E+H+H+H+1+;.i @ . > A+v+d . d l+}+= . t u+u++ . A H+H+H+H+H+H+E+}+;.i @ . 4 u+S . . .+A+E+E+H+H+H+H+H+e+. . ;.H+e+} . A E+}+$ . t + . 
Q.H+H+H+u++ 4 H+E+0.. + ;.A+H+;.. = H.1+:.. . l+7+. . b.H+Q.. $ ,+,++ . 0.H+H+4 . @ u+v+. . i E+u+$ . ( H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Q.| | | | | ( r.E+H+J | | d A+H+u+i | ( ;.G+H+0.> H.G+B+t | | | | 4 .+H+H+e+d | t 9+H+H+H+H+H+H+H+A+t | | | | | 9+E+4 | | | | 0.u+A | ( Q.l+t H.E+u+H+e+| 9+e+| }+H+u+B+9+A l+B+w+u+0.9+E+E+H+.+t E+r.:.H+u+E+E+b.S A+J ( | | | ( )+H+E+E+H+H.J H+[.t H+A+w+A+J ;.E+S 0.H+u+H+}+d ,+H+E+H+l+4 | Q.H+r.( | | | | | | | | | | | | | | | | | | 0.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+:.. ( .+. A . > }+. = H+H+,+. > b.> ( = ( b.@ $ l+H+u+. . 9+= + d @ Q.= @ .+H+H+H+H+7+= . b.H+9++ . H.H+A+. + ;.H+q.. $ .+H+H+A . @ 7+H+H+H+H+} . . . > E+( . ( H+9+> . ;.H+.+$ . .+H+9++ . q.H+[.. + 9+H+A+S . = ,+H+H+H+)+$ . b.E+H+H+H+H+A . . . + . > u+v+d . t A+l+> . ;.A+H++ . t H+H+H+H+H+A+S . . . . . } u+d . $ l+H+H+H+H+H+H+H+H+e+. . ;.H+e+} . A E+}+$ . . . . 1+H+H+H+.+. b.H+E+0.. = ,+H+H+i . . . . . . . ,+7+. . b.H+Q.. $ ,+r.. + Q.H+H+J . . l+v+. . ;.H+A+} . > H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Q.| | | | | ( :.9+e+d | | 4 :.,+q.( | | A }+e+;.( :.9+}+i | | | | | 4 ,+e+9+;.( | 4 S }+l+B+u+9+q.i | | | | | | 9+E+4 | | | | A 0.d | ( S 0.d | :.,+H.d | :.:.| 4 0..+H.J ( t H..+:.4 J .+H.H.S 4 q.J | 0.,+r.q.A i q.i | | | | ( ;.Q.H.H.S 4 d H.t | A .+.+J | t q.i 4 ;.,+q.i ( J r.r.u+e+4 | ,+E+d | | | | | | | | | | | | | | | | | | | 0.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+9+@ + J . )+$ . :.. A H+H+e+( @ d + :.i + d + A A+H+H+> . :.. d b.+ A . ( A+H+H+H+H+7+= . b.H+9++ . H.H+A+. + ;.H+q.. $ ,+H+H+J . + }+H+H+H+H+9+,+,+,+}+A+( . ( H+9+> . ;.H+.+$ . .+H+9++ . q.H+[.. + 9+H+A+J . > 9+H+H+H+,+$ . :.E+H+H+H+,+. . ;.u+;.. > u+v+d . t A+u+> . ;.A+H++ . t H+H+H+H+H+,+. + ;.E+J . } u+i . + l+H+H+H+H+H+H+H+H+g+. . S H+e+} . A E+}+$ . @ + . $ E+H+E+A + g+H+E+0.. = }+H+H+t . $ A S S S J u+7+. . b.H+Q.. $ ,+H.. + Q.H+H+A . . u+v+. . ;.H+A+} . > H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Q.| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 9+E+4 | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | ( ,+H+H+H+[.| d E+,+( | | | | | | | | | | | | | | | | | | | 0.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+> . $ + E+t . $ + .+H+H+A+S . + @ ,+:.. @ . .+H+H+H+;.. $ . q.7+. $ . A H+,+$ > e+g+= . b.H+9++ . H.H+A+. + ;.H+q.. $ ,+H+H+b.+ . i u+H+E+H+H+H+H+H+H+u+( . ( H+9+> . ;.H+.+$ . .+H+9++ . q.H+[.. . S H+}+@ . d l+A . q.o+> . @ }+H+H+H+[.. + o+A+i . > u+v+d . t A+u+> . ;.A+H++ . t H+o+$ $ 1+r.. = }+A+i . } u+;.+ . 
J A+H+E+H+,++ > A+l++ . 4 H+Q.@ . A E+}+$ . r.r.. . A H+A+$ > H+H+E+0.. = }+H+H+[.+ . .+H+H+H+E+H+7+. . b.H+Q.. $ ,+,+@ . S E+A+> . ( u+v+. . ;.H+A+} . > H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Q.| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 9+E+4 | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | t :.0.t | | d [.i | | | | | | | | | | | | | | | | | | | | 0.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+A . . 4 E+Q.. . @ H+H+H+H+Q.. . | u+.+$ . @ e+H+H+H+7+. . + u+u+@ . @ Q.H+S . . H.g+= . b.H+9++ . H.H+A+. + ;.H+q.. $ ,+H+H+v+4 . . + @ > A+H+H+H+H+H+u+( . ( H+9+> . ;.H+.+$ . .+H+9++ . q.H+[.. . . + . . @ Q.}+$ . 4 v+:.+ . . @ + )+.+. . = | . . > u+v+d . t A+u+> . ;.A+H++ . t H+0.. . J 1+. . > ( . . } e+v+> . . + @ ( H+i . + H.A+4 . . . . . . A E+}+$ . r.H+( . . .+e+. S H+H+E+0.. = }+H+H+u+( . . = > @ J H+7+. . b.H+Q.. $ .+H+J . . ( $ . . .+H+o+. . ;.H+A+} . > H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Q.| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 9+A+4 | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 0.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+.+$ . r.H+A++ + t H+H+H+H+9+> . J H+l+| . A u+H+H+H+H+$ + ( H+A+t . ( u+H+H.+ = }+g+( . r.H+9+@ . H.H+A+. @ :.H+H.. > ,+H+H+H+u+t + . . @ u+H+H+H+H+H+u+4 . } H+9+( . [.H+,+> . .+H+e+@ . H.H+;.+ | ;.. . $ q.H+o+d . ;.E+H+b.$ + . . q.H+J . + $ S . > l+u+i . A A+u+( . :.A+H+@ + A H+1++ @ .+E+S . . ( d + } e+H+l+d + . . = H+q.. $ l+H+}+$ . + J J . A A+}+= . H.H+7++ + $ S + }+H+H+E+0.. > }+H+H+H+l+d + . . . A H+g+. @ r.H+Q.. = ,+H+A+S @ . . $ .+E+H+v+@ + [.H+A+d . 
( H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+,+d | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | d i | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | .+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+A+A+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+F+9+g+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+A+e+H+H+H+H+E+v+F+H+H+H+H+o+9+A+H+H+H+A+g+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+E+v+H+H+H+u+o+H+H+H+H+H+H+H+H+A+9+o+H+H+H+u+F+H+H+H+F+g+u+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Q.:.H+H+H+H+H+H+H+H+H+H+H+H+H+E+9+e+E+H+H+H+H+H+H+H+H+H+H+H+H+H+H+E+e+o+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+e+J ( | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 4 u+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Q.4 | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 
r.H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+Q.t 4 4 | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 4 4 i 0.G+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+e+H.0.b.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.b.b.q.9+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", 
"H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+", "H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+H+"}; relion-3.1.3/src/gui_entries.cpp000066400000000000000000000255651411340063500166300ustar00rootroot00000000000000 /*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #include "src/gui_entries.h" float fltkTextToFloat(const char* str) { float result = -999.; if (str == NULL) fl_message("ERROR: NULL entry for TextToFloat conversion. Check your inputs!"); else { std::string mystring = std::string(str); if (mystring.substr(0,2) == "$$") return 0; else if (!sscanf(str, "%f", &result)) fl_message("ERROR: Invalid (non-numerical?) entry for TextToFloat conversion. Check your inputs!"); } return result; } // This allows CURRENT_ODIR browse buttons std::string current_browse_directory; ShowHelpText::ShowHelpText(const char *help) { int w=640; int h=480; Fl_Window *win = new Fl_Window(w, h); Fl_Text_Buffer *buff = new Fl_Text_Buffer(); Fl_Text_Display *disp = new Fl_Text_Display(20, 20, w-40, h-40, "relion additional text."); disp->buffer(buff); disp->wrap_mode(1,79); win->resizable(*disp); win->show(); buff->text(help); } ShowHelpText::~ShowHelpText(){}; void GuiEntry::clear() { deactivate_option = -1; /* //joboption.clear(); if (inp != NULL) { delete inp; inp = NULL; } if (help != NULL) { delete help; help = NULL; } if (browse != NULL) { delete browse; browse = NULL; } if (choice != NULL) { delete choice; choice = NULL; } if (menu != NULL) { delete menu; menu = NULL; } if (my_deactivate_group != NULL) { delete my_deactivate_group; my_deactivate_group = NULL; } if (slider != NULL) { delete slider; slider = NULL; } */ } bool create_scheduler_gui; void GuiEntry::initialise(int x, int y, Fl_Group * deactivate_this_group, bool _actually_activate, int height, int wcol2, int wcol3) { // The input field int mywidth = (joboption.joboption_type == JOBOPTION_SLIDER && !create_scheduler_gui) ? 50 : wcol2; inp = new Fl_Input(x, y, mywidth, height, joboption.label_gui.c_str()); inp->color(GUI_INPUT_COLOR); inp->textsize(ENTRY_FONTSIZE); inp->labelsize(ENTRY_FONTSIZE); inp->value(joboption.default_value.c_str()); // Display help button if needed if (joboption.helptext != "") { // The Help button help = new Fl_Button( XCOL3, y, wcol3, height, "?"); help->callback( cb_help, this ); help->color(GUI_BUTTON_COLOR); help->labelsize(ENTRY_FONTSIZE); } if (joboption.joboption_type == JOBOPTION_FILENAME) { // The Browse button browse = new Fl_Button( XCOL4, y, WCOL4, height, "Browse"); browse->callback( cb_browse, this ); browse->color(GUI_BUTTON_COLOR); browse->labelsize(ENTRY_FONTSIZE); } else if (joboption.joboption_type == JOBOPTION_INPUTNODE) { // The Browse button browse = new Fl_Button( XCOL4, y, WCOL4, height, "Browse"); browse->callback( cb_browse_node, this ); browse->color(GUI_BUTTON_COLOR); browse->labelsize(ENTRY_FONTSIZE); } else if (joboption.joboption_type == JOBOPTION_RADIO || joboption.joboption_type == JOBOPTION_BOOLEAN) { if (!create_scheduler_gui) { choice = new Fl_Choice(XCOL2, y, WCOL2, height); if (joboption.joboption_type == JOBOPTION_RADIO) { // Add all items to the menu for (int i = 0; i < joboption.radio_options.size(); i++) { // Add all items to the menu choice->add(joboption.radio_options[i].c_str()); if (joboption.radio_options[i] == joboption.default_value) choice->picked(choice->mvalue()); } } else // boolean { if (deactivate_this_group != NULL) { my_deactivate_group = deactivate_this_group; actually_activate = _actually_activate; } choice->menu(bool_options); if (joboption.default_value=="Yes") choice->picked(&bool_options[0]); else choice->picked(&bool_options[1]); } choice->callback(cb_menu, this); choice->textsize(ENTRY_FONTSIZE); menu = choice; 
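// Note: "menu" is the generic Fl_Menu_ view of the concrete Fl_Choice created above; setValue() and cb_menu_i() below only use the Fl_Menu_ interface, so radio and boolean entries share the same code path.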
//menu->color(GUI_BACKGROUND_COLOR); menu->color(GUI_INPUT_COLOR); menu->textsize(ENTRY_FONTSIZE); } } else if (joboption.joboption_type == JOBOPTION_SLIDER) { if (!create_scheduler_gui) { int floatwidth = 50; // Slider is shorter than wcol2, so that underlying input field becomes visible slider = new Fl_Slider(XCOL2 + floatwidth, y, wcol2 - floatwidth, height); slider->type(1); slider->callback(cb_slider, this); slider->minimum(joboption.min_value); slider->maximum(joboption.max_value); slider->step(joboption.step_value); slider->type(FL_HOR_NICE_SLIDER); slider->color(GUI_BACKGROUND_COLOR); inp->callback(cb_input, this); inp->when(FL_WHEN_ENTER_KEY|FL_WHEN_NOT_CHANGED); // Set the default in the input and the slider: inp->value(joboption.default_value.c_str()); slider->value(textToDouble(joboption.default_value)); } } } void GuiEntry::place(JobOption &_joboption, int &y, int _deactivate_option, Fl_Group * deactivate_this_group, bool actually_activate, int x, int h, int wcol2, int wcol3 ) { // Clear if existing clear(); // What to do when continue is toggled deactivate_option = _deactivate_option; joboption = _joboption; // Add the entry to the window initialise(x, y, deactivate_this_group, actually_activate, h, wcol2, wcol3); // Update the Y-coordinate y += h + 2; } // Set the value back from the Fl_Input into the JobOption.value void GuiEntry::setValue(std::string _value) { joboption.value = _value; inp->value(_value.c_str()); // Also update menu or slider if necessary if (menu != NULL) { if (_value.substr(0,2) == "$$") { menu->add(_value.c_str()); const Fl_Menu_Item *p = menu->find_item(_value.c_str()); menu->picked(p); } else { const Fl_Menu_Item *p = menu->find_item(inp->value()); if ( p ) menu->picked(p); // if we cant find the menu option, just pick first menu entry else menu->picked(&menu->menu()[0]); } } if (slider != NULL) { if (_value.substr(0,2) != "$$") slider->value(fltkTextToFloat(inp->value())); } } void GuiEntry::deactivate(bool do_deactivate) { if (do_deactivate) { if (inp) inp->deactivate(); if (help) help->deactivate(); if (browse) browse->deactivate(); if (menu) menu->deactivate(); if (slider) slider->deactivate(); } else { if (inp) inp->activate(); if (help) help->activate(); if (browse) browse->activate(); if (menu) menu->activate(); if (slider) slider->activate(); } } // Help button call-back functions void GuiEntry::cb_help(Fl_Widget* o, void* v) { GuiEntry* T=(GuiEntry*)v; T->cb_help_i(); } void GuiEntry::cb_help_i() { ShowHelpText *help = new ShowHelpText(joboption.helptext.c_str()); } void GuiEntry::cb_browse(Fl_Widget* o, void* v) { GuiEntry* T=(GuiEntry*)v; T->cb_browse_i(); } void GuiEntry::cb_browse_i() { Fl::scheme("gtk+"); Fl_File_Chooser * G_chooser = new Fl_File_Chooser("", joboption.pattern.c_str(), Fl_File_Chooser::SINGLE, ""); if (joboption.directory=="CURRENT_ODIR") G_chooser->directory(current_browse_directory.c_str()); else G_chooser->directory(joboption.directory.c_str()); G_chooser->color(GUI_BACKGROUND_COLOR); G_chooser->show(); // Block until user picks something. 
// (The other way to do this is to use a callback()) // while(G_chooser->shown()) { Fl::wait(); } // Print the results if ( G_chooser->value() == NULL ) { //fprintf(stderr, "(User hit 'Cancel')\n"); return; } char relname[FL_PATH_MAX]; fl_filename_relative(relname,sizeof(relname),G_chooser->value()); FileName fn_pre, fn_jobnr, fn_post, fn_out; decomposePipelineSymlinkName(relname, fn_pre, fn_jobnr, fn_post); fn_out = fn_pre + fn_jobnr + fn_post; inp->value(fn_out.c_str()); } void GuiEntry::cb_browse_node(Fl_Widget* o, void* v) { GuiEntry* T=(GuiEntry*)v; T->cb_browse_node_i(); } void GuiEntry::cb_browse_node_i() { Fl::scheme("gtk+"); Fl_File_Chooser * G_chooser = new Fl_File_Chooser("", joboption.pattern.c_str(), Fl_File_Chooser::SINGLE, ""); std::string fn_dir = ".Nodes/" + integerToString(joboption.node_type); G_chooser->directory(fn_dir.c_str()); G_chooser->color(GUI_BACKGROUND_COLOR); G_chooser->show(); // Block until user picks something. // (The other way to do this is to use a callback()) // while(G_chooser->shown()) { Fl::wait(); } // Print the results if ( G_chooser->value() == NULL ) { //fprintf(stderr, "(User hit 'Cancel')\n"); return; } char relname[FL_PATH_MAX]; fl_filename_relative(relname,sizeof(relname),G_chooser->value()); // Get rid of the .Nodes/type/ directory-name again std::string replace = std::string(relname); std::string replace2 = (std::string::npos == replace.find(fn_dir.c_str())) ? replace : replace.substr(fn_dir.length()+1, replace.length()); char relname2[FL_PATH_MAX]; strcpy(relname2, replace2.c_str()); FileName fn_pre, fn_jobnr, fn_post, fn_out; decomposePipelineSymlinkName(replace2, fn_pre, fn_jobnr, fn_post); fn_out = fn_pre + fn_jobnr + fn_post; inp->value(fn_out.c_str()); } void GuiEntry::cb_menu(Fl_Widget* o, void* v) { GuiEntry* T=(GuiEntry*)v; T->cb_menu_i(); } void GuiEntry::cb_menu_i() { if (!create_scheduler_gui) { const Fl_Menu_Item* m = menu->mvalue(); // Set my own value inp->value(m->label()); // In case this was a boolean that deactivates a group, do so: if (my_deactivate_group != NULL) { std::string myval = std::string(inp->value()); if (myval.substr(0,2) == "$$") my_deactivate_group->activate(); else if ( actually_activate && (strcmp(inp->value(), "Yes") == 0) || !actually_activate && (strcmp(inp->value(), "No") == 0)) my_deactivate_group->deactivate(); else my_deactivate_group->activate(); } } } void GuiEntry::cb_slider(Fl_Widget* o, void* v) { GuiEntry* T=(GuiEntry*)v; T->cb_slider_i(); } void GuiEntry::cb_slider_i() { static int recurse = 0; if ( recurse ) { return; } else { recurse = 1; std::string str = floatToString(slider->value()); inp->value(str.c_str()); slider->redraw(); recurse = 0; } } void GuiEntry::cb_input(Fl_Widget* o, void* v) { GuiEntry* T=(GuiEntry*)v; T->cb_input_i(); } void GuiEntry::cb_input_i() { static int recurse = 0; if ( recurse ) { return; } else { recurse = 1; if (!create_scheduler_gui) slider->value(fltkTextToFloat(inp->value())); // pass input's value to slider recurse = 0; } } relion-3.1.3/src/gui_entries.h000066400000000000000000000216421411340063500162650ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef SRC_GUI_ENTRIES_H_ #define SRC_GUI_ENTRIES_H_ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "src/macros.h" #include "src/strings.h" #include "src/filename.h" #include "src/pipeline_jobs.h" #include #include #include #include #include #include #include #include #include #include #include #include #define GUI_MAX_RADIO_ELEMENTS 15 // forward declaration current_browse_directory, which allows CURRENT_ODIR browse buttons extern std::string current_browse_directory; // Create the scheduler GUI: without sliders or pull-down menus extern bool create_scheduler_gui; // Gui layout //#define XCOL1 10 //#define XCOL2 260 //#define XCOL3 460 //#define XCOL4 475 //#define XCOL5 535 //Additional space in tab if more than 4 XXXextraiXXX template variables are used defined by //environment variable RELION_QSUB_EXTRA_COUNT #define GUIEXTRA \ ( (getenv ("RELION_QSUB_EXTRA_COUNT"))? \ (std::max(0,(atoi(getenv ("RELION_QSUB_EXTRA_COUNT"))-4))*STEPY) : 0 ) #define MENUHEIGHT 30 #define TABHEIGHT 25 #define GUIWIDTH 800 #define GUIHEIGHT_OLD 420+GUIEXTRA #define GUIHEIGHT_EXT_START 370+GUIEXTRA #define GUIHEIGHT_EXT_START2 (GUIHEIGHT_EXT_START+MENUHEIGHT+10) #define GUIHEIGHT_EXT 800+GUIEXTRA #define XCOL0 200 #define WCOL0 200 #define XCOL1 ( (XCOL0) + 10 ) #define XCOL2 ( (XCOL0) + 280 ) #define XCOL3 ( (XCOL0) + 480 ) #define XCOL4 ( (XCOL0) + 495 ) #define XCOL5 ( (XCOL0) + 555 ) #define ENTRY_FONTSIZE 13 #define STEPY 20 #define COLUMN_SEPARATION 3 #define WCOL1 ( (XCOL2) - (XCOL1) - (COLUMN_SEPARATION) ) #define WCOL2 ( (XCOL3) - (XCOL2) - (COLUMN_SEPARATION) ) #define WCOL3 ( (XCOL4) - (XCOL3) - (COLUMN_SEPARATION) ) #define WCOL4 ( (XCOL5) - (XCOL4) - (COLUMN_SEPARATION) ) //version-1.0 #define GUI_BUTTON_COLOR (fl_rgb_color(200,255,100)) //version-1.0 #define GUI_RUNBUTTON_COLOR (fl_rgb_color(255,155,0)) //version-1.1 #define GUI_BUTTON_COLOR (fl_rgb_color(50,150,250)) //version-1.1 #define GUI_RUNBUTTON_COLOR (fl_rgb_color(255,50,50)) //version-1.2 #define GUI_BUTTON_COLOR (fl_rgb_color(155,150,255)) //version-1.2 #define GUI_RUNBUTTON_COLOR (fl_rgb_color(205,53,100)) //version-1.3 #define GUI_BUTTON_COLOR (fl_rgb_color(50, 200, 50)) //version-1.3 #define GUI_RUNBUTTON_COLOR (fl_rgb_color(255,80,80)) //version-1.4 #define GUI_RUNBUTTON_COLOR (fl_rgb_color(205,40,150)) //version-1.4 #define GUI_BUTTON_COLOR (fl_rgb_color(60, 180, 155)) //version-1.4 #define GUI_BUTTON_DARK_COLOR (fl_rgb_color(45, 135, 120)) //version-2.0 #define GUI_BUTTON_COLOR (fl_rgb_color(0, 200, 255)) //version-2.0 #define GUI_BUTTON_DARK_COLOR (fl_rgb_color(0, 160, 200)) //version-2.0 #define GUI_RUNBUTTON_COLOR (fl_rgb_color(70, 120, 255)) //version-2.1 #define GUI_BUTTON_COLOR (fl_rgb_color(100, 200, 50)) //version-2.1 #define GUI_BUTTON_DARK_COLOR (fl_rgb_color(70, 140, 30)) //version-2.1 #define GUI_RUNBUTTON_COLOR 
(fl_rgb_color(0, 130, 0)) //version-3.0 #define GUI_BUTTON_COLOR (fl_rgb_color(255, 180, 132)) //version-3.0 #define GUI_BUTTON_DARK_COLOR (fl_rgb_color(250, 150, 124)) //version-3.0 #define GUI_RUNBUTTON_COLOR (fl_rgb_color(235, 130, 0)) //version-3.1 #define GUI_BUTTON_COLOR (fl_rgb_color(238,130,238)) #define GUI_BUTTON_DARK_COLOR (fl_rgb_color(200, 110, 200)) #define GUI_RUNBUTTON_COLOR (fl_rgb_color(170, 0, 170)) #define GUI_BACKGROUND_COLOR (fl_rgb_color(230,230,240)) // slightly blue because of blue buttons in 2.0! #define GUI_BACKGROUND_COLOR2 (fl_rgb_color(180,180,190)) // slightly blue because of blue buttons in 2.0! // devel-version //#define GUI_BUTTON_COLOR (fl_rgb_color(255, 150, 150)) //#define GUI_BUTTON_DARK_COLOR (fl_rgb_color(200, 120, 120)) //#define GUI_RUNBUTTON_COLOR (fl_rgb_color(170, 0, 0)) //#define GUI_BACKGROUND_COLOR (fl_rgb_color(255,200,200)) // slightly red //#define GUI_BACKGROUND_COLOR2 (fl_rgb_color(230,180,180)) // slightly red //possible?#define GUI_BUTTON_COLOR (fl_rgb_color(50, 200, 255)) //devel-version //possible #define GUI_RUNBUTTON_COLOR (fl_rgb_color(205,0,155)) #define GUI_INPUT_COLOR (fl_rgb_color(255,255,230)) #define TOGGLE_DEACTIVATE 0 #define TOGGLE_REACTIVATE 1 #define TOGGLE_ALWAYS_DEACTIVATE 2 #define TOGGLE_LEAVE_ACTIVE 3 static Fl_Menu_Item bool_options[] = { {"Yes"}, {"No"}, {0} // this should be the last entry }; // A text to Float converter that raises an error window. float fltkTextToFloat(const char* str); /** This class displays opens an additional window with (help) text * */ class ShowHelpText{ public: // Constructor that opens and displays the window ShowHelpText(const char* help = NULL); // Empty destructor ~ShowHelpText(); }; class GuiEntry{ public: // What to do upon toggle of continue button int deactivate_option; // Input value storage Fl_Input* inp; // JobOption JobOption joboption; // Button to show additional help text Fl_Button* help; ////////////// FileName entry // Browse button Fl_Button* browse; ////////////// Radio entry // The choices Fl_Choice * choice; // The menu Fl_Menu_* menu; // Deactivate this group Fl_Group * my_deactivate_group; bool actually_activate; ////////////// Slider entry // The slider Fl_Slider * slider; /** Constructor with x,y-position from top left * wcol1, wcol2 and wcol3 are the widths of the three columns described above * title is the value displayed in the first column * defaultvalue is what will appear by default in the input value * help is the additional help text. If it is set to NULL, no help button will be displayed */ GuiEntry() { deactivate_option = -1; inp = NULL; help = NULL; browse = NULL; choice = NULL; menu = NULL; my_deactivate_group = NULL; actually_activate = false; slider = NULL; }; /** Empty destructor */ ~GuiEntry() { clear(); } // Clear this entry void clear(); /** Here really start the entry */ void initialise(int x, int y, Fl_Group * deactivate_this_group, bool actually_activate, int height, int wcol2, int wcol3); /** Place an entry on a window */ void place(JobOption &joboption, int &y, int _deactivate_option = TOGGLE_LEAVE_ACTIVE, Fl_Group * deactivate_this_group = NULL, bool actually_activate = false, int x = XCOL2, int h = STEPY, int wcol2 = WCOL2, int wcol3 = WCOL3 ); // Set _value in the Fl_Input on the GUI, and also in the joboptions. 
Also update menu/slider if necessary void setValue(std::string _value); // Deactivate this entry if the input boolean is true void deactivate(bool do_deactivate = true); /** Call-back functions for the help button * The method of using two functions of static void and inline void was copied from: * http://www3.telus.net/public/robark/ */ static void cb_help(Fl_Widget*, void*); void cb_help_i(); // Call-back functions for the browse button static void cb_browse(Fl_Widget*, void*); void cb_browse_i(); // Call-back functions for the browse button static void cb_browse_node(Fl_Widget*, void*); void cb_browse_node_i(); // Call-back functions for the menu static void cb_menu(Fl_Widget*, void*); void cb_menu_i(); // Call-back functions for the slider static void cb_slider(Fl_Widget*, void*); void cb_slider_i(); static void cb_input(Fl_Widget*, void*); void cb_input_i(); }; #endif /* SRC_NEWGUI_ENTRIES_H_ */ relion-3.1.3/src/gui_jobwindow.cpp000066400000000000000000001546311411340063500171560ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include "src/gui_jobwindow.h" JobWindow::JobWindow(int _x, int _y, int _w, int _h, const char* title ) : Fl_Box(x,y,w,h,title) { clear(); x = _x; y = _y; w = _w; h = _h; } void JobWindow::clear() { tabs = NULL; tab1 = tab2 = tab3 = tab4 = tab5 = tab6 = tab7 = runtab = NULL; group1 = group2 = group3 = group4 = group5 = group6 = group7 = queue_group = NULL; current_y = start_y = 0; is_continue = false; guientries.clear(); } void JobWindow::setupTabs(int nr_tabs) { current_y = y; // top of the GUI char * my_allow_change_dedicated = getenv ("RELION_ALLOW_CHANGE_MINIMUM_DEDICATED"); if (my_allow_change_dedicated == NULL) { do_allow_change_minimum_dedicated = DEFAULTMININIMUMDEDICATED; } else { int check_allow = textToInteger(my_allow_change_dedicated); do_allow_change_minimum_dedicated = (check_allow == 0) ? false : true; } // Set up tabs if (nr_tabs >= 1) // there is always the running tab, which is not counted on the input nr_tabs! 
{ tabs = new Fl_Tabs(x, current_y, w, h - MENUHEIGHT); current_y += TABHEIGHT; tabs->begin(); tab1 = new Fl_Group(x, current_y , w, h - MENUHEIGHT, ""); tab1->end(); tab1->color(GUI_BACKGROUND_COLOR); tab1->selection_color(GUI_BACKGROUND_COLOR2); if (nr_tabs >= 2) { tab2 = new Fl_Group(x, current_y , w, h - MENUHEIGHT, ""); tab2->end(); tab2->color(GUI_BACKGROUND_COLOR); tab2->selection_color(GUI_BACKGROUND_COLOR2); } if (nr_tabs >= 3) { tab3 = new Fl_Group(x, current_y, w, h - MENUHEIGHT, ""); tab3->end(); tab3->color(GUI_BACKGROUND_COLOR); tab3->selection_color(GUI_BACKGROUND_COLOR2); } if (nr_tabs >= 4) { tab4 = new Fl_Group(x, current_y, w, h - MENUHEIGHT, ""); tab4->end(); tab4->color(GUI_BACKGROUND_COLOR); tab4->selection_color(GUI_BACKGROUND_COLOR2); } if (nr_tabs >= 5) { tab5 = new Fl_Group(x, current_y, w, h - MENUHEIGHT, ""); tab5->end(); tab5->color(GUI_BACKGROUND_COLOR); tab5->selection_color(GUI_BACKGROUND_COLOR2); } if (nr_tabs >= 6) { tab6 = new Fl_Group(x, current_y, w, h - MENUHEIGHT, ""); tab6->end(); tab6->color(GUI_BACKGROUND_COLOR); tab6->selection_color(GUI_BACKGROUND_COLOR2); } if (nr_tabs >= 7) { tab7 = new Fl_Group(x, current_y, w, h - MENUHEIGHT, ""); tab7->end(); tab7->color(GUI_BACKGROUND_COLOR); tab7->selection_color(GUI_BACKGROUND_COLOR2); } if (nr_tabs >= 8) { std::cerr << "ERROR: only 7 job-specific tabs implemented..." << std::endl; exit(1); } current_y += 15; start_y = current_y; runtab = new Fl_Group(x, current_y, w, h - MENUHEIGHT, ""); runtab->label("Running"); // Fill this in later, when we have the joboptions runtab->end(); setupRunTab(); runtab->color(GUI_BACKGROUND_COLOR); runtab->selection_color(GUI_BACKGROUND_COLOR2); tabs->end(); } } void JobWindow::setupRunTab() { runtab->begin(); resetHeight(); bool has_parallel = false; if (myjob.joboptions.find("nr_mpi") != myjob.joboptions.end()) { place("nr_mpi", TOGGLE_LEAVE_ACTIVE); has_parallel = true; } if (myjob.joboptions.find("nr_threads") != myjob.joboptions.end()) { place("nr_threads", TOGGLE_LEAVE_ACTIVE); has_parallel = true; } // Add a little spacer if (has_parallel) current_y += STEPY/4; // Set up queue groups for running tab queue_group = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); queue_group->end(); place("do_queue", TOGGLE_LEAVE_ACTIVE, queue_group); queue_group->begin(); place("queuename"); place("qsub"); char * extra_count_text = getenv ("RELION_QSUB_EXTRA_COUNT"); const char extra_count_val = (extra_count_text ? 
atoi(extra_count_text) : 2); for (int i=1; i<=extra_count_val; i++) { std::stringstream out; out<end(); guientries["do_queue"].cb_menu_i(); // This is to make the default effective // Add a little spacer current_y += STEPY/4; place("other_args"); runtab->end(); } void JobWindow::place(std::string key, int deactivate_option, Fl_Group * deactivate_this_group, bool actually_activate) { if (myjob.joboptions.find(key) == myjob.joboptions.end()) std::cerr << "WARNING: cannot find " << key << " in the defined joboptions of jobtype= " << myjob.type << std::endl; guientries[key].place(myjob.joboptions[key], current_y, deactivate_option, deactivate_this_group, actually_activate); } void JobWindow::place2(std::string key1, std::string key2, std::string label, int deactivate_option) { if (myjob.joboptions.find(key1) == myjob.joboptions.end()) std::cerr << "WARNING: cannot find " << key1 << " in the defined joboptions of jobtype= " << myjob.type << std::endl; if (myjob.joboptions.find(key2) == myjob.joboptions.end()) std::cerr << "WARNING: cannot find " << key2 << " in the defined joboptions of jobtype= " << myjob.type << std::endl; myjob.joboptions[key1].label_gui = label; myjob.joboptions[key2].label_gui = ""; int old_y = current_y; guientries[key1].place(myjob.joboptions[key1], current_y, deactivate_option, NULL, false, XCOL2, STEPY, (WCOL2 - COLUMN_SEPARATION) / 2); current_y = old_y; guientries[key2].place(myjob.joboptions[key2], current_y, deactivate_option, NULL, false, XCOL2 + (WCOL2 + COLUMN_SEPARATION) / 2, STEPY, (WCOL2 - COLUMN_SEPARATION) / 2); } void JobWindow::place3(std::string key1, std::string key2, std::string key3, std::string label, int deactivate_option) { if (myjob.joboptions.find(key1) == myjob.joboptions.end()) std::cerr << "WARNING: cannot find " << key1 << " in the defined joboptions of jobtype= " << myjob.type << std::endl; if (myjob.joboptions.find(key2) == myjob.joboptions.end()) std::cerr << "WARNING: cannot find " << key2 << " in the defined joboptions of jobtype= " << myjob.type << std::endl; if (myjob.joboptions.find(key3) == myjob.joboptions.end()) std::cerr << "WARNING: cannot find " << key3 << " in the defined joboptions of jobtype= " << myjob.type<< std::endl; myjob.joboptions[key1].label_gui = label; myjob.joboptions[key2].label_gui = ""; myjob.joboptions[key3].label_gui = ""; int old_y = current_y; guientries[key1].place(myjob.joboptions[key1], current_y, deactivate_option, NULL, false, XCOL2, STEPY, (WCOL2 - COLUMN_SEPARATION * 2) / 3); current_y = old_y; guientries[key2].place(myjob.joboptions[key2], current_y, deactivate_option, NULL, false, XCOL2 + 1 + (WCOL2 + COLUMN_SEPARATION) / 3, STEPY, (WCOL2 - COLUMN_SEPARATION * 2) / 3); current_y = old_y; guientries[key3].place(myjob.joboptions[key3], current_y, deactivate_option, NULL, false, XCOL2 + 1 + 2 * (WCOL2 + COLUMN_SEPARATION) / 3, STEPY, (WCOL2 - COLUMN_SEPARATION * 2) / 3); } void JobWindow::toggle_new_continue(bool _is_continue) { is_continue = _is_continue; myjob.is_continue = _is_continue; for (std::map::iterator it=guientries.begin(); it!=guientries.end(); ++it) { int my_option = (it->second).deactivate_option; switch (my_option) { case TOGGLE_DEACTIVATE: { (it->second).deactivate(is_continue); break; } case TOGGLE_REACTIVATE: { (it->second).deactivate(!is_continue); break; } case TOGGLE_ALWAYS_DEACTIVATE: { (it->second).deactivate(true); break; } case TOGGLE_LEAVE_ACTIVE: { // do nothing break; } default: { REPORT_ERROR("ERROR: unrecognised deactivate-option for GUI entry " + it->first); } } } } void 
JobWindow::resetHeight() { current_y = start_y; } // Update all values in the Fl_Input entries from the corresponding job_options void JobWindow::updateMyGui() { for (std::map::iterator it=guientries.begin(); it!=guientries.end(); ++it) { if (myjob.joboptions.find(it->first) == myjob.joboptions.end()) std::cerr << "WARNING: cannot find " << it->first << " in the defined joboptions!" <second).setValue((myjob.joboptions[it->first]).value); } } // Update all values in the Fl_Input entries into the corresponding job_options void JobWindow::updateMyJob() { for (std::map::iterator it=myjob.joboptions.begin(); it!=myjob.joboptions.end(); ++it) { if (guientries.find(it->first) == guientries.end()) { std::cerr << "ERROR: cannot find " << it->first << " in the defined joboptions!" <second.value = std::string(((guientries[it->first]).inp)->value()); } } void JobWindow::initialise(int my_job_type) { if (my_job_type == PROC_IMPORT) { myjob.initialise(my_job_type); initialiseImportWindow(); } else if (my_job_type == PROC_MOTIONCORR) { myjob.initialise(my_job_type); initialiseMotioncorrWindow(); } else if (my_job_type == PROC_CTFFIND) { myjob.initialise(my_job_type); initialiseCtffindWindow(); } else if (my_job_type == PROC_MANUALPICK) { myjob.initialise(my_job_type); initialiseManualpickWindow(); } else if (my_job_type == PROC_AUTOPICK) { myjob.initialise(my_job_type); initialiseAutopickWindow(); } else if (my_job_type == PROC_EXTRACT) { myjob.initialise(my_job_type); initialiseExtractWindow(); } else if (my_job_type == PROC_CLASSSELECT) { myjob.initialise(my_job_type); initialiseSelectWindow(); } else if (my_job_type == PROC_2DCLASS) { myjob.initialise(my_job_type); initialiseClass2DWindow(); } else if (my_job_type == PROC_INIMODEL) { myjob.initialise(my_job_type); initialiseInimodelWindow(); } else if (my_job_type == PROC_3DCLASS) { myjob.initialise(my_job_type); initialiseClass3DWindow(); } else if (my_job_type == PROC_3DAUTO) { myjob.initialise(my_job_type); initialiseAutorefineWindow(); } else if (my_job_type == PROC_MULTIBODY) { myjob.initialise(my_job_type); initialiseMultiBodyWindow(); } else if (my_job_type == PROC_MASKCREATE) { myjob.initialise(my_job_type); initialiseMaskcreateWindow(); } else if (my_job_type == PROC_JOINSTAR) { myjob.initialise(my_job_type); initialiseJoinstarWindow(); } else if (my_job_type == PROC_SUBTRACT) { myjob.initialise(my_job_type); initialiseSubtractWindow(); } else if (my_job_type == PROC_POST) { myjob.initialise(my_job_type); initialisePostprocessWindow(); } else if (my_job_type == PROC_RESMAP) { myjob.initialise(my_job_type); initialiseLocresWindow(); } else if (my_job_type == PROC_MOTIONREFINE) { myjob.initialise(my_job_type); initialiseMotionrefineWindow(); } else if (my_job_type == PROC_CTFREFINE) { myjob.initialise(my_job_type); initialiseCtfrefineWindow(); } else if (my_job_type == PROC_EXTERNAL) { myjob.initialise(my_job_type); initialiseExternalWindow(); } else { REPORT_ERROR("ERROR: unrecognised job-type to add to the GUI"); } // read settings if hidden file exists myjob.read("", is_continue); // update the window updateMyGui(); } void JobWindow::initialiseImportWindow() { setupTabs(2); tab1->begin(); tab1->label("Movies/mics"); resetHeight(); group1 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group1->end(); place("do_raw", TOGGLE_DEACTIVATE, group1, false); // Add a little spacer current_y += STEPY/2; group1->begin(); place("fn_in_raw"); place("is_multiframe"); // Add a little spacer current_y += STEPY/2; place("optics_group_name"); 
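// The remaining entries on this tab describe the optics group of the imported movies/micrographs: MTF curve, original pixel size, voltage, spherical aberration, amplitude contrast and beam tilt.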
place("fn_mtf"); place("angpix"); place("kV"); place("Cs"); place("Q0"); place("beamtilt_x"); place("beamtilt_y"); group1->end(); guientries["do_raw"].cb_menu_i(); // make default active tab1->end(); tab2->begin(); tab2->label("Others"); resetHeight(); group2 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group2->end(); place("do_other", TOGGLE_DEACTIVATE, group2, false); group2->begin(); // Add a little spacer current_y += STEPY/2; place("fn_in_other"); place("node_type"); // Add a little spacer current_y += STEPY/2; place("optics_group_particles"); group2->end(); guientries["do_other"].cb_menu_i(); // make default active tab2->end(); // Always deactivate the queue option guientries["do_queue"].deactivate_option = TOGGLE_ALWAYS_DEACTIVATE; myjob.joboptions["do_queue"].setString("No"); } void JobWindow::initialiseMotioncorrWindow() { setupTabs(2); tab1->begin(); tab1->label("I/O"); resetHeight(); place("input_star_mics", TOGGLE_DEACTIVATE); // Add a little spacer current_y += STEPY/2; place("first_frame_sum", TOGGLE_DEACTIVATE); place("last_frame_sum", TOGGLE_DEACTIVATE); place("dose_per_frame", TOGGLE_DEACTIVATE); place("pre_exposure", TOGGLE_DEACTIVATE); place("eer_grouping", TOGGLE_DEACTIVATE); // Add a little spacer current_y += STEPY/2; group1 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group1->end(); place("do_dose_weighting", TOGGLE_DEACTIVATE, group1); group1->begin(); place("do_save_noDW", TOGGLE_DEACTIVATE); group1->end(); guientries["do_dose_weighting"].cb_menu_i(); // make default active group2 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group2->end(); place("do_save_ps", TOGGLE_DEACTIVATE, group2); group2->begin(); place("group_for_ps", TOGGLE_DEACTIVATE); group2->end(); tab1->end(); tab2->begin(); tab2->label("Motion"); resetHeight(); place("bfactor", TOGGLE_DEACTIVATE); place2("patch_x", "patch_y", "Number of patches X, Y", TOGGLE_DEACTIVATE); place("group_frames", TOGGLE_DEACTIVATE); place("bin_factor", TOGGLE_DEACTIVATE); place("fn_gain_ref", TOGGLE_DEACTIVATE); place("gain_rot", TOGGLE_DEACTIVATE); place("gain_flip", TOGGLE_DEACTIVATE); place("fn_defect", TOGGLE_DEACTIVATE); current_y += STEPY/2; group4 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group4->end(); place("do_own_motioncor", TOGGLE_DEACTIVATE, group4, true); group4->begin(); place("fn_motioncor2_exe", TOGGLE_DEACTIVATE); place("gpu_ids"); place("other_motioncor2_args", TOGGLE_DEACTIVATE); group4->end(); guientries["do_own_motioncor"].cb_menu_i(); // make default active tab2->end(); } void JobWindow::initialiseCtffindWindow() { setupTabs(3); tab1->begin(); tab1->label("I/O"); resetHeight(); place("input_star_mics", TOGGLE_DEACTIVATE); place("use_noDW", TOGGLE_DEACTIVATE); // Add a little spacer current_y += STEPY/2; group1 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group1->end(); place("do_phaseshift", TOGGLE_DEACTIVATE, group1); group1->begin(); place3("phase_min", "phase_max", "phase_step", "Phase shift - Min, Max, Step (deg)", TOGGLE_DEACTIVATE); group1->end(); guientries["do_phaseshift"].cb_menu_i(); // make default active // Add a little spacer current_y += STEPY/2; place("dast", TOGGLE_DEACTIVATE); tab1->end(); tab2->begin(); tab2->label("CTFFIND-4.1"); resetHeight(); group2 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group2->end(); place("use_ctffind4", TOGGLE_DEACTIVATE, group2); group2->begin(); place("fn_ctffind_exe", TOGGLE_DEACTIVATE); place("use_given_ps", TOGGLE_DEACTIVATE); place("slow_search", 
TOGGLE_DEACTIVATE); place("ctf_win", TOGGLE_DEACTIVATE); group2->end(); guientries["use_ctffind4"].cb_menu_i(); // make default active // Add a little spacer current_y += STEPY/2; place("box", TOGGLE_DEACTIVATE); place("resmin", TOGGLE_DEACTIVATE); place("resmax", TOGGLE_DEACTIVATE); place("dfmin", TOGGLE_DEACTIVATE); place("dfmax", TOGGLE_DEACTIVATE); place("dfstep", TOGGLE_DEACTIVATE); tab2->end(); tab3->begin(); tab3->label("Gctf"); resetHeight(); group4 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group4->end(); place("use_gctf", TOGGLE_DEACTIVATE, group4); group4->begin(); place("fn_gctf_exe", TOGGLE_DEACTIVATE); place("do_ignore_ctffind_params", TOGGLE_DEACTIVATE); place("do_EPA", TOGGLE_DEACTIVATE); // Add a little spacer current_y += STEPY/2; place("other_gctf_args", TOGGLE_DEACTIVATE); // Add a little spacer current_y += STEPY/2; place("gpu_ids", TOGGLE_LEAVE_ACTIVE); group4->end(); guientries["use_gctf"].cb_menu_i(); // make default active tab3->end(); } void JobWindow::initialiseManualpickWindow() { setupTabs(3); tab1->begin(); tab1->label("I/O"); resetHeight(); place("fn_in", TOGGLE_DEACTIVATE); tab1->end(); tab2->begin(); tab2->label("Display"); resetHeight(); place("diameter"); place("micscale"); place("sigma_contrast"); place("white_val"); place("black_val"); current_y += STEPY/2; place("lowpass"); place("highpass"); place("angpix"); current_y += STEPY/2; place ("do_startend"); current_y += STEPY/2; place("ctfscale"); tab2->end(); tab3->begin(); tab3->label("Colors"); group1 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group1->end(); resetHeight(); place("do_color", TOGGLE_LEAVE_ACTIVE, group1); group1->begin(); place("color_label"); place("fn_color"); place("blue_value"); place("red_value"); group1->end(); guientries["do_color"].cb_menu_i(); // make default active tab3->end(); // Always deactivate the queue option guientries["do_queue"].deactivate_option = TOGGLE_ALWAYS_DEACTIVATE; myjob.joboptions["do_queue"].setString("No"); } void JobWindow::initialiseAutopickWindow() { setupTabs(5); tab1->begin(); tab1->label("I/O"); resetHeight(); place("fn_input_autopick", TOGGLE_DEACTIVATE); place("angpix", TOGGLE_DEACTIVATE); current_y += STEPY/2; place("fn_refs_autopick", TOGGLE_DEACTIVATE); group1 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group1->end(); place("do_ref3d", TOGGLE_DEACTIVATE, group1); group1->begin(); place("fn_ref3d_autopick", TOGGLE_DEACTIVATE); place("ref3d_symmetry", TOGGLE_DEACTIVATE); place("ref3d_sampling", TOGGLE_DEACTIVATE); group1->end(); guientries["do_ref3d"].cb_menu_i(); place("do_log", TOGGLE_DEACTIVATE); tab1->end(); tab2->begin(); tab2->label("Laplacian"); resetHeight(); place("log_diam_min", TOGGLE_DEACTIVATE); place("log_diam_max", TOGGLE_DEACTIVATE); place("log_invert", TOGGLE_DEACTIVATE); // Add a little spacer current_y += STEPY/2; place("log_maxres", TOGGLE_DEACTIVATE); place("log_adjust_thr"); place("log_upper_thr"); tab2->end(); tab3->begin(); tab3->label("References"); resetHeight(); //set up group group2 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group2->end(); place("lowpass", TOGGLE_DEACTIVATE); place("highpass", TOGGLE_DEACTIVATE); place("angpix_ref", TOGGLE_DEACTIVATE); place("particle_diameter", TOGGLE_DEACTIVATE); // Add a little spacer current_y += STEPY/2; place("psi_sampling_autopick", TOGGLE_DEACTIVATE); // Add a little spacer current_y += STEPY/2; place("do_invert_refs", TOGGLE_DEACTIVATE); place("do_ctf_autopick", TOGGLE_DEACTIVATE, group2); group2->begin(); 
place("do_ignore_first_ctfpeak_autopick", TOGGLE_DEACTIVATE); //(current_y, "Ignore CTFs until first peak?", false,"Set this to Yes, only if this option was also used to generate the references."); group2->end(); guientries["do_ctf_autopick"].cb_menu_i(); tab3->end(); tab4->begin(); tab4->label("autopicking"); resetHeight(); place("threshold_autopick"); place("mindist_autopick"); place("maxstddevnoise_autopick"); place("minavgnoise_autopick"); current_y += STEPY/2; place("do_write_fom_maps"); place("do_read_fom_maps"); // Add a little spacer current_y += STEPY/2; // Set up queue groups for running tab place("shrink", TOGGLE_DEACTIVATE); group3 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group3->end(); place("use_gpu", TOGGLE_LEAVE_ACTIVE, group3); group3->begin(); place("gpu_ids"); group3->end(); guientries["use_gpu"].cb_menu_i(); tab4->end(); tab5->begin(); tab5->label("Helix"); resetHeight(); group4 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group4->end(); place("do_pick_helical_segments", TOGGLE_DEACTIVATE, group4); group4->begin(); place("do_amyloid"); place("helical_tube_outer_diameter"); current_y += STEPY/2; place("helical_nr_asu"); place("helical_rise"); current_y += STEPY/2; place("helical_tube_kappa_max"); place("helical_tube_length_min"); group4->end(); guientries["do_pick_helical_segments"].cb_menu_i(); tab5->end(); } void JobWindow::initialiseExtractWindow() { setupTabs(3); tab1->begin(); tab1->label("I/O"); resetHeight(); place("star_mics", TOGGLE_DEACTIVATE); current_y += STEPY/2; place("coords_suffix", TOGGLE_DEACTIVATE); current_y += STEPY/2; group1 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group1->end(); place("do_reextract", TOGGLE_DEACTIVATE, group1); group1->begin(); place("fndata_reextract", TOGGLE_DEACTIVATE); place("do_reset_offsets", TOGGLE_DEACTIVATE); group7 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group7->end(); place("do_recenter", TOGGLE_DEACTIVATE, group7); group7->begin(); place3("recenter_x","recenter_y", "recenter_z", "Recenter on - X, Y, Z (pix):", TOGGLE_DEACTIVATE); group7->end(); guientries["do_recenter"].cb_menu_i(); group1->end(); guientries["do_reextract"].cb_menu_i(); tab1->end(); tab2->begin(); tab2->label("extract"); resetHeight(); place("extract_size", TOGGLE_DEACTIVATE); //(current_y,"Particle box size (pix):", 128, 64, 512, 8, "Size of the extracted particles (in pixels). This should be an even number!"); place("do_invert", TOGGLE_DEACTIVATE); //(current_y, "Invert contrast?", true, "If set to Yes, the contrast in the particles will be inverted."); // Add a little spacer current_y += STEPY/2; group3 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group3->end(); place("do_norm", TOGGLE_DEACTIVATE, group3); group3->begin(); place("bg_diameter", TOGGLE_DEACTIVATE); //(current_y, "Diameter background circle (pix): ", -1, -1, 600, 10, "Particles will be normalized to a mean value of zero and a standard-deviation of one for all pixels in the background area.\ The background area is defined as all pixels outside a circle with this given diameter in pixels (before rescaling). When specifying a negative value, a default value of 75% of the Particle box size will be used."); place("white_dust", TOGGLE_DEACTIVATE); //(current_y, "Stddev for white dust removal: ", -1, -1, 10, 0.1, "Remove very white pixels from the extracted particles. \ Pixels values higher than this many times the image stddev will be replaced with values from a Gaussian distribution. 
\n \n Use negative value to switch off dust removal."); place("black_dust", TOGGLE_DEACTIVATE); //(current_y, "Stddev for black dust removal: ", -1, -1, 10, 0.1, "Remove very black pixels from the extracted particles. \ Pixels values higher than this many times the image stddev will be replaced with values from a Gaussian distribution. \n \n Use negative value to switch off dust removal."); group3->end(); guientries["do_norm"].cb_menu_i(); // Add a little spacer current_y += STEPY/2; group4 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group4->end(); place("do_rescale", TOGGLE_DEACTIVATE, group4); group4->begin(); place("rescale", TOGGLE_DEACTIVATE); group4->end(); guientries["do_rescale"].cb_menu_i(); tab2->end(); tab3->begin(); tab3->label("Helix"); resetHeight(); group5 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group5->end(); place("do_extract_helix", TOGGLE_DEACTIVATE, group5); group5->begin(); place("helical_tube_outer_diameter", TOGGLE_DEACTIVATE); current_y += STEPY/2; place("helical_bimodal_angular_priors", TOGGLE_DEACTIVATE); group6 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group6->end(); current_y += STEPY/2; place("do_extract_helical_tubes", TOGGLE_DEACTIVATE, group6); group6->begin(); place("do_cut_into_segments", TOGGLE_DEACTIVATE); place("helical_nr_asu", TOGGLE_DEACTIVATE); place("helical_rise", TOGGLE_DEACTIVATE); group6->end(); guientries["do_extract_helical_tubes"].cb_menu_i(); group5->end(); guientries["do_extract_helix"].cb_menu_i(); tab3->end(); } void JobWindow::initialiseSelectWindow() { setupTabs(4); tab1->begin(); tab1->label("I/O"); resetHeight(); place("fn_model", TOGGLE_DEACTIVATE); place("fn_mic", TOGGLE_DEACTIVATE); place("fn_data", TOGGLE_DEACTIVATE); place("fn_coords", TOGGLE_DEACTIVATE); tab1->end(); tab2->begin(); tab2->label("Class options"); resetHeight(); place("do_recenter", TOGGLE_DEACTIVATE); group1 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group1->end(); place("do_regroup", TOGGLE_DEACTIVATE, group1); group1->begin(); place("nr_groups", TOGGLE_DEACTIVATE); group1->end(); guientries["do_regroup"].cb_menu_i(); tab2->end(); tab3->begin(); tab3->label("Subsets"); resetHeight(); group3 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group3->end(); place("do_select_values", TOGGLE_DEACTIVATE, group3); group3->begin(); place("select_label", TOGGLE_DEACTIVATE); place("select_minval", TOGGLE_DEACTIVATE); place("select_maxval", TOGGLE_DEACTIVATE); group3->end(); guientries["do_select_values"].cb_menu_i(); group4 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group4->end(); // Add a little spacer current_y += STEPY/2; place("do_discard", TOGGLE_DEACTIVATE, group4); group4->begin(); place("discard_label", TOGGLE_DEACTIVATE); place("discard_sigma", TOGGLE_DEACTIVATE); group4->end(); guientries["do_discard"].cb_menu_i(); group5 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group5->end(); // Add a little spacer current_y += STEPY/2; place("do_split", TOGGLE_DEACTIVATE, group5); group5->begin(); place("do_random", TOGGLE_DEACTIVATE); place("split_size", TOGGLE_DEACTIVATE); place("nr_split", TOGGLE_DEACTIVATE); group5->end(); guientries["do_split"].cb_menu_i(); tab3->end(); tab4->begin(); tab4->label("Duplicates"); resetHeight(); group2 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group2->end(); place("do_remove_duplicates", TOGGLE_DEACTIVATE, group2); group2->begin(); place("duplicate_threshold", TOGGLE_DEACTIVATE); place("image_angpix", 
TOGGLE_DEACTIVATE); group2->end(); guientries["do_remove_duplicates"].cb_menu_i(); tab4->end(); // Always deactivate the queue option guientries["do_queue"].deactivate_option = TOGGLE_ALWAYS_DEACTIVATE; myjob.joboptions["do_queue"].setString("No"); } void JobWindow::initialiseClass2DWindow() { setupTabs(6); tab1->begin(); tab1->label("I/O"); resetHeight(); place("fn_img", TOGGLE_DEACTIVATE); place("fn_cont", TOGGLE_REACTIVATE); tab1->end(); tab2->begin(); tab2->label("CTF"); group1 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group1->end(); resetHeight(); place("do_ctf_correction", TOGGLE_DEACTIVATE, group1); group1->begin(); place("ctf_intact_first_peak", TOGGLE_DEACTIVATE); group1->end(); guientries["do_ctf_correction"].cb_menu_i(); // To make default effective tab2->end(); tab3->begin(); tab3->label("Optimisation"); resetHeight(); //set up groups group2 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group2->end(); place("nr_classes", TOGGLE_DEACTIVATE); place("tau_fudge"); // Add a little spacer current_y += STEPY/2; place("nr_iter"); place("do_fast_subsets", TOGGLE_DEACTIVATE); // Add a little spacer current_y += STEPY/2; place("particle_diameter"); place("do_zero_mask", TOGGLE_DEACTIVATE); // Add a little spacer current_y += STEPY/2; place("highres_limit", TOGGLE_DEACTIVATE); // Add a little spacer current_y += STEPY/2; tab3->end(); tab4->begin(); tab4->label("Sampling"); //set up groups group3 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group3->end(); resetHeight(); place("dont_skip_align", TOGGLE_LEAVE_ACTIVE, group3); group3->begin(); place("psi_sampling"); place("offset_range"); place("offset_step"); current_y += STEPY/2; place("allow_coarser"); group3->end(); guientries["dont_skip_align"].cb_menu_i(); // to make default effective tab4->end(); tab5->begin(); tab5->label("Helix"); resetHeight(); group4 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group4->end(); place("do_helix", TOGGLE_DEACTIVATE, group4); group4->begin(); place("helical_tube_outer_diameter"); place("do_bimodal_psi"); place("range_psi"); group7 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group7->end(); place("do_restrict_xoff", TOGGLE_LEAVE_ACTIVE, group7); group7->begin(); place("helical_rise", TOGGLE_LEAVE_ACTIVE); group7->end(); guientries["do_restrict_xoff"].cb_menu_i(); group4->end(); guientries["do_helix"].cb_menu_i(); // to make default effective tab5->end(); tab6->begin(); tab6->label("Compute"); resetHeight(); place("do_parallel_discio"); place("nr_pool"); group5 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group5->end(); place("do_preread_images", TOGGLE_LEAVE_ACTIVE, group5, true); group5->begin(); place("scratch_dir"); group5->end(); place("do_combine_thru_disc"); // Add a little spacer current_y += STEPY/2; // Set up queue groups for running tab group6 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group6->end(); place("use_gpu", TOGGLE_LEAVE_ACTIVE, group6); group6->begin(); place("gpu_ids", TOGGLE_LEAVE_ACTIVE); group6->end(); guientries["use_gpu"].cb_menu_i(); tab6->end(); } void JobWindow::initialiseInimodelWindow() { setupTabs(5); tab1->begin(); tab1->label("I/O"); resetHeight(); place("fn_img", TOGGLE_DEACTIVATE); place("fn_cont", TOGGLE_REACTIVATE); tab1->end(); tab2->begin(); tab2->label("CTF"); group1 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group1->end(); resetHeight(); #ifdef ALLOW_CTF_IN_SGD place("do_ctf_correction", TOGGLE_DEACTIVATE, group1); group1->begin(); 
place("ctf_intact_first_peak", TOGGLE_DEACTIVATE); group1->end(); guientries["do_ctf_correction"].cb_menu_i(); // To make default effective #else Fl_Text_Buffer *textbuff1 = new Fl_Text_Buffer(); textbuff1->text("CTF-modulation, as mentioned in claim 1 of patent US10,282,513B2, is disabled\nYou can enable it by rebuilding, using -DALLOW_CTF_IN_SGD=ON in cmake."); Fl_Text_Display* textdisp1 = new Fl_Text_Display(XCOL1, current_y, WCOL1+WCOL2+WCOL3+10, STEPY*1.8); textdisp1->textsize(11); textdisp1->color(GUI_BACKGROUND_COLOR); textdisp1->buffer(textbuff1); current_y += STEPY*2.5; place("do_ctf_correction", TOGGLE_ALWAYS_DEACTIVATE); group1->begin(); place("ctf_phase_flipped", TOGGLE_ALWAYS_DEACTIVATE); place("ctf_intact_first_peak", TOGGLE_ALWAYS_DEACTIVATE); group1->end(); #endif tab2->end(); tab3->begin(); tab3->label("Optimisation"); resetHeight(); place("nr_classes", TOGGLE_DEACTIVATE); // Add a little spacer current_y += STEPY/2; place("particle_diameter"); place("do_solvent", TOGGLE_DEACTIVATE); place("sym_name", TOGGLE_DEACTIVATE); // Add a little spacer current_y += STEPY/2; place("sampling"); place("offset_range"); place("offset_step"); tab3->end(); tab4->begin(); tab4->label("SGD"); resetHeight(); place("sgd_ini_iter"); place("sgd_inbetween_iter"); place("sgd_fin_iter"); place("sgd_write_iter"); // Add a little spacer current_y += STEPY/2; place("sgd_ini_resol"); place("sgd_fin_resol"); // Add a little spacer current_y += STEPY/2; place("sgd_ini_subset_size"); place("sgd_fin_subset_size"); // Add a little spacer current_y += STEPY/2; place("sgd_sigma2fudge_halflife", TOGGLE_DEACTIVATE); tab4->end(); tab5->begin(); tab5->label("Compute"); resetHeight(); place("do_parallel_discio"); place("nr_pool"); place("do_pad1"); place("skip_gridding"); group5 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group5->end(); place("do_preread_images", TOGGLE_LEAVE_ACTIVE, group5, true); group5->begin(); place("scratch_dir"); group5->end(); place("do_combine_thru_disc"); // Add a little spacer current_y += STEPY/2; // Set up queue groups for running tab group6 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group6->end(); place("use_gpu", TOGGLE_LEAVE_ACTIVE, group6); group6->begin(); place("gpu_ids", TOGGLE_LEAVE_ACTIVE); group5->end(); guientries["use_gpu"].cb_menu_i(); tab5->end(); } void JobWindow::initialiseClass3DWindow() { setupTabs(7); tab1->begin(); tab1->label("I/O"); resetHeight(); place("fn_img", TOGGLE_DEACTIVATE); place("fn_cont", TOGGLE_REACTIVATE); place("fn_ref", TOGGLE_DEACTIVATE); place("fn_mask"); tab1->end(); tab2->begin(); tab2->label("Reference"); resetHeight(); place("ref_correct_greyscale", TOGGLE_DEACTIVATE); place("ini_high", TOGGLE_DEACTIVATE); // Add a little spacer current_y += STEPY/2; place("sym_name", TOGGLE_DEACTIVATE); tab2->end(); tab3->begin(); tab3->label("CTF"); group1 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group1->end(); resetHeight(); place("do_ctf_correction", TOGGLE_DEACTIVATE, group1); group1->begin(); place("ctf_corrected_ref", TOGGLE_DEACTIVATE); place("ctf_intact_first_peak", TOGGLE_DEACTIVATE); group1->end(); guientries["do_ctf_correction"].cb_menu_i(); // To make default effective tab3->end(); tab4->begin(); tab4->label("Optimisation"); resetHeight(); //set up groups group2 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group2->end(); place("nr_classes", TOGGLE_DEACTIVATE); place("tau_fudge"); // Add a little spacer current_y += STEPY/2; place("nr_iter"); place("do_fast_subsets", 
TOGGLE_DEACTIVATE); // Add a little spacer current_y += STEPY/2; place("particle_diameter"); place("do_zero_mask", TOGGLE_DEACTIVATE); // Add a little spacer current_y += STEPY/2; place("highres_limit", TOGGLE_DEACTIVATE); tab4->end(); tab5->begin(); tab5->label("Sampling"); //set up groups group3 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group3->end(); resetHeight(); place("dont_skip_align", TOGGLE_LEAVE_ACTIVE, group3); group3->begin(); place("sampling"); place("offset_range"); place("offset_step"); group4 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group4->end(); place("do_local_ang_searches", TOGGLE_LEAVE_ACTIVE, group4); group4->begin(); place("sigma_angles"); place("relax_sym"); group4->end(); guientries["do_local_ang_searches"].cb_menu_i(); // to make default effective current_y += STEPY/2; place("allow_coarser"); group3->end(); guientries["dont_skip_align"].cb_menu_i(); // to make default effective tab5->end(); tab6->begin(); tab6->label("Helix"); resetHeight(); group5 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group5->end(); //helix_text", TOGGLE_DEACTIVATE); //(current_y, "Nov 21, 2015"); place("do_helix", TOGGLE_DEACTIVATE, group5); group5->begin(); place2("helical_tube_inner_diameter", "helical_tube_outer_diameter", "Tube diameter - inner, outer (A):", TOGGLE_DEACTIVATE); place3("range_rot", "range_tilt", "range_psi", "Angular search range - rot, tilt, psi (deg):", TOGGLE_DEACTIVATE); place("helical_range_distance", TOGGLE_DEACTIVATE); place("keep_tilt_prior_fixed", TOGGLE_DEACTIVATE); // Add a little spacer current_y += STEPY/2; group8 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group8->end(); place("do_apply_helical_symmetry", TOGGLE_DEACTIVATE, group8); group8->begin(); place("helical_nr_asu", TOGGLE_DEACTIVATE); place2("helical_twist_initial", "helical_rise_initial", "Initial twist (deg), rise (A):", TOGGLE_DEACTIVATE); place("helical_z_percentage", TOGGLE_DEACTIVATE); group8->end(); guientries["do_apply_helical_symmetry"].cb_menu_i(); // to make default effective // Add a little spacer current_y += STEPY/2; group6 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group6->end(); place("do_local_search_helical_symmetry", TOGGLE_DEACTIVATE, group6); group6->begin(); place3("helical_twist_min","helical_twist_max", "helical_twist_inistep", "Twist search - Min, Max, Step (deg):", TOGGLE_DEACTIVATE); place3("helical_rise_min", "helical_rise_max", "helical_rise_inistep", "Rise search - Min, Max, Step (A):", TOGGLE_DEACTIVATE); group6->end(); guientries["do_local_search_helical_symmetry"].cb_menu_i(); // to make default effective group5->end(); guientries["do_helix"].cb_menu_i(); // to make default effective tab6->end(); tab7->begin(); tab7->label("Compute"); resetHeight(); place("do_parallel_discio"); place("nr_pool"); place("do_pad1"); place("skip_gridding"); group7 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group7->end(); place("do_preread_images", TOGGLE_LEAVE_ACTIVE, group7, true); group7->begin(); place("scratch_dir"); group7->end(); place("do_combine_thru_disc"); // Add a little spacer current_y += STEPY/2; // Set up queue groups for running tab group8 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group8->end(); place("use_gpu", TOGGLE_LEAVE_ACTIVE, group8); group8->begin(); place("gpu_ids"); group8->end(); guientries["use_gpu"].cb_menu_i(); // This is to make the default effective tab7->end(); } void JobWindow::initialiseAutorefineWindow() { setupTabs(7); 
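// Auto-refine uses seven job-specific tabs (I/O, Reference, CTF, Optimisation, Auto-sampling, Helix, Compute); setupTabs() has also created the common "Running" tab via setupRunTab().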
tab1->begin(); tab1->label("I/O"); resetHeight(); place("fn_img", TOGGLE_DEACTIVATE); place("fn_cont", TOGGLE_REACTIVATE); place("fn_ref", TOGGLE_DEACTIVATE); place("fn_mask"); tab1->end(); tab2->begin(); tab2->label("Reference"); resetHeight(); place("ref_correct_greyscale", TOGGLE_DEACTIVATE); place("ini_high", TOGGLE_DEACTIVATE); // Add a little spacer current_y += STEPY/2; place("sym_name", TOGGLE_DEACTIVATE); tab2->end(); tab3->begin(); tab3->label("CTF"); group1 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group1->end(); resetHeight(); place("do_ctf_correction", TOGGLE_DEACTIVATE, group1); group1->begin(); place("ctf_corrected_ref", TOGGLE_DEACTIVATE); place("ctf_intact_first_peak", TOGGLE_DEACTIVATE); group1->end(); guientries["do_ctf_correction"].cb_menu_i(); // To make default effective tab3->end(); tab4->begin(); tab4->label("Optimisation"); resetHeight(); place("particle_diameter"); place("do_zero_mask", TOGGLE_DEACTIVATE); // Add a little spacer current_y += STEPY/2; place("do_solvent_fsc"); tab4->end(); tab5->begin(); tab5->label("Auto-sampling"); resetHeight(); place("sampling", TOGGLE_DEACTIVATE); place("offset_range", TOGGLE_DEACTIVATE); place("offset_step", TOGGLE_DEACTIVATE); current_y += STEPY/2; place("auto_local_sampling", TOGGLE_DEACTIVATE); place("relax_sym"); current_y += STEPY/2; place("auto_faster"); tab5->end(); tab6->begin(); tab6->label("Helix"); resetHeight(); group2 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group2->end(); place("do_helix", TOGGLE_DEACTIVATE, group2); group2->begin(); place2("helical_tube_inner_diameter", "helical_tube_outer_diameter", "Tube diameter - inner, outer (A):",TOGGLE_DEACTIVATE); place3("range_rot", "range_tilt", "range_psi", "Angular search range - rot, tilt, psi (deg):", TOGGLE_DEACTIVATE); place("helical_range_distance", TOGGLE_DEACTIVATE); place("keep_tilt_prior_fixed", TOGGLE_DEACTIVATE); // Add a little spacer current_y += STEPY/2; group5 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group5->end(); place("do_apply_helical_symmetry", TOGGLE_DEACTIVATE, group5); group5->begin(); place("helical_nr_asu", TOGGLE_DEACTIVATE); place2("helical_twist_initial", "helical_rise_initial", "Initial twist (deg), rise (A):",TOGGLE_DEACTIVATE); place("helical_z_percentage", TOGGLE_DEACTIVATE); group5->end(); guientries["do_apply_helical_symmetry"].cb_menu_i(); // to make default effective // Add a little spacer current_y += STEPY/2; group3 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group3->end(); place("do_local_search_helical_symmetry", TOGGLE_DEACTIVATE, group3); group3->begin(); place3("helical_twist_min", "helical_twist_max", "helical_twist_inistep", "Twist search - Min, Max, Step (deg):", TOGGLE_DEACTIVATE); place3("helical_rise_min", "helical_rise_max","helical_rise_inistep","Rise search - Min, Max, Step (A):", TOGGLE_DEACTIVATE); group3->end(); guientries["do_local_search_helical_symmetry"].cb_menu_i(); // to make default effective group2->end(); guientries["do_helix"].cb_menu_i(); // to make default effective tab6->end(); tab7->begin(); tab7->label("Compute"); resetHeight(); place("do_parallel_discio"); place("nr_pool"); place("do_pad1"); place("skip_gridding"); group4 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group4->end(); place("do_preread_images", TOGGLE_LEAVE_ACTIVE, group4, true); group4->begin(); place("scratch_dir"); group4->end(); place("do_combine_thru_disc"); // Add a little spacer current_y += STEPY/2; // Set up queue groups for running tab 
group5 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group5->end(); place("use_gpu", TOGGLE_LEAVE_ACTIVE, group5); group5->begin(); place("gpu_ids"); group5->end(); guientries["use_gpu"].cb_menu_i(); // This is to make the default effective tab7->end(); } void JobWindow::initialiseMultiBodyWindow() { setupTabs(4); tab1->begin(); tab1->label("I/O"); resetHeight(); place("fn_in", TOGGLE_DEACTIVATE); place("fn_cont", TOGGLE_REACTIVATE); place("fn_bodies", TOGGLE_DEACTIVATE); // Add a little spacer current_y += STEPY/2; place("do_subtracted_bodies", TOGGLE_DEACTIVATE); tab1->end(); tab2->begin(); tab2->label("Auto-sampling"); resetHeight(); place("sampling", TOGGLE_DEACTIVATE); place("offset_range", TOGGLE_DEACTIVATE); place("offset_step", TOGGLE_DEACTIVATE); tab2->end(); tab3->begin(); tab3->label("Analyse"); resetHeight(); group5 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group5->end(); place("do_analyse", TOGGLE_LEAVE_ACTIVE, group5); group5->begin(); place("nr_movies"); group6 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group6->end(); place("do_select", TOGGLE_LEAVE_ACTIVE, group6); group6->begin(); place("select_eigenval"); place("eigenval_min"); place("eigenval_max"); group6->end(); guientries["do_select"].cb_menu_i(); // This is to make the default effective group5->end(); guientries["do_analyse"].cb_menu_i(); // This is to make the default effective tab3->end(); tab4->begin(); tab4->label("Compute"); resetHeight(); place("do_parallel_discio"); place("nr_pool"); place("do_pad1"); place("skip_gridding"); group7 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group7->end(); place("do_preread_images", TOGGLE_LEAVE_ACTIVE, group7, true); group7->begin(); place("scratch_dir"); group7->end(); place("do_combine_thru_disc"); // Add a little spacer current_y += STEPY/2; // Set up queue groups for running tab group4 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group4->end(); place("use_gpu", TOGGLE_LEAVE_ACTIVE, group4); group4->begin(); place("gpu_ids"); group4->end(); guientries["use_gpu"].cb_menu_i(); // This is to make the default effective tab4->end(); } void JobWindow::initialiseMaskcreateWindow() { setupTabs(3); tab1->begin(); tab1->label("I/O"); resetHeight(); place("fn_in", TOGGLE_DEACTIVATE); //(current_y, "Input 3D map:", NODE_3DREF, "", "MRC map files (*.mrc)", "Provide an input MRC map from which to start binarizing the map."); tab1->end(); tab2->begin(); tab2->label("Mask"); resetHeight(); place("lowpass_filter"); place("angpix"); // Add a little spacer current_y += STEPY/2; place("inimask_threshold"); place("extend_inimask"); place("width_mask_edge"); tab2->end(); tab3->begin(); tab3->label("Helix"); resetHeight(); group1 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group1->end(); place("do_helix", TOGGLE_LEAVE_ACTIVE, group1); group1->begin(); place("helical_z_percentage"); group1->end(); guientries["do_helix"].cb_menu_i(); // to make default effective tab3->end(); } void JobWindow::initialiseJoinstarWindow() { setupTabs(3); tab1->begin(); tab1->label("particles"); resetHeight(); group1 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group1->end(); place("do_part", TOGGLE_DEACTIVATE, group1); group1->begin(); place("fn_part1", TOGGLE_DEACTIVATE); place("fn_part2", TOGGLE_DEACTIVATE); place("fn_part3", TOGGLE_DEACTIVATE); place("fn_part4", TOGGLE_DEACTIVATE); group1->end(); guientries["do_part"].cb_menu_i(); // make default active tab1->end(); tab2->begin(); 
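// Micrographs tab: when do_mic is set, up to four micrograph STAR files (fn_mic1 - fn_mic4) can be combined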
tab2->label("micrographs"); resetHeight(); group2 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group2->end(); place("do_mic", TOGGLE_DEACTIVATE, group2); group2->begin(); place("fn_mic1", TOGGLE_DEACTIVATE); place("fn_mic2", TOGGLE_DEACTIVATE); place("fn_mic3", TOGGLE_DEACTIVATE); place("fn_mic4", TOGGLE_DEACTIVATE); group2->end(); guientries["do_mic"].cb_menu_i(); // make default active tab2->end(); tab3->begin(); tab3->label("movies"); resetHeight(); group3 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group3->end(); place("do_mov", TOGGLE_DEACTIVATE, group3); //(current_y, "Combine movie STAR files?", false, "", mov_group); group3->begin(); place("fn_mov1", TOGGLE_DEACTIVATE); place("fn_mov2", TOGGLE_DEACTIVATE); place("fn_mov3", TOGGLE_DEACTIVATE); place("fn_mov4", TOGGLE_DEACTIVATE); group3->end(); guientries["do_mov"].cb_menu_i(); // make default active tab3->end(); } void JobWindow::initialiseSubtractWindow() { setupTabs(2); tab1->begin(); tab1->label("I/O"); resetHeight(); place("fn_opt", TOGGLE_DEACTIVATE); place("fn_mask", TOGGLE_DEACTIVATE); group1 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group1->end(); place("do_data", TOGGLE_DEACTIVATE, group1); group1->begin(); place("fn_data", TOGGLE_DEACTIVATE); group1->end(); guientries["do_data"].cb_menu_i(); // make default active current_y += STEPY/2; group2 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group2->end(); place("do_fliplabel", TOGGLE_DEACTIVATE, group2); group2->begin(); place("fn_fliplabel", TOGGLE_DEACTIVATE); group2->end(); guientries["do_fliplabel"].cb_menu_i(); // make default active tab1->end(); tab2->begin(); tab2->label("Centering"); resetHeight(); group3 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group3->end(); place("do_center_mask", TOGGLE_DEACTIVATE, group3, true); group3->begin(); group4 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group4->end(); place("do_center_xyz", TOGGLE_DEACTIVATE, group4); group4->begin(); place3("center_x", "center_y", "center_z", "Center coordinate - X, Y, Z (pix):", TOGGLE_DEACTIVATE); group4->end(); guientries["do_center_xyz"].cb_menu_i(); // To make default effective group3->end(); guientries["do_center_mask"].cb_menu_i(); // To make default effective current_y += STEPY/2; place("new_box", TOGGLE_DEACTIVATE); tab2->end(); } void JobWindow::initialisePostprocessWindow() { setupTabs(3); tab1->begin(); tab1->label("I/O"); resetHeight(); place("fn_in", TOGGLE_DEACTIVATE); //(current_y, "One of the 2 unfiltered half-maps:", NODE_HALFMAP, "", "MRC map files (*half1_class001_unfil.mrc)", "Provide one of the two unfiltered half-reconstructions that were output upon convergence of a 3D auto-refine run."); place("fn_mask", TOGGLE_DEACTIVATE); //(current_y, "Solvent mask:", NODE_MASK, "", "Image Files (*.{spi,vol,msk,mrc})", "Provide a soft mask where the protein is white (1) and the solvent is black (0). Often, the softer the mask the higher resolution estimates you will get. 
A soft edge of 5-10 pixels is often a good edge width."); current_y += STEPY/2; place("angpix"); tab1->end(); tab2->begin(); tab2->label("Sharpen"); resetHeight(); group1 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group1->end(); place("do_auto_bfac", TOGGLE_LEAVE_ACTIVE, group1); group1->begin(); place("autob_lowres"); group1->end(); guientries["do_auto_bfac"].cb_menu_i(); group2 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group2->end(); place("do_adhoc_bfac", TOGGLE_LEAVE_ACTIVE, group2); group2->begin(); place("adhoc_bfac"); group2->end(); guientries["do_adhoc_bfac"].cb_menu_i(); current_y += STEPY/2; place("fn_mtf"); place("mtf_angpix"); tab2->end(); tab3->begin(); tab3->label("Filter"); resetHeight(); group3 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group3->end(); place("do_skip_fsc_weighting", TOGGLE_LEAVE_ACTIVE, group3); group3->begin(); place("low_pass"); group3->end(); guientries["do_skip_fsc_weighting"].cb_menu_i(); tab3->end(); } void JobWindow::initialiseLocresWindow() { setupTabs(3); tab1->begin(); tab1->label("I/O"); resetHeight(); place("fn_in", TOGGLE_DEACTIVATE); place("fn_mask"); current_y += STEPY/2; place("angpix", TOGGLE_DEACTIVATE); tab1->end(); tab2->begin(); tab2->label("ResMap"); resetHeight(); group1 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group1->end(); place("do_resmap_locres", TOGGLE_DEACTIVATE, group1); group1->begin(); place("fn_resmap", TOGGLE_DEACTIVATE); current_y += STEPY /2 ; place("pval", TOGGLE_DEACTIVATE); place("minres", TOGGLE_DEACTIVATE); place("maxres", TOGGLE_DEACTIVATE); place("stepres", TOGGLE_DEACTIVATE); group1->end(); guientries["do_resmap_locres"].cb_menu_i(); tab2->end(); tab3->begin(); tab3->label("Relion"); resetHeight(); group2 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group2->end(); place("do_relion_locres", TOGGLE_DEACTIVATE, group2); group2->begin(); //place("locres_sampling", TOGGLE_DEACTIVATE); //place("randomize_at", TOGGLE_DEACTIVATE); //current_y += STEPY /2 ; place("adhoc_bfac", TOGGLE_DEACTIVATE); place("fn_mtf", TOGGLE_DEACTIVATE); group2->end(); guientries["do_relion_locres"].cb_menu_i(); tab3->end(); } void JobWindow::initialiseMotionrefineWindow() { setupTabs(3); tab1->begin(); tab1->label("I/O"); resetHeight(); // I/O place("fn_mic", TOGGLE_DEACTIVATE); place("fn_data", TOGGLE_DEACTIVATE); place("fn_post", TOGGLE_DEACTIVATE); current_y += STEPY /2 ; place("first_frame", TOGGLE_DEACTIVATE); place("last_frame", TOGGLE_DEACTIVATE); current_y += STEPY /2 ; place("extract_size", TOGGLE_DEACTIVATE); place("rescale", TOGGLE_DEACTIVATE); tab1->end(); tab2->begin(); tab2->label("Train"); resetHeight(); // Train for optimal parameters group2 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group2->end(); place("do_param_optim", TOGGLE_LEAVE_ACTIVE, group2); group2->begin(); place("eval_frac"); place("optim_min_part"); group2->end(); guientries["do_param_optim"].cb_menu_i(); tab2->end(); tab3->begin(); tab3->label("Polish"); resetHeight(); // Polishing group1 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group1->end(); place("do_polish", TOGGLE_DEACTIVATE, group1); current_y += STEPY /2 ; group1->begin(); place("opt_params", TOGGLE_DEACTIVATE); group4 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group4->end(); place("do_own_params", TOGGLE_DEACTIVATE, group4); group4->begin(); place("sigma_vel", TOGGLE_DEACTIVATE); place("sigma_div", TOGGLE_DEACTIVATE); place("sigma_acc", TOGGLE_DEACTIVATE); group4->end(); 
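// Make the default state of do_own_params effective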
guientries["do_own_params"].cb_menu_i(); current_y += STEPY /2 ; place("minres", TOGGLE_DEACTIVATE); place("maxres", TOGGLE_DEACTIVATE); tab3->end(); } void JobWindow::initialiseCtfrefineWindow() { setupTabs(2); tab1->begin(); tab1->label("I/O"); resetHeight(); // I/O place("fn_data", TOGGLE_DEACTIVATE); place("fn_post", TOGGLE_DEACTIVATE); tab1->end(); tab2->begin(); tab2->label("Fit"); resetHeight(); group3 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group3->end(); place("do_aniso_mag", TOGGLE_LEAVE_ACTIVE, group3, true); //true means: activating aniso_mag will deactive higher-order aberrations current_y += STEPY /2 ; group3->begin(); group1 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group1->end(); place("do_ctf", TOGGLE_LEAVE_ACTIVE, group1); group1->begin(); place("do_defocus", TOGGLE_LEAVE_ACTIVE); place("do_astig", TOGGLE_LEAVE_ACTIVE); place("do_bfactor", TOGGLE_LEAVE_ACTIVE); place("do_phase", TOGGLE_LEAVE_ACTIVE); group1->end(); guientries["do_ctf"].cb_menu_i(); current_y += STEPY /2 ; group4 = new Fl_Group(WCOL0, MENUHEIGHT, 550, 600-MENUHEIGHT, ""); group4->end(); place("do_tilt", TOGGLE_LEAVE_ACTIVE, group4); group4->begin(); place("do_trefoil", TOGGLE_LEAVE_ACTIVE); group4->end(); guientries["do_tilt"].cb_menu_i(); current_y += STEPY /2 ; place("do_4thorder", TOGGLE_LEAVE_ACTIVE); group3->end(); guientries["do_aniso_mag"].cb_menu_i(); current_y += STEPY /2 ; place("minres", TOGGLE_DEACTIVATE); tab2->end(); } void JobWindow::initialiseExternalWindow() { setupTabs(2); tab1->begin(); tab1->label("Input"); resetHeight(); // I/O place("fn_exe", TOGGLE_DEACTIVATE); current_y += STEPY /2 ; place("in_mov", TOGGLE_DEACTIVATE); place("in_mic", TOGGLE_DEACTIVATE); place("in_part", TOGGLE_DEACTIVATE); place("in_coords", TOGGLE_DEACTIVATE); place("in_3dref", TOGGLE_DEACTIVATE); place("in_mask", TOGGLE_DEACTIVATE); tab1->end(); tab2->begin(); tab2->label("Params"); resetHeight(); place2("param1_label", "param1_value", "Param1 label, value:", TOGGLE_LEAVE_ACTIVE); place2("param2_label", "param2_value", "Param2 label, value:", TOGGLE_LEAVE_ACTIVE); place2("param3_label", "param3_value", "Param3 label, value:", TOGGLE_LEAVE_ACTIVE); place2("param4_label", "param4_value", "Param4 label, value:", TOGGLE_LEAVE_ACTIVE); place2("param5_label", "param5_value", "Param5 label, value:", TOGGLE_LEAVE_ACTIVE); place2("param6_label", "param6_value", "Param6 label, value:", TOGGLE_LEAVE_ACTIVE); place2("param7_label", "param7_value", "Param7 label, value:", TOGGLE_LEAVE_ACTIVE); place2("param8_label", "param8_value", "Param8 label, value:", TOGGLE_LEAVE_ACTIVE); place2("param9_label", "param9_value", "Param9 label, value:", TOGGLE_LEAVE_ACTIVE); place2("param10_label", "param10_value", "Param10 label, value:", TOGGLE_LEAVE_ACTIVE); tab2->end(); } relion-3.1.3/src/gui_jobwindow.h000066400000000000000000000101661411340063500166150ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef SRC_GUI_JOBWINDOW_H_ #define SRC_GUI_JOBWINDOW_H_ #include "src/pipeline_jobs.h" #include "src/gui_entries.h" class JobWindow : public Fl_Box { protected: // Which process type is this? bool is_continue; public: // Which job is this for? RelionJob myjob; // All the GuiEntries of this job std::map guientries; // oldstyle GUI bool do_oldstyle; // Sizes int x, y, w, h; // Position of current cursor to place new elements int start_y, current_y; // Tabs Fl_Tabs *tabs; Fl_Group *tab1, *tab2, *tab3, *tab4, *tab5, *tab6, *tab7, *runtab; // Groups Fl_Group *group1, *group2, *group3, *group4, *group5, *group6, *group7, *group8, *queue_group; public: // Constructor with x, y, w, h and a title JobWindow(int _x = WCOL0, int _y = 2, int _w = GUIWIDTH - WCOL0 - 10, int _h = GUIHEIGHT_OLD-65, const char* title = ""); // Destructor ~JobWindow() { clear(); } // Clear everything void clear(); // set up the tabs void setupTabs(int nr_tabs); void initialise(int my_job_type); void resetHeight(); // Place a single entry void place(std::string key, int deactivate_option = TOGGLE_LEAVE_ACTIVE, Fl_Group * deactivate_this_group = NULL, bool actually_activate=false); // Place two entries on one line void place2(std::string key1, std::string key2, std::string label, int deactivate_option = TOGGLE_LEAVE_ACTIVE); // Place three entries on one line void place3(std::string key1, std::string key2, std::string key3, std::string label, int deactivate_option = TOGGLE_LEAVE_ACTIVE); void setupRunTab(); // De/re-activate options upon toggling the continue button void toggle_new_continue(bool is_continue); // Write the job submission script void saveJobSubmissionScript(std::string newfilename, std::string outputname, std::vector commands); // Initialise pipeiline stuff for each job, return outputname void initialisePipeline(std::string &outputname, std::string defaultname, int job_counter); // Prepare the final (job submission or combined (mpi) command of possibly multiple lines) // Returns true to go ahead, and false to cancel bool prepareFinalCommand(std::string &outputname, std::vector &commands, std::string &final_command, bool do_makedir = true); // Update all values in the Fl_Input entries into/from the corresponding job_options void updateMyGui(); void updateMyJob(); private: static void cb_menu_continue(Fl_Widget*, void*); inline void cb_menu_continue_i(); // initialise the window for each of the jobtypes void initialiseImportWindow(); void initialiseMotioncorrWindow(); void initialiseCtffindWindow(); void initialiseManualpickWindow(); void initialiseAutopickWindow(); void initialiseExtractWindow(); void initialiseSelectWindow(); void initialiseClass2DWindow(); void initialiseInimodelWindow(); void initialiseClass3DWindow(); void initialiseAutorefineWindow(); void initialiseMultiBodyWindow(); void initialiseMaskcreateWindow(); void initialiseJoinstarWindow(); void initialiseSubtractWindow(); void initialisePostprocessWindow(); void initialiseLocresWindow(); void initialiseMotionrefineWindow(); void initialiseCtfrefineWindow(); void initialiseExternalWindow(); }; #endif /* SRC_NEWGUI_JOBWINDOW_H_ */ 
relion-3.1.3/src/gui_mainwindow.cpp000066400000000000000000003333261411340063500173300ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include "src/gui_mainwindow.h" #include "src/gui_background.xpm" #define STR_HELPER(x) #x #define STR(x) STR_HELPER(x) bool show_scheduler; bool show_expand_stdout; // The StdOutDisplay allows looking at the entire stdout or stderr file int StdOutDisplay::handle(int ev) { if (ev==FL_PUSH && Fl::event_clicks()) { // double-click if (Fl::event_clicks()) { if (show_scheduler) { current_browse_directory = schedule.name; } else { if (current_job < 0) return 0; current_browse_directory = pipeline.processList[current_job].name; } FileName fn = current_browse_directory + fn_file; std::string command; if (exists(fn)) { if (fn_file == "run.out") { if (maingui_do_read_only) { NoteEditorWindow* w = new NoteEditorWindow(800, 400, fn.c_str(), fn.c_str(), false); // false means dont_allow_save w->show(); return 1; } else { std::string command = "awk -F\"\r\" '{if (NF>1) {print $NF} else {print}}' < " + fn + " > .gui_tmpstd"; int res = system(command.c_str()); NoteEditorWindow* w = new NoteEditorWindow(800, 400, fn.c_str(), ".gui_tmpstd", false); //false means dont_allow_save, as its temp file anyway w->show(); return 1; } } else { NoteEditorWindow* w = new NoteEditorWindow(800, 400, fn.c_str(), fn, true); // true means allow_save, this is useful to remove past errors w->show(); return 1; } } } // end if double click } // end if FL_PUSH return 0; } int SchedulerWindow::fill(FileName _pipeline_name, std::vector _scheduled_jobs) { //color(GUI_BACKGROUND_COLOR); int current_y = 2, max_y = 2; int ystep = 35; int xcol = w()-120; // Scroll bars Fl_Scroll scroll(0, current_y, w(), h()); scroll.type(Fl_Scroll::VERTICAL); my_jobs.clear(); pipeline_name = _pipeline_name; for (int ijob = 0; ijob < _scheduled_jobs.size(); ijob++) { my_jobs.push_back(_scheduled_jobs[ijob]); int xcoor = (ijob < 1+_scheduled_jobs.size()/2) ? 
20 : w()-170; if (ijob == 1+_scheduled_jobs.size()/2) current_y = 2; Fl_Check_Button *mycheck = new Fl_Check_Button(xcoor, current_y, ystep-8, ystep-8, _scheduled_jobs[ijob].c_str()); mycheck->labelsize(ENTRY_FONTSIZE); check_buttons.push_back(mycheck); mycheck->value(1); current_y += ystep; if (current_y > max_y) max_y = current_y; } current_y = max_y; schedule_name = new Fl_Input(xcol, current_y, 100, ystep-8, "Provide a name for this schedule: "); current_y += ystep; wait_before = new Fl_Input(xcol, current_y, 100, ystep-8, "Wait this many minutes before starting?"); current_y += ystep; repeat = new Fl_Input(xcol, current_y, 100, ystep-8, "Run the jobs how many times?"); current_y += ystep; wait = new Fl_Input(xcol, current_y, 100, ystep-8, "Wait at least in between (in minutes)?"); current_y += ystep; wait_after = new Fl_Input(xcol, current_y, 100, ystep-8, "Wait at least after each job (in seconds)?"); current_y += ystep; // Set the input value schedule_name->value("schedule1"); schedule_name->color(GUI_INPUT_COLOR); schedule_name->textsize(ENTRY_FONTSIZE); schedule_name->labelsize(ENTRY_FONTSIZE); repeat->value("1"); repeat->color(GUI_INPUT_COLOR); repeat->textsize(ENTRY_FONTSIZE); repeat->labelsize(ENTRY_FONTSIZE); wait->value("15"); wait->color(GUI_INPUT_COLOR); wait->textsize(ENTRY_FONTSIZE); wait->labelsize(ENTRY_FONTSIZE); wait_before->value("0"); wait_before->color(GUI_INPUT_COLOR); wait_before->textsize(ENTRY_FONTSIZE); wait_before->labelsize(ENTRY_FONTSIZE); wait_after->value("10"); wait_after->color(GUI_INPUT_COLOR); wait_after->textsize(ENTRY_FONTSIZE); wait_after->labelsize(ENTRY_FONTSIZE); // Button to execute Fl_Button *execute_button = new Fl_Button(w()-200, current_y, 80, 30, "Execute"); execute_button->color(GUI_RUNBUTTON_COLOR); execute_button->labelsize(12); execute_button->callback(cb_execute, this); // Button to cancel Fl_Button *cancel_button = new Fl_Button(w()-100, current_y, 80, 30, "Cancel"); cancel_button->color(GUI_RUNBUTTON_COLOR); cancel_button->labelsize(12); cancel_button->callback(cb_cancel, this); resizable(*this); show(); return Fl::run(); } void SchedulerWindow::cb_cancel(Fl_Widget*, void* v) { SchedulerWindow* T=(SchedulerWindow*)v; T->hide(); } void SchedulerWindow::cb_execute(Fl_Widget*, void* v) { SchedulerWindow* T=(SchedulerWindow*)v; T->cb_execute_i(); T->hide(); } void SchedulerWindow::cb_execute_i() { FileName fn_sched(schedule_name->value()); FileName fn_check = "RUNNING_PIPELINER_" + pipeline_name + "_" + fn_sched; if (exists(fn_check)) { std::string msg = "ERROR: a file called " + fn_check + " already exists. \n This implies another set of scheduled jobs with this name is already running. 
\n Cancelling job execution..."; fl_message("%s", msg.c_str()); } else { // Make a string with all job-ids to process std::string jobids="\""; for (int ijob = 0; ijob < my_jobs.size(); ijob++) { if (check_buttons[ijob]->value()) jobids += my_jobs[ijob] + " "; } jobids += "\""; std::string myrepeat(repeat->value()); std::string mywait(wait->value()); std::string mywait_before(wait_before->value()); std::string mywait_after(wait_after->value()); std::string command = "relion_pipeliner --pipeline " + pipeline_name; command += " --schedule " + fn_sched; command += " --repeat " + myrepeat; command += " --min_wait " + mywait; command += " --min_wait_before " + mywait_before; command += " --sec_wait_after " + mywait_after; command += " --RunJobs " + jobids; // Run this in the background, so control returns to the window command += " &"; int res = system(command.c_str()); std::cout << " Launching: " << command << std::endl; std::cout << " Stop execution of this set of scheduled jobs by deleting file: " << fn_check << std::endl; } } NoteEditorWindow::NoteEditorWindow(int w, int h, const char* title, FileName _fn_note, bool _allow_save):Fl_Window(w,h,title) { allow_save = _allow_save; editor = new Fl_Text_Editor(0, 0, w, h-50); editor->wrap_mode(Fl_Text_Display::WRAP_AT_BOUNDS,10); textbuff_note = new Fl_Text_Buffer; editor->buffer(textbuff_note); textbuff_note->transcoding_warning_action=NULL; fn_note = _fn_note; if (exists(fn_note)) int err = textbuff_note->loadfile(fn_note.c_str()); else textbuff_note->text("Describe what this job or project is about here..."); editor->insert_position(editor->buffer()->length()); editor->show_insert_position(); if (allow_save) { // Button to save and exit Fl_Button *save_button = new Fl_Button(w-200, h-40, 80, 30, "Save"); save_button->color(GUI_RUNBUTTON_COLOR); save_button->labelsize(12); save_button->callback(cb_save, this); } // Button to exit Fl_Button *cancel_button = new Fl_Button(w-100, h-40, 80, 30, "Cancel"); cancel_button->color(GUI_RUNBUTTON_COLOR); cancel_button->labelsize(12); cancel_button->callback(cb_cancel, this); resizable(*this); } void NoteEditorWindow::cb_cancel(Fl_Widget*, void* v) { NoteEditorWindow* T=(NoteEditorWindow*)v; T->hide(); } void NoteEditorWindow::cb_save(Fl_Widget*, void* v) { NoteEditorWindow* T=(NoteEditorWindow*)v; T->cb_save_i(); T->hide(); } void NoteEditorWindow::cb_save_i() { int err = textbuff_note->savefile(fn_note.c_str()); } GuiMainWindow::GuiMainWindow(int w, int h, const char* title, FileName fn_pipe, FileName fn_sched, int _update_every_sec, int _exit_after_sec, bool _do_read_only):Fl_Window(w,h,title) { // Set initial Timer tickTimeLastChanged(); show_expand_stdout = false; // Setup read_only maingui_do_read_only = _do_read_only; pipeline.do_read_only = _do_read_only; do_order_alphabetically = false; FileName fn_lock=".gui_projectdir"; if (!exists(fn_lock)) { int ret = fl_choice("Your current directory does not look like a RELION project directory.\nOnly run the RELION GUI from your project directory.\nDo you want to start a new project here?", "No", "Yes", 0); this->begin(); // apparently fl_choice changes Fl_Group::current. Thus, we have to reclaim it. if (ret == 1) touch(".gui_projectdir"); else { std::cout << " Exiting ... 
" << std::endl; exit(0); } } // First setup the old part of the GUI h = GUIHEIGHT_OLD; // TODO: control file location and use better figure background_grp = new Fl_Group(WCOL0-10, 0 ,w-WCOL0, h-55); // Initial screen picture with some explanation on how to use the GUI //image_box = new Fl_Box(WCOL0-8, 0 ,w-WCOL0, h-35); // widget that will contain image image_box = new Fl_Box(WCOL0-8, 50 ,w-WCOL0, h-120); // widget that will contain image xpm_image = new Fl_Pixmap(gui_background); image_box->image(xpm_image); // attach xpm image to box background_grp->end(); // read in schedule if it exists, otherwise just initialise schedule with its name if (fn_sched != "") { show_scheduler = true; create_scheduler_gui = true; schedule.do_read_only = _do_read_only; schedule.setName(fn_sched+"/"); pipeline.name = fn_sched+"/schedule"; if (exists(schedule.name+"schedule.star")) { schedule.read(DONT_LOCK); pipeline.name = fn_sched+"/schedule"; } else { std::string command = "mkdir -p " + fn_sched; int res = system(command.c_str()); schedule.write(DONT_LOCK); // empty write } } else { show_scheduler = false; create_scheduler_gui = false; // Read in the pipeline STAR file if it exists pipeline.name = fn_pipe; } if (exists(pipeline.name + "_pipeline.star")) { std::string lock_message = "mainGUI constructor"; pipeline.read(DO_LOCK, lock_message); // With the locking system, each read needs to be followed soon with a write pipeline.write(DO_LOCK); } else { pipeline.write(); } color(GUI_BACKGROUND_COLOR); menubar = new Fl_Menu_Bar(-3, 0, WCOL0-7, MENUHEIGHT); menubar->add("File/Re-read pipeline", FL_ALT+'r', cb_reread_pipeline, this); menubar->add("File/Edit project note", FL_ALT+'e', cb_edit_project_note, this); if (!maingui_do_read_only) menubar->add("File/Print all notes", 0, cb_print_notes, this); if (!maingui_do_read_only) menubar->add("File/Remake .Nodes\\/", FL_ALT+'n', cb_remake_nodesdir, this); menubar->add("File/Display", FL_ALT+'d', cb_display, this); menubar->add("File/_Overwrite continue", FL_ALT+'o', cb_toggle_overwrite_continue, this); menubar->add("File/_Show initial screen", FL_ALT+'z', cb_show_initial_screen, this); if (!maingui_do_read_only) menubar->add("File/_Empty trash", FL_ALT+'t', cb_empty_trash, this); menubar->add("File/About", 0, cb_about, this); menubar->add("File/Quit", FL_ALT+'q', cb_quit, this); if (!maingui_do_read_only) { menubar->add("Jobs/Save job settings", FL_ALT+'s', cb_save, this); menubar->add("Jobs/_Load job settings", FL_ALT+'l', cb_load, this); } menubar->add("Jobs/Order alphabetically", FL_ALT+'a', cb_order_jobs_alphabetically, this); menubar->add("Jobs/_Order chronologically", FL_ALT+'c', cb_order_jobs_chronologically, this); if (!maingui_do_read_only) { menubar->add("Jobs/_Undelete job(s)", FL_ALT+'u', cb_undelete_job, this); menubar->add("Jobs/Run scheduled jobs", 0, cb_start_pipeliner, this); menubar->add("Jobs/Stop running scheduled jobs", 0, cb_stop_pipeliner, this); menubar->add("Jobs/Export scheduled job(s)", 0, cb_export_jobs, this); menubar->add("Jobs/_Import scheduled job(s)", 0, cb_import_jobs, this); menubar->add("Jobs/Gently clean all jobs", FL_ALT+'g', cb_gently_clean_all_jobs, this); menubar->add("Jobs/Harshly clean all jobs", FL_ALT+'h', cb_harshly_clean_all_jobs, this); // See which schedules there are FileName schedule_wildcard = "Schedules/*"; std::vector schedules; schedule_wildcard.globFiles(schedules); for (int i = 0; i < schedules.size(); i++) { std::string mylabel = "Schedules/" + schedules[i]; menubar->add(mylabel.c_str(), 0, 
cb_toggle_schedule, this); } menubar->add("Schedules/_Copy schedule", 0, cb_copy_schedule, this); menubar->add("Schedules/_Show pipeline", FL_ALT+'p', cb_toggle_pipeline, this); } current_y = MENUHEIGHT + 10; // Fill browser in the right order browser = new Fl_Hold_Browser(10,MENUHEIGHT+5,WCOL0-20,h-MENUHEIGHT-60); browser->textsize(RLN_FONTSIZE-1); current_job = -1; int i = 0; browse_grp[i] = new Fl_Group(WCOL0, 2, 550, 615-MENUHEIGHT); browser->add("Import"); gui_jobwindows[i] = new JobWindow(); gui_jobwindows[i]->initialise(PROC_IMPORT); browse_grp[i]->end(); i++; browse_grp[i] = new Fl_Group(WCOL0, 2, 550, 615-MENUHEIGHT); browser->add("Motion correction"); gui_jobwindows[i] = new JobWindow(); gui_jobwindows[i]->initialise(PROC_MOTIONCORR); browse_grp[i]->end(); i++; browse_grp[i] = new Fl_Group(WCOL0, 2, 550, 615-MENUHEIGHT); browser->add("CTF estimation"); gui_jobwindows[i] = new JobWindow(); gui_jobwindows[i]->initialise(PROC_CTFFIND); browse_grp[i]->end(); i++; browse_grp[i] = new Fl_Group(WCOL0, 2, 550, 615-MENUHEIGHT); browser->add("Manual picking"); gui_jobwindows[i] = new JobWindow(); gui_jobwindows[i]->initialise(PROC_MANUALPICK); browse_grp[i]->end(); i++; browse_grp[i] = new Fl_Group(WCOL0, 2, 550, 615-MENUHEIGHT); browser->add("Auto-picking"); gui_jobwindows[i] = new JobWindow(); gui_jobwindows[i]->initialise(PROC_AUTOPICK); browse_grp[i]->end(); i++; browse_grp[i] = new Fl_Group(WCOL0, 2, 550, 615-MENUHEIGHT); browser->add("Particle extraction"); gui_jobwindows[i] = new JobWindow(); gui_jobwindows[i]->initialise(PROC_EXTRACT); browse_grp[i]->end(); i++; browse_grp[i] = new Fl_Group(WCOL0, 2, 550, 615-MENUHEIGHT); browser->add("Subset selection"); gui_jobwindows[i] = new JobWindow(); gui_jobwindows[i]->initialise(PROC_CLASSSELECT); browse_grp[i]->end(); i++; browse_grp[i] = new Fl_Group(WCOL0, 2, 550, 615-MENUHEIGHT); browser->add("2D classification"); gui_jobwindows[i] = new JobWindow(); gui_jobwindows[i]->initialise(PROC_2DCLASS); browse_grp[i]->end(); i++; browse_grp[i] = new Fl_Group(WCOL0, 2, 550, 615-MENUHEIGHT); browser->add("3D initial model"); gui_jobwindows[i] = new JobWindow(); gui_jobwindows[i]->initialise(PROC_INIMODEL); browse_grp[i]->end(); i++; browse_grp[i] = new Fl_Group(WCOL0, 2, 550, 615-MENUHEIGHT); browser->add("3D classification"); gui_jobwindows[i] = new JobWindow(); gui_jobwindows[i]->initialise(PROC_3DCLASS); browse_grp[i]->end(); i++; browse_grp[i] = new Fl_Group(WCOL0, 2, 550, 615-MENUHEIGHT); browser->add("3D auto-refine"); gui_jobwindows[i] = new JobWindow(); gui_jobwindows[i]->initialise(PROC_3DAUTO); browse_grp[i]->end(); i++; browse_grp[i] = new Fl_Group(WCOL0, 2, 550, 615-MENUHEIGHT); browser->add("3D multi-body"); gui_jobwindows[i] = new JobWindow(); gui_jobwindows[i]->initialise(PROC_MULTIBODY); browse_grp[i]->end(); i++; browse_grp[i] = new Fl_Group(WCOL0, 2, 550, 615-MENUHEIGHT); browser->add("CTF refinement"); gui_jobwindows[i] = new JobWindow(); gui_jobwindows[i]->initialise(PROC_CTFREFINE); browse_grp[i]->end(); i++; browse_grp[i] = new Fl_Group(WCOL0, 2, 550, 615-MENUHEIGHT); browser->add("Bayesian polishing"); gui_jobwindows[i] = new JobWindow(); gui_jobwindows[i]->initialise(PROC_MOTIONREFINE); browse_grp[i]->end(); i++; browse_grp[i] = new Fl_Group(WCOL0, 2, 550, 615-MENUHEIGHT); browser->add("Mask creation"); gui_jobwindows[i] = new JobWindow(); gui_jobwindows[i]->initialise(PROC_MASKCREATE); browse_grp[i]->end(); i++; browse_grp[i] = new Fl_Group(WCOL0, 2, 550, 615-MENUHEIGHT); browser->add("Join star files"); gui_jobwindows[i] 
= new JobWindow(); gui_jobwindows[i]->initialise(PROC_JOINSTAR); browse_grp[i]->end(); i++; browse_grp[i] = new Fl_Group(WCOL0, 2, 550, 615-MENUHEIGHT); browser->add("Particle subtraction"); gui_jobwindows[i] = new JobWindow(); gui_jobwindows[i]->initialise(PROC_SUBTRACT); browse_grp[i]->end(); i++; browse_grp[i] = new Fl_Group(WCOL0, 2, 550, 615-MENUHEIGHT); browser->add("Post-processing"); gui_jobwindows[i] = new JobWindow(); gui_jobwindows[i]->initialise(PROC_POST); browse_grp[i]->end(); i++; browse_grp[i] = new Fl_Group(WCOL0, 2, 550, 615-MENUHEIGHT); browser->add("Local resolution"); gui_jobwindows[i] = new JobWindow(); gui_jobwindows[i]->initialise(PROC_RESMAP); browse_grp[i]->end(); i++; browse_grp[i] = new Fl_Group(WCOL0, 2, 550, 615-MENUHEIGHT); browser->add("External"); gui_jobwindows[i] = new JobWindow(); gui_jobwindows[i]->initialise(PROC_EXTERNAL); browse_grp[i]->end(); browser->callback(cb_select_browsegroup, this); browser->end(); browser->select(1); // just start from the beginning // Add run buttons on the menubar as well print_CL_button = new Fl_Button(GUIWIDTH - 215, h-90, 100, 32, "Check command"); print_CL_button->color(GUI_RUNBUTTON_COLOR); print_CL_button->labelsize(11); print_CL_button->callback(cb_print_cl, this); expand_stdout_button = new Fl_Button(XJOBCOL1, GUIHEIGHT_EXT_START , 85, MENUHEIGHT, "I/O view"); expand_stdout_button->color(GUI_BUTTON_COLOR); expand_stdout_button->callback(cb_toggle_expand_stdout, this); // A) Pipeliner part of the GUI pipeliner_grp = new Fl_Group(0, 0, 2*w, 2*h); pipeliner_grp->begin(); run_button = new Fl_Button(GUIWIDTH - 110 , h-90, 100, 32, "Run!"); run_button->color(GUI_RUNBUTTON_COLOR); run_button->labelfont(FL_ITALIC); run_button->labelsize(14); run_button->callback(cb_run, this); if (maingui_do_read_only) run_button->deactivate(); schedule_button = new Fl_Button(GUIWIDTH - 320 , h-90, 100, 32, "Schedule"); schedule_button->color(GUI_RUNBUTTON_COLOR); schedule_button->labelfont(FL_ITALIC); schedule_button->labelsize(14); schedule_button->callback(cb_schedule, this); if (maingui_do_read_only) schedule_button->deactivate(); menubar2 = new Fl_Menu_Bar(XJOBCOL1+87, GUIHEIGHT_EXT_START, 95, MENUHEIGHT); menubar2->color(GUI_BUTTON_COLOR); menubar2->add("Job actions/Edit Note", 0, cb_edit_note, this); if (!maingui_do_read_only) { menubar2->add("Job actions/Alias", 0, cb_set_alias, this); menubar2->add("Job actions/Abort running", 0, cb_abort, this); menubar2->add("Job actions/Mark as finished", 0, cb_mark_as_finished, this); menubar2->add("Job actions/Mark as failed", 0, cb_mark_as_failed, this); menubar2->add("Job actions/Make flowchart", 0, cb_make_flowchart, this); menubar2->add("Job actions/Gentle clean", 0, cb_gentle_cleanup, this); menubar2->add("Job actions/Harsh clean", 0, cb_harsh_cleanup, this); menubar2->add("Job actions/Delete", 0, cb_delete, this); } // Fl_input with the alias of the new job (or the name of an existing one) alias_current_job = new Fl_Input(XJOBCOL2 , GUIHEIGHT_EXT_START+3, JOBCOLWIDTH, MENUHEIGHT-6, "Current:"); // Left-hand side browsers for input/output nodes and processes display_io_node = new Fl_Choice(XJOBCOL3+50, GUIHEIGHT_EXT_START+3, 200, MENUHEIGHT-6); display_io_node->label("Display:"); display_io_node->color(GUI_BUTTON_COLOR); display_io_node->callback(cb_display_io_node, this); pipeliner_jobs_grp = new Fl_Group(0, 0, 2*w, 2*h); pipeliner_jobs_grp->begin(); // Add browsers for finished and running jobs Fl_Text_Buffer *textbuff1 = new Fl_Text_Buffer(); textbuff1->text("Finished jobs"); 
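// Label above the browser that lists finished jobs (successful, failed or aborted)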
Fl_Text_Display* textdisp1 = new Fl_Text_Display(XJOBCOL1, GUIHEIGHT_EXT_START2, JOBCOLWIDTH, 25); textdisp1->buffer(textbuff1); textdisp1->color(GUI_BACKGROUND_COLOR); finished_job_browser = new Fl_Select_Browser(XJOBCOL1, GUIHEIGHT_EXT_START2 + 25, JOBCOLWIDTH, JOBHEIGHT+25); finished_job_browser->callback(cb_select_finished_job, this); finished_job_browser->textsize(RLN_FONTSIZE-1); finished_job_browser->end(); Fl_Text_Buffer *textbuff2 = new Fl_Text_Buffer(); textbuff2->text("Running jobs"); Fl_Text_Display* textdisp2 = new Fl_Text_Display(XJOBCOL2, GUIHEIGHT_EXT_START2, JOBCOLWIDTH, 25); textdisp2->buffer(textbuff2); textdisp2->color(GUI_BACKGROUND_COLOR); running_job_browser = new Fl_Select_Browser(XJOBCOL2, GUIHEIGHT_EXT_START2 + 25, JOBCOLWIDTH, JOBHALFHEIGHT); running_job_browser->callback(cb_select_running_job, this); running_job_browser->textsize(RLN_FONTSIZE-1); running_job_browser->end(); Fl_Text_Buffer *textbuff3 = new Fl_Text_Buffer(); textbuff3->text("Scheduled jobs"); Fl_Text_Display* textdisp3 = new Fl_Text_Display(XJOBCOL2, GUIHEIGHT_EXT_START2 + JOBHALFHEIGHT + 25, JOBCOLWIDTH, 25); textdisp3->buffer(textbuff3); textdisp3->color(GUI_BACKGROUND_COLOR); scheduled_job_browser = new Fl_Select_Browser(XJOBCOL2, GUIHEIGHT_EXT_START2 + 25 + JOBHALFHEIGHT + 25, JOBCOLWIDTH, JOBHALFHEIGHT); scheduled_job_browser->callback(cb_select_scheduled_job, this); scheduled_job_browser->textsize(RLN_FONTSIZE-1); Fl_Text_Buffer *textbuff4 = new Fl_Text_Buffer(); textbuff4->text("Input to this job"); Fl_Text_Display* textdisp4 = new Fl_Text_Display(XJOBCOL3, GUIHEIGHT_EXT_START2, JOBCOLWIDTH, 25); textdisp4->buffer(textbuff4); textdisp4->color(GUI_BACKGROUND_COLOR); input_job_browser = new Fl_Select_Browser(XJOBCOL3, GUIHEIGHT_EXT_START2 + 25, JOBCOLWIDTH, JOBHALFHEIGHT); input_job_browser->callback(cb_select_input_job, this); input_job_browser->textsize(RLN_FONTSIZE-1); Fl_Text_Buffer *textbuff5 = new Fl_Text_Buffer(); textbuff5->text("Output from this job"); Fl_Text_Display* textdisp5 = new Fl_Text_Display(XJOBCOL3, GUIHEIGHT_EXT_START2 + JOBHALFHEIGHT + 25, JOBCOLWIDTH, 25); textdisp5->buffer(textbuff5); textdisp5->color(GUI_BACKGROUND_COLOR); output_job_browser = new Fl_Select_Browser(XJOBCOL3, GUIHEIGHT_EXT_START2 + 25 + JOBHALFHEIGHT + 25, JOBCOLWIDTH, JOBHALFHEIGHT); output_job_browser->callback(cb_select_output_job, this); output_job_browser->textsize(RLN_FONTSIZE-1); // Display stdout and stderr of jobs textbuff_stdout = new Fl_Text_Buffer(); textbuff_stderr = new Fl_Text_Buffer(); // Disable warning message about UTF-8 transcoding textbuff_stdout->transcoding_warning_action=NULL; textbuff_stderr->transcoding_warning_action=NULL; disp_stdout = new StdOutDisplay(XJOBCOL1, GUIHEIGHT_EXT_START2 + JOBHEIGHT + STDOUT_Y-5, w-20, 105); disp_stderr = new StdOutDisplay(XJOBCOL1, GUIHEIGHT_EXT_START2 + JOBHEIGHT + STDERR_Y-5, w-20, 50); disp_stdout->fn_file = "run.out"; disp_stderr->fn_file = "run.err"; textbuff_stdout->text("stdout will go here; double-click this window to open stdout in a separate window"); textbuff_stderr->text("stderr will go here; double-click this window to open stderr in a separate window"); disp_stdout->buffer(textbuff_stdout); disp_stderr->buffer(textbuff_stderr); disp_stderr->textcolor(FL_RED); disp_stdout->textsize(RLN_FONTSIZE-1); disp_stderr->textsize(RLN_FONTSIZE-1); disp_stdout->wrap_mode(Fl_Text_Display::WRAP_AT_BOUNDS,0); disp_stderr->wrap_mode(Fl_Text_Display::WRAP_AT_BOUNDS,0); disp_stdout->scrollbar_width(0); disp_stderr->scrollbar_width(0); 
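// Close the group holding the pipeliner job browsers and the stdout/stderr displays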
pipeliner_jobs_grp->end(); pipeliner_grp->end(); // B) Scheduler part of the GUI scheduler_grp = new Fl_Group(0, 0, 4*w, 4*h); scheduler_grp->begin(); scheduler_run_grp = new Fl_Group(0, 0, 4*w, 4*h); scheduler_run_grp->begin(); // Buttons for current_node and running/aborting the schedule scheduler_current_node = new Fl_Choice(XJOBCOL1+90+65, GUIHEIGHT_EXT_START + 3, 140, 23); scheduler_current_node->label("Current:"); scheduler_current_node->textsize(RLN_FONTSIZE-2); scheduler_current_node->color(GUI_INPUT_COLOR); scheduler_set_current_button = new Fl_Button(XJOBCOL1+90+210, GUIHEIGHT_EXT_START + 3, 50, 23); scheduler_set_current_button->label("Set"); scheduler_set_current_button->color(GUI_BUTTON_COLOR); scheduler_set_current_button->callback(cb_scheduler_set_current, this); scheduler_prev_button = new Fl_Button(XJOBCOL1+90+210+55, GUIHEIGHT_EXT_START + 3, 50, 23); scheduler_prev_button->label("Prev"); scheduler_prev_button->color(GUI_BUTTON_COLOR); scheduler_prev_button->callback(cb_scheduler_prev, this); scheduler_next_button = new Fl_Button(XJOBCOL1+90+210+2*55, GUIHEIGHT_EXT_START + 3, 50, 23); scheduler_next_button->label("Next"); scheduler_next_button->color(GUI_BUTTON_COLOR); scheduler_next_button->callback(cb_scheduler_next, this); scheduler_reset_button = new Fl_Button(XJOBCOL1+90+210+3*55, GUIHEIGHT_EXT_START + 3, 50, 23); scheduler_reset_button->label("Reset"); scheduler_reset_button->color(GUI_BUTTON_COLOR); scheduler_reset_button->callback(cb_scheduler_reset, this); scheduler_run_button = new Fl_Button(GUIWIDTH - 90, GUIHEIGHT_EXT_START + 1, 80, 25); scheduler_run_button->label("Run!"); scheduler_run_button->color(GUI_RUNBUTTON_COLOR); scheduler_run_button->labelfont(FL_ITALIC); scheduler_run_button->labelsize(14); scheduler_run_button->callback(cb_scheduler_run, this); scheduler_run_grp->end(); scheduler_unlock_button = new Fl_Button(GUIWIDTH - 256, GUIHEIGHT_EXT_START + 1, 80, 25); scheduler_unlock_button->label("Unlock"); scheduler_unlock_button->labelfont(FL_ITALIC); scheduler_unlock_button->labelsize(14); scheduler_unlock_button->color(GUI_RUNBUTTON_COLOR); scheduler_unlock_button->callback(cb_scheduler_unlock, this); // Don't allow any changes on the GUI while a Schedule is running, i.e. 
it's directory is locked for writing scheduler_abort_button = new Fl_Button(GUIWIDTH - 173, GUIHEIGHT_EXT_START + 1, 80, 25); scheduler_abort_button->label("Abort"); scheduler_abort_button->labelfont(FL_ITALIC); scheduler_abort_button->labelsize(14); scheduler_abort_button->color(GUI_RUNBUTTON_COLOR); scheduler_abort_button->callback(cb_scheduler_abort, this); //scheduler_grp->end(); scheduler_job_name = new Fl_Input(GUIWIDTH - 550, h-83, 150, 25, "Name:"); scheduler_job_name->color(GUI_INPUT_COLOR); add_job_button = new Fl_Button(GUIWIDTH - 110 , h-90, 100, 32, "Add job"); add_job_button->color(GUI_RUNBUTTON_COLOR); add_job_button->labelfont(FL_ITALIC); add_job_button->labelsize(14); add_job_button->callback(cb_scheduler_add_job, this); // Select one of three modes for adding a new job scheduler_job_mode = new Fl_Choice(GUIWIDTH - 400 , h-83, 80, 25); scheduler_job_mode->label(""); scheduler_job_mode->color(GUI_BUTTON_COLOR); scheduler_job_mode->textsize(12); scheduler_job_mode->menu(job_mode_options); scheduler_job_has_started = new Fl_Choice(GUIWIDTH - 320 , h-83, 100, 25); scheduler_job_has_started->label(""); scheduler_job_has_started->color(GUI_BUTTON_COLOR); scheduler_job_has_started->textsize(12); scheduler_job_has_started->menu(job_has_started_options); // TODO: fill options for this choice! scheduler_jobs_grp = new Fl_Group(0, 0, 4*w, 4*h); scheduler_jobs_grp->begin(); // Scheduler variables int height_var = 35; Fl_Text_Buffer *textbuffvar = new Fl_Text_Buffer(); textbuffvar->text("Variables"); Fl_Text_Display* textdispvar = new Fl_Text_Display(XJOBCOL1, GUIHEIGHT_EXT_START+height_var, JOBCOLWIDTH-105, 24); textdispvar->buffer(textbuffvar); textdispvar->textsize(12); textdispvar->color(GUI_BACKGROUND_COLOR); scheduler_variable_name = new Fl_Input(XJOBCOL1, GUIHEIGHT_EXT_START+height_var+23, JOBCOLWIDTH*0.4, 21); scheduler_variable_name->color(GUI_INPUT_COLOR); scheduler_variable_name->textsize(RLN_FONTSIZE-2); scheduler_variable_value = new Fl_Input(XJOBCOL1+JOBCOLWIDTH*0.4, GUIHEIGHT_EXT_START+height_var+23, JOBCOLWIDTH*0.6, 21); scheduler_variable_value->color(GUI_INPUT_COLOR); scheduler_variable_value->textsize(RLN_FONTSIZE-2); delete_scheduler_variable_button = new Fl_Button(XJOBCOL1+JOBCOLWIDTH-105, GUIHEIGHT_EXT_START+height_var, 50, 23); delete_scheduler_variable_button->color(GUI_BUTTON_COLOR); delete_scheduler_variable_button->labelfont(FL_ITALIC); delete_scheduler_variable_button->labelsize(RLN_FONTSIZE); delete_scheduler_variable_button->label("Del"); delete_scheduler_variable_button->callback(cb_delete_scheduler_variable, this); set_scheduler_variable_button = new Fl_Button(XJOBCOL1+JOBCOLWIDTH-50, GUIHEIGHT_EXT_START+height_var, 50, 23); set_scheduler_variable_button->color(GUI_BUTTON_COLOR); set_scheduler_variable_button->labelfont(FL_ITALIC); set_scheduler_variable_button->labelsize(RLN_FONTSIZE); set_scheduler_variable_button->label("Set"); set_scheduler_variable_button->callback(cb_set_scheduler_variable, this); scheduler_variable_browser = new Fl_Hold_Browser(XJOBCOL1, GUIHEIGHT_EXT_START+height_var + 44, JOBCOLWIDTH, 182); scheduler_variable_browser->callback(cb_select_scheduler_variable, this); scheduler_variable_browser->textsize(RLN_FONTSIZE-2); scheduler_variable_browser->end(); // Scheduler operators Fl_Text_Buffer *textbuffnode = new Fl_Text_Buffer(); textbuffnode->text("Operators"); Fl_Text_Display* textdispnode = new Fl_Text_Display(XJOBCOL2, GUIHEIGHT_EXT_START + height_var, JOBCOLWIDTH-105, 24); textdispnode->buffer(textbuffnode); 
textdispnode->textsize(12); textdispnode->color(GUI_BACKGROUND_COLOR); scheduler_operator_type = new Fl_Choice(XJOBCOL2, GUIHEIGHT_EXT_START+23 + height_var, JOBCOLWIDTH/2 + 10, 21); scheduler_operator_type->color(GUI_INPUT_COLOR); scheduler_operator_type->menu(operator_type_options); scheduler_operator_type->textsize(RLN_FONTSIZE-2); scheduler_operator_output = new Fl_Choice(XJOBCOL2 + 34 + JOBCOLWIDTH/2, GUIHEIGHT_EXT_START+23 + height_var, JOBCOLWIDTH/2-34, 21); scheduler_operator_output->label("->"); scheduler_operator_output->color(GUI_INPUT_COLOR); scheduler_operator_output->textsize(RLN_FONTSIZE-2); scheduler_operator_input1 = new Fl_Choice(XJOBCOL2 + 20, GUIHEIGHT_EXT_START+44 + height_var, JOBCOLWIDTH/2-20, 21); scheduler_operator_input1->label("i1:"); scheduler_operator_input1->color(GUI_INPUT_COLOR); scheduler_operator_input1->textsize(RLN_FONTSIZE-2); scheduler_operator_input2 = new Fl_Choice(XJOBCOL2 + 34 + JOBCOLWIDTH/2, GUIHEIGHT_EXT_START+44 + height_var, JOBCOLWIDTH/2-34, 21); scheduler_operator_input2->label("i2:"); scheduler_operator_input2->textsize(RLN_FONTSIZE-2); scheduler_operator_input2->color(GUI_INPUT_COLOR); delete_scheduler_operator_button = new Fl_Button(XJOBCOL2+JOBCOLWIDTH-105, GUIHEIGHT_EXT_START + height_var, 50, 23); delete_scheduler_operator_button->color(GUI_BUTTON_COLOR); delete_scheduler_operator_button->labelfont(FL_ITALIC); delete_scheduler_operator_button->labelsize(RLN_FONTSIZE); delete_scheduler_operator_button->label("Del"); delete_scheduler_operator_button->callback(cb_delete_scheduler_operator, this); add_scheduler_operator_button = new Fl_Button(XJOBCOL2+JOBCOLWIDTH-50, GUIHEIGHT_EXT_START + height_var, 50, 23); add_scheduler_operator_button->color(GUI_BUTTON_COLOR); add_scheduler_operator_button->labelfont(FL_ITALIC); add_scheduler_operator_button->labelsize(RLN_FONTSIZE); add_scheduler_operator_button->label("Add"); add_scheduler_operator_button->callback(cb_add_scheduler_operator, this); scheduler_operator_browser = new Fl_Hold_Browser(XJOBCOL2, GUIHEIGHT_EXT_START + height_var + 65, JOBCOLWIDTH, 161); scheduler_operator_browser->callback(cb_select_scheduler_operator, this); scheduler_operator_browser->textsize(RLN_FONTSIZE-2); scheduler_operator_browser->end(); int height_ops = height_var + 134; // Scheduler jobs Fl_Text_Buffer *textbuff3s = new Fl_Text_Buffer(); textbuff3s->text("Jobs"); Fl_Text_Display* textdisp3s = new Fl_Text_Display(XJOBCOL1, GUIHEIGHT_EXT-160, JOBCOLWIDTH-50, 24); textdisp3s->buffer(textbuff3s); textdisp3s->textsize(12); textdisp3s->color(GUI_BACKGROUND_COLOR); scheduler_delete_job_button = new Fl_Button(XJOBCOL1+JOBCOLWIDTH-50, GUIHEIGHT_EXT-160, 50, 23); scheduler_delete_job_button->color(GUI_BUTTON_COLOR); scheduler_delete_job_button->labelfont(FL_ITALIC); scheduler_delete_job_button->labelsize(RLN_FONTSIZE); scheduler_delete_job_button->label("Del"); scheduler_delete_job_button->callback(cb_delete_scheduler_job, this); scheduler_job_browser = new Fl_Hold_Browser(XJOBCOL1, GUIHEIGHT_EXT-160+23, JOBCOLWIDTH, 128); scheduler_job_browser->callback(cb_select_scheduled_job, this); scheduler_job_browser->textsize(RLN_FONTSIZE-1); Fl_Text_Buffer *textbuff4s = new Fl_Text_Buffer(); textbuff4s->text("Input to this job"); Fl_Text_Display* textdisp4s = new Fl_Text_Display(XJOBCOL2, GUIHEIGHT_EXT-160, JOBCOLWIDTH, 24); textdisp4s->buffer(textbuff4s); textdisp4s->textsize(12); textdisp4s->color(GUI_BACKGROUND_COLOR); scheduler_input_job_browser = new Fl_Hold_Browser(XJOBCOL2, GUIHEIGHT_EXT-160+24, JOBCOLWIDTH, 50); 
scheduler_input_job_browser->callback(cb_select_input_job, this); scheduler_input_job_browser->textsize(RLN_FONTSIZE-1); Fl_Text_Buffer *textbuff5s = new Fl_Text_Buffer(); textbuff5s->text("Output from this job"); Fl_Text_Display* textdisp5s = new Fl_Text_Display(XJOBCOL2, GUIHEIGHT_EXT-160+ 76, JOBCOLWIDTH, 24); textdisp5s->buffer(textbuff5s); textdisp5s->textsize(12); textdisp5s->color(GUI_BACKGROUND_COLOR); scheduler_output_job_browser = new Fl_Hold_Browser(XJOBCOL2, GUIHEIGHT_EXT-160 + 100, JOBCOLWIDTH, 50); scheduler_output_job_browser->callback(cb_select_output_job, this); scheduler_output_job_browser->textsize(RLN_FONTSIZE-1); // Scheduler edges Fl_Text_Buffer *textbuffedge = new Fl_Text_Buffer(); textbuffedge->text("Edges"); Fl_Text_Display* textdispedge = new Fl_Text_Display(XJOBCOL3, GUIHEIGHT_EXT_START+height_var, JOBCOLWIDTH-105, 24); textdispedge->buffer(textbuffedge); textdispedge->textsize(12); textdispedge->color(GUI_BACKGROUND_COLOR); scheduler_edge_input= new Fl_Choice(XJOBCOL3, GUIHEIGHT_EXT_START+height_var+23, JOBCOLWIDTH/2 + 10, 21); scheduler_edge_input->color(GUI_INPUT_COLOR); scheduler_edge_input->textsize(RLN_FONTSIZE-2); scheduler_edge_output = new Fl_Choice(XJOBCOL3 + 34 + JOBCOLWIDTH/2, GUIHEIGHT_EXT_START+height_var+23, JOBCOLWIDTH/2-34, 21); scheduler_edge_output->label("->"); scheduler_edge_output->color(GUI_INPUT_COLOR); scheduler_edge_output->textsize(RLN_FONTSIZE-2); scheduler_edge_boolean = new Fl_Choice(XJOBCOL3 + 20, GUIHEIGHT_EXT_START+height_var+44, JOBCOLWIDTH/2-20, 21); scheduler_edge_boolean->label("if:"); scheduler_edge_boolean->color(GUI_INPUT_COLOR); scheduler_edge_boolean->textsize(RLN_FONTSIZE-2); scheduler_edge_outputtrue = new Fl_Choice(XJOBCOL3 + 34 + JOBCOLWIDTH/2, GUIHEIGHT_EXT_START+height_var+44, JOBCOLWIDTH/2-34, 21); scheduler_edge_outputtrue->label(":"); scheduler_edge_outputtrue->textsize(RLN_FONTSIZE-2); scheduler_edge_outputtrue->color(GUI_INPUT_COLOR); delete_scheduler_edge_button = new Fl_Button(XJOBCOL3+JOBCOLWIDTH-105, GUIHEIGHT_EXT_START+height_var, 50, 23); delete_scheduler_edge_button->color(GUI_BUTTON_COLOR); delete_scheduler_edge_button->labelfont(FL_ITALIC); delete_scheduler_edge_button->labelsize(RLN_FONTSIZE); delete_scheduler_edge_button->label("Del"); delete_scheduler_edge_button->callback(cb_delete_scheduler_edge, this); add_scheduler_edge_button = new Fl_Button(XJOBCOL3+JOBCOLWIDTH-50, GUIHEIGHT_EXT_START+height_var, 50, 23); add_scheduler_edge_button->color(GUI_BUTTON_COLOR); add_scheduler_edge_button->labelfont(FL_ITALIC); add_scheduler_edge_button->labelsize(RLN_FONTSIZE); add_scheduler_edge_button->label("Add"); add_scheduler_edge_button->callback(cb_add_scheduler_edge, this); scheduler_edge_browser = new Fl_Hold_Browser(XJOBCOL3, GUIHEIGHT_EXT_START+height_var + 65, JOBCOLWIDTH, 320); scheduler_edge_browser->callback(cb_select_scheduler_edge, this); scheduler_edge_browser->textsize(RLN_FONTSIZE-2); scheduler_edge_browser->end(); scheduler_jobs_grp->end(); scheduler_run_grp->end(); scheduler_grp->end(); if (show_scheduler) { pipeliner_grp->hide(); scheduler_grp->show(); fillSchedulerNodesAndVariables(); if (schedule.isWriteLocked()) { scheduler_run_grp->deactivate(); } } else { scheduler_grp->hide(); pipeliner_grp->show(); } // B) Scheduler part of the GUI expand_stdout_grp = new Fl_Group(0, 0, 4*w, 4*h); expand_stdout_grp->begin(); disp_expand_stdout = new StdOutDisplay(XJOBCOL1, GUIHEIGHT_EXT_START2-5, w-20, 300); disp_expand_stderr = new StdOutDisplay(XJOBCOL1, GUIHEIGHT_EXT_START2-5 + 305, w-20, 85); 
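// The expanded I/O view displays the same run.out / run.err text buffers as the smaller stdout/stderr windows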
disp_expand_stdout->fn_file = "run.out"; disp_expand_stderr->fn_file = "run.err"; textbuff_stdout->text("stdout will go here; double-click this window to open stdout in a separate window"); textbuff_stderr->text("stderr will go here; double-click this window to open stderr in a separate window"); disp_expand_stdout->buffer(textbuff_stdout); disp_expand_stderr->buffer(textbuff_stderr); disp_expand_stderr->textcolor(FL_RED); disp_expand_stdout->textsize(RLN_FONTSIZE-1); disp_expand_stderr->textsize(RLN_FONTSIZE-1); disp_expand_stdout->wrap_mode(Fl_Text_Display::WRAP_AT_BOUNDS,0); disp_expand_stderr->wrap_mode(Fl_Text_Display::WRAP_AT_BOUNDS,0); disp_expand_stdout->scrollbar_width(0); disp_expand_stderr->scrollbar_width(0); expand_stdout_grp->end(); if (!show_expand_stdout) expand_stdout_grp->hide(); // Fill the actual browsers fillRunningJobLists(); fillStdOutAndErr(); // Mechanism to update stdout and stderr continuously and also update the JobLists // Also exit the GUI if it has been idle for too long update_every_sec = _update_every_sec; exit_after_sec = (float)_exit_after_sec; if (update_every_sec > 0) Fl::add_timeout(update_every_sec, Gui_Timer_CB, (void*)this); cb_show_initial_screen_i(); // Set and activate current selection from side-browser cb_select_browsegroup_i(true); // make default active; true is used to show_initial_screen is_main_continue = false; // default is a new run } static void Gui_Timer_CB(void *userdata) { GuiMainWindow *o = (GuiMainWindow*)userdata; time_t now; time (&now); double dif = difftime (now, time_last_change); // If the GUI has been idle for too long, then exit if (dif > o->exit_after_sec) { std::cout << " The relion GUI has been idle for more than " << o->exit_after_sec << " seconds, exiting now... " << std::endl; exit(0); } if (show_scheduler) { // Always refill the stdout and stderr windows for scheduler o->fillStdOutAndErr(); FileName mychanged = schedule.name + SCHEDULE_HAS_CHANGED; if (exists(mychanged)) { schedule.read(DONT_LOCK); o->fillSchedulerNodesAndVariables(); std::remove(mychanged.c_str()); } } else { // Update the stdout and stderr windows if we're currently pointing at a running job if (current_job >= 0 && pipeline.processList[current_job].status == PROC_RUNNING) o->fillStdOutAndErr(); // Check for job completion if the pipeline has been changed if (exists(PIPELINE_HAS_CHANGED)) o->updateJobLists(); } // Refresh every so many seconds Fl::repeat_timeout(o->update_every_sec, Gui_Timer_CB, userdata); } void GuiMainWindow::clear() { if (menubar != NULL) { delete menubar; menubar = NULL; } if (menubar2 != NULL) { delete menubar2; menubar2 = NULL; } } std::string GuiMainWindow::getJobNameForDisplay(Process &job) { FileName result; FileName fn_pre, fn_jobnr, fn_post; if (show_scheduler) { result = job.name; result = result.afterFirstOf(schedule.name); return result.beforeLastOf("/"); } else if (!decomposePipelineFileName(job.name, fn_pre, fn_jobnr, fn_post)) { result = job.name; } else { std::string numberonly = (fn_jobnr.afterFirstOf("b")).beforeFirstOf("/"); if (job.alias != "None") result = numberonly + ": " + job.alias; else result = numberonly + ": " + job.name; } return result; } // Update the content of the finished, running and scheduled job lists void GuiMainWindow::fillRunningJobLists() { // Go back to the same positions in the vertical scroll bars of the job lists after updating... 
int mypos_running = running_job_browser->position(); int mypos_scheduled = scheduled_job_browser->position(); int mypos_finished = finished_job_browser->position(); int myhpos_running = running_job_browser->hposition(); int myhpos_scheduled = scheduled_job_browser->hposition(); int myhpos_finished = finished_job_browser->hposition(); // Clear whatever was in there finished_job_browser->clear(); finished_processes.clear(); running_job_browser->clear(); running_processes.clear(); scheduled_job_browser->clear(); scheduled_processes.clear(); // Fill the finished Jobs browsers if (do_order_alphabetically) { // Only re-order the finished jobs! std::vector<std::pair<std::string, long int> > vp; for (long int i = pipeline.processList.size() -1; i >= 0; i--) { if (pipeline.processList[i].alias != "None") vp.push_back(std::make_pair(pipeline.processList[i].alias, i)); else vp.push_back(std::make_pair(pipeline.processList[i].name, i)); } // Sort on the first elements of the pairs std::sort(vp.begin(), vp.end()); for (long int ip = 0; ip < vp.size(); ip++) { long int i = vp[ip].second; if (pipeline.processList[i].status == PROC_FINISHED_SUCCESS || pipeline.processList[i].status == PROC_FINISHED_FAILURE || pipeline.processList[i].status == PROC_FINISHED_ABORTED) { finished_processes.push_back(i); if (pipeline.processList[i].status == PROC_FINISHED_SUCCESS) finished_job_browser->add(vp[ip].first.c_str()); else if (pipeline.processList[i].status == PROC_FINISHED_ABORTED) finished_job_browser->add(("@C1@-@." + vp[ip].first).c_str()); else finished_job_browser->add(("@C1@." + vp[ip].first).c_str()); } } } else { // For finished jobs search backwards, so that last jobs are at the top for (long int i = pipeline.processList.size() -1; i >= 0; i--) { if (pipeline.processList[i].status == PROC_FINISHED_SUCCESS || pipeline.processList[i].status == PROC_FINISHED_FAILURE || pipeline.processList[i].status == PROC_FINISHED_ABORTED) { finished_processes.push_back(i); if (pipeline.processList[i].status == PROC_FINISHED_SUCCESS) finished_job_browser->add((getJobNameForDisplay(pipeline.processList[i])).c_str()); else if (pipeline.processList[i].status == PROC_FINISHED_ABORTED) finished_job_browser->add(("@C1@-@." + getJobNameForDisplay(pipeline.processList[i])).c_str()); else finished_job_browser->add(("@C1@." + getJobNameForDisplay(pipeline.processList[i])).c_str()); } } } // For running and scheduled jobs search forwards, so that last jobs are at the bottom for (long int i = 0; i < pipeline.processList.size(); i++) { if (pipeline.processList[i].status == PROC_RUNNING) { running_processes.push_back(i); running_job_browser->add((getJobNameForDisplay(pipeline.processList[i])).c_str()); } else if (pipeline.processList[i].status == PROC_SCHEDULED) { scheduled_processes.push_back(i); scheduled_job_browser->add((getJobNameForDisplay(pipeline.processList[i])).c_str()); } } running_job_browser->position(mypos_running); scheduled_job_browser->position(mypos_scheduled); finished_job_browser->position(mypos_finished); running_job_browser->hposition(myhpos_running); scheduled_job_browser->hposition(myhpos_scheduled); finished_job_browser->hposition(myhpos_finished); } void GuiMainWindow::fillToAndFromJobLists() { display_io_node->clear(); input_job_browser->clear(); output_job_browser->clear(); scheduler_input_job_browser->clear(); scheduler_output_job_browser->clear(); io_nodes.clear(); input_processes.clear(); output_processes.clear(); if (current_job >= 0) { // Where do the input nodes come from?
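// For each input node of the current job: show the node itself in the I/O display browser (if its file exists), and add the process that produced it (at most once) to the input-jobs browser.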
for (long int inode = 0; inode < (pipeline.processList[current_job]).inputNodeList.size(); inode++) { long int mynode = (pipeline.processList[current_job]).inputNodeList[inode]; if (pipeline.nodeList[mynode].type != NODE_MOVIES) // no display for movie rootname { FileName fnt = pipeline.nodeList[mynode].name; if (exists(fnt)) { fnt = "in: " + fnt.afterLastOf("/"); display_io_node->add(fnt.c_str()); io_nodes.push_back(mynode); } } long int myproc = (pipeline.nodeList[mynode]).outputFromProcess; if (myproc >= 0) { // Check if this process was already there bool already_there = false; for (long int i = 0; i < input_processes.size(); i++) { if (myproc == input_processes[i]) { already_there=true; break; } } if (!already_there) { input_processes.push_back(myproc); if (show_scheduler) scheduler_input_job_browser->add((getJobNameForDisplay(pipeline.processList[myproc])).c_str()); else input_job_browser->add((getJobNameForDisplay(pipeline.processList[myproc])).c_str()); } } } // Where do the output nodes lead to? for (long int inode = 0; inode < (pipeline.processList[current_job]).outputNodeList.size(); inode++) { long int mynode = (pipeline.processList[current_job]).outputNodeList[inode]; FileName fnt = pipeline.nodeList[mynode].name; if (exists(fnt)) { fnt = "out: " + fnt.afterLastOf("/"); display_io_node->add(fnt.c_str()); io_nodes.push_back(mynode); } long int nr_outputs = (pipeline.nodeList[mynode]).inputForProcessList.size(); for (long int iproc = 0; iproc < nr_outputs; iproc++) { long int myproc = (pipeline.nodeList[mynode]).inputForProcessList[iproc]; // Check if this process was already there bool already_there = false; for (long int i = 0; i < output_processes.size(); i++) { if (myproc == output_processes[i]) { already_there=true; break; } } if (!already_there) { output_processes.push_back(myproc); if (show_scheduler) scheduler_output_job_browser->add((getJobNameForDisplay(pipeline.processList[myproc])).c_str()); else output_job_browser->add((getJobNameForDisplay(pipeline.processList[myproc])).c_str()); } } } } } // Update the content of the finished, running and scheduled job lists void GuiMainWindow::fillSchedulerNodesAndVariables() { // Go back to the same positions in the vertical scroll bars of the job lists after updating... int mypos_scheduler_variable = scheduler_variable_browser->value(); int mypos_scheduler_operator = scheduler_operator_browser->value(); int mypos_scheduler_edge = scheduler_edge_browser->value(); int mypos_scheduler_job = scheduler_job_browser->value(); // Clear whatever was in there scheduler_variable_browser->clear(); scheduler_operator_browser->clear(); scheduler_operator_output->clear(); scheduler_operator_input1->clear(); scheduler_operator_input2->clear(); scheduler_edge_browser->clear(); scheduler_edge_input->clear(); scheduler_edge_output->clear(); scheduler_edge_boolean->clear(); scheduler_edge_outputtrue->clear(); scheduler_job_browser->clear(); scheduled_processes.clear(); scheduler_current_node->clear(); operators_list.clear(); // Fill jobs browser for (long int i = 0; i < pipeline.processList.size(); i++) { scheduler_job_browser->add((getJobNameForDisplay(pipeline.processList[i])).c_str()); scheduled_processes.push_back(i); } // Also get input/output // Fill edges browser for (int i = 0; i < schedule.edges.size(); i++ ) { std::string mylabel; if (schedule.edges[i].is_fork) mylabel = schedule.edges[i].inputNode + " -> (" + schedule.edges[i].myBooleanVariable + ") ? " + schedule.edges[i].outputNodeTrue + " : " + schedule.edges[i].outputNode; else mylabel = schedule.edges[i].inputNode + " -> " + schedule.edges[i].outputNode; scheduler_edge_browser->add(mylabel.c_str()); } // Fill variables browser, and pull-down menus for operator input/output { std::map<std::string, SchedulerFloatVariable> scheduler_floats = schedule.getCurrentFloatVariables(); std::map<std::string, SchedulerFloatVariable>::iterator it; int i = 0; for ( it = scheduler_floats.begin(); it != scheduler_floats.end(); it++ ) { std::string mylabel = it->first + " = " + floatToString(it->second.value) + " (" + floatToString(it->second.original_value) + ")"; scheduler_variable_browser->add(mylabel.c_str()); scheduler_operator_output->add(it->first.c_str()); scheduler_operator_input1->add(it->first.c_str()); scheduler_operator_input2->add(it->first.c_str()); i++; } } { std::map<std::string, SchedulerBooleanVariable> scheduler_bools = schedule.getCurrentBooleanVariables(); std::map<std::string, SchedulerBooleanVariable>::iterator it; int i = 0; for ( it = scheduler_bools.begin(); it != scheduler_bools.end(); it++ ) { std::string myval = (it->second.value) ? "True" : "False"; std::string myorival = (it->second.original_value) ? "True" : "False"; std::string mylabel = it->first + " = " + myval + " (" + myorival + ")"; scheduler_variable_browser->add(mylabel.c_str()); scheduler_operator_output->add(it->first.c_str()); scheduler_operator_input1->add(it->first.c_str()); scheduler_operator_input2->add(it->first.c_str()); scheduler_edge_boolean->add(it->first.c_str()); i++; } } { std::map<std::string, SchedulerStringVariable> scheduler_strings = schedule.getCurrentStringVariables(); std::map<std::string, SchedulerStringVariable>::iterator it; int i = 0; for ( it = scheduler_strings.begin(); it != scheduler_strings.end(); it++ ) { std::string mylabel = it->first + " = " + it->second.value + " (" + it->second.original_value + ")"; scheduler_variable_browser->add(mylabel.c_str()); scheduler_operator_output->add(it->first.c_str()); scheduler_operator_input1->add(it->first.c_str()); scheduler_operator_input2->add(it->first.c_str()); i++; } } // Fill operator browser { std::map<std::string, SchedulerOperator> scheduler_operators = schedule.getCurrentOperators(); std::map<std::string, SchedulerOperator>::iterator it; int i = 0; for ( it = scheduler_operators.begin(); it != scheduler_operators.end(); it++ ) { std::string mylabel = it->first; scheduler_operator_browser->add(mylabel.c_str()); operators_list.push_back(it->first); scheduler_edge_input->add(it->first.c_str()); scheduler_edge_output->add(it->first.c_str()); scheduler_edge_outputtrue->add(it->first.c_str()); scheduler_current_node->add(it->first.c_str()); i++; } } // Also add jobnames to the input/output nodes of the edges for (long int i = 0; i < pipeline.processList.size(); i++) { if (pipeline.processList[i].status == PROC_SCHEDULED) { scheduler_edge_input->add((getJobNameForDisplay(pipeline.processList[i])).c_str()); scheduler_edge_output->add((getJobNameForDisplay(pipeline.processList[i])).c_str()); scheduler_edge_outputtrue->add((getJobNameForDisplay(pipeline.processList[i])).c_str()); scheduler_current_node->add((getJobNameForDisplay(pipeline.processList[i])).c_str()); } } // Set the value of the current_node if (schedule.current_node != "undefined") { scheduler_current_node->value(scheduler_current_node->find_item(schedule.current_node.c_str())); } scheduler_operator_output->add(""); scheduler_operator_input1->add(""); scheduler_operator_input2->add(""); scheduler_edge_outputtrue->add(""); scheduler_edge_boolean->add(""); if (mypos_scheduler_variable >= 0) { scheduler_variable_browser->value(mypos_scheduler_variable); cb_select_scheduler_variable_i(); } if (mypos_scheduler_operator >= 0) {
scheduler_operator_browser->value(mypos_scheduler_operator); cb_select_scheduler_operator_i(); } if (mypos_scheduler_edge >= 0) { scheduler_edge_browser->value(mypos_scheduler_edge); cb_select_scheduler_edge_i(); } if (mypos_scheduler_job > 0) { scheduler_job_browser->value(mypos_scheduler_job); } if (schedule.isWriteLocked()) { scheduler_run_grp->deactivate(); } else { scheduler_run_grp->activate(); } } void GuiMainWindow::fillStdOutAndErr() { FileName fn_out = ""; FileName fn_err = ""; FileName fn_outtail, fn_errtail; if (current_job >= 0 || show_scheduler) { std::string myroot = (show_scheduler) ? schedule.name : pipeline.processList[current_job].name; fn_out = myroot + "run.out"; fn_err = myroot + "run.err"; fn_outtail = myroot + ".run.out.tail"; fn_errtail = myroot + ".run.err.tail"; } if (exists(fn_out)) { if (maingui_do_read_only) { int err = textbuff_stdout->loadfile(fn_out.c_str()); } else { // Remove annoying carriage returns std::string command = "tail -n 20 < " + fn_out + " | awk -F\"\r\" '{if (NF>1) {print $NF} else {print}}' > " + fn_outtail; int res = system(command.c_str()); std::ifstream in(fn_outtail.c_str(), std::ios_base::in); if (in.fail()) REPORT_ERROR( (std::string) "GuiMainWindow::fillStdOutAndErr: File " + fn_outtail + " does not exist" ); int err = textbuff_stdout->loadfile(fn_outtail.c_str()); in.close(); } // Scroll to the bottom disp_stdout->insert_position(textbuff_stdout->length()-1); disp_stdout->show_insert_position(); disp_expand_stdout->insert_position(textbuff_stdout->length()-1); disp_expand_stdout->show_insert_position(); } else textbuff_stdout->text("stdout will go here; double-click this window to open stdout in a separate window"); if (exists(fn_err)) { if (maingui_do_read_only) { int err = textbuff_stderr->loadfile(fn_err.c_str()); } else { std::string command = "tail -10 " + fn_err + " > " + fn_errtail; int res = system(command.c_str()); std::ifstream in(fn_errtail.c_str(), std::ios_base::in); if (in.fail()) REPORT_ERROR( (std::string) "GuiMainWindow::fillStdOutAndErr: File " + fn_errtail + " does not exist" ); int err = textbuff_stderr->loadfile(fn_errtail.c_str()); in.close(); } // Scroll to the bottom disp_stderr->insert_position(textbuff_stderr->length()-1); disp_stderr->show_insert_position(); disp_expand_stderr->insert_position(textbuff_stderr->length()-1); disp_expand_stderr->show_insert_position(); } else textbuff_stderr->text("stderr will go here; double-click this window to open stderr in a separate window"); } void GuiMainWindow::tickTimeLastChanged() { time(&time_last_change); } void GuiMainWindow::updateJobLists() { pipeline.checkProcessCompletion(); if (show_scheduler) fillSchedulerNodesAndVariables(); else fillRunningJobLists(); fillToAndFromJobLists(); } void GuiMainWindow::loadJobFromPipeline(int this_job) { // Set the "static int" to which job we're currently pointing current_job = this_job; int itype = pipeline.processList[current_job].type; // The following line allows certain browse buttons to only open the current directory (using CURRENT_ODIR) current_browse_directory = pipeline.processList[current_job].name; // What type of job is this?
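// Find which of the NR_BROWSE_TABS job-type tabs matches this process type, so the left-hand job-type browser is set to the corresponding entry before the job's settings are re-read below.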
for ( int t=0; t<NR_BROWSE_TABS; t++ ) { if ( gui_jobwindows[t]->myjob.type == itype ) browser->value(t+1); } // change GUI to the corresponding jobwindow cb_select_browsegroup_i(); // Re-read the settings for this job and update the values inside the GUI int iwin = (browser->value() - 1); gui_jobwindows[iwin]->myjob.read(pipeline.processList[current_job].name, is_main_continue); gui_jobwindows[iwin]->updateMyGui(); // If a finished or running job was loaded from the pipeline: set this to be a continuation job // If a scheduled job was loaded, only set is_main_continue to true when it is PROC_SCHEDULED //if (pipeline.processList[current_job].status == PROC_SCHEDULED && !gui_jobwindows[iwin]->myjob.is_continue) // is_main_continue = false; //else // is_main_continue = true; // Any job loaded from the pipeline will initially be set as a continuation job // but for show-scheduler, no job should be a continuation if (show_scheduler) { is_main_continue = false; do_overwrite_continue = true; } else { is_main_continue = true; } cb_toggle_continue_i(); // Set the alias in the window alias_current_job->value((getJobNameForDisplay(pipeline.processList[current_job])).c_str()); alias_current_job->position(0); //left-centered text in box // Update all job lists in the main GUI updateJobLists(); // Fill the out and err windows fillStdOutAndErr(); } void GuiMainWindow::cb_select_browsegroup(Fl_Widget* o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; // When clicking the job browser on the left: reset current_job to -1 (i.e. a new job, not yet in the pipeline) current_job = -1; T->cb_select_browsegroup_i(); run_button->activate(); } void GuiMainWindow::cb_select_browsegroup_i(bool show_initial_screen) { // Update timer tickTimeLastChanged(); // Show or hide the initial background screen if (show_initial_screen) background_grp->show(); else background_grp->hide(); int iwin = (browser->value() - 1); if (iwin < 0 || iwin >= NR_BROWSE_TABS) return; // Show the 'selected' group, hide the others for ( int t=0; t<NR_BROWSE_TABS; t++ ) { if ( t == iwin ) { browse_grp[t]->show(); } else { browse_grp[t]->hide(); } } // Update all job lists in the main GUI updateJobLists(); is_main_continue = false; do_overwrite_continue = false; // If the GUI got changed, put that change into the joboption now gui_jobwindows[iwin]->updateMyJob(); // toggle the continue status of this job cb_toggle_continue_i(); alias_current_job->value("Give_alias_here"); scheduler_job_name->value(""); scheduler_job_name->activate(); scheduler_job_has_started->deactivate(); scheduler_job_has_started->picked(&job_has_started_options[1]); // initialise to has_not_started textbuff_stdout->text("stdout will go here; double-click this window to open stdout in a separate window"); textbuff_stderr->text("stderr will go here; double-click this window to open stderr in a separate window"); } void GuiMainWindow::cb_select_finished_job(Fl_Widget* o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_select_finished_job_i(); run_button->activate(); } void GuiMainWindow::cb_select_finished_job_i() { // Update timer tickTimeLastChanged(); // Show the 'selected' group, hide the others int idx = finished_job_browser->value() - 1; if (idx >= 0) // only if a non-empty line was selected loadJobFromPipeline(finished_processes[idx]); } void GuiMainWindow::cb_select_running_job(Fl_Widget* o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_select_running_job_i(); run_button->activate(); } void GuiMainWindow::cb_select_running_job_i() { // Update timer tickTimeLastChanged(); // Show the 'selected' group, hide the others int idx = running_job_browser->value() - 1; if (idx >= 0) // only if
a non-empty line was selected loadJobFromPipeline(running_processes[idx]); } void GuiMainWindow::cb_select_scheduled_job(Fl_Widget* o, void* v) { // std::cout << "v = " << v << std::endl; GuiMainWindow* T=(GuiMainWindow*)v; T->cb_select_scheduled_job_i(); run_button->activate(); } void GuiMainWindow::cb_select_scheduled_job_i() { // Update timer tickTimeLastChanged(); // Show the 'selected' group, hide the others int idx = (show_scheduler) ? scheduler_job_browser->value() - 1 : scheduled_job_browser->value() - 1; if (idx < 0) return; // only if a non-empty line was selected loadJobFromPipeline(scheduled_processes[idx]); if (show_scheduler) { FileName jobname = getJobNameForDisplay(pipeline.processList[current_job]); scheduler_job_name->value(jobname.c_str()); scheduler_job_name->deactivate(); bool found = false; for (int i = 0; i < 3; i++) { if (schedule.jobs[jobname].mode == job_mode_options[i].label()) { found = true; scheduler_job_mode->value(i); } } if (schedule.jobs[jobname].job_has_started) scheduler_job_has_started->value(0); else scheduler_job_has_started->value(1); scheduler_job_has_started->activate(); if (!found) REPORT_ERROR("ERROR: unrecognised job_mode ..."); } } void GuiMainWindow::cb_select_input_job(Fl_Widget* o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_select_input_job_i(); run_button->activate(); } void GuiMainWindow::cb_select_input_job_i() { // Update timer tickTimeLastChanged(); // Show the 'selected' group, hide the others int idx = (show_scheduler) ? scheduler_input_job_browser->value() - 1 : input_job_browser->value() - 1; if (idx >= 0) // only if a non-empty line was selected { loadJobFromPipeline(input_processes[idx]); } } void GuiMainWindow::cb_select_output_job(Fl_Widget* o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_select_output_job_i(); run_button->activate(); } void GuiMainWindow::cb_select_output_job_i() { // Update timer tickTimeLastChanged(); // Show the 'selected' group, hide the others int idx = (show_scheduler) ? scheduler_output_job_browser->value() - 1 : output_job_browser->value() - 1; if (idx >= 0) // only if a non-empty line was selected loadJobFromPipeline(output_processes[idx]); } void GuiMainWindow::cb_display_io_node(Fl_Widget* o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_display_io_node_i(); run_button->activate(); } void GuiMainWindow::cb_display_io_node_i() { // Run relion_display on the output node int idx = display_io_node->value(); long int mynode = io_nodes[idx]; std::string command; if (pipeline.nodeList[mynode].type == NODE_MIC_COORDS) { // A manualpicker jobwindow for display of micrographs.... RelionJob manualpickjob; FileName fn_job = ".gui_manualpick"; bool iscont=false; if (exists(fn_job+"job.star") || exists(fn_job+"run.job")) { manualpickjob.read(fn_job.c_str(), iscont, true); // true means do initialise } else { fl_message("ERROR: Save a Manual picking job parameter file (using the Save jobs settings option from the Jobs menu) before displaying coordinate files. "); return; } // Get the name of the micrograph STAR file from reading the suffix file FileName fn_suffix = pipeline.nodeList[mynode].name; if (fn_suffix.getExtension() == "star") { std::ifstream in(fn_suffix.data(), std::ios_base::in); FileName fn_star; in >> fn_star ; in.close(); if (fn_star != "") { FileName fn_dirs = fn_suffix.beforeLastOf("/")+"/"; fn_suffix = fn_suffix.afterLastOf("/").without("coords_suffix_"); fn_suffix = fn_suffix.withoutExtension(); // Launch the manualpicker... 
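// The display command below is built from the settings saved in the hidden .gui_manualpick job file, so coordinates are shown with the same scale, contrast, lowpass/highpass and CTF options as in the Manual picking job.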
command="`which relion_manualpick` --i " + fn_star; command += " --odir " + fn_dirs; command += " --pickname " + fn_suffix; command += " --scale " + manualpickjob.joboptions["micscale"].getString(); command += " --sigma_contrast " + manualpickjob.joboptions["sigma_contrast"].getString(); command += " --black " + manualpickjob.joboptions["black_val"].getString(); command += " --white " + manualpickjob.joboptions["white_val"].getString(); std::string error_message = ""; float mylowpass = manualpickjob.joboptions["lowpass"].getNumber(error_message); if (error_message != "") {fl_message("joboption['lowpass'] %s", error_message.c_str()); return;} if (mylowpass > 0.) command += " --lowpass " + manualpickjob.joboptions["lowpass"].getString(); float myhighpass = manualpickjob.joboptions["highpass"].getNumber(error_message); if (error_message != "") {fl_message("joboption['highpass'] %s", error_message.c_str()); return;} if (myhighpass > 0.) command += " --highpass " + manualpickjob.joboptions["highpass"].getString(); float myangpix = manualpickjob.joboptions["angpix"].getNumber(error_message); if (error_message != "") {fl_message("joboption['angpix'] %s", error_message.c_str()); return;} if (myangpix > 0.) command += " --angpix " + manualpickjob.joboptions["angpix"].getString(); command += " --ctf_scale " + manualpickjob.joboptions["ctfscale"].getString(); command += " --particle_diameter " + manualpickjob.joboptions["diameter"].getString(); if (manualpickjob.joboptions["do_color"].getBoolean()) { command += " --color_label " + manualpickjob.joboptions["color_label"].getString(); command += " --blue " + manualpickjob.joboptions["blue_value"].getString(); command += " --red " + manualpickjob.joboptions["red_value"].getString(); if (manualpickjob.joboptions["fn_color"].getString().length() > 0) command += " --color_star " + manualpickjob.joboptions["fn_color"].getString(); } // Other arguments for extraction command += " " + manualpickjob.joboptions["other_args"].getString() + " &"; } else { fl_message("Only coordinates in .star format, generated in the pipeline, can be displayed here."); } } else { fl_message("Only coordinates in .star format, generated in the pipeline, can be displayed here."); } } else if (pipeline.nodeList[mynode].type == NODE_PDF_LOGFILE) { const char * default_pdf_viewer = getenv ("RELION_PDFVIEWER_EXECUTABLE"); char mydefault[]=DEFAULTPDFVIEWER; if (default_pdf_viewer == NULL) { default_pdf_viewer=mydefault; } std::string myviewer(default_pdf_viewer); command = myviewer + " " + pipeline.nodeList[mynode].name + "&"; } else if (pipeline.nodeList[mynode].type == NODE_POLISH_PARAMS) { command = "cat " + pipeline.nodeList[mynode].name; } else if (pipeline.nodeList[mynode].type != NODE_POST) { command = "relion_display --gui --i " + pipeline.nodeList[mynode].name + " &"; } //std::cerr << " command= " << command << std::endl; int res= system(command.c_str()); } void GuiMainWindow::cb_add_scheduler_edge(Fl_Widget* o, void*v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_add_scheduler_edge_i(); } void GuiMainWindow::cb_add_scheduler_edge_i() { std::string input, output; int idx; idx = scheduler_edge_input->value(); if (idx < 0 || idx >= scheduler_edge_input->size()) { std::cerr << " Error getting input from scheduler edge window, please try again ..." 
<< std::endl; return; } else { input = scheduler_edge_input->text(idx); } idx = scheduler_edge_output->value(); if (idx < 0 || idx >= scheduler_edge_output->size()) { std::cerr << " Error getting output from scheduler edge window, please try again ..." << std::endl; return; } else { output = scheduler_edge_output->text(idx); } idx = scheduler_edge_boolean->value(); if (idx >= 0) { std::string mybool = scheduler_edge_boolean->text(idx); int idx2= scheduler_edge_outputtrue->value(); if (idx2 < 0 || idx2 >= scheduler_edge_outputtrue->size()) { std::cerr << " Error getting outputtrue from scheduler edge window, please try again ..." << std::endl; return; } else { std::string outputtrue = scheduler_edge_outputtrue->text(idx2); schedule.read(DO_LOCK); schedule.addFork(input, mybool, outputtrue, output); schedule.write(DO_LOCK); } } else { schedule.read(DO_LOCK); schedule.addEdge(input, output); schedule.write(DO_LOCK); } fillSchedulerNodesAndVariables(); } void GuiMainWindow::cb_delete_scheduler_edge(Fl_Widget* o, void*v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_delete_scheduler_edge_i(); } void GuiMainWindow::cb_delete_scheduler_edge_i() { int idx = scheduler_edge_browser->value(); if (idx <= 0) { fl_message("Please select a job."); return; } std::string ask = "Are you sure you want to delete this edge?"; int proceed = fl_choice("%s", "Cancel", "Delete!", NULL, ask.c_str()); if (!proceed) { do_overwrite_continue = false; return; } schedule.read(DO_LOCK); schedule.removeEdge(idx-1); schedule.write(DO_LOCK); // Also reset entry fields scheduler_edge_input->value(-1); scheduler_edge_output->value(-1); scheduler_edge_outputtrue->value(-1); scheduler_edge_boolean->value(-1); scheduler_edge_browser->value(-1); fillSchedulerNodesAndVariables(); } void GuiMainWindow::cb_select_scheduler_edge(Fl_Widget *o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_select_scheduler_edge_i(); } void GuiMainWindow::cb_select_scheduler_edge_i() { // Get position of the browser: int idx = scheduler_edge_browser->value(); if (idx >= 1) { int i = idx - 1; FileName mytext = scheduler_edge_browser->text(idx); scheduler_edge_input->value(scheduler_edge_input->find_item(schedule.edges[i].inputNode.c_str())); scheduler_edge_output->value(scheduler_edge_output->find_item(schedule.edges[i].outputNode.c_str())); if (schedule.edges[i].is_fork) { scheduler_edge_boolean->value(scheduler_edge_boolean->find_item(schedule.edges[i].myBooleanVariable.c_str())); scheduler_edge_outputtrue->value(scheduler_edge_outputtrue->find_item(schedule.edges[i].outputNodeTrue.c_str())); } else { scheduler_edge_boolean->value(scheduler_edge_boolean->find_item("")); scheduler_edge_outputtrue->value(scheduler_edge_outputtrue->find_item("")); } } else { scheduler_edge_input->value(-1); scheduler_edge_output->value(-1); scheduler_edge_boolean->value(-1); scheduler_edge_outputtrue->value(-1); } } void GuiMainWindow::cb_set_scheduler_variable(Fl_Widget* o, void*v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_set_scheduler_variable_i(); } void GuiMainWindow::cb_set_scheduler_variable_i() { std::string myname = scheduler_variable_name->value(); std::string myval = scheduler_variable_value->value(); if (myname == "") return; schedule.read(DO_LOCK); schedule.setVariable(myname, myval); schedule.setOriginalVariable(myname, myval); // Also reset entry fields scheduler_variable_name->value(""); scheduler_variable_value->value(""); fillSchedulerNodesAndVariables(); schedule.write(DO_LOCK); } void GuiMainWindow::cb_delete_scheduler_variable(Fl_Widget* o, 
void*v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_delete_scheduler_variable_i(); } void GuiMainWindow::cb_delete_scheduler_variable_i() { std::string myname = scheduler_variable_name->value(); if (myname == "") return; std::string ask = "Are you sure you want to delete this variable, and all operators or edges that use it?"; int proceed = fl_choice("%s", "Cancel", "Delete!", NULL, ask.c_str()); if (!proceed) { do_overwrite_continue = false; return; } schedule.read(DO_LOCK); schedule.removeVariable(myname); schedule.write(DO_LOCK); // Also reset entry fields scheduler_variable_name->value(""); scheduler_variable_value->value(""); fillSchedulerNodesAndVariables(); } void GuiMainWindow::cb_add_scheduler_operator(Fl_Widget* o, void*v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_add_scheduler_operator_i(); } void GuiMainWindow::cb_add_scheduler_operator_i() { int idx = scheduler_operator_type->value(); if (idx < 0) { std::cerr << "ERROR: select an operator type, try again... " << std::endl; return; } std::string type = scheduler_operator_type->text(idx); idx = scheduler_operator_output->value(); std::string output = (idx < 0 || idx >= scheduler_operator_output->size()) ? "" : scheduler_operator_output->text(idx); idx = scheduler_operator_input1->value(); std::string input1 = (idx < 0 || idx >= scheduler_operator_input1->size()) ? "" : scheduler_operator_input1->text(idx); idx = scheduler_operator_input2->value(); std::string input2 = (idx < 0 || idx >= scheduler_operator_input2->size()) ? "" : scheduler_operator_input2->text(idx); std::string error_message; SchedulerOperator myop = schedule.initialiseOperator(type, input1, input2, output, error_message); if (error_message != "") { fl_message("%s", error_message.c_str()); return; } else { std::string newname = myop.getName(); if (schedule.isOperator(newname)) { fl_message("ERROR: this operator already exists..."); return; } schedule.read(DO_LOCK); schedule.addOperator(myop); schedule.write(DO_LOCK); // Also reset entry fields scheduler_operator_type->value(-1); scheduler_operator_output->value(-1); scheduler_operator_input1->value(-1); scheduler_operator_input2->value(-1); fillSchedulerNodesAndVariables(); } } void GuiMainWindow::cb_delete_scheduler_operator(Fl_Widget* o, void*v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_delete_scheduler_operator_i(); } void GuiMainWindow::cb_delete_scheduler_operator_i() { std::string ask = "Are you sure you want to delete this operator and its connecting edges?"; int proceed = fl_choice("%s", "Cancel", "Delete!", NULL, ask.c_str()); if (!proceed) { do_overwrite_continue = false; return; } const std::string type = scheduler_operator_type->text(scheduler_operator_type->value()); std::string output = "", input1 = "", input2 = ""; // Some operators do not have these arguments. 
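// The operator's unique name is rebuilt below from its type and its (possibly empty) input/output arguments; that reconstructed name is what is passed to schedule.removeOperator().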
if (scheduler_operator_output->value() >= 0) output = scheduler_operator_output->text(scheduler_operator_output->value()); if (scheduler_operator_input1->value() >= 0) input1 = scheduler_operator_input1->text(scheduler_operator_input1->value()); if (scheduler_operator_input2->value() >= 0) input2 = scheduler_operator_input2->text(scheduler_operator_input2->value()); const std::string name = schedule.getOperatorName(type, input1, input2, output); schedule.read(DO_LOCK); schedule.removeOperator(name); schedule.write(DO_LOCK); // Also reset entry fields scheduler_operator_type->value(-1); scheduler_operator_output->value(-1); scheduler_operator_input1->value(-1); scheduler_operator_input2->value(-1); fillSchedulerNodesAndVariables(); } void GuiMainWindow::cb_select_scheduler_variable(Fl_Widget* o, void*v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_select_scheduler_variable_i(); } void GuiMainWindow::cb_select_scheduler_variable_i() { // Get position of the browser: int idx = scheduler_variable_browser->value(); if (idx >=1) { FileName mytext = scheduler_variable_browser->text(idx); FileName myname = mytext.beforeFirstOf(" = "); FileName myval = mytext.afterFirstOf(" = "); myval = myval.beforeFirstOf(" ("); scheduler_variable_name->value(myname.c_str()); scheduler_variable_value->value(myval.c_str()); } else { scheduler_variable_name->value(""); scheduler_variable_value->value(""); } } void GuiMainWindow::cb_select_scheduler_operator(Fl_Widget *o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_select_scheduler_operator_i(); } void GuiMainWindow::cb_select_scheduler_operator_i() { // Get position of the browser: int idx = scheduler_operator_browser->value(); if (idx >= 1) { FileName myname = scheduler_operator_browser->text(idx); std::string type, input1, input2, output; schedule.getOperatorParameters(myname, type, input1, input2, output); scheduler_operator_type->value(scheduler_operator_type->find_item(type.c_str())); if (scheduler_operator_output->find_item(output.c_str())) scheduler_operator_output->value(scheduler_operator_output->find_item(output.c_str())); else scheduler_operator_output->value(scheduler_operator_output->find_item("")); if (scheduler_operator_input1->find_item(input1.c_str())) scheduler_operator_input1->value(scheduler_operator_input1->find_item(input1.c_str())); else scheduler_operator_input1->value(scheduler_operator_input1->find_item("")); if (scheduler_operator_input2->find_item(input2.c_str())) scheduler_operator_input2->value(scheduler_operator_input2->find_item(input2.c_str())); else scheduler_operator_input2->value(scheduler_operator_input2->find_item("")); } else { scheduler_operator_type->value(-1); scheduler_operator_output->value(-1); scheduler_operator_input1->value(-1); scheduler_operator_input2->value(-1); } } void GuiMainWindow::cb_scheduler_set_current(Fl_Widget *o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_scheduler_set_current_i(); } void GuiMainWindow::cb_scheduler_set_current_i() { if (scheduler_current_node->value() < 0) { std::cerr << " ERROR: scheduler_current_node->value()= " << scheduler_current_node->value() << std::endl; return; } schedule.read(DO_LOCK); schedule.current_node= std::string(scheduler_current_node->text(scheduler_current_node->value())); schedule.write(DO_LOCK); // If a schedule has finished: activate the GUI again if (schedule.current_node == "EXIT") { scheduler_run_grp->activate(); } if (schedule.isJob(schedule.current_node)) { for (long int ii = 0; ii < scheduled_processes.size(); ii++) { long int id = 
scheduled_processes[ii]; if (schedule.current_node == getJobNameForDisplay(pipeline.processList[id])) { scheduler_job_browser->value(id+1); cb_select_scheduled_job_i(); } } } else { for (int i =0; i < operators_list.size(); i++) { if (schedule.current_node == operators_list[i]) { scheduler_operator_browser->value(i+1); cb_select_scheduler_operator_i(); } } } // Also set the edge from this node to the next one! for (int i = 0; i < schedule.edges.size(); i++ ) { if (schedule.edges[i].inputNode == schedule.current_node) { scheduler_edge_browser->value(i+1); cb_select_scheduler_edge_i(); } } } void GuiMainWindow::cb_scheduler_next(Fl_Widget *o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_scheduler_next_i(); } void GuiMainWindow::cb_scheduler_next_i() { std::string mycurrent, nextnode; mycurrent = schedule.current_node; if (schedule.current_node == "undefined") { if (schedule.edges.size() > 0) { schedule.current_node = schedule.edges[0].inputNode; scheduler_current_node->value(scheduler_current_node->find_item(schedule.current_node.c_str())); cb_scheduler_set_current_i(); return; } else { return; } } for (int i = 0; i < schedule.edges.size(); i++) { if (schedule.edges[i].inputNode == mycurrent) { if (schedule.edges[i].is_fork) { std::string ask = "Fork on " + schedule.edges[i].myBooleanVariable + ". Do you want this to be True or False?"; int is_true = fl_choice("%s", "False", "True", NULL, ask.c_str()); nextnode = (is_true) ? schedule.edges[i].outputNodeTrue : schedule.edges[i].outputNode; } else { nextnode = schedule.edges[i].outputNode; } const Fl_Menu_Item *myitem = scheduler_current_node->find_item(nextnode.c_str()); if (myitem == NULL) { fl_message("ERROR: next node is undefined"); } else { scheduler_current_node->value(myitem); cb_scheduler_set_current_i(); } return; } } } void GuiMainWindow::cb_scheduler_prev(Fl_Widget *o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_scheduler_prev_i(); } void GuiMainWindow::cb_scheduler_prev_i() { // If already at the beginning, just return if (schedule.current_node == "undefined") return; std::string myprev = schedule.getPreviousNode(); std::cerr << " myprev= " << myprev << std::endl; if (myprev == "undefined") { fl_message("ERROR: previous node is undefined"); } else { scheduler_current_node->value(scheduler_current_node->find_item(myprev.c_str())); cb_scheduler_set_current_i(); } return; } void GuiMainWindow::cb_scheduler_reset(Fl_Widget *o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_scheduler_reset_i(); } void GuiMainWindow::cb_scheduler_reset_i() { std::string ask = "Are you sure you want to reset all variables to their initial state, in order to start over from scratch?"; int proceed = fl_choice("%s", "Cancel", "Reset!", NULL, ask.c_str()); if (proceed) { schedule.read(DO_LOCK); schedule.reset(); schedule.write(DO_LOCK); fillSchedulerNodesAndVariables(); cb_scheduler_set_current_i(); } } void GuiMainWindow::cb_scheduler_unlock(Fl_Widget *o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_scheduler_unlock_i(); } void GuiMainWindow::cb_scheduler_unlock_i() { schedule.unlock(); show_expand_stdout = true; cb_toggle_expand_stdout_i(); scheduler_run_grp->activate(); return; } void GuiMainWindow::cb_scheduler_abort(Fl_Widget *o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_scheduler_abort_i(); } void GuiMainWindow::cb_scheduler_abort_i() { std::string ask = "Are you sure you want to abort this schedule?"; int proceed = fl_choice("%s", "Cancel", "Abort!", NULL, ask.c_str()); if (proceed) { schedule.abort(); return; 
} } void GuiMainWindow::cb_scheduler_run(Fl_Widget *o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_scheduler_run_i(); } void GuiMainWindow::cb_scheduler_run_i() { FileName name_wo_dir = schedule.name; std::string command = " relion_scheduler --schedule " + name_wo_dir.afterFirstOf("Schedules/") + " --run --pipeline_control " + schedule.name + " >> " + schedule.name + "run.out 2>> " + schedule.name + "run.err &"; int res = system(command.c_str()); scheduler_run_grp->deactivate(); show_expand_stdout = false; cb_toggle_expand_stdout_i(); } void GuiMainWindow::cb_display(Fl_Widget* o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_display_i(); } void GuiMainWindow::cb_display_i() { std::string command = " relion_display --gui &" ; int res = system(command.c_str()); } void GuiMainWindow::cb_toggle_continue_i() { if (is_main_continue || do_overwrite_continue) { if (do_overwrite_continue) { run_button->label("Overwrite!"); add_job_button->label("Save"); add_job_button->color(GUI_BUTTON_COLOR); } else { run_button->label("Continue!"); } run_button->color(GUI_BUTTON_COLOR); run_button->labelfont(FL_ITALIC); run_button->labelsize(13); alias_current_job->deactivate(); } else { run_button->label("Run!"); add_job_button->label("Add job"); add_job_button->color(GUI_RUNBUTTON_COLOR); run_button->color(GUI_RUNBUTTON_COLOR); run_button->labelfont(FL_ITALIC); run_button->labelsize(16); alias_current_job->activate(); } int my_window = (browser->value() - 1); gui_jobwindows[my_window]->toggle_new_continue(is_main_continue && !do_overwrite_continue); } void GuiMainWindow::cb_print_cl(Fl_Widget* o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_print_cl_i(); } void GuiMainWindow::cb_print_cl_i() { int iwin = browser->value() - 1; // And update the job inside it gui_jobwindows[iwin]->updateMyJob(); std::string error_message; if (!pipeline.getCommandLineJob(gui_jobwindows[iwin]->myjob, current_job, is_main_continue, false, DONT_MKDIR, do_overwrite_continue, commands, final_command, error_message)) { fl_message("%s",error_message.c_str()); } else { std::string command= "", mesg = " The command is: "; for (int icom = 0; icom < commands.size(); icom++) { if (icom > 0) command += " && "; command += commands[icom]; } fl_input("%s", command.c_str(), mesg.c_str()); // Don't free the returned string! It comes from Fl_Input::value(), which returns // "pointer to an internal buffer - do not free() this". 
} } // Run button call-back functions void GuiMainWindow::cb_run(Fl_Widget* o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; // Deactivate Run button to prevent the user from accidentally submitting many jobs run_button->deactivate(); // Run the job T->cb_run_i(false, false); // 1st false means don't only_schedule, 2nd false means don't open the note editor window } // Run button call-back functions void GuiMainWindow::cb_schedule(Fl_Widget* o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_run_i(true, false); // 1st true means only_schedule, do not run, 2nd false means don't open the note editor window } void GuiMainWindow::cb_run_i(bool only_schedule, bool do_open_edit) { if (do_overwrite_continue) { std::string ask = "Are you sure you want to overwrite this job?"; int proceed = fl_choice("%s", "Cancel", "Overwrite!", NULL, ask.c_str()); if (!proceed) { do_overwrite_continue = false; return; } } // Get which jobtype the GUI is on now int iwin = browser->value() - 1; // And update the job inside it gui_jobwindows[iwin]->updateMyJob(); // Update timer tickTimeLastChanged(); std::string error_message; if (!pipeline.runJob(gui_jobwindows[iwin]->myjob, current_job, only_schedule, is_main_continue, false, do_overwrite_continue, error_message)) { fl_message("%s", error_message.c_str()); // Allow the user to fix the error and submit this job again run_button->activate(); return; } // Update all job lists in the main GUI updateJobLists(); // Open the edit note window if (do_open_edit) { // Open the note editor window cb_edit_note_i(); } // Also set alias from the alias_current_job input if (!(is_main_continue || do_overwrite_continue)) { std::string alias= (std::string)alias_current_job->value(); if (alias != "Give_alias_here" && alias != pipeline.processList[current_job].name) cb_set_alias_i(alias); } do_overwrite_continue = false; // Select this job now loadJobFromPipeline(current_job); } void GuiMainWindow::cb_delete_scheduler_job(Fl_Widget* o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_delete_scheduler_job_i(); } void GuiMainWindow::cb_delete_scheduler_job_i() { std::vector<bool> deleteProcesses, deleteNodes; pipeline.deleteJobGetNodesAndProcesses(current_job, true, deleteNodes, deleteProcesses); // Before we do anything: confirm this is really what the user wants to do.... std::string ask; ask = "Are you sure you want to delete the following jobs, and their connecting edges? \n"; for (size_t i = 0; i < deleteProcesses.size(); i++) { if (deleteProcesses[i]) { std::string name = getJobNameForDisplay(pipeline.processList[i]); ask += " - " + name + "\n"; } } if (fl_choice("%s", "Cancel", "Delete!", NULL, ask.c_str())) { // Remove the jobs from the schedule itself schedule.read(DO_LOCK); for (int i = 0; i < deleteProcesses.size(); i++) if (deleteProcesses[i]) schedule.removeJob(getJobNameForDisplay(pipeline.processList[i])); schedule.write(DO_LOCK); // And remove from the local pipeliner pipeline.deleteNodesAndProcesses(deleteNodes, deleteProcesses); // Reset current_job current_job = -1; scheduler_job_name->value(""); fillStdOutAndErr(); // Update all job lists in the main GUI updateJobLists(); } std::string jobname = scheduler_job_name->value(); } void GuiMainWindow::cb_scheduler_add_job(Fl_Widget* o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_scheduler_add_job_i(); } void GuiMainWindow::cb_scheduler_add_job_i() { // Get which jobtype the GUI is on now int iwin = browser->value() - 1; // And update the job inside it gui_jobwindows[iwin]->updateMyJob(); std::string mode = job_mode_options[scheduler_job_mode->value()].label(); std::string jobname = scheduler_job_name->value(); if (do_overwrite_continue) { // Write the possibly updated job settings gui_jobwindows[iwin]->myjob.write(pipeline.processList[current_job].name); // Also write the possibly updated job_mode std::string mode = job_mode_options[scheduler_job_mode->value()].label(); std::string has_started_str = job_has_started_options[scheduler_job_has_started->value()].label(); schedule.read(DO_LOCK); schedule.jobs[jobname].mode = mode; schedule.jobs[jobname].job_has_started = (has_started_str == "has started"); schedule.write(DO_LOCK); } else { // Add job to the schedule // Get the mode, and the jobname if (jobname == "") { fl_message("%s","You need to provide a Name for this job in the scheduler."); return; } // TODO: test the command line std::string error_message, dummy; if (!gui_jobwindows[iwin]->myjob.getCommands(dummy, commands, final_command, false, 1, error_message)) { fl_message("%s", error_message.c_str()); return; } schedule.read(DO_LOCK); schedule.addJob(gui_jobwindows[iwin]->myjob, jobname, mode); schedule.write(DO_LOCK); scheduler_job_name->value(""); updateJobLists(); } } // Run button call-back functions void GuiMainWindow::cb_delete(Fl_Widget* o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_delete_i(); } void GuiMainWindow::cb_delete_i(bool do_ask, bool do_recursive) { if (current_job < 0) { fl_message("Please select a job."); return; } std::vector<bool> deleteProcesses, deleteNodes; pipeline.deleteJobGetNodesAndProcesses(current_job, do_recursive, deleteNodes, deleteProcesses); // Before we do anything: confirm this is really what the user wants to do.... int proceed; if (do_ask) { std::string ask; ask = "Are you sure you want to move the following processes to Trash? \n"; for (size_t i = 0; i < deleteProcesses.size(); i++) { if (deleteProcesses[i]) { std::string name = (pipeline.processList[i].alias == "None") ?
pipeline.processList[i].name : pipeline.processList[i].alias; ask += " - " + name + "\n"; } } proceed = fl_choice("%s", "Cancel", "Move", NULL, ask.c_str()); } else { proceed = 1; } if (proceed) { pipeline.deleteNodesAndProcesses(deleteNodes, deleteProcesses); // Reset current_job current_job = -1; fillStdOutAndErr(); // Update all job lists in the main GUI updateJobLists(); } } // Run button call-back functions void GuiMainWindow::cb_gently_clean_all_jobs(Fl_Widget* o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_clean_all_jobs_i(false); } // Run button call-back functions void GuiMainWindow::cb_harshly_clean_all_jobs(Fl_Widget* o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_clean_all_jobs_i(true); } void GuiMainWindow::cb_clean_all_jobs_i(bool do_harsh) { int proceed = 1; std::string ask; if (do_harsh) { ask = "Are you sure you want to harshly clean up intermediate files from the entire pipeline? \n\n\ Harsh cleaning will remove micrographs, movies and particle stacks from all MotionCorr, Extract, \n\ Polish and Subtract directories. This means you will NOT be able to use those images in subsequent runs anymore, \n\ although you could always recreate the data by continuing the job (possibly at considerable computing costs).\n \n \ You can protect specific jobs from harsh cleaning by creating a file called \"NO_HARSH_CLEAN\" inside their directory,\n\ e.g. by using \"touch Polish/job045/NO_HARSH_CLEAN\". Below is a list of currently protected jobs (if any):\n \n"; for (int myjob = 0; myjob < pipeline.processList.size(); myjob++) { if (pipeline.processList[myjob].status == PROC_FINISHED_SUCCESS && (pipeline.processList[myjob].type == PROC_MOTIONCORR || pipeline.processList[myjob].type == PROC_EXTRACT || pipeline.processList[myjob].type == PROC_SUBTRACT)) { if (exists(pipeline.processList[myjob].name + "NO_HARSH_CLEAN")) ask += pipeline.processList[myjob].name + " \n"; } } } else { ask = "Are you sure you want to gently clean up intermediate files from the entire pipeline?"; } proceed = fl_choice("%s", "Cancel", "Clean up", NULL, ask.c_str()); if (proceed) { std::string how = (do_harsh) ? "Harshly" : "Gently"; std::cout << how << " cleaning all finished jobs ..." << std::endl; std::string error_message; if (!pipeline.cleanupAllJobs(do_harsh, error_message)) fl_message("%s",error_message.c_str()); fl_message("Done cleaning! Don't forget the files are all still in the Trash folder. 
Use the \"Empty Trash\" option from the File menu to permanently delete them."); } } // Run button call-back functions void GuiMainWindow::cb_gentle_cleanup(Fl_Widget* o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_cleanup_i(-1, true, false); } void GuiMainWindow::cb_harsh_cleanup(Fl_Widget* o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_cleanup_i(-1, true, true); } void GuiMainWindow::cb_cleanup_i(int myjob, bool do_verb, bool do_harsh) { // Allow cleaning the currently selected job from the GUI if (myjob < 0) { if (current_job < 0) { fl_message("Please select a job."); return; } myjob = current_job; } int proceed = 1; if (do_verb) { std::string ask; ask = "Are you sure you want to clean up intermediate files from " + pipeline.processList[current_job].name + "?"; proceed = fl_choice("%s", "Cancel", "Clean up", NULL, ask.c_str()); } if (proceed) { std::string error_message; if (!pipeline.cleanupJob(myjob, do_harsh, error_message)) fl_message("%s",error_message.c_str()); } } // Run button call-back functions void GuiMainWindow::cb_set_alias(Fl_Widget* o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_set_alias_i(); } void GuiMainWindow::cb_set_alias_i(std::string alias) { if (current_job < 0) { fl_message("Please select a job."); return; } FileName fn_pre, fn_jobnr, fn_post, fn_dummy, default_ask; if (!decomposePipelineFileName(pipeline.processList[current_job].name, fn_pre, fn_jobnr, fn_post)) REPORT_ERROR("GuiMainWindow::cb_set_alias_i ERROR: invalid pipeline process name: " + pipeline.processList[current_job].name); // Start the asking window with the current alias std::string error_message; FileName fn_alias = pipeline.processList[current_job].alias; if (fn_alias != "None") { default_ask = fn_alias.without(fn_pre); if (default_ask[default_ask.length()-1] == '/') default_ask = default_ask.beforeLastOf("/"); } else default_ask = fn_jobnr.beforeLastOf("/"); bool is_done = false; while (!is_done) { // If the alias already contains a uniquedate string it may be a continuation of a relion_refine job // (where alias_current_job contains a different uniqdate than the outputname of the job) if (alias == "" || decomposePipelineFileName(alias, fn_dummy, fn_dummy, fn_dummy) ) // if an alias is provided, just check it is unique, otherwise ask { const char * palias; palias = fl_input("Rename to: ", default_ask.c_str()); if (palias == NULL) return; std::string al2(palias); alias = al2; } if (pipeline.setAliasJob(current_job, alias, error_message)) is_done = true; else { alias = ""; fl_message("%s",error_message.c_str()); } } } // Run button call-back functions void GuiMainWindow::cb_abort(Fl_Widget* o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_abort_i(); } void GuiMainWindow::cb_abort_i(std::string alias) { if (current_job < 0) { fl_message("Please select a job."); return; } if (pipeline.processList[current_job].status != PROC_RUNNING) { std::string error_message = "You can only abort running jobs ... 
"; fl_message("%s",error_message.c_str()); } else { std::string ask = "Are you sure you want to abort job: " + pipeline.processList[current_job].name + " ?"; int proceed = fl_choice("%s", "Cancel", "Abort!", NULL, ask.c_str()); if (proceed) { touch(pipeline.processList[current_job].name + RELION_JOB_ABORT_NOW); } } } // Run button call-back functions void GuiMainWindow::cb_mark_as_finished(Fl_Widget* o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_mark_as_finished_i(); } // Run button call-back functions void GuiMainWindow::cb_mark_as_failed(Fl_Widget* o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_mark_as_finished_i(true); } void GuiMainWindow::cb_mark_as_finished_i(bool is_failed) { if (current_job < 0) { fl_message("You can only mark existing jobs as finished!"); return; } std::string error_message; if (!pipeline.markAsFinishedJob(current_job, error_message, is_failed)) fl_message("%s",error_message.c_str()); else updateJobLists(); } // Run button call-back functions void GuiMainWindow::cb_make_flowchart(Fl_Widget* o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_make_flowchart_i(); } void GuiMainWindow::cb_make_flowchart_i() { if (current_job < 0) { fl_message("Please select a job."); return; } std::string error_message; if (!pipeline.makeFlowChart(current_job, true, error_message)) fl_message("%s",error_message.c_str()); else updateJobLists(); } void GuiMainWindow::cb_edit_note(Fl_Widget*, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_edit_note_i(); } void GuiMainWindow::cb_edit_project_note(Fl_Widget*, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_edit_note_i(true); // true means is_project_note } void GuiMainWindow::cb_edit_note_i(bool is_project_note) { FileName fn_note; std::string title; if (is_project_note) { fn_note = "project_note.txt"; title = "Overall project notes"; } else { if (current_job < 0) { fl_message(" You can only edit the note for existing jobs ... "); return; } fn_note = pipeline.processList[current_job].name + "note.txt"; title = (pipeline.processList[current_job].alias == "None") ? 
pipeline.processList[current_job].name : pipeline.processList[current_job].alias; } NoteEditorWindow* w = new NoteEditorWindow(660, 400, title.c_str(), fn_note, !maingui_do_read_only); w->show(); } // Save button call-back function void GuiMainWindow::cb_save(Fl_Widget* o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_save_i(); } void GuiMainWindow::cb_save_i() { // Get which job we're dealing with, and update it from the GUI int iwin = browser->value() - 1; gui_jobwindows[iwin]->updateMyJob(); // For scheduled jobs, also allow saving the .job file in the output directory if (current_job >= 0 && (pipeline.processList[current_job].status == PROC_SCHEDULED)) { gui_jobwindows[iwin]->myjob.write(pipeline.processList[current_job].name); } // Write the hidden file gui_jobwindows[iwin]->myjob.write(""); } // Load button call-back function void GuiMainWindow::cb_load(Fl_Widget* o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_load_i(); } void GuiMainWindow::cb_load_i() { int iwin = browser->value() - 1; gui_jobwindows[iwin]->myjob.read("", is_main_continue); alias_current_job->value("Give_alias_here"); gui_jobwindows[iwin]->updateMyGui(); // Make the current continue-setting active cb_toggle_continue_i(); } // Undelete-job button call-back function void GuiMainWindow::cb_undelete_job(Fl_Widget* o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_undelete_job_i(); } void GuiMainWindow::cb_undelete_job_i() { std::string fn_dir = "./Trash/."; std::string fn_filter = "Pipeline STAR files (job_pipeline.star)"; Fl_File_Chooser chooser(fn_dir.c_str(), fn_filter.c_str(), Fl_File_Chooser::SINGLE, "Choose pipeline STAR file to import"); chooser.show(); // Block until user picks something. while(chooser.shown()) { Fl::wait(); } // User hit cancel? if ( chooser.value() == NULL ) return; char relname[FL_PATH_MAX]; fl_filename_relative(relname,sizeof(relname),chooser.value()); FileName fn_pipe(relname); pipeline.undeleteJob(fn_pipe); } void GuiMainWindow::cb_export_jobs(Fl_Widget* o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_export_jobs_i(); } void GuiMainWindow::cb_export_jobs_i() { // Get the name of this block of exported jobs and make the corresponding directory const char * answer; std::string default_answer="export1"; answer = fl_input("Name of the exported block of jobs? ", default_answer.c_str()); if (answer == NULL) return; // user pressed Cancel std::string mydir(answer); std::string error_message; if (!pipeline.exportAllScheduledJobs(mydir, error_message)) fl_message("%s",error_message.c_str()); } void GuiMainWindow::cb_import_jobs(Fl_Widget* o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_import_jobs_i(); } void GuiMainWindow::cb_import_jobs_i() { // Get the directory with the Exported jobs std::string fn_dir = "."; std::string fn_filter = "Export STAR file (exported.star)"; Fl_File_Chooser chooser(fn_dir.c_str(), fn_filter.c_str(), Fl_File_Chooser::SINGLE, "Choose pipeline STAR file to import"); chooser.show(); // Block until user picks something. while(chooser.shown()) { Fl::wait(); } // User hit cancel?
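// Fl_File_Chooser::value() returns NULL when the dialog is closed without a selection.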
if ( chooser.value() == NULL ) return; FileName fn_export(chooser.value()); pipeline.importJobs(fn_export); // refresh the joblists updateJobLists(); } // Re-order running and finished job lists void GuiMainWindow::cb_order_jobs_alphabetically(Fl_Widget* o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; do_order_alphabetically = true; T->fillRunningJobLists(); } // Re-order running and finished job lists void GuiMainWindow::cb_order_jobs_chronologically(Fl_Widget* o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; do_order_alphabetically = false; T->fillRunningJobLists(); } // Empty-trash button call-back function void GuiMainWindow::cb_empty_trash(Fl_Widget* o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_empty_trash_i(); } void GuiMainWindow::cb_empty_trash_i() { std::string ask = "Are you sure you want to remove the entire Trash folder?"; int proceed = fl_choice("%s", "Cancel", "Empty Trash", NULL, ask.c_str()); if (proceed) { std::string command = "rm -rf Trash"; std::cout << " Executing: " << command << std::endl; int res = system(command.c_str()); } } void GuiMainWindow::cb_print_notes(Fl_Widget*, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_print_notes_i(); } void GuiMainWindow::cb_print_notes_i() { std::ofstream fh; FileName fn_tmp = pipeline.name + "_all_notes.txt"; fh.open((fn_tmp).c_str(), std::ios::out); for (size_t i = 0; i < pipeline.processList.size(); i++) { FileName fn_note = pipeline.processList[i].name+"note.txt"; fh << " ################################################################ " << std::endl; fh << " # Job= " << pipeline.processList[i].name; if (pipeline.processList[i].alias != "None") fh <<" alias: " << pipeline.processList[i].alias; fh << std::endl; if (exists(fn_note)) { std::ifstream in(fn_note.data(), std::ios_base::in); std::string line; if (in.fail()) REPORT_ERROR( (std::string) "ERROR: cannot read file " + fn_note); in.seekg(0); while (getline(in, line, '\n')) { fh << line << std::endl; } in.close(); } } fh.close(); fl_message("Done writing all notes into file: %s" , fn_tmp.c_str()); } void GuiMainWindow::cb_remake_nodesdir(Fl_Widget*, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_remake_nodesdir_i(); } void GuiMainWindow::cb_remake_nodesdir_i() { pipeline.remakeNodeDirectory(); } void GuiMainWindow::cb_reread_pipeline(Fl_Widget*, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_reread_pipeline_i(); } void GuiMainWindow::cb_reread_pipeline_i() { std::string lock_message = " mainGUI reread_pipeline_i"; pipeline.read(DO_LOCK, lock_message); // With the locking system, each read needs to be followed soon with a write pipeline.write(DO_LOCK); } void GuiMainWindow::cb_reactivate_runbutton(Fl_Widget* o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_reactivate_runbutton_i(); } void GuiMainWindow::cb_reactivate_runbutton_i() { run_button->activate(); } void GuiMainWindow::cb_toggle_overwrite_continue(Fl_Widget* o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_toggle_overwrite_continue_i(); } void GuiMainWindow::cb_toggle_overwrite_continue_i() { do_overwrite_continue = !do_overwrite_continue; cb_toggle_continue_i(); } void GuiMainWindow::cb_show_initial_screen(Fl_Widget* o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_show_initial_screen_i(); } void GuiMainWindow::cb_show_initial_screen_i() { run_button->deactivate(); cb_select_browsegroup_i(true); } void GuiMainWindow::cb_toggle_pipeliner_scheduler(Fl_Widget* o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_toggle_pipeliner_scheduler_i(); } void 
GuiMainWindow::cb_toggle_pipeliner_scheduler_i() { if (show_scheduler) { pipeliner_grp->hide(); scheduler_grp->show(); // If this schedule is running, then use the I/O viewer, otherwise use Jobs viewer show_expand_stdout = !schedule.isWriteLocked(); cb_toggle_expand_stdout_i(); } else { scheduler_grp->hide(); pipeliner_grp->show(); // After toggling, always go back to non-expanded view show_expand_stdout = true; cb_toggle_expand_stdout_i(); } } void GuiMainWindow::cb_create_schedule(Fl_Widget* o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; if (!show_scheduler) return; std::string mesg = " Name of the new schedule: "; const char* name = fl_input("%s", "", mesg.c_str()); FileName fn_new = std::string(name); if (fn_new.length() > 0) { T->cb_toggle_schedule_i(false, fn_new); } } void GuiMainWindow::cb_copy_schedule(Fl_Widget* o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; if (!show_scheduler) return; std::string mesg = " Name of the copy schedule: "; const char* name = fl_input("%s", "", mesg.c_str()); FileName fn_copy = std::string(name); if (fn_copy.length() > 0) { schedule.copy("Schedules/" + fn_copy); T->cb_toggle_schedule_i(false, fn_copy); } } void GuiMainWindow::cb_toggle_schedule(Fl_Widget* o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_toggle_schedule_i(false); } void GuiMainWindow::cb_toggle_pipeline(Fl_Widget* o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_toggle_schedule_i(true); } void GuiMainWindow::cb_toggle_schedule_i(bool do_pipeline, FileName fn_new_schedule) { if (do_pipeline) { // Read in the pipeline STAR file if it exists pipeline.name = "default"; show_scheduler = false; } else { show_scheduler = true; FileName fn_sched; if (fn_new_schedule != "") { fn_sched = "Schedules/" + fn_new_schedule; // Also add entry to the menu std::string mylabel = "Schedules/Schedules/" + fn_new_schedule; menubar->add(mylabel.c_str(), 0, cb_toggle_schedule, this); } else { fn_sched = "Schedules/" + std::string(menubar->text()); } schedule.setName(fn_sched+"/"); pipeline.name = fn_sched+"/schedule"; // Read in scheduler or create new one if it did not exist if (exists(schedule.name+"schedule.star")) { schedule.read(DONT_LOCK); pipeline.name = fn_sched+"/schedule"; } else { std::string command = "mkdir -p " + fn_sched; int res = system(command.c_str()); schedule.write(DONT_LOCK); // empty write } fillStdOutAndErr(); } if (exists(pipeline.name + "_pipeline.star")) { std::string lock_message = "mainGUI constructor"; pipeline.read(DO_LOCK, lock_message); // With the locking system, each read needs to be followed soon with a write pipeline.write(DO_LOCK); } else { pipeline.write(); } cb_toggle_pipeliner_scheduler_i(); updateJobLists(); } void GuiMainWindow::cb_start_pipeliner(Fl_Widget* o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_start_pipeliner_i(); } void GuiMainWindow::cb_start_pipeliner_i() { std::vector job_names; for (long int ii =0; ii < scheduled_processes.size(); ii++) { long int id = scheduled_processes[ii]; job_names.push_back(pipeline.processList[id].name); } SchedulerWindow* w = new SchedulerWindow(400, 300, "Select which jobs to execute"); w->fill(pipeline.name, job_names); } void GuiMainWindow::cb_stop_pipeliner(Fl_Widget* o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_stop_pipeliner_i(); } void GuiMainWindow::cb_stop_pipeliner_i() { std::string fn_filter = "Pipeline scheduled file (RUNNING_PIPELINER_" + pipeline.name + "_*)"; Fl_File_Chooser chooser(".", fn_filter.c_str(), Fl_File_Chooser::SINGLE, "Choose which scheduler to stop"); 
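// Note (added comment): the file chosen below is one of the RUNNING_PIPELINER_* marker files; removing it further down is what tells the corresponding scheduled pipeliner run to stop.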
chooser.show(); // Block until user picks something. while(chooser.shown()) { Fl::wait(); } // User hit cancel? if ( chooser.value() == NULL ) return; FileName fn_del(chooser.value()); std::cout << " Deleting file : " << fn_del << std::endl; std::remove(fn_del.c_str()); } void GuiMainWindow::cb_toggle_expand_stdout(Fl_Widget* o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_toggle_expand_stdout_i(); } void GuiMainWindow::cb_toggle_expand_stdout_i() { if (show_expand_stdout) { expand_stdout_grp->hide(); if (show_scheduler) { scheduler_jobs_grp->show(); } else { pipeliner_jobs_grp->show(); } expand_stdout_button->label("I/O view"); show_expand_stdout = false; } else { expand_stdout_grp->show(); if (show_scheduler) { scheduler_jobs_grp->hide(); } else { pipeliner_jobs_grp->hide(); } expand_stdout_button->label("Job view"); show_expand_stdout = true; } } void GuiMainWindow::cb_about(Fl_Widget* o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_about_i(); } void GuiMainWindow::cb_about_i() { #define HELPTEXT ("RELION " RELION_SHORT_VERSION " \n\n\ RELION is developed in the groups of:\n\n\ Sjors H.W. Scheres at the MRC Laboratory of Molecular Biology\n\ - Sjors H.W. Scheres\n\ - Shaoda He\n\ - Takanori Nakane\n\ - Jasenko Zivanov\n\ - Liyi Dong\n\ - Dari Kimanius\n\ \n\ and Erik Lindahl at Stockholm University\n\ - Erik Lindahl\n\ - Björn O. Forsberg\n\ \n\ Note that RELION is completely free, open-source software. You can redistribute it and/or modify it for your own purposes, but please do make sure \ the contributions of the developers are acknowledged appropriately. In order to maintain an overview of existing versions, a notification regarding \ any redistribution of (modified versions of) the code is appreciated (contact Sjors directly).\n\n\ If RELION is useful in your work, please cite us. Relevant papers are:\n \n \ * General Bayesian approach (and first mention of RELION):\n\ Scheres (2012) J. Mol. Biol. (PMID: 22100448)\n\n\ * RELION implementation details and the 3D auto-refine procedure:\n\ Scheres (2012) J. Struct. Biol. (PMID: 23000701)\n\n\ * Gold-standard FSC and the relevance of the 0.143 criterion:\n\ Scheres & Chen (2012) Nat. Meth. (PMID: 22842542)\n\n\ * Correction of mask effects on the FSC curve by randomised phases:\n\ Chen et al. (2013) Ultramicroscopy (PMID: 23872039)\n\n\ * Auto-picking :\n\ Scheres (2014) J. Struct. Biol. (PMID: 25486611)\n\n\ * Sub-tomogram averaging :\n\ Bharat et al. (2015) Structure (PMID: 26256537)\n\n\ * RELION 2.0 GPU capability and autopicking acceleration:\n\ Kimanius et al. (2016) eLife (PMID: 27845625)\n\n\ * Helical reconstruction:\n\ He & Scheres (2017) J. Struct. Biol. (PMID: 28193500)\n\n\ * RELION 3.0 CTFRefine, RelionIt, CPU acceleration, Ewald sphere correction:\n\ Zivanov et al. (2018) eLife (PMID: 30412051)\n\n\ * Multibody refinement:\n\ Nakane et al. (2018) eLife (PMID: 29856314)\n\n\ * Bayesian Polishing:\n\ Zivanov et al. (2019) IUCrJ (PMID: 30713699)\n\n\ * Higher-order aberration correction, magnification anisotropy correction\n\ Zivanov et al. (2020) IUCrJ (PMID: 32148853)\n\n\ * Amyloid structure determination:\n\ Scheres (2020) Acta Crystallogr. D (PMID: 32038040)\n\n\ \ Please also cite relevant papers when you used external programs or their algorithms re-implemented in RELION: \n \n \ * MOTIONCOR2 algorithm for beam-induced motion correction:\n\ Zheng et al. (2017) Nat. Meth. (PMID: 28250466)\n\n\ * CTFFIND4 for CTF-estimation:\n\ Rohou & Grigorieff (2015) J. Struct. Biol.
(PMID: 26278980)\n\n\ * GCTF for CTF-estimation:\n\ Zhang (2016) J. Struct. Biol. (PMID: 26592709)\n\n\ * Stochastic Gradient Descent for initial model generation:\n\ Punjani et al. (2017) Nat. Meth. (PMID: 28165473)\n\n\ * ResMap for local-resolution estimation:\n\ Kucukelbir et al. (2014) Nat. Meth. (PMID: 24213166)\n\n\ * Symmetry relaxation:\n\ Ilca et al. (2019) Nature (PMID: 31142835) \n\ Abrishami et al. (2020) Prog. Biophys. Mol. Biol. (PMID: 32470354) \n\n\ * Postscript plots are made using CPlot2D from http://www.amzsaki.com\n\n\ \ About the start up screen:\n\n\ The map shown is the cryo-EM map of mouse heavy-chain apoferritin\n\ at 1.54 A (EMDB-9865). This is the highest resolution single particle\n\ reconstruction map deposited to EMDB as of August 2019. The raw dataset\n\ is also available at EMPIAR-10248.\ ") ShowHelpText *help = new ShowHelpText(HELPTEXT); } void GuiMainWindow::cb_quit(Fl_Widget* o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_quit_i(); } void GuiMainWindow::cb_quit_i() { exit(0); } relion-3.1.3/src/gui_mainwindow.h000066400000000000000000000422251411340063500167700ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #ifndef SRC_GUI_MAINWINDOW_H_ #define SRC_GUI_MAINWINDOW_H_ #define Complex tmpComplex #include #include "src/gui_jobwindow.h" #undef Complex #include "src/pipeliner.h" #include "src/scheduler.h" #include #include #include #include #include #include #include // Sizing #define JOBCOLWIDTH (250) #define XJOBCOL1 (10) #define XJOBCOL2 (JOBCOLWIDTH + 25) #define XJOBCOL3 (2*JOBCOLWIDTH + 40) #define JOBHEIGHT (170) #define JOBHALFHEIGHT ( (JOBHEIGHT) / (2) ) #define STDOUT_Y (60) #define STDERR_Y (170) #define DO_WRITE true #define DONT_WRITE false #define DO_READ true #define DONT_READ false #define DO_TOGGLE_CONT true #define DONT_TOGGLE_CONT false #define DO_GET_CL true #define DONT_GET_CL false #define DO_MKDIR true #define DONT_MKDIR false // font size of browser windows on the main GUI #define RLN_FONTSIZE 13 // Maximum number of jobs in the job-browsers in the pipeline-part of the GUI #define MAX_JOBS_BROWSER 50 // This class organises the main window of the relion GUI static Fl_Hold_Browser *browser; static Fl_Group *browse_grp[NR_BROWSE_TABS]; static Fl_Group *background_grp; static Fl_Group *pipeliner_jobs_grp; static Fl_Group *pipeliner_grp; static Fl_Group *scheduler_grp; static Fl_Group *scheduler_run_grp; static Fl_Group *scheduler_jobs_grp; static Fl_Group *expand_stdout_grp; static Fl_Choice *display_io_node; static Fl_Select_Browser *finished_job_browser, *running_job_browser, *scheduled_job_browser, *input_job_browser, *output_job_browser; static Fl_Box *image_box; static Fl_Pixmap *xpm_image; // For keeping track of which process to use in the process browser on the GUI static std::vector running_processes, finished_processes, scheduled_processes, input_processes, output_processes, io_nodes; static bool is_main_continue; static bool do_overwrite_continue; static JobWindow *gui_jobwindows[NR_BROWSE_TABS]; // Run button // Sjors 16feb2018: somehow suddenly this run_button needs to be a non-static: otherwise it doesn't change to 'continue now' and doesnt grey out... 
static Fl_Button *run_button; static Fl_Button *print_CL_button; static Fl_Button *schedule_button; static Fl_Button *expand_stdout_button; static Fl_Input *alias_current_job; // Sjors 27May2019: scheduler static Fl_Input *scheduler_job_name; static Fl_Button *add_job_button; static Fl_Choice *scheduler_job_mode, *scheduler_job_has_started; static Fl_Menu_Item job_mode_options[] = { {"new"}, {"continue"}, {"overwrite"}, {0} // this should be the last entry }; static Fl_Menu_Item job_has_started_options[] = { {"has started"}, {"has not started"}, {0} // this should be the last entry }; // Scheduler variables static Fl_Hold_Browser *scheduler_variable_browser; static Fl_Button *set_scheduler_variable_button, *add_scheduler_operator_button; static Fl_Button *delete_scheduler_variable_button, *delete_scheduler_operator_button; static Fl_Input *scheduler_variable_name, *scheduler_variable_value; //Scheduler Operators static Fl_Hold_Browser *scheduler_operator_browser; static std::vector operators_list; static Fl_Menu_Item operator_type_options[] = { {SCHEDULE_FLOAT_OPERATOR_SET}, {SCHEDULE_FLOAT_OPERATOR_PLUS}, {SCHEDULE_FLOAT_OPERATOR_MINUS}, {SCHEDULE_FLOAT_OPERATOR_MULT}, {SCHEDULE_FLOAT_OPERATOR_DIVIDE}, {SCHEDULE_FLOAT_OPERATOR_ROUND}, {SCHEDULE_FLOAT_OPERATOR_COUNT_IMAGES}, {SCHEDULE_FLOAT_OPERATOR_COUNT_WORDS}, {SCHEDULE_FLOAT_OPERATOR_READ_STAR}, {SCHEDULE_FLOAT_OPERATOR_READ_STAR_TABLE_MAX}, {SCHEDULE_FLOAT_OPERATOR_READ_STAR_TABLE_MIN}, {SCHEDULE_FLOAT_OPERATOR_READ_STAR_TABLE_AVG}, {SCHEDULE_FLOAT_OPERATOR_READ_STAR_TABLE_SORT_IDX}, {SCHEDULE_BOOLEAN_OPERATOR_AND}, {SCHEDULE_BOOLEAN_OPERATOR_OR}, {SCHEDULE_BOOLEAN_OPERATOR_NOT}, {SCHEDULE_BOOLEAN_OPERATOR_GT}, {SCHEDULE_BOOLEAN_OPERATOR_LT}, {SCHEDULE_BOOLEAN_OPERATOR_EQ}, {SCHEDULE_BOOLEAN_OPERATOR_GE}, {SCHEDULE_BOOLEAN_OPERATOR_LE}, {SCHEDULE_BOOLEAN_OPERATOR_FILE_EXISTS}, {SCHEDULE_BOOLEAN_OPERATOR_READ_STAR}, {SCHEDULE_STRING_OPERATOR_JOIN}, {SCHEDULE_STRING_OPERATOR_BEFORE_FIRST}, {SCHEDULE_STRING_OPERATOR_BEFORE_LAST}, {SCHEDULE_STRING_OPERATOR_AFTER_FIRST}, {SCHEDULE_STRING_OPERATOR_AFTER_LAST}, {SCHEDULE_STRING_OPERATOR_READ_STAR}, {SCHEDULE_STRING_OPERATOR_GLOB}, {SCHEDULE_STRING_OPERATOR_NTH_WORD}, {SCHEDULE_OPERATOR_TOUCH_FILE}, {SCHEDULE_OPERATOR_COPY_FILE}, {SCHEDULE_OPERATOR_MOVE_FILE}, {SCHEDULE_OPERATOR_DELETE_FILE}, {SCHEDULE_WAIT_OPERATOR_SINCE_LAST_TIME}, {SCHEDULE_EMAIL_OPERATOR}, {SCHEDULE_EXIT_OPERATOR}, {0} // this should be the last entry }; static Fl_Choice *scheduler_operator_type, *scheduler_operator_output, *scheduler_operator_input1, *scheduler_operator_input2; // Scheduler jobs static Fl_Hold_Browser *scheduler_job_browser, *scheduler_input_job_browser, *scheduler_output_job_browser; static Fl_Button *scheduler_delete_job_button; //Scheduler Edges static Fl_Choice *scheduler_edge_input, *scheduler_edge_output, *scheduler_edge_boolean, *scheduler_edge_outputtrue; static Fl_Hold_Browser *scheduler_edge_browser; static Fl_Button *delete_scheduler_edge_button, *add_scheduler_edge_button; // Scheduler current state static Fl_Choice *scheduler_current_node; static Fl_Button *scheduler_run_button, *scheduler_reset_button, *scheduler_set_current_button; static Fl_Button *scheduler_next_button, *scheduler_prev_button; static Fl_Button *scheduler_abort_button, *scheduler_unlock_button; static Fl_Text_Buffer *textbuff_stdout; static Fl_Text_Buffer *textbuff_stderr; static void Gui_Timer_CB(void *userdata); // Read-only GUI? 
static bool maingui_do_read_only; // Show the scheduler view extern bool show_scheduler; // Show expand stdout view extern bool show_expand_stdout; // The pipeline this GUI is acting on static PipeLine pipeline; // The current Scheduler static Schedule schedule; // Which is the current job being displayed? static int current_job; static FileName global_outputname; // Order jobs in finished window alphabetically? static bool do_order_alphabetically; // The last time something changed static time_t time_last_change; // Stdout and stderr display class StdOutDisplay : public Fl_Text_Display { public: std::string fn_file; StdOutDisplay(int X, int Y, int W, int H, const char *l = 0) : Fl_Text_Display(X, Y, W, H, l){}; ~StdOutDisplay() {}; int handle(int ev); }; static StdOutDisplay *disp_stdout, *disp_expand_stdout; static StdOutDisplay *disp_stderr, *disp_expand_stderr; class NoteEditorWindow : public Fl_Window { public: FileName fn_note; Fl_Text_Editor *editor; Fl_Text_Buffer *textbuff_note; bool allow_save; NoteEditorWindow(int w, int h, const char* t, FileName _fn_note, bool _allow_save = true); ~NoteEditorWindow() {}; private: static void cb_save(Fl_Widget*, void*); inline void cb_save_i(); static void cb_cancel(Fl_Widget*, void*); inline void cb_cancel_i(); }; class SchedulerWindow : public Fl_Window { public: FileName pipeline_name; // Name of this pipeline (e.g. default) std::vector check_buttons; Fl_Input *repeat, *wait_before, *wait, *schedule_name, *wait_after; std::vector my_jobs; // Which jobs to execute SchedulerWindow(int w, int h, const char* title): Fl_Window(w, h, title){} ~SchedulerWindow() {}; int fill(FileName _pipeline_name, std::vector _scheduled_jobs); private: static void cb_execute(Fl_Widget*, void*); inline void cb_execute_i(); static void cb_cancel(Fl_Widget*, void*); inline void cb_cancel_i(); }; /* class SchedulerAddVariableOperatorWindow : public Fl_Window { public: Fl_Input *name, *value, *type, *input1, *input2, *output; SchedulerAddVariableOperatorWindow(int w, int h, const char* title): Fl_Window(w, h, title){} ~SchedulerAddVariableOperatorWindow() {}; int fill(bool is_variable, bool is_add); private: static void cb_add(Fl_Widget*, void*); inline void cb_add_i(); static void cb_cancel(Fl_Widget*, void*); inline void cb_cancel_i(); }; */ class GuiMainWindow : public Fl_Window { public: // For Tabs Fl_Menu_Bar *menubar, *menubar2; // For clicking in stdout/err windows StdOutDisplay *stdoutbox, *stderrbox; // Update GUI every how many seconds int update_every_sec; // Exit GUI after how many seconds idle? 
float exit_after_sec; // For job submission std::string final_command; std::vector commands; // Constructor with w x h size of the window and a title GuiMainWindow(int w, int h, const char* title, FileName fn_pipe, FileName fn_sched, int _update_every_sec, int _exit_after_sec, bool _do_read_only = false); // Destructor ~GuiMainWindow(){ clear(); }; // Clear stuff void clear(); // How will jobs be displayed in the GUI job running, finished, in, out & scheduled job lists std::string getJobNameForDisplay(Process &job); // Update the content of the finished, running and scheduled job lists void fillRunningJobLists(); // Update the content of the input and output job lists for the current job void fillToAndFromJobLists(); void fillSchedulerNodesAndVariables(); // Need public access for auto-updating the GUI void fillStdOutAndErr(); // Touch the TimeStamp of the last change void tickTimeLastChanged(); // Update all job lists (running, scheduled, finished, as well as to/from) void updateJobLists(); // When a job is selected from the job browsers at the bottom: set current_job there, load that one in the current window // and update all job lists at the bottom void loadJobFromPipeline(int this_job); // Run scheduled jobs from the pipeliner void runScheduledJobs(FileName fn_sched, FileName fn_jobids, int nr_repeat, long int minutes_wait); private: // Vertical distance from the top int start_y; // Current height int current_y; /** Call-back functions * The method of using two functions of static void and inline void was copied from: * http://www3.telus.net/public/robark/ */ static void cb_select_browsegroup(Fl_Widget*, void*); inline void cb_select_browsegroup_i(bool is_initial = false); static void cb_select_finished_job(Fl_Widget*, void*); inline void cb_select_finished_job_i(); static void cb_select_running_job(Fl_Widget*, void*); inline void cb_select_running_job_i(); static void cb_select_scheduled_job(Fl_Widget*, void*); inline void cb_select_scheduled_job_i(); static void cb_select_input_job(Fl_Widget*, void*); inline void cb_select_input_job_i(); static void cb_select_output_job(Fl_Widget*, void*); inline void cb_select_output_job_i(); static void cb_display_io_node(Fl_Widget*, void*); inline void cb_display_io_node_i(); static void cb_add_scheduler_edge(Fl_Widget*, void*); inline void cb_add_scheduler_edge_i(); static void cb_delete_scheduler_edge(Fl_Widget*, void*); inline void cb_delete_scheduler_edge_i(); static void cb_select_scheduler_edge(Fl_Widget*, void*); inline void cb_select_scheduler_edge_i(); static void cb_set_scheduler_variable(Fl_Widget*, void*); inline void cb_set_scheduler_variable_i(); static void cb_delete_scheduler_variable(Fl_Widget*, void*); inline void cb_delete_scheduler_variable_i(); static void cb_select_scheduler_variable(Fl_Widget*, void*); inline void cb_select_scheduler_variable_i(); static void cb_add_scheduler_operator(Fl_Widget*, void*); inline void cb_add_scheduler_operator_i(); static void cb_delete_scheduler_operator(Fl_Widget*, void*); inline void cb_delete_scheduler_operator_i(); static void cb_select_scheduler_operator(Fl_Widget*, void*); inline void cb_select_scheduler_operator_i(); static void cb_delete_scheduler_job(Fl_Widget*, void*); inline void cb_delete_scheduler_job_i(); static void cb_scheduler_add_job(Fl_Widget*, void*); inline void cb_scheduler_add_job_i(); static void cb_scheduler_set_current(Fl_Widget*, void*); inline void cb_scheduler_set_current_i(); static void cb_scheduler_next(Fl_Widget*, void*); inline void cb_scheduler_next_i(); 
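// Illustration (added comment, no new functionality) of the static/inline call-back pairing described in the comment above: the static member receives the FLTK (Fl_Widget*, void*) call-back, casts the user data back to the window object and forwards to the per-instance _i() method, e.g. as implemented for cb_save in gui_mainwindow.cpp:
//   void GuiMainWindow::cb_save(Fl_Widget* o, void* v) { GuiMainWindow* T=(GuiMainWindow*)v; T->cb_save_i(); }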
static void cb_scheduler_prev(Fl_Widget*, void*); inline void cb_scheduler_prev_i(); static void cb_scheduler_unlock(Fl_Widget*, void*); inline void cb_scheduler_unlock_i(); static void cb_scheduler_abort(Fl_Widget*, void*); inline void cb_scheduler_abort_i(); static void cb_scheduler_reset(Fl_Widget*, void*); inline void cb_scheduler_reset_i(); static void cb_scheduler_run(Fl_Widget*, void*); inline void cb_scheduler_run_i(); static void cb_display(Fl_Widget*, void*); inline void cb_display_i(); inline void cb_toggle_continue_i(); static void cb_run(Fl_Widget*, void*); static void cb_schedule(Fl_Widget*, void*); inline void cb_run_i(bool only_schedule = false, bool do_open_edit = true); static void cb_delete(Fl_Widget*, void*); inline void cb_delete_i(bool do_ask = true, bool do_recursive = true); static void cb_gently_clean_all_jobs(Fl_Widget*, void*); static void cb_harshly_clean_all_jobs(Fl_Widget*, void*); inline void cb_clean_all_jobs_i(bool do_harsh); static void cb_gentle_cleanup(Fl_Widget*, void*); static void cb_harsh_cleanup(Fl_Widget*, void*); inline void cb_cleanup_i(int myjob = -1, bool do_verb = true, bool do_harsh = false); static void cb_set_alias(Fl_Widget*, void*); inline void cb_set_alias_i(std::string newalias = ""); static void cb_abort(Fl_Widget*, void*); inline void cb_abort_i(std::string newalias = ""); static void cb_mark_as_finished(Fl_Widget*, void*); static void cb_mark_as_failed(Fl_Widget*, void*); inline void cb_mark_as_finished_i(bool is_failed = false); static void cb_make_flowchart(Fl_Widget*, void*); inline void cb_make_flowchart_i(); static void cb_edit_project_note(Fl_Widget*, void*); static void cb_edit_note(Fl_Widget*, void*); inline void cb_edit_note_i(bool is_project_note = false); static void cb_print_cl(Fl_Widget*, void*); inline void cb_print_cl_i(); static void cb_save(Fl_Widget*, void*); inline void cb_save_i(); static void cb_load(Fl_Widget*, void*); inline void cb_load_i(); static void cb_undelete_job(Fl_Widget*, void*); inline void cb_undelete_job_i(); static void cb_export_jobs(Fl_Widget*, void*); inline void cb_export_jobs_i(); static void cb_import_jobs(Fl_Widget*, void*); inline void cb_import_jobs_i(); static void cb_order_jobs_alphabetically(Fl_Widget*, void*); static void cb_order_jobs_chronologically(Fl_Widget*, void*); static void cb_empty_trash(Fl_Widget*, void*); inline void cb_empty_trash_i(); static void cb_print_notes(Fl_Widget*, void*); inline void cb_print_notes_i(); static void cb_remake_nodesdir(Fl_Widget*, void*); inline void cb_remake_nodesdir_i(); static void cb_reread_pipeline(Fl_Widget*, void*); inline void cb_reread_pipeline_i(); static void cb_reactivate_runbutton(Fl_Widget*, void*); inline void cb_reactivate_runbutton_i(); static void cb_toggle_overwrite_continue(Fl_Widget*, void*); inline void cb_toggle_overwrite_continue_i(); static void cb_show_initial_screen(Fl_Widget*, void*); inline void cb_show_initial_screen_i(); static void cb_toggle_pipeliner_scheduler(Fl_Widget*, void*); inline void cb_toggle_pipeliner_scheduler_i(); static void cb_copy_schedule(Fl_Widget*, void*); static void cb_toggle_schedule(Fl_Widget*, void*); static void cb_toggle_pipeline(Fl_Widget*, void*); static void cb_create_schedule(Fl_Widget*, void*); inline void cb_toggle_schedule_i(bool do_pipeline, FileName fn_new_schedule = ""); static void cb_start_pipeliner(Fl_Widget*, void*); inline void cb_start_pipeliner_i(); static void cb_stop_pipeliner(Fl_Widget*, void*); inline void cb_stop_pipeliner_i(); static void 
cb_toggle_expand_stdout(Fl_Widget*, void*); inline void cb_toggle_expand_stdout_i(); static void cb_about(Fl_Widget*, void*); inline void cb_about_i(); public: static void cb_quit(Fl_Widget*, void*); private: inline void cb_quit_i(); }; #endif /* SRC_NEWGUI_MAINWINDOW_CPP_ */ relion-3.1.3/src/healpix_sampling.cpp000066400000000000000000003062011411340063500176240ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include "src/healpix_sampling.h" //#define DEBUG_SAMPLING //#define DEBUG_CHECKSIZES //#define DEBUG_HELICAL_ORIENTATIONAL_SEARCH void HealpixSampling::clear() { is_3D = false; isRelax = false; fn_sym = "C1"; fn_sym_relax = "C1"; limit_tilt = psi_step = offset_range = offset_step = helical_offset_step = psi_step_ori = offset_range_ori = offset_step_ori = 0.; random_perturbation = perturbation_factor = 0.; // Jun19,2015 - Shaoda, Helical refinement helical_offset_step = -1.; directions_ipix.clear(); rot_angles.clear(); tilt_angles.clear(); psi_angles.clear(); translations_x.clear(); translations_y.clear(); translations_z.clear(); L_repository.clear(); R_repository.clear(); L_repository_relax.clear(); R_repository_relax.clear(); pgGroup = pgOrder = 0; pgGroupRelaxSym = pgOrderRelaxSym = 0; } void HealpixSampling::initialise( int ref_dim, bool do_3d_trans, bool do_changepsi, bool do_warnpsi, bool do_local_searches_helical, bool do_helical_refine, RFLOAT rise_Angst, RFLOAT twist_deg) { if (ref_dim != -1) is_3D = (ref_dim == 3); // Set the symmetry relaxation flag isRelax = fn_sym_relax == "" ? false : true; // Set flag for x,y,z-translations is_3d_trans = do_3d_trans; // By default psi_step is approximate sampling of rot,tilt in 3D; and 10 degrees in 2D if (psi_step < 0) { if (is_3D) psi_step = 360. / (6 * ROUND(std::pow(2., healpix_order))); else psi_step = 10.; } if (perturbation_factor < 0. || perturbation_factor > 1.) 
REPORT_ERROR("HealpixSampling::initialise: random perturbation factor should be between 0 and 1."); if (is_3D) { healpix_base.Set(healpix_order, NEST); // Set up symmetry R_repository.clear(); L_repository.clear(); initialiseSymMats(fn_sym, pgGroup, pgOrder, R_repository, L_repository); // Set up symmetry matrices for symmetry relax if (fn_sym_relax != "") { R_repository_relax.clear(); L_repository_relax.clear(); initialiseSymMats(fn_sym_relax, pgGroupRelaxSym, pgOrderRelaxSym, R_repository_relax, L_repository_relax); } } else { int t_nr_psi = CEIL(360./psi_step); if(t_nr_psi%32!=0 && do_changepsi) { // Force-adjust psi_step to be multiples of 32 (for efficient GPU calculations) t_nr_psi = CEIL((float)t_nr_psi / 32.0)*32; if (do_warnpsi) std::cout << " + WARNING: Changing psi sampling rate (before oversampling) to " << 360./(RFLOAT)t_nr_psi << " degrees, for more efficient GPU calculations" << std::endl; } psi_step = 360./(RFLOAT)t_nr_psi; fn_sym = "C1"; // This may not be set yet if restarting a 2D run.... } // Store the not-oversampled translations, and make sure oversampled sampling is 1 pixel //setTranslations(); // May06,2015 - Shaoda & Sjors, Helical translational searches setTranslations(-1, -1, do_local_searches_helical, do_helical_refine, -1, rise_Angst, twist_deg); // Store the non-oversampled projection directions setOrientations(-1, -1.); // Random perturbation and filling of the directions, psi_angles and translations vectors resetRandomlyPerturbedSampling(); // SHWS 27feb2020: Set original sampling rates to allow 2D/3D classifications using coarser ones in earlier iterations healpix_order_ori = healpix_order; psi_step_ori = psi_step; offset_range_ori = offset_range; offset_step_ori = offset_step; } void HealpixSampling::initialiseSymMats(FileName fn_sym_, int & pgGroup_, int & pgOrder_, std::vector > & R_repository_, std::vector > & L_repository_) { // Set up symmetry SymList SL; SL.isSymmetryGroup(fn_sym_, pgGroup_, pgOrder_); SL.read_sym_file(fn_sym_); // Precalculate (3x3) symmetry matrices Matrix2D L(4, 4), R(4, 4); Matrix2D Identity(3,3); Identity.initIdentity(); R_repository_.clear(); L_repository_.clear(); R_repository_.push_back(Identity); L_repository_.push_back(Identity); for (int isym = 0; isym < SL.SymsNo(); isym++) { SL.get_matrices(isym, L, R); R.resize(3, 3); L.resize(3, 3); R_repository_.push_back(R); L_repository_.push_back(L); } } void HealpixSampling::resetRandomlyPerturbedSampling() { // Actual instance of random perturbation // Add to the random perturbation from the last iteration, so it keeps changing strongly... random_perturbation += rnd_unif(0.5*perturbation_factor, perturbation_factor); random_perturbation = realWRAP(random_perturbation, -perturbation_factor, perturbation_factor); } void HealpixSampling::read(FileName fn_in) { // Open input file std::ifstream in(fn_in.data(), std::ios_base::in); if (in.fail()) REPORT_ERROR( (std::string) "HealpixSampling::readStar: File " + fn_in + " cannot be read." 
); MetaDataTable MD; // Read general stuff MD.readStar(in, "sampling_general"); in.close(); if (!MD.getValue(EMDL_SAMPLING_IS_3D, is_3D) || !MD.getValue(EMDL_SAMPLING_IS_3D_TRANS, is_3d_trans) || !MD.getValue(EMDL_SAMPLING_PSI_STEP, psi_step) || !MD.getValue(EMDL_SAMPLING_OFFSET_RANGE, offset_range) || !MD.getValue(EMDL_SAMPLING_OFFSET_STEP, offset_step) || !MD.getValue(EMDL_SAMPLING_PERTURBATION_FACTOR, perturbation_factor)) REPORT_ERROR("HealpixSampling::readStar: incorrect sampling_general table"); // Jun19,2015 - Shaoda, Helical translational searches, backward compatibility if (!MD.getValue(EMDL_SAMPLING_HELICAL_OFFSET_STEP, helical_offset_step)) helical_offset_step = -1.; // SHWS 27Feb2020: backwards compatibility: older star files will not yet have original sampling parameters, just use current ones if (!MD.getValue(EMDL_SAMPLING_OFFSET_STEP_ORI, offset_step_ori)) offset_step_ori = offset_step; if (!MD.getValue(EMDL_SAMPLING_OFFSET_RANGE_ORI, offset_range_ori)) offset_range_ori = offset_range; if (!MD.getValue(EMDL_SAMPLING_PSI_STEP_ORI, psi_step_ori)) psi_step_ori = psi_step; if (is_3D) { if (!MD.getValue(EMDL_SAMPLING_HEALPIX_ORDER, healpix_order) || !MD.getValue(EMDL_SAMPLING_SYMMETRY, fn_sym) || !MD.getValue(EMDL_SAMPLING_LIMIT_TILT, limit_tilt) ) REPORT_ERROR("HealpixSampling::readStar: incorrect sampling_general table for 3D sampling"); // For 3D samplings reset psi_step to -1: // By default it will then be set to the healpix sampling // Only if the --psi_step option is given on the command line it will be set to something different! psi_step = -1.; // SHWS 27Feb2020: backwards compatibility: older star files will not yet have original sampling parameters, just use current ones if (!MD.getValue(EMDL_SAMPLING_HEALPIX_ORDER_ORI, healpix_order_ori)) healpix_order_ori = healpix_order; } else { fn_sym = "irrelevant"; limit_tilt = 0.; healpix_order = 0; } } void HealpixSampling::write(FileName fn_out) { MetaDataTable MD; std::ofstream fh; FileName fn_tmp; fn_tmp = fn_out + "_sampling.star"; fh.open((fn_tmp).c_str(), std::ios::out); if (!fh) REPORT_ERROR( (std::string)"HealpixSampling::write: Cannot write file: " + fn_tmp); MD.setIsList(true); MD.addObject(); MD.setName("sampling_general"); MD.setValue(EMDL_SAMPLING_IS_3D, is_3D); MD.setValue(EMDL_SAMPLING_IS_3D_TRANS, is_3d_trans); if (is_3D) { MD.setValue(EMDL_SAMPLING_HEALPIX_ORDER, healpix_order); MD.setValue(EMDL_SAMPLING_SYMMETRY, fn_sym); MD.setValue(EMDL_SAMPLING_LIMIT_TILT, limit_tilt); } MD.setValue(EMDL_SAMPLING_PSI_STEP, psi_step); MD.setValue(EMDL_SAMPLING_OFFSET_RANGE, offset_range); MD.setValue(EMDL_SAMPLING_OFFSET_STEP, offset_step); // Jun19,2015 - Shaoda, Helical translational searches MD.setValue(EMDL_SAMPLING_HELICAL_OFFSET_STEP, helical_offset_step); MD.setValue(EMDL_SAMPLING_PERTURB, random_perturbation); MD.setValue(EMDL_SAMPLING_PERTURBATION_FACTOR, perturbation_factor); //27Feb2020 SHWS: write original sampling rates to allow 2D/3D classifications to use coarser ones in initial iterations MD.setValue(EMDL_SAMPLING_HEALPIX_ORDER_ORI, healpix_order_ori); MD.setValue(EMDL_SAMPLING_PSI_STEP_ORI, psi_step_ori); MD.setValue(EMDL_SAMPLING_OFFSET_RANGE_ORI, offset_range_ori); MD.setValue(EMDL_SAMPLING_OFFSET_STEP_ORI, offset_step_ori); MD.write(fh); // In the 3D case, also write a table with the sampled rot, tilt angles if (is_3D) { MD.clear(); MD.setIsList(false); MD.setName("sampling_directions"); for (long int idir = 0; idir < NrDirections(); idir++) { RFLOAT rot, tilt; getDirection(idir, rot, tilt); 
MD.addObject(); MD.setValue(EMDL_ORIENT_ROT, rot); MD.setValue(EMDL_ORIENT_TILT, tilt); } MD.write(fh); } // Close the file fh.close(); } void HealpixSampling::setTranslations( RFLOAT new_offset_step, RFLOAT new_offset_range, bool do_local_searches_helical, bool do_helical_refine, RFLOAT new_helical_offset_step, RFLOAT helical_rise_Angst, RFLOAT helical_twist_deg) { // Ordinary single particles int maxp; // Max half nr samplings in all directions RFLOAT xoff, yoff, zoff, max2, old_offset_step, old_offset_range; // Offset lengths // Helical refinement int maxh; // Max half nr samplings in along helical axis RFLOAT h_range, old_helical_offset_step; // Translations along helical axis // Check old and new offsets old_offset_step = offset_step; // can be < 0 ?????? old_offset_range = offset_range; // can be < 0 ?????? old_helical_offset_step = helical_offset_step; // can be < 0 if ( (new_offset_step > 0.) && (new_offset_range >= 0.) ) { offset_step = new_offset_step; offset_range = new_offset_range; } else { if (!(offset_step > 0.)) { std::cerr << " offset_range= " << offset_range << " offset_step= " << offset_step << std::endl; REPORT_ERROR("HealpixSampling::setTranslations BUG %% Trying to set translations with uninitialised offset_step!"); } } // Sometimes new offsets are set to -1, that means the old offsets remain unchanged. new_offset_step = offset_step; // > 0 new_offset_range = offset_range; // >= 0 // Ordinary single particles maxp = CEIL(offset_range / offset_step); // Perpendicular to helical axis (P1, P2) maxh = maxp; // Helical refinement if (do_helical_refine) { // Assume all helical parameters are valid (this should be checked before in ml_optimiser.cpp) helical_rise_Angst = fabs(helical_rise_Angst); helical_twist_deg = fabs(helical_twist_deg); // Search range (half) along helical axis = (-0.5 * rise, +0.5 * rise) h_range = (helical_rise_Angst / 2.); // If continue from old run or new offset is not applicable... if (new_helical_offset_step < 0.) new_helical_offset_step = old_helical_offset_step; // New_helical_offset_step is not OK. // (1) negative value // (2) larger than before (if there is a valid offset before) // (3) larger than new_offset_step // (4) samplings along helical axis is less than 3 if ( (new_helical_offset_step < 0.) || ( (new_helical_offset_step > old_helical_offset_step) && (old_helical_offset_step > 0.) ) || (new_helical_offset_step > new_offset_step) || ((helical_rise_Angst / new_helical_offset_step) < 3.) ) { // First try 'new_offset_step' new_helical_offset_step = new_offset_step; // Change to 'old_helical_offset_step' if the old is smaller if ( (old_helical_offset_step > 0.) && (new_helical_offset_step > old_helical_offset_step) ) new_helical_offset_step = old_helical_offset_step; // New_helical_offset_step should be finer than 1/3 the helical rise if ( (helical_rise_Angst / new_helical_offset_step) < 3.) new_helical_offset_step = helical_rise_Angst / 3.; } maxh = CEIL(h_range / new_helical_offset_step); // Out of range samplings will be excluded next if (do_local_searches_helical) // Local searches along helical axis { // Local searches (2*2+1=5 samplings) if (maxh > 2) maxh = 2; // New helical offset step is smaller than 1/3 of the old one, samplings should be increased. if ( (old_helical_offset_step > 0.) && ((old_helical_offset_step / new_helical_offset_step) > 3) ) maxh = FLOOR(old_helical_offset_step / new_helical_offset_step); // Use FLOOR here! 
// Local searches should not be wider than 1/3 of the helical rise if ( ((new_helical_offset_step * maxh) > (helical_rise_Angst / 6.)) ) { maxh = FLOOR(helical_rise_Angst / 6. / new_helical_offset_step); // Use FLOOR here! if (maxh < 1) // At least we should do some searches... maxh = 1; } } // DEBUG - this should not happen if (maxh < 0) maxh = 0; helical_offset_step = new_helical_offset_step; } // DEBUG if ( (maxh < 0) || (maxp < 0) ) { std::cerr << "maxh= " << maxh << " maxp= " << maxp << std::endl; REPORT_ERROR("HealpixSampling::setTranslations BUG %% No translations to set! ('maxh' or 'maxp' < 0)"); } translations_x.clear(); translations_y.clear(); translations_z.clear(); for (long int ix = -maxh; ix <= maxh; ix++) { // For helices use a different step size along helical axis X xoff = (do_helical_refine) ? (ix * helical_offset_step) : (ix * offset_step); // For helical refinement, exclude xoff outside the range of (-0.5 * rise, +0.5 * rise) if ( (do_helical_refine) && (ix != 0) && (fabs(xoff) > fabs(helical_rise_Angst / 2.)) ) continue; for (long int iy = -maxp; iy <= maxp; iy++) { yoff = iy * offset_step; // For helices do not limit translations along helical axis X max2 = (do_helical_refine) ? (yoff * yoff) : (xoff * xoff + yoff * yoff); if (is_3d_trans) { for (long int iz = -maxp; iz <= maxp; iz++) { zoff = iz * offset_step; if ((max2 + zoff * zoff) <= (offset_range * offset_range)) { translations_y.push_back(yoff); if (do_helical_refine) // Z axis corresponds to the helical axis in 3D subtomogram averaging !!! { translations_x.push_back(zoff); translations_z.push_back(xoff); } else { translations_x.push_back(xoff); translations_z.push_back(zoff); } } } } else { if (max2 < (offset_range * offset_range) + 0.001) // +0.001 prevent precision errors in relion-3.1 { translations_x.push_back(xoff); translations_y.push_back(yoff); } } } } #ifdef DEBUG_SETTRANS std::cerr << " is_3d_trans= " << is_3d_trans << std::endl; for (int i = 0; i < translations_x.size(); i++) std::cerr << " translations_x[i]= " << translations_x[i] << std::endl; #endif return; } /* Set only a single translation */ void HealpixSampling::addOneTranslation( RFLOAT offset_x, RFLOAT offset_y, RFLOAT offset_z, bool do_clear, bool do_helical_refine, RFLOAT rot_deg, RFLOAT tilt_deg, RFLOAT psi_deg) { if (do_clear) { translations_x.clear(); translations_y.clear(); translations_z.clear(); } if (do_helical_refine) transformCartesianAndHelicalCoords(offset_x, offset_y, offset_z, offset_x, offset_y, offset_z, rot_deg, tilt_deg, psi_deg, ((is_3d_trans) ? (3) : (2)), CART_TO_HELICAL_COORDS); translations_x.push_back(offset_x); translations_y.push_back(offset_y); if (is_3d_trans) translations_z.push_back(offset_z); } void HealpixSampling::setOrientations(int _order, RFLOAT _psi_step) { // Initialise directions_ipix.clear(); rot_angles.clear(); tilt_angles.clear(); psi_angles.clear(); if (_order >= 0) healpix_order = _order; // Setup the HealPix object // For adaptive oversampling only precalculate the COARSE sampling! 
if (_order >= 0) healpix_base.Set(_order, NEST); // 3D directions if (is_3D) { RFLOAT rot, tilt; for (long int ipix = 0; ipix < healpix_base.Npix(); ipix++) { getDirectionFromHealPix(ipix, rot, tilt); // Push back as Matrix1D's in the vectors rot_angles.push_back(rot); tilt_angles.push_back(tilt); directions_ipix.push_back(ipix); } //#define DEBUG_SAMPLING #ifdef DEBUG_SAMPLING writeAllOrientationsToBild("orients_all.bild", "1 0 0 ", 0.020); #endif // Now remove symmetry-related pixels if not relaxing symmetry // TODO check size of healpix_base.max_pixrad if (!isRelax) removeSymmetryEquivalentPoints(0.5 * RAD2DEG(healpix_base.max_pixrad())); #ifdef DEBUG_SAMPLING writeAllOrientationsToBild("orients_sym.bild", "0 1 0 ", 0.021); #endif // Also remove limited tilt angles removePointsOutsideLimitedTiltAngles(); #ifdef DEBUG_SAMPLING if (ABS(limit_tilt) < 90.) writeAllOrientationsToBild("orients_tilt.bild", "1 1 0 ", 0.022); #endif } else { rot_angles.push_back(0.); tilt_angles.push_back(0.); directions_ipix.push_back(-1); } // 2D in-plane angles // By default in 3D case: use more-or-less same psi-sampling as the 3D healpix object // By default in 2D case: use 5 degree if (_psi_step > 0.) psi_step = _psi_step; int nr_psi = CEIL(360./psi_step); RFLOAT psi; psi_step = 360./(RFLOAT)nr_psi; for (int ipsi = 0; ipsi < nr_psi; ipsi++) { psi = ipsi * psi_step; psi_angles.push_back(psi); } //#define DEBUG_SAMPLING #ifdef DEBUG_SAMPLING writeAllOrientationsToBild("orients_final.bild", "1 0 0 ", 0.020); #endif } /* Set only a single orientation */ void HealpixSampling::addOneOrientation(RFLOAT rot, RFLOAT tilt, RFLOAT psi, bool do_clear) { if (do_clear) { directions_ipix.clear(); rot_angles.clear(); tilt_angles.clear(); psi_angles.clear(); } // 3D directions if (is_3D) { rot_angles.push_back(rot); tilt_angles.push_back(tilt); directions_ipix.push_back(-1); } else { rot_angles.push_back(0.); tilt_angles.push_back(0.); directions_ipix.push_back(-1); } // in-plane rotation psi_angles.push_back(psi); } void HealpixSampling::writeAllOrientationsToBild(FileName fn_bild, std::string rgb, RFLOAT size) { std::ofstream out; out.open (fn_bild.c_str()); if (!out) REPORT_ERROR( (std::string)"HealpixSampling::writeAllOrientationsToBild: Cannot write file: " + fn_bild); out << ".color 1 0 0 \n"; out << ".arrow 0 0 0 1 0 0 0.01 \n"; out << ".color 0 1 0 \n"; out << ".arrow 0 0 0 0 1 0 0.01 \n"; out << ".color 0 0 1 \n"; out << ".arrow 0 0 0 0 0 1 0.01 \n"; Matrix1D v(3); out << ".color " << rgb << std::endl; for (unsigned long int ipix = 0; ipix < rot_angles.size(); ipix++) { Euler_angles2direction(rot_angles[ipix], tilt_angles[ipix], v); out << ".sphere " << XX(v) << " " << YY(v) << " " << ZZ(v) << " " << floatToString(size) << std::endl; } out.close(); } void HealpixSampling::writeNonZeroPriorOrientationsToBild(FileName fn_bild, RFLOAT rot_prior, RFLOAT tilt_prior, std::vector &pointer_dir_nonzeroprior, std::string rgb, RFLOAT size) { std::ofstream out; out.open (fn_bild.c_str()); if (!out) REPORT_ERROR( (std::string)"HealpixSampling::writeNonZeroOrientationsToBild: Cannot write file: " + fn_bild); out << ".color 1 0 0 \n"; out << ".arrow 0 0 0 1 0 0 0.01 \n"; out << ".color 0 1 0 \n"; out << ".arrow 0 0 0 0 1 0 0.01 \n"; out << ".color 0 0 1 \n"; out << ".arrow 0 0 0 0 0 1 0.01 \n"; Matrix1D v(3); Euler_angles2direction(rot_prior, tilt_prior, v); out << ".color 1 0 0 \n"; out << ".sphere " << XX(v) << " " << YY(v) << " " << ZZ(v) << " " << floatToString(size) << std::endl; out << ".color " << rgb << std::endl; for 
(unsigned long int ipix = 0; ipix < pointer_dir_nonzeroprior.size(); ipix++) { long int idir = pointer_dir_nonzeroprior[ipix]; Euler_angles2direction(rot_angles[idir], tilt_angles[idir], v); out << ".sphere " << XX(v) << " " << YY(v) << " " << ZZ(v) << " " << floatToString(size) << std::endl; } out.close(); } RFLOAT HealpixSampling::calculateDeltaRot(Matrix1D my_direction, RFLOAT rot_prior) { // Rotate the x,y-components of the direction, according to rot-prior Matrix1D< RFLOAT > my_rot_direction; Matrix2D< RFLOAT > A; rotation2DMatrix(rot_prior, A); my_rot_direction = A.inv() * my_direction; // Get component along the new Y-axis return fabs(ASIND(my_rot_direction(1))); } void HealpixSampling::selectOrientationsWithNonZeroPriorProbability( RFLOAT prior_rot, RFLOAT prior_tilt, RFLOAT prior_psi, RFLOAT sigma_rot, RFLOAT sigma_tilt, RFLOAT sigma_psi, std::vector &pointer_dir_nonzeroprior, std::vector &directions_prior, std::vector &pointer_psi_nonzeroprior, std::vector &psi_prior, bool do_bimodal_search_psi, RFLOAT sigma_cutoff, RFLOAT sigma_tilt_from_ninety, RFLOAT sigma_psi_from_zero) { pointer_dir_nonzeroprior.clear(); directions_prior.clear(); // Do not check the mates again std::vector idir_flag(rot_angles.size(), false); if (is_3D) { //std::cerr<<"sigma_rot "< prior90_direction; if (sigma_tilt_from_ninety > 0.) { // pre-calculate original (0,90) direction Euler_angles2direction(0., 90., prior90_direction); } // Loop over all directions RFLOAT sumprior = 0.; RFLOAT sumprior_withsigmafromzero = 0.; // Keep track of the closest distance to prevent 0 orientations RFLOAT best_ang = 9999.; long int best_idir = -999; for (long int idir = 0; idir < rot_angles.size(); idir++) { // Check if this direction was met before as symmetry mate if (idir_flag[idir] == true) continue; bool is_nonzero_pdf = false; // Any prior involving BOTH rot and tilt. if ( (sigma_rot > 0.) && (sigma_tilt > 0.) ) { // Get the direction of the prior Matrix1D prior_direction, my_direction, sym_direction, best_direction; Euler_angles2direction(prior_rot, prior_tilt, prior_direction); // Get the current direction in the loop Euler_angles2direction(rot_angles[idir], tilt_angles[idir], my_direction); best_direction = my_direction; // Loop over all symmetry operators to find the operator that brings this direction nearest to the prior if no symmetry relaxation if (!isRelax) { RFLOAT best_dotProduct = dotProduct(prior_direction, my_direction); for (int j = 0; j < R_repository.size(); j++) { sym_direction = L_repository[j] * (my_direction.transpose() * R_repository[j]).transpose(); RFLOAT my_dotProduct = dotProduct(prior_direction, sym_direction); if (my_dotProduct > best_dotProduct) { best_direction = sym_direction; best_dotProduct = my_dotProduct; } } } // Now that we have the best direction, find the corresponding prior probability RFLOAT diffang = ACOSD( dotProduct(best_direction, prior_direction) ); if (diffang > 180.) diffang = ABS(diffang - 360.); if (do_bimodal_search_psi && (diffang > 90.)) // KThurber diffang = ABS(diffang - 180.); // KThurber // Only consider differences within sigma_cutoff * sigma_rot // TODO: If sigma_rot and sigma_tilt are not the same (NOT for helices)? RFLOAT biggest_sigma = XMIPP_MAX(sigma_rot, sigma_tilt); if (diffang < sigma_cutoff * biggest_sigma) { // TODO!!! If tilt is zero then any rot will be OK!!!!! //std::cerr<<"Best direction index: "< 0.) 
{ Matrix1D my_direction, sym_direction; RFLOAT sym_rot, sym_tilt; // Get the current direction in the loop Euler_angles2direction(rot_angles[idir], tilt_angles[idir], my_direction); RFLOAT diffang = calculateDeltaRot(my_direction, prior_rot); RFLOAT best_diffang = diffang; for (int j = 0; j < R_repository.size(); j++) { sym_direction = L_repository[j] * (my_direction.transpose() * R_repository[j]).transpose(); diffang = calculateDeltaRot(sym_direction, prior_rot); if (diffang < best_diffang) best_diffang = diffang; } // Only consider differences within sigma_cutoff * sigma_rot if (best_diffang < sigma_cutoff * sigma_rot) { RFLOAT prior = gaussian1D(best_diffang, sigma_rot, 0.); pointer_dir_nonzeroprior.push_back(idir); directions_prior.push_back(prior); sumprior += prior; is_nonzero_pdf = true; } // Keep track of the nearest direction if (best_diffang < best_ang) { best_idir = idir; best_ang = diffang; } } else if (sigma_tilt > 0.) { Matrix1D my_direction, sym_direction; RFLOAT sym_rot, sym_tilt; // Get the current direction in the loop Euler_angles2direction(rot_angles[idir], tilt_angles[idir], my_direction); // Loop over all symmetry operators to find the operator that brings this direction nearest to the prior RFLOAT diffang = ABS(tilt_angles[idir] - prior_tilt); if (diffang > 180.) diffang = ABS(diffang - 360.); RFLOAT best_diffang = diffang; for (int j = 0; j < R_repository.size(); j++) { sym_direction = L_repository[j] * (my_direction.transpose() * R_repository[j]).transpose(); Euler_direction2angles(sym_direction, sym_rot, sym_tilt); diffang = ABS(sym_tilt - prior_tilt); if (diffang > 180.) diffang = ABS(diffang - 360.); if (diffang < best_diffang) best_diffang = diffang; } // Only consider differences within sigma_cutoff * sigma_tilt if (best_diffang < sigma_cutoff * sigma_tilt) { RFLOAT prior = gaussian1D(best_diffang, sigma_tilt, 0.); pointer_dir_nonzeroprior.push_back(idir); directions_prior.push_back(prior); sumprior += prior; is_nonzero_pdf = true; } // Keep track of the nearest direction if (best_diffang < best_ang) { best_idir = idir; best_ang = diffang; } } // end if any prior involving rot and/or tilt else { // If no prior on the directions: just add all of them pointer_dir_nonzeroprior.push_back(idir); directions_prior.push_back(1.); sumprior += 1.; is_nonzero_pdf = true; } // For priors on deviations from (0,90)-degree (rot,tilt) angles in multi-body refinement if (sigma_tilt_from_ninety > 0. && is_nonzero_pdf) { // Get the current direction in the loop (re-do, as sometimes sigma_rot and sigma_tilt are both zero! Matrix1D my_direction, best_direction, sym_direction; Euler_angles2direction(rot_angles[idir], tilt_angles[idir], my_direction); // Loop over all symmetry operators to find the operator that brings this direction nearest to the prior RFLOAT best_dotProduct = dotProduct(prior90_direction, my_direction); best_direction = my_direction; for (int j = 0; j < R_repository.size(); j++) { sym_direction = L_repository[j] * (my_direction.transpose() * R_repository[j]).transpose(); RFLOAT my_dotProduct = dotProduct(prior90_direction, sym_direction); if (my_dotProduct > best_dotProduct) { best_direction = sym_direction; best_dotProduct = my_dotProduct; } } // Now that we have the best direction, find the corresponding prior probability RFLOAT diffang = ACOSD( dotProduct(best_direction, prior90_direction) ); if (diffang < -180.) diffang = ABS(diffang + 360.); else if (diffang > 180.) 
diffang = ABS(diffang - 360.); diffang = ABS(diffang); long int mypos = pointer_dir_nonzeroprior.size() - 1; // Check tilt angle is within 3*sigma_tilt_from_ninety if (diffang > sigma_cutoff * sigma_tilt_from_ninety) { pointer_dir_nonzeroprior.pop_back(); directions_prior.pop_back(); } else { RFLOAT prior = gaussian1D(diffang, sigma_tilt_from_ninety, 0.); directions_prior[mypos] *= prior; sumprior_withsigmafromzero += directions_prior[mypos]; } } // Here add the code for relax symmetry to find the symmetry mates } // end for idir //Normalise the prior probability distribution to have sum 1 over all psi-angles for (long int idir = 0; idir < directions_prior.size(); idir++) { if (sigma_tilt_from_ninety > 0.) directions_prior[idir] /= sumprior_withsigmafromzero; else directions_prior[idir] /= sumprior; } // If there were no directions at all, just select the single nearest one: if (directions_prior.size() == 0) { pointer_dir_nonzeroprior.push_back(best_idir); //std::cerr<<"No direction has been found"<> c; #endif } else { pointer_dir_nonzeroprior.push_back(0); directions_prior.push_back(1.); } // Psi-angles pointer_psi_nonzeroprior.clear(); psi_prior.clear(); RFLOAT sumprior = 0.; RFLOAT sumprior_withsigmafromzero = 0.; RFLOAT best_diff = 9999.; long int best_ipsi = -999; for (long int ipsi = 0; ipsi < psi_angles.size(); ipsi++) { bool is_nonzero_pdf = false; // Sjors 12jul2017: for small tilt-angles, rot-angle may become anything, psi-angle then follows that // Therefore, psi-prior may be completely wrong.... The following line would however be a very expensive fix.... //if (sigma_psi > 0. && prior_tilt > 10.) if (sigma_psi > 0.) { RFLOAT diffpsi = ABS(psi_angles[ipsi] - prior_psi); if (diffpsi > 180.) diffpsi = ABS(diffpsi - 360.); if (do_bimodal_search_psi && (diffpsi > 90.)) diffpsi = ABS(diffpsi - 180.); // Only consider differences within sigma_cutoff * sigma_psi if (diffpsi < sigma_cutoff * sigma_psi) { RFLOAT prior = gaussian1D(diffpsi, sigma_psi, 0.); pointer_psi_nonzeroprior.push_back(ipsi); psi_prior.push_back(prior); sumprior += prior; is_nonzero_pdf = true; // TMP DEBUGGING if (prior == 0.) { std::cerr << " psi_angles[ipsi]= " << psi_angles[ipsi] << " prior_psi= " << prior_psi << std::endl; std::cerr << " diffpsi= " << diffpsi << " sigma_cutoff= " << sigma_cutoff << " sigma_psi= " << sigma_psi << std::endl; REPORT_ERROR("prior on psi is zero!"); } } // Keep track of the nearest sampling point if (diffpsi < best_diff) { best_ipsi = ipsi; best_diff = diffpsi; } } else { pointer_psi_nonzeroprior.push_back(ipsi); psi_prior.push_back(1.); sumprior += 1.; is_nonzero_pdf = true; } // For priors on deviations from 0 psi angles in multi-body refinement if (sigma_psi_from_zero > 0. && is_nonzero_pdf) { long int mypos = pointer_psi_nonzeroprior.size() - 1; // Check psi angle is within sigma_cutoff*sigma_psi_from_zero RFLOAT diff_psi = psi_angles[ipsi]; if (diff_psi > 180.) diff_psi -= 360.; else if (diff_psi < -180.) diff_psi += 360.; diff_psi = ABS(diff_psi); if (diff_psi > sigma_cutoff * sigma_psi_from_zero) { pointer_psi_nonzeroprior.pop_back(); psi_prior.pop_back(); } else { RFLOAT prior = gaussian1D(diff_psi, sigma_psi_from_zero, 0.); psi_prior[mypos] *= prior; sumprior_withsigmafromzero += psi_prior[mypos]; } } } // end for ipsi // Normalise the prior probability distribution to have sum 1 over all psi-angles for (long int ipsi = 0; ipsi < psi_prior.size(); ipsi++) { if (sigma_psi_from_zero > 0.) 
psi_prior[ipsi] /= sumprior_withsigmafromzero; else psi_prior[ipsi] /= sumprior; } // If there were no directions at all, just select the single nearest one: if (psi_prior.size() == 0) { if (best_ipsi < 0) REPORT_ERROR("HealpixSampling::selectOrientationsWithNonZeroPriorProbability BUG: best_ipsi < 0"); pointer_psi_nonzeroprior.push_back(best_ipsi); psi_prior.push_back(1.); } #ifdef DEBUG_SAMPLING std::cerr << " psi_angles.size()= " << psi_angles.size() << " psi_step= " << psi_step << std::endl; std::cerr << " psi_prior.size()= " << psi_prior.size() << " pointer_psi_nonzeroprior.size()= " << pointer_psi_nonzeroprior.size() << " sumprior= " << sumprior << std::endl; #endif return; } void HealpixSampling::findSymmetryMate(long int idir_, RFLOAT prior_, std::vector &pointer_dir_nonzeroprior, std::vector &directions_prior, std::vector &idir_flag) { Matrix1D my_direction, sym_direction; RFLOAT angular_sampling = DEG2RAD(360. / (6 * ROUND(std::pow(2., healpix_order)))) * 2; // Calculate the search radius // Direction for the best-matched Healpix index Euler_angles2direction(rot_angles[idir_], tilt_angles[idir_], my_direction); // Find the best symmetry mates in the HealPix library for (int i = 1; i < R_repository_relax.size(); i++) { int best_direction_index; std::vector listpix; // Array with the list of indices for the neighbors RFLOAT alpha; // For Rot RFLOAT beta; // For Theta sym_direction = L_repository_relax[i] * (my_direction.transpose() * R_repository_relax[i]).transpose(); Euler_direction2angles(sym_direction, alpha, beta); alpha = DEG2RAD(alpha); beta = DEG2RAD(beta); pointing prior_direction_pointing(beta, alpha); // Object required by healpix function healpix_base.query_disc(prior_direction_pointing, angular_sampling, listpix); // Search healpix for closest indices best_direction_index = listpix[0]; // If there are more than one neighbors then select the best if (listpix.size() > 1) { Matrix1D current_direction; Euler_angles2direction(rot_angles[best_direction_index], tilt_angles[best_direction_index], current_direction); RFLOAT best_dotProduct = dotProduct(sym_direction, current_direction); for (long int j = 1; j < listpix.size(); j++) { int current_index = listpix[j]; // Assuming sigma_tilt and sigma_rot are set // Get the current direction Euler_angles2direction(rot_angles[current_index], tilt_angles[current_index], current_direction); RFLOAT my_dotProduct = dotProduct(sym_direction, current_direction); if (my_dotProduct > best_dotProduct && idir_flag[current_index] != true) { best_direction_index = current_index; best_dotProduct = my_dotProduct; } } } // Now we have the best symmetry mate index pointer_dir_nonzeroprior.push_back(best_direction_index); directions_prior.push_back(prior_); idir_flag[best_direction_index] = true; } } void HealpixSampling::selectOrientationsWithNonZeroPriorProbabilityFor3DHelicalReconstruction( RFLOAT prior_rot, RFLOAT prior_tilt, RFLOAT prior_psi, RFLOAT sigma_rot, RFLOAT sigma_tilt, RFLOAT sigma_psi, std::vector &pointer_dir_nonzeroprior, std::vector &directions_prior, std::vector &pointer_psi_nonzeroprior, std::vector &psi_prior, bool do_auto_refine_local_searches, RFLOAT prior_psi_flip_ratio, RFLOAT prior_rot_flip_ratio, // KThurber RFLOAT sigma_cutoff) { // Helical references are always along Z axis in 3D helical reconstructions // Therefore tilt priors are usually not far from 90 degrees // Tilt~0 problem: if tilt=0 or 180 degrees, a change in rot is equal to some changes in psi // If tilt priors are estimated around 0 or 180 degrees for a 
segment, this segment must be rubbish // So tilt~0 problem does not impact helical reconstruction // Because if user provides this rubbish segment, just let it have any orientations and it will give out rubbish // Just throw a warning message for this rubbish segment // If tilt~0 problem is ignored, I can implement 2D Gaussian priors for rot-tilt pairs // This can be more accurate than previous implementation // It also saves time when sigma_rot is much larger than sigma_tilt (during local searches in 3D auto-refine) RFLOAT prior_psi_flip_ratio_thres_min = 0.01; RFLOAT prior_rot_flip_ratio_thres_min = 0.01; // KThurber pointer_dir_nonzeroprior.clear(); directions_prior.clear(); if (is_3D) { // If tilt prior is less than 20 or larger than 160 degrees, print a warning message //if (fabs(((prior_tilt / 180.) - ROUND(prior_tilt / 180.)) * 180.) < 20.) //{ // std::cerr << " WARNING: A helical segment is found with tilt prior= " << prior_tilt // << " degrees. It will probably impact searches of orientations in 3D helical reconstruction."<< std::endl; //} // Loop over all directions RFLOAT sumprior = 0.; // Keep track of the closest distance to prevent 0 orientations RFLOAT best_ang = 9999.; long int best_idir = -999; for (long int idir = 0; idir < rot_angles.size(); idir++) { // Any prior involving BOTH rot and tilt. if ( (sigma_rot > 0.) && (sigma_tilt > 0.) ) { Matrix1D prior_direction, my_direction, sym_direction, best_direction; // Get the direction of the prior Euler_angles2direction(prior_rot, prior_tilt, prior_direction); // Get the current direction in the loop Euler_angles2direction(rot_angles[idir], tilt_angles[idir], my_direction); // Loop over all symmetry operators to find the operator that brings this direction nearest to the prior RFLOAT best_dotProduct = dotProduct(prior_direction, my_direction); best_direction = my_direction; for (int j = 0; j < R_repository.size(); j++) { sym_direction = L_repository[j] * (my_direction.transpose() * R_repository[j]).transpose(); RFLOAT my_dotProduct = dotProduct(prior_direction, sym_direction); if (my_dotProduct > best_dotProduct) { best_direction = sym_direction; best_dotProduct = my_dotProduct; } } if (!do_auto_refine_local_searches) { // Assume tilt = (0, +180) // TODO: Check if "(tilt_angles[idir] > 0.01) && (tilt_angles[idir] < 179.99)" is needed //if (prior_psi_flip_ratio > prior_psi_flip_ratio_thres_min) if (prior_psi_flip_ratio > -1.) // KThurber above line changed to primarily dummy if { Matrix1D my_direction2, sym_direction2, best_direction2; // Get the current direction in the loop Euler_angles2direction(rot_angles[idir], (180. 
- tilt_angles[idir]), my_direction2); // Loop over all symmetry operators to find the operator that brings this direction nearest to the prior RFLOAT best_dotProduct2 = dotProduct(prior_direction, my_direction2); best_direction2 = my_direction2; for (int j = 0; j < R_repository.size(); j++) { sym_direction2 = L_repository[j] * (my_direction2.transpose() * R_repository[j]).transpose(); RFLOAT my_dotProduct2 = dotProduct(prior_direction, sym_direction2); if (my_dotProduct2 > best_dotProduct2) { best_direction2 = sym_direction2; best_dotProduct2 = my_dotProduct2; } } if (best_dotProduct2 > best_dotProduct) { best_dotProduct = best_dotProduct2; best_direction = best_direction2; } } } // Calculate the differences from sym_rot, sym_tilt to prior_rot, prior_tilt RFLOAT sym_rot, sym_tilt, diff_rot, diff_tilt, diffang; Euler_direction2angles(best_direction, sym_rot, sym_tilt); diff_rot = ABS(sym_rot - prior_rot); if (diff_rot > 180.) diff_rot = ABS(diff_rot - 360.); // KThurber begin add bool is_rot_flipped = false; if (!do_auto_refine_local_searches) { if (diff_rot > 90.) { diff_rot = ABS(diff_rot - 180.); is_rot_flipped = true; } } // KThurber end add diff_tilt = ABS(sym_tilt - prior_tilt); if (diff_tilt > 180.) diff_tilt = ABS(diff_tilt - 360.); diffang = sqrt((diff_rot * diff_rot) + (diff_tilt * diff_tilt)); if ( (diff_rot < sigma_cutoff * sigma_rot) && (diff_tilt < sigma_cutoff * sigma_tilt) ) { // TODO!!! If tilt is zero then any rot will be OK!!!!! RFLOAT prior = gaussian2D(diff_rot, diff_tilt, sigma_rot, sigma_tilt, 0.); // KThurber begin add if (!do_auto_refine_local_searches) { if (is_rot_flipped) prior *= prior_rot_flip_ratio; else prior *= (1. - prior_rot_flip_ratio); } // KThurber end add pointer_dir_nonzeroprior.push_back(idir); directions_prior.push_back(prior); sumprior += prior; #ifdef DEBUG_HELICAL_ORIENTATIONAL_SEARCH std::cout << "rot & tilt OK, diffang = " << diffang << std::endl; std::cout << " rot, tilt = " << rot_angles[idir] << ", " << tilt_angles[idir] << std::endl; #endif } else { #ifdef DEBUG_HELICAL_ORIENTATIONAL_SEARCH std::cout << "rot & tilt FAILED, diffang = " << diffang << std::endl; std::cout << " rot, tilt = " << rot_angles[idir] << ", " << tilt_angles[idir] << std::endl; #endif } // Keep track of the nearest direction if (diffang < best_ang) { best_idir = idir; best_ang = diffang; } } else if (sigma_tilt > 0.) { Matrix1D my_direction, sym_direction; RFLOAT sym_rot, sym_tilt; // Get the current direction in the loop Euler_angles2direction(rot_angles[idir], tilt_angles[idir], my_direction); // Loop over all symmetry operators to find the operator that brings this direction nearest to the prior RFLOAT diffang = ABS(tilt_angles[idir] - prior_tilt); if (diffang > 180.) diffang = ABS(diffang - 360.); RFLOAT best_tilt = tilt_angles[idir]; RFLOAT best_diffang = diffang; for (int j = 0; j < R_repository.size(); j++) { sym_direction = L_repository[j] * (my_direction.transpose() * R_repository[j]).transpose(); Euler_direction2angles(sym_direction, sym_rot, sym_tilt); diffang = ABS(sym_tilt - prior_tilt); if (diffang > 180.) diffang = ABS(diffang - 360.); if (diffang < best_diffang) { best_diffang = diffang; best_tilt = sym_tilt; } } if (!do_auto_refine_local_searches) { // Assume tilt = (0, +180) // TODO: Check if "(tilt_angles[idir] > 0.01) && (tilt_angles[idir] < 179.99)" is needed //if (prior_psi_flip_ratio > prior_psi_flip_ratio_thres_min) if (prior_psi_flip_ratio > -1.) 
// KThurber above line changed to primarily dummy if { Matrix1D my_direction2, sym_direction2; // Get the current direction in the loop sym_tilt = 180. - tilt_angles[idir]; // Shaoda want the prior on tilt, centered around 90 degrees, so P(87) == P(93) Euler_angles2direction(rot_angles[idir], sym_tilt, my_direction2); // Loop over all symmetry operators to find the operator that brings this direction nearest to the prior diffang = ABS(sym_tilt - prior_tilt); if (diffang > 180.) diffang = ABS(diffang - 360.); if (diffang < best_diffang) { best_diffang = diffang; best_tilt = sym_tilt; } for (int j = 0; j < R_repository.size(); j++) { sym_direction2 = L_repository[j] * (my_direction2.transpose() * R_repository[j]).transpose(); Euler_direction2angles(sym_direction2, sym_rot, sym_tilt); diffang = ABS(sym_tilt - prior_tilt); if (diffang > 180.) diffang = ABS(diffang - 360.); if (diffang < best_diffang) { best_diffang = diffang; best_tilt = sym_tilt; } } } } // Only consider differences within sigma_cutoff * sigma_tilt if (best_diffang < sigma_cutoff * sigma_tilt) { RFLOAT prior = gaussian1D(best_diffang, sigma_tilt, 0.); pointer_dir_nonzeroprior.push_back(idir); directions_prior.push_back(prior); sumprior += prior; #ifdef DEBUG_HELICAL_ORIENTATIONAL_SEARCH std::cout << "rot & tilt OK, diffang = " << diffang << std::endl; std::cout << " rot, tilt = " << rot_angles[idir] << ", " << tilt_angles[idir] << std::endl; #endif } else { #ifdef DEBUG_HELICAL_ORIENTATIONAL_SEARCH std::cout << "rot & tilt FAILED, diffang = " << diffang << std::endl; std::cout << " rot, tilt = " << rot_angles[idir] << ", " << tilt_angles[idir] << std::endl; #endif } // Keep track of the nearest direction if (best_diffang < best_ang) { best_idir = idir; best_ang = diffang; } } else if (sigma_rot > 0.) { REPORT_ERROR("healpix_sampling.cpp::selectOrientationsWithNonZeroPriorProbabilityFor3DHelicalReconstruction() BUG: Rot but not tilt prior exists! It is not reasonable for 3D helical reconstruction!"); } else { // If no prior on the directions: just add all of them pointer_dir_nonzeroprior.push_back(idir); directions_prior.push_back(1.); sumprior += 1.; } } // end for idir //Normalise the prior probability distribution to have sum 1 over all psi-angles for (long int idir = 0; idir < directions_prior.size(); idir++) directions_prior[idir] /= sumprior; // If there were no directions at all, just select the single nearest one: if (directions_prior.size() == 0) { if (best_idir < 0) REPORT_ERROR("HealpixSampling::selectOrientationsWithNonZeroPriorProbability BUG: best_idir < 0"); pointer_dir_nonzeroprior.push_back(best_idir); directions_prior.push_back(1.); } #ifdef DEBUG_SAMPLING writeNonZeroPriorOrientationsToBild("orients_local.bild", prior_rot, prior_tilt, pointer_dir_nonzeroprior, "0 0 1", 0.023); std::cerr << " directions_prior.size()= " << directions_prior.size() << " pointer_dir_nonzeroprior.size()= " << pointer_dir_nonzeroprior.size() << std::endl; std::cerr << " sumprior= " << sumprior << std::endl; char c; std::cerr << "Written orients_local.bild for prior on angles ("<> c; #endif } else { pointer_dir_nonzeroprior.push_back(0); directions_prior.push_back(1.); } // Psi-angles pointer_psi_nonzeroprior.clear(); psi_prior.clear(); RFLOAT sumprior = 0.; RFLOAT best_diff = 9999.; long int best_ipsi = -999; bool is_psi_flipped = false; for (long int ipsi = 0; ipsi < psi_angles.size(); ipsi++) { if (sigma_psi > 0.) { RFLOAT diffpsi = ABS(psi_angles[ipsi] - prior_psi); if (diffpsi > 180.) 
diffpsi = ABS(diffpsi - 360.); if (!do_auto_refine_local_searches) { if ( (prior_psi_flip_ratio > prior_psi_flip_ratio_thres_min) && (diffpsi > 90.)) { diffpsi = ABS(diffpsi - 180.); is_psi_flipped = true; } } // Only consider differences within sigma_cutoff * sigma_psi if (diffpsi < sigma_cutoff * sigma_psi) { RFLOAT prior = gaussian1D(diffpsi, sigma_psi, 0.); if (!do_auto_refine_local_searches) { if (is_psi_flipped) prior *= prior_psi_flip_ratio; else prior *= (1. - prior_psi_flip_ratio); } pointer_psi_nonzeroprior.push_back(ipsi); psi_prior.push_back(prior); sumprior += prior; #ifdef DEBUG_HELICAL_ORIENTATIONAL_SEARCH std::cout << "psi OK, diffang = " << diffpsi << std::endl; std::cout << " psi = " << psi_angles[ipsi] << std::endl; #endif // TMP DEBUGGING if (prior == 0.) { std::cerr << " psi_angles[ipsi]= " << psi_angles[ipsi] << " prior_psi= " << prior_psi << std::endl; std::cerr << " diffpsi= " << diffpsi << " sigma_cutoff= " << sigma_cutoff << " sigma_psi= " << sigma_psi << std::endl; REPORT_ERROR("prior on psi is zero!"); } } else { #ifdef DEBUG_HELICAL_ORIENTATIONAL_SEARCH std::cout << "psi FAILED, diffang = " << diffpsi << std::endl; std::cout << " psi = " << psi_angles[ipsi] << std::endl; #endif } // Keep track of the nearest sampling point if (diffpsi < best_diff) { best_ipsi = ipsi; best_diff = diffpsi; } } else { pointer_psi_nonzeroprior.push_back(ipsi); psi_prior.push_back(1.); sumprior += 1.; } } // Normalise the prior probability distribution to have sum 1 over all psi-angles for (long int ipsi = 0; ipsi < psi_prior.size(); ipsi++) psi_prior[ipsi] /= sumprior; // If there were no directions at all, just select the single nearest one: if (psi_prior.size() == 0) { if (best_ipsi < 0) REPORT_ERROR("HealpixSampling::selectOrientationsWithNonZeroPriorProbability BUG: best_ipsi < 0"); pointer_psi_nonzeroprior.push_back(best_ipsi); psi_prior.push_back(1.); } #ifdef DEBUG_SAMPLING std::cerr << " psi_angles.size()= " << psi_angles.size() << " psi_step= " << psi_step << std::endl; std::cerr << " psi_prior.size()= " << psi_prior.size() << " pointer_psi_nonzeroprior.size()= " << pointer_psi_nonzeroprior.size() << " sumprior= " << sumprior << std::endl; #endif } FileName HealpixSampling::symmetryGroup() { return fn_sym; } long int HealpixSampling::getHealPixIndex(long int idir) { #ifdef DEBUG_CHECKSIZES if (idir >= directions_ipix.size()) { std::cerr<< "idir= "<= directions_ipix.size()"); } #endif return directions_ipix[idir]; } void HealpixSampling::checkDirection(RFLOAT &rot, RFLOAT &tilt) { // The geometrical considerations about the symmetry below require that rot = [-180,180] and tilt [0,180] // The following was incorrect?! if (tilt < 0.) { tilt = -tilt; rot += 180.; } bool is_ok = false; while (!is_ok) { if (rot > 180.) rot -= 360.; else if (rot < -180.) 
rot += 360.; else is_ok = true; } } void HealpixSampling::getDirectionFromHealPix(long int ipix, RFLOAT &rot, RFLOAT &tilt) { // this one always has to be double (also for SINGLE_PRECISION CALCULATIONS) for call to external library double zz, phi; healpix_base.pix2ang_z_phi(ipix, zz, phi); rot = RAD2DEG(phi); tilt = ACOSD(zz); // The geometrical considerations about the symmetry below require that rot = [-180,180] and tilt [0,180] checkDirection(rot, tilt); } RFLOAT HealpixSampling::getTranslationalSampling(int adaptive_oversampling) { return offset_step / std::pow(2., adaptive_oversampling); } RFLOAT HealpixSampling::getHelicalTranslationalSampling(int adaptive_oversampling) { return helical_offset_step / std::pow(2., adaptive_oversampling); } RFLOAT HealpixSampling::getAngularSampling(int adaptive_oversampling) { if (is_3D) { int order = healpix_order + adaptive_oversampling; return 360. / (6 * ROUND(std::pow(2., order))); } else return psi_step / std::pow(2., adaptive_oversampling); } long int HealpixSampling::NrDirections(int oversampling_order, const std::vector *pointer_dir_nonzeroprior) { long int mysize = (pointer_dir_nonzeroprior != NULL && (*pointer_dir_nonzeroprior).size() > 0) ? (*pointer_dir_nonzeroprior).size() : rot_angles.size(); if (oversampling_order == 0) return mysize; else return ROUND(std::pow(2., oversampling_order * 2)) * mysize; } long int HealpixSampling::NrPsiSamplings(int oversampling_order, const std::vector *pointer_psi_nonzeroprior) { long int mysize = (pointer_psi_nonzeroprior != NULL && (*pointer_psi_nonzeroprior).size() > 0) ? (*pointer_psi_nonzeroprior).size() : psi_angles.size(); if (oversampling_order == 0) return mysize; else return ROUND(std::pow(2., oversampling_order)) * mysize; } long int HealpixSampling::NrTranslationalSamplings(int oversampling_order) { if (oversampling_order == 0) return translations_x.size(); else { if (is_3d_trans) return ROUND(std::pow(2., oversampling_order * 3)) * translations_x.size(); else return ROUND(std::pow(2., oversampling_order * 2)) * translations_x.size(); } } long int HealpixSampling::NrSamplingPoints(int oversampling_order, const std::vector *pointer_dir_nonzeroprior, const std::vector *pointer_psi_nonzeroprior) { return NrDirections(oversampling_order, pointer_dir_nonzeroprior) * NrPsiSamplings(oversampling_order, pointer_psi_nonzeroprior) * NrTranslationalSamplings(oversampling_order); } /* How often is each orientation oversampled? */ int HealpixSampling::oversamplingFactorOrientations(int oversampling_order) { if (is_3D) return ROUND(std::pow(2., oversampling_order * 3)); else return ROUND(std::pow(2., oversampling_order)); } /* How often is each translation oversampled? 
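 * (Illustrative numbers only, derived from the factors returned below and from
 * oversamplingFactorOrientations() above; a sketch, not an extra specification.)
 * With oversampling_order = 1, each coarse 2D translation is replaced by
 * 2^(1x2) = 4 finer (x,y) offsets and each coarse 3D translation by 2^(1x3) = 8
 * finer (x,y,z) offsets; with oversampling_order = 2 these become 16 and 64.
 * Orientations behave analogously: 2^order in 2D and 2^(3 x order) in 3D
 * (directions and psi combined).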
*/ int HealpixSampling::oversamplingFactorTranslations(int oversampling_order) { if (is_3d_trans) return ROUND(std::pow(2., oversampling_order * 3)); else return ROUND(std::pow(2., oversampling_order * 2)); } void HealpixSampling::getDirection(long int idir, RFLOAT &rot, RFLOAT &tilt) { #ifdef DEBUG_CHECKSIZES if (idir >= rot_angles.size()) { std::cerr<< "idir= "<= rot_angles.size()"); } #endif rot = rot_angles[idir]; tilt = tilt_angles[idir]; } void HealpixSampling::getPsiAngle(long int ipsi, RFLOAT &psi) { #ifdef DEBUG_CHECKSIZES if (ipsi >= psi_angles.size()) { std::cerr<< "ipsi= "<= psi_angles.size()"); } #endif psi = psi_angles[ipsi]; } void HealpixSampling::getTranslationInPixel(long int itrans, RFLOAT my_pixel_size, RFLOAT &trans_x, RFLOAT &trans_y, RFLOAT &trans_z) { #ifdef DEBUG_CHECKSIZES if (itrans >= translations_x.size()) { std::cerr<< "itrans= "<= translations_x.size()"); } #endif trans_x = translations_x[itrans] / my_pixel_size; trans_y = translations_y[itrans] / my_pixel_size; if (is_3d_trans) trans_z = translations_z[itrans] / my_pixel_size; } long int HealpixSampling::getPositionSamplingPoint(int iclass, long int idir, long int ipsi, long int itrans) { return iclass * rot_angles.size() * psi_angles.size() * translations_x.size() + idir * psi_angles.size() * translations_x.size() + ipsi * translations_x.size() + itrans; } long int HealpixSampling::getPositionOversampledSamplingPoint(long int ipos, int oversampling_order, int iover_rot, int iover_trans) { if (oversampling_order == 0) return ipos; else { int nr_over_orient = oversamplingFactorOrientations(oversampling_order); int nr_over_trans = oversamplingFactorTranslations(oversampling_order); return ipos * nr_over_orient * nr_over_trans + nr_over_trans * iover_rot + iover_trans; } } void HealpixSampling::getTranslationsInPixel(long int itrans, int oversampling_order, RFLOAT my_pixel_size, std::vector &my_translations_x, std::vector &my_translations_y, std::vector &my_translations_z, bool do_helical_refine) { #ifdef DEBUG_CHECKSIZES if (itrans >= translations_x.size()) { std::cerr<< "itrans= "<= translations_x.size()"); } #endif my_translations_x.clear(); my_translations_y.clear(); my_translations_z.clear(); if (oversampling_order == 0) { my_translations_x.push_back(translations_x[itrans] / my_pixel_size); my_translations_y.push_back(translations_y[itrans] / my_pixel_size); if (is_3d_trans) my_translations_z.push_back(translations_z[itrans] / my_pixel_size); } else { int nr_oversamples = ROUND(std::pow(2., oversampling_order)); // DEBUG if ( (nr_oversamples < 1) ) { std::cerr << "oversampling_order= " << oversampling_order << " nr_oversamples= " << nr_oversamples << std::endl; REPORT_ERROR("HealpixSampling::getTranslations BUG %% 'nr_oversamples' should be a positive integer!"); } // Helical refinement RFLOAT h_step = offset_step; if (do_helical_refine) { h_step = helical_offset_step; } if (h_step < 0.) 
{ std::cerr << "helical_offset_step (h_step)= " << h_step << std::endl; REPORT_ERROR("HealpixSampling::getTranslations BUG %% 'helical_offset_step (h_step)' should be positive!"); } RFLOAT over_xoff = 0., over_yoff = 0., over_zoff = 0.; for (int itrans_overx = 0; itrans_overx < nr_oversamples; itrans_overx++) { if ( (do_helical_refine) && (!is_3d_trans) ) // Helical reconstruction with 2D segments over_xoff = translations_x[itrans] - 0.5 * h_step + (0.5 + itrans_overx) * h_step / nr_oversamples; else over_xoff = translations_x[itrans] - 0.5 * offset_step + (0.5 + itrans_overx) * offset_step / nr_oversamples; for (int itrans_overy = 0; itrans_overy < nr_oversamples; itrans_overy++) { over_yoff = translations_y[itrans] - 0.5 * offset_step + (0.5 + itrans_overy) * offset_step / nr_oversamples; if (is_3d_trans) { for (int itrans_overz = 0; itrans_overz < nr_oversamples; itrans_overz++) { if (do_helical_refine) // Helical reconstruction in 3D subtomogram averaging over_zoff = translations_z[itrans] - 0.5 * h_step + (0.5 + itrans_overz) * h_step / nr_oversamples; else over_zoff = translations_z[itrans] - 0.5 * offset_step + (0.5 + itrans_overz) * offset_step / nr_oversamples; my_translations_x.push_back(over_xoff / my_pixel_size); my_translations_y.push_back(over_yoff / my_pixel_size); my_translations_z.push_back(over_zoff / my_pixel_size); } } else { my_translations_x.push_back(over_xoff / my_pixel_size); my_translations_y.push_back(over_yoff / my_pixel_size); } } } // This could rarely happen. Just in case all over-sampled xoff are excluded in helical refinement... // AVOID THIS BY CHOOSING AN INITIAL ANGULAR SAMPLING FINER THAN HALF OF THE TWIST !!!!!! if ( (do_helical_refine) && (my_translations_x.size() < 1) ) { my_translations_x.push_back(translations_x[itrans] / my_pixel_size); my_translations_y.push_back(translations_y[itrans] / my_pixel_size); if (is_3d_trans) my_translations_z.push_back(translations_z[itrans] / my_pixel_size); } } if (ABS(random_perturbation) > 0.) { RFLOAT myperturb = random_perturbation * offset_step / my_pixel_size; // Oct31,2015 - Shaoda - TODO: Please consider this!!! RFLOAT myperturb_helical = random_perturbation * helical_offset_step / my_pixel_size; for (int iover = 0; iover < my_translations_x.size(); iover++) { // If doing helical refinement, DONT put perturbation onto translations along helical axis??? if ( (do_helical_refine) && (!is_3d_trans) ) // Helical reconstruction with 2D segments my_translations_x[iover] += myperturb_helical; else my_translations_x[iover] += myperturb; my_translations_y[iover] += myperturb; if (is_3d_trans) { if (do_helical_refine) my_translations_z[iover] += myperturb_helical; // Helical reconstruction in 3D subtomogram averaging else my_translations_z[iover] += myperturb; } } } } void HealpixSampling::getOrientations(long int idir, long int ipsi, int oversampling_order, std::vector &my_rot, std::vector &my_tilt, std::vector &my_psi, std::vector &pointer_dir_nonzeroprior, std::vector &directions_prior, std::vector &pointer_psi_nonzeroprior, std::vector &psi_prior) { my_rot.clear(); my_tilt.clear(); my_psi.clear(); long int my_idir, my_ipsi; if (pointer_dir_nonzeroprior.size() > idir && pointer_psi_nonzeroprior.size() > ipsi) { // nonzeroprior vectors have been initialised, so use priors! 
my_idir = pointer_dir_nonzeroprior[idir]; my_ipsi = pointer_psi_nonzeroprior[ipsi]; } else { // no priors my_idir = idir; my_ipsi = ipsi; } #ifdef DEBUG_CHECKSIZES if (my_idir >= rot_angles.size()) { std::cerr<< "my_idir= "<= rot_angles.size()"); } if (my_ipsi >= psi_angles.size()) { std::cerr<< "my_ipsi= "<= psi_angles.size()"); } #endif if (oversampling_order == 0) { my_rot.push_back(rot_angles[my_idir]); my_tilt.push_back(tilt_angles[my_idir]); my_psi.push_back(psi_angles[my_ipsi]); } else if (!is_3D) { // for 2D sampling, only push back oversampled psi rotations pushbackOversampledPsiAngles(my_ipsi, oversampling_order, 0., 0., my_rot, my_tilt, my_psi); } else { // Set up oversampled grid for 3D sampling Healpix_Base HealPixOver(oversampling_order + healpix_order, NEST); int fact = HealPixOver.Nside()/healpix_base.Nside(); int x, y, face; RFLOAT rot, tilt; // Get x, y and face for the original, coarse grid long int ipix = directions_ipix[my_idir]; healpix_base.nest2xyf(ipix, x, y, face); // Loop over the oversampled Healpix pixels on the fine grid for (int j = fact * y; j < fact * (y+1); ++j) { for (int i = fact * x; i < fact * (x+1); ++i) { long int overpix = HealPixOver.xyf2nest(i, j, face); // this one always has to be double (also for SINGLE_PRECISION CALCULATIONS) for call to external library double zz, phi; HealPixOver.pix2ang_z_phi(overpix, zz, phi); rot = RAD2DEG(phi); tilt = ACOSD(zz); // The geometrical considerations about the symmetry below require that rot = [-180,180] and tilt [0,180] checkDirection(rot, tilt); pushbackOversampledPsiAngles(my_ipsi, oversampling_order, rot, tilt, my_rot, my_tilt, my_psi); } } } // Random perturbation if (ABS(random_perturbation) > 0.) { RFLOAT myperturb = random_perturbation * getAngularSampling(); for (int iover = 0; iover < my_rot.size(); iover++) { if (is_3D) { Matrix2D A(3,3), R(3,3); Euler_angles2matrix(my_rot[iover], my_tilt[iover], my_psi[iover], A); Euler_angles2matrix(myperturb, myperturb, myperturb, R); A = A * R; Euler_matrix2angles(A, my_rot[iover], my_tilt[iover], my_psi[iover]); } else { my_psi[iover] += myperturb; } } } } void HealpixSampling::pushbackOversampledPsiAngles(long int ipsi, int oversampling_order, RFLOAT rot, RFLOAT tilt, std::vector &oversampled_rot, std::vector &oversampled_tilt, std::vector &oversampled_psi) { if (oversampling_order == 0) { oversampled_rot.push_back(rot); oversampled_tilt.push_back(tilt); oversampled_psi.push_back(psi_angles[ipsi]); } else { int nr_ipsi_over = ROUND(std::pow(2., oversampling_order)); for (int ipsi_over = 0; ipsi_over < nr_ipsi_over; ipsi_over++) { RFLOAT overpsi = psi_angles[ipsi] - 0.5 * psi_step + (0.5 + ipsi_over) * psi_step / nr_ipsi_over; oversampled_rot.push_back(rot); oversampled_tilt.push_back(tilt); if (!is_3D && overpsi>180.) 
overpsi-=360.; oversampled_psi.push_back(overpsi); } } } /* Calculate an angular distance between two sets of Euler angles */ RFLOAT HealpixSampling::calculateAngularDistance(RFLOAT rot1, RFLOAT tilt1, RFLOAT psi1, RFLOAT rot2, RFLOAT tilt2, RFLOAT psi2) { if (is_3D) { Matrix1D direction1(3), direction1p(3), direction2(3); Euler_angles2direction(rot1, tilt1, direction1); Euler_angles2direction(rot2, tilt2, direction2); // Find the symmetry operation where the Distance based on Euler axes is minimal RFLOAT min_axes_dist = 3600.; RFLOAT rot2p, tilt2p, psi2p; Matrix2D E1, E2; Matrix1D v1, v2; for (int j = 0; j < R_repository.size(); j++) { Euler_apply_transf(L_repository[j], R_repository[j], rot2, tilt2, psi2, rot2p, tilt2p, psi2p); // Distance based on Euler axes Euler_angles2matrix(rot1, tilt1, psi1, E1); Euler_angles2matrix(rot2p, tilt2p, psi2p, E2); RFLOAT axes_dist = 0; for (int i = 0; i < 3; i++) { E1.getRow(i, v1); E2.getRow(i, v2); axes_dist += ACOSD(CLIP(dotProduct(v1, v2), -1., 1.)); } axes_dist /= 3.; if (axes_dist < min_axes_dist) min_axes_dist = axes_dist; }// for all symmetry operations j return min_axes_dist; } else { RFLOAT diff = ABS(psi2 - psi1); return realWRAP(diff, 0., 360.); } } void HealpixSampling::writeBildFileOrientationalDistribution(MultidimArray &pdf_direction, FileName &fn_bild, RFLOAT R, RFLOAT offset, const Matrix2D *Aorient, const Matrix1D *Acom, RFLOAT Rmax_frac, RFLOAT width_frac) { if (!is_3D) return; if (XSIZE(pdf_direction) != rot_angles.size()) { std::cerr << " XSIZE(pdf_direction)= " << XSIZE(pdf_direction) << " rot_angles.size()= " << rot_angles.size() << std::endl; REPORT_ERROR("HealpixSampling::writeBildFileOrientationalDistribution XSIZE(pdf_direction) != rot_angles.size()!"); } RFLOAT pdfmax, pdfmin, pdfmean, pdfsigma; pdf_direction.computeStats(pdfmean, pdfsigma, pdfmin, pdfmax); std::ofstream fh_bild; fh_bild.open(fn_bild.c_str(), std::ios::out); if (!fh_bild) REPORT_ERROR("HealpixSampling::writeBildFileOrientationalDistribution: cannot open " + fn_bild); // 2 * PI * R = 360 degrees, 2*radius should cover angular sampling at width_frac=1 RFLOAT width = width_frac * PI*R*(getAngularSampling()/360.); Matrix1D v(3); for (long int iang = 0; iang < rot_angles.size(); iang++) { RFLOAT pdf = DIRECT_A1D_ELEM(pdf_direction, iang); // Don't make a cylinder for pdf==0 if (pdf > 0.) { // Colour from blue to red according to deviations from sigma_pdf RFLOAT colscale = (pdf - pdfmean) / pdfsigma; colscale = XMIPP_MIN(colscale, 5.); colscale = XMIPP_MAX(colscale, -1.); colscale /= 6.; colscale += 1./6.; // colscale ranges from 0 (-5 sigma) to 1 (+5 sigma) // The length of the cylinder will depend on the pdf_direction RFLOAT Rp = R + Rmax_frac * R * pdf / pdfmax; Euler_angles2direction(rot_angles[iang], tilt_angles[iang], v); if (Aorient != NULL) { // In multi-body refinement, the rotations are relative to (rot,tilt)=(0,90) to prevent problems with psi-prior!!! Matrix2D A; rotation3DMatrix(90., 'Y', A, false); v = (*Aorient).transpose() * A * v; } Matrix1D offsetp(3); if (Acom != NULL) offsetp = *Acom; else offsetp.initZeros(); // Don't include cylinders with zero length, as chimera will complain about that.... if (ABS((R - Rp) * XX(v)) > 0.01 || ABS((R - Rp) * YY(v)) > 0.01 || ABS((R - Rp) * ZZ(v)) > 0.01) { // The width of the cylinders will be determined by the sampling: fh_bild << ".color " << colscale << " 0 " << 1. 
- colscale << std::endl; fh_bild << ".cylinder " << R * XX(v) + offset + XX(offsetp) << " " << R * YY(v) + offset + YY(offsetp) << " " << R * ZZ(v) + offset + ZZ(offsetp) << " " << Rp * XX(v) + offset + XX(offsetp) << " " << Rp * YY(v) + offset + YY(offsetp) << " " << Rp * ZZ(v) + offset + ZZ(offsetp) << " " << width <<"\n"; } } } // Close and write file to disc fh_bild.close(); } ///////// PRIVATE STUFF void HealpixSampling::removePointsOutsideLimitedTiltAngles() { if (ABS(limit_tilt) < 90.) { std::vector pruned_rot_angles; std::vector pruned_tilt_angles; std::vector pruned_directions_ipix; pruned_rot_angles.clear(); pruned_tilt_angles.clear(); pruned_directions_ipix.clear(); for (long int i = 0; i < tilt_angles.size(); i++) { RFLOAT tilt = tilt_angles[i]; // Let tilt angle range from -90 to 90. if (tilt > 90.) tilt -= 180.; // Keep side views || keep top views if ( (limit_tilt > 0. && ABS(tilt) >= ABS(limit_tilt)) || (limit_tilt < 0. && ABS(tilt) <= ABS(limit_tilt)) ) { pruned_rot_angles.push_back(rot_angles[i]); pruned_tilt_angles.push_back(tilt_angles[i]); pruned_directions_ipix.push_back(directions_ipix[i]); } } rot_angles = pruned_rot_angles; tilt_angles = pruned_tilt_angles; directions_ipix = pruned_directions_ipix; } } // The way symmetry is handled was copied from Xmipp. // The original disclaimer is copied below /*************************************************************************** * * Authors: Roberto Marabini * * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA * 02111-1307 USA * * All comments concerning this program package may be sent to the * e-mail address 'xmipp@cnb.csic.es' ***************************************************************************/ void HealpixSampling::removeSymmetryEquivalentPoints(RFLOAT max_ang) { // Maximum distance RFLOAT cos_max_ang = cos(DEG2RAD(max_ang)); RFLOAT my_dotProduct; Matrix1D direction(3), direction1(3); std::vector > directions_vector; // Calculate all vectors and fill directions_vector for (long int i = 0; i < rot_angles.size(); i++) { Euler_angles2direction(rot_angles[i], tilt_angles[i], direction); directions_vector.push_back(direction); } // First call to conventional remove_redundant_points removeSymmetryEquivalentPointsGeometric(pgGroup, pgOrder, directions_vector); #ifdef DEBUG_SAMPLING writeAllOrientationsToBild("orients_sym0.bild", "0 1 0", 0.021); #endif // Only correct the seams (i.e. the borders of the asymmetric units) for small numbers of directions // For large numbers, the sampling is very fine and the probability distributions are probably delta functions anyway // Large numbers take long times to calculate... // Only a small fraction of the points at the border of the AU is thrown away anyway... 
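	// A sketch of the test applied in the loop below (descriptive only; the names
	// used are the ones already defined in this function): two directions d1 and d2
	// are treated as redundant when some symmetry operator j maps d2 to within
	// max_ang of d1, i.e. when
	//   dotProduct(L_repository[j] * (d2.transpose() * R_repository[j]).transpose(), d1) > cos_max_ang
	// For example, with C2 symmetry, points near rot = -90 and rot = +90 (at the
	// same tilt) lie on opposite borders of the asymmetric unit but are mapped onto
	// each other by the 180-degree rotation, so only the first one encountered is kept.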
if (rot_angles.size() < 4000) { // Create no_redundant vectors std::vector > no_redundant_directions_vector; std::vector no_redundant_rot_angles; std::vector no_redundant_tilt_angles; std::vector no_redundant_directions_ipix; // Then check all points versus each other for (long int i = 0; i < rot_angles.size(); i++) { direction1=directions_vector[i]; bool uniq = true; //for (long int k = 0; k < no_redundant_directions_vector.size(); k++) // i is probably closer to latest additions: loop backwards over k.... for (long int k = no_redundant_directions_vector.size() -1; k >= 0; k--) { for (int j = 0; j < R_repository.size(); j++) { direction = L_repository[j] * (no_redundant_directions_vector[k].transpose() * R_repository[j]).transpose(); //Calculate distance my_dotProduct = dotProduct(direction,direction1); if (my_dotProduct > cos_max_ang) { uniq = false; break; } }// for j if (!uniq) break; } // for k if (uniq) { no_redundant_directions_vector.push_back(directions_vector[i]); no_redundant_rot_angles.push_back(rot_angles[i]); no_redundant_tilt_angles.push_back(tilt_angles[i]); no_redundant_directions_ipix.push_back(directions_ipix[i]); } } // for i // Now overwrite the rot/tilt_angles and directions_vectors with their no_redundant counterparts rot_angles = no_redundant_rot_angles; tilt_angles = no_redundant_tilt_angles; directions_ipix = no_redundant_directions_ipix; } } void HealpixSampling::removeSymmetryEquivalentPointsGeometric(const int symmetry, int sym_order, std::vector > &directions_vector) { Matrix2D L(4, 4), R(4, 4); Matrix2D aux(3, 3); Matrix1D row1(3), row2(3), row(3); std::vector > no_redundant_directions_vector; std::vector no_redundant_rot_angles; std::vector no_redundant_tilt_angles; std::vector no_redundant_directions_ipix; RFLOAT my_dotProduct; if (symmetry == pg_CN) {//OK for (long int i = 0; i < rot_angles.size(); i++) { if (rot_angles[i] >= (-180. / sym_order) && rot_angles[i] <= (180. / sym_order)) { no_redundant_rot_angles.push_back(rot_angles[i]); no_redundant_tilt_angles.push_back(tilt_angles[i]); no_redundant_directions_vector.push_back(directions_vector[i]); no_redundant_directions_ipix.push_back(directions_ipix[i]); } }// for i } else if (symmetry == pg_CI || symmetry == pg_CS ) {//OK for (long int i = 0; i < rot_angles.size(); i++) { if (tilt_angles[i] <= 90) { no_redundant_rot_angles.push_back(rot_angles[i]); no_redundant_tilt_angles.push_back(tilt_angles[i]); no_redundant_directions_vector.push_back(directions_vector[i]); no_redundant_directions_ipix.push_back(directions_ipix[i]); } }// for i } else if (symmetry == pg_CNV ) {//OK for (long int i = 0; i < rot_angles.size(); i++) { if (rot_angles[i] >= 0. / sym_order && rot_angles[i] <= 180. / sym_order) { no_redundant_rot_angles.push_back(rot_angles[i]); no_redundant_tilt_angles.push_back(tilt_angles[i]); no_redundant_directions_vector.push_back(directions_vector[i]); no_redundant_directions_ipix.push_back(directions_ipix[i]); } }// for i } else if (symmetry == pg_CNH ) {//OK for (long int i = 0; i < rot_angles.size(); i++) { if (rot_angles[i] >= -180. / sym_order && rot_angles[i] <= 180. / sym_order && tilt_angles[i] <= 90. ) { no_redundant_rot_angles.push_back(rot_angles[i]); no_redundant_tilt_angles.push_back(tilt_angles[i]); no_redundant_directions_vector.push_back(directions_vector[i]); no_redundant_directions_ipix.push_back(directions_ipix[i]); } }// for i } else if (symmetry == pg_SN ) {//OK for (long int i = 0; i < rot_angles.size(); i++) { if (rot_angles[i] >= -180.*2. 
/ sym_order && rot_angles[i] <= 180.*2. / sym_order && tilt_angles[i] <= 90. ) { no_redundant_rot_angles.push_back(rot_angles[i]); no_redundant_tilt_angles.push_back(tilt_angles[i]); no_redundant_directions_vector.push_back(directions_vector[i]); no_redundant_directions_ipix.push_back(directions_ipix[i]); } }// for i } else if (symmetry == pg_DN ) { for (long int i = 0; i < rot_angles.size(); i++) { if (sym_order == 1) { // D1 is special! if (tilt_angles[i] <= 90.) { no_redundant_rot_angles.push_back(rot_angles[i]); no_redundant_tilt_angles.push_back(tilt_angles[i]); no_redundant_directions_vector.push_back(directions_vector[i]); no_redundant_directions_ipix.push_back(directions_ipix[i]); } } else { if (rot_angles[i] >= -180. / (sym_order) + 90. && rot_angles[i] <= 180. / (sym_order) + 90. && tilt_angles[i] <= 90. ) { no_redundant_rot_angles.push_back(rot_angles[i]); no_redundant_tilt_angles.push_back(tilt_angles[i]); no_redundant_directions_vector.push_back(directions_vector[i]); no_redundant_directions_ipix.push_back(directions_ipix[i]); } } }// for i } else if (symmetry == pg_DNV ) { for (long int i = 0; i < rot_angles.size(); i++) { if (rot_angles[i] >= 90. && rot_angles[i] <= 180. / (sym_order) + 90. && tilt_angles[i] <= 90. ) { no_redundant_rot_angles.push_back(rot_angles[i]); no_redundant_tilt_angles.push_back(tilt_angles[i]); no_redundant_directions_vector.push_back(directions_vector[i]); no_redundant_directions_ipix.push_back(directions_ipix[i]); } }// for i } else if (symmetry == pg_DNH ) { for (long int i = 0; i < rot_angles.size(); i++) { if (rot_angles[i] >= 90. && rot_angles[i] <= 180. / (sym_order) + 90. && tilt_angles[i] <= 90. ) { no_redundant_rot_angles.push_back(rot_angles[i]); no_redundant_tilt_angles.push_back(tilt_angles[i]); no_redundant_directions_vector.push_back(directions_vector[i]); no_redundant_directions_ipix.push_back(directions_ipix[i]); } }// for i } else if (symmetry == pg_T ) {//OK Matrix1D _3_fold_axis_1_by_3_fold_axis_2(3); _3_fold_axis_1_by_3_fold_axis_2 = vectorR3(-0.942809, 0., 0.); _3_fold_axis_1_by_3_fold_axis_2.selfNormalize(); Matrix1D _3_fold_axis_2_by_3_fold_axis_3(3); _3_fold_axis_2_by_3_fold_axis_3 = vectorR3(0.471405, 0.272165, 0.7698); _3_fold_axis_2_by_3_fold_axis_3.selfNormalize(); Matrix1D _3_fold_axis_3_by_3_fold_axis_1(3); _3_fold_axis_3_by_3_fold_axis_1 = vectorR3(0.471404, 0.816497, 0.); _3_fold_axis_3_by_3_fold_axis_1.selfNormalize(); for (long int i = 0; i < rot_angles.size(); i++) { if (rot_angles[i] >= 90. && rot_angles[i] <= 150. 
|| rot_angles[i] == 0 ) if ( dotProduct(directions_vector[i], _3_fold_axis_1_by_3_fold_axis_2) >= 0 && dotProduct(directions_vector[i], _3_fold_axis_2_by_3_fold_axis_3) >= 0 && dotProduct(directions_vector[i], _3_fold_axis_3_by_3_fold_axis_1) >= 0 ) { no_redundant_rot_angles.push_back(rot_angles[i]); no_redundant_tilt_angles.push_back(tilt_angles[i]); no_redundant_directions_vector.push_back(directions_vector[i]); no_redundant_directions_ipix.push_back(directions_ipix[i]); } }// for i } else if (symmetry == pg_TD ) {//OK Matrix1D _2_fold_axis_1_by_3_fold_axis_2(3); _2_fold_axis_1_by_3_fold_axis_2 = vectorR3(-0.942809, 0., 0.); _2_fold_axis_1_by_3_fold_axis_2.selfNormalize(); Matrix1D _3_fold_axis_2_by_3_fold_axis_5(3); _3_fold_axis_2_by_3_fold_axis_5 = vectorR3(0.471405, 0.272165, 0.7698); _3_fold_axis_2_by_3_fold_axis_5.selfNormalize(); Matrix1D _3_fold_axis_5_by_2_fold_axis_1(3); _3_fold_axis_5_by_2_fold_axis_1 = vectorR3(0., 0.471405, -0.666667); _3_fold_axis_5_by_2_fold_axis_1.selfNormalize(); for (long int i = 0; i < rot_angles.size(); i++) { // if ( rot_angles[i]>= 120. && // rot_angles[i]<= 150. || // rot_angles[i]== 0 // ) if ( dotProduct(directions_vector[i], _2_fold_axis_1_by_3_fold_axis_2) >= 0 && dotProduct(directions_vector[i], _3_fold_axis_2_by_3_fold_axis_5) >= 0 && dotProduct(directions_vector[i], _3_fold_axis_5_by_2_fold_axis_1) >= 0 ) { no_redundant_rot_angles.push_back(rot_angles[i]); no_redundant_tilt_angles.push_back(tilt_angles[i]); no_redundant_directions_vector.push_back(directions_vector[i]); no_redundant_directions_ipix.push_back(directions_ipix[i]); } }// for i } else if (symmetry == pg_TH ) {//OK Matrix1D _3_fold_axis_1_by_2_fold_axis_1(3); _3_fold_axis_1_by_2_fold_axis_1 = vectorR3(-0.816496, 0., 0.); _3_fold_axis_1_by_2_fold_axis_1.selfNormalize(); Matrix1D _2_fold_axis_1_by_2_fold_axis_2(3); _2_fold_axis_1_by_2_fold_axis_2 = vectorR3(0.707107, 0.408248, -0.57735); _2_fold_axis_1_by_2_fold_axis_2.selfNormalize(); Matrix1D _2_fold_axis_2_by_3_fold_axis_1(3); _2_fold_axis_2_by_3_fold_axis_1 = vectorR3(-0.408248, -0.707107, 0.); _2_fold_axis_2_by_3_fold_axis_1.selfNormalize(); for (long int i = 0; i < rot_angles.size(); i++) { // if ( rot_angles[i]>= 120. && // rot_angles[i]<= 150. || // rot_angles[i]== 0 // ) if ( dotProduct(directions_vector[i], _3_fold_axis_1_by_2_fold_axis_1) >= 0 && dotProduct(directions_vector[i], _2_fold_axis_1_by_2_fold_axis_2) >= 0 && dotProduct(directions_vector[i], _2_fold_axis_2_by_3_fold_axis_1) >= 0 ) { no_redundant_rot_angles.push_back(rot_angles[i]); no_redundant_tilt_angles.push_back(tilt_angles[i]); no_redundant_directions_vector.push_back(directions_vector[i]); no_redundant_directions_ipix.push_back(directions_ipix[i]); } }// for i } else if (symmetry == pg_O ) {//OK Matrix1D _3_fold_axis_1_by_3_fold_axis_2(3); _3_fold_axis_1_by_3_fold_axis_2 = vectorR3(0., -1., 1.); _3_fold_axis_1_by_3_fold_axis_2.selfNormalize(); Matrix1D _3_fold_axis_2_by_4_fold_axis(3); _3_fold_axis_2_by_4_fold_axis = vectorR3(1., 1., 0.); _3_fold_axis_2_by_4_fold_axis.selfNormalize(); Matrix1D _4_fold_axis_by_3_fold_axis_1(3); _4_fold_axis_by_3_fold_axis_1 = vectorR3(-1., 1., 0.); _4_fold_axis_by_3_fold_axis_1.selfNormalize(); for (long int i = 0; i < rot_angles.size(); i++) { if ((rot_angles[i] >= 45. && rot_angles[i] <= 135. && tilt_angles[i] <= 90.) || rot_angles[i] == 0. 
) if ( dotProduct(directions_vector[i], _3_fold_axis_1_by_3_fold_axis_2) >= 0 && dotProduct(directions_vector[i], _3_fold_axis_2_by_4_fold_axis) >= 0 && dotProduct(directions_vector[i], _4_fold_axis_by_3_fold_axis_1) >= 0 ) { no_redundant_rot_angles.push_back(rot_angles[i]); no_redundant_tilt_angles.push_back(tilt_angles[i]); no_redundant_directions_vector.push_back(directions_vector[i]); no_redundant_directions_ipix.push_back(directions_ipix[i]); } }// for i } else if (symmetry == pg_OH ) {//OK Matrix1D _3_fold_axis_1_by_3_fold_axis_2(3); _3_fold_axis_1_by_3_fold_axis_2 = vectorR3(0., -1., 1.); _3_fold_axis_1_by_3_fold_axis_2.selfNormalize(); Matrix1D _3_fold_axis_2_by_4_fold_axis(3); _3_fold_axis_2_by_4_fold_axis = vectorR3(1., 1., 0.); _3_fold_axis_2_by_4_fold_axis.selfNormalize(); Matrix1D _4_fold_axis_by_3_fold_axis_1(3); _4_fold_axis_by_3_fold_axis_1 = vectorR3(-1., 1., 0.); _4_fold_axis_by_3_fold_axis_1.selfNormalize(); for (long int i = 0; i < rot_angles.size(); i++) { if (rot_angles[i] >= 90. && rot_angles[i] <= 135. && tilt_angles[i] <= 90.) if ( dotProduct(directions_vector[i], _3_fold_axis_1_by_3_fold_axis_2) >= 0 && dotProduct(directions_vector[i], _3_fold_axis_2_by_4_fold_axis) >= 0 && dotProduct(directions_vector[i], _4_fold_axis_by_3_fold_axis_1) >= 0 ) { no_redundant_rot_angles.push_back(rot_angles[i]); no_redundant_tilt_angles.push_back(tilt_angles[i]); no_redundant_directions_vector.push_back(directions_vector[i]); no_redundant_directions_ipix.push_back(directions_ipix[i]); } }// for i } else if (symmetry == pg_I || symmetry == pg_I2) {//OK Matrix1D _5_fold_axis_1_by_5_fold_axis_2(3); _5_fold_axis_1_by_5_fold_axis_2 = vectorR3(0., 1., 0.); _5_fold_axis_1_by_5_fold_axis_2.selfNormalize(); Matrix1D _5_fold_axis_2_by_3_fold_axis(3); _5_fold_axis_2_by_3_fold_axis = vectorR3(-0.4999999839058737, -0.8090170074556163, 0.3090169861701543); _5_fold_axis_2_by_3_fold_axis.selfNormalize(); Matrix1D _3_fold_axis_by_5_fold_axis_1(3); _3_fold_axis_by_5_fold_axis_1 = vectorR3(0.4999999839058737, -0.8090170074556163, 0.3090169861701543); _3_fold_axis_by_5_fold_axis_1.selfNormalize(); for (long int i = 0; i < rot_angles.size(); i++) { if ( dotProduct(directions_vector[i], _5_fold_axis_1_by_5_fold_axis_2) >= 0 && dotProduct(directions_vector[i], _5_fold_axis_2_by_3_fold_axis) >= 0 && dotProduct(directions_vector[i], _3_fold_axis_by_5_fold_axis_1) >= 0 ) { no_redundant_rot_angles.push_back(rot_angles[i]); no_redundant_tilt_angles.push_back(tilt_angles[i]); no_redundant_directions_vector.push_back(directions_vector[i]); no_redundant_directions_ipix.push_back(directions_ipix[i]); } }// for i } else if (symmetry == pg_I1) {//OK Matrix2D A(3, 3); Euler_angles2matrix(0, 90, 0, A); Matrix1D _5_fold_axis_1_by_5_fold_axis_2(3); _5_fold_axis_1_by_5_fold_axis_2 = A * vectorR3(0., 1., 0.); _5_fold_axis_1_by_5_fold_axis_2.selfNormalize(); Matrix1D _5_fold_axis_2_by_3_fold_axis(3); _5_fold_axis_2_by_3_fold_axis = A * vectorR3(-0.4999999839058737, -0.8090170074556163, 0.3090169861701543); _5_fold_axis_2_by_3_fold_axis.selfNormalize(); Matrix1D _3_fold_axis_by_5_fold_axis_1(3); _3_fold_axis_by_5_fold_axis_1 = A * vectorR3(0.4999999839058737, -0.8090170074556163, 0.3090169861701543); _3_fold_axis_by_5_fold_axis_1.selfNormalize(); for (long int i = 0; i < rot_angles.size(); i++) { if ( dotProduct(directions_vector[i], _5_fold_axis_1_by_5_fold_axis_2) >= 0 && dotProduct(directions_vector[i], _5_fold_axis_2_by_3_fold_axis) >= 0 && dotProduct(directions_vector[i], _3_fold_axis_by_5_fold_axis_1) >= 0 ) { 
no_redundant_rot_angles.push_back(rot_angles[i]); no_redundant_tilt_angles.push_back(tilt_angles[i]); no_redundant_directions_vector.push_back(directions_vector[i]); no_redundant_directions_ipix.push_back(directions_ipix[i]); } }// for i } else if (symmetry == pg_I3) {//OK Matrix2D A(3, 3); Euler_angles2matrix(0,31.7174745559,0, A); Matrix1D _5_fold_axis_1_by_5_fold_axis_2(3); _5_fold_axis_1_by_5_fold_axis_2 = A * vectorR3(0., 1., 0.); _5_fold_axis_1_by_5_fold_axis_2.selfNormalize(); Matrix1D _5_fold_axis_2_by_3_fold_axis(3); _5_fold_axis_2_by_3_fold_axis = A * vectorR3(-0.4999999839058737, -0.8090170074556163, 0.3090169861701543); _5_fold_axis_2_by_3_fold_axis.selfNormalize(); Matrix1D _3_fold_axis_by_5_fold_axis_1(3); _3_fold_axis_by_5_fold_axis_1 = A * vectorR3(0.4999999839058737, -0.8090170074556163, 0.3090169861701543); _3_fold_axis_by_5_fold_axis_1.selfNormalize(); for (long int i = 0; i < rot_angles.size(); i++) { if ( dotProduct(directions_vector[i], _5_fold_axis_1_by_5_fold_axis_2) >= 0 && dotProduct(directions_vector[i], _5_fold_axis_2_by_3_fold_axis) >= 0 && dotProduct(directions_vector[i], _3_fold_axis_by_5_fold_axis_1) >= 0 ) { no_redundant_rot_angles.push_back(rot_angles[i]); no_redundant_tilt_angles.push_back(tilt_angles[i]); no_redundant_directions_vector.push_back(directions_vector[i]); no_redundant_directions_ipix.push_back(directions_ipix[i]); } }// for i } else if (symmetry == pg_I4) {//OK Matrix2D A(3, 3); Euler_angles2matrix(0,-31.7174745559,0, A); Matrix1D _5_fold_axis_1_by_5_fold_axis_2(3); _5_fold_axis_1_by_5_fold_axis_2 = A * vectorR3(0., 0., 1.); _5_fold_axis_1_by_5_fold_axis_2.selfNormalize(); Matrix1D _5_fold_axis_2_by_3_fold_axis(3); _5_fold_axis_2_by_3_fold_axis = A * vectorR3(0.187592467856686, -0.303530987314591, -0.491123477863004); _5_fold_axis_2_by_3_fold_axis.selfNormalize(); Matrix1D _3_fold_axis_by_5_fold_axis_1(3); _3_fold_axis_by_5_fold_axis_1 = A * vectorR3(0.187592467856686, 0.303530987314591, -0.491123477863004); _3_fold_axis_by_5_fold_axis_1.selfNormalize(); for (long int i = 0; i < rot_angles.size(); i++) { if ( dotProduct(directions_vector[i], _5_fold_axis_2_by_3_fold_axis) <= 0 && dotProduct(directions_vector[i], _3_fold_axis_by_5_fold_axis_1) <= 0 && dotProduct(directions_vector[i], _5_fold_axis_1_by_5_fold_axis_2) <= 0 ) { no_redundant_rot_angles.push_back(rot_angles[i]); no_redundant_tilt_angles.push_back(tilt_angles[i]); no_redundant_directions_vector.push_back(directions_vector[i]); no_redundant_directions_ipix.push_back(directions_ipix[i]); } }// for i } else if (symmetry == pg_I5) {//OK std::cerr << "ERROR: Symmetry pg_I5 not implemented" << std::endl; exit(0); } else if (symmetry == pg_IH || symmetry == pg_I2H) {//OK Matrix1D _5_fold_axis_1_by_5_fold_axis_2(3); _5_fold_axis_1_by_5_fold_axis_2 = vectorR3(0., 1., 0.); _5_fold_axis_1_by_5_fold_axis_2.selfNormalize(); Matrix1D _5_fold_axis_2_by_3_fold_axis(3); _5_fold_axis_2_by_3_fold_axis = vectorR3(-0.4999999839058737, -0.8090170074556163, 0.3090169861701543); _5_fold_axis_2_by_3_fold_axis.selfNormalize(); Matrix1D _3_fold_axis_by_5_fold_axis_1(3); _3_fold_axis_by_5_fold_axis_1 = vectorR3(0.4999999839058737, -0.8090170074556163, 0.3090169861701543); _3_fold_axis_by_5_fold_axis_1.selfNormalize(); Matrix1D _3_fold_axis_by_2_fold_axis(3); _3_fold_axis_by_2_fold_axis = vectorR3(1.,0.,0.); _3_fold_axis_by_2_fold_axis.selfNormalize(); for (long int i = 0; i < rot_angles.size(); i++) { if ( dotProduct(directions_vector[i], _5_fold_axis_1_by_5_fold_axis_2) >= 0 && 
dotProduct(directions_vector[i], _5_fold_axis_2_by_3_fold_axis) >= 0 && dotProduct(directions_vector[i], _3_fold_axis_by_2_fold_axis) >= 0 ) { no_redundant_rot_angles.push_back(rot_angles[i]); no_redundant_tilt_angles.push_back(tilt_angles[i]); no_redundant_directions_vector.push_back(directions_vector[i]); no_redundant_directions_ipix.push_back(directions_ipix[i]); } }// for i } else if (symmetry == pg_I1H) {//OK Matrix2D A(3, 3); Euler_angles2matrix(0, 90, 0, A); Matrix1D _5_fold_axis_1_by_5_fold_axis_2(3); _5_fold_axis_1_by_5_fold_axis_2 = A * vectorR3(0., 1., 0.); _5_fold_axis_1_by_5_fold_axis_2.selfNormalize(); Matrix1D _5_fold_axis_2_by_3_fold_axis(3); _5_fold_axis_2_by_3_fold_axis = A * vectorR3(-0.4999999839058737, -0.8090170074556163, 0.3090169861701543); _5_fold_axis_2_by_3_fold_axis.selfNormalize(); Matrix1D _3_fold_axis_by_5_fold_axis_1(3); _3_fold_axis_by_5_fold_axis_1 = A * vectorR3(0.4999999839058737, -0.8090170074556163, 0.3090169861701543); _3_fold_axis_by_5_fold_axis_1.selfNormalize(); Matrix1D _3_fold_axis_by_2_fold_axis(3); _3_fold_axis_by_2_fold_axis = A * vectorR3(1.,0.,0.); _3_fold_axis_by_2_fold_axis.selfNormalize(); for (long int i = 0; i < rot_angles.size(); i++) { if ( dotProduct(directions_vector[i], _5_fold_axis_1_by_5_fold_axis_2) >= 0 && dotProduct(directions_vector[i], _5_fold_axis_2_by_3_fold_axis) >= 0 && dotProduct(directions_vector[i], _3_fold_axis_by_2_fold_axis) >= 0 ) { no_redundant_rot_angles.push_back(rot_angles[i]); no_redundant_tilt_angles.push_back(tilt_angles[i]); no_redundant_directions_vector.push_back(directions_vector[i]); no_redundant_directions_ipix.push_back(directions_ipix[i]); } }// for i } else if (symmetry == pg_I3H) {//OK Matrix2D A(3, 3); Euler_angles2matrix(0,31.7174745559,0, A); Matrix1D _5_fold_axis_1_by_5_fold_axis_2(3); _5_fold_axis_1_by_5_fold_axis_2 = A * vectorR3(0., 0., 1.); _5_fold_axis_1_by_5_fold_axis_2.selfNormalize(); Matrix1D _5_fold_axis_2_by_3_fold_axis(3); _5_fold_axis_2_by_3_fold_axis = A * vectorR3(0.187592467856686, -0.303530987314591, -0.491123477863004); _5_fold_axis_2_by_3_fold_axis.selfNormalize(); Matrix1D _3_fold_axis_by_5_fold_axis_1(3); _3_fold_axis_by_5_fold_axis_1 = A * vectorR3(0.187592467856686, 0.303530987314591, -0.491123477863004); _3_fold_axis_by_5_fold_axis_1.selfNormalize(); Matrix1D _3_fold_axis_by_2_fold_axis(3); _3_fold_axis_by_2_fold_axis = vectorR3(0.,1.,0.); _3_fold_axis_by_2_fold_axis.selfNormalize(); for (long int i = 0; i < rot_angles.size(); i++) { if ( dotProduct(directions_vector[i], _5_fold_axis_2_by_3_fold_axis) >= 0 && dotProduct(directions_vector[i], _3_fold_axis_by_5_fold_axis_1) >= 0 && dotProduct(directions_vector[i], _5_fold_axis_1_by_5_fold_axis_2) >= 0 && dotProduct(directions_vector[i], _3_fold_axis_by_2_fold_axis) >= 0 ) { no_redundant_rot_angles.push_back(rot_angles[i]); no_redundant_tilt_angles.push_back(tilt_angles[i]); no_redundant_directions_vector.push_back(directions_vector[i]); no_redundant_directions_ipix.push_back(directions_ipix[i]); } }// for i } else if (symmetry == pg_I4H) {//OK Matrix2D A(3, 3); Euler_angles2matrix(0,-31.7174745559,0, A); Matrix1D _5_fold_axis_1_by_5_fold_axis_2(3); _5_fold_axis_1_by_5_fold_axis_2 = A * vectorR3(0., 0., 1.); _5_fold_axis_1_by_5_fold_axis_2.selfNormalize(); Matrix1D _5_fold_axis_2_by_3_fold_axis(3); _5_fold_axis_2_by_3_fold_axis = A * vectorR3(0.187592467856686, -0.303530987314591, -0.491123477863004); _5_fold_axis_2_by_3_fold_axis.selfNormalize(); Matrix1D _3_fold_axis_by_5_fold_axis_1(3); _3_fold_axis_by_5_fold_axis_1 = A 
* vectorR3(0.187592467856686, 0.303530987314591, -0.491123477863004); _3_fold_axis_by_5_fold_axis_1.selfNormalize(); Matrix1D _3_fold_axis_by_2_fold_axis(3); _3_fold_axis_by_2_fold_axis = vectorR3(0.,1.,0.); _3_fold_axis_by_2_fold_axis.selfNormalize(); for (long int i = 0; i < rot_angles.size(); i++) { if ( dotProduct(directions_vector[i], _5_fold_axis_2_by_3_fold_axis) <= 0 && dotProduct(directions_vector[i], _3_fold_axis_by_5_fold_axis_1) <= 0 && dotProduct(directions_vector[i], _5_fold_axis_1_by_5_fold_axis_2) <= 0 && dotProduct(directions_vector[i], _3_fold_axis_by_2_fold_axis) >= 0 ) { no_redundant_rot_angles.push_back(rot_angles[i]); no_redundant_tilt_angles.push_back(tilt_angles[i]); no_redundant_directions_vector.push_back(directions_vector[i]); no_redundant_directions_ipix.push_back(directions_ipix[i]); } }// for i } else if (symmetry == pg_I5H) {//OK std::cerr << "ERROR: pg_I5H Symmetry not implemented" << std::endl; exit(0); } else { std::cerr << "ERROR: Symmetry " << symmetry << "is not known" << std::endl; exit(0); } // Now overwrite the rot/tilt_angles and directions_vectors with their no_redundant counterparts rot_angles = no_redundant_rot_angles; tilt_angles = no_redundant_tilt_angles; directions_vector = no_redundant_directions_vector; directions_ipix = no_redundant_directions_ipix; } #undef DEBUG_SAMPLING relion-3.1.3/src/healpix_sampling.h000066400000000000000000000415241411340063500172750ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef _HEALPIX_SAMPLING_HH #define _HEALPIX_SAMPLING_HH #include "src/Healpix_2.15a/healpix_base.h" #include "src/metadata_table.h" #include "src/macros.h" #include "src/multidim_array.h" #include "src/symmetries.h" #include "src/euler.h" #include "src/transformations.h" #include "src/helix.h" // For the angular searches #define NOPRIOR 0 #define PRIOR_ROTTILT_PSI 1 class HealpixSampling { public: /** Healpix sampling object */ Healpix_Base healpix_base; /** Random perturbation */ RFLOAT random_perturbation; /** Amount of random perturbation */ RFLOAT perturbation_factor; /** In-plane (psi-angle) sampling rate (and original one) */ RFLOAT psi_step, psi_step_ori; /** Healpix order (and original one) */ int healpix_order, healpix_order_ori; /* Translational search range and sampling rate (as of v3.1 in Angstroms!) (and original one) */ // Jun19,2015 - Shaoda, Helical refinement (all in Angstroms!) 
RFLOAT offset_range, offset_step, helical_offset_step, offset_range_ori, offset_step_ori; /** Flag whether this is a real 3D sampling */ bool is_3D; /** Flag whether the translations are 3D (for volume refinement) */ bool is_3d_trans; /** Flag whether relax symmetry */ bool isRelax; /** Name of the Symmetry group */ FileName fn_sym; FileName fn_sym_relax; /** List of symmetry operators */ std::vector > R_repository, L_repository; std::vector > R_repository_relax, L_repository_relax; /** Two numbers that describe the symmetry group */ int pgGroup; int pgOrder; int pgGroupRelaxSym; int pgOrderRelaxSym; /** Limited tilt angle range */ RFLOAT limit_tilt; /** vector with the original pixel number in the healpix object */ std::vector directions_ipix; /** vector with sampling points described by angles */ std::vector rot_angles, tilt_angles; /** vector with the psi-samples */ std::vector psi_angles; /** vector with the X,Y(,Z)-translations (as of v3.1 in Angstroms!) */ std::vector translations_x, translations_y, translations_z; public: // Empty constructor HealpixSampling(): offset_range(0), offset_step(0), is_3d_trans(false), pgGroup(0), pgGroupRelaxSym(0), perturbation_factor(0), is_3D(false), random_perturbation(0), psi_step(0), limit_tilt(0), healpix_order(0), pgOrder(0), pgOrderRelaxSym(0) {} // Destructor ~HealpixSampling() { directions_ipix.clear(); rot_angles.clear(); tilt_angles.clear(); psi_angles.clear(); translations_x.clear(); translations_y.clear(); translations_z.clear(); } // Start from all empty vectors and meaningless parameters void clear(); /** Set up the entire sampling object * * The in-plane (psi-angle) sampling is linear, * input_psi_sampling is modified to contain an integer number of equally-sized sampling points * For the 3D-case, a negative input_psi_sampling will result in a psi-sampling similar to the sqrt of a HealPix pixel area. * * The HEALPix sampling is implemented as described by Gorski et al (2005), The Astrophysical Journal, 622:759-771 * The order defines the number of sampling points, and thereby the angular sampling rate * From this paper is the following table: * * order Npix Theta-sampling * 0 12 58.6 * 1 48 29.3 * 2 192 14.7 * 3 768 7.33 * 4 3072 3.66 * 5 12288 1.83 * 6 49152 0.55 * 7 196608 0.28 * 8 786432 0.14 * etc... * * */ // May 6, 2015 - Shaoda & Sjors - initialise for helical translations void initialise( int ref_dim = -1, bool do_3d_trans = false, bool do_changepsi = false, bool do_warnpsi = false, bool do_local_searches_helical = false, bool do_helical_refine = false, RFLOAT rise_Angst = 0., RFLOAT twist_deg = 0.); // Initialize the symmetry matrices void initialiseSymMats(FileName fn_sym_, int & pgGroup_, int & pgOrder_, std::vector > & R_repository, std::vector > & L_repository); // Reset the random perturbation void resetRandomlyPerturbedSampling(); // Read in all information from the command line to build the sampling object // Jun19,2015 - Shaoda, is that available??? //void read(IOParser &parser, int ori_size, int ref_dim); // Read CL options after a -continue statement. // Jun19,2015 - Shaoda, is that available??? 
//void readContinue(int argc, char **argv, bool &directions_have_changed); // Read in all information from a STAR file (for restarting) void read(FileName fn_in); // Write the sampling information to a STAR file void write(FileName fn_out); /* Set the non-oversampled list of translations in Angstroms * For single particles, offset ranges are equal along different directions * For helices, x offsets (along helical axis) should be less within -+0.5 * rise * */ void setTranslations( RFLOAT new_offset_step = -1., RFLOAT new_offset_range = -1., bool do_local_searches_helical = false, bool do_helical_refine = false, RFLOAT new_helical_offset_step = -1., RFLOAT helical_rise_Angst = 0., RFLOAT helical_twist_deg = 0.); /* Add a single translation */ void addOneTranslation( RFLOAT offset_x, RFLOAT offset_y, RFLOAT offset_z, bool do_clear = false, bool do_helical_refine = false, RFLOAT rot_deg = 0., RFLOAT tilt_deg = 0., RFLOAT psi_deg = 0.); /* Set the non-oversampled lists of directions and in-plane rotations */ void setOrientations(int _order = -1, RFLOAT _psi_step = -1.); /* Add a single orientation */ void addOneOrientation(RFLOAT rot, RFLOAT tilt, RFLOAT psi, bool do_clear = false); /* Write all orientations as a sphere in a bild file * Mainly useful for debugging */ void writeAllOrientationsToBild(FileName fn_bild, std::string rgb = "1 0 0", RFLOAT size = 0.025); void writeNonZeroPriorOrientationsToBild(FileName fn_bild, RFLOAT rot_prior, RFLOAT tilt_prior, std::vector &pointer_dir_nonzeroprior, std::string rgb = "0 0 1", RFLOAT size = 0.025); /* Sjors, 9nov2015: new rot-priors for DNA-origami-bound refinements */ RFLOAT calculateDeltaRot(Matrix1D my_direction, RFLOAT rot_prior); /* Select all orientations with zero prior probabilities * store all these in the vectors pointer_dir_nonzeroprior and pointer_psi_nonzeroprior * Also precalculate their prior probabilities and store in directions_prior and psi_prior */ // Jun 04 - Shaoda & Sjors, Bimodel psi searches for helices void selectOrientationsWithNonZeroPriorProbability( RFLOAT prior_rot, RFLOAT prior_tilt, RFLOAT prior_psi, RFLOAT sigma_rot, RFLOAT sigma_tilt, RFLOAT sigma_psi, std::vector &pointer_dir_nonzeroprior, std::vector &directions_prior, std::vector &pointer_psi_nonzeroprior, std::vector &psi_prior, bool do_bimodal_search_psi = false, RFLOAT sigma_cutoff = 3., RFLOAT sigma_tilt_from_ninety = -1., RFLOAT sigma_psi_from_zero = -1.); void selectOrientationsWithNonZeroPriorProbabilityFor3DHelicalReconstruction( RFLOAT prior_rot, RFLOAT prior_tilt, RFLOAT prior_psi, RFLOAT sigma_rot, RFLOAT sigma_tilt, RFLOAT sigma_psi, std::vector &pointer_dir_nonzeroprior, std::vector &directions_prior, std::vector &pointer_psi_nonzeroprior, std::vector &psi_prior, bool do_auto_refine_local_searches, RFLOAT prior_psi_flip_ratio = 0.5, RFLOAT prior_rot_flip_ratio = 0.5, // KThurber RFLOAT sigma_cutoff = 3.); // Find the symmetry mate by searching the Healpix library void findSymmetryMate(long int idir_, RFLOAT prior_, std::vector &pointer_dir_nonzeroprior, std::vector &directions_prior, std::vector &idir_flag); /** Get the symmetry group of this sampling object */ FileName symmetryGroup(); /* Get the original HEALPix index for this direction * Note that because of symmetry-equivalence removal idir no longer corresponds to the HEALPix pixel number * */ long int getHealPixIndex(long int idir); /** The geometrical considerations about the symmetry below require that rot = [-180,180] and tilt [0,180] */ void checkDirection(RFLOAT &rot, RFLOAT &tilt); /* 
Get the rot and tilt angles in the center of the ipix'th HEALPix sampling pixel * This involves calculations in the HEALPix library */ void getDirectionFromHealPix(long int ipix, RFLOAT &rot, RFLOAT &tilt); /* Get the translational sampling step in Angstroms */ RFLOAT getTranslationalSampling(int adaptive_oversampling = 0); /* Get the translational sampling step along helical axis in Angstroms */ RFLOAT getHelicalTranslationalSampling(int adaptive_oversampling = 0); /* Get approximate angular sampling in degrees for any adaptive oversampling */ RFLOAT getAngularSampling(int adaptive_oversampling = 0); /* Get the number of symmetry-unique sampling points * Note that because of symmetry-equivalence removal this number is not the number of original HEALPix pixels * In the case of orientational priors, the number of directions with non-zero prior probability is returned */ long int NrDirections(int oversampling_order = 0, const std::vector *pointer_dir_nonzeroprior = NULL); /* Get the number of in-plane (psi-angle) sampling points */ long int NrPsiSamplings(int oversampling_order = 0, const std::vector *pointer_psi_nonzeroprior = NULL); /* Get the number of in-plane translational sampling points */ long int NrTranslationalSamplings(int oversampling_order = 0); /* Get the total number of (oversampled) sampling points, i.e. all (rot, tilt, psi, xoff, yoff) quintets */ long int NrSamplingPoints(int oversampling_order = 0, const std::vector *pointer_dir_nonzeroprior = NULL, const std::vector *pointer_psi_nonzeroprior = NULL); /* How often is each orientation oversampled? */ int oversamplingFactorOrientations(int oversampling_order); /* How often is each translation oversampled? */ int oversamplingFactorTranslations(int oversampling_order); /* Get the rot and tilt angles from the precalculated sampling_points_angles vector * This does not involve calculations in the HEALPix library * Note that because of symmetry-equivalence removal idir no longer corresponds to the HEALPix pixel number */ void getDirection(long int idir, RFLOAT &rot, RFLOAT &tilt); /* Get the value for the ipsi'th precalculated psi angle */ void getPsiAngle(long int ipsi, RFLOAT &psi); /* Get the value for the itrans'th precalculated translations */ void getTranslationInPixel(long int itrans, RFLOAT my_pixel_size, RFLOAT &trans_x, RFLOAT &trans_y, RFLOAT &trans_z); /* Get the position of this sampling point in the original array */ long int getPositionSamplingPoint(int iclass, long int idir, long int ipsi, long int itrans); /* Get the position of this sampling point in the oversampled array */ long int getPositionOversampledSamplingPoint(long int ipos, int oversampling_order, int iover_rot, int iover_trans); /* Get the vectors of (xx, yy) for a more finely (oversampled) translational sampling * The oversampling_order is the difference in order of the original (coarse) and the oversampled (fine) sampling * An oversampling_order == 0 will give rise to the same (xx, yy) pair as the original itrans. * An oversampling_order == 1 will give rise to 2*2 new (rot, tilt) pairs. * An oversampling_order == 2 will give rise to 4*4 new (rot, tilt) pairs. * etc. 
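	 * (Illustrative note: the new pairs produced here are translational (xx, yy)
	 *  offsets rather than angles, so each increment of oversampling_order
	 *  effectively halves the translational step, e.g. a 2 Angstrom step sampled
	 *  at oversampling_order == 1 becomes 1 Angstrom.)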
*/ void getTranslationsInPixel(long int itrans, int oversampling_order, RFLOAT my_pixel_size, std::vector &my_translations_x, std::vector &my_translations_y, std::vector &my_translations_z, bool do_helical_refine = false); /* Get the vectors of (rot, tilt, psi) angle triplets for a more finely (oversampled) sampling * The oversampling_order is the difference in order of the original (coarse) and the oversampled (fine) sampling * An oversampling_order == 0 will give rise to the same (rot, tilt, psi) triplet as the original ipix. * An oversampling_order == 1 will give rise to 2*2*2 new (rot, tilt, psi) triplets. * An oversampling_order == 2 will give rise to 4*4*4 new (rot, tilt, psi) triplets. * etc. * * If only_nonzero_prior is true, then only the orientations with non-zero prior probabilities will be returned * This is for local angular searches */ void getOrientations(long int idir, long int ipsi, int oversampling_order, std::vector &my_rot, std::vector &my_tilt, std::vector &my_psi, std::vector &pointer_dir_nonzeroprior, std::vector &directions_prior, std::vector &pointer_psi_nonzeroprior, std::vector &psi_prior); /* Gets the vector of psi angles for a more finely (oversampled) sampling and * pushes each instance back into the oversampled_orientations vector with the given rot and tilt * The oversampling_order is the difference in order of the original (coarse) and the oversampled (fine) sampling * An oversampling_order == 0 will give rise to the same psi angle as the original ipsi. * An oversampling_order == 1 will give rise to 2 new psi angles * An oversampling_order == 2 will give rise to 4 new psi angles * etc. */ void pushbackOversampledPsiAngles(long int ipsi, int oversampling_order, RFLOAT rot, RFLOAT tilt, std::vector &oversampled_rot, std::vector &oversampled_tilt, std::vector &oversampled_psi); /* Calculate an angular distance between two sets of Euler angles */ RFLOAT calculateAngularDistance(RFLOAT rot1, RFLOAT tilt1, RFLOAT psi1, RFLOAT rot2, RFLOAT tilt2, RFLOAT psi2); /* Write a BILD file describing the angular distribution * R determines the radius of the sphere on which cylinders will be placed * Rmax_frac determines the length of the longest cylinder (relative to R, 0.2 + +20%) * width_frac determines how broad each cylinder is. frac=1 means they touch each other * */ void writeBildFileOrientationalDistribution(MultidimArray &pdf_direction, FileName &fn_bild, RFLOAT R, RFLOAT offset = 0., const Matrix2D *Aorient = NULL, const Matrix1D *Acom = NULL, RFLOAT Rmax_frac = 0.3, RFLOAT width_frac = 0.5); private: /* Eliminate points from the sampling_points_vector and sampling_points_angles vectors * that are outside the allowed tilt range. 
* Let tilt angles range from -90 to 90, then: * if (limit_tilt > 0) then top views (that is with ABS(tilt) > limit_tilt) are removed and side views are kept * if (limit_tilt < 0) then side views (that is with ABS(tilt) < limit_tilt) are removed and top views are kept */ void removePointsOutsideLimitedTiltAngles(); /* Eliminate symmetry-equivalent points from the sampling_points_vector and sampling_points_angles vectors This function first calls removeSymmetryEquivalentPointsGeometric, and then checks each point versus all others to calculate an angular distance If this distance is less than 0.8 times the angular sampling, the point is deleted This cares care of sampling points near the edge of the geometrical considerations */ void removeSymmetryEquivalentPoints(RFLOAT max_ang); /* eliminate symmetry-related points based on simple geometrical considerations, symmetry group, symmetry order */ void removeSymmetryEquivalentPointsGeometric(const int symmetry, int sym_order, std::vector > &sampling_points_vector); }; //@} #endif relion-3.1.3/src/helix.cpp000066400000000000000000006301451411340063500154200ustar00rootroot00000000000000/*************************************************************************** * * Author: "Shaoda He" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include "src/macros.h" #include "src/helix.h" // Wider search ranges for helical twist and rise #define WIDE_HELICAL_TWIST_AND_RISE_SEARCHES // Exclude segments close to the edges of the 2D micrographs / 3D tomograms. Please switch it on. 
#define EXCLUDE_SEGMENTS_ON_THE_EDGES void makeHelicalSymmetryList( std::vector& list, RFLOAT rise_min_pix, RFLOAT rise_max_pix, RFLOAT rise_step_pix, bool search_rise, RFLOAT twist_min_deg, RFLOAT twist_max_deg, RFLOAT twist_step_deg, bool search_twist) { // Assume all parameters are within range RFLOAT rise_pix, twist_deg; int rise_samplings, twist_samplings; std::vector tmp_list; if (search_rise) rise_samplings = ROUND(fabs(rise_max_pix - rise_min_pix) / fabs(rise_step_pix)); else { rise_min_pix = (rise_min_pix + rise_max_pix) / 2.; rise_samplings = 0; } if (search_twist) twist_samplings = ROUND(fabs(twist_max_deg - twist_min_deg) / fabs(twist_step_deg)); else { twist_min_deg = (twist_min_deg + twist_max_deg) / 2.; twist_samplings = 0; } // Store a matrix of symmetries tmp_list.clear(); for (int ii = 0; ii <= rise_samplings; ii++) { for (int jj = 0; jj <= twist_samplings; jj++) { rise_pix = rise_min_pix + rise_step_pix * RFLOAT(ii); twist_deg = twist_min_deg + twist_step_deg * RFLOAT(jj); tmp_list.push_back(HelicalSymmetryItem(twist_deg, rise_pix)); } } // Check duplications and return this matrix RFLOAT twist_dev_deg, twist_avg_deg, rise_dev_pix, rise_avg_pix; RFLOAT err_max = (1e-10); bool same_twist, same_rise; for (int ii = 0; ii < list.size(); ii++) { for (int jj = 0; jj < tmp_list.size(); jj++) { same_twist = same_rise = false; twist_dev_deg = fabs(list[ii].twist_deg - tmp_list[jj].twist_deg); rise_dev_pix = fabs(list[ii].rise_pix - tmp_list[jj].rise_pix); twist_avg_deg = (fabs(list[ii].twist_deg) + fabs(tmp_list[jj].twist_deg)) / 2.; rise_avg_pix = (fabs(list[ii].rise_pix) + fabs(tmp_list[jj].rise_pix)) / 2.; if (twist_avg_deg < err_max) { if (twist_dev_deg < err_max) same_twist = true; } else { if ((twist_dev_deg / twist_avg_deg) < err_max) same_twist = true; } if (rise_avg_pix < err_max) { if (rise_dev_pix < err_max) same_rise = true; } else { if ( (rise_dev_pix / rise_avg_pix) < err_max) same_rise = true; } if (same_twist && same_rise) tmp_list[jj].dev = list[ii].dev; } } list.clear(); list = tmp_list; return; }; bool calcCCofHelicalSymmetry( const MultidimArray& v, RFLOAT r_min_pix, RFLOAT r_max_pix, RFLOAT z_percentage, RFLOAT rise_pix, RFLOAT twist_deg, RFLOAT& cc, int& nr_asym_voxels) { // TODO: go through every line to find bugs!!! int rec_len, r_max_XY, startZ, finishZ; double dist_r_pix, sum_pw1, sum_pw2, sum_n, sum_chunk = 0., sum_chunk_n = 0.; //std::vector sin_rec, cos_rec, dev_voxel, dev_chunk; std::vector sin_rec, cos_rec; if ( (STARTINGZ(v) != FIRST_XMIPP_INDEX(ZSIZE(v))) || (STARTINGY(v) != FIRST_XMIPP_INDEX(YSIZE(v))) || (STARTINGX(v) != FIRST_XMIPP_INDEX(XSIZE(v))) ) REPORT_ERROR("helix.cpp::calcCCofHelicalSymmetry(): The origin of input 3D MultidimArray is not at the center (use v.setXmippOrigin() before calling this function)!"); // Check r_max r_max_XY = (XSIZE(v) < YSIZE(v)) ? XSIZE(v) : YSIZE(v); r_max_XY = (r_max_XY + 1) / 2 - 1; if ( r_max_pix > (((RFLOAT)(r_max_XY)) - 0.01) ) // 0.01 - avoid segmentation fault r_max_pix = (((RFLOAT)(r_max_XY)) - 0.01); // Set startZ and finishZ startZ = FLOOR( (-1.) * ((RFLOAT)(ZSIZE(v)) * z_percentage * 0.5) ); finishZ = CEIL( ((RFLOAT)(ZSIZE(v))) * z_percentage * 0.5 ); startZ = (startZ <= (STARTINGZ(v))) ? (STARTINGZ(v) + 1) : (startZ); finishZ = (finishZ >= (FINISHINGZ(v))) ? (FINISHINGZ(v) - 1) : (finishZ); // Calculate tabulated sine and cosine values rec_len = 2 + (CEIL((RFLOAT(ZSIZE(v)) + 2.) 
/ rise_pix)); sin_rec.clear(); cos_rec.clear(); sin_rec.resize(rec_len); cos_rec.resize(rec_len); for (int id = 0; id < rec_len; id++) #ifdef RELION_SINGLE_PRECISION SINCOSF(DEG2RAD(((RFLOAT)(id)) * twist_deg), &sin_rec[id], &cos_rec[id]); #else SINCOS(DEG2RAD(((RFLOAT)(id)) * twist_deg), &sin_rec[id], &cos_rec[id]); #endif rise_pix = fabs(rise_pix); // Test a chunk of Z length = rise //dev_chunk.clear(); // Iterate through all coordinates on Z, Y and then X axes FOR_ALL_ELEMENTS_IN_ARRAY3D(v) { RFLOAT xp, yp, zp, fx, fy, fz; // Test a chunk of Z length = rise // for(idz = startZ; (idz <= (startZ + ((int)(floor(rise_pix))))) && (idz <= finishZ); idz++) if ( (k < startZ) || (k > (startZ + (FLOOR(rise_pix)))) || (k > finishZ) ) continue; dist_r_pix = sqrt(i * i + j * j); if ( (dist_r_pix < r_min_pix) || (dist_r_pix > r_max_pix) ) continue; // Pick a voxel in the chunk //dev_voxel.clear(); //dev_voxel.push_back(A3D_ELEM(v, k, i, j)); // Pick other voxels according to this voxel and helical symmetry zp = k; int rot_id = 0; sum_pw1 = A3D_ELEM(v, k, i, j); sum_pw2 = A3D_ELEM(v, k, i, j)*A3D_ELEM(v, k, i, j); sum_n = 1.; while (1) { // Rise zp += rise_pix; if (zp > finishZ) // avoid segmentation fault - finishZ is always strictly smaller than FINISHINGZ(v)! break; // Twist rot_id++; xp = ((RFLOAT)(j)) * cos_rec[rot_id] - ((RFLOAT)(i)) * sin_rec[rot_id]; yp = ((RFLOAT)(j)) * sin_rec[rot_id] + ((RFLOAT)(i)) * cos_rec[rot_id]; // Trilinear interpolation (with physical coords) // Subtract STARTINGX,Y,Z to accelerate access to data // In that way use DIRECT_A3D_ELEM, rather than A3D_ELEM int x0, y0, z0, x1, y1, z1; x0 = FLOOR(xp); fx = xp - x0; x0 -= STARTINGX(v); x1 = x0 + 1; y0 = FLOOR(yp); fy = yp - y0; y0 -= STARTINGY(v); y1 = y0 + 1; z0 = FLOOR(zp); fz = zp - z0; z0 -= STARTINGZ(v); z1 = z0 + 1; // DEBUG if ( (x0 < 0) || (y0 < 0) || (z0 < 0) || (x1 >= XSIZE(v)) || (y1 >= YSIZE(v)) || (z1 >= ZSIZE(v)) ) std::cout << " idzidyidx= " << k << ", " << i << ", " << j << ", x0x1y0y1z0z1= " << x0 << ", " << x1 << ", " << y0 << ", " << y1 << ", " << z0 << ", " << z1 << std::endl; RFLOAT d000, d001, d010, d011, d100, d101, d110, d111; d000 = DIRECT_A3D_ELEM(v, z0, y0, x0); d001 = DIRECT_A3D_ELEM(v, z0, y0, x1); d010 = DIRECT_A3D_ELEM(v, z0, y1, x0); d011 = DIRECT_A3D_ELEM(v, z0, y1, x1); d100 = DIRECT_A3D_ELEM(v, z1, y0, x0); d101 = DIRECT_A3D_ELEM(v, z1, y0, x1); d110 = DIRECT_A3D_ELEM(v, z1, y1, x0); d111 = DIRECT_A3D_ELEM(v, z1, y1, x1); RFLOAT dx00, dx01, dx10, dx11; dx00 = LIN_INTERP(fx, d000, d001); dx01 = LIN_INTERP(fx, d100, d101); dx10 = LIN_INTERP(fx, d010, d011); dx11 = LIN_INTERP(fx, d110, d111); RFLOAT dxy0, dxy1, ddd; dxy0 = LIN_INTERP(fy, dx00, dx10); dxy1 = LIN_INTERP(fy, dx01, dx11); ddd = LIN_INTERP(fz, dxy0, dxy1); // Record this voxel sum_pw1 += ddd; sum_pw2 += ddd * ddd; sum_n += 1.; // dev_voxel.push_back(ddd); } sum_pw1 /= sum_n; sum_pw2 /= sum_n; //dev_chunk.push_back(sum_pw2 - sum_pw1 * sum_pw1); // Sum_chunk sum_chunk += sum_pw2 - sum_pw1 * sum_pw1; sum_chunk_n += 1.; /* // Calc dev of this voxel in the chunk if (dev_voxel.size() > 1) { sum_pw1 = sum_pw2 = 0.; for (int id = 0; id < dev_voxel.size(); id++) { sum_pw1 += dev_voxel[id]; sum_pw2 += dev_voxel[id] * dev_voxel[id]; } sum_pw1 /= dev_voxel.size(); sum_pw2 /= dev_voxel.size(); // TODO: record stddev or dev??? 
dev_chunk.push_back(sum_pw2 - sum_pw1 * sum_pw1); } dev_voxel.clear(); */ } // Calc avg of all voxels' devs in this chunk (for a specific helical symmetry) if (sum_chunk_n < 1) { cc = (1e10); nr_asym_voxels = 0; return false; } else { cc = (sum_chunk / sum_chunk_n); } nr_asym_voxels = sum_chunk_n; //dev_chunk.clear(); return true; }; bool localSearchHelicalSymmetry( const MultidimArray& v, RFLOAT pixel_size_A, RFLOAT sphere_radius_A, RFLOAT cyl_inner_radius_A, RFLOAT cyl_outer_radius_A, RFLOAT z_percentage, RFLOAT rise_min_A, RFLOAT rise_max_A, RFLOAT rise_inistep_A, RFLOAT& rise_refined_A, RFLOAT twist_min_deg, RFLOAT twist_max_deg, RFLOAT twist_inistep_deg, RFLOAT& twist_refined_deg, std::ostream* o_ptr) { // TODO: whether iterations can exit & this function works for negative twist int iter, box_len, nr_asym_voxels, nr_rise_samplings, nr_twist_samplings, nr_min_samplings, nr_max_samplings, best_id, iter_not_converged; RFLOAT r_min_pix, r_max_pix, best_dev, err_max; RFLOAT rise_min_pix, rise_max_pix, rise_step_pix, rise_inistep_pix, twist_step_deg, rise_refined_pix; RFLOAT rise_local_min_pix, rise_local_max_pix, twist_local_min_deg, twist_local_max_deg; std::vector helical_symmetry_list; bool out_of_range, search_rise, search_twist; // Check input 3D reference if (v.getDim() != 3) REPORT_ERROR("helix.cpp::localSearchHelicalSymmetry(): Input helical reference is not 3D! (v.getDim() = " + integerToString(v.getDim()) + ")"); // Set the length of the box box_len = (XSIZE(v) < YSIZE(v)) ? XSIZE(v) : YSIZE(v); box_len = (box_len < ZSIZE(v)) ? box_len : ZSIZE(v); // Initialise refined helical parameters // Check helical parameters rise_refined_A = (rise_min_A + rise_max_A) / 2.; twist_refined_deg = (twist_min_deg + twist_max_deg) / 2.; checkParametersFor3DHelicalReconstruction( false, true, 1, rise_refined_A, rise_min_A, rise_max_A, twist_refined_deg, twist_min_deg, twist_max_deg, box_len, pixel_size_A, z_percentage, sphere_radius_A * 2., cyl_inner_radius_A * 2., cyl_outer_radius_A * 2.); rise_refined_pix = rise_refined_A / pixel_size_A; // Initialise other parameters out_of_range = false; r_min_pix = cyl_inner_radius_A / pixel_size_A; r_max_pix = cyl_outer_radius_A / pixel_size_A; rise_inistep_pix = rise_inistep_A / pixel_size_A; rise_local_min_pix = rise_min_pix = rise_min_A / pixel_size_A; rise_local_max_pix = rise_max_pix = rise_max_A / pixel_size_A; twist_local_min_deg = twist_min_deg; twist_local_max_deg = twist_max_deg; if (o_ptr != NULL) { (*o_ptr) << " ### RELION helix toolbox - local searches of helical symmetry" << std::endl; (*o_ptr) << " --> Box size = " << ((long int)(XSIZE(v))) << ", Z(%) = " << (z_percentage * 100.) << "%, pixel size = " << pixel_size_A << " Angstroms, inner diameter = " << (cyl_inner_radius_A * 2.) << " Angstroms, outer diameter = " << (cyl_outer_radius_A * 2.) << " Angstroms." << std::endl; (*o_ptr) << " --> Searching twist from " << twist_min_deg << " to " << twist_max_deg << " degrees, rise from " << rise_min_A << " to " << rise_max_A << " Angstroms." << std::endl; } // Initial searches - Iteration 1 // Sampling of twist should be smaller than 1 degree // Sampling of rise should be smaller than 1% // And also make sure to search for at least 5*5 sampling points // Avoid too many searches (at most 1000*1000) err_max = (1e-5); search_rise = search_twist = true; nr_min_samplings = 5; nr_max_samplings = 1000; twist_inistep_deg = (twist_inistep_deg < (1e-5)) ? (1.) 
: (twist_inistep_deg); nr_twist_samplings = CEIL(fabs(twist_local_min_deg - twist_local_max_deg) / twist_inistep_deg); nr_twist_samplings = (nr_twist_samplings > nr_min_samplings) ? (nr_twist_samplings) : (nr_min_samplings); nr_twist_samplings = (nr_twist_samplings < nr_max_samplings) ? (nr_twist_samplings) : (nr_max_samplings); twist_step_deg = fabs(twist_local_min_deg - twist_local_max_deg) / RFLOAT(nr_twist_samplings); if ( fabs(twist_local_min_deg - twist_local_max_deg) < err_max) { search_twist = false; twist_step_deg = 0.; twist_min_deg = twist_max_deg = twist_local_min_deg = twist_local_max_deg = twist_refined_deg; } if (o_ptr != NULL) { if (search_twist) (*o_ptr) << " --> Initial searching step of twist is " << twist_step_deg << " degrees (" << nr_twist_samplings << " samplings)." << std::endl; else (*o_ptr) << " --> No need to search for twist..." << std::endl; } rise_inistep_pix = (rise_inistep_pix < (1e-5)) ? (1e30) : (rise_inistep_pix); rise_step_pix = 0.01 * ((fabs(rise_local_min_pix) + fabs(rise_local_max_pix)) / 2.); rise_step_pix = (rise_step_pix < rise_inistep_pix) ? (rise_step_pix) : (rise_inistep_pix); nr_rise_samplings = CEIL(fabs(rise_local_min_pix - rise_local_max_pix) / rise_step_pix); nr_rise_samplings = (nr_rise_samplings > nr_min_samplings) ? (nr_rise_samplings) : (nr_min_samplings); nr_rise_samplings = (nr_rise_samplings < nr_max_samplings) ? (nr_rise_samplings) : (nr_max_samplings); rise_step_pix = fabs(rise_local_min_pix - rise_local_max_pix) / RFLOAT(nr_rise_samplings); if ((fabs(rise_local_min_pix - rise_local_max_pix) / fabs(rise_refined_pix)) < err_max) { search_rise = false; rise_step_pix = 0.; rise_min_pix = rise_max_pix = rise_local_min_pix = rise_local_max_pix = rise_refined_pix; } if (o_ptr != NULL) { if (search_rise) (*o_ptr) << " --> Initial searching step of rise is " << rise_step_pix * pixel_size_A << " Angstroms (" << nr_rise_samplings << " samplings)." << std::endl; else (*o_ptr) << " --> No need to search for rise..." << std::endl; (*o_ptr) << " --> " << nr_twist_samplings * nr_rise_samplings << " initial samplings." << std::endl; } if ( (!search_twist) && (!search_rise) ) return true; if (o_ptr != NULL) (*o_ptr) << std::endl << " TAG TWIST(DEGREES) RISE(ANGSTROMS) DEV" << std::endl; // Local searches helical_symmetry_list.clear(); iter_not_converged = 0; for (iter = 1; iter <= 100; iter++) { // TODO: please check this!!! // rise_step_pix and twist_step_deg should be strictly > 0 now! (if they are to be searched) makeHelicalSymmetryList( helical_symmetry_list, rise_local_min_pix, rise_local_max_pix, rise_step_pix, search_rise, twist_local_min_deg, twist_local_max_deg, twist_step_deg, search_twist); if (helical_symmetry_list.size() < 1) REPORT_ERROR("helix.cpp::localSearchHelicalSymmetry(): BUG No helical symmetries are found in the search list!"); best_dev = (1e30); best_id = -1; for (int ii = 0; ii < helical_symmetry_list.size(); ii++) { // If this symmetry is not calculated before if (helical_symmetry_list[ii].dev > (1e30)) { // TODO: please check this!!! 
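// Note on the score computed below: for every trial (twist, rise) pair,
// calcCCofHelicalSymmetry() accumulates, for each starting voxel in one rise-thick
// chunk, the variance of its symmetry-related copies along Z,
//   dev_voxel = E[v^2] - (E[v])^2,
// and "dev" is the mean of these variances over the chunk. The trial pair with the
// smallest dev (i.e. the most self-consistent under that symmetry) is kept below.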
calcCCofHelicalSymmetry( v, r_min_pix, r_max_pix, z_percentage, helical_symmetry_list[ii].rise_pix, helical_symmetry_list[ii].twist_deg, helical_symmetry_list[ii].dev, nr_asym_voxels); if (o_ptr != NULL) (*o_ptr) << " NEW" << std::flush; } else { if (o_ptr != NULL) (*o_ptr) << " OLD" << std::flush; } if (helical_symmetry_list[ii].dev < best_dev) { best_dev = helical_symmetry_list[ii].dev; best_id = ii; } if (o_ptr != NULL) { (*o_ptr) << std::setw(15) << std::setiosflags(std::ios::fixed) << helical_symmetry_list[ii].twist_deg << std::resetiosflags(std::ios::fixed) << std::setw(15) << std::setiosflags(std::ios::fixed) << (helical_symmetry_list[ii].rise_pix * pixel_size_A) << std::resetiosflags(std::ios::fixed) << std::setw(20) << std::setiosflags(std::ios::scientific) << helical_symmetry_list[ii].dev << std::resetiosflags(std::ios::scientific) << std::endl; } } // Update refined symmetry rise_refined_pix = helical_symmetry_list[best_id].rise_pix; rise_refined_A = rise_refined_pix * pixel_size_A; twist_refined_deg = helical_symmetry_list[best_id].twist_deg; if (o_ptr != NULL) { (*o_ptr) << " ################################################################################" << std::endl; (*o_ptr) << " ##### Refined Twist = " << twist_refined_deg << ", Rise = " << rise_refined_A << ", Dev = " << helical_symmetry_list[best_id].dev << std::endl; (*o_ptr) << " ################################################################################" << std::endl; } // Out of range... if ( (search_rise) && (rise_refined_pix < rise_min_pix) ) { out_of_range = true; search_rise = false; rise_step_pix = 0.; rise_local_min_pix = rise_local_max_pix = rise_refined_pix = rise_min_pix; rise_refined_A = rise_refined_pix * pixel_size_A; } if ( (search_rise) && (rise_refined_pix > rise_max_pix) ) { out_of_range = true; search_rise = false; rise_step_pix = 0.; rise_local_min_pix = rise_local_max_pix = rise_refined_pix = rise_max_pix; rise_refined_A = rise_refined_pix * pixel_size_A; } if ( (search_twist) && (twist_refined_deg < twist_min_deg) ) { out_of_range = true; search_twist = false; twist_step_deg = 0.; twist_local_min_deg = twist_local_max_deg = twist_refined_deg = twist_min_deg; } if ( (search_twist) && (twist_refined_deg > twist_max_deg) ) { out_of_range = true; search_twist = false; twist_step_deg = 0.; twist_local_min_deg = twist_local_max_deg = twist_refined_deg = twist_max_deg; } // Not converged in this iteration... if ( (iter > 1) && (!out_of_range) ) { // If the symmetry does not fall into the local search range // Try 7*, 9*, 11* ... samplings bool this_iter_not_converged = false; if (search_rise) { if ( (rise_refined_pix < (rise_local_min_pix + rise_step_pix * 0.5)) || (rise_refined_pix > (rise_local_max_pix - rise_step_pix * 0.5)) ) { this_iter_not_converged = true; rise_local_min_pix = rise_refined_pix - ((RFLOAT)(iter_not_converged) + 3.) * rise_step_pix; rise_local_max_pix = rise_refined_pix + ((RFLOAT)(iter_not_converged) + 3.) * rise_step_pix; } } if (search_twist) { if ( (twist_refined_deg < (twist_local_min_deg + twist_step_deg * 0.5)) || (twist_refined_deg > (twist_local_max_deg - twist_step_deg * 0.5)) ) { this_iter_not_converged = true; twist_local_min_deg = twist_refined_deg - ((RFLOAT)(iter_not_converged) + 3.) * twist_step_deg; twist_local_max_deg = twist_refined_deg + ((RFLOAT)(iter_not_converged) + 3.) * twist_step_deg; } } if (this_iter_not_converged) { iter_not_converged++; if (o_ptr != NULL) (*o_ptr) << " !!! NR_ITERATION_NOT_CONVERGED = " << iter_not_converged << " !!!" 
<< std::endl; if (iter_not_converged > 10) // Up to 25*25 samplings are allowed (original 5*5 samplings) { if (o_ptr != NULL) (*o_ptr) << " WARNING: Local searches of helical symmetry cannot converge. Consider a finer initial sampling of helical parameters." << std::endl; else std::cout << " WARNING: Local searches of helical symmetry cannot converge. Consider a finer initial sampling of helical parameters." << std::endl; return false; } continue; } } iter_not_converged = 0; // Set 5*5 finer samplings for the next iteration if (search_rise) { rise_local_min_pix = rise_refined_pix - rise_step_pix; rise_local_max_pix = rise_refined_pix + rise_step_pix; } if (search_twist) { twist_local_min_deg = twist_refined_deg - twist_step_deg; twist_local_max_deg = twist_refined_deg + twist_step_deg; } // When there is no need to search for either twist or rise if ( (search_rise) && ((rise_step_pix / fabs(rise_refined_pix)) < err_max) ) { rise_local_min_pix = rise_local_max_pix = rise_refined_pix; search_rise = false; } if ( (search_twist) && ((twist_step_deg / fabs(twist_refined_deg)) < err_max) ) { twist_local_min_deg = twist_local_max_deg = twist_refined_deg; search_twist = false; } // Stop searches if step sizes are too small if ( (!search_twist) && (!search_rise) ) break; // Decrease step size if (search_rise) rise_step_pix /= 2.; if (search_twist) twist_step_deg /= 2.; } if (out_of_range) { if (o_ptr != NULL) (*o_ptr) << " WARNING: Refined helical symmetry is out of the search range. Check whether the initial guess of helical symmetry is reasonable. Or you may want to modify the search range." << std::endl; else std::cout << " WARNING: Refined helical symmetry is out of the search range. Check whether the initial guess of helical symmetry is reasonable. Or you may want to modify the search range." << std::endl; return false; } return true; }; RFLOAT getHelicalSigma2Rot( RFLOAT helical_rise_Angst, RFLOAT helical_twist_deg, RFLOAT helical_offset_step_Angst, RFLOAT rot_step_deg, RFLOAT old_sigma2_rot) { if ( (helical_offset_step_Angst < 0.) || (rot_step_deg < 0.) || (old_sigma2_rot < 0.) ) REPORT_ERROR("helix.cpp::getHelicalSigma2Rot: Helical offset step, rot step or sigma2_rot cannot be negative!"); RFLOAT nr_samplings_along_helical_axis = (fabs(helical_rise_Angst)) / helical_offset_step_Angst; RFLOAT rot_search_range = (fabs(helical_twist_deg)) / nr_samplings_along_helical_axis; RFLOAT new_rot_step = rot_search_range / 6.; RFLOAT factor = ceil(new_rot_step / rot_step_deg); RFLOAT new_sigma2_rot = old_sigma2_rot; //RFLOAT factor_max = 10.; if (factor > 1.) { // Avoid extremely big sigma_rot!!! Too expensive in time and memory!!! //if (factor > factor_max) // factor = factor_max; new_sigma2_rot *= factor * factor; } return new_sigma2_rot; }; bool checkParametersFor3DHelicalReconstruction( bool ignore_symmetry, bool do_symmetry_local_refinement, int nr_asu, RFLOAT rise_initial_A, RFLOAT rise_min_A, RFLOAT rise_max_A, RFLOAT twist_initial_deg, RFLOAT twist_min_deg, RFLOAT twist_max_deg, int box_len, RFLOAT pixel_size_A, RFLOAT z_percentage, RFLOAT particle_diameter_A, RFLOAT tube_inner_diameter_A, RFLOAT tube_outer_diameter_A, bool verboseOutput) { RFLOAT nr_units_min = 2.; // Minimum nr_particles required along lenZ_max RFLOAT rise_range_max_percentage = 0.3334; // Verbose output if (verboseOutput) { std::cout << "##########################################################" << std::endl; std::cout << " CHECKING PARAMETERS FOR 3D HELICAL RECONSTRUCTION..." 
<< std::endl; std::cout << "##########################################################" << std::endl; } // Check pixel size if (verboseOutput) std::cout << " Pixel size = " << pixel_size_A << " Angstrom(s)" << std::endl; if (pixel_size_A < 0.001) { if (verboseOutput) std::cout << " ERROR! Pixel size should be larger than 0.001 Angstroms!" << std::endl; else REPORT_ERROR("helix.cpp::chechParametersFor3DHelicalReconstruction(): Pixel size should be larger than 0.001 Angstroms!"); return false; } // Check box size and calculate half box size if (verboseOutput) std::cout << " Box size = " << box_len << " pixels = " << (RFLOAT)(box_len) * pixel_size_A << " Angstroms" << std::endl; if (box_len < 10) { if (verboseOutput) std::cout << " ERROR! Input box size should be larger than 10!" << std::endl; else REPORT_ERROR("helix.cpp::chechParametersFor3DHelicalReconstruction(): Input box size should be larger than 10!"); return false; } int half_box_len = box_len / 2 - ((box_len + 1) % 2); // Calculate radii in pixels RFLOAT particle_radius_pix = particle_diameter_A * 0.5 / pixel_size_A; RFLOAT tube_inner_radius_pix = tube_inner_diameter_A * 0.5 / pixel_size_A; RFLOAT tube_outer_radius_pix = tube_outer_diameter_A * 0.5 / pixel_size_A; // Check particle radius if (verboseOutput) { std::cout << " Particle diameter = " << particle_radius_pix * 2. << " pixels = " << particle_diameter_A << " Angstroms" << std::endl; std::cout << " Half box size = " << half_box_len << " pixels = " << (RFLOAT)(half_box_len) * pixel_size_A << " Angstroms" << std::endl; } if ( (particle_radius_pix < 2.) || (particle_radius_pix > half_box_len) ) { if (verboseOutput) std::cout << " ERROR! Particle radius should be > 2 pixels and < half the box size!" << std::endl; else REPORT_ERROR("helix.cpp::chechParametersFor3DHelicalReconstruction(): Particle radius should be > 2 and < half the box size!"); return false; } // Check inner and outer tube radii if (verboseOutput) { if (tube_inner_diameter_A > 0.) std::cout << " Inner tube diameter = " << tube_inner_radius_pix * 2. << " pixels = " << tube_inner_diameter_A << " Angstroms" << std::endl; std::cout << " Outer tube diameter = " << tube_outer_radius_pix * 2. << " pixels = " << tube_outer_diameter_A << " Angstroms" << std::endl; } if ( (tube_outer_radius_pix < 2.) || (tube_outer_radius_pix > half_box_len) //|| ( (particle_radius_pix + 0.001) < tube_outer_radius_pix ) ) || (particle_radius_pix < tube_outer_radius_pix) ) { if (verboseOutput) std::cout << " ERROR! Outer tube diameter should be > 4 pixels, < particle diameter and < half the box size!" << std::endl; else REPORT_ERROR("helix.cpp::chechParametersFor3DHelicalReconstruction(): Outer tube diameter should be > 4 pixels, < particle diameter and < half the box size"); return false; } if ( (tube_inner_radius_pix > 0.) && ((tube_inner_radius_pix + 2.) > tube_outer_radius_pix) ) { if (verboseOutput) std::cout << " ERROR! Inner tube diameter should be remarkably smaller (> 4 pixels) than the outer one!" << std::endl; else REPORT_ERROR("helix.cpp::chechParametersFor3DHelicalReconstruction(): Inner tube diameter should be remarkably smaller (> 4 pixels) than the outer one!"); return false; } // STOP CHECKING OTHER PARAMETERS IF HELICAL SYMMETRY IS IGNORED IN 3D RECONSTRUCTION! if (ignore_symmetry) { if (verboseOutput) std::cout << " You have chosen to ignore helical symmetry! Stop checking now..." << std::endl; return true; } // CHECKING HELICAL SYMMETRY RELATED PARAMETERS... 
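// Worked example for the geometry checks above (hypothetical numbers only):
// with box_len = 200 px and pixel_size_A = 1.0 A/px,
//   half_box_len = 200/2 - (201 % 2) = 99 px
//   particle_diameter_A   = 190 -> particle_radius_pix   = 95 (must be >= 2 and <= half_box_len)
//   tube_outer_diameter_A = 180 -> tube_outer_radius_pix = 90 (must be >= 2, <= half_box_len and <= particle_radius_pix)
// so this combination passes, whereas a 220 A particle diameter (radius 110 px) would not.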
// Force same helical twist and rise if local refinement is not to be performed if (!do_symmetry_local_refinement) { twist_min_deg = twist_max_deg = twist_initial_deg; rise_min_A = rise_max_A = rise_initial_A; } RFLOAT rise_avg_A = (rise_min_A + rise_max_A) / 2.; RFLOAT twist_avg_deg = (twist_min_deg + twist_max_deg) / 2.; // Check helical twist and rise if (verboseOutput) { if (do_symmetry_local_refinement) { std::cout << " Helical twist (min, initial, average, max) = " << twist_min_deg << ", " << twist_initial_deg << ", " << twist_avg_deg << ", " << twist_max_deg << " degrees" << std::endl; std::cout << " Helical rise (min, initial, average, max) = " << rise_min_A / pixel_size_A << ", " << rise_initial_A / pixel_size_A << ", " << rise_avg_A / pixel_size_A << ", " << rise_max_A / pixel_size_A << " pixels" << std::endl; std::cout << " Helical rise (min, initial, average, max) = " << rise_min_A << ", " << rise_initial_A << ", " << rise_avg_A << ", " << rise_max_A << " Angstroms" << std::endl; } else { std::cout << " Helical twist = " << twist_initial_deg << " degree(s)" << std::endl; std::cout << " Helical rise = " << rise_initial_A / pixel_size_A << " pixel(s) = " << rise_initial_A << " Angstrom(s)" << std::endl; } } if ( (fabs(twist_min_deg) > 360.) || (fabs(twist_initial_deg) > 360.) || (fabs(twist_max_deg) > 360.) ) { if (verboseOutput) std::cout << " ERROR! Helical twist should be > -360 and < +360 degrees!" << std::endl; else REPORT_ERROR("helix.cpp::chechParametersFor3DHelicalReconstruction(): Helical twist should be > -360 and < +360 degrees!"); return false; } if ( (rise_min_A < 0.001) || ((rise_min_A / pixel_size_A) < 0.001) || (rise_initial_A < 0.001) || ((rise_initial_A / pixel_size_A) < 0.001) || (rise_max_A < 0.001) || ((rise_max_A / pixel_size_A) < 0.001) ) { if (verboseOutput) std::cout << " ERROR! Helical rise should be > +0.001 Angstroms and > +0.001 pixels!" << std::endl; else REPORT_ERROR("helix.cpp::chechParametersFor3DHelicalReconstruction(): Helical rise should be > +0.001 Angstroms and > +0.001 pixels!"); return false; } if (do_symmetry_local_refinement) { if ( (twist_min_deg > twist_max_deg) || (twist_initial_deg < twist_min_deg) || (twist_initial_deg > twist_max_deg) || (rise_min_A > rise_max_A) || (rise_initial_A < rise_min_A) || (rise_initial_A > rise_max_A) ) { if (verboseOutput) std::cout << " ERROR! The following condition must be satisfied (both for helical twist and rise): min < initial < max !" << std::endl; else REPORT_ERROR("helix.cpp::chechParametersFor3DHelicalReconstruction(): The following condition must be satisfied (both for helical twist and rise): min < initial < max !"); return false; } #ifndef WIDE_HELICAL_TWIST_AND_RISE_SEARCHES //RFLOAT rise_min_A_thres = rise_avg_A * (1. - rise_range_max_percentage); //RFLOAT rise_max_A_thres = rise_avg_A * (1. + rise_range_max_percentage); if ( (fabs(twist_avg_deg - twist_min_deg) > 180.01) || ((fabs(rise_avg_A - rise_min_A) / fabs(rise_avg_A)) > rise_range_max_percentage) ) { if (verboseOutput) std::cout << " ERROR! Searching ranges of helical twist and rise should be < 180 degrees and < +/-33.34% from the min-max average respectively!" << std::endl; else REPORT_ERROR("helix.cpp::chechParametersFor3DHelicalReconstruction(): Searching ranges of helical twist and rise should be < 180 degrees and < +/-33.34% from the min-max average respectively!"); return false; } #endif } // Check Z percentage if (verboseOutput) std::cout << " Z percentage = " << z_percentage << " = " << z_percentage * 100. 
<< " %" << std::endl; if ( (z_percentage < 0.001) || (z_percentage > 0.999) ) { if (verboseOutput) std::cout << " ERROR! Z percentage should at least be > 0.001 and < 0.999 (0.1% ~ 99.9%)!" << std::endl; else REPORT_ERROR("helix.cpp::chechParametersFor3DHelicalReconstruction(): Z percentage should at least be > 0.001 and < 0.999 (0.1% ~ 99.9%)!"); return false; } RFLOAT z_percentage_min = (nr_units_min * rise_max_A) / (pixel_size_A * (RFLOAT)(box_len)); z_percentage_min = (z_percentage_min < 0.001) ? (0.001) : (z_percentage_min); RFLOAT z_percentage_max = ( (2.) * sqrt( (particle_diameter_A * particle_diameter_A / 4.) - (tube_outer_diameter_A * tube_outer_diameter_A / 4.) ) / pixel_size_A) / ((RFLOAT)(box_len)); z_percentage_max = (z_percentage_max > 0.999) ? (0.999) : (z_percentage_max); if (verboseOutput) std::cout << " Z percentage should be > " << z_percentage_min << " and < " << z_percentage_max << " (under current settings)" << std::endl; if (z_percentage_min > z_percentage_max) { if (verboseOutput) std::cout << " ERROR! The range of Z percentage is invalid! To decrease the lower bound, make maximum rise smaller or box size larger. To increase the upper bound, make the particle diameter (along with the box size) larger and the outer tube diameter smaller!" << std::endl; else REPORT_ERROR("helix.cpp::chechParametersFor3DHelicalReconstruction(): The range of Z percentage is invalid! To decrease the lower bound, make maximum rise smaller or box size larger. To increase the upper bound, make the particle diameter (along with the box size) larger and the outer tube diameter smaller!"); return false; } if ( (z_percentage < z_percentage_min) || (z_percentage > z_percentage_max) ) { if (verboseOutput) std::cout << " ERROR! Z percentage is out of range under current settings!" << std::endl; else REPORT_ERROR("helix.cpp::chechParametersFor3DHelicalReconstruction(): Z percentage is out of range under current settings!"); return false; } // Check maximum rise (DO I NEED THIS???) RFLOAT rise_max_upper_bound_A = pixel_size_A * (RFLOAT)(box_len) * z_percentage / nr_units_min; if (do_symmetry_local_refinement) { if (verboseOutput) std::cout << " Upper bound of maximum rise = " << rise_max_upper_bound_A / pixel_size_A << " pixels = " << rise_max_upper_bound_A << " Angstroms (under current settings)" << std::endl; if (fabs(rise_max_A) > rise_max_upper_bound_A) // THIS CANNOT HAPPEN. ERRORS HAVE ALREADY BEEN RAISED IN Z PERCENTAGE CHECK. { if (verboseOutput) std::cout << " ERROR! Maximum rise exceeds its upper bound!" << std::endl; else REPORT_ERROR("helix.cpp::chechParametersFor3DHelicalReconstruction(): Maximum rise exceeds its upper bound!"); return false; } } // Check number of asymmetrical units RFLOAT half_nr_asu_max = 0.5 * (1. - z_percentage) * (RFLOAT(box_len)) * pixel_size_A / rise_max_A; int nr_asu_max = 2 * (int(floor(half_nr_asu_max))) + 1; nr_asu_max = (nr_asu_max < 1) ? (1) : (nr_asu_max); if (verboseOutput) std::cout << " Number of asymmetrical units = " << nr_asu << ", maximum value = " << nr_asu_max << " (under current settings)" << std::endl; if (nr_asu < 1) { if (verboseOutput) std::cout << " ERROR! Number of asymmetrical units (an integer) should be at least 1!" << std::endl; else REPORT_ERROR("helix.cpp::chechParametersFor3DHelicalReconstruction(): Number of asymmetrical units (a positive integer) should be at least 1!"); return false; } if ( (nr_asu > 1) && (nr_asu > nr_asu_max) ) { if (verboseOutput) std::cout << " ERROR! Number of asymmetrical units exceeds its upper bound!" 
<< std::endl; else REPORT_ERROR("helix.cpp::chechParametersFor3DHelicalReconstruction(): Number of asymmetrical units exceeds its upper bound!"); return false; } // Everything seems fine :) return true; } void imposeHelicalSymmetryInRealSpace( MultidimArray& v, RFLOAT pixel_size_A, RFLOAT sphere_radius_A, RFLOAT cyl_inner_radius_A, RFLOAT cyl_outer_radius_A, RFLOAT z_percentage, RFLOAT rise_A, RFLOAT twist_deg, RFLOAT cosine_width_pix) { bool ignore_helical_symmetry = false; long int Xdim, Ydim, Zdim, Ndim, box_len; RFLOAT rise_pix, sphere_radius_pix, cyl_inner_radius_pix, cyl_outer_radius_pix, r_min, r_max, d_min, d_max, D_min, D_max, z_min, z_max; int rec_len; std::vector sin_rec, cos_rec; MultidimArray vout; if (v.getDim() != 3) REPORT_ERROR("helix.cpp::imposeHelicalSymmetryInRealSpace(): Input helical reference is not 3D! (vol.getDim() = " + integerToString(v.getDim()) + ")"); v.getDimensions(Xdim, Ydim, Zdim, Ndim); box_len = (Xdim < Ydim) ? Xdim : Ydim; box_len = (box_len < Zdim) ? box_len : Zdim; // Check helical parameters checkParametersFor3DHelicalReconstruction( false, false, 1, rise_A, rise_A, rise_A, twist_deg, twist_deg, twist_deg, box_len, pixel_size_A, z_percentage, sphere_radius_A * 2., cyl_inner_radius_A * 2., cyl_outer_radius_A * 2.); // Parameters of mask if (cosine_width_pix < 1.) cosine_width_pix = 1.; // Avoid 'divided by 0' error rise_pix = fabs(rise_A / pixel_size_A); // Keep helical rise as a positive number sphere_radius_pix = sphere_radius_A / pixel_size_A; cyl_inner_radius_pix = cyl_inner_radius_A / pixel_size_A; cyl_outer_radius_pix = cyl_outer_radius_A / pixel_size_A; r_min = sphere_radius_pix; r_max = sphere_radius_pix + cosine_width_pix; d_min = cyl_inner_radius_pix - cosine_width_pix; d_max = cyl_inner_radius_pix; D_min = cyl_outer_radius_pix; D_max = cyl_outer_radius_pix + cosine_width_pix; // Crop the central slices v.setXmippOrigin(); z_max = ((RFLOAT)(Zdim)) * z_percentage / 2.; if (z_max > (((RFLOAT)(FINISHINGZ(v))) - 1.)) z_max = (((RFLOAT)(FINISHINGZ(v))) - 1.); z_min = -z_max; if (z_min < (((RFLOAT)(STARTINGZ(v))) + 1.)) z_min = (((RFLOAT)(STARTINGZ(v))) + 1.); // Init volumes v.setXmippOrigin(); vout.clear(); vout.resize(v); vout.setXmippOrigin(); // Calculate tabulated sine and cosine values rec_len = 2 + (CEIL((RFLOAT(Zdim) + 2.) / rise_pix)); sin_rec.clear(); cos_rec.clear(); sin_rec.resize(rec_len); cos_rec.resize(rec_len); for (int id = 0; id < rec_len; id++) #ifdef RELION_SINGLE_PRECISION SINCOSF(DEG2RAD(((RFLOAT)(id)) * twist_deg), &sin_rec[id], &cos_rec[id]); #else SINCOS(DEG2RAD(((RFLOAT)(id)) * twist_deg), &sin_rec[id], &cos_rec[id]); #endif FOR_ALL_ELEMENTS_IN_ARRAY3D(v) { // Out of the mask RFLOAT dd = (RFLOAT)(i * i + j * j); RFLOAT rr = dd + (RFLOAT)(k * k); RFLOAT d = sqrt(dd); RFLOAT r = sqrt(rr); if ( (r > r_max) || (d < d_min) || (d > D_max) ) { A3D_ELEM(v, k, i, j) = 0.; continue; } // How many voxels should be used to calculate the average? RFLOAT zi = (RFLOAT)(k); RFLOAT yi = (RFLOAT)(i); RFLOAT xi = (RFLOAT)(j); int rot_max = -(CEIL((zi - z_max) / rise_pix)); int rot_min = -(FLOOR((zi - z_min) / rise_pix)); if (rot_max < rot_min) REPORT_ERROR("helix.cpp::makeHelicalReferenceInRealSpace(): ERROR in imposing symmetry!"); // Do the average RFLOAT pix_sum, pix_weight; pix_sum = pix_weight = 0.; for (int id = rot_min; id <= rot_max; id++) { // Get the sine and cosine value RFLOAT sin_val, cos_val; if (id >= 0) { sin_val = sin_rec[id]; cos_val = cos_rec[id]; } else { sin_val = (-1.) 
* sin_rec[-id]; cos_val = cos_rec[-id]; } // Get the voxel coordinates RFLOAT zp = zi + ((RFLOAT)(id)) * rise_pix; RFLOAT yp = xi * sin_val + yi * cos_val; RFLOAT xp = xi * cos_val - yi * sin_val; // Trilinear interpolation (with physical coords) // Subtract STARTINGY and STARTINGZ to accelerate access to data (STARTINGX=0) // In that way use DIRECT_A3D_ELEM, rather than A3D_ELEM int x0, y0, z0, x1, y1, z1; RFLOAT fx, fy, fz; x0 = FLOOR(xp); fx = xp - x0; x0 -= STARTINGX(v); x1 = x0 + 1; y0 = FLOOR(yp); fy = yp - y0; y0 -= STARTINGY(v); y1 = y0 + 1; z0 = FLOOR(zp); fz = zp - z0; z0 -= STARTINGZ(v); z1 = z0 + 1; RFLOAT d000, d001, d010, d011, d100, d101, d110, d111; d000 = DIRECT_A3D_ELEM(v, z0, y0, x0); d001 = DIRECT_A3D_ELEM(v, z0, y0, x1); d010 = DIRECT_A3D_ELEM(v, z0, y1, x0); d011 = DIRECT_A3D_ELEM(v, z0, y1, x1); d100 = DIRECT_A3D_ELEM(v, z1, y0, x0); d101 = DIRECT_A3D_ELEM(v, z1, y0, x1); d110 = DIRECT_A3D_ELEM(v, z1, y1, x0); d111 = DIRECT_A3D_ELEM(v, z1, y1, x1); RFLOAT dx00, dx01, dx10, dx11; dx00 = LIN_INTERP(fx, d000, d001); dx01 = LIN_INTERP(fx, d100, d101); dx10 = LIN_INTERP(fx, d010, d011); dx11 = LIN_INTERP(fx, d110, d111); RFLOAT dxy0, dxy1; dxy0 = LIN_INTERP(fy, dx00, dx10); dxy1 = LIN_INTERP(fy, dx01, dx11); pix_sum += LIN_INTERP(fz, dxy0, dxy1); pix_weight += 1.; } if (pix_weight > 0.9) { A3D_ELEM(vout, k, i, j) = pix_sum / pix_weight; if ( (d > d_max) && (d < D_min) && (r < r_min) ) {} else // The pixel is within cosine edge(s) { pix_weight = 1.; if (d < d_max) // d_min < d < d_max : w=(0~1) pix_weight = 0.5 + (0.5 * cos(PI * ((d_max - d) / cosine_width_pix))); else if (d > D_min) // D_min < d < D_max : w=(1~0) pix_weight = 0.5 + (0.5 * cos(PI * ((d - D_min) / cosine_width_pix))); if (r > r_min) // r_min < r < r_max { pix_sum = 0.5 + (0.5 * cos(PI * ((r - r_min) / cosine_width_pix))); pix_weight = (pix_sum < pix_weight) ? (pix_sum) : (pix_weight); } A3D_ELEM(vout, k, i, j) *= pix_weight; } } else A3D_ELEM(vout, k, i, j) = 0.; } // Copy and exit v = vout; sin_rec.clear(); cos_rec.clear(); vout.clear(); return; }; /* void searchCnZSymmetry( const MultidimArray& v, RFLOAT r_min_pix, RFLOAT r_max_pix, int cn_start, int cn_end, std::vector& cn_list, std::vector& cc_list, std::vector& nr_asym_voxels_list, std::ofstream* fout_ptr) { int cn, Xdim, Ydim, Zdim, Ndim, nr_asym_voxels; RFLOAT cc; bool ok_flag; Xdim = XSIZE(v); Ydim = YSIZE(v); Zdim = ZSIZE(v); Ndim = NSIZE(v); if( (Ndim != 1) || (Zdim < 5) || (Ydim < 5) || (Xdim < 5) ) REPORT_ERROR("helix.cpp::searchCnZSymmetry(): Input 3D MultidimArray has Wrong dimensions! (Ndim = " + integerToString(Ndim) + ", Zdim = " + integerToString(Zdim) + ", Ydim = " + integerToString(Ydim) + ", Xdim = " + integerToString(Xdim) + ")"); if( (r_max_pix < 2.) || (r_min_pix < 0.) || ((r_max_pix - r_min_pix) < 2.) 
|| (cn_start <= 1) || (cn_end <= 1) || (cn_start > cn_end) || (cn_end > 36) ) REPORT_ERROR("helix.cpp::searchCnZSymmetry(): Wrong parameters!"); cn_list.clear(); cc_list.clear(); nr_asym_voxels_list.clear(); for(cn = cn_start; cn <= cn_end; cn++) { ok_flag = calcCCOfCnZSymmetry(v, r_min_pix, r_max_pix, cn, cc, nr_asym_voxels); if(!ok_flag) continue; cn_list.push_back(cn); cc_list.push_back(cc); nr_asym_voxels_list.push_back(nr_asym_voxels); if(fout_ptr != NULL) (*fout_ptr) << "Test Cn = " << cn << ", cc = " << cc << ", asym voxels = " << nr_asym_voxels << std::endl; } return; } */ /* RFLOAT calcCCofPsiFor2DHelicalSegment( const MultidimArray& v, RFLOAT psi_deg, RFLOAT pixel_size_A, RFLOAT sphere_radius_A, RFLOAT cyl_outer_radius_A) { RFLOAT sphere_radius_pix, sphere_radius2_pix, cyl_radius_pix, r2, x, y, xp, yp, sum, sum2, nr, val; int x0, y0, vec_id, vec_len, half_box_len, box_len; std::vector sum_list, pix_list; Matrix2D R; if (pixel_size_A < 0.001) REPORT_ERROR("helix.cpp::calcCCofPsiFor2DHelicalSegment(): Pixel size (in Angstroms) should be larger than 0.001!"); if (v.getDim() != 2) REPORT_ERROR("helix.cpp::calcCCofPsiFor2DHelicalSegment(): Input MultidimArray should be 2D!"); if ( (YSIZE(v) < 10) || (XSIZE(v) < 10) ) REPORT_ERROR("helix.cpp::calcCCofPsiFor2DHelicalSegment(): Input 2D MultidimArray should be larger than 10*10 pixels!"); if ( (STARTINGY(v) != FIRST_XMIPP_INDEX(YSIZE(v))) || (STARTINGX(v) != FIRST_XMIPP_INDEX(XSIZE(v))) ) REPORT_ERROR("helix.cpp::calcCCofPsiFor2DHelicalSegment(): The origin of input 2D MultidimArray is not at the center (use v.setXmippOrigin() before calling this function)!"); box_len = (XSIZE(v) < YSIZE(v)) ? XSIZE(v) : YSIZE(v); half_box_len = box_len / 2 - ((box_len + 1) % 2); sphere_radius_pix = sphere_radius_A / pixel_size_A; cyl_radius_pix = cyl_outer_radius_A / pixel_size_A; if ( (sphere_radius_pix < 2.) || (sphere_radius_pix > half_box_len) || (cyl_radius_pix < 2.) || (cyl_radius_pix > half_box_len) || ( (sphere_radius_pix + 0.001) < cyl_radius_pix ) ) REPORT_ERROR("helix.cpp::calcCCofPsiFor2DHelicalSegment(): Radii of spherical and/or cylindrical masks are invalid!"); sphere_radius2_pix = sphere_radius_pix * sphere_radius_pix; x0 = STARTINGX(v); y0 = STARTINGY(v); vec_len = (int)((2. * cyl_radius_pix)) + 2; sum_list.clear(); sum_list.resize(vec_len); pix_list.clear(); pix_list.resize(vec_len); for (vec_id = 0; vec_id < vec_len; vec_id++) sum_list[vec_id] = pix_list[vec_id] = 0.; rotation2DMatrix(psi_deg, R, false); R.setSmallValuesToZero(); FOR_ALL_ELEMENTS_IN_ARRAY2D(v) { x = j; y = i; r2 = i * i + j * j; if (r2 > sphere_radius2_pix) continue; //xp = x * R(0, 0) + y * R(0, 1); //vec_id = ROUND(xp - x0); yp = x * R(1, 0) + y * R(1, 1); vec_id = ROUND(yp - y0); if ( (vec_id < 0) || (vec_id >= vec_len) ) continue; pix_list[vec_id]++; sum_list[vec_id] += A2D_ELEM(v, i, j); } nr = sum = sum2 = 0.; for (vec_id = 0; vec_id < vec_len; vec_id++) { if (pix_list[vec_id] > 0.5) { nr += 1.; val = sum_list[vec_id] / pix_list[vec_id]; sum += val; sum2 += val * val; } } if (nr < 0.5) return (-1.); return ( (sum2 / nr) - ((sum / nr) * (sum / nr)) ); } RFLOAT localSearchPsiFor2DHelicalSegment( const MultidimArray& v, RFLOAT pixel_size_A, RFLOAT sphere_radius_A, RFLOAT cyl_outer_radius_A, RFLOAT ori_psi_deg, RFLOAT search_half_range_deg, RFLOAT search_step_deg) { RFLOAT psi_deg, best_psi_deg, max_cc, cc; if ( (search_step_deg < 0.000001) || (search_step_deg > 2.) 
) REPORT_ERROR("helix.cpp::localSearchPsiFor2DHelicalSegment(): Search step of psi angle should be 0.000001~1.999999 degrees!"); if ( (search_half_range_deg < 0.000001) || (search_half_range_deg < search_step_deg) ) REPORT_ERROR("helix.cpp::localSearchPsiFor2DHelicalSegment(): Search half range of psi angle should be not be less than 0.000001 degrees or the step size!"); max_cc = -1.; best_psi_deg = 0.; for (psi_deg = (ori_psi_deg - search_half_range_deg); psi_deg < (ori_psi_deg + search_half_range_deg); psi_deg += search_step_deg) { cc = calcCCofPsiFor2DHelicalSegment(v, psi_deg, pixel_size_A, sphere_radius_A, cyl_outer_radius_A); if ( (cc > 0.) && (cc > max_cc) ) { max_cc = cc; best_psi_deg = psi_deg; } // DEBUG //std::cout << "psi_deg = " << psi_deg << ", cc = " << cc << std::endl; } // DEBUG //std::cout << "------------------------------------------------" << std::endl; if (max_cc < 0.) REPORT_ERROR("helix.cpp::localSearchPsiFor2DHelicalSegment(): Error! No cc values for any psi angles are valid! Check if the input images are blank!"); return best_psi_deg; } RFLOAT searchPsiFor2DHelicalSegment( const MultidimArray& v, RFLOAT pixel_size_A, RFLOAT sphere_radius_A, RFLOAT cyl_outer_radius_A) { int nr_iter; RFLOAT search_half_range_deg, search_step_deg, best_psi_deg; search_half_range_deg = 89.999; search_step_deg = 0.5; best_psi_deg = 0.; for(nr_iter = 1; nr_iter <= 20; nr_iter++) { best_psi_deg = localSearchPsiFor2DHelicalSegment(v, pixel_size_A, sphere_radius_A, cyl_outer_radius_A, best_psi_deg, search_half_range_deg, search_step_deg); search_half_range_deg = 2. * search_step_deg; search_step_deg /= 2.; if (fabs(search_step_deg) < 0.00001) break; } return best_psi_deg; } */ void calcRadialAverage( const MultidimArray& v, std::vector& radial_avg_val_list) { int ii, Xdim, Ydim, Zdim, Ndim, list_size, dist; MultidimArray vol; std::vector radial_pix_counter_list; vol.clear(); radial_pix_counter_list.clear(); radial_avg_val_list.clear(); // Check dimensions Xdim = XSIZE(v); Ydim = YSIZE(v); Zdim = ZSIZE(v); Ndim = NSIZE(v); if( (Ndim != 1) || (Zdim < 5) || (Ydim < 5) || (Xdim < 5) ) REPORT_ERROR("helix.cpp::calcRadialAverage(): Input 3D MultidimArray has wrong dimensions! 
(Ndim = " + integerToString(Ndim) + ", Zdim = " + integerToString(Zdim) + ", Ydim = " + integerToString(Ydim) + ", Xdim = " + integerToString(Xdim) + ")"); // Resize and init vectors list_size = ROUND(sqrt(Xdim * Xdim + Ydim * Ydim)) + 2; radial_pix_counter_list.resize(list_size); radial_avg_val_list.resize(list_size); for(ii = 0; ii < list_size; ii++) { radial_pix_counter_list[ii] = 0.; radial_avg_val_list[ii] = 0.; } vol = v; vol.setXmippOrigin(); FOR_ALL_ELEMENTS_IN_ARRAY3D(vol) { dist = ROUND(sqrt(i * i + j * j)); if( (dist < 0) || (dist > (list_size - 1)) ) continue; radial_pix_counter_list[dist] += 1.; radial_avg_val_list[dist] += A3D_ELEM(vol, k, i, j); } for(ii = 0; ii < list_size; ii++) { if(radial_pix_counter_list[ii] > 0.9) radial_avg_val_list[ii] /= radial_pix_counter_list[ii]; else radial_avg_val_list[ii] = 0.; } radial_pix_counter_list.clear(); vol.clear(); return; }; void cutZCentralPartOfSoftMask( MultidimArray& mask, RFLOAT z_percentage, RFLOAT cosine_width) { int dim = mask.getDim(); int Zdim = ZSIZE(mask); RFLOAT idz_s, idz_s_w, idz_e, idz_e_w, idz, val; if (dim != 3) REPORT_ERROR("helix.cpp::cutZCentralPartOfSoftMask(): Input mask should have a dimension of 3!"); if (Zdim < 5) REPORT_ERROR("helix.cpp::cutZCentralPartOfSoftMask(): Z length of 3D mask is less than 5!"); if ( (z_percentage < 0.1) || (z_percentage > 0.9) ) REPORT_ERROR("helix.cpp::cutZCentralPartOfSoftMask(): Only 10%-90% of total Z length should be retained!"); if (cosine_width < 0.001) REPORT_ERROR("helix.cpp::cutZCentralPartOfSoftMask(): Cosine width for soft edge should larger than 0!"); idz_e = ((RFLOAT)(Zdim)) * z_percentage / 2.; idz_s = idz_e * (-1.); idz_s_w = idz_s - cosine_width; idz_e_w = idz_e + cosine_width; // DEBUG //std::cout << "z_len, z_percentage, cosine_width = " << Zdim << ", " << z_percentage << ", " << cosine_width << std::endl; //std::cout << "idz_s_w, idz_s, idz_e, idz_e_w = " << idz_s_w << ", " << idz_s << ", " << idz_e << ", " << idz_e_w << std::endl; mask.setXmippOrigin(); FOR_ALL_ELEMENTS_IN_ARRAY3D(mask) { idz = ((RFLOAT)(k)); if ( (idz > idz_s) && (idz < idz_e) ) {} else if ( (idz < idz_s_w) || (idz > idz_e_w) ) A3D_ELEM(mask, k, i, j) = 0.; else { val = 1.; if (idz < idz_s) val = 0.5 + 0.5 * cos(PI * (idz_s - idz) / cosine_width); else if (idz > idz_e) val = 0.5 + 0.5 * cos(PI * (idz - idz_e) / cosine_width); A3D_ELEM(mask, k, i, j) *= val; } } return; }; void createCylindricalReference( MultidimArray& v, int box_size, RFLOAT inner_diameter_pix, RFLOAT outer_diameter_pix, RFLOAT cosine_width) { RFLOAT r, dist, inner_radius_pix, outer_radius_pix; // Check dimensions if (box_size < 5) REPORT_ERROR("helix.cpp::createCylindricalReference(): Invalid box size."); if ( (inner_diameter_pix > outer_diameter_pix) || (outer_diameter_pix < 0.) || (outer_diameter_pix > (box_size - 1)) || (cosine_width < 0.) ) REPORT_ERROR("helix.cpp::createCylindricalReference(): Parameter(s) error!"); inner_radius_pix = inner_diameter_pix / 2.; outer_radius_pix = outer_diameter_pix / 2.; v.clear(); v.resize(box_size, box_size, box_size); v.setXmippOrigin(); FOR_ALL_ELEMENTS_IN_ARRAY3D(v) { r = sqrt(i * i + j * j); if ( (r > inner_radius_pix) && (r < outer_radius_pix) ) { A3D_ELEM(v, k, i, j) = 1.; continue; } dist = -9999.; if ( (r > outer_radius_pix) && (r < (outer_radius_pix + cosine_width)) ) dist = r - outer_radius_pix; else if ( (r < inner_radius_pix) && (r > (inner_radius_pix - cosine_width)) ) dist = inner_radius_pix - r; if (dist > 0.) 
{ A3D_ELEM(v, k, i, j) = 0.5 + 0.5 * cos(PI * dist / cosine_width); continue; } A3D_ELEM(v, k, i, j) = 0.; } return; } void createCylindricalReferenceWithPolarity( MultidimArray& v, int box_size, RFLOAT inner_diameter_pix, RFLOAT outer_diameter_pix, RFLOAT ratio_topbottom, RFLOAT cosine_width) { RFLOAT r, r_min, r_max, dist, top_radius_pix, bottom_radius_pix; // Check dimensions if (box_size < 5) REPORT_ERROR("helix.cpp::createCylindricalReferenceWithPolarity(): Invalid box size."); if ( (inner_diameter_pix > outer_diameter_pix) || (outer_diameter_pix < 0.) || (outer_diameter_pix > (box_size - 1)) || (ratio_topbottom < 0.) || (ratio_topbottom > 1.) || (cosine_width < 0.) ) REPORT_ERROR("helix.cpp::createCylindricalReferenceWithPolarity(): Parameter(s) error!"); // Set top and bottom radii top_radius_pix = outer_diameter_pix / 2.; bottom_radius_pix = outer_diameter_pix * ratio_topbottom / 2.; if (inner_diameter_pix > 0.) bottom_radius_pix = (inner_diameter_pix / 2.) + ratio_topbottom * (outer_diameter_pix / 2. - inner_diameter_pix / 2.); v.clear(); v.resize(box_size, box_size, box_size); v.setXmippOrigin(); r_min = r_max = -1.; if (inner_diameter_pix > 0.) r_min = inner_diameter_pix / 2.; for (long int k=STARTINGZ(v); k<=FINISHINGZ(v); k++) { r_max = top_radius_pix - (top_radius_pix - bottom_radius_pix) * ((RFLOAT)(k - STARTINGZ(v))) / ((RFLOAT)(box_size)); for (long int i=STARTINGY(v); i<=FINISHINGY(v); i++) { for (long int j=STARTINGX(v); j<=FINISHINGX(v); j++) { r = sqrt(i * i + j * j); if ( (r > r_min) && (r < r_max) ) { A3D_ELEM(v, k, i, j) = 1.; continue; } dist = -9999.; if ( (r > r_max) && (r < (r_max + cosine_width)) ) dist = r - r_max; if ( (r < r_min) && (r > (r_min - cosine_width)) ) dist = r_min - r; if (dist > 0.) { A3D_ELEM(v, k, i, j) = 0.5 + 0.5 * cos(PI * dist / cosine_width); continue; } A3D_ELEM(v, k, i, j) = 0.; } } } return; } void transformCartesianAndHelicalCoords( Matrix1D& in, Matrix1D& out, RFLOAT rot_deg, RFLOAT tilt_deg, RFLOAT psi_deg, bool direction) { int dim; RFLOAT x0, y0, z0; Matrix1D aux; Matrix2D A, B; dim = in.size(); if( (dim != 2) && (dim != 3) ) REPORT_ERROR("helix.cpp::transformCartesianAndHelicalCoords(): Vector of input coordinates should have 2 or 3 values!"); aux.clear(); aux.resize(3); XX(aux) = XX(in); YY(aux) = YY(in); ZZ(aux) = (dim == 3) ? (ZZ(in)) : (0.); if (dim == 2) rot_deg = tilt_deg = 0.; A.clear(); A.resize(3, 3); // TODO: check whether rot_deg should be always set to 0 ! // TODO: fix the --random_seed and use --perturb 0 option for testing ! 
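	// Build the rotation matrix A from the (rot, tilt, psi) Euler angles; for 2D
	// input only the in-plane psi matters, since rot and tilt were zeroed above.
	// CART_TO_HELICAL_COORDS applies the inverse rotation, which for a pure
	// rotation matrix is simply its transpose.
	//
	// Illustrative usage of the scalar overload defined below (a sketch only;
	// the variable names xc/yc/zc and xh/yh/zh are hypothetical):
	//
	//   RFLOAT xh, yh, zh;
	//   transformCartesianAndHelicalCoords(xc, yc, zc, xh, yh, zh,
	//           rot_deg, tilt_deg, psi_deg, 3, CART_TO_HELICAL_COORDS);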
Euler_angles2matrix(rot_deg, tilt_deg, psi_deg, A, false); if (direction == CART_TO_HELICAL_COORDS) // Don't put minus signs before angles, use 'transpose' instead A = A.transpose(); aux = A * aux; out.clear(); out.resize(2); XX(out) = XX(aux); YY(out) = YY(aux); if (dim == 3) { out.resize(3); ZZ(out) = ZZ(aux); } aux.clear(); A.clear(); return; } void transformCartesianAndHelicalCoords( RFLOAT xin, RFLOAT yin, RFLOAT zin, RFLOAT& xout, RFLOAT& yout, RFLOAT& zout, RFLOAT rot_deg, RFLOAT tilt_deg, RFLOAT psi_deg, int dim, bool direction) { if( (dim != 2) && (dim != 3) ) REPORT_ERROR("helix.cpp::transformCartesianAndHelicalCoords(): Vector of input coordinates should have 2 or 3 values!"); Matrix1D in, out; in.clear(); out.clear(); in.resize(dim); XX(in) = xin; YY(in) = yin; if (dim == 3) ZZ(in) = zin; transformCartesianAndHelicalCoords(in, out, rot_deg, tilt_deg, psi_deg, direction); xout = XX(out); yout = YY(out); if (dim == 3) zout = ZZ(out); return; } /* void makeBlot( MultidimArray& v, RFLOAT y, RFLOAT x, RFLOAT r) { int Xdim, Ydim, Zdim, Ndim; RFLOAT dist, min; v.getDimensions(Xdim, Ydim, Zdim, Ndim); if( (Ndim != 1) || (Zdim != 1) || (YXSIZE(v) <= 2) ) return; min = DIRECT_A2D_ELEM(v, 0, 0); FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY2D(v) min = (DIRECT_A2D_ELEM(v, i, j) < min) ? DIRECT_A2D_ELEM(v, i, j) : min; v.setXmippOrigin(); FOR_ALL_ELEMENTS_IN_ARRAY2D(v) { dist = (i - y) * (i - y) + (j - x) * (j - x); dist = sqrt(dist); if(dist < r) A2D_ELEM(v, i, j) = min; } return; } */ void makeSimpleHelixFromPDBParticle( const Assembly& ori, Assembly& helix, RFLOAT radius_A, RFLOAT twist_deg, RFLOAT rise_A, int nr_copy, bool do_center) { int nr_ori_atoms; Matrix1D mass_centre, shift; Matrix2D rotational_matrix; Assembly aux0, aux1; if (nr_copy < 3) REPORT_ERROR("helix.cpp::makeHelixFromPDBParticle(): More than 3 copies of original assemblies are required to form a helix!"); nr_ori_atoms = ori.numberOfAtoms(); std::cout << "Original assembly contains " << nr_ori_atoms << " atoms." 
<< std::endl; if (nr_ori_atoms < 1) REPORT_ERROR("helix.cpp::makeHelixFromPDBParticle(): Original assembly contains no atoms!"); // Calculate centre of mass of the original assembly mass_centre.resize(3); mass_centre.initZeros(); for (int imol = 0; imol < ori.molecules.size(); imol++) { for (int ires = 0; ires < ori.molecules[imol].residues.size(); ires++) { for (int iatom = 0; iatom < ori.molecules[imol].residues[ires].atoms.size(); iatom++) { if( ((ori.molecules[imol].residues[ires].atoms[iatom]).coords).size() != 3 ) REPORT_ERROR("helix.cpp::makeHelixFromPDBParticle(): Coordinates of atoms should have a dimension of 3!"); mass_centre += (ori.molecules[imol].residues[ires].atoms[iatom]).coords; } } } mass_centre /= (RFLOAT)(nr_ori_atoms); aux0.clear(); aux0 = ori; // Set the original particle on (r, 0, 0), if r > 0 // Else just impose helical symmetry (make copies) according to the original particle if (do_center) { for (int imol = 0; imol < aux0.molecules.size(); imol++) { for (int ires = 0; ires < aux0.molecules[imol].residues.size(); ires++) { for (int iatom = 0; iatom < aux0.molecules[imol].residues[ires].atoms.size(); iatom++) { (aux0.molecules[imol].residues[ires].atoms[iatom]).coords -= mass_centre; XX((aux0.molecules[imol].residues[ires].atoms[iatom]).coords) += radius_A; } } } std::cout << "Centre of mass (in Angstroms) = " << XX(mass_centre) << ", " << YY(mass_centre) << ", " << ZZ(mass_centre) << std::endl; std::cout << "Bring the centre of mass to the (helical_radius, 0, 0)" << std::endl; std::cout << "Helical radius (for centre of mass) assigned = " << radius_A << " Angstroms" << std::endl; } // Construct the helix rotational_matrix.clear(); shift.resize(3); shift.initZeros(); helix.clear(); helix.join(aux0); for (int ii = (((nr_copy + 1) % 2) - (nr_copy / 2)) ; ii <= (nr_copy / 2); ii++) { if (ii == 0) continue; rotation2DMatrix(((RFLOAT)(ii)) * twist_deg, rotational_matrix, true); ZZ(shift) = (RFLOAT)(ii) * rise_A; aux1.clear(); aux1 = aux0; aux1.applyTransformation(rotational_matrix, shift); helix.join(aux1); } return; } /* void normalise2DImageSlices( const FileName& fn_in, const FileName& fn_out, int bg_radius, RFLOAT white_dust_stddev, RFLOAT black_dust_stddev) { Image stack_in, slice; int Xdim, Ydim, Zdim; long int Ndim, ii; if ( (fn_in.getExtension() != "mrcs") || (fn_out.getExtension() != "mrcs") ) { REPORT_ERROR("helix.cpp::normalise2DImageSlices(): Input and output should be .mrcs files!"); } stack_in.clear(); stack_in.read(fn_in, false, -1, false, false); // readData = false, select_image = -1, mapData= false, is_2D = false); stack_in.getDimensions(Xdim, Ydim, Zdim, Ndim); std::cout << "File = " << fn_in.c_str() << std::endl; std::cout << "X, Y, Z, N dim = " << Xdim << ", " << Ydim << ", " << Zdim << ", " << Ndim << std::endl; std::cout << "bg_radius = " << bg_radius << ", white_dust_stddev = " << white_dust_stddev << ", black_dust_stddev = " << black_dust_stddev << std::endl; if( (Zdim != 1) || (Ndim < 1) || (Xdim < 3) || (Ydim < 3) ) { REPORT_ERROR("helix.cpp::normalise2DImageSlices(): Invalid image dimensionality!"); } for(ii = 0; ii < Ndim; ii++) { slice.read(fn_in, true, ii, false, false); normalise(slice, bg_radius, white_dust_stddev, black_dust_stddev, false); // Write this particle to the stack on disc // First particle: write stack in overwrite mode, from then on just append to it if (ii == 0) slice.write(fn_out, -1, (Ndim > 1), WRITE_OVERWRITE); else slice.write(fn_out, -1, false, WRITE_APPEND); } return; } */ void applySoftSphericalMask( MultidimArray& 
v, RFLOAT sphere_diameter, RFLOAT cosine_width) { RFLOAT r, r_max, r_max_edge; int dim = v.getDim(); if (dim != 3) REPORT_ERROR("helix.cpp::applySoftSphericalMask(): Input image should have a dimension of 3!"); if (cosine_width < 0.01) REPORT_ERROR("helix.cpp::applySoftSphericalMask(): Cosine width should be a positive value!"); v.setXmippOrigin(); r_max = (XSIZE(v) < YSIZE(v)) ? (XSIZE(v)) : (YSIZE(v)); r_max = (r_max < ZSIZE(v)) ? (r_max) : (ZSIZE(v)); // Nov11,2016 - Commented the following lines for r > 90% masks //if (cosine_width > 0.05 * r_max) // r_max -= 2. * cosine_width; //r_max *= 0.45; if ( (sphere_diameter > 0.01) && ((sphere_diameter / 2.) < r_max) ) r_max = sphere_diameter / 2.; r_max_edge = r_max + cosine_width; FOR_ALL_ELEMENTS_IN_ARRAY3D(v) { r = sqrt(k * k + i * i + j * j); if (r > r_max) { if (r < r_max_edge) A3D_ELEM(v, k, i, j) *= 0.5 + 0.5 * cos(PI * (r - r_max) / cosine_width); else A3D_ELEM(v, k, i, j) = 0.; } } return; } void extractHelicalSegmentsFromTubes_Multiple( FileName& suffix_in, FileName& suffix_out, int format_tag, int nr_asu, RFLOAT rise_A, RFLOAT pixel_size_A, RFLOAT Xdim, RFLOAT Ydim, RFLOAT box_size_pix, bool bimodal_angular_priors, bool cut_into_segments) { int total_segments, total_tubes, nr_segments, nr_tubes; FileName fns_in; std::vector fn_in_list; MetaDataTable MD_out; fns_in = "*" + suffix_in; fns_in.globFiles(fn_in_list); if (fn_in_list.size() < 1) REPORT_ERROR("helix.cpp::extractHelicalSegmentsFromTubes_Multiple(): No input files are found!"); total_segments = total_tubes = 0; for (int ii = 0; ii < fn_in_list.size(); ii++) { FileName fn_out; fn_out = fn_in_list[ii].beforeFirstOf(suffix_in) + suffix_out; if (format_tag == RELION_STAR_FORMAT) convertHelicalTubeCoordsToMetaDataTable(fn_in_list[ii], MD_out, nr_segments, nr_tubes, nr_asu, rise_A, pixel_size_A, Xdim, Ydim, box_size_pix, bimodal_angular_priors, cut_into_segments); else if (format_tag == XIMDISP_COORDS_FORMAT) convertXimdispHelicalTubeCoordsToMetaDataTable(fn_in_list[ii], MD_out, nr_segments, nr_tubes, nr_asu, rise_A, pixel_size_A, Xdim, Ydim, box_size_pix, bimodal_angular_priors, cut_into_segments); else if (format_tag == EMAN2_FORMAT) convertEmanHelicalTubeCoordsToMetaDataTable(fn_in_list[ii], MD_out, nr_segments, nr_tubes, nr_asu, rise_A, pixel_size_A, Xdim, Ydim, box_size_pix, bimodal_angular_priors, cut_into_segments); else REPORT_ERROR("helix.cpp::extractHelicalSegmentsFromTubes_Multiple(): BUG Invalid format tag!"); total_segments += nr_segments; total_tubes += nr_tubes; MD_out.write(fn_out); } std::cout << " ### " << total_segments << " segments (" << total_tubes << " tubes, ~" << (total_segments * nr_asu) << " subunits) are extracted from " << fn_in_list.size() << " input files. 
###" << std::endl; return; } void convertHelicalTubeCoordsToMetaDataTable( FileName& fn_in, MetaDataTable& MD_out, int& total_segments, int& total_tubes, int nr_asu, RFLOAT rise_A, RFLOAT pixel_size_A, RFLOAT Xdim, RFLOAT Ydim, RFLOAT box_size_pix, bool bimodal_angular_priors, bool cut_into_segments) { int nr_segments, MDobj_id; RFLOAT psi_deg, psi_rad, x1, y1, x2, y2, dx, dy, xp, yp, step_pix, half_box_size_pix, len_pix, psi_prior_flip_ratio, pitch; int id; MetaDataTable MD_in; std::vector x1_coord_list, y1_coord_list, x2_coord_list, y2_coord_list, pitch_list; std::vector tube_id_list; // Check parameters and open files if ( (nr_asu < 1) || (rise_A < 0.001) || (pixel_size_A < 0.01) ) REPORT_ERROR("helix.cpp::convertHelicalTubeCoordsToMetaDataTable(): Wrong parameters!"); if ( (box_size_pix < 2) || (Xdim < box_size_pix) || (Ydim < box_size_pix)) REPORT_ERROR("helix.cpp::convertHelicalTubeCoordsToMetaDataTable(): Wrong dimensions or box size!"); if (fn_in.getExtension() != "star") REPORT_ERROR("helix.cpp::convertHelicalTubeCoordsToMetaDataTable(): MetadataTable should have .star extension. Error(s) in " + fn_in); half_box_size_pix = box_size_pix / 2.; psi_prior_flip_ratio = UNIMODAL_PSI_PRIOR_FLIP_RATIO; if (bimodal_angular_priors) { psi_prior_flip_ratio = BIMODAL_PSI_PRIOR_FLIP_RATIO; } // Read input STAR file MD_in.clear(); MD_out.clear(); MD_in.read(fn_in); if (MD_in.numberOfObjects() < 1) // Handle empty input files return; if ( (!MD_in.containsLabel(EMDL_IMAGE_COORD_X)) || (!MD_in.containsLabel(EMDL_IMAGE_COORD_Y)) ) REPORT_ERROR("helix.cpp::convertHelicalTubeCoordsToMetaDataTable(): Input STAR file does not contain X and Y coordinates! Error(s) in " + fn_in); if (MD_in.numberOfObjects() % 2) REPORT_ERROR("helix.cpp::convertHelicalTubeCoordsToMetaDataTable(): Input coordinates should be in pairs! Error(s) in" + fn_in); // Sjors added MDin_has_id and MDin_has_pitch to allow manual calculation of different cross-over distances to be carried onto the extracted segments... 
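	// The input STAR file holds manually-picked tube coordinates as pairs of
	// points: odd entries are the start (x1, y1) and even entries the end
	// (x2, y2) of each tube. Segments are later placed along that line every
	// step_pix = nr_asu * rise_A / pixel_size_A pixels.
	// For example (illustrative numbers only): with nr_asu = 10, rise_A = 4.75
	// and pixel_size_A = 1.0 A/px, a segment is sampled every 47.5 pixels.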
bool MDin_has_id = MD_in.containsLabel(EMDL_PARTICLE_HELICAL_TUBE_ID); bool MDin_has_pitch = MD_in.containsLabel(EMDL_PARTICLE_HELICAL_TUBE_PITCH); x1_coord_list.clear(); y1_coord_list.clear(); x2_coord_list.clear(); y2_coord_list.clear(); tube_id_list.clear(); pitch_list.clear(); MDobj_id = 0; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD_in) { MDobj_id++; MD_in.getValue(EMDL_IMAGE_COORD_X, xp); MD_in.getValue(EMDL_IMAGE_COORD_Y, yp); if (MDobj_id % 2) { x1_coord_list.push_back(xp); y1_coord_list.push_back(yp); if (MDin_has_id) { MD_in.getValue(EMDL_PARTICLE_HELICAL_TUBE_ID, id); tube_id_list.push_back(id); } if (MDin_has_pitch) { MD_in.getValue(EMDL_PARTICLE_HELICAL_TUBE_PITCH, pitch); pitch_list.push_back(pitch); } } else { x2_coord_list.push_back(xp); y2_coord_list.push_back(yp); } } if ( (x1_coord_list.size() != x2_coord_list.size()) || (x2_coord_list.size() != y1_coord_list.size()) || (y1_coord_list.size() != y2_coord_list.size()) || (y2_coord_list.size() != x1_coord_list.size()) ) REPORT_ERROR("helix.cpp::convertHelicalTubeCoordsToMetaDataTable(): BUG in reading input STAR file " + fn_in); MD_in.clear(); // Init output STAR file MD_out.clear(); MD_out.addLabel(EMDL_IMAGE_COORD_X); MD_out.addLabel(EMDL_IMAGE_COORD_Y); MD_out.addLabel(EMDL_PARTICLE_HELICAL_TUBE_ID); MD_out.addLabel(EMDL_ORIENT_TILT_PRIOR); MD_out.addLabel(EMDL_ORIENT_PSI_PRIOR); MD_out.addLabel(EMDL_PARTICLE_HELICAL_TRACK_LENGTH_ANGSTROM); MD_out.addLabel(EMDL_ORIENT_PSI_PRIOR_FLIP_RATIO); if (MDin_has_id) MD_out.addLabel(EMDL_PARTICLE_HELICAL_TUBE_ID); if (MDin_has_pitch) MD_out.addLabel(EMDL_PARTICLE_HELICAL_TUBE_PITCH); // Calculate all coordinates for helical segments nr_segments = 0; step_pix = nr_asu * rise_A / pixel_size_A; for (int tube_id = 0; tube_id < x1_coord_list.size(); tube_id++) { x1 = x1_coord_list[tube_id]; y1 = y1_coord_list[tube_id]; x2 = x2_coord_list[tube_id]; y2 = y2_coord_list[tube_id]; if (MDin_has_id) id = tube_id_list[tube_id]; if (MDin_has_pitch) pitch = pitch_list[tube_id]; psi_rad = atan2(y2 - y1, x2 - x1); psi_deg = RAD2DEG(psi_rad); dx = step_pix * cos(psi_rad); dy = step_pix * sin(psi_rad); if (!cut_into_segments) { MD_out.addObject(); MD_out.setValue(EMDL_IMAGE_COORD_X, ((x1 + x2) / 2.)); MD_out.setValue(EMDL_IMAGE_COORD_Y, ((y1 + y2) / 2.)); MD_out.setValue(EMDL_PARTICLE_HELICAL_TUBE_ID, (tube_id + 1)); MD_out.setValue(EMDL_ORIENT_TILT_PRIOR, 90.); MD_out.setValue(EMDL_ORIENT_PSI_PRIOR, -psi_deg); MD_out.setValue(EMDL_PARTICLE_HELICAL_TRACK_LENGTH_ANGSTROM, 0.); MD_out.setValue(EMDL_ORIENT_PSI_PRIOR_FLIP_RATIO, psi_prior_flip_ratio); if (MDin_has_id) MD_out.setValue(EMDL_PARTICLE_HELICAL_TUBE_ID, id); if (MDin_has_pitch) MD_out.setValue(EMDL_PARTICLE_HELICAL_TUBE_PITCH, pitch); nr_segments++; continue; } xp = x1 - (dx * 0.99); yp = y1 - (dy * 0.99); len_pix = -step_pix; while (1) { xp += dx; yp += dy; len_pix += step_pix; if ( ((xp > x1) && (xp > x2)) || ((xp < x1) && (xp < x2)) || ((yp > y1) && (yp > y2)) || ((yp < y1) && (yp < y2)) ) { break; } else { #ifdef EXCLUDE_SEGMENTS_ON_THE_EDGES // Avoid segments lying on the edges of the micrographs if ( (xp < half_box_size_pix) || (xp > (Xdim - half_box_size_pix)) || (yp < half_box_size_pix) || (yp > (Ydim - half_box_size_pix)) ) { // Extract from filament start-end coordinates. It is not necessary to notify the user. //std::cerr << " WARNING: Particle at (" << xp << ", " << yp << ") in coordinate file " << fn_in << " is NOT extracted because it is too close to the edge." 
<< std::flush; //std::cerr << " Box_size_pix = " << box_size_pix << ", Dimensions = " << Xdim << " * " << Ydim << std::endl; continue; } #endif MD_out.addObject(); MD_out.setValue(EMDL_IMAGE_COORD_X, xp); MD_out.setValue(EMDL_IMAGE_COORD_Y, yp); MD_out.setValue(EMDL_PARTICLE_HELICAL_TUBE_ID, (tube_id + 1)); MD_out.setValue(EMDL_ORIENT_TILT_PRIOR, 90.); MD_out.setValue(EMDL_ORIENT_PSI_PRIOR, -psi_deg); MD_out.setValue(EMDL_PARTICLE_HELICAL_TRACK_LENGTH_ANGSTROM, pixel_size_A * len_pix); MD_out.setValue(EMDL_ORIENT_PSI_PRIOR_FLIP_RATIO, psi_prior_flip_ratio); if (MDin_has_id) MD_out.setValue(EMDL_PARTICLE_HELICAL_TUBE_ID, id); if (MDin_has_pitch) MD_out.setValue(EMDL_PARTICLE_HELICAL_TUBE_PITCH, pitch); nr_segments++; } } } if (nr_segments < 1) { std::cout << " WARNING: no segments extracted from file '" << fn_in << "'!" << std::endl; } else { std::cout << "Input STAR file = " << fn_in << ", tubes = " << x1_coord_list.size() << ", segments = " << nr_segments << ", subunits ~ " << (nr_segments * nr_asu) << std::endl; } total_segments = nr_segments; total_tubes = x1_coord_list.size(); } void combineParticlePriorsWithKaiLocalCTF( FileName& fn_priors, FileName& fn_local_ctf, FileName& fn_combined) { MetaDataTable MD_priors, MD_local_ctf; std::vector x, y, dU, dV, dAng, Cs, pix, mag, Q0, volt, fom, maxres, bfac, sfac, phase; RFLOAT _x, _y, _dU, _dV, _dAng, _Cs, _pix, _mag, _Q0, _volt, _fom, _maxres, _bfac, _sfac, _phase; int ii; if ( (fn_priors.getFileFormat() != "star") || (fn_local_ctf.getFileFormat() != "star") || (fn_combined.getFileFormat() != "star") ) REPORT_ERROR("helix.cpp::combineParticlePriorsWithKaiLocalCTF(): MetaDataTable should have .star extension."); // if ( (fn_priors == fn_local_ctf) || (fn_local_ctf == fn_combined) || (fn_combined == fn_priors) ) // REPORT_ERROR("helix.cpp::combineParticlePriorsWithKaiLocalCTF(): File names must be different."); if (fn_priors == fn_local_ctf) REPORT_ERROR("helix.cpp::combineParticlePriorsWithKaiLocalCTF(): Input file names must be different."); MD_priors.clear(); MD_local_ctf.clear(); MD_priors.read(fn_priors); MD_local_ctf.read(fn_local_ctf); if (MD_priors.numberOfObjects() != MD_local_ctf.numberOfObjects()) REPORT_ERROR("helix.cpp::combineParticlePriorsWithKaiLocalCTF(): MetaDataTables to be combined are not of the same size."); if ( (!MD_priors.containsLabel(EMDL_IMAGE_COORD_X)) || (!MD_priors.containsLabel(EMDL_IMAGE_COORD_Y)) // || (!MD_local_ctf.containsLabel(EMDL_MICROGRAPH_NAME)) || (!MD_local_ctf.containsLabel(EMDL_IMAGE_COORD_X)) || (!MD_local_ctf.containsLabel(EMDL_IMAGE_COORD_Y)) || (!MD_local_ctf.containsLabel(EMDL_CTF_VOLTAGE)) || (!MD_local_ctf.containsLabel(EMDL_CTF_DEFOCUSU)) || (!MD_local_ctf.containsLabel(EMDL_CTF_DEFOCUSV)) || (!MD_local_ctf.containsLabel(EMDL_CTF_DEFOCUS_ANGLE)) || (!MD_local_ctf.containsLabel(EMDL_CTF_CS)) || (!MD_local_ctf.containsLabel(EMDL_CTF_DETECTOR_PIXEL_SIZE)) || (!MD_local_ctf.containsLabel(EMDL_CTF_MAGNIFICATION)) || (!MD_local_ctf.containsLabel(EMDL_CTF_Q0)) ) REPORT_ERROR("helix.cpp::combineParticlePriorsWithKaiLocalCTF(): Labels missing in MetaDataTables."); x.clear(); y.clear(); dU.clear(); dV.clear(); dAng.clear(); // necessary Cs.clear(); pix.clear(); mag.clear(); Q0.clear(); volt.clear(); // necessary fom.clear(); maxres.clear(); bfac.clear(); sfac.clear(); phase.clear(); // optional FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD_local_ctf) { MD_local_ctf.getValue(EMDL_IMAGE_COORD_X, _x); x.push_back(_x); MD_local_ctf.getValue(EMDL_IMAGE_COORD_Y, _y); y.push_back(_y); 
MD_local_ctf.getValue(EMDL_CTF_DEFOCUSU, _dU); dU.push_back(_dU); MD_local_ctf.getValue(EMDL_CTF_DEFOCUSV, _dV); dV.push_back(_dV); MD_local_ctf.getValue(EMDL_CTF_DEFOCUS_ANGLE, _dAng); dAng.push_back(_dAng); MD_local_ctf.getValue(EMDL_CTF_CS, _Cs); Cs.push_back(_Cs); MD_local_ctf.getValue(EMDL_CTF_DETECTOR_PIXEL_SIZE, _pix); pix.push_back(_pix); MD_local_ctf.getValue(EMDL_CTF_MAGNIFICATION, _mag); mag.push_back(_mag); MD_local_ctf.getValue(EMDL_CTF_Q0, _Q0); Q0.push_back(_Q0); MD_local_ctf.getValue(EMDL_CTF_VOLTAGE, _volt); volt.push_back(_volt); if (MD_local_ctf.containsLabel(EMDL_CTF_FOM)) MD_local_ctf.getValue(EMDL_CTF_FOM, _fom); fom.push_back(_fom); if (MD_local_ctf.containsLabel(EMDL_CTF_MAXRES)) MD_local_ctf.getValue(EMDL_CTF_MAXRES, _maxres); maxres.push_back(_maxres); if (MD_local_ctf.containsLabel(EMDL_CTF_BFACTOR)) MD_local_ctf.getValue(EMDL_CTF_BFACTOR, _bfac); bfac.push_back(_bfac); if (MD_local_ctf.containsLabel(EMDL_CTF_SCALEFACTOR)) MD_local_ctf.getValue(EMDL_CTF_SCALEFACTOR, _sfac); sfac.push_back(_sfac); if (MD_local_ctf.containsLabel(EMDL_CTF_PHASESHIFT)) MD_local_ctf.getValue(EMDL_CTF_PHASESHIFT, _phase); phase.push_back(_phase); } if (!MD_priors.containsLabel(EMDL_CTF_DEFOCUSU)) MD_priors.addLabel(EMDL_CTF_DEFOCUSU); if (!MD_priors.containsLabel(EMDL_CTF_DEFOCUSV)) MD_priors.addLabel(EMDL_CTF_DEFOCUSV); if (!MD_priors.containsLabel(EMDL_CTF_DEFOCUS_ANGLE)) MD_priors.addLabel(EMDL_CTF_DEFOCUS_ANGLE); if (!MD_priors.containsLabel(EMDL_CTF_CS)) MD_priors.addLabel(EMDL_CTF_CS); if (!MD_priors.containsLabel(EMDL_CTF_DETECTOR_PIXEL_SIZE)) MD_priors.addLabel(EMDL_CTF_DETECTOR_PIXEL_SIZE); if (!MD_priors.containsLabel(EMDL_CTF_MAGNIFICATION)) MD_priors.addLabel(EMDL_CTF_MAGNIFICATION); if (!MD_priors.containsLabel(EMDL_CTF_Q0)) MD_priors.addLabel(EMDL_CTF_Q0); if (!MD_priors.containsLabel(EMDL_CTF_VOLTAGE)) MD_priors.addLabel(EMDL_CTF_VOLTAGE); if (!MD_priors.containsLabel(EMDL_CTF_FOM)) MD_priors.addLabel(EMDL_CTF_FOM); if (!MD_priors.containsLabel(EMDL_CTF_MAXRES)) MD_priors.addLabel(EMDL_CTF_MAXRES); if (!MD_priors.containsLabel(EMDL_CTF_BFACTOR)) MD_priors.addLabel(EMDL_CTF_BFACTOR); if (!MD_priors.containsLabel(EMDL_CTF_SCALEFACTOR)) MD_priors.addLabel(EMDL_CTF_SCALEFACTOR); if (!MD_priors.containsLabel(EMDL_CTF_PHASESHIFT)) MD_priors.addLabel(EMDL_CTF_PHASESHIFT); ii = -1; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD_priors) { ii++; MD_priors.getValue(EMDL_IMAGE_COORD_X, _x); MD_priors.getValue(EMDL_IMAGE_COORD_Y, _y); if ( (fabs(x[ii] - _x) > 1.001) || (fabs(y[ii] - _y) > 1.001) ) REPORT_ERROR("helix.cpp::combineParticlePriorsWithKaiLocalCTF(): Coordinates from the two MetaDataTables do not match."); //MD_priors.setValue(EMDL_IMAGE_COORD_X, x[ii]); //MD_priors.setValue(EMDL_IMAGE_COORD_Y, y[ii]); MD_priors.setValue(EMDL_CTF_DEFOCUSU, dU[ii]); MD_priors.setValue(EMDL_CTF_DEFOCUSV, dV[ii]); MD_priors.setValue(EMDL_CTF_DEFOCUS_ANGLE, dAng[ii]); MD_priors.setValue(EMDL_CTF_CS, Cs[ii]); MD_priors.setValue(EMDL_CTF_DETECTOR_PIXEL_SIZE, pix[ii]); MD_priors.setValue(EMDL_CTF_MAGNIFICATION, mag[ii]); MD_priors.setValue(EMDL_CTF_Q0, Q0[ii]); MD_priors.setValue(EMDL_CTF_VOLTAGE, volt[ii]); if (MD_local_ctf.containsLabel(EMDL_CTF_FOM)) MD_priors.setValue(EMDL_CTF_FOM, fom[ii]); if (MD_local_ctf.containsLabel(EMDL_CTF_MAXRES)) MD_priors.setValue(EMDL_CTF_MAXRES, maxres[ii]); if (MD_local_ctf.containsLabel(EMDL_CTF_BFACTOR)) MD_priors.setValue(EMDL_CTF_BFACTOR, bfac[ii]); if (MD_local_ctf.containsLabel(EMDL_CTF_SCALEFACTOR)) MD_priors.setValue(EMDL_CTF_SCALEFACTOR, sfac[ii]); if 
(MD_local_ctf.containsLabel(EMDL_CTF_PHASESHIFT)) MD_priors.setValue(EMDL_CTF_PHASESHIFT, phase[ii]); } MD_priors.write(fn_combined); return; } void combineParticlePriorsWithKaiLocalCTF_Multiple( std::string& suffix_priors, std::string& suffix_local_ctf, std::string& suffix_combined) { FileName fns_priors; std::vector fn_priors_list; // if ( (suffix_priors == suffix_local_ctf) || (suffix_priors == suffix_combined) || (suffix_combined == suffix_priors) ) // REPORT_ERROR("helix.cpp::combineParticlePriorsWithKaiLocalCTF_Multiple(): File names error!"); if (suffix_priors == suffix_local_ctf) REPORT_ERROR("helix.cpp::combineParticlePriorsWithKaiLocalCTF_Multiple(): Input file names error!"); fns_priors = "*" + suffix_priors; fns_priors.globFiles(fn_priors_list); std::cout << "Number of input files = " << fn_priors_list.size() << std::endl; if (fn_priors_list.size() < 1) REPORT_ERROR("helix.cpp::combineParticlePriorsWithKaiLocalCTF_Multiple(): No input files are found!"); for (int ii = 0; ii < fn_priors_list.size(); ii++) { FileName fn_local_ctf, fn_combined; fn_local_ctf = fn_priors_list[ii].beforeFirstOf(suffix_priors) + suffix_local_ctf; fn_combined = fn_priors_list[ii].beforeFirstOf(suffix_priors) + suffix_combined; combineParticlePriorsWithKaiLocalCTF(fn_priors_list[ii], fn_local_ctf, fn_combined); } return; } void setNullTiltPriorsInDataStar( FileName& fn_in, FileName& fn_out) { MetaDataTable MD; if ( (fn_in.getFileFormat() != "star") || (fn_out.getFileFormat() != "star") ) REPORT_ERROR("helix.cpp::addNullTiltPriorsToDataStar(): MetaDataTable should have .star extension."); if (fn_in == fn_out) REPORT_ERROR("helix.cpp::addNullTiltPriorsToDataStar(): File names must be different."); MD.read(fn_in); if (!MD.containsLabel(EMDL_ORIENT_TILT_PRIOR)) MD.addLabel(EMDL_ORIENT_TILT_PRIOR); FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD) { MD.setValue(EMDL_ORIENT_TILT_PRIOR, 90.); } MD.write(fn_out); return; } void removeBadTiltHelicalSegmentsFromDataStar( FileName& fn_in, FileName& fn_out, RFLOAT max_dev_deg) { MetaDataTable MD_in, MD_out; int nr_segments_old, nr_segments_new; RFLOAT tilt_deg; if ( (max_dev_deg < 0.) || (max_dev_deg > 89.) ) REPORT_ERROR("helix.cpp::removeBadTiltParticlesFromDataStar(): Max deviations of tilt angles from 90 degree should be in the range of 0~89 degrees."); if ( (fn_in.getFileFormat() != "star") || (fn_out.getFileFormat() != "star") ) REPORT_ERROR("helix.cpp::removeBadTiltParticlesFromDataStar(): MetaDataTable should have .star extension."); if (fn_in == fn_out) REPORT_ERROR("helix.cpp::removeBadTiltParticlesFromDataStar(): File names must be different."); MD_in.clear(); MD_out.clear(); MD_in.read(fn_in); // TODO: Use EMDL_ORIENT_TILT or EMDL_ORIENT_TILT_PRIOR ? if (!MD_in.containsLabel(EMDL_ORIENT_TILT)) REPORT_ERROR("helix.cpp::removeBadTiltParticlesFromDataStar(): Input .star file contains no tilt angles."); nr_segments_old = nr_segments_new = 0; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD_in) { nr_segments_old++; MD_in.getValue(EMDL_ORIENT_TILT, tilt_deg); if (fabs(tilt_deg - 90.) < max_dev_deg) { nr_segments_new++; MD_out.addObject(MD_in.getObject()); } } MD_out.write(fn_out); std::cout << " Number of segments (input / output) = " << nr_segments_old << " / " << nr_segments_new << std::endl; return; } void removeBadPsiHelicalSegmentsFromDataStar( FileName& fn_in, FileName& fn_out, RFLOAT max_dev_deg) { MetaDataTable MD_in, MD_out; int nr_segments_old, nr_segments_new; RFLOAT psi_deg, psi_prior_deg, diff_psi; if ( (max_dev_deg < 0.) || (max_dev_deg > 89.) 
) REPORT_ERROR("helix.cpp::removeBadPsiParticlesFromDataStar(): Max deviations of tilt angles from 90 degree should be in the range of 0~89 degrees."); if ( (fn_in.getFileFormat() != "star") || (fn_out.getFileFormat() != "star") ) REPORT_ERROR("helix.cpp::removeBadPsiParticlesFromDataStar(): MetaDataTable should have .star extension."); if (fn_in == fn_out) REPORT_ERROR("helix.cpp::removeBadPsiParticlesFromDataStar(): File names must be different."); MD_in.clear(); MD_out.clear(); MD_in.read(fn_in); if ( (!MD_in.containsLabel(EMDL_ORIENT_PSI)) || (!MD_in.containsLabel(EMDL_ORIENT_PSI_PRIOR)) ) REPORT_ERROR("helix.cpp::removeBadTiltParticlesFromDataStar(): Input .star file contains no psi angles with their priors."); nr_segments_old = nr_segments_new = 0; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD_in) { nr_segments_old++; MD_in.getValue(EMDL_ORIENT_PSI, psi_deg); MD_in.getValue(EMDL_ORIENT_PSI_PRIOR, psi_prior_deg); diff_psi = ABS(psi_deg - psi_prior_deg); if (diff_psi > 180.) diff_psi = ABS(diff_psi - 360.); if (diff_psi < max_dev_deg) { nr_segments_new++; MD_out.addObject(MD_in.getObject()); } } MD_out.write(fn_out); std::cout << " Number of segments (input / output) = " << nr_segments_old << " / " << nr_segments_new << std::endl; return; } void convertHelicalSegmentCoordsToStarFile_Multiple( FileName& suffix_coords, FileName& suffix_out, int format_tag, RFLOAT pixel_size_A, RFLOAT Xdim, RFLOAT Ydim, RFLOAT boxsize, bool bimodal_angular_priors) { int total_segments, nr_segments, total_tubes, nr_tubes; FileName fns_coords; std::vector fn_coords_list; MetaDataTable MD_out; fns_coords = "*" + suffix_coords; fns_coords.globFiles(fn_coords_list); if (fn_coords_list.size() < 1) REPORT_ERROR("helix.cpp::convertHelicalCoordsToStarFile_Multiple(): No input files are found!"); total_segments = total_tubes = 0; for (int ii = 0; ii < fn_coords_list.size(); ii++) { FileName fn_out; fn_out = fn_coords_list[ii].beforeFirstOf(suffix_coords) + suffix_out; if (format_tag == XIMDISP_COORDS_FORMAT) convertXimdispHelicalSegmentCoordsToMetaDataTable(fn_coords_list[ii], MD_out, nr_segments, nr_tubes, pixel_size_A, Xdim, Ydim, boxsize, bimodal_angular_priors); else if (format_tag == EMAN2_FORMAT) convertEmanHelicalSegmentCoordsToMetaDataTable(fn_coords_list[ii], MD_out, nr_segments, nr_tubes, pixel_size_A, Xdim, Ydim, boxsize, bimodal_angular_priors); else REPORT_ERROR("helix.cpp::convertHelicalCoordsToStarFile_Multiple(): BUG Invalid format tag!"); total_segments += nr_segments; total_tubes += nr_tubes; MD_out.write(fn_out); } std::cout << " ### " << total_segments << " segments (" << total_tubes << " tubes) are extracted from " << fn_coords_list.size() << " input files. 
###" << std::endl; return; } void convertHelicalSegmentCoordsToMetaDataTable( FileName& fn_in, MetaDataTable& MD_out, int& total_segments, bool is_3D_data, RFLOAT Xdim, RFLOAT Ydim, RFLOAT Zdim, RFLOAT box_size_pix, bool bimodal_angular_priors) { MetaDataTable MD_in; if (fn_in.getExtension() != "star") REPORT_ERROR("helix.cpp::convertHelicalSegmentCoordsToMetaDataTable(): Input file should have .star extension!!"); if ( (box_size_pix < 2) || (Xdim < box_size_pix) || (Ydim < box_size_pix) || ((is_3D_data) && (Zdim < box_size_pix)) ) REPORT_ERROR("helix.cpp::convertHelicalSegmentCoordsToMetaDataTable(): Wrong dimensions or box size!"); RFLOAT x = 0., y = 0., z = 0.; RFLOAT half_box_size_pix = box_size_pix / 2.; RFLOAT psi_prior_flip_ratio = UNIMODAL_PSI_PRIOR_FLIP_RATIO; if (bimodal_angular_priors) { psi_prior_flip_ratio = BIMODAL_PSI_PRIOR_FLIP_RATIO; } MD_in.clear(); MD_out.clear(); MD_in.read(fn_in); if (MD_in.numberOfObjects() < 1) // Handle empty input files return; if ( (!MD_in.containsLabel(EMDL_IMAGE_COORD_X)) || (!MD_in.containsLabel(EMDL_IMAGE_COORD_Y)) || ( (is_3D_data) && (!MD_in.containsLabel(EMDL_IMAGE_COORD_Z)) ) || (!MD_in.containsLabel(EMDL_PARTICLE_HELICAL_TUBE_ID)) || (!MD_in.containsLabel(EMDL_ORIENT_TILT_PRIOR)) || (!MD_in.containsLabel(EMDL_ORIENT_PSI_PRIOR)) || (!MD_in.containsLabel(EMDL_PARTICLE_HELICAL_TRACK_LENGTH_ANGSTROM)) ) REPORT_ERROR("helix.cpp::convertHelicalSegmentCoordsToMetaDataTable(): Prior information of helical segments is missing in " + fn_in); int nr_segments = 0; z = 1.; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD_in) { MD_in.getValue(EMDL_IMAGE_COORD_X, x); MD_in.getValue(EMDL_IMAGE_COORD_Y, y); if (is_3D_data) MD_in.getValue(EMDL_IMAGE_COORD_Z, z); #ifdef EXCLUDE_SEGMENTS_ON_THE_EDGES // Avoid segments lying on the edges of the micrographs if ( (x < half_box_size_pix) || (x > (Xdim - half_box_size_pix)) || (y < half_box_size_pix) || (y > (Ydim - half_box_size_pix)) || ( (is_3D_data) && ((z < half_box_size_pix) || (z > (Zdim - half_box_size_pix))) ) ) { std::cerr << " WARNING: Particle at (" << x << ", " << y << ", " << z << std::flush; std::cerr << ") in coordinate file " << fn_in << " is NOT extracted because it is too close to the edge." << std::flush; std::cerr << " Box_size_pix = " << box_size_pix << std::flush; std::cerr << ", Dimensions = " << Xdim << " * " << Ydim << " * " << Zdim << std::endl; continue; } #endif nr_segments++; MD_out.addObject(MD_in.getObject()); // TODO: check whether there is a bug... 
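		// The psi-prior flip ratio written below records how likely it is that
		// the true in-plane angle is psi + 180 degrees rather than psi. With
		// bimodal angular priors the polarity of each filament is treated as
		// unknown, so both directions are considered during refinement; with
		// unimodal priors the prior is kept single-sided.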
MD_out.setValue(EMDL_ORIENT_PSI_PRIOR_FLIP_RATIO, psi_prior_flip_ratio); } total_segments = nr_segments; MD_in.clear(); } void convertXimdispHelicalSegmentCoordsToMetaDataTable( FileName& fn_in, MetaDataTable& MD_out, int& total_segments, int& total_tubes, RFLOAT pixel_size_A, RFLOAT Xdim, RFLOAT Ydim, RFLOAT box_size_pix, bool bimodal_angular_priors) { int nr_segments_on_edges, nr_segments, nr_tubes; RFLOAT x, y, x_old, y_old, psi_deg_old, psi_deg, half_box_size_pix, len_pix, psi_prior_flip_ratio; std::ifstream fin; std::string line; std::vector words; if ( (box_size_pix < 2) || (Xdim < box_size_pix) || (Ydim < box_size_pix)) REPORT_ERROR("helix.cpp::convertXimdispHelicalSegmentCoordsToMetaDataTable(): Wrong dimensions or box size!"); // Header of output file MD_out.clear(); MD_out.addLabel(EMDL_IMAGE_COORD_X); MD_out.addLabel(EMDL_IMAGE_COORD_Y); MD_out.addLabel(EMDL_PARTICLE_HELICAL_TUBE_ID); MD_out.addLabel(EMDL_ORIENT_TILT_PRIOR); MD_out.addLabel(EMDL_ORIENT_PSI_PRIOR); MD_out.addLabel(EMDL_PARTICLE_HELICAL_TRACK_LENGTH_ANGSTROM); MD_out.addLabel(EMDL_ORIENT_PSI_PRIOR_FLIP_RATIO); half_box_size_pix = box_size_pix / 2.; psi_prior_flip_ratio = UNIMODAL_PSI_PRIOR_FLIP_RATIO; if (bimodal_angular_priors) { psi_prior_flip_ratio = BIMODAL_PSI_PRIOR_FLIP_RATIO; } fin.open(fn_in.c_str(), std::ios_base::in); if (fin.fail()) REPORT_ERROR("helix.cpp::convertXimdispHelicalSegmentCoordsToMetaDataTable(): Cannot open input file " + fn_in); line.clear(); getline(fin, line, '\n'); psi_deg_old = 999999.; len_pix = 0.; nr_segments_on_edges = nr_segments = nr_tubes = 0; while (getline(fin, line, '\n')) { // Read in a new line of x, y, psi if (line.size() < 2) // End of file break; words.clear(); tokenize(line, words); if (words.size() != 3) REPORT_ERROR("helix.cpp::convertXimdispHelicalSegmentCoordsToMetaDataTable(): Invalid input file " + fn_in); x = textToFloat(words[0]); y = textToFloat(words[1]); psi_deg = textToFloat(words[2]); // Check whether it is on a new helical tube if (fabs(psi_deg - psi_deg_old) > 0.1) { nr_tubes++; len_pix = 0.; x_old = x; y_old = y; } // Accumulate the length len_pix += sqrt( (x - x_old) * (x - x_old) + (y - y_old) * (y - y_old) ); x_old = x; y_old = y; psi_deg_old = psi_deg; #ifdef EXCLUDE_SEGMENTS_ON_THE_EDGES // Avoid segments lying on the edges of the micrographs if ( (x < half_box_size_pix) || (x > (Xdim - half_box_size_pix)) || (y < half_box_size_pix) || (y > (Ydim - half_box_size_pix)) ) { nr_segments_on_edges++; std::cerr << " WARNING: Particle at (" << x << ", " << y << ") in coordinate file " << fn_in << " is NOT extracted because it is too close to the edge." 
<< std::flush; std::cerr << " Box_size_pix = " << box_size_pix << ", Dimensions = " << Xdim << " * " << Ydim << std::endl; continue; } #endif nr_segments++; MD_out.addObject(); MD_out.setValue(EMDL_IMAGE_COORD_X, x); MD_out.setValue(EMDL_IMAGE_COORD_Y, y); MD_out.setValue(EMDL_PARTICLE_HELICAL_TUBE_ID, nr_tubes); MD_out.setValue(EMDL_ORIENT_TILT_PRIOR, 90.); MD_out.setValue(EMDL_ORIENT_PSI_PRIOR, -psi_deg); MD_out.setValue(EMDL_PARTICLE_HELICAL_TRACK_LENGTH_ANGSTROM, pixel_size_A * len_pix); MD_out.setValue(EMDL_ORIENT_PSI_PRIOR_FLIP_RATIO, psi_prior_flip_ratio); line.clear(); } fin.close(); total_segments = nr_segments; total_tubes = nr_tubes; std::cout << "Input XIMDISP coordinates = " << fn_in.c_str() << ", micrograph size = " << Xdim << " * " << Ydim << ", box size = " << box_size_pix << ", tubes = " << nr_tubes << ", " << nr_segments_on_edges << " segments excluded, " << nr_segments << " segments left." << std::endl; } void convertXimdispHelicalTubeCoordsToMetaDataTable( FileName& fn_in, MetaDataTable& MD_out, int& total_segments, int& total_tubes, int nr_asu, RFLOAT rise_A, RFLOAT pixel_size_A, RFLOAT Xdim, RFLOAT Ydim, RFLOAT box_size_pix, bool bimodal_angular_priors, bool cut_into_segments) { int nr_segments, nr_tubes; RFLOAT xp, yp, dx, dy, x1, y1, x2, y2, psi_deg, psi_rad, half_box_size_pix, len_pix, psi_prior_flip_ratio; std::ifstream fin; std::string line; std::vector words; std::vector x, y; if ( (box_size_pix < 2) || (Xdim < box_size_pix) || (Ydim < box_size_pix)) REPORT_ERROR("helix.cpp::convertXimdispHelicalTubeCoordsToMetaDataTable(): Wrong dimensions or box size!"); if (pixel_size_A < 0.001) REPORT_ERROR("helix.cpp::convertXimdispHelicalTubeCoordsToMetaDataTable(): Invalid pixel size!"); RFLOAT step_pix = ((RFLOAT)(nr_asu)) * rise_A / pixel_size_A; if ( (nr_asu < 1) || (rise_A < 0.001) || (step_pix < 0.001) ) REPORT_ERROR("helix.cpp::convertXimdispHelicalTubeCoordsToMetaDataTable(): Invalid helical rise or number of asymmetrical units!"); // Header of output file MD_out.clear(); MD_out.addLabel(EMDL_IMAGE_COORD_X); MD_out.addLabel(EMDL_IMAGE_COORD_Y); MD_out.addLabel(EMDL_PARTICLE_HELICAL_TUBE_ID); MD_out.addLabel(EMDL_ORIENT_TILT_PRIOR); MD_out.addLabel(EMDL_ORIENT_PSI_PRIOR); MD_out.addLabel(EMDL_PARTICLE_HELICAL_TRACK_LENGTH); MD_out.addLabel(EMDL_ORIENT_PSI_PRIOR_FLIP_RATIO); x.resize(4); y.resize(4); half_box_size_pix = box_size_pix / 2.; psi_prior_flip_ratio = UNIMODAL_PSI_PRIOR_FLIP_RATIO; if (bimodal_angular_priors) { psi_prior_flip_ratio = BIMODAL_PSI_PRIOR_FLIP_RATIO; } fin.open(fn_in.c_str(), std::ios_base::in); if (fin.fail()) REPORT_ERROR("helix.cpp::convertXimdispHelicalTubeCoordsToMetaDataTable(): Cannot open input file " + fn_in); nr_tubes = nr_segments = 0; while (getline(fin, line, '\n')) { // Read in new helical tube if (line.size() < 2) // End of file break; words.clear(); tokenize(line, words); if (words.size() != 2) REPORT_ERROR("helix.cpp::convertXimdispHelicalTubeCoordsToMetaDataTable(): Invalid input file " + fn_in); nr_tubes++; // Read in starting and end points for this helical tube for (int iline = 0; iline < 4; iline++) { line.clear(); getline(fin, line, '\n'); words.clear(); tokenize(line, words); if (words.size() != 2) REPORT_ERROR("helix.cpp::convertXimdispHelicalTubeCoordsToMetaDataTable(): Invalid input file " + fn_in); x[iline] = textToFloat(words[0]); y[iline] = textToFloat(words[1]); } line.clear(); getline(fin, line, '\n'); // Calculate starting and end points for this helical tube x1 = (x[0] + x[1]) / 2.; y1 = (y[0] + y[1]) / 2.; x2 
= (x[2] + x[3]) / 2.; y2 = (y[2] + y[3]) / 2.; psi_rad = atan2(y2 - y1, x2 - x1); psi_deg = RAD2DEG(psi_rad); dx = step_pix * cos(psi_rad); dy = step_pix * sin(psi_rad); if (!cut_into_segments) { MD_out.addObject(); MD_out.setValue(EMDL_IMAGE_COORD_X, ((x1 + x2) / 2.)); MD_out.setValue(EMDL_IMAGE_COORD_Y, ((y1 + y2) / 2.)); MD_out.setValue(EMDL_PARTICLE_HELICAL_TUBE_ID, nr_tubes); MD_out.setValue(EMDL_ORIENT_TILT_PRIOR, 90.); MD_out.setValue(EMDL_ORIENT_PSI_PRIOR, -psi_deg); MD_out.setValue(EMDL_PARTICLE_HELICAL_TRACK_LENGTH_ANGSTROM, 0.); MD_out.setValue(EMDL_ORIENT_PSI_PRIOR_FLIP_RATIO, psi_prior_flip_ratio); continue; } // Calculate coordinates for all segments xp = x1 - (dx * 0.99); yp = y1 - (dy * 0.99); len_pix = -step_pix; while (1) { xp += dx; yp += dy; len_pix += step_pix; if ( ((xp > x1) && (xp > x2)) || ((xp < x1) && (xp < x2)) || ((yp > y1) && (yp > y2)) || ((yp < y1) && (yp < y2)) ) { break; } else { #ifdef EXCLUDE_SEGMENTS_ON_THE_EDGES // Avoid segments lying on the edges of the micrographs if ( (xp < half_box_size_pix) || (xp > (Xdim - half_box_size_pix)) || (yp < half_box_size_pix) || (yp > (Ydim - half_box_size_pix)) ) { // Extract from filament start-end coordinates. It is not necessary to notify the user. //std::cerr << " WARNING: Particle at (" << xp << ", " << yp << ") in coordinate file " << fn_in << " is NOT extracted because it is too close to the edge." << std::flush; //std::cerr << " Box_size_pix = " << box_size_pix << ", Dimensions = " << Xdim << " * " << Ydim << std::endl; continue; } #endif MD_out.addObject(); MD_out.setValue(EMDL_IMAGE_COORD_X, xp); MD_out.setValue(EMDL_IMAGE_COORD_Y, yp); MD_out.setValue(EMDL_PARTICLE_HELICAL_TUBE_ID, nr_tubes); MD_out.setValue(EMDL_ORIENT_TILT_PRIOR, 90.); MD_out.setValue(EMDL_ORIENT_PSI_PRIOR, -psi_deg); MD_out.setValue(EMDL_PARTICLE_HELICAL_TRACK_LENGTH_ANGSTROM, pixel_size_A * len_pix); MD_out.setValue(EMDL_ORIENT_PSI_PRIOR_FLIP_RATIO, psi_prior_flip_ratio); nr_segments++; } } line.clear(); } fin.close(); total_segments = MD_out.numberOfObjects(); total_tubes = nr_tubes; std::cout << "Input XIMDISP coordinates = " << fn_in.c_str() << ", micrograph size = " << Xdim << " * " << Ydim << ", box size = " << box_size_pix << ", tubes = " << nr_tubes << ", segments = " << MD_out.numberOfObjects() << "." 
<< std::endl; } void convertEmanHelicalSegmentCoordsToMetaDataTable( FileName& fn_in, MetaDataTable& MD_out, int& total_segments, int& total_tubes, RFLOAT pixel_size_A, RFLOAT Xdim, RFLOAT Ydim, RFLOAT box_size_pix, bool bimodal_angular_priors) { int nr_segments_on_edges, nr_segments, nr_tubes; RFLOAT x, y, x_old, y_old, psi_deg, half_box_size_pix, len_pix, width, psi_prior_flip_ratio; std::ifstream fin; std::string line; std::vector words; if ( (box_size_pix < 2) || (Xdim < box_size_pix) || (Ydim < box_size_pix)) REPORT_ERROR("helix.cpp::convertEmanHelicalSegmentCoordsToMetaDataTable(): Wrong dimensions or box size!"); // Header of output file MD_out.clear(); MD_out.addLabel(EMDL_IMAGE_COORD_X); MD_out.addLabel(EMDL_IMAGE_COORD_Y); MD_out.addLabel(EMDL_PARTICLE_HELICAL_TUBE_ID); MD_out.addLabel(EMDL_ORIENT_TILT_PRIOR); MD_out.addLabel(EMDL_ORIENT_PSI_PRIOR); MD_out.addLabel(EMDL_PARTICLE_HELICAL_TRACK_LENGTH_ANGSTROM); MD_out.addLabel(EMDL_ORIENT_PSI_PRIOR_FLIP_RATIO); half_box_size_pix = box_size_pix / 2.; psi_prior_flip_ratio = UNIMODAL_PSI_PRIOR_FLIP_RATIO; if (bimodal_angular_priors) { psi_prior_flip_ratio = BIMODAL_PSI_PRIOR_FLIP_RATIO; } fin.open(fn_in.c_str(), std::ios_base::in); if (fin.fail()) REPORT_ERROR("helix.cpp::convertEmanHelicalSegmentCoordsToMetaDataTable(): Cannot open input file " + fn_in); line.clear(); len_pix = 0.; nr_segments_on_edges = nr_segments = nr_tubes = 0; while (getline(fin, line, '\n')) { if (line.size() < 2) // End of file break; // Find lines which start with "#helix: (" int char_offset = 9; if ((line.substr(0, char_offset + 1)).find("#helix: (") != std::string::npos) { nr_tubes++; // Get psi angle RFLOAT x1, y1, x2, y2; char cdummy; std::istringstream ss(line.substr(char_offset)); //std::cout << line.substr(char_offset) << std::endl; ss >> x1 >> cdummy >> y1 >> cdummy >> cdummy >> cdummy >> x2 >> cdummy >> y2 >> cdummy >> cdummy >> width; psi_deg = RAD2DEG(atan2(y2 - y1, x2 - x1)); len_pix = 0.; x_old = y_old = (1.1e30); continue; } else if (line[0] == '#') { continue; } // Get x, y coordinates words.clear(); tokenize(line, words); if (words.size() != 2) REPORT_ERROR("helix.cpp::convertEmanHelicalSegmentCoordsToMetaDataTable(): Invalid input file " + fn_in); x = textToFloat(words[0]); y = textToFloat(words[1]); // Accumulate the length if (x_old < (1e30)) len_pix += sqrt( (x - x_old) * (x - x_old) + (y - y_old) * (y - y_old) ); x_old = x; y_old = y; #ifdef EXCLUDE_SEGMENTS_ON_THE_EDGES // Avoid segments lying on the edges of the micrographs if ( (x < half_box_size_pix) || (x > (Xdim - half_box_size_pix)) || (y < half_box_size_pix) || (y > (Ydim - half_box_size_pix)) ) { nr_segments_on_edges++; std::cerr << " WARNING: Particle at (" << x << ", " << y << ") in coordinate file " << fn_in << " is NOT extracted because it is too close to the edge." 
<< std::flush; std::cerr << " Box_size_pix = " << box_size_pix << ", Dimensions = " << Xdim << " * " << Ydim << std::endl; continue; } #endif nr_segments++; MD_out.addObject(); MD_out.setValue(EMDL_IMAGE_COORD_X, x); MD_out.setValue(EMDL_IMAGE_COORD_Y, y); MD_out.setValue(EMDL_PARTICLE_HELICAL_TUBE_ID, nr_tubes); MD_out.setValue(EMDL_ORIENT_TILT_PRIOR, 90.); MD_out.setValue(EMDL_ORIENT_PSI_PRIOR, -psi_deg); MD_out.setValue(EMDL_PARTICLE_HELICAL_TRACK_LENGTH_ANGSTROM, pixel_size_A * len_pix); MD_out.setValue(EMDL_ORIENT_PSI_PRIOR_FLIP_RATIO, psi_prior_flip_ratio); line.clear(); } fin.close(); total_segments = nr_segments; total_tubes = nr_tubes; std::cout << "Input EMAN2 coordinates = " << fn_in.c_str() << ", micrograph size = " << Xdim << " * " << Ydim << ", box size = " << box_size_pix << ", tubes = " << nr_tubes << ", " << nr_segments_on_edges << " segments excluded, " << nr_segments << " segments left." << std::endl; } void convertEmanHelicalTubeCoordsToMetaDataTable( FileName& fn_in, MetaDataTable& MD_out, int& total_segments, int& total_tubes, int nr_asu, RFLOAT rise_A, RFLOAT pixel_size_A, RFLOAT Xdim, RFLOAT Ydim, RFLOAT box_size_pix, bool bimodal_angular_priors, bool cut_into_segments) { int nr_segments, nr_tubes; RFLOAT xp, yp, dx, dy, x1, y1, x2, y2, psi_deg, psi_rad, half_box_size_pix, len_pix, psi_prior_flip_ratio; std::ifstream fin; std::string line; std::vector words; if ( (box_size_pix < 2) || (Xdim < box_size_pix) || (Ydim < box_size_pix)) REPORT_ERROR("helix.cpp::convertEmanHelicalTubeCoordsToMetaDataTable(): Wrong dimensions or box size!"); if (pixel_size_A < 0.001) REPORT_ERROR("helix.cpp::convertEmanHelicalTubeCoordsToMetaDataTable(): Invalid pixel size!"); RFLOAT step_pix = ((RFLOAT)(nr_asu)) * rise_A / pixel_size_A; if ( (nr_asu < 1) || (rise_A < 0.001) || (step_pix < 0.001) ) REPORT_ERROR("helix.cpp::convertEmanHelicalTubeCoordsToMetaDataTable(): Invalid helical rise or number of asymmetrical units!"); // Header of output file MD_out.clear(); MD_out.addLabel(EMDL_IMAGE_COORD_X); MD_out.addLabel(EMDL_IMAGE_COORD_Y); MD_out.addLabel(EMDL_PARTICLE_HELICAL_TUBE_ID); MD_out.addLabel(EMDL_ORIENT_TILT_PRIOR); MD_out.addLabel(EMDL_ORIENT_PSI_PRIOR); MD_out.addLabel(EMDL_PARTICLE_HELICAL_TRACK_LENGTH_ANGSTROM); MD_out.addLabel(EMDL_ORIENT_PSI_PRIOR_FLIP_RATIO); half_box_size_pix = box_size_pix / 2.; psi_prior_flip_ratio = UNIMODAL_PSI_PRIOR_FLIP_RATIO; if (bimodal_angular_priors) { psi_prior_flip_ratio = BIMODAL_PSI_PRIOR_FLIP_RATIO; } fin.open(fn_in.c_str(), std::ios_base::in); if (fin.fail()) REPORT_ERROR("helix.cpp::convertEmanHelicalTubeCoordsToMetaDataTable(): Cannot open input file " + fn_in); nr_tubes = nr_segments = 0; line.clear(); while (getline(fin, line, '\n')) { RFLOAT width1, width2, width3, width4; int tag; // Read in new helical tube if (line.size() < 2) // End of file break; // Get x1, y1 words.clear(); tokenize(line, words); if (words.size() != 5) REPORT_ERROR("helix.cpp::convertEmanHelicalTubeCoordsToMetaDataTable(): Invalid input file " + fn_in); x1 = textToFloat(words[0]); y1 = textToFloat(words[1]); width1 = textToFloat(words[2]); width2 = textToFloat(words[3]); tag = textToInteger(words[4]); if ( (tag != (-1)) || (fabs(width1 - width2) > 0.01) ) REPORT_ERROR("helix.cpp::convertEmanHelicalTubeCoordsToMetaDataTable(): Invalid input file " + fn_in); x1 += width1 / 2.; y1 += width1 / 2.; // Get x2, y2 line.clear(); getline(fin, line, '\n'); words.clear(); tokenize(line, words); if (words.size() != 5) 
REPORT_ERROR("helix.cpp::convertEmanHelicalTubeCoordsToMetaDataTable(): Invalid input file " + fn_in); x2 = textToFloat(words[0]); y2 = textToFloat(words[1]); width3 = textToFloat(words[2]); width4 = textToFloat(words[3]); tag = textToInteger(words[4]); if ( (tag != (-2)) || (fabs(width3 - width4) > 0.01) || (fabs(width3 - width1) > 0.01) ) REPORT_ERROR("helix.cpp::convertEmanHelicalTubeCoordsToMetaDataTable(): Invalid input file " + fn_in); x2 += width3 / 2.; y2 += width3 / 2.; nr_tubes++; psi_rad = atan2(y2 - y1, x2 - x1); psi_deg = RAD2DEG(psi_rad); dx = step_pix * cos(psi_rad); dy = step_pix * sin(psi_rad); // Truncate both ends of the helical tube RFLOAT trans_offset = 0.; x1 += ((width1 / 2.) - trans_offset) * cos(psi_rad); y1 += ((width1 / 2.) - trans_offset) * sin(psi_rad); x2 -= ((width1 / 2.) - trans_offset) * cos(psi_rad); y2 -= ((width1 / 2.) - trans_offset) * sin(psi_rad); if (!cut_into_segments) { MD_out.addObject(); MD_out.setValue(EMDL_IMAGE_COORD_X, ((x1 + x2) / 2.)); MD_out.setValue(EMDL_IMAGE_COORD_Y, ((y1 + y2) / 2.)); MD_out.setValue(EMDL_PARTICLE_HELICAL_TUBE_ID, nr_tubes); MD_out.setValue(EMDL_ORIENT_TILT_PRIOR, 90.); MD_out.setValue(EMDL_ORIENT_PSI_PRIOR, -psi_deg); MD_out.setValue(EMDL_PARTICLE_HELICAL_TRACK_LENGTH_ANGSTROM, 0.); MD_out.setValue(EMDL_ORIENT_PSI_PRIOR_FLIP_RATIO, psi_prior_flip_ratio); continue; } // Calculate coordinates for all segments xp = x1 - (dx * 0.99); yp = y1 - (dy * 0.99); len_pix = -step_pix; while (1) { xp += dx; yp += dy; len_pix += step_pix; if ( ((xp > x1) && (xp > x2)) || ((xp < x1) && (xp < x2)) || ((yp > y1) && (yp > y2)) || ((yp < y1) && (yp < y2)) ) { break; } else { #ifdef EXCLUDE_SEGMENTS_ON_THE_EDGES // Avoid segments lying on the edges of the micrographs if ( (xp < half_box_size_pix) || (xp > (Xdim - half_box_size_pix)) || (yp < half_box_size_pix) || (yp > (Ydim - half_box_size_pix)) ) { // Extract from filament start-end coordinates. It is not necessary to notify the user. //std::cerr << " WARNING: Particle at (" << xp << ", " << yp << ") in coordinate file " << fn_in << " is NOT extracted because it is too close to the edge." << std::flush; //std::cerr << " Box_size_pix = " << box_size_pix << ", Dimensions = " << Xdim << " * " << Ydim << std::endl; continue; } #endif MD_out.addObject(); MD_out.setValue(EMDL_IMAGE_COORD_X, xp); MD_out.setValue(EMDL_IMAGE_COORD_Y, yp); MD_out.setValue(EMDL_PARTICLE_HELICAL_TUBE_ID, nr_tubes); MD_out.setValue(EMDL_ORIENT_TILT_PRIOR, 90.); MD_out.setValue(EMDL_ORIENT_PSI_PRIOR, -psi_deg); MD_out.setValue(EMDL_PARTICLE_HELICAL_TRACK_LENGTH_ANGSTROM, pixel_size_A * len_pix); MD_out.setValue(EMDL_ORIENT_PSI_PRIOR_FLIP_RATIO, psi_prior_flip_ratio); nr_segments++; } } line.clear(); } fin.close(); total_segments = MD_out.numberOfObjects(); total_tubes = nr_tubes; std::cout << "Input EMAN2 coordinates = " << fn_in.c_str() << ", micrograph size = " << Xdim << " * " << Ydim << ", box size = " << box_size_pix << ", tubes = " << nr_tubes << ", segments = " << MD_out.numberOfObjects() << "." 
<< std::endl; } /* void divideHelicalSegmentsFromMultipleMicrographsIntoRandomHalves( FileName& fn_in, FileName& fn_out, int random_seed) { RFLOAT ratio; std::string mic_name; int nr_swaps, nr_segments_subset1, nr_segments_subset2, helical_tube_id; bool divide_according_to_helical_tube_id; MetaDataTable MD; std::map map_mics; std::map::const_iterator ii_map; std::vector > vec_mics; if (fn_in == fn_out) REPORT_ERROR("helix.cpp::divideHelicalSegmentsFromMultipleMicrographsIntoRandomHalves(): File names error!"); MD.clear(); std::cout << " Loading input file..." << std::endl; MD.read(fn_in); init_random_generator(random_seed); if (!MD.containsLabel(EMDL_MICROGRAPH_NAME)) REPORT_ERROR("helix.cpp::divideHelicalSegmentsFromMultipleMicrographsIntoRandomHalves(): Input MetadataTable should contain rlnMicrographName!"); if (!MD.containsLabel(EMDL_PARTICLE_RANDOM_SUBSET)) MD.addLabel(EMDL_PARTICLE_RANDOM_SUBSET); if (MD.containsLabel(EMDL_PARTICLE_HELICAL_TUBE_ID)) divide_according_to_helical_tube_id = true; else divide_according_to_helical_tube_id = false; // Count micrograph names map_mics.clear(); FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD) { mic_name.clear(); MD.getValue(EMDL_MICROGRAPH_NAME, mic_name); if (divide_according_to_helical_tube_id) { MD.getValue(EMDL_PARTICLE_HELICAL_TUBE_ID, helical_tube_id); if (helical_tube_id < 1) REPORT_ERROR("helix.cpp::divideHelicalSegmentsFromMultipleMicrographsIntoRandomHalves(): Helical tube ID should be positive integer!"); mic_name += std::string("_TUBEID_"); mic_name += std::string(integerToString(helical_tube_id)); } if ((map_mics.insert(std::make_pair(mic_name, 1))).second == false) map_mics[mic_name]++; } vec_mics.clear(); for (ii_map = map_mics.begin(); ii_map != map_mics.end(); ii_map++) vec_mics.push_back(*ii_map); if (random_seed != 0) { // Randomise // 1. Randomise total number of swaps needed nr_swaps = ROUND(rnd_unif(vec_mics.size(), 2. * vec_mics.size())); // DEBUG if (divide_according_to_helical_tube_id) std::cout << " Helical tubes= " << vec_mics.size() << ", nr_swaps= " << nr_swaps << std::endl; else std::cout << " Micrographs= " << vec_mics.size() << ", nr_swaps= " << nr_swaps << std::endl; // 2. Perform swaps for (int ii = 0; ii < nr_swaps; ii++) { int ptr_a, ptr_b; std::pair tmp; ptr_a = ROUND(rnd_unif(0, vec_mics.size())); ptr_b = ROUND(rnd_unif(0, vec_mics.size())); if ( (ptr_a == ptr_b) || (ptr_a < 0 ) || (ptr_b < 0) || (ptr_a >= vec_mics.size()) || (ptr_b >= vec_mics.size()) ) continue; tmp = vec_mics[ptr_a]; vec_mics[ptr_a] = vec_mics[ptr_b]; vec_mics[ptr_b] = tmp; // DEBUG //std::cout << " Swap mic_id= " << ptr_a << " with mic_id= " << ptr_b << "." << std::endl; } } // Divide micrographs into halves map_mics.clear(); nr_segments_subset1 = nr_segments_subset2 = 0; for (int ii = 0; ii < vec_mics.size(); ii++) { if (nr_segments_subset1 < nr_segments_subset2) { nr_segments_subset1 += vec_mics[ii].second; vec_mics[ii].second = 1; } else { nr_segments_subset2 += vec_mics[ii].second; vec_mics[ii].second = 2; } map_mics.insert(vec_mics[ii]); } if ( (nr_segments_subset1 < 1) || (nr_segments_subset2 < 1) ) REPORT_ERROR("helix.cpp::divideHelicalSegmentsFromMultipleMicrographsIntoRandomHalves(): Number of helical segments from one of the two half sets is 0!"); ratio = (RFLOAT(nr_segments_subset1) / RFLOAT(nr_segments_subset2)); if ( (ratio > 1.5) || ( (1. 
/ ratio) > 1.5) ) REPORT_ERROR("helix.cpp::divideHelicalSegmentsFromMultipleMicrographsIntoRandomHalves(): Numbers of helical segments from two half sets are extremely unbalanced!"); FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD) { mic_name.clear(); MD.getValue(EMDL_MICROGRAPH_NAME, mic_name); if (divide_according_to_helical_tube_id) { MD.getValue(EMDL_PARTICLE_HELICAL_TUBE_ID, helical_tube_id); if (helical_tube_id < 1) REPORT_ERROR("helix.cpp::divideHelicalSegmentsFromMultipleMicrographsIntoRandomHalves(): Helical tube ID should be positive integer!"); mic_name += std::string("_TUBEID_"); mic_name += std::string(integerToString(helical_tube_id)); } MD.setValue(EMDL_PARTICLE_RANDOM_SUBSET, map_mics[mic_name]); } // DEBUG std::cout << " Helical segments in two half sets = " << nr_segments_subset1 << ", " << nr_segments_subset2 << std::endl; std::cout << " Writing output file..." << std::endl; MD.write(fn_out); std::cout << " Done!" << std::endl; return; } */ void makeHelicalReference2D( MultidimArray& out, int box_size, RFLOAT particle_diameter_A, RFLOAT tube_diameter_A, RFLOAT pixel_size_A, bool is_tube_white) { RFLOAT particle_diameter_pix, tube_diameter_pix, p2, dist2, r2, t2; if (pixel_size_A < 0.0001) REPORT_ERROR("helix.cpp::makeHelicalReference2D(): Invalid pixel size!"); particle_diameter_pix = particle_diameter_A / pixel_size_A; tube_diameter_pix = tube_diameter_A / pixel_size_A; if (box_size < 10) REPORT_ERROR("helix.cpp::makeHelicalReference2D(): Invalid box size!"); if ( (particle_diameter_pix < 2.) || (particle_diameter_pix > box_size) ) REPORT_ERROR("helix.cpp::makeHelicalReference2D(): Invalid particle diameter!"); if ( (tube_diameter_pix < 1.) || (tube_diameter_pix > particle_diameter_pix) ) REPORT_ERROR("helix.cpp::makeHelicalReference2D(): Invalid tube diameter!"); r2 = (particle_diameter_pix / 2.) * (particle_diameter_pix / 2.); t2 = (tube_diameter_pix / 2.) * (tube_diameter_pix / 2.); out.clear(); out.resize(box_size, box_size); out.initZeros(); out.setXmippOrigin(); FOR_ALL_ELEMENTS_IN_ARRAY2D(out) { dist2 = (RFLOAT)(i * i + j * j); p2 = (RFLOAT)(j * j); if ( (dist2 < r2) && (p2 < t2) ) { if (is_tube_white) A2D_ELEM(out, i, j) = 1.; else A2D_ELEM(out, i, j) = (-1.); } } return; }; /* void makeHelicalReference3D( MultidimArray& out, int box_size, RFLOAT pixel_size_A, RFLOAT twist_deg, RFLOAT rise_A, RFLOAT tube_diameter_A, RFLOAT particle_diameter_A, int sym_Cn) { RFLOAT rise_pix, tube_diameter_pix, particle_diameter_pix, particle_radius_pix; int particle_radius_max_pix; Matrix2D matrix1, matrix2; Matrix1D vec0, vec1, vec2; out.clear(); if (box_size < 5) REPORT_ERROR("helix.cpp::makeHelicalReference3D(): Box size should be larger than 5!"); if (pixel_size_A < 0.001) REPORT_ERROR("helix.cpp::makeHelicalReference3D(): Pixel size (in Angstroms) should be larger than 0.001!"); if ( (fabs(twist_deg) < 0.01) || (fabs(twist_deg) > 179.99) || ((rise_A / pixel_size_A) < 0.001) ) REPORT_ERROR("helix.cpp::makeHelicalReference3D(): Wrong helical twist or rise!"); if (sym_Cn < 1) REPORT_ERROR("helix.cpp::makeHelicalReference3D(): Rotation symmetry Cn is invalid (n should be positive integer)!"); rise_pix = rise_A / pixel_size_A; tube_diameter_pix = tube_diameter_A / pixel_size_A; particle_diameter_pix = particle_diameter_A / pixel_size_A; particle_radius_pix = particle_diameter_pix / 2.; particle_radius_max_pix = (CEIL(particle_diameter_pix / 2.)) + 1; if (particle_diameter_pix < 2.) 
REPORT_ERROR("helix.cpp::makeHelicalReference3D(): Particle diameter should be larger than 2 pixels!"); if ( (tube_diameter_pix < 0.001) || (tube_diameter_pix > (RFLOAT)(box_size)) ) REPORT_ERROR("helix.cpp::makeHelicalReference3D(): Tube diameter should be larger than 1 pixel and smaller than box size!"); out.resize(box_size, box_size, box_size); out.initZeros(); out.setXmippOrigin(); RFLOAT x0, y0, z0; x0 = tube_diameter_pix / 2.; y0 = 0.; z0 = (RFLOAT)(FIRST_XMIPP_INDEX(box_size)); vec0.clear(); vec0.resize(2); XX(vec0) = x0; YY(vec0) = y0; vec1.clear(); vec1.resize(2); vec2.clear(); vec2.resize(2); for (int id = 0; ;id++) { RFLOAT rot1_deg, x1, y1, z1; rot1_deg = (RFLOAT)(id) * twist_deg; rotation2DMatrix(rot1_deg, matrix1, false); vec1 = matrix1 * vec0; x1 = XX(vec1); y1 = YY(vec1); z1 = z0 + (RFLOAT)(id) * rise_pix; if (z1 > LAST_XMIPP_INDEX(box_size)) break; for (int Cn = 0; Cn < sym_Cn; Cn++) { RFLOAT rot2_deg, x2, y2, z2; rot2_deg = (360.) * (RFLOAT)(Cn) / (RFLOAT)(sym_Cn); rotation2DMatrix(rot2_deg, matrix2, false); vec2 = matrix2 * vec1; x2 = XX(vec2); y2 = YY(vec2); z2 = z1; for (int dx = -particle_radius_max_pix; dx <= particle_radius_max_pix; dx++) { for (int dy = -particle_radius_max_pix; dy <= particle_radius_max_pix; dy++) { for (int dz = -particle_radius_max_pix; dz <= particle_radius_max_pix; dz++) { RFLOAT _x, _y, _z, dist, val_old, val_new; int x3, y3, z3; x3 = ROUND(x2) + dx; y3 = ROUND(y2) + dy; z3 = ROUND(z2) + dz; if ( (x3 < FIRST_XMIPP_INDEX(box_size)) || (x3 > LAST_XMIPP_INDEX(box_size)) || (y3 < FIRST_XMIPP_INDEX(box_size)) || (y3 > LAST_XMIPP_INDEX(box_size)) || (z3 < FIRST_XMIPP_INDEX(box_size)) || (z3 > LAST_XMIPP_INDEX(box_size)) ) continue; _x = (RFLOAT)(x3) - x2; _y = (RFLOAT)(y3) - y2; _z = (RFLOAT)(z3) - z2; dist = sqrt(_x * _x + _y * _y + _z * _z); if (dist > particle_radius_pix) continue; val_old = A3D_ELEM(out, z3, y3, x3); val_new = 0.5 + 0.5 * cos(PI * dist / particle_radius_pix); if (val_new > val_old) A3D_ELEM(out, z3, y3, x3) = val_new; } } } } } return; } */ void makeHelicalReference3DWithPolarity( MultidimArray& out, int box_size, RFLOAT pixel_size_A, RFLOAT twist_deg, RFLOAT rise_A, RFLOAT tube_diameter_A, RFLOAT particle_diameter_A, RFLOAT cyl_diameter_A, RFLOAT topbottom_ratio, int sym_Cn, int nr_filaments_helix_with_seam) { RFLOAT rise_pix, tube_diameter_pix, particle_diameter_pix, particle_radius_pix, cyl_radius_pix, top_radius_pix, bottom_radius_pix; int particle_radius_max_pix; bool append_additional_densities = false; Matrix2D matrix1, matrix2; Matrix1D vec0, vec1, vec2; out.clear(); if (box_size < 5) REPORT_ERROR("helix.cpp::makeHelicalReference3DWithPolarity(): Box size should be larger than 5!"); if (pixel_size_A < 0.001) REPORT_ERROR("helix.cpp::makeHelicalReference3DWithPolarity(): Pixel size (in Angstroms) should be larger than 0.001!"); if ( (fabs(twist_deg) > 179.99) || ((rise_A / pixel_size_A) < 0.001) ) REPORT_ERROR("helix.cpp::makeHelicalReference3DWithPolarity(): Wrong helical twist or rise!"); if (sym_Cn < 1) REPORT_ERROR("helix.cpp::makeHelicalReference3DWithPolarity(): Rotation symmetry Cn is invalid (n should be positive integer)!"); if ( (topbottom_ratio < 0.) || (topbottom_ratio > 1.) 
) REPORT_ERROR("helix.cpp::makeHelicalReference3DWithPolarity(): Top-bottom width ratio should be 0~1!"); if ( (nr_filaments_helix_with_seam > 1) && (sym_Cn != 1) ) REPORT_ERROR("helix.cpp::makeHelicalReference3DWithPolarity(): Set Cn point group symmetry to 1 for a helix with seam!"); if ( (nr_filaments_helix_with_seam > 1) && (!(topbottom_ratio > 0.9999)) ) REPORT_ERROR("helix.cpp::makeHelicalReference3DWithPolarity(): Set top-bottom width ratio to 1 for a helix with seam!"); rise_pix = rise_A / pixel_size_A; tube_diameter_pix = tube_diameter_A / pixel_size_A; particle_diameter_pix = particle_diameter_A / pixel_size_A; particle_radius_pix = particle_diameter_pix / 2.; particle_radius_max_pix = (CEIL(particle_diameter_pix / 2.)) + 1; top_radius_pix = cyl_radius_pix = 0.5 * cyl_diameter_A / pixel_size_A; bottom_radius_pix = top_radius_pix * topbottom_ratio; if (particle_diameter_pix < 2.) REPORT_ERROR("helix.cpp::makeHelicalReference3DWithPolarity(): Particle diameter should be larger than 2 pixels!"); if ( (tube_diameter_pix < 0.001) || (tube_diameter_pix > (RFLOAT)(box_size)) ) REPORT_ERROR("helix.cpp::makeHelicalReference3DWithPolarity(): Tube diameter should be larger than 1 pixel and smaller than box size!"); if ( (cyl_radius_pix < 1.) || (cyl_radius_pix > particle_radius_pix) ) REPORT_ERROR("helix.cpp::makeHelicalReference3DWithPolarity(): Cylindrical diameter should be > 1 pixel and < particle diameter!"); out.resize(box_size, box_size, box_size); out.initZeros(); out.setXmippOrigin(); RFLOAT x0, y0, z0; // OLD //x0 = tube_diameter_pix / 2.; //y0 = 0.; // NEW - To generate references with Dn symmetry. TODO: Am I doing what I want? z0 = rise_pix * FLOOR((RFLOAT)(FIRST_XMIPP_INDEX(box_size)) / rise_pix); x0 = (tube_diameter_pix / 2.) * cos( (PI * twist_deg * z0) / (rise_pix * 180.) ); y0 = (tube_diameter_pix / 2.) * sin( (PI * twist_deg * z0) / (rise_pix * 180.) ); vec0.clear(); vec0.resize(2); XX(vec0) = x0; YY(vec0) = y0; vec1.clear(); vec1.resize(2); vec2.clear(); vec2.resize(2); append_additional_densities = false; for (int id = 0; ;id++) { RFLOAT rot1_deg, x1, y1, z1; rot1_deg = (RFLOAT)(id) * twist_deg; rotation2DMatrix(rot1_deg, matrix1, false); vec1 = matrix1 * vec0; x1 = XX(vec1); y1 = YY(vec1); z1 = z0 + (RFLOAT)(id) * rise_pix; if (z1 > LAST_XMIPP_INDEX(box_size)) break; for (int Cn = 0; Cn < sym_Cn; Cn++) { RFLOAT rot2_deg, x2, y2, z2; rot2_deg = (360.) * (RFLOAT)(Cn) / (RFLOAT)(sym_Cn); rotation2DMatrix(rot2_deg, matrix2, false); vec2 = matrix2 * vec1; x2 = XX(vec2); y2 = YY(vec2); z2 = z1; for (int dz = -particle_radius_max_pix; dz <= particle_radius_max_pix; dz++) { RFLOAT thres_xy = (top_radius_pix - bottom_radius_pix) * 0.5 * dz / particle_radius_pix + (top_radius_pix + bottom_radius_pix) / 2.; for (int dy = -particle_radius_max_pix; dy <= particle_radius_max_pix; dy++) { for (int dx = -particle_radius_max_pix; dx <= particle_radius_max_pix; dx++) { RFLOAT _x, _y, _z, dist, val_old, val_new; int x3, y3, z3; x3 = ROUND(x2) + dx; y3 = ROUND(y2) + dy; z3 = ROUND(z2) + dz; if ( (x3 < FIRST_XMIPP_INDEX(box_size)) || (x3 > LAST_XMIPP_INDEX(box_size)) || (y3 < FIRST_XMIPP_INDEX(box_size)) || (y3 > LAST_XMIPP_INDEX(box_size)) || (z3 < FIRST_XMIPP_INDEX(box_size)) || (z3 > LAST_XMIPP_INDEX(box_size)) ) continue; _x = (RFLOAT)(x3) - x2; _y = (RFLOAT)(y3) - y2; _z = (RFLOAT)(z3) - z2; dist = sqrt(_x * _x + _y * _y + _z * _z); if (dist > particle_radius_pix) continue; val_old = A3D_ELEM(out, z3, y3, x3); val_new = 0.; // Draw the shape you want! 
if (topbottom_ratio > 0.9999) // Without polarity. Thus spheres. { val_new = 0.5 + 0.5 * cos(PI * dist / particle_radius_pix); if (val_new > val_old) A3D_ELEM(out, z3, y3, x3) = val_new; } else // With polarity { dist = sqrt(_x * _x + _y * _y); if (dist < thres_xy) { val_new = 0.5 + 0.5 * cos(PI * dist / thres_xy); val_new *= 0.5 + 0.5 * cos(PI * 0.5 * _z / particle_radius_pix); // something arbitrary if (val_new > val_old) A3D_ELEM(out, z3, y3, x3) = val_new; } } } } } } if (nr_filaments_helix_with_seam > 1) { if (id % nr_filaments_helix_with_seam == 0) append_additional_densities = (append_additional_densities == true) ? (false) : (true); if (append_additional_densities) { x1 *= (tube_diameter_pix + particle_diameter_pix) / tube_diameter_pix; y1 *= (tube_diameter_pix + particle_diameter_pix) / tube_diameter_pix; z1 += particle_diameter_pix / 2.; for (int dz = -particle_radius_max_pix / 2.; dz <= particle_radius_max_pix / 2.; dz++) { for (int dy = -particle_radius_max_pix / 2.; dy <= particle_radius_max_pix / 2.; dy++) { for (int dx = -particle_radius_max_pix / 2.; dx <= particle_radius_max_pix / 2.; dx++) { RFLOAT _x, _y, _z, dist, val_old, val_new; int x2, y2, z2; x2 = ROUND(x1 + dx); y2 = ROUND(y1 + dy); z2 = ROUND(z1 + dz); if ( (x2 < FIRST_XMIPP_INDEX(box_size)) || (x2 > LAST_XMIPP_INDEX(box_size)) || (y2 < FIRST_XMIPP_INDEX(box_size)) || (y2 > LAST_XMIPP_INDEX(box_size)) || (z2 < FIRST_XMIPP_INDEX(box_size)) || (z2 > LAST_XMIPP_INDEX(box_size)) ) continue; _x = (RFLOAT)(x2) - x1; _y = (RFLOAT)(y2) - y1; _z = (RFLOAT)(z2) - z1; dist = sqrt(_x * _x + _y * _y + _z * _z); if (dist > (particle_radius_pix / 2.)) continue; val_old = A3D_ELEM(out, z2, y2, x2); val_new = 0.; val_new = 0.5 + 0.5 * cos(2. * PI * dist / particle_radius_pix); if (val_new > val_old) A3D_ELEM(out, z2, y2, x2) = val_new; } } }// End of looping over x,y,z } }// End of nr_filaments_helix_with_seam > 1 } return; } void divideStarFile( FileName& fn_in, int nr) { FileName fn_out; MetaDataTable MD_in, MD; int total_lines, nr_lines, line_id, file_id; if (fn_in.getExtension() != "star") REPORT_ERROR("helix.cpp::divideStarFile: Input file should be in .star format!"); if ( (nr < 2) || (nr > 999999) ) REPORT_ERROR("helix.cpp::divideStarFile: The number of output files should be within range 2~999999!"); std::cout << " Loading input file: " << fn_in << " ..." << std::endl; MD_in.clear(); MD_in.read(fn_in); std::cout << " Input file loaded." << std::endl; total_lines = MD_in.numberOfObjects(); if (total_lines < nr) REPORT_ERROR("helix.cpp::divideStarFile: The number of total objects is smaller than the number of output files!"); nr_lines = total_lines / nr; std::cout << " Total objects = " << total_lines << ", number of output files = " << nr << std::endl; std::cout << " Writing output files..." << std::endl; line_id = 0; file_id = 0; MD.clear(); FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD_in) { line_id++; MD.addObject(MD_in.getObject()); if ( ((line_id % nr_lines) == 0) && ((total_lines - line_id) > nr_lines) ) { file_id++; fn_out = fn_in.withoutExtension() + "_sub" + integerToString(file_id, 6, '0'); fn_out = fn_out.addExtension("star"); MD.write(fn_out); MD.clear(); } } if (MD.numberOfObjects() != 0) { file_id++; fn_out = fn_in.withoutExtension() + "_sub" + integerToString(file_id, 6, '0'); fn_out = fn_out.addExtension("star"); MD.write(fn_out); MD.clear(); } std::cout << " Done!" 
<< std::endl; return; } void mergeStarFiles(FileName& fn_in) { int file_id; std::vector fns_list; MetaDataTable MD_combined, MD_in; FileName fn_root, fn_out; fns_list.clear(); fn_root = "*" + fn_in + "*"; if (fn_root.globFiles(fns_list) < 2) REPORT_ERROR("helix.cpp::combineStarFiles: Only 0 or 1 input file! There is no need to combine!"); for (file_id = 0; file_id < fns_list.size(); file_id++) { if (fns_list[file_id].getExtension() != "star") REPORT_ERROR("helix.cpp::combineStarFiles: Input files should have STAR extension!"); } std::cout << " Combining STAR files: " << std::endl; std::cout << " BEWARE: All STAR files should contain the same header!" << std::endl; for (file_id = 0; file_id < fns_list.size(); file_id++) std::cout << " " << fns_list[file_id] << std::endl; std::cout << " Loading input files..." << std::endl; MD_combined.clear(); for (file_id = 0; file_id < fns_list.size(); file_id++) { MD_in.clear(); MD_in.read(fns_list[file_id]); FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD_in) { MD_combined.addObject(MD_in.getObject()); } std::cout << " " << MD_combined.numberOfObjects() << " objects loaded." << std::endl; } std::cout << " Writing the combined output file..." << std::endl; fn_out = fn_in + "_combined.star"; MD_combined.write(fn_out); std::cout << " Done!" << std::endl; return; } void sortHelicalTubeID(MetaDataTable& MD) { std::string str_particle_fullname, str_particle_name, str_comment, str_particle_id; int int_tube_id, nr_tubes; std::vector list; std::set tubes; long int MDobjectID; if ( (!MD.containsLabel(EMDL_IMAGE_NAME)) || (!MD.containsLabel(EMDL_ORIENT_TILT)) || (!MD.containsLabel(EMDL_ORIENT_PSI)) || (!MD.containsLabel(EMDL_ORIENT_TILT_PRIOR)) || (!MD.containsLabel(EMDL_ORIENT_PSI_PRIOR)) || (!MD.containsLabel(EMDL_PARTICLE_HELICAL_TUBE_ID)) || (!MD.containsLabel(EMDL_PARTICLE_HELICAL_TRACK_LENGTH_ANGSTROM)) || (!MD.containsLabel(EMDL_ORIENT_PSI_PRIOR_FLIP_RATIO)) ) REPORT_ERROR("helix.cpp::sortHelicalTubeID: Labels of helical prior information are missing!"); int_tube_id = 0; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD) { MD.getValue(EMDL_IMAGE_NAME, str_particle_fullname); MD.getValue(EMDL_PARTICLE_HELICAL_TUBE_ID, int_tube_id); str_comment = str_particle_name + "@TUBE@" + integerToString(int_tube_id, 6); tubes.insert(str_comment); str_particle_name = str_particle_fullname.substr(str_particle_fullname.find("@") + 1); str_particle_id = str_particle_fullname.substr(0, str_particle_fullname.find("@")); str_comment = str_particle_name + "@TUBE@" + integerToString(int_tube_id, 6) + "@PARTICLE@" + str_particle_id; // DEBUG //std::cout << str_comment << std::endl; MD.setValue(EMDL_IMAGE_NAME, str_comment); } MD.newSort(EMDL_IMAGE_NAME); nr_tubes = tubes.size(); tubes.clear(); FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD) { MD.getValue(EMDL_IMAGE_NAME, str_comment); str_particle_id = str_comment.substr(str_comment.find("@PARTICLE@") + 10); str_particle_name = str_comment.substr(0, str_comment.find("@TUBE@")); str_comment = str_particle_id + "@" + str_particle_name; MD.setValue(EMDL_IMAGE_NAME, str_comment); } std::vector dummy; updatePriorsForHelicalReconstruction( MD, 1.,dummy, dummy, 1, false, false, 0., 0., 0., 1., false, 1); list.clear(); return; } void simulateHelicalSegments( bool is_3d_tomo, FileName& fn_vol_in, FileName& fn_star_out, RFLOAT white_noise, int new_box_size, int nr_subunits, int nr_asu, int nr_tubes, bool do_bimodal_searches, RFLOAT cyl_outer_diameter_A, RFLOAT angpix, RFLOAT rise_A, RFLOAT twist_deg, RFLOAT sigma_psi, RFLOAT sigma_tilt, RFLOAT sigma_offset, int 
random_seed) { Image img; int nr_segments, tube_id; RFLOAT rot, psi, tilt, new_psi, new_tilt, xoff, yoff, zoff, new_xoff, new_yoff, new_zoff, step_pix, psi_flip_ratio, len_pix; MetaDataTable MD; FileName fn_mic, fn_star_out_full, fn_star_out_priors, fn_star_out_wopriors, fn_ext; std::ofstream fout; std::string command; if ( (fn_vol_in.contains("temporary")) || (fn_star_out.contains("temporary")) ) REPORT_ERROR("helix.cpp::simulateHelicalSegments: Avoid 'temporary' in the input and output file names!"); if ( (is_3d_tomo) && (fn_star_out.contains("subtomo")) ) REPORT_ERROR("helix.cpp::simulateHelicalSegments: Avoid 'subtomo' in the input and output file names!"); if (fn_vol_in.getExtension() != "mrc") REPORT_ERROR("helix.cpp::simulateHelicalSegments: Input 3D volume should be in .mrc format!"); img.read(fn_vol_in, false); // Read the header only! if ( (img().getDim() != 3) || (XSIZE(img()) != YSIZE(img())) || (YSIZE(img()) != ZSIZE(img())) || (XSIZE(img()) < 10) ) REPORT_ERROR("helix.cpp::simulateHelicalSegments: Input volume should be a 3D cube (>10*10*10)!"); if ( (new_box_size > XSIZE(img())) || (new_box_size < 10) || (new_box_size % 2) ) REPORT_ERROR("helix.cpp::simulateHelicalSegments: Cropped box size (an even number) should be at least 10, and smaller than the box size of the input 3D volume!"); if (angpix < 0.001) REPORT_ERROR("helix.cpp::simulateHelicalSegments: Pixel size should be larger than 0.001 Angstroms!"); if ( (rise_A < 0.001) || ((rise_A / angpix) < 0.001) || ((rise_A / angpix) > (0.3333 * new_box_size)) ) REPORT_ERROR("helix.cpp::simulateHelicalSegments: Rise is smaller than 0.001 pixels or larger than 1/3 of the new (cropped) box size!"); if ( (fabs(twist_deg) < 0.001) || (fabs(twist_deg) > 180.)) REPORT_ERROR("helix.cpp::simulateHelicalSegments: Error in helical twist!"); // TODO: raise error if nr_asu<0 or too big, n too small! if ( (nr_tubes < 2) || (nr_subunits < 10) || (nr_asu < 1) || (((nr_subunits / nr_asu) / nr_tubes) < 3) || ((nr_subunits / nr_asu) > 999000) || ((nr_asu * rise_A / angpix) > new_box_size) ) REPORT_ERROR("helix.cpp::simulateHelicalSegments: Errors in the number of tubes, asymmetrical units or total subunits!"); if ( (sigma_psi > 10.) || (sigma_tilt > 10.) ) REPORT_ERROR("helix.cpp::simulateHelicalSegments: sigma_psi and sigma_tilt should not be larger than 10 degrees."); if (sigma_offset > 50.) REPORT_ERROR("helix.cpp::simulateHelicalSegments: sigma_trans should not be larger than 50 pixels."); if (fn_star_out.getExtension() != "star") REPORT_ERROR("helix.cpp::simulateHelicalSegments: Output file should be in STAR format!"); fn_star_out_full = fn_star_out.withoutExtension() + (std::string)("_info.star"); fn_star_out_priors = fn_star_out.withoutExtension() + (std::string)("_helical_priors.star"); fn_star_out_wopriors = fn_star_out.withoutExtension() + (std::string)("_no_priors.star"); if (is_3d_tomo) fout.open("simulate-3d-subtomos.sh", std::ios::out); else fout.open("simulate-2d-segments.sh", std::ios::out); if (!fout) REPORT_ERROR( (std::string)"helix.cpp::simulateHelicalSegments: Cannot write to .sh script!"); sigma_tilt = (sigma_tilt > 0.) ? (sigma_tilt) : (0.); sigma_psi = (sigma_psi > 0.) ? (sigma_psi) : (0.); sigma_offset = (sigma_offset > 0.) ? 
(sigma_offset) : (0.); if (random_seed < 0) random_seed = time(NULL); init_random_generator(random_seed); nr_segments = nr_subunits / nr_asu; MD.clear(); MD.addLabel(EMDL_ORIENT_ROT); MD.addLabel(EMDL_ORIENT_TILT); MD.addLabel(EMDL_ORIENT_PSI); MD.addLabel(EMDL_ORIENT_ORIGIN_X_ANGSTROM); MD.addLabel(EMDL_ORIENT_ORIGIN_Y_ANGSTROM); if (is_3d_tomo) MD.addLabel(EMDL_ORIENT_ORIGIN_Z_ANGSTROM); MD.addLabel(EMDL_ORIENT_ROT_PRIOR); MD.addLabel(EMDL_ORIENT_TILT_PRIOR); MD.addLabel(EMDL_ORIENT_PSI_PRIOR); MD.addLabel(EMDL_PARTICLE_HELICAL_TRACK_LENGTH_ANGSTROM); MD.addLabel(EMDL_ORIENT_PSI_PRIOR_FLIP_RATIO); MD.addLabel(EMDL_PARTICLE_HELICAL_TUBE_ID); MD.addLabel(EMDL_IMAGE_NAME); MD.addLabel(EMDL_MICROGRAPH_NAME); MD.addLabel(EMDL_CTF_DETECTOR_PIXEL_SIZE); MD.addLabel(EMDL_CTF_MAGNIFICATION); tube_id = 0; step_pix = nr_asu * rise_A / angpix; psi_flip_ratio = (do_bimodal_searches) ? (0.5) : (0.); for (int id = 0; id < nr_segments; id++) { if ( ( (id % (nr_segments / nr_tubes)) == 0 ) && ( (nr_segments - id) >= (nr_segments / nr_tubes) ) ) { tube_id++; len_pix = -step_pix; if (is_3d_tomo) tilt = rnd_unif(0.01, 179.99); // If realWRAP function works well, set this to 0-180. else tilt = rnd_unif(85., 95.); rot = rnd_unif(0.01, 359.99); psi = rnd_unif(-179.99, 179.99); } len_pix += step_pix; rot += twist_deg * ((RFLOAT)(nr_asu)); rot = realWRAP(rot, -180., 180.); // Does this realWRAP function work well? No... rot = (rot < -180.) ? (rot + 360.) : (rot); rot = (rot > 180.) ? (rot - 360.) : (rot); if (sigma_tilt < 0.0001) new_tilt = tilt; else new_tilt = tilt + rnd_gaus(0., sigma_tilt); new_tilt = (new_tilt < 0.) ? (-new_tilt) : (new_tilt); // Do NOT change the polarities new_tilt = (new_tilt > 180.) ? (360. - new_tilt) : (new_tilt); if (sigma_psi < 0.0001) new_psi = psi; else new_psi = psi + rnd_gaus(0., sigma_psi); new_psi = (new_psi < -180.) ? (new_psi + 360.) : (new_psi); new_psi = (new_psi > 180.) ? (new_psi - 360.) : (new_psi); xoff = yoff = zoff = new_xoff = new_yoff = new_zoff = 0.; if (sigma_offset > 0.0001) { new_xoff = (is_3d_tomo) ? (rnd_gaus(0., sigma_offset)) : (rnd_unif(-0.5 * rise_A, 0.5 * rise_A)); new_yoff = rnd_gaus(0., sigma_offset); new_zoff = (is_3d_tomo) ? (rnd_unif(-0.5 * rise_A, 0.5 * rise_A)) : (0.); transformCartesianAndHelicalCoords( new_xoff, new_yoff, new_zoff, xoff, yoff, zoff, rot, new_tilt, new_psi, (is_3d_tomo) ? 
(3) : (2), HELICAL_TO_CART_COORDS); } MD.addObject(); MD.setValue(EMDL_ORIENT_ROT, rot); MD.setValue(EMDL_ORIENT_TILT, new_tilt); MD.setValue(EMDL_ORIENT_PSI, new_psi); MD.setValue(EMDL_ORIENT_ORIGIN_X_ANGSTROM, xoff); MD.setValue(EMDL_ORIENT_ORIGIN_Y_ANGSTROM, yoff); if (is_3d_tomo) MD.setValue(EMDL_ORIENT_ORIGIN_Z_ANGSTROM, zoff); MD.setValue(EMDL_ORIENT_ROT_PRIOR, rot); MD.setValue(EMDL_ORIENT_TILT_PRIOR, new_tilt); MD.setValue(EMDL_ORIENT_PSI_PRIOR, new_psi); MD.setValue(EMDL_PARTICLE_HELICAL_TRACK_LENGTH_ANGSTROM, angpix * len_pix); MD.setValue(EMDL_ORIENT_PSI_PRIOR_FLIP_RATIO, psi_flip_ratio); MD.setValue(EMDL_PARTICLE_HELICAL_TUBE_ID, tube_id); if (is_3d_tomo) { fn_mic.compose((id + 1), "dummy"); fn_mic = fn_mic.beforeFirstOf("@"); fn_mic = (std::string)("subtomo-3d-") + (std::string)(fn_mic) + (std::string)(".mrc"); } else fn_mic.compose((id + 1), "segments-2d.mrcs"); MD.setValue(EMDL_IMAGE_NAME, fn_mic); if (is_3d_tomo) MD.setValue(EMDL_MICROGRAPH_NAME, (std::string)("tomogram-01.mrc")); else MD.setValue(EMDL_MICROGRAPH_NAME, (std::string)("micrograph-01.mrc")); MD.setValue(EMDL_CTF_DETECTOR_PIXEL_SIZE, 14.0); MD.setValue(EMDL_CTF_MAGNIFICATION, (140000. / angpix)); // Generate .sh script if (is_3d_tomo) { fout << "relion_project --i " << fn_vol_in << " --o temporary-image-3d.mrc --angpix " << angpix << " --rot " << rot << " --tilt " << new_tilt << " --psi " << new_psi << " --xoff " << xoff << " --yoff " << yoff << " --zoff " << zoff << " --3d_rot" << std::flush; if (white_noise > 0.) fout << " --add_noise --white_noise " << white_noise; fout << std::endl; fout << "relion_image_handler --i temporary-image-3d.mrc --o " << fn_mic << " --new_box " << new_box_size << std::endl; //fout << "rm -rf temporary-image-3d.mrc" << std::endl; } } if (is_3d_tomo) { fout << "rm -rf temporary-image-3d.mrc" << std::endl; fout << "relion_helix_toolbox --norm --i " << fn_star_out_full << " --o_root _norm --angpix " << angpix << " --cyl_outer_diameter " << cyl_outer_diameter_A << std::endl; fout << "rm -rf subtomo-3d-??????.mrc" << std::endl; } MD.write(fn_star_out_full); if (!is_3d_tomo) { fout << "relion_project --i " << fn_vol_in << " --o temporary-images-2d --angpix " << angpix << " --ang " << fn_star_out_full << std::flush; if (white_noise > 0.) fout << " --add_noise --white_noise " << white_noise; fout << std::endl; fout << "relion_image_handler --i temporary-images-2d.mrcs --o tt --new_box " << new_box_size << std::endl; fout << "rm -rf temporary-images-2d.mrcs temporary-images-2d.star" << std::endl; fout << "mv temporary-images-2d_tt.mrcs segments-2d.mrcs" << std::endl; fout << "relion_helix_toolbox --norm --i " << fn_star_out_full << " --o_root _norm --angpix " << angpix << " --cyl_outer_diameter " << cyl_outer_diameter_A << std::endl; fout << "rm -rf segments-2d.mrcs" << std::endl; } fout << "mv " << fn_star_out_full << " " << (fn_star_out_full.withoutExtension() + (std::string)(".txt")) << std::endl; fout.close(); MD.deactivateLabel(EMDL_ORIENT_ROT_PRIOR); FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD) { MD.setValue(EMDL_ORIENT_ROT, 0.); if (!is_3d_tomo) { MD.setValue(EMDL_ORIENT_TILT, 90.); MD.setValue(EMDL_ORIENT_TILT_PRIOR, 90.); } MD.setValue(EMDL_ORIENT_ORIGIN_X_ANGSTROM, 0.); MD.setValue(EMDL_ORIENT_ORIGIN_Y_ANGSTROM, 0.); if (is_3d_tomo) MD.setValue(EMDL_ORIENT_ORIGIN_Z_ANGSTROM, 0.); MD.getValue(EMDL_IMAGE_NAME, fn_mic); fn_ext = fn_mic.getExtension(); fn_mic = fn_mic.withoutExtension() + "_norm." 
+ fn_ext; MD.setValue(EMDL_IMAGE_NAME, fn_mic); } MD.write(fn_star_out_priors); FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD) { MD.setValue(EMDL_ORIENT_ROT, 0.); MD.setValue(EMDL_ORIENT_TILT, 0.); MD.setValue(EMDL_ORIENT_PSI, 0.); } MD.deactivateLabel(EMDL_ORIENT_TILT_PRIOR); MD.deactivateLabel(EMDL_ORIENT_PSI_PRIOR); MD.deactivateLabel(EMDL_PARTICLE_HELICAL_TRACK_LENGTH_ANGSTROM); MD.deactivateLabel(EMDL_ORIENT_PSI_PRIOR_FLIP_RATIO); MD.deactivateLabel(EMDL_PARTICLE_HELICAL_TUBE_ID); MD.write(fn_star_out_wopriors); if (is_3d_tomo) command = "chmod u+x simulate-3d-subtomos.sh"; else command = "chmod u+x simulate-2d-segments.sh"; int res = system(command.c_str()); return; }; void outputHelicalSymmetryStatus( int iter, RFLOAT rise_initial_A, RFLOAT rise_min_A, RFLOAT rise_max_A, RFLOAT twist_initial_deg, RFLOAT twist_min_deg, RFLOAT twist_max_deg, bool do_local_search_helical_symmetry, std::vector& rise_A, std::vector& twist_deg, RFLOAT rise_A_half1, RFLOAT rise_A_half2, RFLOAT twist_deg_half1, RFLOAT twist_deg_half2, bool do_split_random_halves, std::ostream& out) { if (iter < 1) REPORT_ERROR("helix.cpp::outputHelicalSymmetryStatus(): BUG iteration id cannot be less than 1!"); if ( (do_local_search_helical_symmetry) && (iter > 1) ) { out << " Local searches of helical twist from " << twist_min_deg << " to " << twist_max_deg << " degrees, rise from " << rise_min_A << " to " << rise_max_A << " Angstroms." << std::endl; } else { out << " For all classes, helical twist = " << twist_initial_deg << " degrees, rise = " << rise_initial_A << " Angstroms." << std::endl; return; } if (do_split_random_halves) { RFLOAT twist_avg_deg = (twist_deg_half1 + twist_deg_half2) / 2.; RFLOAT rise_avg_A = (rise_A_half1 + rise_A_half2) / 2.; // TODO: raise a warning if two sets of helical parameters are >1% apart? out << " (Half 1) Refined helical twist = " << twist_deg_half1 << " degrees, rise = " << rise_A_half1 << " Angstroms." << std::endl; out << " (Half 2) Refined helical twist = " << twist_deg_half2 << " degrees, rise = " << rise_A_half2 << " Angstroms." << std::endl; out << " Averaged helical twist = " << twist_avg_deg << " degrees, rise = " << rise_avg_A << " Angstroms." << std::endl; return; } else { if ( (rise_A.size() != twist_deg.size()) || (rise_A.size() < 1) ) REPORT_ERROR("helix.cpp::outputHelicalSymmetryStatus(): BUG vectors rise_A and twist_deg are not of the same size!"); for (int iclass = 0; iclass < rise_A.size(); iclass++) { out << " (Class " << (iclass + 1) << ") Refined helical twist = " << twist_deg[iclass] << " degrees, rise = " << rise_A[iclass] << " Angstroms." 
<< std::endl; } } } void excludeLowCTFCCMicrographs( FileName& fn_in, FileName& fn_out, RFLOAT cc_min, RFLOAT EPA_lowest_res, RFLOAT df_min, RFLOAT df_max) { bool contain_EPA_res; MetaDataTable MD_in, MD_out; int nr_mics_old, nr_mics_new; RFLOAT cc, EPA_res, dU, dV; if ( (fn_in.getFileFormat() != "star") || (fn_out.getFileFormat() != "star") ) REPORT_ERROR("helix.cpp::excludeLowCTFCCMicrographs(): MetaDataTable should have .star extension."); if (fn_in == fn_out) REPORT_ERROR("helix.cpp::excludeLowCTFCCMicrographs(): File names must be different."); if (df_min > df_max) REPORT_ERROR("helix.cpp::excludeLowCTFCCMicrographs(): Minimum defocus threshold should be smaller the maximum."); MD_in.clear(); MD_in.read(fn_in); if ( (!MD_in.containsLabel(EMDL_CTF_DEFOCUSU)) || (!MD_in.containsLabel(EMDL_CTF_DEFOCUSV)) || (!MD_in.containsLabel(EMDL_CTF_DEFOCUS_ANGLE)) || (!MD_in.containsLabel(EMDL_CTF_VOLTAGE)) || (!MD_in.containsLabel(EMDL_CTF_CS)) || (!MD_in.containsLabel(EMDL_CTF_Q0)) || (!MD_in.containsLabel(EMDL_CTF_MAGNIFICATION)) || (!MD_in.containsLabel(EMDL_CTF_DETECTOR_PIXEL_SIZE)) || (!MD_in.containsLabel(EMDL_CTF_FOM)) ) REPORT_ERROR("helix.cpp::excludeLowCTFCCMicrographs(): Input STAR file should contain CTF information."); contain_EPA_res = MD_in.containsLabel(EMDL_CTF_MAXRES); nr_mics_old = nr_mics_new = 0; MD_out.clear(); FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD_in) { nr_mics_old++; MD_in.getValue(EMDL_CTF_FOM, cc); MD_in.getValue(EMDL_CTF_MAXRES, EPA_res); MD_in.getValue(EMDL_CTF_DEFOCUSU, dU); MD_in.getValue(EMDL_CTF_DEFOCUSV, dV); if ( (cc > cc_min) && (dU > df_min) && (dU < df_max) && (dV > df_min) && (dV < df_max) ) { if ( (contain_EPA_res) && (EPA_res > EPA_lowest_res) ) {} else { nr_mics_new++; MD_out.addObject(MD_in.getObject()); } } } std::cout << " Number of micrographs (input / output) = " << nr_mics_old << " / " << nr_mics_new << std::endl; if (MD_out.numberOfObjects() < 1) std::cout << " No micrographs in output file!" << std::endl; MD_out.write(fn_out); return; } void cutOutPartOfHelix( const MultidimArray& vin, MultidimArray& vout, long int new_boxdim, RFLOAT ang_deg, RFLOAT z_percentage) { long int Xdim, Ydim, Zdim, Ndim, old_boxdim; vout.clear(); if (vin.getDim() != 3) REPORT_ERROR("helix.cpp::cutOutPartOfHelix(): Input image is not 3D!"); if (!(z_percentage > 0.)) REPORT_ERROR("helix.cpp::cutOutPartOfHelix(): Z length must be larger than 0!"); if (!(ang_deg > 0.)) REPORT_ERROR("helix.cpp::cutOutPartOfHelix(): Angular range must be larger than 0!"); ang_deg = (ang_deg > 91.) ? (91.) : (ang_deg); vin.getDimensions(Xdim, Ydim, Zdim, Ndim); old_boxdim = (Xdim < Ydim) ? (Xdim) : (Ydim); old_boxdim = (Zdim < old_boxdim) ? (Zdim) : (old_boxdim); if ( (new_boxdim <= 0) || (new_boxdim > (old_boxdim / 2)) ) new_boxdim = old_boxdim / 2; vout.initZeros(new_boxdim, new_boxdim, new_boxdim); // Fill in values long int old_ymax = YSIZE(vin) + FIRST_XMIPP_INDEX(YSIZE(vin)); long int old_xmax = XSIZE(vin) + FIRST_XMIPP_INDEX(XSIZE(vin)); long int old_x0 = FIRST_XMIPP_INDEX(XSIZE(vin)); long int old_y0 = FIRST_XMIPP_INDEX(YSIZE(vin)); long int old_z0 = FIRST_XMIPP_INDEX(ZSIZE(vin)); long int new_z0 = FIRST_XMIPP_INDEX(ZSIZE(vout)); for (long int zi = 0; zi < ZSIZE(vout); zi++) { // Z subscript is out of range if ( ((RFLOAT)(ABS(zi + new_z0)) / (RFLOAT)(ZSIZE(vin))) > (z_percentage / 2.) ) continue; // Loop over X and Y for (long int yi = 0; yi < YSIZE(vout); yi++) { for (long int xi = 0; xi < XSIZE(vout); xi++) { RFLOAT deg = (180.) 
* atan2((double)(yi), (double)(xi)) / PI; // X or Y subscripts is out of range if ( (ang_deg < 90.) && ( (deg < ((45.) - (ang_deg / 2.))) || (deg > ((45.) + (ang_deg / 2.))) ) ) continue; if ( (yi >= old_ymax) || (xi >= old_xmax) ) continue; // Fill in voxels DIRECT_A3D_ELEM(vout, zi, yi, xi) = DIRECT_A3D_ELEM(vin, zi + new_z0 - old_z0, yi - old_y0, xi - old_x0); } } } vout.setXmippOrigin(); } void HelicalSegmentPriorInfoEntry::clear() { helical_tube_name.clear(); MDobjectID = -1; rot_deg = psi_deg = tilt_deg = 0.; dx_A = dy_A = dz_A = 0.; track_pos_A = 0.; has_wrong_polarity = false; subset = classID = 0; rot_prior_deg = psi_prior_deg = tilt_prior_deg = 0.; // KThurber dx_prior_A = dy_prior_A = dz_prior_A = 0.; psi_flip_ratio = 0.; psi_prior_flip = false; // KThurber }; bool HelicalSegmentPriorInfoEntry::operator<(const HelicalSegmentPriorInfoEntry &rhs) const { if ( (helical_tube_name.length() == 0) || (rhs.helical_tube_name.length() == 0) ) { std::cerr << "Compare # " << MDobjectID << " with # " << rhs.MDobjectID << std::endl; REPORT_ERROR("helix.h::HelicalSegmentPriorInfoEntry::operator<(): Name string of helical segments are empty!"); } if (helical_tube_name != rhs.helical_tube_name) return (helical_tube_name < rhs.helical_tube_name); if (fabs(track_pos_A - rhs.track_pos_A) < (1e-5)) { std::cerr << "Compare # " << MDobjectID << " with # " << rhs.MDobjectID << std::endl; REPORT_ERROR("helix.h::HelicalSegmentPriorInfoEntry::operator<(): A pair of same helical segments is found!"); } return (track_pos_A < rhs.track_pos_A); }; void HelicalSegmentPriorInfoEntry::checkPsiPolarity() { RFLOAT diff_psi = ABS(psi_deg - psi_prior_deg); has_wrong_polarity = false; if (diff_psi > 180.) diff_psi = ABS(diff_psi - 360.); if (diff_psi > 90.) has_wrong_polarity = true; }; // KThurber add this entire function void flipPsiTiltForHelicalSegment( RFLOAT old_psi, RFLOAT old_tilt, RFLOAT& new_psi, RFLOAT& new_tilt) { new_psi = (old_psi < 0.) ? (old_psi + 180.) : (old_psi - 180.); new_tilt = 180. - old_tilt; } //#define DEBUG_HELICAL_UPDATE_ANGULAR_PRIORS void updatePriorsForOneHelicalTube( std::vector& list, int sid, int eid, int& nr_wrong_polarity, bool &reverse_direction, RFLOAT sigma_segment_dist, std::vector helical_rise, std::vector helical_twist, bool is_3D_data, bool do_auto_refine, RFLOAT sigma2_rot, // KThurber RFLOAT sigma2_tilt, RFLOAT sigma2_psi, RFLOAT sigma2_offset, RFLOAT sigma_cutoff) { RFLOAT range_rot, range_tilt, range_psi, range2_offset, psi_flip_ratio; std::string str_name; int nr_same_polarity, nr_opposite_polarity, subset, data_dim; bool do_avg, unimodal_angular_priors; // Check subscript if ( (list.size() < 1) || (sid < 0) || (eid >= list.size()) || (sid > eid) ) REPORT_ERROR("helix.cpp::updatePriorsForOneHelicalTube(): Subscripts are invalid!"); // Init data_dim = (is_3D_data) ? (3) : (2); // TODO: test: Do not do local averaging if data_dim == 3 do_avg = (!is_3D_data) && (sigma_segment_dist > 0.01) && (list.size() > 1); // Do local average of orientations and translations or just flip tilt and psi angles? sigma2_rot = (sigma2_rot > 0.) ? (sigma2_rot) : (0.); // KThurber sigma2_tilt = (sigma2_tilt > 0.) ? (sigma2_tilt) : (0.); sigma2_psi = (sigma2_psi > 0.) ? (sigma2_psi) : (0.); sigma2_offset = (sigma2_offset > 0.) ? 
(sigma2_offset) : (0.); range_rot = sigma_cutoff * sqrt(sigma2_rot); // KThurber range_tilt = sigma_cutoff * sqrt(sigma2_tilt); range_psi = sigma_cutoff * sqrt(sigma2_psi); range2_offset = sigma_cutoff * sigma_cutoff * sigma2_offset; // Check helical segments and their polarity str_name = list[sid].helical_tube_name; subset = list[sid].subset; nr_same_polarity = nr_opposite_polarity = 1; // Laplace smoothing unimodal_angular_priors = true; for (int id = sid; id <= eid; id++) { if (list[id].helical_tube_name != str_name) REPORT_ERROR("helix.cpp::updatePriorsForOneHelicalTube(): Helical segments do not come from the same tube!"); if (list[id].subset != subset) // Do I really need this? REPORT_ERROR("helix.cpp::updatePriorsForOneHelicalTube(): Helical segments do not come from the same subset!"); if (list[id].has_wrong_polarity) { flipPsiTiltForHelicalSegment(list[id].psi_deg, list[id].tilt_deg, list[id].psi_deg, list[id].tilt_deg); nr_opposite_polarity++; } else { nr_same_polarity++; } if ((fabs(list[id].psi_flip_ratio - UNIMODAL_PSI_PRIOR_FLIP_RATIO) > 0.01) ) unimodal_angular_priors = false; } psi_flip_ratio = ((RFLOAT)(nr_opposite_polarity)) / (((RFLOAT)(nr_opposite_polarity)) + ((RFLOAT)(nr_same_polarity))); if ( (unimodal_angular_priors) && (nr_opposite_polarity <= 1) ) psi_flip_ratio = UNIMODAL_PSI_PRIOR_FLIP_RATIO; nr_wrong_polarity = nr_opposite_polarity - 1; // Change the polarity of the entire helix if psi_flip_ratio is larger than 0.5 if (psi_flip_ratio > 0.5) { for (int id = sid; id <= eid; id++) { flipPsiTiltForHelicalSegment(list[id].psi_prior_deg, list[id].tilt_prior_deg, list[id].psi_prior_deg, list[id].tilt_prior_deg); list[id].psi_flip_ratio = (1. - psi_flip_ratio); } } // Calculate new distance-averaged angular priors // SHWS 27042020: do two passes: one normal and one with opposite distances and find out which one is the best RFLOAT delta_prior_straight = 0., delta_prior_opposite = 0.; for (int iflip = 0; iflip < 2; iflip++) { RFLOAT delta_prior = 0.; for (int id = sid; id <= eid; id++) { RFLOAT this_rot, this_psi, this_tilt, center_pos, this_pos, sum_w, this_w, offset2; RFLOAT length_rot_vec, center_x_helix, this_x_helix; // KThurber Matrix1D this_ang_vec, sum_ang_vec, this_trans_vec, center_trans_vec, sum_trans_vec; Matrix1D this_rot_vec, sum_rot_vec; // KThurber // Init this_rot = this_psi = this_tilt = center_pos = this_pos = sum_w = this_w = offset2 = 0.; this_ang_vec.initZeros(3); this_rot_vec.initZeros(2); // KThurber sum_ang_vec.initZeros(3); sum_rot_vec.initZeros(2); // KThurber this_trans_vec.initZeros(data_dim); center_trans_vec.initZeros(data_dim); sum_trans_vec.initZeros(data_dim); // Check position center_pos = this_pos = list[id].track_pos_A; // Calculate weights sum_w = this_w = ((do_avg) ? 
(gaussian1D(this_pos, sigma_segment_dist, center_pos)) : (1.)); // Analyze orientations this_psi = list[id].psi_deg; // REFRESH PSI PRIOR this_tilt = list[id].tilt_deg; // REFRESH TILT PRIOR Euler_angles2direction(this_psi, this_tilt, this_ang_vec); sum_ang_vec = this_ang_vec * this_w; // rotation angle all new KThurber this_rot = list[id].rot_deg; // KThurber this_rot_vec(0) = cos(DEG2RAD(this_rot)); this_rot_vec(1) = sin(DEG2RAD(this_rot)); sum_rot_vec = this_rot_vec * this_w; // for adjusting rot angle by shift along helix center_x_helix = list[id].dx_A * cos(DEG2RAD(this_psi)) - list[id].dy_A * sin(DEG2RAD(this_psi)); // end new KThurber // Analyze translations XX(this_trans_vec) = list[id].dx_prior_A = list[id].dx_A; // REFRESH XOFF PRIOR YY(this_trans_vec) = list[id].dy_prior_A = list[id].dy_A; // REFRESH YOFF PRIOR if (is_3D_data) ZZ(this_trans_vec) = list[id].dz_prior_A = list[id].dz_A; // REFRESH ZOFF PRIOR transformCartesianAndHelicalCoords(this_trans_vec, this_trans_vec, (is_3D_data) ? (this_rot) : (0.), (is_3D_data) ? (this_tilt) : (0.), this_psi, CART_TO_HELICAL_COORDS); center_trans_vec = this_trans_vec; // Record helical coordinates of the central segment if (!is_3D_data) XX(this_trans_vec) = 0.; // Do not accumulate translation along helical axis else ZZ(this_trans_vec) = 0.; sum_trans_vec = this_trans_vec * this_w; // Local averaging if (do_avg) { for (int idd = sid; idd <= eid; idd++) { // Find another segment if (id == idd) continue; // Check position this_pos = list[idd].track_pos_A; if (fabs(this_pos - center_pos) > (sigma_segment_dist * sigma_cutoff)) continue; // Calculate weights this_w = gaussian1D(this_pos, sigma_segment_dist, center_pos); sum_w += this_w; // Analyze orientations // KThurber calc rot corrected for length along segment // This defines what the sign of pitch should be // KThurber unwind rotation angle in order to average // note should probably resolve ambiguity of rot=x or x+180 with 2d classes first // pitch in Angstroms, because positions are in Angstroms, pitch is 180 degree length in Angstroms // for adjusting rot angle by shift along helix RFLOAT pitch; if (list[idd].classID - 1 >= helical_twist.size()) REPORT_ERROR("ERROR: classID out of range..."); if (fabs(helical_twist[list[idd].classID - 1]) > 0.) { RFLOAT pitch = helical_rise[list[idd].classID - 1] * 180. / helical_twist[list[idd].classID - 1]; this_x_helix = list[idd].dx_A * cos(DEG2RAD(list[idd].psi_deg)) - list[idd].dy_A * sin(DEG2RAD(list[idd].psi_deg)); // In the second pass, check the direction from large to small distances RFLOAT sign = (iflip == 1) ? 1. : -1.; this_rot = list[idd].rot_deg + sign*(180./pitch)*(this_pos - center_pos - this_x_helix + center_x_helix); } else this_rot = list[idd].rot_deg; this_rot_vec(0) = cos(DEG2RAD(this_rot)); this_rot_vec(1) = sin(DEG2RAD(this_rot)); sum_rot_vec += this_rot_vec * this_w; this_psi = list[idd].psi_deg; this_tilt = list[idd].tilt_deg; Euler_angles2direction(this_psi, this_tilt, this_ang_vec); sum_ang_vec += this_ang_vec * this_w; // Analyze translations XX(this_trans_vec) = list[idd].dx_A; YY(this_trans_vec) = list[idd].dy_A; if (is_3D_data) ZZ(this_trans_vec) = list[idd].dz_A; transformCartesianAndHelicalCoords(this_trans_vec, this_trans_vec, (is_3D_data) ? (this_rot) : (0.), (is_3D_data) ? 
(this_tilt) : (0.), this_psi, CART_TO_HELICAL_COORDS); if (!is_3D_data) XX(this_trans_vec) = 0.; // Do not accumulate translation along helical axis else ZZ(this_trans_vec) = 0.; sum_trans_vec += this_trans_vec * this_w; } sum_ang_vec /= sum_w; Euler_direction2angles(sum_ang_vec, this_psi, this_tilt); // KThurber added sum_rot_vec /= sum_w; length_rot_vec = sqrt(pow(sum_rot_vec(0),2) + pow(sum_rot_vec(1),2)); if (length_rot_vec!=0) { sum_rot_vec(0) = sum_rot_vec(0) / length_rot_vec; sum_rot_vec(1) = sum_rot_vec(1) / length_rot_vec; this_rot = RAD2DEG(acos(sum_rot_vec(0))); if (sum_rot_vec(1) < 0.) this_rot = -1. * this_rot; // if sign negative, angle is negative } else this_rot = list[id].rot_deg; // don't change prior if average fails // KThurber end new section if (iflip == 0) { // Distance-averaged priors for original distances in filament // cannot store in rot_prior_deg, as will be needed in calculation for iflip==1 pass! list[id].rot_prior_deg_ori = this_rot; // KThurber list[id].psi_prior_deg_ori = this_psi; // REFRESH PSI PRIOR list[id].tilt_prior_deg_ori = this_tilt; // REFRESH TILT PRIOR } else { // Distance-averaged priors for flipped (opposite) filament list[id].rot_prior_deg = this_rot; // KThurber list[id].psi_prior_deg = this_psi; // REFRESH PSI PRIOR list[id].tilt_prior_deg = this_tilt; // REFRESH TILT PRIOR } // Keep track how different the priors are from the actual angles to determine straight/opposite RFLOAT diff = fabs(list[id].rot_deg - this_rot); if (diff > 180.) diff = fabs(diff - 360.); // Also count 180 degree errors (basically up-side-down flips) as OK // All we're after is the direction of the helix, so upside down errors should be ignored here... if (diff > 90.) diff = fabs(diff - 180.); delta_prior += diff; //std::cout << iflip << " " < range2_offset) // only now average translations { if (!is_3D_data) XX(sum_trans_vec) = XX(center_trans_vec); else { this_rot = list[id].rot_deg; ZZ(sum_trans_vec) = ZZ(center_trans_vec); } transformCartesianAndHelicalCoords(sum_trans_vec, sum_trans_vec, (is_3D_data) ? (this_rot) : (0.), (is_3D_data) ? 
(this_tilt) : (0.), this_psi, HELICAL_TO_CART_COORDS); // Averaged translations - use respective averaged tilt and psi list[id].dx_prior_A = XX(sum_trans_vec); // REFRESH XOFF PRIOR list[id].dy_prior_A = YY(sum_trans_vec); // REFRESH YOFF PRIOR if (is_3D_data) list[id].dz_prior_A = ZZ(sum_trans_vec); // REFRESH ZOFF PRIOR } } // end if do_avg } // end for id if (iflip == 1) delta_prior_opposite = delta_prior / (eid-sid+1); else delta_prior_straight = delta_prior / (eid-sid+1); } // end for iflip //std::cout << " Delta prior straight= " << delta_prior_straight << " opposite= " << delta_prior_opposite << std::endl; // Change the direction of the distances in the tube if the total angular difference is smaller for the opposite direction if (delta_prior_opposite < delta_prior_straight) { reverse_direction = true; } else { reverse_direction = false; for (int id = sid; id <= eid; id++) { list[id].rot_prior_deg = list[id].rot_prior_deg_ori; list[id].psi_prior_deg = list[id].psi_prior_deg_ori; list[id].tilt_prior_deg = list[id].tilt_prior_deg_ori; } } // for debugging /* for (int id = sid; id <= eid; id++) { std::cout << list[id].track_pos_A << " "<< list[id].rot_deg << " " < helical_rise, std::vector helical_twist, int helical_nstart, bool is_3D_data, bool do_auto_refine, RFLOAT sigma2_rot, RFLOAT sigma2_tilt, RFLOAT sigma2_psi, RFLOAT sigma2_offset, bool keep_tilt_prior_fixed, int verb) { // If we're not averaging angles from neighbouring segments in the helix, // then just set the priors to the angles from the previous iteration // this effectively means that each segment is completely independent from the rest // the priors are only used to center the local angular searches if (sigma_segment_dist < 0.) { updateAngularPriorsForHelicalReconstructionFromLastIter(MD, keep_tilt_prior_fixed); return; } // Check labels if (MD.numberOfObjects() < 1) REPORT_ERROR("helix.cpp::updatePriorsForHelicalReconstruction: MetaDataTable is empty!"); if (!MD.containsLabel(EMDL_IMAGE_NAME)) REPORT_ERROR("helix.cpp::updatePriorsForHelicalReconstruction: rlnImageName is missing!"); if ( ( (is_3D_data) && (!MD.containsLabel(EMDL_ORIENT_ROT)) ) || (!MD.containsLabel(EMDL_ORIENT_TILT)) || (!MD.containsLabel(EMDL_ORIENT_PSI)) || (!MD.containsLabel(EMDL_ORIENT_ORIGIN_X_ANGSTROM)) || (!MD.containsLabel(EMDL_ORIENT_ORIGIN_Y_ANGSTROM)) || ( (is_3D_data) && (!MD.containsLabel(EMDL_ORIENT_ORIGIN_Z_ANGSTROM)) ) || (!MD.containsLabel(EMDL_ORIENT_TILT_PRIOR)) || (!MD.containsLabel(EMDL_ORIENT_PSI_PRIOR)) || (!MD.containsLabel(EMDL_PARTICLE_HELICAL_TUBE_ID)) || (!MD.containsLabel(EMDL_PARTICLE_HELICAL_TRACK_LENGTH_ANGSTROM)) || (!MD.containsLabel(EMDL_ORIENT_PSI_PRIOR_FLIP_RATIO)) || ( (do_auto_refine) && (!MD.containsLabel(EMDL_PARTICLE_RANDOM_SUBSET)) ) ) REPORT_ERROR("helix.cpp::updatePriorsForHelicalReconstruction: Labels of helical prior information are missing!"); std::vector list; long int MDobjectID; // For N-start helices, revert back to the N-start twist and rise (not the 1-start ones) // This is in order to reduce amplification of small deviations in twist and rise if (helical_nstart > 1) { // Assume same N-start for all classes // Shaoda's formula (which need to be inverted, as we want original N-start rise and twist back) // rise_1-start = rise / N // twist_1-start = (twist+360)/N if twist>0 // twist_1-start = (twist-360)/N if twist<0 for (int iclass=0; iclass < helical_rise.size(); iclass++) { helical_rise[iclass] *= helical_nstart; RFLOAT aux = helical_twist[iclass] * helical_nstart; helical_twist[iclass] = (aux > 
360.) ? aux + 360. : aux - 360.; if (verb > 0) std::cout << " + for rotational priors go back to " << helical_nstart << "-start helical twist= " << helical_twist[iclass] << " and rise= " << helical_rise[iclass] << std::endl; } } // Read _data.star file list.clear(); MDobjectID = -1; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD) { HelicalSegmentPriorInfoEntry segment; std::string str_mic; int tube_id; segment.clear(); MD.getValue(EMDL_MICROGRAPH_NAME, str_mic); MD.getValue(EMDL_PARTICLE_HELICAL_TUBE_ID, tube_id); segment.helical_tube_name = str_mic + integerToString(tube_id); MD.getValue(EMDL_PARTICLE_HELICAL_TRACK_LENGTH_ANGSTROM, segment.track_pos_A); if (MD.containsLabel(EMDL_ORIENT_ROT)) MD.getValue(EMDL_ORIENT_ROT, segment.rot_deg); // KThurber else segment.rot_deg = 0.; if (MD.containsLabel(EMDL_ORIENT_ROT_PRIOR)) MD.getValue(EMDL_ORIENT_ROT_PRIOR, segment.rot_prior_deg); // KThurber //else segment.rot_prior_deg = 0.; else segment.rot_prior_deg = segment.rot_deg; // SHWS, modified from KThurber! MD.getValue(EMDL_ORIENT_TILT, segment.tilt_deg); MD.getValue(EMDL_ORIENT_TILT_PRIOR, segment.tilt_prior_deg); MD.getValue(EMDL_ORIENT_PSI, segment.psi_deg); MD.getValue(EMDL_ORIENT_PSI_PRIOR, segment.psi_prior_deg); MD.getValue(EMDL_ORIENT_PSI_PRIOR_FLIP_RATIO, segment.psi_flip_ratio); if (MD.containsLabel(EMDL_ORIENT_PSI_PRIOR_FLIP)) // KThurber2 MD.getValue(EMDL_ORIENT_PSI_PRIOR_FLIP, segment.psi_prior_flip); else segment.psi_prior_flip = false; if (MD.containsLabel(EMDL_PARTICLE_CLASS)) MD.getValue(EMDL_PARTICLE_CLASS, segment.classID); else segment.classID = 1; if (do_auto_refine) MD.getValue(EMDL_PARTICLE_RANDOM_SUBSET, segment.subset); // Do I really need this? MD.getValue(EMDL_ORIENT_ORIGIN_X_ANGSTROM, segment.dx_A); MD.getValue(EMDL_ORIENT_ORIGIN_Y_ANGSTROM, segment.dy_A); if (is_3D_data) MD.getValue(EMDL_ORIENT_ORIGIN_Z_ANGSTROM, segment.dz_A); segment.checkPsiPolarity(); MDobjectID++; segment.MDobjectID = MDobjectID; list.push_back(segment); } // Sort the list so that segments from the same helical tube come together std::stable_sort(list.begin(), list.end()); // Loop over every helical tube long total_opposite_polarity = 0; long total_opposite_rot = 0; // KThurber long total_same_rot = 0; // KThurber for (int sid = 0; sid < list.size(); ) { // A helical tube [id_s, id_e] int nr_opposite_polarity = -1; int eid = sid; // start id (sid) and end id (eid) while (1) { eid++; if (eid >= list.size()) break; if (list[eid].helical_tube_name != list[sid].helical_tube_name) break; } eid--; // Real work... bool reverse_direction; updatePriorsForOneHelicalTube(list, sid, eid, nr_opposite_polarity, reverse_direction, sigma_segment_dist, helical_rise, helical_twist, is_3D_data, do_auto_refine, sigma2_rot, sigma2_tilt, sigma2_psi, sigma2_offset); total_opposite_polarity += nr_opposite_polarity; if (reverse_direction) total_opposite_rot += 1; else total_same_rot += 1; // Write to _data.star file for (int id = sid; id <= eid; id++) { if (reverse_direction) MD.setValue(EMDL_PARTICLE_HELICAL_TRACK_LENGTH_ANGSTROM, -1. 
* list[id].track_pos_A, list[id].MDobjectID); if (!keep_tilt_prior_fixed) MD.setValue(EMDL_ORIENT_TILT_PRIOR, list[id].tilt_prior_deg, list[id].MDobjectID); MD.setValue(EMDL_ORIENT_PSI_PRIOR, list[id].psi_prior_deg, list[id].MDobjectID); MD.setValue(EMDL_ORIENT_PSI_PRIOR_FLIP_RATIO, list[id].psi_flip_ratio, list[id].MDobjectID); MD.setValue(EMDL_ORIENT_ROT_PRIOR, list[id].rot_prior_deg, list[id].MDobjectID); // KThurber MD.setValue(EMDL_ORIENT_ORIGIN_X_ANGSTROM, list[id].dx_prior_A, list[id].MDobjectID); MD.setValue(EMDL_ORIENT_ORIGIN_Y_ANGSTROM, list[id].dy_prior_A, list[id].MDobjectID); if (is_3D_data) MD.setValue(EMDL_ORIENT_ORIGIN_Z_ANGSTROM, list[id].dz_prior_A, list[id].MDobjectID); } // Next helical tube sid = eid + 1; } list.clear(); if ( (verb > 0) ) { long total_same_polarity = MD.numberOfObjects() - total_opposite_polarity; RFLOAT opposite_percentage = (100.) * ((RFLOAT)(total_opposite_polarity)) / ((RFLOAT)(MD.numberOfObjects())); RFLOAT opposite_percentage_rot = (100.) * ((RFLOAT)(total_opposite_rot)) / ((RFLOAT)(total_same_rot + total_opposite_rot)); std::cout << " Number of helical segments with same / opposite polarity to their psi priors: " << total_same_polarity << " / " << total_opposite_polarity << " (" << opposite_percentage << "%)" << std::endl; std::cout << " Number of helices with same / reverse direction for rot priors: " << total_same_rot << " / " << total_opposite_rot << " (" << opposite_percentage_rot << "%)" << std::endl; } } void updateAngularPriorsForHelicalReconstructionFromLastIter( MetaDataTable& MD, bool keep_tilt_prior_fixed) { if (MD.numberOfObjects() < 1) REPORT_ERROR("helix.cpp::updateAngularPriorsForHelicalReconstruction: MetaDataTable is empty!"); bool have_tilt = MD.containsLabel(EMDL_ORIENT_TILT); bool have_psi = MD.containsLabel(EMDL_ORIENT_PSI); bool have_tilt_prior = MD.containsLabel(EMDL_ORIENT_TILT_PRIOR); bool have_psi_prior = MD.containsLabel(EMDL_ORIENT_PSI_PRIOR); bool have_rot = MD.containsLabel(EMDL_ORIENT_ROT); // KThurber bool have_rot_prior = MD.containsLabel(EMDL_ORIENT_ROT_PRIOR); // KThurber if ( (!have_tilt_prior) && (!have_psi_prior) && (!have_rot_prior)) // KThurber return; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD) { RFLOAT val; if (have_tilt && have_tilt_prior && (!keep_tilt_prior_fixed) ) { MD.getValue(EMDL_ORIENT_TILT, val); MD.setValue(EMDL_ORIENT_TILT_PRIOR, val); } if (have_psi && have_psi_prior) { MD.getValue(EMDL_ORIENT_PSI, val); MD.setValue(EMDL_ORIENT_PSI_PRIOR, val); } // KThurber add rot section if (have_rot && have_rot_prior) { MD.getValue(EMDL_ORIENT_ROT, val); MD.setValue(EMDL_ORIENT_ROT_PRIOR, val); } } } void setPsiFlipRatioInStarFile(MetaDataTable& MD, RFLOAT ratio) { if (!MD.containsLabel(EMDL_ORIENT_PSI_PRIOR_FLIP_RATIO)) REPORT_ERROR("helix.cpp::setPsiFlipRatioInStarFile: Psi flip ratio is not found in this STAR file!"); FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD) { MD.setValue(EMDL_ORIENT_PSI_PRIOR_FLIP_RATIO, ratio); } } void plotLatticePoints(MetaDataTable& MD, int x1, int y1, int x2, int y2) { MD.clear(); MD.addLabel(EMDL_IMAGE_COORD_X); MD.addLabel(EMDL_IMAGE_COORD_Y); for (int i = -10; i <= 10; i++) { for (int j = -10; j <= 10; j++) { MD.addObject(); MD.setValue(EMDL_IMAGE_COORD_X, RFLOAT(i * x1 + j * x2)); MD.setValue(EMDL_IMAGE_COORD_Y, RFLOAT(i * y1 + j * y2)); } } } void grabParticleCoordinates( FileName& fn_in, FileName& fn_out) { MetaDataTable MD_in, MD_out; RFLOAT x, y, z; bool contain_z_coord = false; if (fn_in.getExtension() != "star") REPORT_ERROR("helix.cpp::grabParticleCoordinates: Input file must 
have STAR extension!"); MD_in.clear(); MD_in.read(fn_in); if ( (!MD_in.containsLabel(EMDL_IMAGE_COORD_X)) || (!MD_in.containsLabel(EMDL_IMAGE_COORD_Y)) ) REPORT_ERROR("helix.cpp::grabParticleCoordinates: Input file must have X and Y coordinates!"); contain_z_coord = MD_in.containsLabel(EMDL_IMAGE_COORD_Z); MD_out.clear(); x = y = z = 0.; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD_in) { MD_in.getValue(EMDL_IMAGE_COORD_X, x); MD_in.getValue(EMDL_IMAGE_COORD_Y, y); if (contain_z_coord) MD_in.getValue(EMDL_IMAGE_COORD_Z, z); MD_out.addObject(); MD_out.setValue(EMDL_IMAGE_COORD_X, x); MD_out.setValue(EMDL_IMAGE_COORD_Y, y); if (contain_z_coord) MD_out.setValue(EMDL_IMAGE_COORD_Z, z); } MD_out.write(fn_out); return; } void grabParticleCoordinates_Multiple( std::string& suffix_fin, std::string& suffix_fout) { FileName fns_in; std::vector fn_in_list; if (suffix_fin == suffix_fout) REPORT_ERROR("helix.cpp::grabParticleCoordinates_Multiple(): File names error!"); fns_in = "*" + suffix_fin; fns_in.globFiles(fn_in_list); std::cout << "Number of input files = " << fn_in_list.size() << std::endl; if (fn_in_list.size() < 1) REPORT_ERROR("helix.cpp::grabParticleCoordinates_Multiple(): No input files are found!"); for (int ii = 0; ii < fn_in_list.size(); ii++) { FileName fn_out = fn_in_list[ii].beforeFirstOf(suffix_fin) + suffix_fout; grabParticleCoordinates(fn_in_list[ii], fn_out); } return; } void calculateRadialAvg(MultidimArray &v, RFLOAT angpix) { std::vector rval, rcount; long int size, rint; if ( (XSIZE(v) < 5) || (YSIZE(v) < 5) || (ZSIZE(v) < 5) || (NSIZE(v) != 1) ) REPORT_ERROR("helix.cpp::calculateRadialAvg(): Input image should be a 3D box larger than 5*5*5 !"); if (!(angpix > 0.)) REPORT_ERROR("helix.cpp::calculateRadialAvg(): Pixel size should be larger than 0 !"); v.setXmippOrigin(); size = (XSIZE(v) < YSIZE(v)) ? XSIZE(v) : YSIZE(v); size = size / 2 + 2; rval.resize(size); rcount.resize(size); for (int ii = 0; ii < rval.size(); ii++) rval[ii] = rcount[ii] = 0.; FOR_ALL_ELEMENTS_IN_ARRAY3D(v) { rint = ROUND(sqrt((RFLOAT)(i * i + j * j))); if (rint >= size) continue; rval[rint] += A3D_ELEM(v, k, i, j); rcount[rint] += 1.; } for (int ii = 0; ii < rval.size(); ii++) { if (rcount[ii] < 0.5) rval[ii] = 0.; else rval[ii] /= rcount[ii]; std::cout << ii * angpix << " " << rval[ii] << std::endl; } } void transformCartesianToHelicalCoordsForStarFiles( MetaDataTable& MD_in, MetaDataTable& MD_out) { RFLOAT rot, tilt, psi, xoff, yoff, zoff; bool is_3d_trans = MD_in.containsLabel(EMDL_ORIENT_ORIGIN_Z_ANGSTROM); MD_out.clear(); MD_out = MD_in; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD_in) { MD_in.getValue(EMDL_ORIENT_ROT, rot); MD_in.getValue(EMDL_ORIENT_TILT, tilt); MD_in.getValue(EMDL_ORIENT_PSI, psi); MD_in.getValue(EMDL_ORIENT_ORIGIN_X_ANGSTROM, xoff); MD_in.getValue(EMDL_ORIENT_ORIGIN_Y_ANGSTROM, yoff); if (is_3d_trans) MD_in.getValue(EMDL_ORIENT_ORIGIN_Z_ANGSTROM, zoff); transformCartesianAndHelicalCoords( xoff, yoff, zoff, xoff, yoff, zoff, rot, tilt, psi, ((is_3d_trans) ? 
(3) : (2)), CART_TO_HELICAL_COORDS); MD_out.setValue(EMDL_ORIENT_ORIGIN_X_ANGSTROM, xoff); MD_out.setValue(EMDL_ORIENT_ORIGIN_Y_ANGSTROM, yoff); if (is_3d_trans) MD_out.setValue(EMDL_ORIENT_ORIGIN_Z_ANGSTROM, zoff); MD_out.nextObject(); } } void normaliseHelicalSegments( FileName& fn_in, FileName& fn_out_root, RFLOAT helical_outer_diameter_A, RFLOAT pixel_size_A) { bool is_3D_data = false, is_mrcs = false, have_tilt_prior = false, have_psi_prior = false, read_angpix_from_star = false; RFLOAT rot_deg = 0., tilt_deg = 0., psi_deg = 0., avg = 0., stddev = 0., val = 0., det_pixel_size = 0., mag = 0.; Image img0; MetaDataTable MD; FileName img_name, file_ext; if (fn_in.getExtension() != "star") REPORT_ERROR("helix.cpp::normaliseHelicalSegments(): Please provide a STAR file as input!"); // Read STAR file MD.clear(); MD.read(fn_in); have_tilt_prior = MD.containsLabel(EMDL_ORIENT_TILT_PRIOR); have_psi_prior = MD.containsLabel(EMDL_ORIENT_PSI_PRIOR); read_angpix_from_star = (MD.containsLabel(EMDL_CTF_DETECTOR_PIXEL_SIZE)) && (MD.containsLabel(EMDL_CTF_MAGNIFICATION)); if ( (!MD.containsLabel(EMDL_IMAGE_NAME)) ) REPORT_ERROR("helix.cpp::normaliseHelicalSegments(): MetaDataLabel _rlnImageName is missing!"); if ( (!have_tilt_prior) && (!MD.containsLabel(EMDL_ORIENT_TILT)) ) REPORT_ERROR("helix.cpp::normaliseHelicalSegments(): MetaDataLabel _rlnAngleTilt or _rlnAngleTiltPrior is missing!"); if ( (!have_psi_prior) && (!MD.containsLabel(EMDL_ORIENT_PSI)) ) REPORT_ERROR("helix.cpp::normaliseHelicalSegments(): MetaDataLabel _rlnAnglePsi or _rlnAnglePsiPrior is missing!"); FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD) { // Read image name and angular priors MD.getValue(EMDL_IMAGE_NAME, img_name); file_ext = img_name.getExtension(); is_mrcs = (file_ext == "mrcs"); rot_deg = tilt_deg = psi_deg = 0.; if (have_tilt_prior) MD.getValue(EMDL_ORIENT_TILT_PRIOR, tilt_deg); else MD.getValue(EMDL_ORIENT_TILT, tilt_deg); if (have_psi_prior) MD.getValue(EMDL_ORIENT_PSI_PRIOR, psi_deg); else MD.getValue(EMDL_ORIENT_PSI, psi_deg); if (read_angpix_from_star) { MD.getValue(EMDL_CTF_DETECTOR_PIXEL_SIZE, det_pixel_size); MD.getValue(EMDL_CTF_MAGNIFICATION, mag); pixel_size_A = det_pixel_size * 10000. / mag; } // DEBUG //std::cout << " pixel_size_A = " << pixel_size_A << std::endl; if (pixel_size_A < (1e-4)) REPORT_ERROR("helix.cpp::normaliseHelicalSegments(): Invalid pixel size for image " + ((std::string)(img_name))); if ((helical_outer_diameter_A / pixel_size_A) < 10.) REPORT_ERROR("helix.cpp::normaliseHelicalSegments(): Diameter of the tubular mask should be larger than 10 pixels!"); // Read image img0.clear(); img0.read(img_name); is_3D_data = (ZSIZE(img0()) > 1) || (NSIZE(img0()) > 1); if ( (XSIZE(img0()) < (helical_outer_diameter_A / pixel_size_A)) || (YSIZE(img0()) < (helical_outer_diameter_A / pixel_size_A)) ) REPORT_ERROR("helix.cpp::normaliseHelicalSegments(): Diameter of the tubular mask is larger than the box XY dimensions!"); if (!is_3D_data) rot_deg = tilt_deg = 0.; // Calculate avg and stddev calculateBackgroundAvgStddev( img0, avg, stddev, 0, true, helical_outer_diameter_A * 0.5 / pixel_size_A, tilt_deg, psi_deg); if (stddev < 0.0001) { std::cout << " !!! WARNING: " << img_name << " has bg_avg = " << avg << " and bg_stddev = " << stddev << " . bg_stddev is set to 0.0001. The image cannot be properly normalised!" << std::endl; stddev = 0.0001; } else std::cout << " Normalising " << img_name << " with bg_avg = " << avg << " and bg_stddev = " << stddev << " . 
" << std::endl; // Normalise FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(img0()) { val = DIRECT_MULTIDIM_ELEM(img0(),n); DIRECT_MULTIDIM_ELEM(img0(), n) = (val - avg) / stddev; } // Rename img_name = img_name.withoutExtension() + fn_out_root + "." + file_ext; if (is_3D_data) { img0.setSamplingRateInHeader(pixel_size_A, pixel_size_A, pixel_size_A); img0.setStatisticsInHeader(); } // Write if (is_3D_data) img0.write(img_name); else img0.write(img_name, -1, true, WRITE_APPEND); img0.clear(); } if (!is_3D_data) { // Read the header of .mrcs stack img_name = img_name.substr(img_name.find("@") + 1); // Set the pixel size in the file header img0.read(img_name); img0.MDMainHeader.setValue(EMDL_IMAGE_SAMPLINGRATE_X, pixel_size_A); img0.MDMainHeader.setValue(EMDL_IMAGE_SAMPLINGRATE_Y, pixel_size_A); img0.write(img_name); } } // Copied online from http://paulbourke.net/miscellaneous/interpolation/ // Author: Paul Bourke, December 1999 /* Tension: 1 is high, 0 normal, -1 is low Bias: 0 is even, positive is towards first segment, negative towards the other */ // mu is the percentage between y1 and y2 RFLOAT HermiteInterpolate1D( RFLOAT y0, RFLOAT y1, RFLOAT y2, RFLOAT y3, RFLOAT mu, RFLOAT tension, RFLOAT bias) { RFLOAT m0 = 0., m1 = 0., mu2 = 0., mu3 = 0., a0 = 0., a1 = 0., a2 = 0., a3 = 0.; mu2 = mu * mu; mu3 = mu2 * mu; m0 = (y1 - y0) * (1. + bias) * (1. - tension) / 2.; m0 += (y2 - y1) * (1. - bias) * (1. - tension) / 2.; m1 = (y2 - y1) * (1. + bias) * (1. - tension) / 2.; m1 += (y3 - y2) * (1. - bias) * (1. - tension) / 2.; a0 = 2. * mu3 - 3. * mu2 + 1.; a1 = mu3 - 2. * mu2 + mu; a2 = mu3 - mu2; a3 = (-2.) * mu3 + 3. * mu2; return (a0 * y1 + a1 * m0 + a2 * m1 + a3 * y2); } void HermiteInterpolateOne3DHelicalFilament( MetaDataTable& MD_in, MetaDataTable& MD_out, int& total_segments, int nr_asu, RFLOAT rise_A, RFLOAT pixel_size_A, RFLOAT box_size_pix, int helical_tube_id, RFLOAT Xdim, RFLOAT Ydim, RFLOAT Zdim, bool bimodal_angular_priors) { RFLOAT x0, x1, x2, x3, xa, xb, y0, y1, y2, y3, ya, yb, z0, z1, z2, z3, za, zb, mu1, mu2; RFLOAT step_pix, chord_pix, accu_len_pix, present_len_pix, len_pix, psi_prior_flip_ratio, tilt_deg, psi_deg; RFLOAT half_box_size_pix = box_size_pix / 2.; int nr_partitions, nr_segments; std::vector xlist, ylist, zlist; Matrix1D dr; // DEBUG: Do not exclude particles on the edges // Xdim = Ydim = Zdim = 999999.; if (MD_in.numberOfObjects() <= 1) REPORT_ERROR("helix.cpp::HermiteInterpolateOne3DHelicalFilament(): MetaDataTable should have at least two points for interpolation!"); if ( (box_size_pix < 2) || (Xdim < box_size_pix) || (Ydim < box_size_pix) || (Zdim < box_size_pix) ) REPORT_ERROR("helix.cpp::HermiteInterpolateOne3DHelicalFilament(): Wrong dimensions or box size!"); if (pixel_size_A < 0.001) REPORT_ERROR("helix.cpp::HermiteInterpolateOne3DHelicalFilament(): Invalid pixel size!"); RFLOAT interbox_pix = ((RFLOAT)(nr_asu)) * rise_A / pixel_size_A; if ( (nr_asu < 1) || (rise_A < 0.001) || (interbox_pix < 0.58) ) REPORT_ERROR("helix.cpp::HermiteInterpolateOne3DHelicalFilament(): Invalid helical rise or number of asymmetrical units!"); if ( (!MD_in.containsLabel(EMDL_IMAGE_COORD_X)) || (!MD_in.containsLabel(EMDL_IMAGE_COORD_Y)) || (!MD_in.containsLabel(EMDL_IMAGE_COORD_Z)) ) REPORT_ERROR("helix.cpp::HermiteInterpolateOne3DHelicalFilament(): MetaDataTable should have _rlnOriginX, _rlnOriginY and _rlnOriginZ!"); // Header of output file MD_out.clear(); MD_out.addLabel(EMDL_IMAGE_COORD_X); MD_out.addLabel(EMDL_IMAGE_COORD_Y); MD_out.addLabel(EMDL_IMAGE_COORD_Z); 
MD_out.addLabel(EMDL_PARTICLE_HELICAL_TUBE_ID); MD_out.addLabel(EMDL_ORIENT_TILT_PRIOR); MD_out.addLabel(EMDL_ORIENT_PSI_PRIOR); MD_out.addLabel(EMDL_PARTICLE_HELICAL_TRACK_LENGTH_ANGSTROM); MD_out.addLabel(EMDL_ORIENT_PSI_PRIOR_FLIP_RATIO); //half_box_size_pix = box_size_pix / 2.; psi_prior_flip_ratio = UNIMODAL_PSI_PRIOR_FLIP_RATIO; if (bimodal_angular_priors) { psi_prior_flip_ratio = BIMODAL_PSI_PRIOR_FLIP_RATIO; } // Load all manually picked coordinates xlist.clear(); ylist.clear(); zlist.clear(); FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD_in) { MD_in.getValue(EMDL_IMAGE_COORD_X, x0); MD_in.getValue(EMDL_IMAGE_COORD_Y, y0); MD_in.getValue(EMDL_IMAGE_COORD_Z, z0); xlist.push_back(x0); ylist.push_back(y0); zlist.push_back(z0); } // Interpolate accu_len_pix = 0.; present_len_pix = -1.; nr_segments = 0; dr.initZeros(3); for (int id = 0; id < (xlist.size() - 1); id++) { // Step size for interpolation is smaller than 1% of the inter-box distance // sqrt(0.57735 * 0.57735 * 0.57735 * 3) = 1.0, step size is larger than 1 pixel // TODO: 1% ? Too expensive computationally? Try 10% ? step_pix = (interbox_pix < 57.735) ? (0.57735) : (interbox_pix / 100.); // 1% //step_pix = (interbox_pix < 5.7735) ? (0.57735) : (interbox_pix / 10.); // 10% // Collect points 0, 1, 2, 3 for interpolations x0 = x1 = x2 = x3 = y0 = y1 = y2 = y3 = z0 = z1 = z2 = z3 = 0.; // Point 0 if (id == 0) { x0 = 2. * xlist[id] - xlist[id + 1]; y0 = 2. * ylist[id] - ylist[id + 1]; z0 = 2. * zlist[id] - zlist[id + 1]; } else { x0 = xlist[id - 1]; y0 = ylist[id - 1]; z0 = zlist[id - 1]; } // Point 1 and Point 2 x1 = xlist[id]; y1 = ylist[id]; z1 = zlist[id]; x2 = xlist[id + 1]; y2 = ylist[id + 1]; z2 = zlist[id + 1]; // Point 3 if (id == (xlist.size() - 2)) { x3 = 2. * xlist[id + 1] - xlist[id]; y3 = 2. * ylist[id + 1] - ylist[id]; z3 = 2. * zlist[id + 1] - zlist[id]; } else { x3 = xlist[id + 2]; y3 = ylist[id + 2]; z3 = zlist[id + 2]; } // Chord distance between point 1 and 2 // TODO: what will happen if the chord length is smaller than step_pix? chord_pix = sqrt((x1 - x2) * (x1 - x2) + (y1 - y2) * (y1 - y2) + (z1 - z2) * (z1 - z2)); nr_partitions = int(CEIL(chord_pix / step_pix)); nr_partitions = (nr_partitions <= 0) ? (1) : (nr_partitions); // Partitioning for (int ip = 0; ip < nr_partitions; ip++) { xa = ya = za = xb = yb = zb = mu1 = mu2 = len_pix = 0.; mu1 = RFLOAT( (RFLOAT(ip)) / (RFLOAT(nr_partitions)) ); mu2 = RFLOAT( (RFLOAT(ip) + 1.) / (RFLOAT(nr_partitions)) ); xa = HermiteInterpolate1D(x0, x1, x2, x3, mu1); ya = HermiteInterpolate1D(y0, y1, y2, y3, mu1); za = HermiteInterpolate1D(z0, z1, z2, z3, mu1); xb = HermiteInterpolate1D(x0, x1, x2, x3, mu2); yb = HermiteInterpolate1D(y0, y1, y2, y3, mu2); zb = HermiteInterpolate1D(z0, z1, z2, z3, mu2); len_pix = sqrt((xa - xb) * (xa - xb) + (ya - yb) * (ya - yb) + (za - zb) * (za - zb)); present_len_pix += len_pix; accu_len_pix += len_pix; // Output one segment (xb, yb, zb) if (present_len_pix > 0.) { present_len_pix -= interbox_pix; #ifdef EXCLUDE_SEGMENTS_ON_THE_EDGES // Check whether this segment lies on the edges of the 3D tomogram if ( (xb < half_box_size_pix) || (xb > (Xdim - half_box_size_pix)) || (yb < half_box_size_pix) || (yb > (Ydim - half_box_size_pix)) || (zb < half_box_size_pix) || (zb > (Zdim - half_box_size_pix)) ) { std::cout << std::resetiosflags(std::ios::fixed); std::cout << " WARNING: Particle at (" << xb << ", " << yb << ", " << zb << ") is ignored because it is too close to the edge. 
" << std::flush; std::cout << " Box_size_pix = " << box_size_pix << ", Dimensions = " << Xdim << " * " << Ydim << " * " << Zdim << " ." << std::flush; std::cout << " Please choose a smaller box size OR reconstruct the 3D tomogram with a larger number of Z slices!" << std::endl; continue; } #endif // Add this segment to the list nr_segments++; MD_out.addObject(); MD_out.setValue(EMDL_IMAGE_COORD_X, xb); MD_out.setValue(EMDL_IMAGE_COORD_Y, yb); MD_out.setValue(EMDL_IMAGE_COORD_Z, zb); MD_out.setValue(EMDL_PARTICLE_HELICAL_TUBE_ID, helical_tube_id); XX(dr) = xb - xa; YY(dr) = yb - ya; ZZ(dr) = zb - za; estimateTiltPsiPriors(dr, tilt_deg, psi_deg); if (fabs(tilt_deg) < 0.001) tilt_deg = 0.; if (fabs(psi_deg) < 0.001) psi_deg = 0.; MD_out.setValue(EMDL_ORIENT_TILT_PRIOR, tilt_deg); MD_out.setValue(EMDL_ORIENT_PSI_PRIOR, psi_deg); MD_out.setValue(EMDL_PARTICLE_HELICAL_TRACK_LENGTH_ANGSTROM, accu_len_pix * pixel_size_A); MD_out.setValue(EMDL_ORIENT_PSI_PRIOR_FLIP_RATIO, psi_prior_flip_ratio); } } } total_segments = nr_segments; } void Interpolate3DCurves( FileName& fn_in_root, FileName& fn_out_root, int nr_asu, RFLOAT rise_A, RFLOAT pixel_size_A, RFLOAT box_size_pix, int binning_factor, bool bimodal_angular_priors) { Image img; std::vector fn_in_list; FileName fn_tomo, fn_in_glob, fn_in, fn_out; std::ifstream fin; std::string line; std::vector words; std::vector xlist, ylist, zlist; MetaDataTable MD_in, MD_out, MD_all; RFLOAT x0, y0, z0, val; int nr_points = 0, total_segments = 0, nr_segments = 0; int xdim = 0, ydim = 0, zdim = 0, xdim_img = 0, ydim_img = 0, zdim_img = 0; long int ndim = 0, ndim_img = 0; char buf_word[4], buf_qword[16], tmp_char; bool contain_3d_points = false, flip_YZ = false; // General parameter checks if (binning_factor < 1) REPORT_ERROR("helix.cpp::Interpolate3DCurves(): Binning factor should be larger than 1!"); fn_tomo = fn_in_root + ".mrc"; if (!exists(fn_tomo)) REPORT_ERROR("helix.cpp::Interpolate3DCurves(): Reconstructed 3D tomogram " + fn_tomo + " is not found!"); // Read the header of 3D reconstructed tomogram img.clear(); img.read(fn_tomo, false); img.getDimensions(xdim_img, ydim_img, zdim_img, ndim_img); if (zdim_img <= 1) REPORT_ERROR("helix.cpp::Interpolate3DCurves(): Dimension Z of reconstructed 3D tomogram " + fn_tomo + " is 1!"); if (ndim_img != 1) REPORT_ERROR("helix.cpp::Interpolate3DCurves(): Dimension N of reconstructed 3D tomogram " + fn_tomo + " is not 1!"); // Glob all files if (fn_in_root.length() <= 1) REPORT_ERROR("helix.cpp::Interpolate3DCurves(): Input rootname is an empty string!"); //std::cout << " fn_in_root = " << fn_in_root << std::endl; // DEBUG //fn_in_glob = "*" + fn_in_root + "*"; // OLD fn_in_glob = fn_in_root + "*"; // NEW fn_in_glob.globFiles(fn_in_list); //std::cout << " fn_in_glob = " << fn_in_glob << std::endl; // DEBUG //std::cout << " nr_giles_globbed = " << fn_in_list.size() << std::endl; if (fn_in_list.size() < 1) REPORT_ERROR("helix.cpp::Interpolate3DCurves(): No input files are found!"); // Check input filenames std::cout << " #############################################################" << std::endl; std::cout << " Coordinate files (.mod, .star or .txt, .coords text files with XYZ coordinates) to be processed: " << std::endl; for (int fid = 0; fid < fn_in_list.size(); fid++) { fn_in = fn_in_list[fid]; std::string fn_ext = fn_in.getExtension(); // Method 1 //if ( (fn_ext == "") || (fn_ext == "log") || (fn_ext == "mrc") || (fn_ext == "mrcs") || (fn_ext == "ctf") // || (fn_ext == "st") || (fn_ext == "order") || (fn_ext == "tlt") 
|| (fn_ext == "trial")) //{ // fn_in_list.erase(fn_in_list.begin() + fid); // fid--; // continue; //} // Method 2 if ( (fn_ext != "mod") && (fn_ext != "star") && (fn_ext != "coords") && (fn_ext != "txt") ) { fn_in_list.erase(fn_in_list.begin() + fid); fid--; continue; } if ( (fn_in.contains(fn_in_root)) && ( (fn_in.afterFirstOf(fn_in_root)).contains(fn_in_root) ) ) REPORT_ERROR("helix.cpp::Interpolate3DCurves(): Every input filename should contain one and only one input rootname! Invalid filename: " + (std::string)(fn_in)); std::cout << " " << fn_in << std::endl; } std::cout << " " << fn_in_list.size() << " files (filaments) found." << std::endl; std::cout << " Please check whether all coordinate files are included and remove files which do not contain manually picked coordinates!" << std::endl; std::cout << " #############################################################" << std::endl; std::cout << " Are all coordinate files included? Do all of the files shown above contain manually picked XYZ coordinates? (y/n): " << std::flush; line.clear(); std::cin >> line; if ( (line[0] != 'y') && (line[0] != 'Y') ) { std::cout << std::endl << " No! Exit now..." << std::endl; return; } // Check endianness - for MOD files only int num = 1; bool is_little_endian = false; if(*(char *)&num == 1) is_little_endian = true; //std::cout << is_little_endian << std::endl; // Real work begins... MD_all.clear(); total_segments = nr_segments = 0; for (int fid = 0; fid < fn_in_list.size(); fid++) { contain_3d_points = false; flip_YZ = false; xlist.clear(); ylist.clear(); zlist.clear(); // Open an input file fn_in = fn_in_list[fid]; std::cout << " ### Input filename = " << fn_in << std::endl; // MOD file format definition: http://bio3d.colorado.edu/imod/doc/binspec.html if (fn_in.getExtension() == "mod") { fin.open(fn_in.c_str(), std::ios_base::in|std::ios_base::binary); if (fin.fail()) REPORT_ERROR("helix.cpp::Interpolate3DCurves(): Cannot open input file: " + (std::string)(fn_in)); // Scheme 1 - does not work for MOD files with more than one 'OBJT's (objects) /* fin.read(reinterpret_cast(buf_qword), sizeof(buf_qword)); // Read the first line for (int id = 0; id < 14; id++) // Read model data structure (232 bytes) fin.read(reinterpret_cast(buf_qword), sizeof(buf_qword)); for (int id = 0; id < 11; id++) // Read object data structure (first 160 out of 176 bytes) fin.read(reinterpret_cast(buf_qword), sizeof(buf_qword)); // 26 lines in total omitted fin.read(reinterpret_cast(buf_word), sizeof(buf_word)); fin.read(reinterpret_cast(buf_word), sizeof(buf_word)); */ // Scheme 2 fin.read(reinterpret_cast(buf_qword), sizeof(buf_qword)); // Read the first 16 bytes fin.read(reinterpret_cast(buf_qword), sizeof(buf_qword)); // Read the second 16 bytes if ( (buf_qword[0] != 'M') || (buf_qword[1] != 'o') || (buf_qword[2] != 'd') || (buf_qword[3] != 'e') || (buf_qword[4] != 'l') ) REPORT_ERROR("helix.cpp::Interpolate3DCurves(): IMOD file header does not contain 'Model' tag!"); for (int id = 0; id < 6; id++) fin.read(reinterpret_cast(buf_qword), sizeof(buf_qword)); // Read the next 96 bytes fin.read(reinterpret_cast(buf_word), sizeof(buf_word)); fin.read(reinterpret_cast(buf_word), sizeof(buf_word)); // Read the next 8 bytes // Name of model ends fin.read(reinterpret_cast(buf_word), sizeof(buf_word)); // Xdim if (is_little_endian) { SWAP(buf_word[0], buf_word[3], tmp_char); SWAP(buf_word[1], buf_word[2], tmp_char); } xdim = *(reinterpret_cast(buf_word)); fin.read(reinterpret_cast(buf_word), sizeof(buf_word)); // Ydim if 
(is_little_endian) { SWAP(buf_word[0], buf_word[3], tmp_char); SWAP(buf_word[1], buf_word[2], tmp_char); } ydim = *(reinterpret_cast(buf_word)); fin.read(reinterpret_cast(buf_word), sizeof(buf_word)); // Zdim if (is_little_endian) { SWAP(buf_word[0], buf_word[3], tmp_char); SWAP(buf_word[1], buf_word[2], tmp_char); } zdim = *(reinterpret_cast(buf_word)); std::cout << " Binning factor = " << binning_factor << std::endl; std::cout << " Dimensions XYZ (binned, unflipped coords) = " << xdim << " * " << ydim << " * " << zdim << std::endl; std::cout << " Dimensions XYZ (unbinned, unflipped coords) = " << xdim * binning_factor << " * " << ydim * binning_factor << " * " << zdim * binning_factor << std::endl; std::cout << " Dimensions XYZ (3D tomogram .mrc) = " << xdim_img << " * " << ydim_img << " * " << zdim_img << std::endl; fin.read(reinterpret_cast(buf_word), sizeof(buf_word)); // Number of objects fin.read(reinterpret_cast(buf_word), sizeof(buf_word)); // Flags if (is_little_endian) { SWAP(buf_word[0], buf_word[3], tmp_char); SWAP(buf_word[1], buf_word[2], tmp_char); } if ((*(reinterpret_cast(buf_word))) & 0x00010000) // Check flag #16 - flip YZ? flip_YZ = true; //std::cout << (*(reinterpret_cast(buf_word))) << std::endl; std::cout << " Model last viewed on Y/Z flipped or rotated image? = " << std::flush; if (flip_YZ) std::cout << "TRUE" << std::endl; else std::cout << "FALSE" << std::endl; contain_3d_points = false; while (fin.read(reinterpret_cast(buf_word), sizeof(buf_word))) // Read 4-byte blocks { if ( (buf_word[0] == 'C') && (buf_word[1] == 'O') && (buf_word[2] == 'N') && (buf_word[3] == 'T') ) // Find contour section { contain_3d_points = true; break; } } if (!contain_3d_points) REPORT_ERROR("helix.cpp::Interpolate3DCurves(): IMOD file does not seem to contain manually picked 3D coordiantes!"); fin.read(reinterpret_cast(buf_word), sizeof(buf_word)); // Number of 3D points (meshes) if (is_little_endian) { SWAP(buf_word[0], buf_word[3], tmp_char); SWAP(buf_word[1], buf_word[2], tmp_char); } nr_points = *(reinterpret_cast(buf_word)); //std::cout << nr_points << std::endl; fin.read(reinterpret_cast(buf_word), sizeof(buf_word)); if (nr_points <= 2) REPORT_ERROR("helix.cpp::Interpolate3DCurves(): Input coordinate file: " + (std::string)(fn_in) + " should contain at least 2 points!"); fin.read(reinterpret_cast(buf_word), sizeof(buf_word)); fin.read(reinterpret_cast(buf_word), sizeof(buf_word)); std::cout << " Original XYZ coordinates (unbinned):" << std::endl; for (int id = 0; id < nr_points; id++) { fin.read(reinterpret_cast(buf_word), sizeof(buf_word)); if (is_little_endian) { SWAP(buf_word[0], buf_word[3], tmp_char); SWAP(buf_word[1], buf_word[2], tmp_char); } val = ((RFLOAT)(binning_factor)) * (*(reinterpret_cast(buf_word))); xlist.push_back(val); fin.read(reinterpret_cast(buf_word), sizeof(buf_word)); if (is_little_endian) { SWAP(buf_word[0], buf_word[3], tmp_char); SWAP(buf_word[1], buf_word[2], tmp_char); } val = ((RFLOAT)(binning_factor)) * (*(reinterpret_cast(buf_word))); if (flip_YZ) zlist.push_back(val); else ylist.push_back(val); fin.read(reinterpret_cast(buf_word), sizeof(buf_word)); if (is_little_endian) { SWAP(buf_word[0], buf_word[3], tmp_char); SWAP(buf_word[1], buf_word[2], tmp_char); } val = ((RFLOAT)(binning_factor)) * (*(reinterpret_cast(buf_word))); if (flip_YZ) ylist.push_back(val); else zlist.push_back(val); // OLD //std::cout << " " << xlist[xlist.size() - 1] << " " << ylist[ylist.size() - 1] << " " << zlist[zlist.size() - 1] << std::endl; // NEW std::cout << 
std::setw(15) << std::fixed << xlist[xlist.size() - 1]; std::cout << std::setw(15) << std::fixed << ylist[ylist.size() - 1]; std::cout << std::setw(15) << std::fixed << zlist[zlist.size() - 1]; std::cout << std::endl; } //std::cout << xlist.size() << std::endl; } else if (fn_in.getExtension() == "star") { MetaDataTable MD; RFLOAT xx = 0., yy = 0., zz = 0.; MD.clear(); MD.read(fn_in); if ( (!MD.containsLabel(EMDL_IMAGE_COORD_X)) || (!MD.containsLabel(EMDL_IMAGE_COORD_Y)) || (!MD.containsLabel(EMDL_IMAGE_COORD_Z)) ) REPORT_ERROR("helix.cpp::Interpolate3DCurves(): Input coordinate STAR file " + (std::string)(fn_in) + " should contain _rlnCoordinateX Y and Z!"); FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD) { MD.getValue(EMDL_IMAGE_COORD_X, xx); MD.getValue(EMDL_IMAGE_COORD_Y, yy); MD.getValue(EMDL_IMAGE_COORD_Z, zz); xlist.push_back(xx); ylist.push_back(yy); zlist.push_back(zz); } } else { fin.open(fn_in.c_str(), std::ios_base::in); if (fin.fail()) REPORT_ERROR("helix.cpp::Interpolate3DCurves(): Cannot open coordinate file: " + (std::string)(fn_in)); // Read x, y, z coordinates into vectors and close the input file while (getline(fin, line, '\n')) { words.clear(); tokenize(line, words); if (words.size() == 0) // Empty line. continue; if (words.size() != 3) REPORT_ERROR("helix.cpp::Interpolate3DCurves(): Invalid input coordinate file " + fn_in); xlist.push_back(textToFloat(words[0])); ylist.push_back(textToFloat(words[1])); zlist.push_back(textToFloat(words[2])); // Screen output std::cout << " Original XYZ coordinates: " << textToFloat(words[0]) << ", " << textToFloat(words[1]) << ", " << textToFloat(words[2]) << std::endl; } } fin.close(); if (xlist.size() < 2) REPORT_ERROR("helix.cpp::Interpolate3DCurves(): Input coordinate file: " + (std::string)(fn_in) + " should contain at least 2 points!"); // Load control points for 3D interpolations MD_in.clear(); MD_in.addLabel(EMDL_IMAGE_COORD_X); MD_in.addLabel(EMDL_IMAGE_COORD_Y); MD_in.addLabel(EMDL_IMAGE_COORD_Z); // Mode 1 - Just use the manually picked points as control points //for (int id = 0; id < xlist.size(); id++) //{ // MD_in.addObject(); // MD_in.setValue(EMDL_IMAGE_COORD_X, xlist[id]); // MD_in.setValue(EMDL_IMAGE_COORD_Y, ylist[id]); // MD_in.setValue(EMDL_IMAGE_COORD_Z, zlist[id]); //} // Mode 2 - Manually picked points are zigzag. Choose middle points of short line segments as control points. // Generate smooth curves // However, tilt angles of the segments around start- and end- points of the filaments deviate more from ~90 degrees. 
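// Put differently (a summary of the loop below): for picked points p0, p1, ..., pN the control
// points become p0, (p0+p1)/2, (p1+p2)/2, ..., (p(N-1)+pN)/2, pN, which damps the zigzag of
// manual picking while keeping both filament ends fixed.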
for (int id = 0; id < xlist.size(); id++) { if (id == 0) // Start point { MD_in.addObject(); MD_in.setValue(EMDL_IMAGE_COORD_X, xlist[0]); MD_in.setValue(EMDL_IMAGE_COORD_Y, ylist[0]); MD_in.setValue(EMDL_IMAGE_COORD_Z, zlist[0]); continue; } // Middle points of each short line segment MD_in.addObject(); MD_in.setValue(EMDL_IMAGE_COORD_X, (xlist[id] + xlist[id - 1]) / 2.); MD_in.setValue(EMDL_IMAGE_COORD_Y, (ylist[id] + ylist[id - 1]) / 2.); MD_in.setValue(EMDL_IMAGE_COORD_Z, (zlist[id] + zlist[id - 1]) / 2.); if (id == (xlist.size() - 1)) // End point { MD_in.addObject(); MD_in.setValue(EMDL_IMAGE_COORD_X, xlist[id]); MD_in.setValue(EMDL_IMAGE_COORD_Y, ylist[id]); MD_in.setValue(EMDL_IMAGE_COORD_Z, zlist[id]); break; } } //std::cout << MD_in.numberOfObjects() << std::endl; // DEBUG //fn_out = fn_in.beforeFirstOf(fn_in_root) + fn_out_root + ".star"; // DEBUG //MD_in.write(fn_out); // DEBUG // Interpolate HermiteInterpolateOne3DHelicalFilament( MD_in, MD_out, nr_segments, nr_asu, rise_A, pixel_size_A, box_size_pix, fid + 1, xdim_img, ydim_img, zdim_img, bimodal_angular_priors); // Output if (MD_out.numberOfObjects() < 1) std::cout << " WARNING: No sub-tomograms have been interpolated on this helical filament!" << std::endl; else MD_all.append(MD_out); total_segments += nr_segments; std::cout << " Interpolated " << nr_segments << " helical segments from 3D point set " << fn_in << std::endl; std::cout << " ========================================================= " << std::endl; } //fn_out = fn_in.beforeFirstOf(fn_in_root) + fn_out_root + ".star"; // OLD fn_out = fn_in_root + fn_out_root + ".star"; // NEW // DEBUG //std::cout << " fn_in = " << fn_in << std::endl; //std::cout << " fn_in_root = " << fn_in_root << std::endl; //std::cout << " fn_out_root = " << fn_out_root << std::endl; //std::cout << " fn_out = " << fn_out << std::endl; if (MD_all.numberOfObjects() < 1) std::cout << " ### Done! WARNING: No sub-tomograms have been interpolated! Please check whether you have done everything correctly." << std::endl; else { MD_all.write(fn_out); std::cout << " ### Done! Interpolated " << total_segments << " helical segments on " << fn_in_list.size() << " filaments. Output file: " << fn_out << std::endl; } } void estimateTiltPsiPriors( Matrix1D& dr, RFLOAT& tilt_deg, RFLOAT& psi_deg) { // euler.cpp: Euler_direction2angles: input angles = (a, b, g) then 3x3 matrix = // cosg*cosb*cosa - sing*sina, cosg*cosb*sina + sing*cosa, -cosg*sinb, // -sing*cosb*cosa - cosg*sina, -sing*cosb*sina + cosg*cosa, sing*sinb, // sinb*cosa, sinb*sina, cosb. // euler.cpp: Euler_direction2angles: input angles = (0, b, g) then 3x3 matrix = // cosg*cosb, sing, -cosg*sinb, // -sing*cosb, cosg, sing*sinb, // sinb, 0, cosb. RFLOAT tilt_rad = 0., psi_rad = 0., vec_len = 0.; int dim = dr.size(); if ( (dim != 2) && (dim != 3) ) REPORT_ERROR("helix.cpp::estimateTiltPsiPriors(): Input Matrix1D should have a size of 2 or 3!"); vec_len = XX(dr) * XX(dr) + YY(dr) * YY(dr); vec_len += (dim == 3) ? (ZZ(dr) * ZZ(dr)) : (0.); vec_len = sqrt(vec_len); if (vec_len < 0.0001) REPORT_ERROR("helix.cpp::estimateTiltPsiPriors(): Vector length is smaller than 0.0001!"); // A * (0, 0, z) = (x', y', z') // x' = -z*cosg*sinb // y' = z*sing*sinb // z' = z*cosb // cosb = z' / z // tang = y' / (-x') if (dim == 3) { // Tilt (b) should be [0, +180] degrees. Psi (g) should be [-180, +180] degrees tilt_rad = acos(ZZ(dr) / vec_len); // 'acos' returns an angle within [0, +pi] radians for tilt psi_rad = atan2(YY(dr), (-1.) 
* XX(dr)); // 'atan2' returns an angle within [-pi, +pi] radians for rot } else psi_rad = (-1.) * atan2(YY(dr), XX(dr)); if (dim == 3) tilt_deg = RAD2DEG(tilt_rad); psi_deg = RAD2DEG(psi_rad); } void readFileHeader( FileName& fn_in, FileName& fn_out, int nr_bytes) { std::ifstream fin; std::ofstream fout; int nr_blocks = 0, curr_block = 0; char data[100]; if (nr_bytes > 10 * 1024 * 1024) REPORT_ERROR("helix.cpp::readFileHeader(): Don't copy more than 10MB data!"); fin.open(fn_in.c_str(), std::ios_base::in|std::ios_base::binary); if (fin.fail()) REPORT_ERROR("helix.cpp::readFileHeader(): Cannot open input file: " + (std::string)(fn_in)); fout.open(fn_out.c_str(), std::ios_base::out|std::ios_base::binary); if (fout.fail()) REPORT_ERROR("helix.cpp::readFileHeader(): Cannot open output file: " + (std::string)(fn_out)); nr_blocks = nr_bytes / 100; nr_blocks = (nr_blocks < 1) ? (1) : (nr_blocks); std::cout << " Copying the first " << nr_blocks * 100 << " bytes from " << fn_in << " to " << fn_out << " ..." << std::endl; curr_block = 0; while (fin.read(reinterpret_cast(data), sizeof(data))) { curr_block++; fout.write(reinterpret_cast(data), sizeof(data)); if (curr_block >= nr_blocks) break; } fin.close(); fout.close(); } void select3DsubtomoFrom2Dproj( MetaDataTable& MD_2d, MetaDataTable& MD_3d, MetaDataTable& MD_out) { //std::vector xlist, ylist, zlist, idlist; std::vector mic_list, img_list; int id = 0; //RFLOAT xx = 0., yy = 0., zz = 0.; FileName mic_str, img_str; const size_t id_length = 6; // 6-digit ID bool auto_pixel_size = false; RFLOAT Dpix = -1., Mag = -1., _Dpix = -1., _Mag = -1.; MD_out.clear(); if (MD_2d.numberOfObjects() < 1) REPORT_ERROR("helix.cpp::select3DsubtomoFrom2Dproj(): MetaDataTable 2D projections is empty!"); if ( (!MD_2d.containsLabel(EMDL_IMAGE_COORD_X)) || (!MD_2d.containsLabel(EMDL_IMAGE_COORD_Y)) || (!MD_2d.containsLabel(EMDL_IMAGE_COORD_Z)) || (!MD_2d.containsLabel(EMDL_MICROGRAPH_NAME)) || (!MD_2d.containsLabel(EMDL_IMAGE_NAME)) ) REPORT_ERROR("helix.cpp::select3DsubtomoFrom2Dproj(): MetaDataTable 2D projections should contain labels _rlnCoordinateXYZ, _rlnMicrographName and _rlnImageName!"); // For particle rescaling - Does MD_2d contain Dpix and Magnification? 
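// Sketch of the checks that follow: the 2D-projection table and the 3D sub-tomogram table must
// each carry a single, consistent Dpix/Magnification pair; the values read from the 2D table are
// later written onto the selected sub-tomograms.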
auto_pixel_size = (MD_2d.containsLabel(EMDL_CTF_DETECTOR_PIXEL_SIZE)) && (MD_2d.containsLabel(EMDL_CTF_MAGNIFICATION)); if (MD_3d.numberOfObjects() < 1) REPORT_ERROR("helix.cpp::select3DsubtomoFrom2Dproj(): MetaDataTable 3D subtomograms is empty!"); if ( (!MD_3d.containsLabel(EMDL_IMAGE_COORD_X)) || (!MD_3d.containsLabel(EMDL_IMAGE_COORD_Y)) || (!MD_3d.containsLabel(EMDL_IMAGE_COORD_Z)) || (!MD_3d.containsLabel(EMDL_MICROGRAPH_NAME)) || (!MD_3d.containsLabel(EMDL_IMAGE_NAME)) ) REPORT_ERROR("helix.cpp::select3DsubtomoFrom2Dproj(): MetaDataTable 3D subtomograms should contain labels _rlnCoordinateXYZ, _rlnMicrographName and _rlnImageName!"); if (auto_pixel_size) { if ( (!MD_3d.containsLabel(EMDL_CTF_DETECTOR_PIXEL_SIZE)) || (!MD_3d.containsLabel(EMDL_CTF_MAGNIFICATION)) ) REPORT_ERROR("helix.cpp::select3DsubtomoFrom2Dproj(): If MetaDataTable 2D projections contains Dpix and Magnification, then MetaDataTable 3D subtomograms should also do!"); // Firstly, check whether the pixel sizes of segments in MD_3d are the same Dpix = -1., Mag = -1., _Dpix = -1., _Mag = -1.; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD_3d) { MD_3d.getValue(EMDL_CTF_DETECTOR_PIXEL_SIZE, _Dpix); MD_3d.getValue(EMDL_CTF_MAGNIFICATION, _Mag); if ( (!(_Dpix > 0.)) || (!(_Mag > 0.)) ) REPORT_ERROR("helix.cpp::select3DsubtomoFrom2Dproj(): Please ensure that all entries in MetaDataTable 3D subtomograms have valid Dpix and Magnification!"); if ( (Dpix < 0.) && (Mag < 0.) ) { Dpix = _Dpix; Mag = _Mag; continue; } if ( (fabs(_Dpix - Dpix) > 0.001) || (fabs(_Mag - Mag) > 0.001) ) REPORT_ERROR("helix.cpp::select3DsubtomoFrom2Dproj(): Please ensure that all entries in MetaDataTable 3D subtomograms have the same pixel size!"); } std::cout << " Dpix and Magnification of 3D sub-tomograms: " << Dpix << " and " << Mag << std::endl; // Then, check whether the pixel sizes of segments in MD_2d are the same. Store Dpix and Mag. Dpix = -1., Mag = -1., _Dpix = -1., _Mag = -1.; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD_2d) { MD_2d.getValue(EMDL_CTF_DETECTOR_PIXEL_SIZE, _Dpix); MD_2d.getValue(EMDL_CTF_MAGNIFICATION, _Mag); if ( (!(_Dpix > 0.)) || (!(_Mag > 0.)) ) REPORT_ERROR("helix.cpp::select3DsubtomoFrom2Dproj(): Please ensure that all entries in MetaDataTable 2D projections have valid Dpix and Magnification!"); if ( (Dpix < 0.) && (Mag < 0.) ) { Dpix = _Dpix; Mag = _Mag; continue; } if ( (fabs(_Dpix - Dpix) > 0.001) || (fabs(_Mag - Mag) > 0.001) ) REPORT_ERROR("helix.cpp::select3DsubtomoFrom2Dproj(): Please ensure that all entries in MetaDataTable 2D projections have the same pixel size!"); } std::cout << " Dpix and Magnification of 2D projections: " << Dpix << " and " << Mag << std::endl; std::cout << " Reset Dpix and Magnification of selected 3D sub-tomograms..." << std::endl; // Dpix and Mag have been stored. 
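// Note that only the consistency of the pixel sizes has been verified at this point; the actual
// reassignment of Dpix/Magnification happens when the selected sub-tomograms are added to MD_out below.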
} // Gather all the selected subtomograms //xlist.clear(); ylist.clear(); zlist.clear(); idlist.clear(); mic_list.clear(); FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD_2d) { //MD_2d.getValue(EMDL_IMAGE_COORD_X, xx); //MD_2d.getValue(EMDL_IMAGE_COORD_Y, yy); //MD_2d.getValue(EMDL_IMAGE_COORD_Z, zz); MD_2d.getValue(EMDL_MICROGRAPH_NAME, mic_str); MD_2d.getValue(EMDL_IMAGE_NAME, img_str); //id = textToInteger(img_str.beforeFirstOf("@")); // 6-digit ID //xlist.push_back(xx); ylist.push_back(yy); zlist.push_back(zz); idlist.push_back(id); mic_list.push_back(mic_str + img_str.beforeFirstOf("@")); } std::sort(mic_list.begin(), mic_list.end()); // Scan through all the subtomograms MD_out.clear(); FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD_3d) { //MD_3d.getValue(EMDL_IMAGE_COORD_X, xx); //MD_3d.getValue(EMDL_IMAGE_COORD_Y, yy); //MD_3d.getValue(EMDL_IMAGE_COORD_Z, zz); MD_3d.getValue(EMDL_MICROGRAPH_NAME, mic_str); MD_3d.getValue(EMDL_IMAGE_NAME, img_str); img_str = img_str.withoutExtension(); if (img_str.length() < id_length) REPORT_ERROR("helix.cpp::select3DsubtomoFrom2Dproj(): img_str.length() < " + integerToString(id_length) + " ! img_str = " + (std::string)(img_str)); img_str = img_str.substr(img_str.length() - id_length, id_length); //std::cout << " " << img_str << std::flush; img_str = mic_str + img_str; if (std::binary_search(mic_list.begin(), mic_list.end(), img_str)) // Subtomogram selected { MD_out.addObject(MD_3d.getObject()); // For particle rescaling - reset pixel size if (auto_pixel_size) { MD_out.setValue(EMDL_CTF_DETECTOR_PIXEL_SIZE, Dpix); MD_out.setValue(EMDL_CTF_MAGNIFICATION, Mag); } } } //std::cout << std::endl; } void averageAsymmetricUnits2D( ObservationModel& obsModel, MetaDataTable &MDimgs, FileName fn_o_root, int nr_asu, RFLOAT rise) { if (nr_asu == 1) { std::cout << " WARNING: averageAsymmetricUnits2D nr_asu=1, so not doing anything ..."; return; } int nr_asu_half = nr_asu / 2; RFLOAT angpix; FourierTransformer transformer; MultidimArray Fimg, Faux, Fsum; long int imgno = 0; init_progress_bar(MDimgs.numberOfObjects()); FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDimgs) { FileName fn_img; Matrix1D in(2), out(2); Image img; RFLOAT psi, angpix; int optics_group; MDimgs.getValue(EMDL_IMAGE_NAME, fn_img); MDimgs.getValue(EMDL_ORIENT_PSI, psi); MDimgs.getValue(EMDL_IMAGE_OPTICS_GROUP, optics_group); optics_group--; angpix = obsModel.getPixelSize(optics_group); img.read(fn_img); transformer.FourierTransform(img(), Fimg, false); Fsum = Fimg; // original image //std::cerr << " imgno= " << imgno << " fn_img= " << fn_img << " psi= " << psi << " rise= " << rise << " angpix= " << angpix << " nr_asu= " << nr_asu << " xsize= " << XSIZE(img()) << std::endl; for (int i = 2; i <= nr_asu; i++) { // one way if (i%2 == 0) { XX(in) = rise * (i/2) / angpix; } // the other way else { XX(in) = -1. 
* rise * (i/2) / angpix; } YY(in) = 0.; transformCartesianAndHelicalCoords(in, out, 0., 0., psi, HELICAL_TO_CART_COORDS); //std::cerr << " i= " << i << " XX(in)= " << XX(in) << " YY(in)= " << YY(in) << " XX(out)= " << XX(out) << " YY(out)= " << YY(out) << std::endl; shiftImageInFourierTransform(Fimg, Faux, XSIZE(img()), XX(out), YY(out)); Fsum += Faux; } FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Fimg) { DIRECT_MULTIDIM_ELEM(Fimg, n) = DIRECT_MULTIDIM_ELEM(Fsum, n) / (RFLOAT)nr_asu; } transformer.inverseFourierTransform(); // Write this particle to the stack on disc // First particle: write stack in overwrite mode, from then on just append to it MDimgs.setValue(EMDL_IMAGE_ORI_NAME, fn_img); fn_img.compose(imgno+1, fn_o_root + "particles.mrcs"); if (imgno == 0) img.write(fn_img, -1, false, WRITE_OVERWRITE); else img.write(fn_img, -1, false, WRITE_APPEND); MDimgs.setValue(EMDL_IMAGE_NAME, fn_img); if (imgno%60==0) progress_bar(imgno); imgno++; } progress_bar(MDimgs.numberOfObjects()); } relion-3.1.3/src/helix.h000066400000000000000000000330021411340063500150520ustar00rootroot00000000000000/*************************************************************************** * * Author: "Shaoda He" * MRC Laboratory of Molecular Biology * * Kent Thurber from the NIH provided code for rot-angle priors (indicated with // KThurber comments) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef HELIX_H_ #define HELIX_H_ #include "src/multidim_array.h" #include "src/macros.h" #include "src/complex.h" #include "src/fftw.h" #include "src/image.h" #include "src/transformations.h" #include "src/euler.h" #include "src/assembly.h" #include "src/jaz/obs_model.h" #include "src/time.h" #include #define CART_TO_HELICAL_COORDS true #define HELICAL_TO_CART_COORDS false #define RELION_STAR_FORMAT 0 #define XIMDISP_COORDS_FORMAT 1 #define EMAN2_FORMAT 2 #define UNIMODAL_PSI_PRIOR_FLIP_RATIO (0.) 
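// The two flip-ratio constants (UNIMODAL above, BIMODAL below) give the prior probability that a
// segment's psi prior points the opposite way along the filament: 0 when the tube polarity is
// known, 0.5 when both directions are equally likely.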
#define BIMODAL_PSI_PRIOR_FLIP_RATIO (0.5) class HelicalSymmetryItem { public: RFLOAT twist_deg; RFLOAT rise_pix; RFLOAT dev; HelicalSymmetryItem() { twist_deg = rise_pix = -1.; dev = (1e35); } HelicalSymmetryItem(RFLOAT _twist_deg, RFLOAT _rise_pix, RFLOAT _dev = (1e35)) { twist_deg = _twist_deg; rise_pix = _rise_pix; dev = _dev; } }; void makeHelicalSymmetryList( std::vector& list, RFLOAT rise_min_pix, RFLOAT rise_max_pix, RFLOAT rise_step_pix, bool search_rise, RFLOAT twist_min_deg, RFLOAT twist_max_deg, RFLOAT twist_step_deg, bool search_twist); bool calcCCofHelicalSymmetry( const MultidimArray& v, RFLOAT r_min_pix, RFLOAT r_max_pix, RFLOAT z_percentage, RFLOAT rise_pix, RFLOAT twist_deg, RFLOAT& cc, int& nr_asym_voxels); bool localSearchHelicalSymmetry( const MultidimArray& v, RFLOAT pixel_size_A, RFLOAT sphere_radius_A, RFLOAT cyl_inner_radius_A, RFLOAT cyl_outer_radius_A, RFLOAT z_percentage, RFLOAT rise_min_A, RFLOAT rise_max_A, RFLOAT rise_inistep_A, RFLOAT& rise_refined_A, RFLOAT twist_min_deg, RFLOAT twist_max_deg, RFLOAT twist_inistep_deg, RFLOAT& twist_refined_deg, std::ostream* o_ptr = NULL); RFLOAT getHelicalSigma2Rot( RFLOAT helical_rise_Angst, RFLOAT helical_twist_deg, RFLOAT helical_offset_step_Angst, RFLOAT rot_step_deg, RFLOAT old_sigma2_rot); bool checkParametersFor3DHelicalReconstruction( bool ignore_symmetry, bool do_symmetry_local_refinement, int nr_asu, RFLOAT rise_initial_A, RFLOAT rise_min_A, RFLOAT rise_max_A, RFLOAT twist_initial_deg, RFLOAT twist_min_deg, RFLOAT twist_max_deg, int box_len, RFLOAT pixel_size_A, RFLOAT z_percentage, RFLOAT particle_diameter_A, RFLOAT tube_inner_diameter_A, RFLOAT tube_outer_diameter_A, bool verboseOutput = false); void imposeHelicalSymmetryInRealSpace( MultidimArray& v, RFLOAT pixel_size_A, RFLOAT sphere_radius_A, RFLOAT cyl_inner_radius_A, RFLOAT cyl_outer_radius_A, RFLOAT z_percentage, RFLOAT rise_A, RFLOAT twist_deg, RFLOAT cosine_width_pix); // Some functions only for specific testing void calcRadialAverage( const MultidimArray& v, std::vector& radial_avg_val_list); void cutZCentralPartOfSoftMask( MultidimArray& mask, RFLOAT z_percentage, RFLOAT cosine_width = 5.); void createCylindricalReference( MultidimArray& v, int box_size, RFLOAT inner_diameter_pix, RFLOAT outer_diameter_pix, RFLOAT cosine_width = 5.); void createCylindricalReferenceWithPolarity( MultidimArray& v, int box_size, RFLOAT inner_diameter_pix, RFLOAT outer_diameter_pix, RFLOAT ratio_topbottom = 0.5, RFLOAT cosine_width = 5.); void transformCartesianAndHelicalCoords( Matrix1D& in, Matrix1D& out, RFLOAT rot_deg, RFLOAT tilt_deg, RFLOAT psi_deg, bool direction); void transformCartesianAndHelicalCoords( RFLOAT xin, RFLOAT yin, RFLOAT zin, RFLOAT& xout, RFLOAT& yout, RFLOAT& zout, RFLOAT rot_deg, RFLOAT tilt_deg, RFLOAT psi_deg, int dim, bool direction); // C1 Helix // If radius_A < 0, detect the radius of original assembly and calculate r as sqrt(x^2 + y^2) void makeSimpleHelixFromPDBParticle( const Assembly& ori, Assembly& helix, RFLOAT radius_A, RFLOAT twist_deg, RFLOAT rise_A, int nr_copy, bool do_center = false); void applySoftSphericalMask( MultidimArray& v, RFLOAT sphere_diameter = -1., RFLOAT cosine_width = 5.); void extractHelicalSegmentsFromTubes_Multiple( FileName& suffix_in, FileName& suffix_out, int format_tag, int nr_asu, RFLOAT rise_A, RFLOAT pixel_size_A, RFLOAT Xdim, RFLOAT Ydim, RFLOAT box_size_pix, bool bimodal_angular_priors = true, bool cut_into_segments = true); void convertHelicalTubeCoordsToMetaDataTable( FileName& fn_in, MetaDataTable& 
MD_out, int& total_segments, int& total_tubes, int nr_asu, RFLOAT rise_A, RFLOAT pixel_size_A, RFLOAT Xdim, RFLOAT Ydim, RFLOAT box_size_pix, bool bimodal_angular_priors = true, bool cut_into_segments = true); void combineParticlePriorsWithKaiLocalCTF( FileName& fn_priors, FileName& fn_local_ctf, FileName& fn_combined); // Files of priors: mic1_priors.star, files of local CTF: mic1_local.star // Then suffix_priors = _priors.star, suffix_local_ctf = _local.star void combineParticlePriorsWithKaiLocalCTF_Multiple( std::string& suffix_priors, std::string& suffix_local_ctf, std::string& suffix_combined); void setNullTiltPriorsInDataStar( FileName& fn_in, FileName& fn_out); void removeBadTiltHelicalSegmentsFromDataStar( FileName& fn_in, FileName& fn_out, RFLOAT max_dev_deg = 15.); void removeBadPsiHelicalSegmentsFromDataStar( FileName& fn_in, FileName& fn_out, RFLOAT max_dev_deg = 15.); void convertHelicalSegmentCoordsToStarFile_Multiple( FileName& suffix_coords, FileName& suffix_out, int format_tag, RFLOAT pixel_size_A, RFLOAT Xdim, RFLOAT Ydim, RFLOAT boxsize, bool bimodal_angular_priors = true); void convertHelicalSegmentCoordsToMetaDataTable( FileName& fn_in, MetaDataTable& MD_out, int& total_segments, bool is_3D, RFLOAT Xdim, RFLOAT Ydim, RFLOAT Zdim, RFLOAT box_size_pix, bool bimodal_angular_priors = true); void convertXimdispHelicalSegmentCoordsToMetaDataTable( FileName& fn_in, MetaDataTable& MD_out, int& total_segments, int& total_tubes, RFLOAT pixel_size_A, RFLOAT Xdim, RFLOAT Ydim, RFLOAT box_size_pix, bool bimodal_angular_priors = true); void convertXimdispHelicalTubeCoordsToMetaDataTable( FileName& fn_in, MetaDataTable& MD_out, int& total_segments, int& total_tubes, int nr_asu, RFLOAT rise_A, RFLOAT pixel_size_A, RFLOAT Xdim, RFLOAT Ydim, RFLOAT box_size_pix, bool bimodal_angular_priors = true, bool cut_into_segments = true); void convertEmanHelicalSegmentCoordsToMetaDataTable( FileName& fn_in, MetaDataTable& MD_out, int& total_segments, int& total_tubes, RFLOAT pixel_size_A, RFLOAT Xdim, RFLOAT Ydim, RFLOAT box_size_pix, bool bimodal_angular_priors = true); void convertEmanHelicalTubeCoordsToMetaDataTable( FileName& fn_in, MetaDataTable& MD_out, int& total_segments, int& total_tubes, int nr_asu, RFLOAT rise_A, RFLOAT pixel_size_A, RFLOAT Xdim, RFLOAT Ydim, RFLOAT box_size_pix, bool bimodal_angular_priors = true, bool cut_into_segments = true); void makeHelicalReference2D( MultidimArray& out, int box_size, RFLOAT particle_diameter_A, RFLOAT tube_diameter_A, RFLOAT pixel_size_A, bool is_tube_white = true); void makeHelicalReference3DWithPolarity( MultidimArray& out, int box_size, RFLOAT pixel_size_A, RFLOAT twist_deg, RFLOAT rise_A, RFLOAT tube_diameter_A, RFLOAT particle_diameter_A, RFLOAT cyl_diameter_A, RFLOAT topbottom_ratio, int sym_Cn = 1, int nr_filaments_helix_with_seam = -1); void divideStarFile( FileName& fn_in, int nr); void mergeStarFiles(FileName& fn_in); void sortHelicalTubeID(MetaDataTable& MD); void simulateHelicalSegments( bool is_3d_tomo, FileName& fn_vol_in, FileName& fn_star_out, RFLOAT white_noise, int new_box_size, int nr_subunits, int nr_asu, int nr_tubes, bool do_bimodal_searches, RFLOAT cyl_outer_diameter_A, RFLOAT angpix, RFLOAT rise_A, RFLOAT twist_deg, RFLOAT sigma_psi, RFLOAT sigma_tilt, RFLOAT sigma_offset, int random_seed = -1); void outputHelicalSymmetryStatus( int iter, RFLOAT rise_initial_A, RFLOAT rise_min_A, RFLOAT rise_max_A, RFLOAT twist_initial_deg, RFLOAT twist_min_deg, RFLOAT twist_max_deg, bool do_local_search_helical_symmetry, std::vector& 
rise_A, std::vector& twist_deg, RFLOAT rise_A_half1, RFLOAT rise_A_half2, RFLOAT twist_deg_half1, RFLOAT twist_deg_half2, bool do_split_random_halves, std::ostream& out); void excludeLowCTFCCMicrographs( FileName& fn_in, FileName& fn_out, RFLOAT cc_min = -1., RFLOAT EPA_lowest_res = 999999., RFLOAT df_min = -999999., RFLOAT df_max = 999999.); void cutOutPartOfHelix( const MultidimArray& vin, MultidimArray& vout, long int new_boxdim, RFLOAT ang_deg, RFLOAT z_percentage); // TESTING... class HelicalSegmentPriorInfoEntry { public: std::string helical_tube_name; long int MDobjectID; RFLOAT rot_deg, psi_deg, tilt_deg; RFLOAT dx_A, dy_A, dz_A; RFLOAT track_pos_A; bool has_wrong_polarity; int subset, classID; RFLOAT rot_prior_deg, psi_prior_deg, tilt_prior_deg; RFLOAT rot_prior_deg_ori, psi_prior_deg_ori, tilt_prior_deg_ori; RFLOAT dx_prior_A, dy_prior_A, dz_prior_A; RFLOAT psi_flip_ratio; bool psi_prior_flip; void clear(); HelicalSegmentPriorInfoEntry() { clear(); }; ~HelicalSegmentPriorInfoEntry() { clear(); }; void checkPsiPolarity(); bool operator<(const HelicalSegmentPriorInfoEntry &rhs) const; }; // KThurber add this function void flipPsiTiltForHelicalSegment( RFLOAT old_psi, RFLOAT old_tilt, RFLOAT& new_psi, RFLOAT& new_tilt); void updatePriorsForOneHelicalTube( std::vector& list, int sid, int eid, int& nr_wrong_polarity, bool &reverse_direction, RFLOAT sigma_segment_dist, std::vector helical_rise, std::vector helical_twist, bool is_3D_data, bool do_auto_refine, RFLOAT sigma2_rot, // KThurber RFLOAT sigma2_tilt, RFLOAT sigma2_psi, RFLOAT sigma2_offset, RFLOAT sigma_cutoff = 3.); void updatePriorsForHelicalReconstruction( MetaDataTable& MD, RFLOAT sigma_segment_dist, std::vector helical_rise, std::vector helical_twist, int helical_nstart, bool is_3D, bool do_auto_refine, RFLOAT sigma2_rot, RFLOAT sigma2_tilt, RFLOAT sigma2_psi, RFLOAT sigma2_offset, bool keep_tilt_prior_fixed, int verb); void updateAngularPriorsForHelicalReconstructionFromLastIter( MetaDataTable& MD, bool keep_tilt_prior_fixed); void testDataFileTransformXY(MetaDataTable& MD); void setPsiFlipRatioInStarFile(MetaDataTable& MD, RFLOAT ratio = 0.); void plotLatticePoints(MetaDataTable& MD, int x1, int y1, int x2, int y2); void grabParticleCoordinates( FileName& fn_in, FileName& fn_out); // Files of priors: mic1_priors.star, files of local CTF: mic1_local.star // Then suffix_priors = _priors.star, suffix_local_ctf = _local.star void grabParticleCoordinates_Multiple( std::string& suffix_fin, std::string& suffix_fout); void calculateRadialAvg(MultidimArray &v, RFLOAT angpix); void transformCartesianToHelicalCoordsForStarFiles( MetaDataTable& MD_in, MetaDataTable& MD_out); void normaliseHelicalSegments( FileName& fn_in, FileName& fn_out_root, RFLOAT helical_outer_diameter_A, RFLOAT pixel_size_A); // Copied online from http://paulbourke.net/miscellaneous/interpolation/ // Author: Paul Bourke, December 1999 /* Tension: 1 is high, 0 normal, -1 is low Bias: 0 is even, positive is towards first segment, negative towards the other */ // mu is the percentage between y1 and y2 RFLOAT HermiteInterpolate1D( RFLOAT y0, RFLOAT y1, RFLOAT y2, RFLOAT y3, RFLOAT mu, RFLOAT tension = 0., RFLOAT bias = 0.); void HermiteInterpolateOne3DHelicalFilament( MetaDataTable& MD_in, MetaDataTable& MD_out, int& total_segments, int nr_asu, RFLOAT rise_A, RFLOAT pixel_size_A, RFLOAT box_size_pix, int helical_tube_id, RFLOAT Xdim = 999999., RFLOAT Ydim = 999999., RFLOAT Zdim = 999999., bool bimodal_angular_priors = true); void Interpolate3DCurves( FileName& 
fn_in_root, FileName& fn_out_root, int nr_asu, RFLOAT rise_A, RFLOAT pixel_size_A, RFLOAT box_size_pix, int binning_factor = 1, bool bimodal_angular_priors = true); void estimateTiltPsiPriors( Matrix1D& dr, RFLOAT& tilt_deg, RFLOAT& psi_deg); void readFileHeader( FileName& fn_in, FileName& fn_out, int nr_bytes = 100); void select3DsubtomoFrom2Dproj( MetaDataTable& MD_2d, MetaDataTable& MD_3d, MetaDataTable& MD_out); void averageAsymmetricUnits2D( ObservationModel& obsModel, MetaDataTable &MDimgs, FileName fn_o_root, int nr_asu, RFLOAT rise); #endif /* HELIX_H_ */ relion-3.1.3/src/helix_inimodel2d.cpp000066400000000000000000001121131411340063500175140ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include "helix_inimodel2d.h" //#define DEBUG void HelixAlignerModel::initialise(int nr_classes, int ydim, int xdim) { MultidimArray tmp; tmp.initZeros(ydim, xdim); tmp.setXmippOrigin(); for (int iclass = 0; iclass < nr_classes; iclass++) { Aref.push_back(tmp); Asum.push_back(tmp); Asumw.push_back(tmp); pdf.push_back(0); } tmp.initZeros(ydim,ydim); for (int iclass = 0; iclass < nr_classes; iclass++) { Arec.push_back(tmp); } } void HelixAlignerModel::initZeroSums() { for (int iclass = 0; iclass < Asum.size(); iclass++) { Asum[iclass].initZeros(); Asumw[iclass].initZeros(); pdf[iclass] = 0.; } } // Cleaning up void HelixAlignerModel::clear() { Aref.clear(); Arec.clear(); Asum.clear(); Asumw.clear(); pdf.clear(); } void HelixAligner::clear() { // TODO, clean up.. } void HelixAligner::usage() { parser.writeUsage(std::cout); } void HelixAligner::parseInitial(int argc, char **argv) { parser.setCommandLine(argc, argv); // General optimiser I/O stuff int general_section = parser.addSection("General options"); fn_out = parser.getOption("--o", "Output rootname",""); fn_imgs = parser.getOption("--i", " STAR file with the input images and orientation parameters",""); // deactivate fn_mics approach: never really worked... fn_mics = ""; /* fn_mics = parser.getOption("--mic", "OR: STAR file with the input micrographs",""); fn_coord_suffix = parser.getOption("--coord_suffix", "The suffix for the start-end coordinate files, e.g. 
\"_picked.star\" or \".box\"",""); fn_coord_dir = parser.getOption("--coord_dir", "The directory where the coordinate files are (default is same as micrographs)", "ASINPUT"); extract_width = textToInteger(parser.getOption("--extract_width", "Width (in pixels) of the images for the helices to be extracted ", "100")); */ int param_section = parser.addSection("Parameters"); crossover_distance = textToFloat(parser.getOption("--crossover_distance", "Distance in Angstroms between 2 cross-overs","")); nr_iter = textToInteger(parser.getOption("--iter", "Maximum number of iterations to perform", "10")); nr_classes = textToInteger(parser.getOption("--K", "Number of classes", "1")); angpix = textToFloat(parser.getOption("--angpix", "Pixel size in Angstroms (default take from STAR file)", "-1")); maxres = textToFloat(parser.getOption("--maxres", "Limit calculations to approximately this resolution in Angstroms", "-1")); max_shift_A = textToFloat(parser.getOption("--search_shift", "How many Angstroms to search translations perpendicular to helical axis?", "0")); max_rotate = textToFloat(parser.getOption("--search_angle", "How many degrees to search in-plane rotations?", "0")); step_rotate = textToFloat(parser.getOption("--step_angle", "The step size (in degrees) of the rotational searches", "1")); fn_inimodel = parser.getOption("--iniref", "An initial model to starting optimisation path", ""); symmetry = textToInteger(parser.getOption("--sym", "Order of symmetry in the 2D xy-slice?", "1")); max_smear = textToInteger(parser.getOption("--smear", "Smear out each image along X to ensure continuity", "0")); random_seed = textToInteger(parser.getOption("--random_seed", "Random seed (default is with clock)", "-1")); search_size = textToInteger(parser.getOption("--search_size", "Search this many pixels up/down of the target downscaled size to fit best crossover distance", "5")); mask_diameter = textToFloat(parser.getOption("--mask_diameter", "The diameter (A) of a mask to be aplpied to the 2D reconstruction", "-1")); nr_threads = textToInteger(parser.getOption("--j", "Number of (openMP) threads", "1")); do_only_make_3d = parser.checkOption("--only_make_3d", "Take the iniref image, and create a 3D model from that without any alignment of the input images"); verb = 1; if (parser.checkForErrors(verb)) REPORT_ERROR("Errors encountered on the command line (see above), exiting..."); } // Run multiple iterations void HelixAligner::initialise() { // Randomise the order of the particles if (random_seed == -1) random_seed = time(NULL); // Also randomize random-number-generator for perturbations on the angles init_random_generator(random_seed); if (fn_imgs!= "") { // Get the image size MetaDataTable MD; MD.read(fn_imgs); MD.firstObject(); FileName fn_img; Image img; if (MD.containsLabel(EMDL_IMAGE_NAME)) MD.getValue(EMDL_IMAGE_NAME, fn_img); else if (MD.containsLabel(EMDL_MLMODEL_REF_IMAGE)) MD.getValue(EMDL_MLMODEL_REF_IMAGE, fn_img); else REPORT_ERROR("ERROR: input STAR file does not contain rlnImageName or rlnReferenceImage!"); img.read(fn_img, false); // only read the header int xdim=XSIZE(img()); int ydim = YSIZE(img()); ori_size = xdim; if (XSIZE(img()) != YSIZE(img()) || ZSIZE(img()) != 1) REPORT_ERROR("ERROR: only squared 2D images are allowed."); // Get the pixel size if (MD.containsLabel(EMDL_CTF_MAGNIFICATION) && MD.containsLabel(EMDL_CTF_DETECTOR_PIXEL_SIZE)) { RFLOAT mag, dstep, my_angpix; MD.getValue(EMDL_CTF_MAGNIFICATION, mag); MD.getValue(EMDL_CTF_DETECTOR_PIXEL_SIZE, dstep); my_angpix = 10000. 
* dstep / mag; std::cout << " Using pixel size from the input STAR file: " << my_angpix << std::endl; angpix = my_angpix; } } else if (fn_mics != "") { // Read in the micrographs STAR file MDmics.read(fn_mics); // Get the pixel size if (MDmics.containsLabel(EMDL_CTF_MAGNIFICATION) && MDmics.containsLabel(EMDL_CTF_DETECTOR_PIXEL_SIZE)) { RFLOAT mag, dstep, my_angpix; MDmics.getValue(EMDL_CTF_MAGNIFICATION, mag); MDmics.getValue(EMDL_CTF_DETECTOR_PIXEL_SIZE, dstep); my_angpix = 10000. * dstep / mag; std::cout << " Using pixel size from the input STAR file: " << my_angpix << std::endl; angpix = my_angpix; } // Make sure the coordinate file directory names end with a '/' if (fn_coord_dir != "ASINPUT" && fn_coord_dir[fn_coord_dir.length()-1] != '/') fn_coord_dir+="/"; // Loop over all micrographs in the input STAR file and warn of coordinate file or micrograph file do not exist FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDmics) { FileName fn_mic; MDmics.getValue(EMDL_MICROGRAPH_NAME, fn_mic); FileName fn_pre, fn_jobnr, fn_post; decomposePipelineFileName(fn_mic, fn_pre, fn_jobnr, fn_post); FileName fn_coord = fn_coord_dir + fn_post.withoutExtension() + fn_coord_suffix; if (!exists(fn_coord)) std::cerr << "Warning: coordinate file " << fn_coord << " does not exist..." << std::endl; if (!exists(fn_mic)) std::cerr << "Warning: micrograph file " << fn_mic << " does not exist..." << std::endl; } ori_size = extract_width; } else if (do_only_make_3d && fn_inimodel != "") { Image img; img.read(fn_inimodel); img().setXmippOrigin(); if (angpix < 0.) { img.MDMainHeader.getValue(EMDL_IMAGE_SAMPLINGRATE_X, angpix); std::cout << " Using pixel size from the input file header: " << angpix << std::endl; } ori_size = XSIZE(img()); // The 3D reconstruction float deg_per_pixel = 180. * angpix / (crossover_distance); Image vol; vol().resize(ori_size, ori_size, ori_size); for (int k = 0; k < ZSIZE(vol()); k++) { float ang = deg_per_pixel * k; Matrix2D Arot; rotation2DMatrix(ang, Arot); MultidimArray Mrot; Mrot.initZeros(img()); applyGeometry(img(), Mrot, Arot, true, false); FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY2D(Mrot) DIRECT_A3D_ELEM(vol(), k, i, j) = DIRECT_A2D_ELEM(Mrot, i, j); } vol.setSamplingRateInHeader(angpix); vol.write(fn_out + ".mrc"); std::cout << " * Written " << fn_out << ".mrc" << std::endl; exit(RELION_EXIT_SUCCESS); } else { REPORT_ERROR("ERROR: provide --i, -mic, or --only_make_3d and --iniref"); } if (angpix < 0.) { REPORT_ERROR("ERROR: provide pixel size through --angpix or through the magnification and detectorpixel size in the input STAR file."); } if (maxres < 0. || maxres < 2. * angpix) { maxres = 2. * angpix; std::cout << " Setting maximum resolution to " << maxres << std::endl; } down_size = ori_size * angpix * 2. / maxres; // Make sure that the crossover distance is close to an integer times the (downsized) pixel size of the model! float best_fit = 1.; down_angpix = 0.; int best_size = 0; for (int delta_size = -search_size; delta_size <= search_size; delta_size+= 2) { int mysize = down_size + delta_size; mysize -= mysize%2; //make even in case it is not already if (mysize <= ori_size) { float myangpix = angpix * (float)ori_size/(float)mysize; float mydiv = (2. * crossover_distance) / myangpix; // Also want even number of pixels in rectangle! float myfit = fmod(mydiv, 2); if (myfit > 1.) 
myfit -= 2.; myfit = fabs(myfit); if (myfit < best_fit) { best_fit = myfit; down_angpix = myangpix; best_size = mysize; } std::cout << " * mydiv= " << mydiv << " myangpix= " << myangpix << " myfit= " << myfit << std::endl; } } std::cout << " *** best_angpix= " << down_angpix << " rectangles xsize= " << (2. * crossover_distance)/down_angpix << std::endl; down_size = best_size; yrect = ROUND(ori_size * angpix/down_angpix); // Make even yrect -= yrect%2; xrect = ROUND((2. * crossover_distance)/down_angpix); model.initialise(nr_classes, yrect, xrect); max_shift = CEIL(max_shift_A / down_angpix); mask_radius_pix = (mask_diameter > 0) ? CEIL(mask_diameter / (2. * down_angpix)) : yrect/2 - 2; std::cout << " maxres= " << maxres << " angpix= " << angpix << " down_size= " << down_size << std::endl; std::cout << " xrect= " << xrect << " yrect= " << yrect << " down_angpix= " << down_angpix << std::endl; std::cout << " max_shift= " << max_shift << " mask_radius_pix= "<< mask_radius_pix<< std::endl; // Now read in all images if (fn_mics == "") readImages(); else getHelicesFromMics(); initialiseClasses(); } // Read in all the images void HelixAligner::readImages() { MD.read(fn_imgs); if (verb > 0) { std::cout << " Reading in all images ..." << std::endl; init_progress_bar(MD.numberOfObjects()); } std::vector > dummy; long int ipart=0; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD) { FileName fn_img; Image img; if (MD.containsLabel(EMDL_IMAGE_NAME)) MD.getValue(EMDL_IMAGE_NAME, fn_img); else if (MD.containsLabel(EMDL_MLMODEL_REF_IMAGE)) MD.getValue(EMDL_MLMODEL_REF_IMAGE, fn_img); else REPORT_ERROR("ERROR: input STAR file does not contain rlnImageName or rlnReferenceImage!"); img.read(fn_img); img().setXmippOrigin(); // Rethink this when expanding program to 3D! RFLOAT yoff = 0.; RFLOAT psi = 0.; if (MD.containsLabel(EMDL_ORIENT_ORIGIN_Y_ANGSTROM)) MD.getValue(EMDL_ORIENT_ORIGIN_Y_ANGSTROM, yoff); if (MD.containsLabel(EMDL_ORIENT_PSI)) MD.getValue(EMDL_ORIENT_PSI, psi); ori_psis.push_back(psi); ori_yoffs.push_back(yoff); // Apply the actual transformation Matrix2D A; rotation2DMatrix(psi, A); MAT_ELEM(A,1, 2) = -yoff / angpix; selfApplyGeometry(img(), A, IS_INV, DONT_WRAP); Xrects.push_back(dummy); // Calculate all rotated versions if (ipart==0) psis.clear(); for (int iflip =0; iflip < 2; iflip++) { for (RFLOAT ang = 0; ang <= max_rotate; ang += step_rotate) { Matrix2D Arot; MultidimArray Irot; RFLOAT myang = (iflip == 1) ? ang + 180. : ang; Irot.initZeros(img()); rotation2DMatrix(myang, Arot); applyGeometry(img(), Irot, Arot, true, false); resizeMap(Irot, down_size); Irot.setXmippOrigin(); Xrects[Xrects.size()-1].push_back(Irot); if (ipart==0) psis.push_back(myang); if (ang > 0.) { // Also rotate in the opposite direction Irot.initZeros(img()); applyGeometry(img(), Irot, Arot, false, false); resizeMap(Irot, down_size); Irot.setXmippOrigin(); Xrects[Xrects.size()-1].push_back(Irot); if (ipart==0) psis.push_back(-myang); } } } ipart++; if (verb > 0 && ipart%50 == 0) progress_bar(ipart); //#define DEBUG_READIMAGES #ifdef DEBUG_READIMAGES FileName fnt; fnt.compose("helixnew", Xrects.size(),"spi",3); Image It; It()=Xrects[Xrects.size()-1][3]; It.write(fnt); #endif } if (verb > 0) progress_bar(MD.numberOfObjects()); #ifdef DEBUG std::cerr << "done readImages" << std::endl; #endif } void HelixAligner::getHelicesFromMics() { if (verb > 0) { std::cout << " Reading in all micrographs ..." 
<< std::endl; init_progress_bar(MDmics.numberOfObjects()); } // Loop over all micrographs in the input STAR file and warn of coordinate file or micrograph file do not exist long int imic = 0; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDmics) { imic++; FileName fn_mic; MDmics.getValue(EMDL_MICROGRAPH_NAME, fn_mic); FileName fn_pre, fn_jobnr, fn_post; decomposePipelineFileName(fn_mic, fn_pre, fn_jobnr, fn_post); FileName fn_coord = fn_coord_dir + fn_post.withoutExtension() + fn_coord_suffix; if (!exists(fn_mic) || !exists(fn_coord)) { if (!exists(fn_mic)) std::cerr << "Warning: micrograph file " << fn_mic << " does not exist..." << std::endl; if (!exists(fn_coord)) std::cerr << "Warning: coordinate file " << fn_coord << " does not exist..." << std::endl; } else { Image Imic; Imic.read(fn_mic); RFLOAT avg = Imic().computeAvg(); // Read in the coordinate files MetaDataTable MDcoords; MDcoords.read(fn_coord); if (MDcoords.numberOfObjects()%2 == 1) { std::cerr << " ERROR: not an even number of entries in " << fn_coord << "! Skipping this micrograph... " << std::endl; continue; } // Get all start-end coordinate pairs std::vector x1_coord_list, y1_coord_list, x2_coord_list, y2_coord_list, pitch_list; RFLOAT xp, yp; int MDobj_id = 0; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDcoords) { MDobj_id++; MDcoords.getValue(EMDL_IMAGE_COORD_X, xp); MDcoords.getValue(EMDL_IMAGE_COORD_Y, yp); if (MDobj_id % 2) { x1_coord_list.push_back(xp); y1_coord_list.push_back(yp); } else { x2_coord_list.push_back(xp); y2_coord_list.push_back(yp); } } // Now extract the images: make all helices stand upright... Y becomes helical axis, X becomes helix width // For that we need to do interpolations... for (int ipair = 0; ipair < x1_coord_list.size(); ipair++) { std::vector > dummy; Xrects.push_back(dummy); // Calculate all rotated versions int oldxsize, oldysize, oldsize; bool do_set_oldsize = true; for (RFLOAT ang = 0.; ang <= max_rotate; ang += step_rotate) { RFLOAT x1,x2,y1,y2,xcen,ycen; x1=x1_coord_list[ipair]; x2=x2_coord_list[ipair]; y1=y1_coord_list[ipair]; y2=y2_coord_list[ipair]; xcen = x1 + (x2-x1)/2; ycen = y1 + (y2-y1)/2; int xsize = FLOOR(sqrt((x2-x1)*(x2-x1) + (y2-y1)*(y2-y1))); RFLOAT phi = RAD2DEG(atan(RFLOAT(y2-y1)/RFLOAT(x2-x1))); MultidimArray Ihelix; Ihelix.resize(extract_width, xsize); Ihelix.setXmippOrigin(); int nrots = (ang > 0.) ? 
2 : 1; for (int irot = 0; irot < nrots; irot++) { Matrix2D Arot(3,3); if (irot == 0) rotation2DMatrix(phi+ang, Arot); else rotation2DMatrix(phi-ang, Arot); Arot(0,2) = xcen; Arot(1,2) = ycen; int m1, n1, m2, n2; RFLOAT x, y, xp, yp; RFLOAT minxp, minyp, maxxp, maxyp; int cen_x, cen_y, cen_xp, cen_yp; RFLOAT wx, wy; int Xdim, Ydim; // Find center and limits of image cen_y = (int)(YSIZE(Ihelix) / 2); cen_x = (int)(XSIZE(Ihelix) / 2); cen_yp = (int)(YSIZE(Imic()) / 2); cen_xp = (int)(XSIZE(Imic()) / 2); minxp = 0; minyp = 0; maxxp = XSIZE(Imic()) - 1; maxyp = YSIZE(Imic()) - 1; Xdim = XSIZE(Imic()); Ydim = YSIZE(Imic()); for (int i = 0; i < YSIZE(Ihelix); i++) { // Calculate position of the beginning of the row in the output image x = -cen_x; y = i - cen_y; // Calculate this position in the input image according to the // geometrical transformation // they are related by // coords_output(=x,y) = A * coords_input (=xp,yp) xp = x * Arot(0, 0) + y * Arot(0, 1) + Arot(0, 2); yp = x * Arot(1, 0) + y * Arot(1, 1) + Arot(1, 2); for (int j = 0; j < XSIZE(Ihelix); j++) { bool interp; RFLOAT tmp; // If the point is outside the image, apply a periodic extension // of the image, what exits by one side enters by the other interp = true; if (xp < minxp || xp > maxxp) interp = false; if (yp < minyp || yp > maxyp) interp = false; if (interp) { // Linear interpolation // Calculate the integer position in input image, be careful // that it is not the nearest but the one at the top left corner // of the interpolation square. Ie, (0.7,0.7) would give (0,0) // Calculate also weights for point m1+1,n1+1 wx = xp;// + cen_xp; m1 = (int) wx; wx = wx - m1; m2 = m1 + 1; wy = yp;// + cen_yp; n1 = (int) wy; wy = wy - n1; n2 = n1 + 1; // Perform interpolation // if wx == 0 means that the rightest point is useless for this // interpolation, and even it might not be defined if m1=xdim-1 // The same can be said for wy. 
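// Standard bilinear interpolation: the value at the fractional position (xp, yp)
// is a weighted sum of the four neighbouring micrograph pixels, with the weights
// given by the fractional parts wx and wy. For example, for xp = 10.25, yp = 20.75:
//   m1 = 10, m2 = 11, wx = 0.25;  n1 = 20, n2 = 21, wy = 0.75
//   tmp = 0.1875*I(20,10) + 0.0625*I(20,11) + 0.5625*I(21,10) + 0.1875*I(21,11)
// The four weights sum to 1; the m2 < Xdim and n2 < Ydim checks below simply drop
// contributions that would fall outside the micrograph.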
tmp = (RFLOAT)((1 - wy) * (1 - wx) * DIRECT_A2D_ELEM(Imic(), n1, m1)); if (m2 < Xdim) tmp += (RFLOAT)((1 - wy) * wx * DIRECT_A2D_ELEM(Imic(), n1, m2)); if (n2 < Ydim) { tmp += (RFLOAT)(wy * (1 - wx) * DIRECT_A2D_ELEM(Imic(), n2, m1)); if (m2 < Xdim) tmp += (RFLOAT)(wy * wx * DIRECT_A2D_ELEM(Imic(), n2, m2)); } dAij(Ihelix, i, j) = tmp; } // if interp else dAij(Ihelix, i, j) = avg; // Compute new point inside input image xp += Arot(0, 0); yp += Arot(1, 0); } } //#define DEBUG_GETHELICESFROMMICS #ifdef DEBUG_GETHELICESFROMMICS FileName fntt; fntt.compose("helixnew1_beforedown", Xrects.size(),"spi",3); Image Itt; Itt()=Ihelix; Itt.write(fntt); #endif // Downscale if needed MultidimArray Idown = Ihelix; if (down_angpix > angpix) { RFLOAT avg = Idown.computeAvg(); int oldxsize = XSIZE(Idown); int oldysize = YSIZE(Idown); int oldsize = oldxsize; if ( oldxsize != oldysize ) { oldsize = XMIPP_MAX( oldxsize, oldysize ); Idown.setXmippOrigin(); Idown.window(FIRST_XMIPP_INDEX(oldsize), FIRST_XMIPP_INDEX(oldsize), LAST_XMIPP_INDEX(oldsize), LAST_XMIPP_INDEX(oldsize), avg); } int newsize = ROUND(oldsize * angpix / down_angpix); newsize -= newsize%2; //make even in case it is not already resizeMap(Idown, newsize); if ( oldxsize != oldysize ) { int newxsize = ROUND(oldxsize * angpix / down_angpix); int newysize = ROUND(oldysize * angpix / down_angpix); newxsize -= newxsize%2; //make even in case it is not already newysize -= newysize%2; //make even in case it is not already Idown.setXmippOrigin(); Idown.window(FIRST_XMIPP_INDEX(newysize), FIRST_XMIPP_INDEX(newxsize), LAST_XMIPP_INDEX(newysize), LAST_XMIPP_INDEX(newxsize)); } } // adhoc normalisation of images RFLOAT avg,stddev,min,max; Idown.computeStats(avg,stddev,min,max); Idown -= avg; Idown /= -stddev; // divide by minus stddev to flip contrast to white... Xrects[Xrects.size()-1].push_back(Idown); } // end for irot (for positive and negative rotation } // end for over rotations if (verb > 0) progress_bar(imic); //#define DEBUG_GETHELICESFROMMICS2 #ifdef DEBUG_GETHELICESFROMMICS2 FileName fnt; fnt.compose("helixnew1", Xrects.size(),"spi",3); Image It; It()=Xrects[Xrects.size()-1][1]; It.write(fnt); #endif } } } if (verb > 0) progress_bar(MDmics.numberOfObjects()); } void HelixAligner::initialiseClasses() { #ifdef DEBUG std::cerr << "Entering initialiseClasses" << std::endl; #endif if (model.Aref.size() == 0) REPORT_ERROR("BUG: non-initialised model!"); if (verb > 0) std::cout << " Initialising reference(s) ..." << std::endl; if (fn_inimodel != "") { if (nr_classes > 1) REPORT_ERROR("ERROR: can only use initial reference for single-class!"); Image Iref; Iref.read(fn_inimodel); resizeMap(Iref(), YSIZE(model.Aref[0])); Iref().setXmippOrigin(); std::cerr << " model.Arec.size()= " << model.Arec.size() << std::endl; model.Arec[0] = Iref(); // Now project the reconstruction back out into the model.Aref[iclass] Projector PP(YSIZE(model.Aref[0]), TRILINEAR, 2, 1, 1); // Set the FT of img inside the Projector MultidimArray dummy; PP.computeFourierTransformMap(Iref(), dummy, YSIZE(model.Aref[0]), 1); // Calculate all projected lines for (int j = 0; j < XSIZE(model.Aref[0]); j++) { Matrix2D A2D; MultidimArray myline(YSIZE(model.Aref[0])); MultidimArray myFline(YSIZE(model.Aref[0])/2 + 1); FourierTransformer transformer; RFLOAT rot = (RFLOAT)j*360./(XSIZE(model.Aref[0])); rotation2DMatrix(rot, A2D); PP.get2DFourierTransform(myFline, A2D); transformer.inverseFourierTransform(myFline,myline); // Shift the image back to the center... 
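// Each back-transformed 1D line is the projection of the initial 2D reconstruction
// at angle rot = j * 360 / xrect; below it is written into column j of the reference,
// so that the columns of Aref[0] together sample a full 360-degree rotation of the
// xy cross-section.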
myline.setXmippOrigin(); CenterFFT(myline, false); for (int i = 0; i < YSIZE(model.Aref[0]); i++) DIRECT_A2D_ELEM(model.Aref[0], i, j) = DIRECT_A1D_ELEM(myline, i); } #define DEBUGREC2D #ifdef DEBUGREC2D Image It; It()=model.Aref[0]; It.write("after_reproject.spi"); #endif } else { // Randomly position all particles along the X-direction model.initZeroSums(); // Loop over all particles if (verb > 0) init_progress_bar(Xrects.size()); for (int ipart = 0; ipart < Xrects.size(); ipart++) { // Set into a random class int myclass = (int)(rnd_unif() * nr_classes); int random_xoffset = (int)(rnd_unif() * xrect); for (int j_smear = -max_smear; j_smear <= max_smear; j_smear++) { double smearw = (max_smear ==0 ) ? 1 : gaussian1D((double)j_smear, (double)max_smear/3); FOR_ALL_ELEMENTS_IN_ARRAY2D(Xrects[ipart][0]) { int jp = j + random_xoffset + j_smear; while (jp < STARTINGX(model.Aref[myclass])) jp += xrect; while (jp > FINISHINGX(model.Aref[myclass])) jp -= xrect; // this places the original image in the offset-translated center of the rectangle A2D_ELEM(model.Asum[myclass], i, jp) += smearw * A2D_ELEM(Xrects[ipart][0], i, j); A2D_ELEM(model.Asumw[myclass], i, jp) += smearw; // This places the Y-flipped image at half a cross-over distance from the first one int ip = -i; if (ip >= STARTINGY(Xrects[ipart][0]) && ip <= FINISHINGY(Xrects[ipart][0])) { int jjp = jp + xrect/2; while (jjp < STARTINGX(model.Aref[myclass])) jjp += xrect; while (jjp > FINISHINGX(model.Aref[myclass])) jjp -= xrect; A2D_ELEM(model.Asum[myclass], ip, jjp) += smearw * A2D_ELEM(Xrects[ipart][0], i, j); A2D_ELEM(model.Asumw[myclass], ip, jjp) += smearw; } } } model.pdf[myclass] += 1.; if (verb > 0) progress_bar(ipart); } if (verb > 0) progress_bar(Xrects.size()); // After all images have been set, maximise the references in the model maximisation(); } #ifdef DEBUG std::cerr << "Leaving initialiseClasses" << std::endl; #endif } void HelixAligner::expectationOneParticleNoFFT(long int ipart) { int twostarty = 2 * FIRST_XMIPP_INDEX(yrect); double maxccf = -100.; int best_class = -1; int best_k_rot = -1; int best_i_offset = -1; int best_j_offset = -1; for (int iclass = 0; iclass < nr_classes; iclass++) { int k_rot = 0; for (int k_rot = 0; k_rot < Xrects[ipart].size(); k_rot++) { for (int i_offset = -max_shift; i_offset <= max_shift; i_offset++) { for (int j_offset = 0; j_offset < xrect; j_offset++) { double ccf_xa = 0; double ccf_x2 = 0; double ccf_a2 = 0; for (long int i=STARTINGY(Xrects[ipart][k_rot]); i<=FINISHINGY(Xrects[ipart][k_rot]); i++) \ { int ip = i + i_offset; if (ip < -mask_radius_pix || ip > mask_radius_pix) continue; /* while (ip < STARTINGY(model.Aref[iclass])) ip += yrect; while (ip > FINISHINGY(model.Aref[iclass])) ip -= yrect; */ for (long int j=STARTINGX(Xrects[ipart][k_rot]); j<=FINISHINGX(Xrects[ipart][k_rot]); j++) { int jp = j + j_offset; while (jp < STARTINGX(model.Aref[iclass])) jp += xrect; while (jp > FINISHINGX(model.Aref[iclass])) jp -= xrect; // This places the Y-flipped image at half a cross-over distance from the first one int ipp = -ip; // Don't let the image run out of the height of the box if (ipp >= STARTINGY(Xrects[ipart][k_rot]) && ipp <= FINISHINGY(Xrects[ipart][k_rot])) { int jpp = jp + xrect/2; while (jpp < STARTINGX(model.Aref[iclass])) jpp += xrect; while (jpp > FINISHINGX(model.Aref[iclass])) jpp -= xrect; // this places the original image in the offset-translated center of the rectangle ccf_xa += A2D_ELEM(model.Aref[iclass], ip, jp) * A2D_ELEM(Xrects[ipart][k_rot], i, j); ccf_a2 += 
A2D_ELEM(model.Aref[iclass], ip, jp) * A2D_ELEM(model.Aref[iclass], ip, jp); ccf_xa += A2D_ELEM(model.Aref[iclass], ipp, jpp) * A2D_ELEM(Xrects[ipart][k_rot], i, j); ccf_a2 += A2D_ELEM(model.Aref[iclass], ipp, jpp) * A2D_ELEM(model.Aref[iclass], ipp, jpp); ccf_x2 += 2. * A2D_ELEM(Xrects[ipart][k_rot], i, j) * A2D_ELEM(Xrects[ipart][k_rot], i, j); } } // end loop j } // end loop i double ccf = (ccf_x2 > 0. && ccf_a2 > 0.) ? ccf_xa/(sqrt(ccf_x2) * sqrt(ccf_a2)) : 0.; // Find the best fit if (ccf > maxccf) { maxccf = ccf; best_class = iclass; best_k_rot = k_rot; best_i_offset = i_offset; best_j_offset = j_offset; } } // end for j_offset } // end for i_offset } // end for k_rot } if (maxccf < -1.) REPORT_ERROR("BUG: not found maxccf!"); // Now set the optimal Y-translations and rotations in the output STAR file RFLOAT yoff, psi; psi = ori_psis[ipart] + psis[best_k_rot]; yoff = ori_yoffs[ipart] + best_i_offset * down_angpix; MD.setValue(EMDL_ORIENT_ORIGIN_Y_ANGSTROM, yoff, ipart); MD.setValue(EMDL_ORIENT_PSI, psi, ipart); // Now add the image to that class reference // To ensure continuity in the reference: smear out every image along X #pragma omp critical { for (int j_smear = -max_smear; j_smear <= max_smear; j_smear++) { double smearw = (max_smear< XMIPP_EQUAL_ACCURACY) ? 1 : gaussian1D((double)j_smear, (double)max_smear/3); FOR_ALL_ELEMENTS_IN_ARRAY2D(Xrects[ipart][best_k_rot]) { int jp = j + best_j_offset + j_smear; while (jp < STARTINGX(model.Aref[best_class])) jp += xrect; while (jp > FINISHINGX(model.Aref[best_class])) jp -= xrect; int ip = i + best_i_offset; while (ip < STARTINGY(model.Aref[best_class])) ip += yrect; while (ip > FINISHINGY(model.Aref[best_class])) ip -= yrect; // this places the original image in the offset-translated center of the rectangle A2D_ELEM(model.Asum[best_class], ip, jp) += smearw * A2D_ELEM(Xrects[ipart][best_k_rot], i, j); A2D_ELEM(model.Asumw[best_class], ip, jp) += smearw; // This places the Y-flipped image at half a cross-over distance from the first one int ipp = -ip; if (ipp >= STARTINGY(Xrects[ipart][best_k_rot]) && ipp <= FINISHINGY(Xrects[ipart][best_k_rot])) { int jpp = jp + xrect/2; while (jpp > FINISHINGX(model.Aref[best_class])) jpp -= xrect; A2D_ELEM(model.Asum[best_class], ipp, jpp) += smearw * A2D_ELEM(Xrects[ipart][best_k_rot], i, j); A2D_ELEM(model.Asumw[best_class], ipp, jpp) += smearw; } } } model.pdf[best_class] += 1.; } } void HelixAligner::expectation() { // Initialise the wsum_model to zeros model.initZeroSums(); if (verb > 0) { init_progress_bar(Xrects.size()); } #pragma omp parallel for num_threads(nr_threads) for (long int ipart = 0; ipart < Xrects.size(); ipart++) { expectationOneParticleNoFFT(ipart); if (ipart%nr_threads==0) progress_bar(ipart); } progress_bar(Xrects.size()); } void HelixAligner::maximisation() { #ifdef DEBUGREC2D Image It; It()=model.Asumw[0]; It.write("Asumw.spi"); It()=model.Asum[0]; It.write("Asum.spi"); #endif // Update the references double allsum = 0.; for (int iclass = 0; iclass < nr_classes; iclass++) { for (int i = 0; i < yrect; i++) { for (int j = 0; j < xrect; j++) { if (DIRECT_A2D_ELEM(model.Asumw[iclass], i, j) > 0.) 
DIRECT_A2D_ELEM(model.Aref[iclass], i, j) = DIRECT_A2D_ELEM(model.Asum[iclass], i, j) / DIRECT_A2D_ELEM(model.Asumw[iclass], i, j); else DIRECT_A2D_ELEM(model.Aref[iclass], i, j) = 0.; // Also store sum of classes in Asum for writeOut DIRECT_A2D_ELEM(model.Asum[iclass], i, j) = DIRECT_A2D_ELEM(model.Aref[iclass], i, j); } } allsum += model.pdf[iclass]; reconstruct2D(iclass); } for (int iclass = 0; iclass < nr_classes; iclass++) model.pdf[iclass]/=allsum; } void HelixAligner::reconstruct2D(int iclass) { #ifdef DEBUG std::cerr << "Entering reconstruct2D" << std::endl; #endif #ifdef DEBUGREC2D Image It; It()=model.Aref[iclass]; It.write("before_reproject.spi"); #endif // Loop over the length of the helix to get the transforms of all 1D images std::vector > myFlines; for (int j = 0; j < XSIZE(model.Aref[iclass]); j++) { MultidimArray myline(YSIZE(model.Aref[iclass])); MultidimArray myFline; FourierTransformer transformer; for (int i = 0; i < YSIZE(model.Aref[iclass]); i++) DIRECT_A1D_ELEM(myline, i) = DIRECT_A2D_ELEM(model.Aref[iclass], i, j); CenterFFT(myline, true); transformer.FourierTransform(myline, myFline, false); myFlines.push_back(myFline); } // Then reconstruct BackProjector BP(YSIZE(model.Aref[iclass]), 2, "C1", TRILINEAR, 2, 1, 0, 1.9, 15, 1, false); BP.initialiseDataAndWeight(YSIZE(model.Aref[iclass])); for (int j = 0; j < myFlines.size(); j++) { Matrix2D A2D; RFLOAT rot = (RFLOAT)j*360./(XSIZE(model.Aref[iclass])); rotation2DMatrix(rot, A2D); BP.set2DFourierTransform(myFlines[j], A2D); } MultidimArray dummy; model.Arec[iclass].initZeros(); BP.reconstruct(model.Arec[iclass], 10, false, dummy); if (symmetry > 1) { #ifdef DEBUGREC2D It()=model.Arec[iclass]; resizeMap(It(), ori_size); It.write("rec_beforesym.spi"); #endif MultidimArray Asum = model.Arec[iclass]; for (int i = 1; i < symmetry; i++) { RFLOAT ang = i*360./(RFLOAT)symmetry; Matrix2D A2D; rotation2DMatrix(ang, A2D); MultidimArray Arot; Arot.initZeros(model.Arec[iclass]); applyGeometry(model.Arec[iclass], Arot, A2D, false, false); Asum += Arot; } model.Arec[iclass] = Asum / (RFLOAT)symmetry; } if (mask_diameter > 0.) { RFLOAT pixel_radius = mask_diameter/(2.*down_angpix); softMaskOutsideMap(model.Arec[iclass], pixel_radius, 0.); } #ifdef DEBUGREC2D It()=model.Arec[iclass]; resizeMap(It(), ori_size); It.write("rec.spi"); #endif // Now project the reconstruction back out into the model.Aref[iclass] Projector PP(YSIZE(model.Aref[iclass]), TRILINEAR, 2, 1, 1); // Set the FT of img inside the Projector PP.computeFourierTransformMap(model.Arec[iclass], dummy, YSIZE(model.Aref[iclass]), 1); // Calculate all projected lines for (int j = 0; j < myFlines.size(); j++) { Matrix2D A2D; MultidimArray myline(YSIZE(model.Aref[iclass])); FourierTransformer transformer; RFLOAT rot = (RFLOAT)j*360./(XSIZE(model.Aref[iclass])); rotation2DMatrix(rot, A2D); myFlines[j].initZeros(); PP.get2DFourierTransform(myFlines[j], A2D); transformer.inverseFourierTransform(myFlines[j],myline); // Shift the image back to the center... 
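// Re-projection step of reconstruct2D: the 2D cross-section Arec has just been
// back-projected from the columns of the class average; here each column j of Aref
// is replaced by the projection of Arec at angle j * 360 / xrect. Because the
// rectangle is two crossover distances wide, one full period of the helical twist
// (360 degrees) maps linearly onto the columns of the reference.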
CenterFFT(myline, false); for (int i = 0; i < YSIZE(model.Aref[iclass]); i++) DIRECT_A2D_ELEM(model.Aref[iclass], i, j) = DIRECT_A1D_ELEM(myline, i); } #ifdef DEBUGREC2D It()=model.Aref[iclass]; It.write("after_reproject.spi"); #endif #ifdef DEBUG std::cerr << "Leaving reconstruct2D" << std::endl; #endif } void HelixAligner::writeOut(int iter) { //std::cout << " **** Model for iteration " << iter << std::endl; #ifdef DEBUG for (int iclass = 0; iclass < nr_classes; iclass++) { FileName fn_class = fn_out + "_it" + integerToString(iter, 3) + "_class" + integerToString(iclass+1, 3)+".spi"; Image Ic; Ic()=model.Aref[iclass]; Ic.write(fn_class); std::cout << " * Written " << fn_class << std::endl; fn_class = fn_out + "_it" + integerToString(iter, 3) + "_class" + integerToString(iclass+1, 3)+"_reconstructed.spi"; Ic()=model.Arec[iclass]; Ic.write(fn_class); std::cout << " * Written " << fn_class << std::endl; } #else FileName fn_iter = fn_out + "_it" + integerToString(iter, 3); MD.write(fn_iter+".star"); Image Aimg(xrect, yrect, 1, nr_classes); for (int iclass = 0; iclass < nr_classes; iclass++) { FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY2D(model.Aref[iclass]) { DIRECT_NZYX_ELEM(Aimg(), iclass, 0, i, j) = DIRECT_A2D_ELEM(model.Aref[iclass], i, j); } } Aimg.write(fn_iter + "_reprojections.mrcs"); for (int iclass = 0; iclass < nr_classes; iclass++) { FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY2D(model.Asum[iclass]) { DIRECT_NZYX_ELEM(Aimg(), iclass, 0, i, j) = DIRECT_A2D_ELEM(model.Asum[iclass], i, j); } } Aimg.write(fn_iter + "_summed_classes.mrcs"); Image Aimg2(yrect, yrect, 1, nr_classes); for (int iclass = 0; iclass < nr_classes; iclass++) { FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY2D(model.Arec[iclass]) { DIRECT_NZYX_ELEM(Aimg2(), iclass, 0, i, j) = DIRECT_A2D_ELEM(model.Arec[iclass], i, j); } } Aimg2.write(fn_iter + "_reconstructed.mrcs"); #endif if (nr_classes > 1) { for (int iclass = 0; iclass < nr_classes; iclass++) { std:: cout << " * Fraction class " << iclass+1 << " = " << model.pdf[iclass] << std::endl; } } } void HelixAligner::reconstruct3D() { for (int iclass = 0; iclass < nr_classes; iclass++) { FileName fn_class = fn_out + "_class" + integerToString(iclass+1, 3)+"_projections.spi"; Image Ic; Ic()=model.Aref[iclass]; Ic.setSamplingRateInHeader(angpix); Ic.write(fn_class); std::cout << " * Written " << fn_class << std::endl; fn_class = fn_out + "_class" + integerToString(iclass+1, 3)+"_rec2d.spi"; Ic()=model.Arec[iclass]; resizeMap(Ic(), ori_size); Ic.setSamplingRateInHeader(angpix); Ic.write(fn_class); MultidimArray Mori = Ic(); std::cout << " * Written " << fn_class << std::endl; // The 3D reconstruction float deg_per_pixel = 180. 
* angpix / (crossover_distance); Ic().resize(ori_size, ori_size, ori_size); for (int k = 0; k < ZSIZE(Ic()); k++) { float ang = deg_per_pixel * k; Matrix2D Arot; rotation2DMatrix(ang, Arot); MultidimArray Mrot; Mrot.initZeros(Mori); applyGeometry(Mori, Mrot, Arot, true, false); FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY2D(Mrot) DIRECT_A3D_ELEM(Ic(), k, i, j) = DIRECT_A2D_ELEM(Mrot, i, j); } fn_class = fn_out + "_class" + integerToString(iclass+1, 3)+"_rec3d.mrc"; Ic.setSamplingRateInHeader(angpix); Ic.write(fn_class); std::cout << " * Written " << fn_class << std::endl; } } // Run multiple iterations void HelixAligner::run() { // Write out the starting model as well writeOut(0); int decrease_smear = ROUND((float)max_smear/(float)(nr_iter+5)); for (int iter = 1; iter <= nr_iter; iter++) { if (verb > 0) { std::cout << " Iteration " << iter <<" of " << nr_iter << std::endl; if (max_smear > 0) std::cout << " = smearing references by " << max_smear << " downsampled pixels along helical axis " << std::endl; } expectation(); maximisation(); writeOut(iter); if (max_smear > 0) max_smear -= decrease_smear; } // Reconstruct the final solution in 3D reconstruct3D(); } relion-3.1.3/src/helix_inimodel2d.h000066400000000000000000000113771411340063500171730ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #ifndef SRC_HELIX_INIMODEL2D_H_ #define SRC_HELIX_INIMODEL2D_H_ #include "src/parallel.h" #include "src/time.h" #include "src/filename.h" #include "src/metadata_table.h" #include "src/image.h" #include "src/euler.h" #include "src/backprojector.h" #include "src/transformations.h" #include "src/fftw.h" class HelixAlignerModel { public: // The reference images std::vector > Aref; // The reconstructed xy-slice std::vector > Arec; // The running sum reference images std::vector > Asum; // The total sum reference images std::vector > Asumw; // Total contribution to each class std::vector pdf; // Empty constructor HelixAlignerModel() {}; // Destructor ~HelixAlignerModel() {clear(); }; // Cleaning up void clear(); // To initialise model void initialise(int nr_classes, int ydim, int xdim); // To initialise the sums to zero void initZeroSums(); }; class HelixAligner { public: // I/O Parser IOParser parser; //Input images FileName fn_imgs; // Output rootname FileName fn_out; // Number of iterations, classes int nr_iter, nr_classes; // Pixel size of input images float angpix; // random int random_seed; // Diameter of circular mask within to extract images float mask_diameter, mask_radius_pix; // Maximum resolution to be taken into account (approximate, as adjusted to accommodate exact crossover_distance float maxres; // How many pixels away from the target resolution to search for optimal downscaled pixel size? int search_size; // Distance in Angstroms between 2 cross-overs (i.e. 180 degrees of twist) float crossover_distance; // Height in Angstroms to be taken into account float height; // How much smearing to apply to the initial reference (to start with a smoother reference along the helical axis) int max_smear; // How many pixels to search up and down? int max_shift; RFLOAT max_shift_A; // How many degrees to rotate? 
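// (Each input segment is pre-rotated in-plane from -max_rotate to +max_rotate in
// steps of step_rotate, together with the corresponding 180-degree flipped copies;
// see HelixAligner::readImages().)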
RFLOAT max_rotate; // Rotation step RFLOAT step_rotate; // The model to be refined HelixAlignerModel model; // Input micrographs FileName fn_mics; MetaDataTable MDmics; // STAR file with all (selected) micrographs, the suffix of the coordinates files, and the directory where the coordinate files are FileName fn_coord_suffix, fn_coord_dir ; // Width of images to be extracted int extract_width; // Filename of initial 2D reconstruction for model FileName fn_inimodel; // Only make 3d bool do_only_make_3d; // Symmetry order (Cn) int symmetry; // Number of openMP threads int nr_threads; private: // Size of the original and downscaled images int ori_size, down_size; // Size of the rectangle int xrect, yrect; // X-Size of the images being placed inside the rectangle int ximg; // Downsized pixel size float down_angpix; // Verbosity int verb; // Pre-calculated Gaussian weight vector MultidimArray weight; // Pre-read (rotated versions of) all Xrect of the (downscaled) images into RAM std::vector > > Xrects; // Foroptimal orientation control std::vector psis, ori_psis, ori_yoffs; MetaDataTable MD; public: // Empty constructor HelixAligner() {}; // Destructor ~HelixAligner() {clear(); }; // Usage void usage(); // Cleaning up void clear(); void parseInitial(int argc, char **argv); void initialise(); // Read in all the images void readImages(); // 22 June 2017: extract helices from start-end coordinates in micrographs void getHelicesFromMics(); // Initialise classes randomly void initialiseClasses(); void expectationOneParticleNoFFT(long int ipart); void expectation(); void maximisation(); void reconstruct2D(int iclass); void writeOut(int iter); void reconstruct3D(); // Run multiple iterations void run(); }; #endif /* SRC_HELIX_INIMODEL2D_H_ */ relion-3.1.3/src/image.cpp000066400000000000000000000273431411340063500153710ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include "src/image.h" //#define DEBUG_REGULARISE_HELICAL_SEGMENTS // Get size of datatype unsigned long gettypesize(DataType type) { unsigned long size; switch ( type ) { case UChar: case SChar: size = sizeof(char); break; case UShort: case Short: size = sizeof(short); break; case UInt: case Int: size = sizeof(int); break; case Float: size = sizeof(float); break; case Double: size = sizeof(RFLOAT); break; case Boolean: size = sizeof(bool); break; case UHalf: REPORT_ERROR("Logic error: UHalf (4-bit) needs special consideration. 
Don't use this function."); break; default: size = 0; } return(size); } int datatypeString2Int(std::string s) { toLower(s); if (!strcmp(s.c_str(),"uchar")) { return UChar; } else if (!strcmp(s.c_str(),"ushort")) { return UShort; } else if (!strcmp(s.c_str(),"short")) { return Short; } else if (!strcmp(s.c_str(),"uint")) { return UInt; } else if (!strcmp(s.c_str(),"int")) { return Int; } else if (!strcmp(s.c_str(),"float")) { return Float; } else REPORT_ERROR("datatypeString2int; unknown datatype"); } // Some image-specific operations void normalise( Image &I, int bg_radius, RFLOAT white_dust_stddev, RFLOAT black_dust_stddev, bool do_ramp, bool is_helical_segment, RFLOAT helical_mask_tube_outer_radius_pix, RFLOAT tilt_deg, RFLOAT psi_deg) { RFLOAT avg, stddev; if (2*bg_radius > XSIZE(I())) REPORT_ERROR("normalise ERROR: 2*bg_radius is larger than image size!"); if ( (is_helical_segment) && ( (2 * (helical_mask_tube_outer_radius_pix + 1)) > XSIZE(I()) ) ) REPORT_ERROR("normalise ERROR: Diameter of helical tube is larger than image size!"); if (is_helical_segment) { if (I().getDim() == 2) tilt_deg = 0.; } if (white_dust_stddev > 0. || black_dust_stddev > 0.) { // Calculate initial avg and stddev values calculateBackgroundAvgStddev(I, avg, stddev, bg_radius, is_helical_segment, helical_mask_tube_outer_radius_pix, tilt_deg, psi_deg); // Remove white and black noise if (white_dust_stddev > 0.) removeDust(I, true, white_dust_stddev, avg, stddev); if (black_dust_stddev > 0.) removeDust(I, false, black_dust_stddev, avg, stddev); } if (do_ramp) subtractBackgroundRamp(I, bg_radius, is_helical_segment, helical_mask_tube_outer_radius_pix, tilt_deg, psi_deg); // Calculate avg and stddev (also redo if dust was removed!) calculateBackgroundAvgStddev(I, avg, stddev, bg_radius, is_helical_segment, helical_mask_tube_outer_radius_pix, tilt_deg, psi_deg); if (stddev < 1e-10) { std::cerr << " WARNING! Stddev of image " << I.name() << " is zero! Skipping normalisation..." << std::endl; } else { // Subtract avg and divide by stddev for all pixels FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(I()) DIRECT_MULTIDIM_ELEM(I(), n) = (DIRECT_MULTIDIM_ELEM(I(), n) - avg) / stddev; } } void calculateBackgroundAvgStddev(Image &I, RFLOAT &avg, RFLOAT &stddev, int bg_radius, bool is_helical_segment, RFLOAT helical_mask_tube_outer_radius_pix, RFLOAT tilt_deg, RFLOAT psi_deg) { int bg_radius2 = bg_radius * bg_radius; RFLOAT sum, sum2, n, val, d; sum = sum2 = n = 0.; avg = stddev = 0.; if (is_helical_segment) { int dim = I().getDim(); if ( (dim != 2) && (dim != 3) ) REPORT_ERROR("image.cpp::calculateBackgroundAvgStddev(): 2D or 3D image is required!"); if (dim == 2) tilt_deg = 0.; Matrix1D coords; Matrix2D A; // Init coords coords.clear(); coords.resize(3); coords.initZeros(); // Init rotational matrix A A.clear(); A.resize(3, 3); // Rotate the particle (helical axes are X and Z for 2D and 3D segments respectively) Euler_angles2matrix(0., tilt_deg, psi_deg, A, false); // Don't put negative signs before tilt and psi values, use 'transpose' instead A = A.transpose(); // Refer to the code in calculateBackgroundAvgStddev() for 3D implementation #ifdef DEBUG_REGULARISE_HELICAL_SEGMENTS FileName fn_test; Image img_test; int angle = ROUND(fabs(psi_deg)); fn_test = integerToString(angle); if (psi_deg < 0.) 
fn_test = fn_test.addExtension("neg"); fn_test = fn_test.addExtension("mrc"); img_test.clear(); img_test().resize(I()); img_test().initZeros(); std::cout << "FileName = " << fn_test.c_str() << std::endl; #endif // Calculate avg in the background pixels FOR_ALL_ELEMENTS_IN_ARRAY3D(I()) { // X, Y, Z coordinates if (dim == 3) ZZ(coords) = ((RFLOAT)(k)); else ZZ(coords) = 0.; YY(coords) = ((RFLOAT)(i)); XX(coords) = ((RFLOAT)(j)); // Rotate coords = A * coords; // Distance from the point to helical axis (perpendicular to X axis) if (dim == 3) d = sqrt(YY(coords) * YY(coords) + XX(coords) * XX(coords)); else d = ABS(YY(coords)); if (d > helical_mask_tube_outer_radius_pix) { val = A3D_ELEM(I(), k, i, j); sum += val; sum2 += val * val; n += 1.; #ifdef DEBUG_REGULARISE_HELICAL_SEGMENTS A3D_ELEM(img_test(), k, i, j) = 1.; // Mark bg pixels as 1, others as 0 #endif } } if (n < 0.9) { REPORT_ERROR("image.cpp::calculateBackgroundAvgStddev(): No pixels in background are found. Radius of helical mask is too large."); } avg = sum / n; stddev = sqrt( (sum2 / n) - (avg * avg) ); #ifdef DEBUG_REGULARISE_HELICAL_SEGMENTS img_test.write(fn_test); #endif } else { // Calculate avg in the background pixels FOR_ALL_ELEMENTS_IN_ARRAY3D(I()) { if ( (k*k + i*i + j*j) > bg_radius2) { val = A3D_ELEM(I(), k, i, j); sum += val; sum2 += val * val; n += 1.; } } if (n < 0.9) { REPORT_ERROR("image.cpp::calculateBackgroundAvgStddev(): No pixels in background are found. Radius of circular mask is too large."); } avg = sum / n; stddev = sqrt( (sum2 / n) - (avg * avg) ); } return; } void subtractBackgroundRamp( Image &I, int bg_radius, bool is_helical_segment, RFLOAT helical_mask_tube_outer_radius_pix, RFLOAT tilt_deg, RFLOAT psi_deg) { int bg_radius2 = bg_radius * bg_radius; fit_point3D point; std::vector allpoints; RFLOAT pA, pB, pC, avgbg, stddevbg, minbg, maxbg; if (I().getDim() == 3) REPORT_ERROR("ERROR %% calculateBackgroundRamp is not implemented for 3D data!"); if (is_helical_segment) // not implemented for 3D data { Matrix1D coords; Matrix2D A; if (I().getDim() == 2) tilt_deg = 0.; // Init coords coords.clear(); coords.resize(3); coords.initZeros(); // Init rotational matrix A A.clear(); A.resize(3, 3); // Rotate the particle (helical axes are X and Z for 2D and 3D segments respectively) // Since Z = 0, tilt_deg does not matter Euler_angles2matrix(0., tilt_deg, psi_deg, A, false); // Don't put negative signs before tilt and psi values, use 'transpose' instead A = A.transpose(); FOR_ALL_ELEMENTS_IN_ARRAY2D(I()) // not implemented for 3D data { ZZ(coords) = 0.; YY(coords) = ((RFLOAT)(i)); XX(coords) = ((RFLOAT)(j)); // Rotate coords = A * coords; if (ABS(YY(coords)) > helical_mask_tube_outer_radius_pix) // not implemented for 3D data { point.x = j; point.y = i; point.z = A2D_ELEM(I(), i, j); point.w = 1.; allpoints.push_back(point); } } if (allpoints.size() < 5) REPORT_ERROR("image.cpp::subtractBackgroundRamp(): Less than 5 pixels in background are found. 
Radius of helical mask is too large."); } else { FOR_ALL_ELEMENTS_IN_ARRAY2D(I()) { if (i*i + j*j > bg_radius2) { point.x = j; point.y = i; point.z = A2D_ELEM(I(), i, j); point.w = 1.; allpoints.push_back(point); } } } fitLeastSquaresPlane(allpoints, pA, pB, pC); // Substract the plane from the image FOR_ALL_ELEMENTS_IN_ARRAY2D(I()) { A2D_ELEM(I(), i, j) -= pA * j + pB * i + pC; } } void removeDust(Image &I, bool is_white, RFLOAT thresh, RFLOAT avg, RFLOAT stddev) { FOR_ALL_ELEMENTS_IN_ARRAY3D(I()) { RFLOAT aux = A3D_ELEM(I(), k, i, j); if (is_white && aux - avg > thresh * stddev) A3D_ELEM(I(), k, i, j) = rnd_gaus(avg, stddev); else if (!is_white && aux - avg < -thresh * stddev) A3D_ELEM(I(), k, i, j) = rnd_gaus(avg, stddev); } } void invert_contrast(Image &I) { FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(I()) { DIRECT_MULTIDIM_ELEM(I(), n) *= -1; } } void rescale(Image &I, int mysize) { int olddim = XSIZE(I()); resizeMap(I(), mysize); // Also modify the scale in the MDmainheader (if present) RFLOAT oldscale, newscale; if (I.MDMainHeader.getValue(EMDL_IMAGE_SAMPLINGRATE_X, oldscale)) { newscale = oldscale * (RFLOAT)olddim / (RFLOAT)mysize; I.MDMainHeader.setValue(EMDL_IMAGE_SAMPLINGRATE_X, newscale); } if (I.MDMainHeader.getValue(EMDL_IMAGE_SAMPLINGRATE_Y, oldscale)) { newscale = oldscale * (RFLOAT)olddim / (RFLOAT)mysize; I.MDMainHeader.setValue(EMDL_IMAGE_SAMPLINGRATE_Y, newscale); } if (I().getDim() == 3 && I.MDMainHeader.getValue(EMDL_IMAGE_SAMPLINGRATE_Z, oldscale) ) { newscale = oldscale * (RFLOAT)olddim / (RFLOAT)mysize; I.MDMainHeader.setValue(EMDL_IMAGE_SAMPLINGRATE_Z, newscale); } } void rewindow(Image &I, int mysize) { // Check 2D or 3D dimensionality if (I().getDim() == 2) { I().window(FIRST_XMIPP_INDEX(mysize), FIRST_XMIPP_INDEX(mysize), LAST_XMIPP_INDEX(mysize), LAST_XMIPP_INDEX(mysize)); } else if (I().getDim() == 3) { I().window(FIRST_XMIPP_INDEX(mysize), FIRST_XMIPP_INDEX(mysize), FIRST_XMIPP_INDEX(mysize), LAST_XMIPP_INDEX(mysize), LAST_XMIPP_INDEX(mysize), LAST_XMIPP_INDEX(mysize)); } } void getImageContrast(MultidimArray &image, RFLOAT &minval, RFLOAT &maxval, RFLOAT &sigma_contrast) { // First check whether to apply sigma-contrast, i.e. set minval and maxval to the mean +/- sigma_contrast times the stddev bool redo_minmax = (sigma_contrast > 0. || minval != maxval); if (sigma_contrast > 0. || minval == maxval) { RFLOAT avg, stddev; image.computeStats(avg, stddev, minval, maxval); if (sigma_contrast > 0.) { minval = avg - sigma_contrast * stddev; maxval = avg + sigma_contrast * stddev; redo_minmax = true; } } if (redo_minmax) { FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(image) { RFLOAT val = DIRECT_MULTIDIM_ELEM(image, n); if (val > maxval) DIRECT_MULTIDIM_ELEM(image, n) = maxval; else if (val < minval) DIRECT_MULTIDIM_ELEM(image, n) = minval; } } } relion-3.1.3/src/image.h000066400000000000000000001221201411340063500150230ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres", "Takanori Nakane" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ /*************************************************************************** * * Authors: Sjors H.W. Scheres (scheres@cnb.csic.es) * * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC * * Part of this module has been developed by Lorenzo Zampighi and Nelson Tang * Dept. Physiology of the David Geffen School of Medicine * Univ. of California, Los Angeles. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA * 02111-1307 USA * * All comments concerning this program package may be sent to the * e-mail address 'xmipp@cnb.csic.es' ***************************************************************************/ #ifndef IMAGE_H #define IMAGE_H #include #include #include #include #include #include #include "src/funcs.h" #include "src/memory.h" #include "src/filename.h" #include "src/multidim_array.h" #include "src/transformations.h" #include "src/metadata_table.h" #include "src/fftw.h" /// @defgroup Images Images //@{ /** Data type. * This class defines the datatype of the data inside this image. */ typedef enum { Unknown_Type = 0, // Undefined data type UChar = 1, // Unsigned character or byte type SChar = 2, // Signed character (for CCP4) UShort = 3, // Unsigned integer (2-byte) Short = 4, // Signed integer (2-byte) UInt = 5, // Unsigned integer (4-byte) Int = 6, // Signed integer (4-byte) Long = 7, // Signed integer (4 or 8 byte, depending on system) Float = 8, // Floating point (4-byte) Double = 9, // Double precision floating point (8-byte) Boolean = 10, // Boolean (1-byte?) UHalf = 11, // Signed 4-bit integer (SerialEM extension) LastEntry = 15 // This must be the last entry } DataType; /** Write mode * This class defines the writing behavior. 
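 * WRITE_OVERWRITE discards any existing file and writes a new one, WRITE_APPEND adds
 * an object at the end of an existing stack, WRITE_REPLACE overwrites a particular
 * object within a stack, and WRITE_READONLY only permits reading.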
*/ typedef enum { WRITE_OVERWRITE, //forget about the old file and overwrite it WRITE_APPEND, //append and object at the end of a stack, so far can not append stacks WRITE_REPLACE, //replace a particular object by another WRITE_READONLY //only can read the file } WriteMode; extern "C" { typedef struct TiffInMemory { unsigned char *buf; tsize_t size; toff_t pos; } TiffInMemory; static tsize_t TiffInMemoryReadProc(thandle_t handle, tdata_t buf, tsize_t read_size) { TiffInMemory *tiff_handle = (TiffInMemory*)handle; #ifdef TIFF_DEBUG std::cout << "TiffInMemoryReadProc: read_size = " << read_size << " cur_pos = " << tiff_handle->pos << " buf_size = " << tiff_handle->size << std::endl; #endif if (tiff_handle->pos + read_size >= tiff_handle->size) REPORT_ERROR("TiffInMemoryReadProc: seeking beyond the end of the buffer."); memcpy(buf, tiff_handle->buf + tiff_handle->pos, read_size); tiff_handle->pos += read_size; return read_size; } static tsize_t TiffInMemoryWriteProc(thandle_t handle, tdata_t buf, tsize_t write_size) { #ifdef TIFF_DEBUG REPORT_ERROR("TiffInMemoryWriteProc: Not implemented."); #endif return -1; } static toff_t TiffInMemorySeekProc(thandle_t handle, toff_t offset, int whence) { TiffInMemory *tiff_handle = (TiffInMemory*)handle; #ifdef TIFF_DEBUG std::cout << "TiffInMemorySeekProc: offset = " << offset << " cur_pos = " << tiff_handle->pos << " buf_size = " << tiff_handle->size << std::endl; #endif switch (whence) { case SEEK_SET: tiff_handle->pos = 0; break; case SEEK_CUR: tiff_handle->pos += offset; break; case SEEK_END: REPORT_ERROR("TIFFInMemorySeekProc: SEEK_END is not supported."); // break; // intentional to suppress compiler warnings. } if (tiff_handle->pos >= tiff_handle->size) REPORT_ERROR("TIFFInMemorySeekProc: seeking beyond the end of the buffer."); return 0; } static int TiffInMemoryCloseProc(thandle_t handle) { #ifdef TIFF_DEBUG std::cout << "TiffInMemoryCloseProc" << std::endl; #endif return 0; } static toff_t TiffInMemorySizeProc(thandle_t handle) { #ifdef TIFF_DEBUG std::cout << "TiffInMemorySizeProc" << std::endl; #endif return ((TiffInMemory*)handle)->size; } static int TiffInMemoryMapFileProc(thandle_t handle, tdata_t *base, toff_t *size) { TiffInMemory *tiff_handle = (TiffInMemory*)handle; #ifdef TIFF_DEBUG std::cout << "TiffInMemoryMapFileProc" << std::endl; #endif *base = tiff_handle->buf; *size = tiff_handle->size; return 1; } static void TiffInMemoryUnmapFileProc(thandle_t handle, tdata_t base, toff_t size) { #ifdef TIFF_DEBUG std::cout << "TiffInMemoryUnmapFileProc" << std::endl; #endif return; } } /** File handler class * This struct is used to share the File handlers with Image Collection class */ class fImageHandler { public: FILE* fimg; // Image File handler FILE* fhed; // Image File header handler TIFF* ftiff; FileName ext_name; // Filename extension bool exist; // Shows if the file exists bool isTiff; // Shows if this is a TIFF file /** Empty constructor */ fImageHandler() { fimg=NULL; fhed=NULL; ftiff=NULL; ext_name=""; exist=false; isTiff=false; } /** Destructor: closes file (if it still open) */ ~fImageHandler() { closeFile(); } void openFile(const FileName &name, int mode = WRITE_READONLY) { // Close any file that was left open in this handler if (!(fimg ==NULL && fhed == NULL)) closeFile(); FileName fileName, headName = ""; // get the format, checking for possible format specifier before suffix // getFileFormat("file.spi") will return "spi" // getFileFormat("file.spi:mrc") will return "mrc" // getFileFormat("file") will return "" ext_name = 
name.getFileFormat(); long int dump; name.decompose(dump, fileName); // Subtract 1 to have numbering 0...N-1 instead of 1...N if (dump > 0) dump--; // create the filename from a possible input format specifier (file.spi:mrc means "it's called .spi, but it's really a .mrc") // file.spi:mrc -> file.spi fileName = fileName.removeFileFormat(); size_t found = fileName.find_first_of("%"); if (found!=std::string::npos) fileName = fileName.substr(0, found) ; exist = exists(fileName); std::string wmChar; switch (mode) { case WRITE_READONLY: if (!exist) REPORT_ERROR((std::string) "Cannot read file " + fileName + " It does not exist" ); wmChar = "r"; break; case WRITE_OVERWRITE: wmChar = "w"; break; case WRITE_APPEND: if (exist) wmChar = "r+"; else wmChar = "w+"; break; case WRITE_REPLACE: wmChar = "r+"; break; } if (ext_name.contains("img") || ext_name.contains("hed")) { fileName = fileName.withoutExtension(); headName = fileName.addExtension("hed"); fileName = fileName.addExtension("img"); } else if(ext_name=="") { ext_name="spi"; // SPIDER is default format if none is specified fileName = fileName.addExtension(ext_name); } isTiff = ext_name.contains("tif"); if (isTiff && mode != WRITE_READONLY) REPORT_ERROR((std::string)"TIFF is supported only for reading"); // Open image file if ((!isTiff && ((fimg = fopen(fileName.c_str(), wmChar.c_str())) == NULL)) || (isTiff && ((ftiff = TIFFOpen(fileName.c_str(), "r")) == NULL)) ) REPORT_ERROR((std::string)"Image::openFile cannot open: " + name); if (headName != "") { if ((fhed = fopen(headName.c_str(), wmChar.c_str())) == NULL) REPORT_ERROR((std::string)"Image::openFile cannot open: " + headName); } else fhed = NULL; } void closeFile() { ext_name=""; exist=false; // Check whether the file was closed already if (fimg == NULL && fhed == NULL && ftiff == NULL) return; if (isTiff && ftiff != NULL) { TIFFClose(ftiff); ftiff = NULL; } if (!isTiff && fclose(fimg) != 0) REPORT_ERROR((std::string)"Can not close image file "); else fimg = NULL; if (fhed != NULL && fclose(fhed) != 0) REPORT_ERROR((std::string)"Can not close header file "); else fhed = NULL; } }; /** Returns memory size of datatype */ unsigned long gettypesize(DataType type); /** Convert datatype string to datatypr enun */ int datatypeString2Int(std::string s); /** Swapping trigger. * Threshold file z size above which bytes are swapped. */ #define SWAPTRIG 65535 /** Template class for images. * The image class is the general image handling class. */ template class Image { public: MultidimArray data; // The image data array MetaDataTable MDMainHeader; // metadata for the file private: FileName filename; // File name FILE* fimg; // Image File handler FILE* fhed; // Image File header handler bool stayOpen; // To maintain the image file open after read/write int dataflag; // Flag to force reading of the data unsigned long i; // Current image number (may be > NSIZE) unsigned long offset; // Data offset int swap; // Perform byte swapping upon reading long int replaceNsize; // Stack size in the replace case bool _exists; // does target file exists? // equal 0 is not exists or not a stack bool mmapOn; // Mapping when loading from file int mFd; // Handle the file in reading method and mmap size_t mappedSize; // Size of the mapped file public: /** Empty constructor * * An empty image is created. * * @code * Image I; * @endcode */ Image() { mmapOn = false; clear(); MDMainHeader.addObject(); } /** Constructor with size * * A blank image (0.0 filled) is created with the given size. 
Pay attention * to the dimension order: Y and then X. * * @code * Image I(64,64); * @endcode */ Image(long int Xdim, long int Ydim, long int Zdim=1, long int Ndim=1) { mmapOn = false; clear(); data.resize(Ndim, Zdim, Ydim, Xdim); MDMainHeader.addObject(); } /** Clear. * Initialize everything to 0 */ void clear() { if (mmapOn) { munmap(data.data-offset,mappedSize); close(mFd); data.data = NULL; } else data.clear(); dataflag = -1; i = 0; filename = ""; offset = 0; swap = 0; clearHeader(); replaceNsize=0; mmapOn = false; } /** Clear the header of the image */ void clearHeader() { MDMainHeader.clear(); } /** Destructor. */ ~Image() { clear(); } /** Specific read functions for different file formats */ #include "src/rwSPIDER.h" #include "src/rwMRC.h" #include "src/rwIMAGIC.h" #include "src/rwTIFF.h" /** Is this file an image * * Check whether a real-space image can be read * */ bool isImage(const FileName &name) { return !read(name, false); } /** Rename the image */ void rename (const FileName &name) { filename = name; } /** General read function * you can read a single image from a single image file * or a single image file from an stack, in the second case * the select slide may come in the image name or in the select_img parameter * file name takes precedence over select_img * If -1 is given the whole object is read * The number before @ in the filename is 1-indexed, while select_img is 0-indexed. */ int read(const FileName &name, bool readdata=true, long int select_img=-1, bool mapData = false, bool is_2D = false) { if (name == "") REPORT_ERROR("ERROR: trying to read image with empty file name!"); int err = 0; fImageHandler hFile; hFile.openFile(name); err = _read(name, hFile, readdata, select_img, mapData, is_2D); // the destructor of fImageHandler will close the file // Negative errors are bad return err; } /** Read function from a file that has already been opened * */ int readFromOpenFile(const FileName &name, fImageHandler &hFile, long int select_img, bool is_2D = false) { int err = 0; err = _read(name, hFile, true, select_img, false, is_2D); // Reposition file pointer for a next read rewind(fimg); return err; } /** General write function * select_img= which slice should I replace * overwrite = 0, append slice * overwrite = 1 overwrite slice * * NOTE: * select_img has higher priority than the number before "@" in the name. * select_img counts from 0, while the number before "@" in the name from 1! */ void write(FileName name="", long int select_img=-1, bool isStack=false, int mode=WRITE_OVERWRITE) { const FileName &fname = (name == "") ? 
filename : name; fImageHandler hFile; hFile.openFile(name, mode); _write(fname, hFile, select_img, isStack, mode); // the destructor of fImageHandler will close the file } /** Cast a page of data from type dataType to type Tdest * input pointer char * */ void castPage2T(char *page, T *ptrDest, DataType datatype, size_t pageSize ) { switch (datatype) { case Unknown_Type: REPORT_ERROR("ERROR: datatype is Unknown_Type"); case UChar: { if (typeid(T) == typeid(unsigned char)) memcpy(ptrDest, page, pageSize * sizeof(T)); else { unsigned char *ptr = (unsigned char *)page; for (size_t i = 0; i < pageSize; i++) ptrDest[i] = (T)ptr[i]; } break; } case SChar: { if (typeid(T) == typeid(signed char)) { memcpy(ptrDest, page, pageSize * sizeof(T)); } else { signed char *ptr = (signed char *)page; for (size_t i = 0; i < pageSize; i++) ptrDest[i] = (T)ptr[i]; } break; } case UShort: { if (typeid(T) == typeid(unsigned short)) { memcpy(ptrDest, page, pageSize * sizeof(T)); } else { unsigned short *ptr = (unsigned short *)page; for(size_t i = 0; i < pageSize; i++) ptrDest[i] = (T)ptr[i]; } break; } case Short: { if (typeid(T) == typeid(short)) { memcpy(ptrDest, page, pageSize * sizeof(T)); } else { short *ptr = (short *)page; for(size_t i = 0; i < pageSize; i++) ptrDest[i] = (T)ptr[i]; } break; } case UInt: { if (typeid(T) == typeid(unsigned int)) { memcpy(ptrDest, page, pageSize * sizeof(T)); } else { unsigned int *ptr = (unsigned int *)page; for(size_t i = 0; i < pageSize; i++) ptrDest[i] = (T)ptr[i]; } break; } case Int: { if (typeid(T) == typeid(int)) { memcpy(ptrDest, page, pageSize * sizeof(T)); } else { int *ptr = (int *)page; for(size_t i = 0; i < pageSize; i++) ptrDest[i] = (T)ptr[i]; } break; } case Long: { if (typeid(T) == typeid(long)) { memcpy(ptrDest, page, pageSize * sizeof(T)); } else { long *ptr = (long *)page; for(size_t i = 0; i < pageSize; i++) ptrDest[i] = (T)ptr[i]; } break; } case Float: { if (typeid(T) == typeid(float)) { memcpy(ptrDest, page, pageSize * sizeof(T)); } else { float *ptr = (float *)page; for(size_t i = 0; i < pageSize; i++) ptrDest[i] = (T)ptr[i]; } break; } case Double: { if (typeid(T) == typeid(RFLOAT)) { memcpy(ptrDest, page, pageSize * sizeof(T)); } else { RFLOAT *ptr = (RFLOAT *)page; for(size_t i = 0; i < pageSize; i++) ptrDest[i] = (T)ptr[i]; } break; } case UHalf: { if (pageSize % 2 != 0) REPORT_ERROR("Logic error in castPage2T; for UHalf, pageSize must be even."); for(size_t i = 0, ilim = pageSize / 2; i < ilim; i++) { // Here we are assuming the fill-order is LSB2MSB according to IMOD's // iiProcessReadLine() in libiimod/mrcsec.c. // The default fill-order in the TIFF specification is MSB2LSB // but IMOD assumes LSB2MSB even for TIFF. // See IMOD's iiTIFFCheck() in libiimod/iitif.c. ptrDest[i * 2 ] = (T)(page[i] & 15); // 1111 = 1+2+4+8 = 15 ptrDest[i * 2 + 1] = (T)((page[i] >> 4) & 15); } break; } default: { std::cerr<<"Datatype= "< (iTemp); } /** Write an entire page as datatype * * A page of datasize_n elements T is cast to datatype and written to fimg * The memory for the casted page is allocated and freed internally. 
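 * (A scratch buffer of datasize_n * gettypesize(datatype) bytes is allocated,
 * filled via castPage2Datatype() and written out with a single fwrite before
 * being freed again.)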
*/ void writePageAsDatatype(FILE * fimg, DataType datatype, size_t datasize_n ) { size_t datasize = datasize_n * gettypesize(datatype); char * fdata = (char *) askMemory(datasize); castPage2Datatype(MULTIDIM_ARRAY(data), fdata, datatype, datasize_n); fwrite( fdata, datasize, 1, fimg ); freeMemory(fdata, datasize); } /** Swap an entire page * input pointer char * */ void swapPage(char * page, size_t pageNrElements, DataType datatype) { unsigned long datatypesize = gettypesize(datatype); #ifdef DEBUG std::cerr<<"DEBUG swapPage: Swapping image data with swap= " << swap<<" datatypesize= "< 1 ) { for (unsigned long i=0; i 1 ) { REPORT_ERROR("Image Class::ReadData: mmap with multiple \ images file not compatible. Try selecting a unique image."); } fclose(fimg); //if ( ( mFd = open(filename.c_str(), O_RDWR, S_IREAD | S_IWRITE) ) == -1 ) if ( ( mFd = open(filename.c_str(), O_RDWR, S_IRUSR | S_IWUSR) ) == -1 ) REPORT_ERROR("Image Class::ReadData: Error opening the image file."); char * map; mappedSize = pagesize+offset; if ( (map = (char*) mmap(0,mappedSize, PROT_READ | PROT_WRITE, MAP_SHARED, mFd, 0)) == (void*) -1 ) REPORT_ERROR("Image Class::ReadData: mmap of image file failed."); data.data = reinterpret_cast (map+offset); } else { // Reset select to get the correct offset if ( select_img < 0 ) select_img = 0; char* page = NULL; // Allocate memory for image data (Assume xdim, ydim, zdim and ndim are already set // if memory already allocated use it (no resize allowed) data.coreAllocateReuse(); myoffset = offset + select_img*(pagesize + pad); //#define DEBUG #ifdef DEBUG data.printShape(); printf("DEBUG: Page size: %ld offset= %d \n", pagesize, offset); printf("DEBUG: Swap = %d Pad = %ld Offset = %ld\n", swap, pad, offset); printf("DEBUG: myoffset = %d select_img= %d \n", myoffset, select_img); #endif if (pagesize > pagemax) page = (char *) askMemory(pagemax*sizeof(char)); else page = (char *) askMemory(pagesize*sizeof(char)); // Because we requested XYSIZE to be even for UHalf, this is always safe. int error_fseek = fseek(fimg, myoffset, SEEK_SET); if (error_fseek != 0) return -1; for (size_t myn=0; myn pagemax ) readsize = pagemax; if (datatype == UHalf) readsize_n = readsize * 2; else readsize_n = readsize/datatypesize; #ifdef DEBUG std::cout << "NX = " << XSIZE(data) << " NY = " << YSIZE(data) << " NZ = " << ZSIZE(data) << std::endl; std::cout << "pagemax = " << pagemax << " pagesize = " << pagesize << " readsize = " << readsize << " readsize_n = " << readsize_n << std::endl; #endif //Read page from disc size_t result = fread( page, readsize, 1, fimg ); if (result != 1) return -2; //swap per page if (swap) swapPage(page, readsize, datatype); // cast to T per page castPage2T(page, MULTIDIM_ARRAY(data) + haveread_n, datatype, readsize_n); haveread_n += readsize_n; } if ( pad > 0 ) { //fread( padpage, pad, 1, fimg); error_fseek = fseek( fimg, pad, SEEK_CUR ); if (error_fseek != 0) return -1; } } //if ( pad > 0 ) // freeMemory(padpage, pad*sizeof(char)); if ( page != NULL ) freeMemory(page, pagesize*sizeof(char)); #ifdef DEBUG printf("DEBUG img_read_data: Finished reading and converting data\n"); #endif } return 0; } /** Data access * * This operator can be used to access the data multidimarray. * In this way we could resize an image just by * resizing its associated matrix or we could add two images by adding their * matrices. 
* @code * I().resize(128, 128); * I2() = I1() + I2(); * @endcode */ MultidimArray& operator()() { return data; } const MultidimArray& operator()() const { return data; } /** Pixel access * * This operator is used to access a pixel within a 2D image. This is a * logical access, so you could access to negative positions if the image * has been defined so (see the general explanation for the class). * * @code * std::cout << "Grey level of pixel (-3,-3) of the image = " << I(-3, -3) * << std::endl; * * I(-3, -3) = I(-3, -2); * @endcode */ T& operator()(int i, int j) const { return A2D_ELEM(data, i, j); } /** Set pixel * (direct access) needed by swig */ void setPixel(int i, int j, T v) { IMGPIXEL(*this,i,j)=v; } /** Get pixel * (direct acces) needed by swig */ T getPixel(int i, int j) const { return IMGPIXEL(*this,i,j); } /** Voxel access * * This operator is used to access a voxel within a 3D image. This is a * logical access, so you could access to negative positions if the image * has been defined so (see the general explanation for the class). * * @code * std::cout << "Grey level of pixel (-3,-3, 1) of the volume = " << I(-3, -3, 1) * << std::endl; * * I(-3, -3, 1) = I(-3, -2, 0); * @endcode */ T& operator()(int k, int i, int j) const { return A3D_ELEM(data, k, i, j); } /** Get file name * * @code * std::cout << "Image name = " << I.name() << std::endl; * @endcode */ const FileName & name() const { return filename; } /** Get Image dimensions */ void getDimensions(int &Xdim, int &Ydim, int &Zdim, long int &Ndim) const { Xdim = XSIZE(data); Ydim = YSIZE(data); Zdim = ZSIZE(data); Ndim = NSIZE(data); } long unsigned int getSize() const { return NZYXSIZE(data); } /* Is there label in the main header */ bool mainContainsLabel(EMDLabel label) const { return MDMainHeader.containsLabel(label); } /** Data type * * @code * std::cout << "datatype= " << dataType() << std::endl; * @endcode */ int dataType() const { int dummy; MDMainHeader.getValue(EMDL_IMAGE_DATATYPE, dummy); return dummy; } /** Sampling RateX * * @code * std::cout << "sampling= " << samplingRateX() << std::endl; * @endcode */ RFLOAT samplingRateX(const long int n = 0) const { RFLOAT dummy = 1.; MDMainHeader.getValue(EMDL_IMAGE_SAMPLINGRATE_X, dummy); return dummy; } /** Sampling RateY * * @code * std::cout << "sampling= " << samplingRateY() << std::endl; * @endcode */ RFLOAT samplingRateY(const long int n = 0) const { RFLOAT dummy = 1.; MDMainHeader.getValue(EMDL_IMAGE_SAMPLINGRATE_Y, dummy); return dummy; } /** Set file name */ void setName(const FileName &_filename) { filename = _filename; } /* Set image statistics in the main header * */ void setStatisticsInHeader() { RFLOAT avg,stddev,minval,maxval; data.computeStats(avg, stddev, minval, maxval); MDMainHeader.setValue(EMDL_IMAGE_STATS_AVG, avg); MDMainHeader.setValue(EMDL_IMAGE_STATS_STDDEV, stddev); MDMainHeader.setValue(EMDL_IMAGE_STATS_MIN, minval); MDMainHeader.setValue(EMDL_IMAGE_STATS_MAX, maxval); } void setSamplingRateInHeader(RFLOAT rate_x, RFLOAT rate_y = -1., RFLOAT rate_z = -1.) { MDMainHeader.setValue(EMDL_IMAGE_SAMPLINGRATE_X, rate_x); if (rate_y < 0.) rate_y = rate_x; MDMainHeader.setValue(EMDL_IMAGE_SAMPLINGRATE_Y, rate_y); if (ZSIZE(data)>1) { if (rate_z < 0.) 
rate_z = rate_x; MDMainHeader.setValue(EMDL_IMAGE_SAMPLINGRATE_Z, rate_z); } } /** Show image properties */ friend std::ostream& operator<<(std::ostream& o, const Image& I) { o << "Image type : "; o << "Real-space image" << std::endl; o << "Reversed : "; if (I.swap) o << "TRUE" << std::endl; else o << "FALSE" << std::endl; o << "Data type : "; switch (I.dataType()) { case Unknown_Type: o << "Undefined data type"; break; case UChar: o << "Unsigned character or byte type"; break; case SChar: o << "Signed character (for CCP4)"; break; case UShort: o << "Unsigned integer (2-byte)"; break; case Short: o << "Signed integer (2-byte)"; break; case UInt: o << "Unsigned integer (4-byte)"; break; case Int: o << "Signed integer (4-byte)"; break; case Long: o << "Signed integer (4 or 8 byte, depending on system)"; break; case Float: o << "Floating point (4-byte)"; break; case Double: o << "Double precision floating point (8-byte)"; break; case Boolean: o << "Boolean (1-byte?)"; break; case UHalf: o << "4-bit integer"; break; } o << std::endl; o << "dimensions : " << NSIZE(I()) << " x " << ZSIZE(I()) << " x " << YSIZE(I()) << " x " << XSIZE(I()); o << " (noObjects x slices x rows x columns)" << std::endl; return o; } /** Sum this object with other file and keep in this object */ void sumWithFile(const FileName &fn) { Image aux; aux.read(fn); (*this)()+=aux(); } int readTiffInMemory(void* buf, size_t size, bool readdata=true, long int select_img = -1, bool mapData = false, bool is_2D = false) { int err = 0; TiffInMemory handle; handle.buf = (unsigned char*)buf; handle.size = size; handle.pos = 0; // Check whether to read the data or only the header dataflag = ( readdata ) ? 1 : -1; // Check whether to map the data or not mmapOn = mapData; //Just clear the header before reading MDMainHeader.clear(); MDMainHeader.addObject(); TIFF* ftiff = TIFFClientOpen("in-memory-tiff", "r", (thandle_t)&handle, TiffInMemoryReadProc, TiffInMemoryWriteProc, TiffInMemorySeekProc, TiffInMemoryCloseProc, TiffInMemorySizeProc, TiffInMemoryMapFileProc, TiffInMemoryUnmapFileProc); err = readTIFF(ftiff, select_img, readdata, true, "in-memory-tiff"); TIFFClose(ftiff); return err; } private: int _read(const FileName &name, fImageHandler &hFile, bool readdata=true, long int select_img = -1, bool mapData = false, bool is_2D = false) { int err = 0; // Check whether to read the data or only the header dataflag = ( readdata ) ? 1 : -1; // Check whether to map the data or not mmapOn = mapData; FileName ext_name = hFile.ext_name; fimg = hFile.fimg; fhed = hFile.fhed; long int dump; name.decompose(dump, filename); // Subtract 1 to have numbering 0...N-1 instead of 1...N if (dump > 0) dump--; filename = name; if (select_img == -1) select_img = dump; #undef DEBUG //#define DEBUG #ifdef DEBUG std::cerr << "READ\n" << "name="<= 0 && ext_name.contains("mrc")) REPORT_ERROR("Image::read ERROR: stacks of images in MRC-format should have extension .mrcs; .mrc extensions are reserved for 3D maps."); else if (ext_name.contains("mrc")) // mrc 3D map err = readMRC(select_img, false, name); else if (ext_name.contains("img") || ext_name.contains("hed"))// err = readIMAGIC(select_img);//imagic is always an stack else if (ext_name.contains("dm")) REPORT_ERROR("The Digital Micrograph format (DM3, DM4) is not supported. 
You can convert it to MRC by other programs, for example, dm2mrc in IMOD."); else if (ext_name.contains("eer") || ext_name.contains("ecc")) REPORT_ERROR("BUG: EER movies should be handled by EERRenderer, not by Image."); else err = readSPIDER(select_img); // Negative errors are bad. return err; } void _write(const FileName &name, fImageHandler &hFile, long int select_img=-1, bool isStack=false, int mode=WRITE_OVERWRITE) { int err = 0; FileName ext_name = hFile.ext_name; fimg = hFile.fimg; fhed = hFile.fhed; _exists = hFile.exist; filename = name; long int aux; FileName filNamePlusExt(name); name.decompose(aux, filNamePlusExt); // Subtract 1 to have numbering 0...N-1 instead of 1...N if (aux > 0) aux--; if (select_img == -1) select_img = aux; size_t found = filNamePlusExt.find_first_of("%"); std::string imParam = ""; if (found!=std::string::npos) { imParam = filNamePlusExt.substr(found+1).c_str(); filNamePlusExt = filNamePlusExt.substr(0, found) ; } found = filNamePlusExt.find_first_of(":"); if ( found!=std::string::npos) filNamePlusExt = filNamePlusExt.substr(0, found); //#define DEBUG #ifdef DEBUG std::cerr << "write" <getDimensions(Xdim,Ydim, Zdim, Ndim); Image auxI; replaceNsize=0;//reset replaceNsize in case image is reused if(select_img == -1 && mode == WRITE_REPLACE) REPORT_ERROR("write: Please specify object to be replaced"); else if(!_exists && mode == WRITE_REPLACE) { std:: stringstream replace_number; replace_number << select_img; REPORT_ERROR((std::string)"Cannot replace object number: " + replace_number.str() + " in file " +filename + ". It does not exist"); } else if (_exists && (mode == WRITE_REPLACE || mode == WRITE_APPEND)) { auxI.dataflag = -2; auxI.read(filNamePlusExt,false); int _Xdim, _Ydim, _Zdim; long int _Ndim; auxI.getDimensions(_Xdim,_Ydim, _Zdim, _Ndim); replaceNsize=_Ndim; if(Xdim!=_Xdim || Ydim!=_Ydim || Zdim!=_Zdim ) REPORT_ERROR("write: target and source objects have different size"); if(mode==WRITE_REPLACE && select_img>_Ndim) REPORT_ERROR("write: cannot replace image stack is not large enough"); if(auxI.replaceNsize <1 && (mode==WRITE_REPLACE || mode==WRITE_APPEND)) REPORT_ERROR("write: output file is not an stack"); } else if(!_exists && mode==WRITE_APPEND) { ; } else if (mode == WRITE_READONLY)//If new file we are in the WRITE_OVERWRITE mode { REPORT_ERROR( (std::string) "File " + name + " opened in read-only mode. 
Cannot write."); } /* * SELECT FORMAT */ if(ext_name.contains("spi") || ext_name.contains("xmp") || ext_name.contains("stk") || ext_name.contains("vol")) err = writeSPIDER(select_img,isStack,mode); else if (ext_name.contains("mrcs")) writeMRC(select_img,true,mode); else if (ext_name.contains("mrc")) writeMRC(select_img,false,mode); else if (ext_name.contains("img") || ext_name.contains("hed")) writeIMAGIC(select_img,mode); else err = writeSPIDER(select_img,isStack,mode); if ( err < 0 ) { std::cerr << " Filename = " << filename << " Extension= " << ext_name << std::endl; REPORT_ERROR((std::string)"Error writing file "+ filename + " Extension= " + ext_name); } /* If initially the file did not exist, once the first image is written, then the file exists */ if (!_exists) hFile.exist = _exists = true; } }; // Some image-specific operations // For image normalisation void normalise(Image &I, int bg_radius, RFLOAT white_dust_stddev, RFLOAT black_dust_stddev, bool do_ramp, bool is_helical_segment = false, RFLOAT helical_mask_tube_outer_radius_pix = -1., RFLOAT tilt_deg = 0., RFLOAT psi_deg = 0.); void calculateBackgroundAvgStddev(Image &I, RFLOAT &avg, RFLOAT &stddev, int bg_radius, bool is_helical_segment = false, RFLOAT helical_mask_tube_outer_radius_pix = -1., RFLOAT tilt_deg = 0., RFLOAT psi_deg = 0.); void subtractBackgroundRamp(Image &I, int bg_radius, bool is_helical_segment = false, RFLOAT helical_mask_tube_outer_radius_pix = -1., RFLOAT tilt_deg = 0., RFLOAT psi_deg = 0.); // For dust removal void removeDust(Image &I, bool is_white, RFLOAT thresh, RFLOAT avg, RFLOAT stddev); // for contrast inversion void invert_contrast(Image &I); // for image re-scaling void rescale(Image &I, int mysize); // for image re-windowing void rewindow(Image &I, int mysize); /// @defgroup ImageFormats Image Formats /// @ingroup Images // Functions belonging to this topic are commented in rw*.h //@} #define GREYSCALE 0 #define BLACKGREYREDSCALE 1 #define BLUEGREYWHITESCALE 2 #define BLUEGREYREDSCALE 3 #define RAINBOWSCALE 4 #define CYANBLACKYELLOWSCALE 5 void getImageContrast(MultidimArray &image, RFLOAT &minval, RFLOAT &maxval, RFLOAT &sigma_contrast); inline void greyToRGB(const int color_scheme, const unsigned char grey, unsigned char &red, unsigned char &green, unsigned char &blue) { switch (color_scheme) { case GREYSCALE: red = green = blue = grey; break; case BLACKGREYREDSCALE: if (grey >= 128) { red = 255; blue = green = FLOOR((RFLOAT)(255 - grey)*2); } else { red = green = blue = FLOOR((RFLOAT)(grey*2.)); } break; case BLUEGREYWHITESCALE: if (grey >= 128) { red = green = blue = FLOOR((RFLOAT)((grey - 128) * 2)); } else { red = 0; blue = green = FLOOR((RFLOAT)(255 - 2 * grey)); } break; case BLUEGREYREDSCALE: { const RFLOAT a = grey / 85.0; // group const int X = FLOOR(a); //this is the integer part const unsigned char Y = FLOOR(255 * (a - X)); //fractional part from 0 to 255 switch(X) { case 0: red = 0; green = 255-Y; blue = 255 - Y; break; case 1: red = Y; green = Y; blue = Y; break; case 2: red = 255; green = 255-Y; blue = 255 - Y; break; case 3: red = 255; green = 0; blue = 0; break; } break; } case RAINBOWSCALE: { const RFLOAT a = (255 - grey) / 64.; //invert and group const int X = FLOOR(a); const unsigned char Y = FLOOR(255 * (a - X)); //fractional part from 0 to 255 switch(X) { case 0: red = 255; green = Y; blue = 0; break; case 1: red = 255 - Y; green = 255; blue = 0; break; case 2: red = 0; green = 255; blue = Y; break; case 3: red = 0; green = 255-Y; blue = 255; break; case 4: red = 0; green = 
0; blue = 255; break; } break; } case CYANBLACKYELLOWSCALE: { const RFLOAT d_rb = 3 * (grey - 128); const RFLOAT d_g = 3 * (std::abs(grey - 128) - 42); red = (unsigned char)(FLOOR(XMIPP_MIN(255., XMIPP_MAX(0.0, d_rb)))); green = (unsigned char)(FLOOR(XMIPP_MIN(255., XMIPP_MAX(0.0, d_g)))); blue = (unsigned char)(FLOOR(XMIPP_MIN(255., XMIPP_MAX(0.0, -d_rb)))); break; } } return; } #endif relion-3.1.3/src/jaz/000077500000000000000000000000001411340063500143565ustar00rootroot00000000000000relion-3.1.3/src/jaz/Fourier_helper.cpp000066400000000000000000000043511411340063500200370ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include void FourierHelper::FourierShift2D(MultidimArray& img, RFLOAT xshift, RFLOAT yshift) { const long w = img.xdim; const long h = img.ydim; xshift /= h; yshift /= h; if (ABS(xshift) < XMIPP_EQUAL_ACCURACY && ABS(yshift) < XMIPP_EQUAL_ACCURACY) { return; } for (long int yy = 0; yy < h; yy++) for (long int xx = 0; xx < w; xx++) { RFLOAT x = xx; RFLOAT y = yy < w? yy : yy - h; RFLOAT dotp = -2.0 * PI * (x * xshift + y * yshift); RFLOAT a, b; #ifdef RELION_SINGLE_PRECISION SINCOSF(dotp, &b, &a); #else SINCOS(dotp, &b, &a); #endif RFLOAT c = DIRECT_A2D_ELEM(img, yy, xx).real; RFLOAT d = DIRECT_A2D_ELEM(img, yy, xx).imag; RFLOAT ac = a * c; RFLOAT bd = b * d; RFLOAT ab_cd = (a + b) * (c + d); DIRECT_A2D_ELEM(img, yy, xx) = Complex(ac - bd, ab_cd - ac - bd); } } void FourierHelper::FourierShift2D(MultidimArray &img, RFLOAT xshift, RFLOAT yshift) { FourierTransformer ft; MultidimArray imgC; ft.FourierTransform(img, imgC); //FourierShift2D(imgC, xshift, yshift); shiftImageInFourierTransform(imgC, imgC, img.ydim, xshift, yshift); ft.inverseFourierTransform(imgC, img); } relion-3.1.3/src/jaz/Fourier_helper.h000066400000000000000000000023601411340063500175020ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #ifndef FOURIER_HELPER_H #define FOURIER_HELPER_H #include #include
class FourierHelper { public:
    static void FourierShift2D(MultidimArray<Complex>& img, RFLOAT xshift, RFLOAT yshift);
    static void FourierShift2D(MultidimArray<RFLOAT>& img, RFLOAT xshift, RFLOAT yshift);
}; #endif
relion-3.1.3/src/jaz/Gaussian_pyramid.cpp000066400000000000000000000016411411340063500203630ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/
relion-3.1.3/src/jaz/Gaussian_pyramid.h000066400000000000000000000202471411340063500200330ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved.
***************************************************************************/ #ifndef GAUSSIAN_PYRAMID_H #define GAUSSIAN_PYRAMID_H #include #include #include #include #include #include #include template class GaussianPyramid { public: GaussianPyramid(const Image& img, double sigma = 1.0, double cutoff = 2.5, int maxLev = -1); double sigma, cutoff; std::vector> levels; std::vector scales; T value(double x, double y, double s); std::vector> getUpsampled(); Image getUpsampledLevel(int i); Image getInterpolated(double sig); static void test(const Image& img); static void interpolationTest(const Image& img); static void timeTest(const Image& img0); }; template GaussianPyramid::GaussianPyramid(const Image& img, double sigma, double cutoff, int maxLev) : sigma(sigma), cutoff(cutoff), levels(0), scales(0) { int w0 = img.data.xdim; int h0 = img.data.ydim; int w = w0; int h = h0; Image curr = img; double currScale = 1.0; double currL0Var = 0.0; double wLast = w; if (PI * sigma < cutoff) { REPORT_ERROR("GaussianPyramid::GaussianPyramid: unable to shrink image levels - cutoff too large.\n"); } levels.push_back(curr); scales.push_back(currScale); double dScale = PI * sigma / cutoff; int i = 0; if (maxLev < 0) maxLev = floor(log((double)w0)/log(dScale)); while (i < maxLev) { double tgtSigma_l0 = sigma * pow(dScale, i); double dSigma = sqrt(tgtSigma_l0 * tgtSigma_l0 - currL0Var) / currScale; currL0Var += currScale * currScale * dSigma * dSigma; int wNext = (int)(w / currScale + 0.5); double dScaleAct = wLast / wNext; wLast = wNext; currScale *= dScaleAct; Image filt, next; FilterHelper::separableGaussianXY(curr, filt, dSigma, 3*dSigma, true); ResamplingHelper::subsample2D_cubic(filt, dScaleAct, next); levels.push_back(next); scales.push_back(currScale); curr = next; i++; } } template std::vector> GaussianPyramid::getUpsampled() { std::vector> out(levels.size()); out[0] = levels[0]; for (int i = 1; i < levels.size(); i++) { ResamplingHelper::upsample2D_cubic(levels[i], scales[i], out[i], true); } return out; } template Image GaussianPyramid::getUpsampledLevel(int i) { Image out; ResamplingHelper::upsample2D_cubic(levels[i], scales[i], out, true); return out; } template Image GaussianPyramid::getInterpolated(double sig) { Image out(levels[0].data.xdim, levels[0].data.ydim); if (sig <= 0) return levels[0]; else if (sig < sigma) { int k = 3*sig < 1? 1 : 3*sig; FilterHelper::separableGaussianXY(levels[0], out, sig, k, true); } else { double lambda = PI * sigma / cutoff; double l = log(sig/sigma) / log(lambda) + 1.0; int l0 = (int)l; double f = l - l0; if (l0 >= levels.size()-1) return levels[levels.size()-1]; Image img0 = getUpsampledLevel(l0); Image img1 = getUpsampledLevel(l0+1); ImageOp::linearCombination(img0, img1, 1.0 - f, f, out); } return out; } template void GaussianPyramid::test(const Image& img) { int lev = 10; int mp = 5; double k = 2.5; double sg = 1.0; GaussianPyramid gp(img, 1.0, 2.5, lev); std::vector> test(lev*mp), baseline(lev*mp); double lambda = PI*sg/k; for (int i = 0; i < lev; i++) for (int j = 0; j < mp; j++) { double t = (mp*i + j)/(double)(mp); double sig = t < 1.0? 
sg * t : sg * pow(lambda,t-1); test[mp*i + j] = gp.getInterpolated(sig); FilterHelper::separableGaussianXY(img, baseline[mp*i + j], sig, 3*sig, true); } ImageLog::write(test, "debug/pyr_interpolated"); ImageLog::write(baseline, "debug/pyr_baseline"); } template void GaussianPyramid::interpolationTest(const Image &img) { const double ds = 0.25; for (double s0 = 0.0; s0 < 2.0; s0 += ds) { double s1 = s0 + ds; Image img0, img1, imgP, imgT; FilterHelper::separableGaussianXY(img, img0, s0, 6, true); FilterHelper::separableGaussianXY(img, img1, s1, 6, true); imgT.data.resize(img.data); std::stringstream stss0; stss0 << s0; std::stringstream stss1; stss1 << s1; std::ofstream ofs("debug/interp_"+stss0.str()+"-"+stss1.str()+".dat"); for (double p = s0; p <= s1; p += ds/50.0) { FilterHelper::separableGaussianXY(img, imgP, p, 6, true); double c_min = 1e20; double t_opt = 0.0; for (double t = 0.0; t < 1.0; t += 0.005) { ImageOp::linearCombination(img0, img1, 1.0 - t, t, imgT); double c = FilterHelper::L2distance(imgT, imgP); if (c < c_min) { c_min = c; t_opt = t; } } ofs << p << " " << s0 + t_opt*(s1 - s0) << "\n"; std::cout << p << " between " << s0 << " and " << s1 << ": t = " << t_opt << "\n"; } std::cout << "\n"; } } template void GaussianPyramid::timeTest(const Image& img0) { int lev = 10; int pc = 20; double sig = 5.0; const int s = img0.data.xdim; const int sh = img0.data.xdim/2 + 1; Image img = img0; Image sum(s,s); sum.data.initZeros(); double t0 = omp_get_wtime(); GaussianPyramid gp(img, 1.0, 2.5, lev); for (int p = 0; p < pc-1; p++) { gp = GaussianPyramid(img, 1.0, 2.5, lev); } double t1 = omp_get_wtime(); std::cout << "creating " << pc << " pyramids: " << (t1 - t0) << "s\n"; Image bl; for (int p = 0; p < pc; p++) for (int q = 0; q < pc-1; q++) { bl = gp.getInterpolated(5.0); ImageOp::linearCombination(sum, bl, 1, 1, sum); } ImageLog::write(sum, "debug/sum_pyr"); double t2 = omp_get_wtime(); std::cout << "reading " << pc*(pc-1) << " images: " << (t2 - t1) << "s\n"; std::cout << "both: " << (t2 - t0) << "s\n"; FourierTransformer ft; Image fs(sh,s); Image rs(s,s); double sig_hat = s/(2.0 * PI * sig); double sh2 = sig_hat * sig_hat; sum.data.initZeros(); for (int p = 0; p < pc; p++) for (int q = 0; q < pc-1; q++) { ft.FourierTransform(img(), fs()); for (int y = 0; y < s; y++) for (int x = 0; x < sh; x++) { double yy = y < sh? y : y - s; double xx = x; fs(y,x) *= exp(-0.5*(xx*xx + yy*yy)/sh2); } ft.inverseFourierTransform(fs(), rs()); ImageOp::linearCombination(sum, rs, 1, 1, sum); } ImageLog::write(sum, "debug/sum_fftw"); double t3 = omp_get_wtime(); std::cout << "performing " << pc*(pc-1) << " Fourier convolutions: " << (t3 - t2) << "s\n"; } #endif relion-3.1.3/src/jaz/Gaussian_pyramid.h.bak000066400000000000000000000156171411340063500205740ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. 
Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef GAUSSIAN_PYRAMID_H #define GAUSSIAN_PYRAMID_H #include #include #include #include #include #include template class GaussianPyramid { public: GaussianPyramid(const Image& img, double sigma = 1.0, double cutoff = 2.5, int maxLev = -1, int initial = 3); double sigma, cutoff; int initial; std::vector> levels; std::vector scales; T value(double x, double y, double s); std::vector> getUpsampled(); Image getUpsampledLevel(int i); Image getInterpolated(double sig); static void test(const Image& img); static void interpolationTest(const Image& img); }; template GaussianPyramid::GaussianPyramid(const Image& img, double sigma, double cutoff, int maxLev, int initial) : sigma(sigma), cutoff(cutoff), initial(initial), levels(0), scales(0) { int w0 = img.data.xdim; int h0 = img.data.ydim; int w = w0; int h = h0; Image curr = img; double currScale = 1.0; double currL0Var = 0.0; double wLast = w; if (PI * sigma < cutoff) { REPORT_ERROR("GaussianPyramid::GaussianPyramid: unable to shrink image levels - cutoff too large.\n"); } levels.push_back(curr); scales.push_back(currScale); double dScale = PI * sigma / cutoff; int i = 0; for (int j = 0; j < initial; j++) { const double sig = (j+1)*sigma/(double)(initial+1); Image filt; FilterHelper::separableGaussianXY(levels[0], filt, sig, 3*sigma, true); levels.push_back(filt); scales.push_back(1.0); } if (maxLev < 0) maxLev = floor(log((double)w0)/log(dScale)); while (i < maxLev) { double tgtSigma_l0 = sigma * pow(dScale, i); double dSigma = sqrt(tgtSigma_l0 * tgtSigma_l0 - currL0Var) / currScale; currL0Var += currScale * currScale * dSigma * dSigma; int wNext = (int)(w / currScale + 0.5); double dScaleAct = wLast / wNext; wLast = wNext; currScale *= dScaleAct; Image filt, next; FilterHelper::separableGaussianXY(curr, filt, dSigma, 3*dSigma, true); ResamplingHelper::subsample2D_cubic(filt, dScaleAct, next); levels.push_back(next); scales.push_back(currScale); curr = next; i++; } } template std::vector> GaussianPyramid::getUpsampled() { std::vector> out(levels.size()); out[0] = levels[0]; for (int i = 1; i < levels.size(); i++) { ResamplingHelper::upsample2D_cubic(levels[i], scales[i], out[i], true); } return out; } template Image GaussianPyramid::getUpsampledLevel(int i) { Image out; ResamplingHelper::upsample2D_cubic(levels[i], scales[i], out, true); return out; } template Image GaussianPyramid::getInterpolated(double sig) { Image out(levels[0].data.xdim, levels[0].data.ydim); if (sig <= 0) return levels[0]; else if (sig < sigma) { // s = j*sigma/(double)(initial+1) // <=> // j = (initial+1)*s/sigma double l = (initial+1)*sig/sigma; int l0 = (int)l; double f = l - l0; Image img0 = levels[l0]; Image img1 = levels[l0+1]; ImageOp::linearCombination(img0, img1, 1.0 - f, f, out); } else { double lambda = PI * sigma / cutoff; double l = log(sig/sigma) / log(lambda) + 1.0 + initial; int l0 = (int)l; double f = l - l0; if (l0 >= levels.size()-1) return levels[levels.size()-1]; Image img0 = getUpsampledLevel(l0); Image img1 = getUpsampledLevel(l0+1); ImageOp::linearCombination(img0, img1, 1.0 - f, f, out); } return out; } template void GaussianPyramid::test(const Image& img) { int lev = 4; int mp = 20; double k = 2.5; double sg = 1.0; GaussianPyramid gp(img, 1.0, 2.5, lev); std::vector> test(lev*mp), baseline(lev*mp); double lambda = PI*sg/k; for (int i = 0; i < lev; i++) for (int j = 0; j < mp; 
j++) { double t = (mp*i + j)/(double)(mp); double sig = t < 1.0? sg * t : sg * pow(lambda,t-1); test[mp*i + j] = gp.getInterpolated(sig); FilterHelper::separableGaussianXY(img, baseline[mp*i + j], sig, 3*sig, true); } VtkHelper::write(test, "debug/pyr_interpolated.vtk"); VtkHelper::write(baseline, "debug/pyr_baseline.vtk"); } template void GaussianPyramid::interpolationTest(const Image &img) { const double ds = 0.25; for (double s0 = 0.0; s0 < 2.0; s0 += ds) { double s1 = s0 + ds; Image img0, img1, imgP, imgT; FilterHelper::separableGaussianXY(img, img0, s0, 6, true); FilterHelper::separableGaussianXY(img, img1, s1, 6, true); imgT.data.resize(img.data); std::stringstream stss0; stss0 << s0; std::stringstream stss1; stss1 << s1; std::ofstream ofs("debug/interp_"+stss0.str()+"-"+stss1.str()+".dat"); for (double p = s0; p <= s1; p += ds/50.0) { FilterHelper::separableGaussianXY(img, imgP, p, 6, true); double c_min = 1e20; double t_opt = 0.0; for (double t = 0.0; t < 1.0; t += 0.005) { ImageOp::linearCombination(img0, img1, 1.0 - t, t, imgT); double c = FilterHelper::L2distance(imgT, imgP); if (c < c_min) { c_min = c; t_opt = t; } } ofs << p << " " << s0 + t_opt*(s1 - s0) << "\n"; std::cout << p << " between " << s0 << " and " << s1 << ": t = " << t_opt << "\n"; } std::cout << "\n"; } } #endif relion-3.1.3/src/jaz/aberration_fit.cpp000066400000000000000000000206711411340063500200600ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include #include OriginalBasis AberrationFit::fitBasic( Image phase, Image weight, double angpix) { Matrix2D A(5,5); Matrix1D b(5); A.initZeros(); b.initZeros(); const int sh = phase.data.xdim; const int s = phase.data.ydim; const double as = angpix * s; OriginalBasis basis; std::vector vals(5); for (int yi = 0; yi < s; yi++) for (int xi = 0; xi < sh; xi++) { const double x = xi/as; const double y = yi > sh? (yi - s)/as: yi/as; basis.getBasisValues(x, y, &vals[0]); const double v = phase(yi,xi); const double w = weight(yi,xi); for (int r = 0; r < 5; r++) { b(r) += w * w * vals[r] * v; for (int c = 0; c < 5; c++) { A(r,c) += w * w * vals[r] * vals[c]; } } } const double tol = 1e-20; Matrix1D sol(5); solve(A, b, sol, tol); for (int i = 0; i < 5; i++) { basis.coefficients[i] = sol(i); } return basis; } Image AberrationFit::draw(AberrationBasis *fit, double angpix, int s) { const int sh = s/2 + 1; const double as = angpix * s; Image vis(sh,s); std::vector vals(fit->coefficients.size(), 0.0); for (int yi = 0; yi < s; yi++) for (int xi = 0; xi < sh; xi++) { const double x = xi/as; const double y = yi > sh? 
(yi - s)/as: yi/as; fit->getBasisValues(x, y, &vals[0]); double v = 0.0; for (int i = 0; i < 5; i++) { v += fit->coefficients[i] * vals[i]; } vis(yi,xi) = v; } return vis; } AberrationBasis::AberrationBasis(int dims) : coefficients(dims, 0.0) {} void AberrationBasis::offsetCtf(MetaDataTable &mdt, int particle) { // identical to CTF::read() and CTF::initialize(): double kV, DeltafU, DeltafV, azimuthal_angle, Cs, scale, Q0, phase_shift; if (!mdt.getValue(EMDL_CTF_VOLTAGE, kV, particle)) kV = 200; if (!mdt.getValue(EMDL_CTF_DEFOCUSU, DeltafU, particle)) DeltafU = 0; if (!mdt.getValue(EMDL_CTF_DEFOCUSV, DeltafV, particle)) DeltafV = DeltafU; if (!mdt.getValue(EMDL_CTF_DEFOCUS_ANGLE, azimuthal_angle, particle)) azimuthal_angle = 0; if (!mdt.getValue(EMDL_CTF_CS, Cs, particle)) Cs = 0; //if (!mdt.getValue(EMDL_CTF_SCALEFACTOR, scale, particle)) scale = 1; if (!mdt.getValue(EMDL_CTF_Q0, Q0, particle)) Q0 = 0; //if (!mdt.getValue(EMDL_CTF_PHASESHIFT, phase_shift, particle)) phase_shift = 0; //std::cout << DeltafU << ", " << DeltafV << " @ " << azimuthal_angle << "°, " << Cs << ", " << Q0 << "\n"; double local_Cs = Cs * 1e7; double local_kV = kV * 1e3; double rad_azimuth = DEG2RAD(azimuthal_angle); double defocus_average = -(DeltafU + DeltafV) * 0.5; double defocus_deviation = -(DeltafU - DeltafV) * 0.5; double lambda=12.2643247 / sqrt(local_kV * (1. + local_kV * 0.978466e-6)); double K1 = (PI / 2) * 2 * lambda; double K2 = (PI / 2) * local_Cs * lambda * lambda * lambda; double K3 = atan(Q0/sqrt(1-Q0*Q0)); _offsetCtf(local_Cs, lambda, rad_azimuth, defocus_average, defocus_deviation, K1, K2, K3, mdt, particle); } OriginalBasis::OriginalBasis() : AberrationBasis(5) {} void OriginalBasis::getBasisValues(double x, double y, double *dest) { dest[0] = 1.0; // phase shift dest[1] = x*x + y*y; // defocus dest[2] = x*x - y*y; // oblique astigmatism dest[3] = x*y; // vertical astigmatism dest[4] = (x*x + y*y)*(x*x + y*y); // primary spherical } void OriginalBasis::_offsetCtf( double local_Cs, double lambda, double rad_azimuth, double defocus_average, double defocus_deviation, double K1, double K2, double K3, MetaDataTable &mdt, int particle) { /* from ctf.h: RFLOAT u2 = X * X + Y * Y; RFLOAT u4 = u2 * u2; RFLOAT deltaf = defocus_average + defocus_deviation*cos(2*(atan2(Y, X) - rad_azimuth)) argument = K1 * deltaf * u2 + K2 * u4 - K5 - K3 K1 = PI / 2 * 2 * lambda; K2 = PI / 2 * local_Cs * lambda * lambda * lambda; K3 = atan(Q0/sqrt(1-Q0*Q0)); K5 = DEG2RAD(phase_shift); local_Cs = Cs * 1e7; astigmatism/defocus: K1 * deltaf * u2 = K1 * defocus_average * u2 + defocus_deviation * K1 * cos(2*(phi - rad_azimuth)) * u2 = K1 * defocus_average * u2 + defocus_deviation * K1 * cos(2*phi - 2*rad_azimuth) * u2 = K1 * defocus_average * u2 + defocus_deviation * K1 * [cos(2*phi) cos(2*rad_azimuth) + sin(2*phi) sin(2*rad_azimuth)] * u2 = K1 * defocus_average * u2 + defocus_deviation * K1 * [(cos²(phi) - sin²(phi)) cos(2*rad_azimuth) + 2 sin(phi) cos(phi) sin(2*rad_azimuth)] * u2 = K1 * defocus_average * u2 + defocus_deviation * K1 * [(X² - Y²) cos(2*rad_azimuth) + 2 Y X sin(2*rad_azimuth)] = b1 (X² + Y²) + b2 (X² - Y²) + b3 (XY) where: b1 = K1 * defocus_average b2 = K1 * defocus_deviation * cos(2*rad_azimuth) b3 = 2 * K1 * defocus_deviation * sin(2*rad_azimuth) <=> defocus_average = b1 / (PI lambda) defocus_deviation = sqrt(b2² + b3²/4)/(PI lambda) rad_azimuth = atan2(b3/2, b2) / 2 */ double b1 = K1 * defocus_average + coefficients[1]; double b2 = K1 * defocus_deviation * cos(2*rad_azimuth) + coefficients[2]; double b3 = 2 * 
K1 * defocus_deviation * sin(2*rad_azimuth) + coefficients[3]; double new_defocus_average = b1 / (PI * lambda); double new_defocus_deviation = sqrt(b2*b2 + b3*b3/4)/(PI*lambda); double new_rad_azimuth = atan2(b3/2.0, b2) / 2.0; double azimuthal_angle = RAD2DEG(new_rad_azimuth); double DeltafU = -new_defocus_average - new_defocus_deviation; double DeltafV = new_defocus_deviation - new_defocus_average; /* spherical aberration: K2 * u4 = b4 * u4 <=> PI / 2 * local_Cs * lambda * lambda * lambda = b4; <=> local_Cs = 2 * b4 / (PI lambda³) <=> Cs = 1e-7 * 2 * b4 / (PI lambda³) */ double b4 = PI * lambda*lambda*lambda * local_Cs / 2.0 + coefficients[4]; double Cs = 1e-7 * 2.0 * b4 / (PI * lambda*lambda*lambda); /* phase shift / amp. contrast: K3 = atan(Q0/sqrt(1-Q0*Q0)) <=> Q0/sqrt(1-Q0²) = tan(K3) <=> Q0² = (1 - Q0²) * tan²(K3) <=> Q0²(1 + tan²K3) = tan²K3 <=> Q0 = sqrt(tan²K3/(1 + tan²K3)) */ double b0 = K3 - coefficients[0]; if (b0 < 0) { double phase_shift; if (!mdt.getValue(EMDL_CTF_PHASESHIFT, phase_shift, particle)) phase_shift = 0; phase_shift = phase_shift - RAD2DEG(coefficients[0]); mdt.setValue(EMDL_CTF_PHASESHIFT, phase_shift, particle); } else { double t0 = tan(b0); double Q0 = sqrt(t0*t0/(1 + t0*t0)); mdt.setValue(EMDL_CTF_Q0, Q0, particle); } mdt.setValue(EMDL_CTF_DEFOCUSU, DeltafU, particle); mdt.setValue(EMDL_CTF_DEFOCUSV, DeltafV, particle); mdt.setValue(EMDL_CTF_DEFOCUS_ANGLE, azimuthal_angle, particle); mdt.setValue(EMDL_CTF_CS, Cs, particle); //std::cout << DeltafU << ", " << DeltafV << " @ " << azimuthal_angle << "°, " << Cs << ", " << Q0 << "\n\n"; } relion-3.1.3/src/jaz/aberration_fit.h000066400000000000000000000041511411340063500175200ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #ifndef ABERRATION_FIT_H #define ABERRATION_FIT_H #include #include class AberrationBasis { public: AberrationBasis(int dims); std::vector coefficients; virtual void getBasisValues(double x, double y, double* dest) = 0; virtual void _offsetCtf(double local_Cs, double lambda, double rad_azimuth, double defocus_average, double defocus_deviation, double K1, double K2, double K3, MetaDataTable& mdt, int particle) = 0; void offsetCtf(MetaDataTable& mdt, int particle); }; class OriginalBasis : public AberrationBasis { public: OriginalBasis(); void getBasisValues(double x, double y, double* dest); void _offsetCtf(double local_Cs, double lambda, double rad_azimuth, double defocus_average, double defocus_deviation, double K1, double K2, double K3, MetaDataTable& mdt, int particle); }; class AberrationFit { public: static OriginalBasis fitBasic(Image phase, Image weight, double angpix); static Image draw(AberrationBasis* fit, double angpix, int s); }; #endif relion-3.1.3/src/jaz/archive/000077500000000000000000000000001411340063500157775ustar00rootroot00000000000000relion-3.1.3/src/jaz/archive/apps/000077500000000000000000000000001411340063500167425ustar00rootroot00000000000000relion-3.1.3/src/jaz/archive/apps/em_motion.cpp000066400000000000000000000701441411340063500214420ustar00rootroot00000000000000 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include using namespace gravis; int main(int argc, char *argv[]) { std::string starFn, reconFn0, reconFn1, maskFn, moviePath, outPath, fn_sym; RFLOAT beamtilt_x, beamtilt_y, paddingFactor, dmga, dmgb, dmgc, totalDose, sig_pos, sig_cutoff, k_cutoff, sig_vel0, sig_vel1, sig_vel2, sig_vel, sig_div0, sig_div1, sig_div2, sig_div, sig_acc, helical_rise, helical_twist; bool applyTilt, evaluate; bool rigid = true; bool rounded = false; bool localOpt = true; bool rigidLocalOpt = true; bool rigidLocalOptOnly = true; long maxMG = -1, minMG = 0; int it_number, nr_omp_threads, nr_helical_asu, evalFrames; Image map0, map1, dummy; IOParser parser; try { parser.setCommandLine(argc, argv); parser.addSection("General options"); starFn = parser.getOption("--i", "Input STAR file with the projection images and their orientations", ""); reconFn0 = parser.getOption("--m0", "Reference, half 1", ""); reconFn1 = parser.getOption("--m1", "Reference, half 2", ""); maskFn = parser.getOption("--mask", "Reference mask", ""); moviePath = parser.getOption("--movies", "Input path to movie files", ""); outPath = parser.getOption("--out", "Output path", "tracks"); it_number = textToInteger(parser.getOption("--iters", "Number of iterations", "5")); paddingFactor = textToFloat(parser.getOption("--pad", "Padding factor", "2")); beamtilt_x = textToFloat(parser.getOption("--beamtilt_x", "Beamtilt in the X-direction (in mrad)", "0.")); beamtilt_y = textToFloat(parser.getOption("--beamtilt_y", "Beamtilt in the Y-direction (in mrad)", "0.")); applyTilt = (ABS(beamtilt_x) > 0. 
|| ABS(beamtilt_y) > 0.); dmga = textToFloat(parser.getOption("--dmg_a", "Damage model, parameter a", " 3.40406")); dmgb = textToFloat(parser.getOption("--dmg_b", " b", "-1.06027")); dmgc = textToFloat(parser.getOption("--dmg_c", " c", "-0.540896")); totalDose = textToFloat(parser.getOption("--dose", "Total electron dose (in e^-/A^2)", "1")); sig_pos = textToFloat(parser.getOption("--s_pos", "Position sigma", "30.0")); sig_vel0 = textToFloat(parser.getOption("--s_vel_0", "Velocity sigma, frame 1", "6.0")); sig_vel1 = textToFloat(parser.getOption("--s_vel_1", "Velocity sigma, frame 2", "4.0")); sig_vel2 = textToFloat(parser.getOption("--s_vel_2", "Velocity sigma, frame 3", "3.0")); sig_vel = textToFloat(parser.getOption("--s_vel", "Velocity sigma, other frames", "2.0")); sig_div0 = textToFloat(parser.getOption("--s_div_0", "Divergence sigma, frame 1", "0.15")); sig_div1 = textToFloat(parser.getOption("--s_div_1", "Divergence sigma, frame 2", "0.1")); sig_div2 = textToFloat(parser.getOption("--s_div_2", "Divergence sigma, frame 3", "0.05")); sig_div = textToFloat(parser.getOption("--s_div", "Divergence sigma, other frames", "0.01")); sig_acc = textToFloat(parser.getOption("--s_acc", "Acceleration sigma", "-1.0")); sig_cutoff = textToFloat(parser.getOption("--s_cut", "Crop range (in sigma)", "3.0")); k_cutoff = textToFloat(parser.getOption("--k_cut", "Freq. cutoff (in pixels)", "-1.0")); nr_omp_threads = textToInteger(parser.getOption("--jomp", "Number of OMP threads", "1")); maxMG = textToInteger(getParameter(argc, argv, "--max_MG", "-1")); minMG = textToInteger(getParameter(argc, argv, "--min_MG", "0")); rigid = parser.checkOption("--rigid", "Rigid alignment instead of EM algorithm"); localOpt = parser.checkOption("--local", "Refine tracks locally"); rigidLocalOpt = parser.checkOption("--rigid-local", "Refine rigid track locally"); rigidLocalOptOnly = parser.checkOption("--rigid-local-only", "Refine only the rigid track locally"); evalFrames = textToInteger(parser.getOption("--eval", "Measure FSC for this many initial frames", "0")); fn_sym = parser.getOption("--sym", "Symmetry group", "c1"); nr_helical_asu = textToInteger(parser.getOption("--nr_helical_asu", "Number of helical asymmetrical units", "1")); helical_rise = textToFloat(parser.getOption("--helical_rise", "Helical rise (in Angstroms)", "0.")); helical_twist = textToFloat(parser.getOption("--helical_twist", "Helical twist (in degrees, + for right-handedness)", "0.")); evaluate = evalFrames > 0; try { map0.read(reconFn0); } catch (RelionError XE) { std::cout << "Unable to read map: " << reconFn0 << "\n"; exit(1); } try { map1.read(reconFn1); } catch (RelionError XE) { std::cout << "Unable to read map: " << reconFn1 << "\n"; exit(1); } } catch (RelionError XE) { parser.writeUsage(std::cout); std::cerr << XE; exit(1); } if (map0.data.xdim != map0.data.ydim || map0.data.ydim != map0.data.zdim) { REPORT_ERROR(reconFn0 + " is not cubical.\n"); } if (map1.data.xdim != map1.data.ydim || map1.data.ydim != map1.data.zdim) { REPORT_ERROR(reconFn1 + " is not cubical.\n"); } if ( map0.data.xdim != map1.data.xdim || map0.data.ydim != map1.data.ydim || map0.data.zdim != map1.data.zdim) { REPORT_ERROR(reconFn0 + " and " + reconFn1 + " are of unequal size.\n"); } const int s = map0.data.xdim; const int sh = map0.data.xdim/2 + 1; if (maskFn != "") { std::cout << "masking references...\n"; Image mask, maskedRef; try { mask.read(maskFn); } catch (RelionError XE) { std::cout << "Unable to read mask: " << maskFn << "\n"; exit(1); } mask.read(maskFn); 
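// Apply the solvent mask to both reference half-maps, so that only the
// masked density contributes to the reference projections used below.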
ImageOp::multiply(mask, map0, maskedRef); map0 = maskedRef; ImageOp::multiply(mask, map1, maskedRef); map1 = maskedRef; } std::vector sigma2ref(sh, 0.0); const bool refVar = false; if (refVar) { std::cout << "measuring reference variance...\n"; Image ref0, ref1, deltaRef(sh,s,s); FourierTransformer ft; ft.FourierTransform(map0(), ref0()); ft.FourierTransform(map1(), ref1()); ImageOp::linearCombination(ref0, ref1, 0.5, -0.5, deltaRef); sigma2ref = FscHelper::powerSpectrum3D(deltaRef); std::ofstream sigR_out(outPath + "_sig2_ref.dat"); for (int i = 0; i < sigma2ref.size(); i++) { sigR_out << i << " " << sigma2ref[i] << "\n"; } } MetaDataTable mdt0; mdt0.read(starFn); RFLOAT Cs, lambda, kV; mdt0.getValue(EMDL_CTF_CS, Cs, 0); mdt0.getValue(EMDL_CTF_VOLTAGE, kV, 0); RFLOAT V = kV * 1e3; lambda = 12.2643247 / sqrt(V * (1.0 + V * 0.978466e-6)); std::cout << "transforming references...\n"; Projector projector0(s, TRILINEAR, paddingFactor, 10, 2); projector0.computeFourierTransformMap(map0.data, dummy.data, s); Projector projector1(s, TRILINEAR, paddingFactor, 10, 2); projector1.computeFourierTransformMap(map1.data, dummy.data, s); std::vector mdts = StackHelper::splitByStack(&mdt0); RFLOAT mag, dstep; mdts[0].getValue(EMDL_CTF_MAGNIFICATION, mag, 0); mdts[0].getValue(EMDL_CTF_DETECTOR_PIXEL_SIZE, dstep, 0); RFLOAT angpix = 10000 * dstep / mag; ObservationModel obsModel(angpix); if (applyTilt) { obsModel = ObservationModel(angpix, Cs, kV * 1e3, beamtilt_x, beamtilt_y); } const long gc = maxMG >= 0? maxMG : mdts.size()-1; const long g0 = minMG; std::string name, fullName, movieName; mdts[0].getValue(EMDL_IMAGE_NAME, fullName, 0); mdts[0].getValue(EMDL_MICROGRAPH_NAME, movieName, 0); name = fullName.substr(fullName.find("@")+1); std::string finName; if (moviePath == "") { finName = name; } else { finName = moviePath + "/" + movieName.substr(movieName.find_last_of("/")+1); } Image stack0; stack0.read(finName, false); const int pc0 = mdts[0].numberOfObjects(); const bool zstack = stack0.data.zdim > 1; const int stackSize = zstack? stack0.data.zdim : stack0.data.ndim; const int fc = stackSize / pc0; std::vector sig_vel_vec(4); sig_vel_vec[0] = sig_vel0; sig_vel_vec[1] = sig_vel1; sig_vel_vec[2] = sig_vel2; sig_vel_vec[3] = sig_vel; std::vector sig_div_vec(4); sig_div_vec[0] = sig_div0; sig_div_vec[1] = sig_div1; sig_div_vec[2] = sig_div2; sig_div_vec[3] = sig_div; std::cout << "pc0 = " << pc0 << "\n"; std::cout << "fc = " << fc << "\n"; std::vector > dmgWeight = DamageHelper::damageWeights( s, angpix, 0, fc, totalDose, dmga, dmgb, dmgc); int k_out = k_cutoff + 21; for (int f = 0; f < fc; f++) { dmgWeight[f].data.xinit = 0; dmgWeight[f].data.yinit = 0; if (k_cutoff > 0.0) { std::stringstream stsf; stsf << f; dmgWeight[f] = FilterHelper::ButterworthEnvFreq2D(dmgWeight[f], k_cutoff-1, k_cutoff+1); } } std::cout << "mg range: " << g0 << ".." 
<< gc << "\n"; std::vector fts(nr_omp_threads); double t0 = omp_get_wtime(); int pctot = 0; const bool writeDebugImages = false; const bool measureFCC = true; std::vector > tables(nr_omp_threads), tablesV(nr_omp_threads), tablesVW(nr_omp_threads); std::vector > weights0(nr_omp_threads), weights0V(nr_omp_threads), weights0VW(nr_omp_threads); std::vector > weights1(nr_omp_threads), weights1V(nr_omp_threads), weights1VW(nr_omp_threads); if (evaluate) { if (measureFCC) { for (int i = 0; i < nr_omp_threads; i++) { FscHelper::initFscTable(sh, fc, tables[i], weights0[i], weights1[i]); FscHelper::initFscTable(sh, fc, tablesV[i], weights0V[i], weights1V[i]); FscHelper::initFscTable(sh, fc, tablesVW[i], weights0VW[i], weights1VW[i]); } } } for (long g = g0; g <= gc; g++) { std::cout << "micrograph " << g << " / " << mdts.size() <<"\n"; std::stringstream stsg; stsg << g; const int pc = mdts[g].numberOfObjects(); pctot += pc; std::vector > > movie; try { movie = StackHelper::loadMovieStackFS( &mdts[g], moviePath, false, nr_omp_threads, &fts); } catch (RelionError XE) { continue; } std::vector sigma2 = StackHelper::powerSpectrum(movie); if (refVar) { std::ofstream sigD_out(outPath + "_mg" + stsg.str() + "_sig2_data.dat"); for (int i = 0; i < sigma2.size(); i++) { sigD_out << i << " " << sigma2[i] << "\n"; sigma2[i] += sigma2ref[i]; } } #pragma omp parallel for num_threads(nr_omp_threads) for (int p = 0; p < pc; p++) for (int f = 0; f < fc; f++) { MotionRefinement::noiseNormalize(movie[p][f], sigma2, movie[p][f]); } std::vector positions(pc); std::vector defoci(pc); for (int p = 0; p < pc; p++) { mdts[g].getValue(EMDL_IMAGE_COORD_X, positions[p].x, p); mdts[g].getValue(EMDL_IMAGE_COORD_Y, positions[p].y, p); double du, dv; mdts[g].getValue(EMDL_CTF_DEFOCUSU, du, p); mdts[g].getValue(EMDL_CTF_DEFOCUSV, dv, p); defoci[p] = 0.5*(du + dv)/angpix; } bool allDefociEqual = true; for (int p = 1; p < pc; p++) { if (defoci[p] != defoci[0]) { allDefociEqual = false; } } if (allDefociEqual) { std::cout << "WARNING: The defoci are identical for all particles!\n"; std::cout << " You may want to determine per-particle defoci first.\n"; } MotionEM motEM( projector0, projector1, obsModel, mdts[g], movie, positions, sigma2, dmgWeight, sig_pos, sig_vel_vec, sig_div_vec, sig_cutoff, nr_omp_threads); if (it_number > 0) { std::cout << " computing initial correlations...\n"; motEM.computeInitial(); } if (writeDebugImages) { for (int p = 0; p < pc; p++) { std::stringstream stsp; stsp << p; ImageLog::write(motEM.posProb[p], "ppDebug/i"+stsg.str()+"_p"+stsp.str()+"_pos_initial", CenterXY); } } if (!rigid) for (int it = 0; it < it_number; it++) { std::stringstream stsit; stsit << it; std::cout << " iteration " << it << "\n"; motEM.iterate(); if (writeDebugImages) { for (int p = 0; p < pc; p++) { std::stringstream stsp; stsp << p; ImageLog::write(motEM.velProb[p], "ppDebug/i"+stsg.str()+"_p"+stsp.str()+"_vel_"+stsit.str(), CenterXY); ImageLog::write(motEM.posProb[p], "ppDebug/i"+stsg.str()+"_p"+stsp.str()+"_pos_"+stsit.str(), CenterXY); } } } std::vector> tracks(pc); if (rigid && it_number > 0) { std::vector globTrack = motEM.getGlobalTrack(); if (rounded) { for (int f = 0; f < fc; f++) { globTrack[f].x = std::round(globTrack[f].x); globTrack[f].y = std::round(globTrack[f].y); } } for (int p = 0; p < pc; p++) { tracks[p] = globTrack; } } else { for (int p = 0; p < pc; p++) { tracks[p] = motEM.getTrack(p); } } if (localOpt) { if (rigidLocalOpt) { std::vector velWgh(fc-1); std::vector accWgh(fc-1, sig_acc > 0.0? 
1.0/(sig_acc*sig_acc) : 0.0); for (int f = 0; f < fc-1; f++) { double sv; if (f < sig_vel_vec.size()) { sv = sig_vel_vec[f]; } else { sv = sig_vel_vec[sig_vel_vec.size()-1]; } velWgh[f] = 1.0 / (sv*sv); } std::vector>> divWgh(0); std::vector globTrack = motEM.getGlobalTrack(); std::vector>> ccSum(1); ccSum[0] = motEM.e_sum; for (int f = 0; f < fc-1; f++) { velWgh[f] *= pc; } LocalMotionFit lmf(ccSum, velWgh, accWgh, divWgh, std::vector(fc, d2Vector(0,0)), nr_omp_threads); std::vector initial(2*fc); for (int f = 0; f < fc; f++) { initial[2*f] = globTrack[f].x; initial[2*f + 1] = globTrack[f].y; } std::vector grad0(2*fc*pc); lmf.grad(initial, grad0, 0); double gl = 0.0; for (int i = 0; i < grad0.size(); i++) { double gi = grad0[i]; gl += gi*gi; } gl = sqrt(gl); std::cout << "gl = " << gl << "\n"; std::cout << " optimizing rigid path locally...\n"; std::vector optPos = GradientDescent::optimize( initial, lmf, 0.05/gl, 1e-9/gl, 1e-9, 10000, 0.0, true); for (int p = 0; p < pc; p++) { for (int f = 0; f < fc; f++) { tracks[p][f].x = optPos[2*f]; tracks[p][f].y = optPos[2*f + 1]; } } } if (!rigidLocalOptOnly) { std::vector velWgh(fc-1); std::vector accWgh(fc-1, sig_acc > 0.0? 0.5/(sig_acc*sig_acc) : 0.0); for (int f = 0; f < fc-1; f++) { double sv; if (f < sig_vel_vec.size()) { sv = sig_vel_vec[f]; } else { sv = sig_vel_vec[sig_vel_vec.size()-1]; } velWgh[f] = 0.5 / (sv*sv); } std::vector>> divWgh(fc-1); for (int f = 0; f < fc-1; f++) { divWgh[f] = std::vector>(pc); for (int p = 0; p < pc; p++) { divWgh[f][p] = std::vector(pc); for (int q = 0; q < pc; q++) { d2Vector dp = positions[p] - positions[q]; double dd = defoci[p] - defoci[q]; double dist = sqrt(dp.x*dp.x + dp.y*dp.y + dd*dd); double sd; if (f < sig_div_vec.size()) { sd = sig_div_vec[f]; } else { sd = sig_div_vec[sig_div_vec.size()-1]; } divWgh[f][p][q] = 0.5 / (sd * sd * dist); } } } LocalMotionFit lmf(motEM.initialCC, velWgh, accWgh, divWgh, std::vector(fc, d2Vector(0,0)), nr_omp_threads); std::vector initial(2*pc*fc); for (int p = 0; p < pc; p++) { for (int f = 0; f < fc; f++) { initial[2*(p*fc + f)] = tracks[p][f].x; initial[2*(p*fc + f) + 1] = tracks[p][f].y; } } std::vector grad0(2*fc*pc); lmf.grad(initial, grad0, 0); double gl = 0.0; for (int i = 0; i < grad0.size(); i++) { double gi = grad0[i]; gl += gi*gi; } gl = sqrt(gl); std::cout << "gl = " << gl << "\n"; std::cout << " optimizing locally...\n"; std::vector optPos = GradientDescent::optimize( initial, lmf, 0.05/gl, 1e-9/gl, 1e-9, 10000, 0.0, false); for (int p = 0; p < pc; p++) { for (int f = 0; f < fc; f++) { tracks[p][f].x = optPos[2*(p*fc + f)]; tracks[p][f].y = optPos[2*(p*fc + f) + 1]; } } } } if (evaluate) { if (measureFCC) { #pragma omp parallel for num_threads(nr_omp_threads) for (int p = 0; p < pc; p++) { int threadnum = omp_get_thread_num(); Image pred; std::vector> obs = movie[p]; if (it_number > 0) { for (int f = 0; f < fc; f++) { shiftImageInFourierTransform(obs[f](), obs[f](), s, -tracks[p][f].x, -tracks[p][f].y); } } int randSubset; mdts[g].getValue(EMDL_PARTICLE_RANDOM_SUBSET, randSubset, p); randSubset -= 1; if (randSubset == 0) { pred = obsModel.predictObservation(projector1, mdts[g], p, true, true); } else { pred = obsModel.predictObservation(projector0, mdts[g], p, true, true); } FscHelper::updateFscTable(obs, pred, tables[threadnum], weights0[threadnum], weights1[threadnum]); std::vector vel(fc); vel[0] = tracks[p][1] - tracks[p][0]; for (int f = 1; f < fc-1; f++) { vel[f] = 0.5*(tracks[p][f+1] - tracks[p][f-1]); } vel[fc-1] = tracks[p][fc-1] - 
tracks[p][fc-2]; FscHelper::updateFscTableVelWgh(obs, vel, pred, tablesVW[threadnum], weights0VW[threadnum], weights1VW[threadnum]); FscHelper::updateVelFscTable( obs, vel, pred, tablesV[threadnum], weights0V[threadnum], weights1V[threadnum], k_cutoff, k_out); } } } if (it_number > 0) { std::vector> centTracks(pc), visTracks(pc), centVisTracks(pc); double visScale = 30.0; for (int p = 0; p < pc; p++) { centTracks[p] = std::vector(fc); visTracks[p] = std::vector(fc); centVisTracks[p] = std::vector(fc); } std::vector globalTrack(fc); for (int f = 0; f < fc; f++) { globalTrack[f] = d2Vector(0,0); for (int p = 0; p < pc; p++) { globalTrack[f] += tracks[p][f]; } globalTrack[f] /= pc; for (int p = 0; p < pc; p++) { centTracks[p][f] = tracks[p][f] - globalTrack[f]; visTracks[p][f] = positions[p] + visScale * tracks[p][f]; centVisTracks[p][f] = positions[p] + visScale * centTracks[p][f]; } } std::ofstream rawOut(outPath + "_mg" + stsg.str() + "_tracks.dat"); std::ofstream visOut(outPath + "_mg" + stsg.str() + "_visTracks.dat"); //std::ofstream centVisOut(outPath + "_mg" + stsg.str() + "_centVisTracks.dat"); std::ofstream visOut15(outPath + "_mg" + stsg.str() + "_visTracks_first15.dat"); for (int p = 0; p < pc; p++) { rawOut << "#particle " << p << "\n"; visOut << "#particle " << p << "\n"; //centVisOut << "#particle " << p << "\n"; visOut15 << "#particle " << p << "\n"; for (int f = 0; f < fc; f++) { rawOut << tracks[p][f].x << " " << tracks[p][f].y << "\n"; visOut << visTracks[p][f].x << " " << visTracks[p][f].y << "\n"; //centVisOut << centVisTracks[p][f].x << " " << centVisTracks[p][f].y << "\n"; if (f < 15) visOut15 << visTracks[p][f].x << " " << visTracks[p][f].y << "\n"; } rawOut << "\n"; visOut << "\n"; //centVisOut << "\n"; visOut15 << "\n"; } std::ofstream glbOut(outPath + "_mg" + stsg.str() + "_globTrack.dat"); for (int f = 0; f < fc; f++) { glbOut << globalTrack[f].x << " " << globalTrack[f].y << "\n"; } } } // micrographs if (evaluate) { if (measureFCC) { Image table, weight; Image tableV, weightV; Image tableVW, weightVW; FscHelper::mergeFscTables(tables, weights0, weights1, table, weight); FscHelper::mergeFscTables(tablesV, weights0V, weights1V, tableV, weightV); FscHelper::mergeFscTables(tablesVW, weights0VW, weights1VW, tableVW, weightVW); ImageLog::write(tableV, outPath + "_FCC_data_V"); const int ccvBins = 10; std::vector ccByV(ccvBins, 0.0); std::vector ccByV_w(ccvBins, 0.0); for (int f = 0; f < 4; f++) for (int k = 0; k < sh; k++) { int b = k*ccvBins/sh; ccByV[b] += tableV(f,k)*weightV(f,k); ccByV_w[b] += weightV(f,k); } std::ofstream ccvOut(outPath + "_CC_by_V.dat"); for (int b = 0; b < ccvBins; b++) { if (ccByV_w[b] > 0.0) { ccByV[b] /= ccByV_w[b]; } ccvOut << (b+0.5)*sh/ccvBins << " " << ccByV[b] << "\n"; } int f_max = fc; double total = 0.0; double totalVW = 0.0; std::ofstream fccOut(outPath + "_FCC_perFrame.dat"); std::ofstream fccOutVW(outPath + "_FCC_perFrame_velWgh_Gauss.dat"); for (int y = 0; y < f_max; y++) { double avg = 0.0; double avgVW = 0.0; for (int k = k_cutoff+2; k < k_out; k++) { avg += table(y,k); avgVW += tableVW(y,k); } avg /= k_out - k_cutoff - 1; avgVW /= k_out - k_cutoff - 1; fccOut << y << " " << avg << "\n"; fccOutVW << y << " " << avgVW << "\n"; total += avg; totalVW += avgVW; } total /= f_max; totalVW /= f_max; std::cout << "total: " << total << " (" << totalVW <<")\n"; table.write(outPath + "_FCC_data.mrc"); /*weight.write(outPath + "_FCC_weight.mrc");*/ ImageLog::write(table, outPath + "_FCC_data"); } } double t1 = omp_get_wtime(); double diff = 
t1 - t0; std::cout << "elapsed (total): " << diff << " sec\n"; return 0; } relion-3.1.3/src/jaz/archive/apps/ref_aberration_plot.cpp000066400000000000000000000173001411340063500234670ustar00rootroot00000000000000 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include using namespace gravis; class AberrationPlot : public RefinementProgram { public: AberrationPlot(); RFLOAT kmin; bool precomputed; std::string precomp; Image lastCos, lastSin; int readMoreOptions(IOParser& parser, int argc, char *argv[]); int _init(); int _run(); }; AberrationPlot :: AberrationPlot() : RefinementProgram(true), precomputed(false) { optReference = true; } int main(int argc, char *argv[]) { AberrationPlot tf; int rc0 = tf.init(argc, argv); if (rc0 != 0) return rc0; int rc1 = tf.run(); if (rc1 != 0) return rc1; } int AberrationPlot::readMoreOptions(IOParser& parser, int argc, char *argv[]) { kmin = textToFloat(parser.getOption("--kmin", "Inner freq. threshold [Angst]", "30.0")); precomp = parser.getOption("--precomp", "Precomputed *_sin and *_cos files from previous run (optional)", ""); precomputed = precomp != ""; noReference = precomputed; bool allGood = true; if (reconFn0 == "" && !precomputed) { std::cerr << "A reference map (--m) is required if no precomputed pixel-fit is available (--precomp).\n"; allGood = false; } if (!allGood) return RELION_EXIT_FAILURE; else return RELION_EXIT_SUCCESS; } int AberrationPlot::_init() { return RELION_EXIT_SUCCESS; } int AberrationPlot::_run() { std::vector fts(nr_omp_threads); double t0 = omp_get_wtime(); const bool differential = false; if (differential) { std::vector> A(nr_omp_threads, Image(sh,s)), b(nr_omp_threads, Image(sh,s)); const double as = (double)s * angpix; for (long g = minMG; g <= gc; g++) { std::stringstream stsg; stsg << g; std::cout << "micrograph " << g << " / " << mdts.size() <<"\n"; const int pc = mdts[g].numberOfObjects(); std::vector> pred; std::vector> obsF; pred = obsModel.predictObservations(projectors[0], mdts[g], nr_omp_threads, false, true); obsF = StackHelper::loadStackFS(mdts[g], imgPath, nr_omp_threads); #pragma omp parallel for num_threads(nr_omp_threads) for (long p = 0; p < pc; p++) { int t = omp_get_thread_num(); CTF ctf0; ctf0.read(mdts[g], mdts[g], p); //ctf0.Cs = 0.0; ctf0.initialise(); for (int y = 0; y < s; y++) for (int x = 0; x < sh; x++) { const double xf = x; const double yf = y < sh? 
y : y - s; const double gamma_i = ctf0.getGamma(xf/as, yf/as); const double cg = cos(gamma_i); const double sg = sin(gamma_i); Complex zobs = obsF[p](y,x); Complex zprd = pred[p](y,x); double zz = zobs.real*zprd.real + zobs.imag*zprd.imag; double nr = zprd.norm(); A[t](y,x) += nr*cg*cg; b[t](y,x) += cg*(sg*nr+zz); } } } for (int t = 1; t < nr_omp_threads; t++) { for (int y = 0; y < s; y++) for (int x = 0; x < sh; x++) { A[0](y,x) += A[t](y,x); b[0](y,x) += b[t](y,x); } } Image dgamma(sh,s); for (int y = 0; y < s; y++) for (int x = 0; x < sh; x++) { if (A[0](y,x) != 0.0) { dgamma(y,x) = b[0](y,x)/A[0](y,x); } } ImageLog::write(dgamma, outPath+"_dgamma"); } else { Image cosPhi(sh,s), sinPhi(sh,s), phase(sh,s); if (precomputed) { std::cout << "using precomputed data...\n"; cosPhi.read(precomp+"_cos.mrc"); sinPhi.read(precomp+"_sin.mrc"); phase.read(precomp+"_phase.mrc"); s = cosPhi.data.ydim; sh = cosPhi.data.xdim; } else { std::vector> Axx(nr_omp_threads, Image(sh,s)), Axy(nr_omp_threads, Image(sh,s)), Ayy(nr_omp_threads, Image(sh,s)), bx(nr_omp_threads, Image(sh,s)), by(nr_omp_threads, Image(sh,s)); const double as = (double)s * angpix; for (long g = minMG; g <= gc; g++) { std::stringstream stsg; stsg << g; std::cout << "micrograph " << g << " / " << mdts.size() <<"\n"; const int pc = mdts[g].numberOfObjects(); std::vector > pred; std::vector > obsF; pred = obsModel.predictObservations(projectors[0], mdts[g], nr_omp_threads, false, true); obsF = StackHelper::loadStackFS(mdts[g], imgPath, nr_omp_threads); #pragma omp parallel for num_threads(nr_omp_threads) for (long p = 0; p < pc; p++) { int t = omp_get_thread_num(); CTF ctf0; ctf0.read(mdts[g], mdts[g], p); //ctf0.Cs = 0.0; ctf0.initialise(); for (int y = 0; y < s; y++) for (int x = 0; x < sh; x++) { const double xf = x; const double yf = y < sh? y : y - s; const double gamma_i = ctf0.getGamma(xf/as, yf/as); const double cg = cos(gamma_i); const double sg = sin(gamma_i); Complex zobs = obsF[p](y,x); Complex zprd = pred[p](y,x); double zz = zobs.real*zprd.real + zobs.imag*zprd.imag; double nr = zprd.norm(); Axx[t](y,x) += nr*sg*sg; Axy[t](y,x) += nr*cg*sg; Ayy[t](y,x) += nr*cg*cg; bx[t](y,x) -= zz*sg; by[t](y,x) -= zz*cg; } } } for (int t = 1; t < nr_omp_threads; t++) { for (int y = 0; y < s; y++) for (int x = 0; x < sh; x++) { Axx[0](y,x) += Axx[t](y,x); Axy[0](y,x) += Axy[t](y,x); Ayy[0](y,x) += Ayy[t](y,x); bx[0](y,x) += bx[t](y,x); by[0](y,x) += by[t](y,x); } } for (int y = 0; y < s; y++) for (int x = 0; x < sh; x++) { d2Matrix A( Axx[0](y,x), Axy[0](y,x), Axy[0](y,x), Ayy[0](y,x)); d2Vector b(bx[0](y,x), by[0](y,x)); double det = A(0,0)*A(1,1) - A(1,0)*A(0,1); if (det != 0.0) { d2Matrix Ai = A; Ai.invert(); d2Vector opt = Ai*b; cosPhi(y,x) = opt.x; sinPhi(y,x) = opt.y; phase(y,x) = std::abs(opt.x) > 0.0? atan2(opt.y, opt.x) : 0.0; } } ImageLog::write(cosPhi, outPath+"_cos"); ImageLog::write(sinPhi, outPath+"_sin"); //ImageLog::write(phase, outPath+"_phase"); Image phaseFull(s,s); FftwHelper::decenterDouble2D(phase.data, phaseFull.data); ImageLog::write(phaseFull, outPath+"_phase"); cosPhi.write(outPath+"_cos.mrc"); sinPhi.write(outPath+"_sin.mrc"); phase.write(outPath+"_phase.mrc"); } for (int y = 0; y < s; y++) for (int x = 0; x < sh; x++) { double xx = x; double yy = y <= sh? 
y : y - s; double r = sqrt(xx*xx + yy*yy); if (r == 0 || 2.0*sh*angpix/r > kmin) { freqWeight(y,x) = 0.0; } } OriginalBasis fit = AberrationFit::fitBasic(phase, freqWeight, angpix); Image vis = AberrationFit::draw(&fit, angpix, s); Image visFull(s,s); FftwHelper::decenterDouble2D(vis.data, visFull.data); ImageLog::write(visFull, outPath+"_fit"); MetaDataTable mdtAll; mdtAll.reserve(mdt0.numberOfObjects()); for (long g = minMG; g <= gc; g++) { const int pc = mdts[g].numberOfObjects(); for (long p = 0; p < pc; p++) { fit.offsetCtf(mdts[g], p); } mdtAll.append(mdts[g]); } mdtAll.write(outPath+".star"); } double t1 = omp_get_wtime(); std::cout << "elapsed: " << (t1 - t0) << "s \n"; return RELION_EXIT_SUCCESS; } relion-3.1.3/src/jaz/archive/apps/update_angles.cpp000066400000000000000000000255461411340063500222750ustar00rootroot00000000000000 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include using namespace gravis; int main(int argc, char *argv[]) { std::string starFn, reconFn0, reconFn1, maskFn, outPath, inPath, fscFn; bool debug, applyTilt, useFsc; long maxMG = -1, minMG = 0; RFLOAT angpix, paddingFactor, beamtilt_x, beamtilt_y, deltaAngle; int nr_omp_threads, kmax; IOParser parser; try { parser.setCommandLine(argc, argv); parser.addSection("General options"); starFn = parser.getOption("--i", "Input STAR file", ""); reconFn0 = parser.getOption("--m0", "Reference, half 1", ""); reconFn1 = parser.getOption("--m1", "Reference, half 2", ""); maskFn = parser.getOption("--mask", "Reference mask", ""); fscFn = parser.getOption("--f", "Input STAR file with the FSC of the reference", ""); outPath = parser.getOption("--out", "Output path", ""); inPath = parser.getOption("--img", "Path to images", ""); deltaAngle = textToFloat(parser.getOption("--delta", "Initial angle shift (in degrees)", "1.0")); angpix = textToFloat(parser.getOption("--angpix", "Pixel resolution (angst/pix)", "0.0")); paddingFactor = textToFloat(parser.getOption("--pad", "Padding factor", "2")); beamtilt_x = textToFloat(parser.getOption("--beamtilt_x", "Beamtilt in the X-direction (in mrad)", "0.")); beamtilt_y = textToFloat(parser.getOption("--beamtilt_y", "Beamtilt in the Y-direction (in mrad)", "0.")); applyTilt = (ABS(beamtilt_x) > 0. || ABS(beamtilt_y) > 0.); kmax = textToInteger(parser.getOption("--kmax", "Max. 
frequency used for alignment", "-1")); nr_omp_threads = textToInteger(parser.getOption("--jomp", "Number of OMP threads", "1")); maxMG = textToInteger(parser.getOption("--max_MG", "first micrograph index", "-1")); minMG = textToInteger(parser.getOption("--min_MG", "last micrograph index", "0")); debug = parser.checkOption("--debug", "TBD"); if (reconFn0 == "" || reconFn1 == "") { std::cout << "An initial reconstruction for per-micrograph B-factors (--m) is required.\n"; return 666; } } catch (RelionError XE) { parser.writeUsage(std::cout); std::cerr << XE; exit(1); } bool allGood = true; useFsc = fscFn != ""; MetaDataTable fscMdt; if (useFsc) { fscMdt.read(fscFn, "fsc"); if (!fscMdt.containsLabel(EMDL_SPECTRAL_IDX)) { std::cerr << fscFn << " does not contain a value for " << EMDL::label2Str(EMDL_SPECTRAL_IDX) << ".\n"; allGood = false; } if (!fscMdt.containsLabel(EMDL_POSTPROCESS_FSC_TRUE)) { std::cerr << fscFn << " does not contain a value for " << EMDL::label2Str(EMDL_POSTPROCESS_FSC_TRUE) << ".\n"; allGood = false; } } if (!allGood) { return 1; } Image map0, map1, dummy; Projector projector0, projector1; try { map0.read(reconFn0); } catch (RelionError XE) { std::cout << "Unable to read map: " << reconFn0 << "\n"; exit(1); } try { map1.read(reconFn1); } catch (RelionError XE) { std::cout << "Unable to read map: " << reconFn1 << "\n"; exit(1); } if (map0.data.xdim != map0.data.ydim || map0.data.ydim != map0.data.zdim) { REPORT_ERROR(reconFn0 + " is not cubical.\n"); } if (map1.data.xdim != map1.data.ydim || map1.data.ydim != map1.data.zdim) { REPORT_ERROR(reconFn1 + " is not cubical.\n"); } if ( map0.data.xdim != map1.data.xdim || map0.data.ydim != map1.data.ydim || map0.data.zdim != map1.data.zdim) { REPORT_ERROR(reconFn0 + " and " + reconFn1 + " are of unequal size.\n"); } if (maskFn != "") { std::cout << "masking references...\n"; Image mask, maskedRef; try { mask.read(maskFn); } catch (RelionError XE) { std::cout << "Unable to read mask: " << maskFn << "\n"; exit(1); } mask.read(maskFn); ImageOp::multiply(mask, map0, maskedRef); map0 = maskedRef; ImageOp::multiply(mask, map1, maskedRef); map1 = maskedRef; } const int s = map0.data.xdim; const int sh = s/2 + 1; Image imgSnr; if (useFsc) { RefinementHelper::computeSNR(&fscMdt, imgSnr); } else { imgSnr = Image(sh,s); imgSnr.data.initConstant(1.0); } std::cout << "transforming references...\n"; projector0 = Projector(s, TRILINEAR, paddingFactor, 10, 2); projector0.computeFourierTransformMap(map0.data, dummy.data, map0.data.xdim); projector1 = Projector(s, TRILINEAR, paddingFactor, 10, 2); projector1.computeFourierTransformMap(map1.data, dummy.data, map1.data.xdim); MetaDataTable mdt0; mdt0.read(starFn); std::vector mdts = StackHelper::splitByStack(&mdt0); RFLOAT Cs, lambda, kV; mdt0.getValue(EMDL_CTF_CS, Cs, 0); mdt0.getValue(EMDL_CTF_VOLTAGE, kV, 0); RFLOAT V = kV * 1e3; lambda = 12.2643247 / sqrt(V * (1.0 + V * 0.978466e-6)); if (angpix <= 0.0) { RFLOAT mag, dstep; mdts[0].getValue(EMDL_CTF_MAGNIFICATION, mag, 0); mdts[0].getValue(EMDL_CTF_DETECTOR_PIXEL_SIZE, dstep, 0); angpix = 10000 * dstep / mag; } ObservationModel obsModel(angpix); if (applyTilt) { obsModel = ObservationModel(angpix, Cs, kV * 1e3, beamtilt_x, beamtilt_y); } const long gc = maxMG >= 0? maxMG : mdts.size()-1; const long g0 = minMG; std::cout << "mg range: " << g0 << ".." 
<< gc << "\n"; std::vector fts(nr_omp_threads); double t0 = omp_get_wtime(); const bool quadratic = true; MetaDataTable mdtAll; mdtAll.reserve(mdt0.numberOfObjects()); for (long g = g0; g <= gc; g++) { std::cout << "micrograph " << g << " / " << mdts.size() <<"\n"; std::stringstream stsg; stsg << g; const int pc = mdts[g].numberOfObjects(); std::vector> obsF = StackHelper::loadStackFS(&mdts[g], inPath, nr_omp_threads, &fts); #pragma omp parallel for num_threads(nr_omp_threads) for (long p = 0; p < pc; p++) { int randSubset; mdts[g].getValue(EMDL_PARTICLE_RANDOM_SUBSET, randSubset, p); randSubset -= 1; if (quadratic) { Matrix2D A(27,10); Matrix1D b(27); for (int rot = -1; rot <= 1; rot++) for (int tilt = -1; tilt <= 1; tilt++) for (int psi = -1; psi <= 1; psi++) { Image pred; if (randSubset == 0) { pred = obsModel.predictObservation( projector0, mdts[g], p, true, true, rot*deltaAngle, tilt*deltaAngle, psi*deltaAngle); } else { pred = obsModel.predictObservation( projector1, mdts[g], p, true, true, rot*deltaAngle, tilt*deltaAngle, psi*deltaAngle); } const double index = 9*(rot+1) + 3*(tilt+1) + (psi+1); b(index) = 0.0; for (int y = 0; y < s; y++) for (int x = 0; x < sh; x++) { double yy = y < sh? y : y - s; double r = sqrt(x*x + yy*yy); if (r > kmax) continue; b(index) += imgSnr(y,x) * (pred(y,x) - obsF[p](y,x)).norm(); } A(index, 0) = rot*rot; A(index, 1) = 2.0*rot*tilt; A(index, 2) = 2.0*rot*psi; A(index, 3) = 2.0*rot; A(index, 4) = tilt*tilt; A(index, 5) = 2.0*tilt*psi; A(index, 6) = 2.0*tilt; A(index, 7) = psi*psi; A(index, 8) = 2.0*psi; A(index, 9) = 1.0; } const double tol = 1e-20; Matrix1D x(10); solve(A, b, x, tol); d3Matrix C3(x(0), x(1), x(2), x(1), x(4), x(5), x(2), x(5), x(7)); d3Vector d(x(3), x(6), x(8)); d3Matrix C3i = C3; C3i.invert(); d3Vector min = -C3i * d; if (debug) std::cout << p << ": " << min*deltaAngle << "\n"; if (min.length() > 1.0) min /= min.length(); double rot, tilt, psi; mdts[g].getValue(EMDL_ORIENT_ROT, rot, p); mdts[g].getValue(EMDL_ORIENT_TILT, tilt, p); mdts[g].getValue(EMDL_ORIENT_PSI, psi, p); rot += min[0]*deltaAngle; tilt += min[1]*deltaAngle; psi += min[2]*deltaAngle; mdts[g].setValue(EMDL_ORIENT_ROT, rot, p); mdts[g].setValue(EMDL_ORIENT_TILT, tilt, p); mdts[g].setValue(EMDL_ORIENT_PSI, psi, p); } else { } } mdtAll.append(mdts[g]); } mdtAll.write(outPath); double t1 = omp_get_wtime(); double diff = t1 - t0; std::cout << "elapsed (total): " << diff << " sec\n"; } relion-3.1.3/src/jaz/archive/frame_merge.cpp000066400000000000000000000046731411340063500207660ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
 ***************************************************************************/

#include

void FrameMerge :: mergeAvg(Image& stack, Image& tgt)
{
    const int bs = stack.data.xdim / tgt.data.xdim;
    const int fc = stack.data.zdim;

    for (int y = 0; y < tgt.data.ydim; y++)
    for (int x = 0; x < tgt.data.xdim; x++)
    {
        double sum = 0.0;

        for (int yb = 0; yb < bs; yb++)
        for (int xb = 0; xb < bs; xb++)
        for (int n = 0; n < fc; n++)
        {
            sum += DIRECT_NZYX_ELEM(stack.data, 0, n, y*bs + yb, x*bs + xb);
        }

        DIRECT_A2D_ELEM(tgt.data, y, x) = sum / (double)(bs*bs*fc);
    }
}

void FrameMerge :: valueHistogram(Image& stack, Image& tgt)
{
    const int mv = tgt.data.xdim;

    std::vector bins(mv);

    for (int i = 0; i < mv; i++)
    {
        bins[i] = 0;
    }

    for (int z = 0; z < tgt.data.zdim; z++)
    for (int y = 0; y < tgt.data.ydim; y++)
    for (int x = 0; x < tgt.data.xdim; x++)
    {
        double v = DIRECT_NZYX_ELEM(stack.data, 0, z, y, x);

        if (!(v == v)) continue; // skip NaN values before converting to an integer bin

        int vb = (int)v;

        if (vb < 0) vb = 0;
        else if (vb >= mv) vb = mv - 1; // clamp into [0, mv-1] so bins[vb] stays in range

        bins[vb]++;
    }

    double bmax = 0;

    for (int i = 1; i < mv; i++)
    {
        if (bins[i] > bmax) bmax = bins[i];
    }

    bmax += 2.0;

    for (int x = 0; x < tgt.data.xdim; x++)
    {
        double bv = (double)tgt.data.ydim * (double)bins[x] / bmax;

        for (int y = 0; y < tgt.data.ydim; y++)
        {
            DIRECT_A2D_ELEM(tgt.data, y, x) = y >= bv? 1.0 : 0.0;
        }
    }
}
relion-3.1.3/src/jaz/archive/frame_merge.h000066400000000000000000000022471411340063500204260ustar00rootroot00000000000000/***************************************************************************
 *
 * Author: "Jasenko Zivanov"
 * MRC Laboratory of Molecular Biology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * This complete copyright notice must be included in any revised version of the
 * source code. Additional authorship citations may be added, but existing
 * author citations must be preserved.
 ***************************************************************************/

#ifndef FRAME_MERGE_H
#define FRAME_MERGE_H

#include

class FrameMerge
{
    public:

        static void mergeAvg(Image& stack, Image& tgt);
        static void valueHistogram(Image& stack, Image& tgt);
};

#endif
relion-3.1.3/src/jaz/archive/motion_em.cpp000066400000000000000000000374321411340063500205020ustar00rootroot00000000000000/***************************************************************************
 *
 * Author: "Jasenko Zivanov"
 * MRC Laboratory of Molecular Biology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * This complete copyright notice must be included in any revised version of the
 * source code. Additional authorship citations may be added, but existing
 * author citations must be preserved.
***************************************************************************/ #include #include #include #include #include #include #include #include using namespace gravis; MotionEM::MotionEM( Projector& projector0, Projector& projector1, const ObservationModel& obsModel, MetaDataTable& viewParams, const std::vector>>& movie, const std::vector& globalPositions, const std::vector& sigma2, const std::vector>& damageWeights, double sig_pos, const std::vector &sig_vel_initial, const std::vector &sig_div_initial, double sig_cutoff, int threads) : projector0(projector0), projector1(projector1), obsModel(obsModel), viewParams(viewParams), movie(movie), globalPositions(globalPositions), sigma2(sigma2), damageWeights(damageWeights), sig_pos(sig_pos), sig_vel(movie[0].size() - 1), sig_div(movie[0].size() - 1), sig_cutoff(sig_cutoff), threads(threads), fts_full(threads), fts_pos(threads), fts_vel(threads * sig_vel_initial.size()), pc(movie.size()), fc(movie[0].size()), s_full(movie[0][0].data.ydim), sh_full(movie[0][0].data.ydim/2 + 1), s_pos(2 * (int)(sig_cutoff * sig_pos)), sh_pos((int)(sig_cutoff * sig_pos) + 1), s_vel(movie[0].size()-1), sh_vel(movie[0].size()-1), sig_vel_class(movie[0].size()-1), initialized(false) { if (s_pos > s_full) { s_pos = s_full; sh_pos = sh_full; } for (int f = 0; f < fc-1; f++) { if (f < sig_vel_initial.size()-1) { sig_vel_class[f] = f; sig_vel[f] = sig_vel_initial[f]; } else { sig_vel_class[f] = sig_vel_initial.size()-1; sig_vel[f] = sig_vel_initial[sig_vel_initial.size()-1]; } if (f < sig_div_initial.size()-1) { sig_div[f] = sig_div_initial[f]; } else { sig_div[f] = sig_div_initial[sig_div_initial.size()-1]; } s_vel[f] = 2 * (int)(sig_cutoff * sig_vel[f]); sh_vel[f] = (int)(sig_cutoff * sig_vel[f]) + 1; if (s_vel[f] > s_pos) { s_vel[f] = s_pos; sh_vel[f] = sh_pos; } } } void MotionEM::computeInitial() { posProb = std::vector>>(pc); velProb = std::vector>>(pc); pred = std::vector>(pc); initialCC = std::vector>>(pc); for (int p = 0; p < pc; p++) { initialCC[p] = std::vector>(fc, Image(s_full,s_full)); posProb[p] = std::vector>(fc); velProb[p] = std::vector>(fc-1); int randSubset; viewParams.getValue(EMDL_PARTICLE_RANDOM_SUBSET, randSubset, p); randSubset -= 1; if (randSubset == 0) { pred[p] = obsModel.predictObservation(projector0, viewParams, p, true, true); } else { pred[p] = obsModel.predictObservation(projector1, viewParams, p, true, true); } MotionRefinement::noiseNormalize(pred[p], sigma2, pred[p]); #pragma omp parallel for num_threads(threads) for (int f = 0; f < fc; f++) { std::stringstream stsf; stsf << f; int threadnum = omp_get_thread_num(); Image ccFs(sh_full,s_full); ccFs.data.xinit = 0; ccFs.data.yinit = 0; for (int y = 0; y < s_full; y++) for (int x = 0; x < sh_full; x++) { ccFs(y,x) = movie[p][f](y,x) * damageWeights[f](y,x) * pred[p](y,x).conj(); } Image ccRs(s_full,s_full); fts_full[threadnum].inverseFourierTransform(ccFs(), ccRs()); for (int y = 0; y < s_full; y++) for (int x = 0; x < s_full; x++) { initialCC[p][f](y,x) = s_full * s_full * ccRs(y,x); } RFLOAT offCC = FilterHelper::maxValue(ccRs); ImageOp::linearCombination(ccRs, offCC, 1.0, -1.0, ccRs); posProb[p][f] = FilterHelper::expImg(ccRs, s_full * s_full); FilterHelper::GaussianEnvelopeCorner2D(posProb[p][f], sig_pos); posProb[p][f] = FilterHelper::cropCorner2D(posProb[p][f], s_pos, s_pos); } } // particles initialized = true; } void MotionEM::iterate() { updateVelocities(); consolidateVelocities(); //smoothVelocities(); updatePositions(false); updatePositions(true); } void 
MotionEM::updateVelocities() { #pragma omp parallel for num_threads(threads) for (int p = 0; p < pc; p++) { int threadnum = omp_get_thread_num(); std::vector> posProbFs(fc); for (int f = 0; f < fc; f++) { fts_pos[threadnum].FourierTransform(posProb[p][f](), posProbFs[f]()); } for (int f = 0; f < fc-1; f++) { Image velProbLargeFs = Image(sh_pos, s_pos); for (int y = 0; y < s_pos; y++) for (int x = 0; x < sh_pos; x++) { DIRECT_A2D_ELEM(velProbLargeFs(), y, x) = DIRECT_A2D_ELEM(posProbFs[f+1](), y, x) * DIRECT_A2D_ELEM(posProbFs[f ](), y, x).conj(); } Image velProbLarge(s_pos,s_pos); fts_pos[threadnum].inverseFourierTransform(velProbLargeFs(), velProbLarge()); velProb[p][f] = FilterHelper::cropCorner2D(velProbLarge, s_vel[f], s_vel[f]); } } } void MotionEM::consolidateVelocities(int maxPc) { const int debug_pc = maxPc > 0? maxPc : pc; #pragma omp parallel for num_threads(threads) for (int f = 0; f < fc-1; f++) { int threadnum = omp_get_thread_num(); std::vector> velProbNext(pc); std::vector> velProbFs(pc); for (int p = 0; p < pc; p++) { velProbNext[p] = Image(s_vel[f],s_vel[f]); fts_vel[threads*sig_vel_class[f] + threadnum].FourierTransform(velProb[p][f](), velProbFs[p]()); } for (int p = 0; p < debug_pc; p++) { const double eps = 1e-25; for (int y = 0; y < s_vel[f]; y++) for (int x = 0; x < s_vel[f]; x++) { if (velProb[p][f](y,x) > eps) { velProbNext[p](y,x) = log(velProb[p][f](y,x)); } else { velProbNext[p](y,x) = log(eps); } } Image velProbB(s_vel[f],s_vel[f]); for (int q = 0; q < pc; q++) { if (q == p) continue; const double dist = (globalPositions[p] - globalPositions[q]).length(); const double sig_real = sig_div[f] * sqrt(dist); velProbB = blurVelocity(velProbFs[q], sig_real, f, threadnum); for (int y = 0; y < s_vel[f]; y++) for (int x = 0; x < s_vel[f]; x++) { if (velProbB(y,x) < eps) { velProbNext[p](y,x) += log(eps); } else { velProbNext[p](y,x) += log(velProbB(y,x)); } } } double maxVal = -std::numeric_limits::max(); for (int y = 0; y < s_vel[f]; y++) for (int x = 0; x < s_vel[f]; x++) { const double yy = y < sh_vel[f]? y : y - s_vel[f]; const double xx = x < sh_vel[f]? 
x : x - s_vel[f]; velProbNext[p](y,x) -= 0.5*(xx*xx + yy*yy)/(sig_vel[f]*sig_vel[f]); if (velProbNext[p](y,x) > maxVal) { maxVal = velProbNext[p](y,x); } } for (int y = 0; y < s_vel[f]; y++) for (int x = 0; x < s_vel[f]; x++) { double v = velProbNext[p](y,x) - maxVal; velProbNext[p](y,x) = exp(v); } velProbNext[p] = NoiseHelper::normalize(velProbNext[p]); } for (int p = 0; p < debug_pc; p++) { for (int y = 0; y < s_vel[f]; y++) for (int x = 0; x < s_vel[f]; x++) { velProb[p][f](y,x) = velProbNext[p](y,x); } } } } void MotionEM::smoothVelocities() { const double sigma_acc = 1.0; #pragma omp parallel for num_threads(threads) for (int p = 0; p < pc; p++) { int threadnum = omp_get_thread_num(); std::vector> velProbNext(fc-1); std::vector> velProbFs(fc-1); for (int f = 0; f < fc-1; f++) { fts_vel[threads*sig_vel_class[f] + threadnum].FourierTransform(velProb[p][f](), velProbFs[f]()); } for (int f = 0; f < fc-1; f++) { velProbNext[f] = velProb[p][f]; if (f > 0) { Image velProbOther = adaptSize( blurVelocity(velProbFs[f-1], sigma_acc, f-1, threadnum), s_vel[f]); for (int y = 0; y < s_vel[f]; y++) for (int x = 0; x < s_vel[f]; x++) { velProbNext[f](y,x) *= velProbOther(y,x); } } if (f < fc-2) { Image velProbOther = adaptSize( blurVelocity(velProbFs[f+1], sigma_acc, f+1, threadnum), s_vel[f]); for (int y = 0; y < s_vel[f]; y++) for (int x = 0; x < s_vel[f]; x++) { velProbNext[f](y,x) *= velProbOther(y,x); } } } for (int f = 0; f < fc-1; f++) { for (int y = 0; y < s_vel[f]; y++) for (int x = 0; x < s_vel[f]; x++) { velProb[p][f](y,x) = velProbNext[f](y,x); } } } } Image MotionEM::blurVelocity(const Image &velProbFs, double sigma, int f, int threadnum) { Image velProbB(s_vel[f], s_vel[f]); const double sig_freq = s_vel[f]/(2.0 * PI * sigma); const double sig2_freq = sig_freq * sig_freq; Image velProbFs_env = velProbFs; for (int y = 0; y < s_vel[f]; y++) for (int x = 0; x < sh_vel[f]; x++) { const double yy = y < sh_vel[f]? y : y - s_vel[f]; const double xx = x; velProbFs_env(y,x) *= exp(-0.5*(xx*xx + yy*yy)/sig2_freq); } fts_vel[threads*sig_vel_class[f] + threadnum].inverseFourierTransform(velProbFs_env(), velProbB()); return velProbB; } Image MotionEM::adaptSize(const Image &img, int s) { if (img.data.ydim > s) { return FilterHelper::cropCorner2D(img, s, s); } else if (img.data.ydim < s) { return FilterHelper::padCorner2D(img, s, s); } else { return img; } } void MotionEM::updatePositions(bool backward, int maxPc) { const int debug_pc = maxPc > 0? maxPc : pc; const int f0 = backward? fc-1 : 0; const int f1 = backward? 0 : fc-1; const int df = backward? -1 : 1; const double eps = 1e-25; #pragma omp parallel for num_threads(threads) for (int p = 0; p < debug_pc; p++) { int threadnum = omp_get_thread_num(); Image posProbMapped(s_pos, s_pos); Image posProbFs(sh_pos, s_pos), velProbLargeFs(sh_pos, s_pos); for (int f = f0; f != f1; f += df) { const int ff = f+df; const int fv = backward? 
ff : f; Image velProbLarge = FilterHelper::padCorner2D(velProb[p][fv], s_pos, s_pos); fts_pos[threadnum].FourierTransform(velProbLarge(), velProbLargeFs()); fts_pos[threadnum].FourierTransform(posProb[p][f](), posProbFs()); for (int y = 0; y < s_pos; y++) for (int x = 0; x < sh_pos; x++) { if (backward) { posProbFs(y,x) = s_pos * s_pos * posProbFs(y,x) * velProbLargeFs(y,x).conj(); } else { posProbFs(y,x) = s_pos * s_pos * posProbFs(y,x) * velProbLargeFs(y,x); } } fts_pos[threadnum].inverseFourierTransform(posProbFs(), posProbMapped()); double sum = 0.0; for (int y = 0; y < s_pos; y++) for (int x = 0; x < s_pos; x++) { if (posProbMapped(y,x) < eps) { posProb[p][ff](y,x) *= eps; } else { posProb[p][ff](y,x) *= posProbMapped(y,x); } sum += posProb[p][ff](y,x); } if (sum > 0.0) { for (int y = 0; y < s_pos; y++) for (int x = 0; x < s_pos; x++) { posProb[p][ff](y,x) /= sum; } } } } } std::vector MotionEM::getTrack(int particle) { if (!initialized) { return std::vector(fc, d2Vector(0.0, 0.0)); } std::vector out(fc); for (int f = 0; f < fc; f++) { double maxProb = 0.0; int bestX = 0, bestY = 0; for (int y = 0; y < s_pos; y++) for (int x = 0; x < s_pos; x++) { double p = posProb[particle][f](y,x); if (p > maxProb) { maxProb = p; bestX = x; bestY = y; } } d2Vector opt(bestX, bestY); if (opt.x >= sh_pos) { opt.x -= s_pos; } if (opt.y >= sh_pos) { opt.y -= s_pos; } out[f] = opt; } return out; } std::vector MotionEM::getGlobalTrack() { std::vector out(fc); const double eps = 1e-30; e_sum = std::vector>(fc); for (int f = 0; f < fc; f++) { e_sum[f] = Image(s_full, s_full); e_sum[f].data.initZeros(); for (int p = 0; p < pc; p++) { for (int y = 0; y < s_full; y++) for (int x = 0; x < s_full; x++) { e_sum[f](y,x) += initialCC[p][f](y,x); } } d2Vector pos = Interpolation::quadraticMaxWrapXY(e_sum[f], eps); if (pos.x >= sh_full) pos.x -= s_full; if (pos.y >= sh_full) pos.y -= s_full; out[f] = pos; } return out; } relion-3.1.3/src/jaz/archive/motion_em.h000066400000000000000000000065341411340063500201460ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
 ***************************************************************************/

#ifndef MOTION_EM_H
#define MOTION_EM_H

#include
#include
#include
#include
#include
#include
#include
#include

class MotionEM
{
    public:

        MotionEM(
            Projector& projector0,
            Projector& projector1,
            const ObservationModel& obsModel,
            MetaDataTable& viewParams,
            const std::vector>>& movie,
            const std::vector& globalPositions,
            const std::vector& sigma2,
            const std::vector>& damageWeights,
            double sig_pos,
            const std::vector& sig_vel_initial,
            const std::vector& sig_div_initial,
            double sig_cutoff,
            int threads);

        Projector& projector0;
        Projector& projector1;
        const ObservationModel& obsModel;
        MetaDataTable& viewParams;
        const std::vector>>& movie;
        const std::vector& globalPositions;
        const std::vector& sigma2;
        const std::vector>& damageWeights;

        double sig_pos, sig_cutoff;
        std::vector sig_vel, sig_div;
        int threads;

        std::vector fts_full, fts_pos, fts_vel;

        int pc, fc, s_full, sh_full, s_pos, sh_pos;
        std::vector s_vel, sh_vel, sig_vel_class;

        bool initialized;

        std::vector>> posProb, velProb, initialCC;
        std::vector> pred;
        std::vector> e_sum;

        void estimate(int iterations);
        void computeInitial();
        void iterate();

        void updateVelocities();
        void consolidateVelocities(int maxPc = -1);
        void smoothVelocities();

        Image blurVelocity(const Image& velProbFs, double sigma, int f, int threadnum);
        Image adaptSize(const Image& img, int s);

        void updatePositions(bool backward, int maxPc = -1);

        std::vector getTrack(int particle);
        std::vector getGlobalTrack();
};

#endif
relion-3.1.3/src/jaz/archive/motion_refinement.cpp000066400000000000000000001303141411340063500222260ustar00rootroot00000000000000/***************************************************************************
 *
 * Author: "Jasenko Zivanov"
 * MRC Laboratory of Molecular Biology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * This complete copyright notice must be included in any revised version of the
 * source code. Additional authorship citations may be added, but existing
 * author citations must be preserved.
***************************************************************************/ #include "motion_refinement.h" #include #include #include #include #include #include #include #include using namespace gravis; Image MotionRefinement::recompose(const std::vector > &obs, const std::vector &pos) { const int w = obs[0].data.xdim; const int h = obs[0].data.ydim; const int ic = obs.size(); Image out(w,h); Image outC(w/2+1,h); outC.data.initZeros(); FourierTransformer ft; Image imgC; for (int i = 0; i < ic; i++) { Image obs2 = obs[i]; ft.FourierTransform(obs2(), imgC()); if (pos[2*i] != 0.0 || pos[2*i + 1] != 0.0) { shiftImageInFourierTransform(imgC(), imgC(), imgC.data.ydim, -pos[2*i], -pos[2*i + 1]); } ImageOp::linearCombination(imgC, outC, 1.0, 1.0/(double)ic, outC); } ft.inverseFourierTransform(outC(), out()); return out; } Image MotionRefinement::recompose(const std::vector > &obs, const std::vector &pos) { const int w = 2*obs[0].data.xdim - 1; const int h = obs[0].data.ydim; const int ic = obs.size(); Image out(w,h); Image outC(obs[0].data.xdim, obs[0].data.ydim); outC.data.initZeros(); FourierTransformer ft; Image imgC; for (int i = 0; i < ic; i++) { imgC = obs[i]; if (pos[2*i] != 0.0 || pos[2*i + 1] != 0.0) { shiftImageInFourierTransform(imgC(), imgC(), imgC.data.ydim, -pos[2*i], -pos[2*i + 1]); } ImageOp::linearCombination(imgC, outC, 1.0, 1.0/(double)ic, outC); } ft.inverseFourierTransform(outC(), out()); return out; } Image MotionRefinement::averageStack(const std::vector > &obs) { Image out(obs[0].data.xdim, obs[0].data.ydim); out.data.initZeros(); const int ic = obs.size(); for (int i = 0; i < ic; i++) { ImageOp::linearCombination(out, obs[i], 1.0, 1.0/(double)ic, out); } return out; } Image MotionRefinement::averageStack(const std::vector > &obs) { Image outC(obs[0].data.xdim, obs[0].data.ydim); outC.data.initZeros(); const int ic = obs.size(); for (int i = 0; i < ic; i++) { ImageOp::linearCombination(outC, obs[i], 1.0, 1.0/(double)ic, outC); } Image outR(2*obs[0].data.xdim - 1, obs[0].data.ydim); FourierTransformer ft; ft.inverseFourierTransform(outC(), outR()); return outR; } std::vector>> MotionRefinement::movieCC( Projector& projector0, Projector& projector1, const ObservationModel &obsModel, MetaDataTable &viewParams, const std::vector > > &movie, const std::vector &sigma2, const std::vector > &damageWeights, std::vector& fts, int threads) { const int pc = movie.size(); const int fc = movie[0].size(); const int s = movie[0][0]().ydim; const int sh = s/2 + 1; std::vector>> out(pc); std::vector> ccsFs(threads); std::vector> ccsRs(threads); for (int t = 0; t < threads; t++) { ccsFs[t] = Image(sh,s); ccsFs[t].data.xinit = 0; ccsFs[t].data.yinit = 0; ccsRs[t] = Image(s,s); ccsRs[t].data.xinit = 0; ccsRs[t].data.yinit = 0; } Image pred; for (int p = 0; p < pc; p++) { out[p] = std::vector>(fc); for (int f = 0; f < fc; f++) { out[p][f] = Image(s,s); } int randSubset; viewParams.getValue(EMDL_PARTICLE_RANDOM_SUBSET, randSubset, p); randSubset -= 1; if (randSubset == 0) { pred = obsModel.predictObservation(projector0, viewParams, p, true, true); } else { pred = obsModel.predictObservation(projector1, viewParams, p, true, true); } noiseNormalize(pred, sigma2, pred); #pragma omp parallel for num_threads(threads) for (int f = 0; f < fc; f++) { int t = omp_get_thread_num(); for (int y = 0; y < s; y++) for (int x = 0; x < sh; x++) { ccsFs[t](y,x) = movie[p][f](y,x) * damageWeights[f](y,x) * pred(y,x).conj(); } fts[t].inverseFourierTransform(ccsFs[t](), ccsRs[t]()); for (int y = 0; y < s; y++) for 
(int x = 0; x < s; x++) { out[p][f](y,x) = s * s * ccsRs[t](y,x); } } } return out; } std::vector MotionRefinement::getGlobalTrack( const std::vector>>& movieCC) { const int pc = movieCC.size(); const int fc = movieCC[0].size(); const int s = movieCC[0][0]().xdim; const int sh = s/2 + 1; std::vector out(fc); const double eps = 1e-30; std::vector> e_sum(fc); for (int f = 0; f < fc; f++) { e_sum[f] = Image(s, s); e_sum[f].data.initZeros(); for (int p = 0; p < pc; p++) { for (int y = 0; y < s; y++) for (int x = 0; x < s; x++) { e_sum[f](y,x) += movieCC[p][f](y,x); } } d2Vector pos = Interpolation::quadraticMaxWrapXY(e_sum[f], eps); if (pos.x >= sh) pos.x -= s; if (pos.y >= sh) pos.y -= s; out[f] = pos; } return out; } std::vector > MotionRefinement::addCCs( const std::vector>> &movieCC) { const int pc = movieCC.size(); const int fc = movieCC[0].size(); const int s = movieCC[0][0]().xdim; std::vector> e_sum(fc); for (int f = 0; f < fc; f++) { e_sum[f] = Image(s, s); e_sum[f].data.initZeros(); for (int p = 0; p < pc; p++) { for (int y = 0; y < s; y++) for (int x = 0; x < s; x++) { e_sum[f](y,x) += movieCC[p][f](y,x); } } } return e_sum; } std::vector MotionRefinement::getGlobalTrack( const std::vector> &movieCcSum) { const int fc = movieCcSum.size(); const int s = movieCcSum[0]().xdim; const int sh = s/2 + 1; std::vector out(fc); const double eps = 1e-30; for (int f = 0; f < fc; f++) { d2Vector pos = Interpolation::quadraticMaxWrapXY(movieCcSum[f], eps); if (pos.x >= sh) pos.x -= s; if (pos.y >= sh) pos.y -= s; out[f] = pos; } return out; } std::vector MotionRefinement::getGlobalOffsets( const std::vector>>& movieCC, const std::vector& globTrack, double sigma, int threads) { const int pc = movieCC.size(); const int fc = movieCC[0].size(); const int s = movieCC[0][0]().xdim; const int sh = s/2 + 1; const double eps = 1e-30; std::vector out(pc); Image weight(s,s); for (int y = 0; y < s; y++) for (int x = 0; x < s; x++) { double xx = x >= sh? x - s: x; double yy = y >= sh? y - s: y; weight(y,x) = exp(-0.5*(xx*xx + yy*yy)/(sigma*sigma)); } #pragma omp parallel for num_threads(threads) for (int p = 0; p < pc; p++) { Image pSum(s,s); pSum.data.initZeros(); for (int f = 0; f < fc; f++) { const d2Vector g = globTrack[f]; for (int y = 0; y < s; y++) for (int x = 0; x < s; x++) { pSum(y,x) += Interpolation::cubicXY(movieCC[p][f], x + g.x, y + g.y, 0, 0, true); } } for (int y = 0; y < s; y++) for (int x = 0; x < s; x++) { pSum(y,x) *= weight(y,x); } d2Vector out_p = Interpolation::quadraticMaxWrapXY(pSum, eps); if (out_p.x >= sh) out_p.x -= s; if (out_p.y >= sh) out_p.y -= s; #pragma omp_atomic out[p] = out_p; } return out; } Image MotionRefinement::crossCorrelation2D( const Image &obs, const Image &predConj, const Image &wgh, const std::vector& sigma2) { int wf = obs().xdim; int w = 2*wf - 1; int h = obs().ydim; Image corr(w, h); Image prod, prod2; ImageOp::multiply(obs, predConj, prod); ImageOp::multiply(wgh, prod, prod2); for (int y = 0; y < h; y++) for (int x = 0; x < wf; x++) { if (x == 0 && y == 0) continue; const double yy = y < wf? 
y : y - h; const double xx = x; const int r = (int) sqrt(xx*xx + yy*yy); if (r >= wf) { DIRECT_A2D_ELEM(prod2.data, y, x) = 0.0; } else { DIRECT_A2D_ELEM(prod2.data, y, x) /= sigma2[r]; } } FourierTransformer ft; ft.inverseFourierTransform(prod2(), corr()); Image out(w,h); for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { DIRECT_A2D_ELEM(out.data, y, x) = (float)DIRECT_A2D_ELEM(corr.data, (y+h/2-1)%h, (x+w/2-1)%w); } return out; } Image MotionRefinement::crossCorrelation2D(const Image &obs, const Image &predConj, const std::vector& sigma2, bool probability, bool normalize) { int wf = obs().xdim; int w = 2*wf - 1; int h = obs().ydim; /*{ Image obsM(wf,h), predM(wf,h); for (int y = 0; y < h; y++) for (int x = 0; x < wf; x++) { if (x == 0 && y == 0) { DIRECT_A2D_ELEM(obs.data, y, x) = 0.0; DIRECT_A2D_ELEM(predConj.data, y, x) = 0.0; continue; } const double yy = y < wf? y : y - h; const double xx = x; const int r = (int) sqrt(xx*xx + yy*yy); if (r >= wf) { DIRECT_A2D_ELEM(obs.data, y, x) = 0.0; DIRECT_A2D_ELEM(predConj.data, y, x) = 0.0; } else { DIRECT_A2D_ELEM(obs.data, y, x) /= sqrt(0.25*PI*w*h*sigma2[r]); DIRECT_A2D_ELEM(predConj.data, y, x) /= sqrt(0.25*PI*w*h*sigma2[r]); } } for (int y = 0; y < h; y++) for (int x = 0; x < wf; x++) { DIRECT_A2D_ELEM(obsM.data, y, x) = DIRECT_A2D_ELEM(obs.data, y, x); DIRECT_A2D_ELEM(predM.data, y, x) = DIRECT_A2D_ELEM(predConj.data, y, x).conj(); } Image obsR(w,h), predR(w,h); FourierTransformer ft; ft.inverseFourierTransform(obsM(), obsR()); ft.inverseFourierTransform(predM(), predR()); double muObs = 0.0; double muPred = 0.0; double varObs = 0.0; double varPred = 0.0; for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { muObs += DIRECT_A2D_ELEM(obsR.data, y, x); muPred += DIRECT_A2D_ELEM(predR.data, y, x); varObs += DIRECT_A2D_ELEM(obsR.data, y, x) * DIRECT_A2D_ELEM(obsR.data, y, x); varPred += DIRECT_A2D_ELEM(predR.data, y, x) * DIRECT_A2D_ELEM(predR.data, y, x); } muObs /= w*h; muPred /= w*h; varObs /= w*h; varPred /= w*h; std::cout << "mu: " << muObs << ", " << muPred << "\n"; std::cout << "s2: " << varObs << ", " << varPred << "\n"; VtkHelper::writeVTK(obsR, "corrDebug/obsR.vtk"); VtkHelper::writeVTK(predR, "corrDebug/predR.vtk"); Image corrF(wf,h); for (int y = 0; y < h; y++) for (int x = 0; x < wf; x++) { DIRECT_A2D_ELEM(corrF.data, y, x) = DIRECT_A2D_ELEM(obsM.data, y, x) * DIRECT_A2D_ELEM(predM.data, y, x).conj(); } Image corrR(w,h); ft.inverseFourierTransform(corrF(), corrR()); VtkHelper::writeVTK(corrR, "corrDebug/corrR_FS.vtk"); Image corrR2(w,h); for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { double cc = 0; for (int yy = 0; yy < h; yy++) for (int xx = 0; xx < w; xx++) { double vo = DIRECT_A2D_ELEM(obsR.data, yy, xx); double vp = DIRECT_A2D_ELEM(predR.data, (yy+y)%h, (xx+x)%w); cc += vo * vp; } DIRECT_A2D_ELEM(corrR2.data, y, x) = cc; } VtkHelper::writeVTK(corrR2, "corrDebug/corrR_RS.vtk"); std::exit(0); }*/ Image prod; ImageOp::multiply(obs, predConj, prod); const double area = 0.25*PI*w*h; if (probability) { for (int y = 0; y < h; y++) for (int x = 0; x < wf; x++) { if (x == 0 && y == 0) continue; const double yy = y < wf? 
y : y - h; const double xx = x; const int r = (int) sqrt(xx*xx + yy*yy); if (r >= wf) { DIRECT_A2D_ELEM(prod.data, y, x) = 0.0; } else { DIRECT_A2D_ELEM(prod.data, y, x) /= sigma2[r]*area; } } } Image corr(w, h); DIRECT_A2D_ELEM(prod.data, 0, 0) = 0.0; FourierTransformer ft; ft.inverseFourierTransform(prod(), corr()); Image out(w,h); if (probability) { if (normalize) { double sum = 0.0; for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { sum += exp(w*h*DIRECT_A2D_ELEM(corr.data, (y+h/2)%h, (x+w/2)%w)); } for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { DIRECT_A2D_ELEM(out.data, y, x) = exp(w*h*DIRECT_A2D_ELEM(corr.data, (y+h/2)%h, (x+w/2)%w)) / sum; } } else { for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { DIRECT_A2D_ELEM(out.data, y, x) = (float) exp(w*h*DIRECT_A2D_ELEM(corr.data, (y+h/2)%h, (x+w/2)%w)); } } } else { for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { DIRECT_A2D_ELEM(out.data, y, x) = (float) (w*h*DIRECT_A2D_ELEM(corr.data, (y+h/2)%h, (x+w/2)%w)); } } return out; } void MotionRefinement::noiseNormalize( const Image &img, const std::vector &sigma2, Image& dest) { int wf = img().xdim; int w = 2*wf - 1; int h = img().ydim; const double area = 0.25*PI*w*h; if (dest.data.xdim != img.data.xdim || dest.data.ydim != img.data.ydim) { dest.data.reshape(img.data); } dest.data.xinit = 0; dest.data.yinit = 0; for (int y = 0; y < h; y++) for (int x = 0; x < wf; x++) { if (x == 0 && y == 0) { dest(y,x) = Complex(0.0); continue; } const double yy = y < wf? y : y - h; const double xx = x; const int r = (int) sqrt(xx*xx + yy*yy); if (r >= wf) { dest(y,x) = Complex(0.0); } else { dest(y,x) = DIRECT_A2D_ELEM(img.data, y, x) / sqrt(sigma2[r]*area); } } } std::vector> MotionRefinement::readTrack(std::string fn, int pc, int fc) { std::vector> shift(pc); std::ifstream trackIn(fn); for (int p = 0; p < pc; p++) { if (!trackIn.good()) { std::cerr << "MotionRefinement::readTrack: error reading tracks in " << fn << "\n"; REPORT_ERROR("MotionRefinement::readTrack: error reading tracks in " + fn + "\n"); } shift[p] = std::vector(fc); char dummy[4069]; trackIn.getline(dummy, 4069); for (int f = 0; f < fc; f++) { char dummy[4069]; trackIn.getline(dummy, 4069); std::istringstream sts(dummy); sts >> shift[p][f].x; sts >> shift[p][f].y; } trackIn.getline(dummy, 4069); } return shift; } void MotionRefinement::writeTracks( const std::vector>& tracks, std::string fn) { const int pc = tracks.size(); const int fc = tracks[0].size(); std::string path = fn.substr(0, fn.find_last_of('/')); mktree(path); std::ofstream ofs(fn); MetaDataTable mdt; mdt.setName("general"); mdt.setIsList(true); mdt.addObject(); mdt.setValue(EMDL_PARTICLE_NUMBER, pc); mdt.write(ofs); mdt.clear(); for (int p = 0; p < pc; p++) { std::stringstream sts; sts << p; mdt.setName(sts.str()); for (int f = 0; f < fc; f++) { mdt.addObject(); mdt.setValue(EMDL_ORIENT_ORIGIN_X, tracks[p][f].x); mdt.setValue(EMDL_ORIENT_ORIGIN_Y, tracks[p][f].y); } mdt.write(ofs); mdt.clear(); } } std::vector> MotionRefinement::readTracks(std::string fn) { std::ifstream ifs(fn); if (ifs.fail()) { REPORT_ERROR("MotionRefinement::readTracks: unable to read " + fn + "."); } MetaDataTable mdt; mdt.readStar(ifs, "general"); int pc; if (!mdt.getValue(EMDL_PARTICLE_NUMBER, pc)) { REPORT_ERROR("MotionRefinement::readTracks: missing particle number in "+fn+"."); } std::vector> out(pc); int fc = 0, lastFc = 0; for (int p = 0; p < pc; p++) { std::stringstream sts; sts << p; mdt.readStar(ifs, sts.str()); fc = mdt.numberOfObjects(); if (p > 0 && fc != 
lastFc) { REPORT_ERROR("MotionRefinement::readTracks: broken file: "+fn+"."); } lastFc = fc; out[p] = std::vector(fc); for (int f = 0; f < fc; f++) { mdt.getValue(EMDL_ORIENT_ORIGIN_X, out[p][f].x, f); mdt.getValue(EMDL_ORIENT_ORIGIN_Y, out[p][f].y, f); } } return out; } d3Vector MotionRefinement::measureValueScaleReal( const Image& data, const Image& ref) { int wf = data().xdim; int w = 2*wf - 1; int h = data().ydim; Image dataC = data, refC = ref; DIRECT_A2D_ELEM(dataC.data, 0, 0) = Complex(0.0); DIRECT_A2D_ELEM(refC.data, 0, 0) = Complex(0.0); Image dataR(w,h), refR(w,h); FourierTransformer ft; ft.inverseFourierTransform(dataC(), dataR()); ft.inverseFourierTransform(refC(), refR()); double num = 0.0; double denom = 0.0; for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { RFLOAT d = DIRECT_A2D_ELEM(dataR.data, y, x); RFLOAT r = DIRECT_A2D_ELEM(refR.data, y, x); num += d * r; denom += r * r; } /*{ VtkHelper::writeVTK(dataR, "debug/dataR.vtk"); VtkHelper::writeVTK(refR, "debug/refR.vtk"); for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { DIRECT_A2D_ELEM(refR.data, y, x) *= (denom/num); } VtkHelper::writeVTK(refR, "debug/refR2.vtk"); std::exit(0); }*/ return d3Vector(num / denom, num, denom); } d3Vector MotionRefinement::measureValueScale( const Image& data, const Image& ref) { int w = data().xdim; int h = data().ydim; double num = 0.0; double denom = 0.0; for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { double d = DIRECT_A2D_ELEM(data.data, y, x).abs(); double r = DIRECT_A2D_ELEM(ref.data, y, x).abs(); num += d * r; denom += d * d; } return d3Vector(num / denom, num, denom); } void MotionRefinement::testCC(const Image &obs, const Image &predConj, const std::vector &sigma2) { int wf = obs().xdim; int w = 2*wf - 1; int h = obs().ydim; Image obsW(wf,h), predW(wf,h); for (int y = 0; y < h; y++) for (int x = 0; x < wf; x++) { const double yy = y < wf? y : y - h; const double xx = x; const int r = (int) sqrt(xx*xx + yy*yy); if (r == 0 || r >= wf) { DIRECT_A2D_ELEM(obsW.data, y, x) = 0.0; DIRECT_A2D_ELEM(predW.data, y, x) = 0.0; } else { DIRECT_A2D_ELEM(obsW.data, y, x) = DIRECT_A2D_ELEM(obs.data, y, x) / sqrt(sigma2[r]); DIRECT_A2D_ELEM(predW.data, y, x) = DIRECT_A2D_ELEM(predConj.data, y, x).conj() / sqrt(sigma2[r]); } } std::vector sig2new(wf, 0.0), wgh(wf, 0.0); for (int y = 0; y < h; y++) for (int x = 0; x < wf; x++) { const Complex z = DIRECT_A2D_ELEM(obsW.data, y, x); const double yy = y < w? 
y : y - h; const double xx = x; const int r = (int) sqrt(xx*xx + yy*yy); if (r >= wf) continue; sig2new[r] += z.norm(); wgh[r] += 1.0; } for (int x = 0; x < wf; x++) { if (wgh[x] > 0.0) { sig2new[x] /= wgh[x]; } } std::ofstream ofs("spec_new.dat"); for (int x = 0; x < wf; x++) { ofs << x << " " << sig2new[x] << "\n"; } FourierTransformer ft; Image obsWR(w,h), predWR(w,h); ft.inverseFourierTransform(obsW(), obsWR()); ft.inverseFourierTransform(predW(), predWR()); ImageLog::write(obsWR, "debug/obsWR"); ImageLog::write(predWR, "debug/predWR"); double var = 0.0; for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { double v = DIRECT_A2D_ELEM(obsWR.data, y, x); var += v*v; } var /= w*h; std::cout << "var real: " << var << " = " << PI*w*h/4.0 << "?\n"; Image corrR(w, h); corrR.data.initZeros(); for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { double cc = 0.0; for (int yy = 0; yy < h; yy++) for (int xx = 0; xx < w; xx++) { RFLOAT v0 = DIRECT_A2D_ELEM(predWR.data, yy, xx); RFLOAT v1 = DIRECT_A2D_ELEM(obsWR.data, (yy+y)%h, (xx+x)%w); cc += v0*v1; } DIRECT_A2D_ELEM(corrR.data, y, x) = cc; } ImageLog::write(corrR, "debug/Wcc_rs"); Image corr(w, h); Image prod; ImageOp::multiply(obs, predConj, prod); for (int y = 0; y < h; y++) for (int x = 0; x < wf; x++) { if (x == 0 && y == 0) continue; const double yy = y < wf? y : y - h; const double xx = x; const int r = (int) sqrt(xx*xx + yy*yy); if (r >= wf) { DIRECT_A2D_ELEM(prod.data, y, x) = 0.0; } else { DIRECT_A2D_ELEM(prod.data, y, x) /= sigma2[r]; } } ft.inverseFourierTransform(prod(), corr()); for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { DIRECT_A2D_ELEM(corr.data, y, x) *= w*h; } ImageLog::write(corr, "debug/Wcc_fs"); } Image MotionRefinement::zeroPad(const Image& img, RFLOAT ratio, RFLOAT taper) { const long w = img.data.xdim; const long h = img.data.ydim; const long ww = (long)(ratio*w); const long hh = (long)(ratio*h); const long tx = (long)(taper * (RFLOAT)w); const long ty = (long)(taper * (RFLOAT)h); Image out(ww,hh); out.data.initZeros(); const long x0 = (ww-w)/2; const long y0 = (hh-h)/2; RFLOAT avg = 0.0; for (long y = 0; y < h; y++) for (long x = 0; x < w; x++) { avg += DIRECT_A2D_ELEM(img.data, y, x); } avg /= (w*h); for (long y = 0; y < h; y++) for (long x = 0; x < w; x++) { RFLOAT tw = 1.0; if (x < tx || x >= w-tx || y < ty || y >= h-ty) { RFLOAT ex0 = x < tx? (x+1) / (RFLOAT)(tx+1) : 1.0; RFLOAT ex1 = x >= w-tx? (w-x) / (RFLOAT)(tx+1) : 1.0; RFLOAT ey0 = y < ty? (y+1) / (RFLOAT)(ty+1) : 1.0; RFLOAT ey1 = y >= h-ty? 
(h-y) / (RFLOAT)(ty+1) : 1.0; ex0 = (1.0 - cos(PI * ex0))/2.0; ex1 = (1.0 - cos(PI * ex1))/2.0; ey0 = (1.0 - cos(PI * ey0))/2.0; ey1 = (1.0 - cos(PI * ey1))/2.0; tw = ex0 * ex1 * ey0 * ey1; } DIRECT_A2D_ELEM(out.data, y+y0, x+x0) += tw * (DIRECT_A2D_ELEM(img.data, y, x) - avg); } return out; } std::vector > MotionRefinement::collectiveMotion( const std::vector > >& correlation) { const int pc = correlation.size(); if (pc == 0) return std::vector >(0); const int fc = correlation[0].size(); std::vector > corrSum(fc); const int w = correlation[0][0].data.xdim; const int h = correlation[0][0].data.ydim; for (int f = 0; f < fc; f++) { corrSum[f] = Image(w,h); corrSum[f].data.initZeros(); } for (int p = 0; p < pc; p++) { for (int f = 0; f < fc; f++) { ImageOp::linearCombination(corrSum[f], correlation[p][f], 1.0, 1.0, corrSum[f]); } } return corrSum; } std::vector>> MotionRefinement::blockMotion( const std::vector > >& correlation, std::vector positions, int parts, int micrographWidth, std::vector &numbers) { const int pc = correlation.size(); if (pc == 0) return std::vector>>(0); const int fc = correlation[0].size(); const int w = correlation[0][0].data.xdim; const int h = correlation[0][0].data.ydim; const int qc = parts*parts; std::vector>> corrSum(qc); for (int q = 0; q < qc; q++) { corrSum[q] = std::vector>(fc); for (int f = 0; f < fc; f++) { corrSum[q][f] = Image(w,h); corrSum[q][f].data.initZeros(); } } for (int p = 0; p < pc; p++) { int qx = (int)(parts * positions[p].x / micrographWidth); int qy = (int)(parts * positions[p].y / micrographWidth); if (qx > parts || qy > parts) continue; int q = qy*parts + qx; numbers[q]++; for (int f = 0; f < fc; f++) { ImageOp::linearCombination(corrSum[q][f], correlation[p][f], 1.0, 1.0, corrSum[q][f]); } } return corrSum; } std::vector MotionRefinement::findMaxima(std::vector> & corrSum) { const int fc = corrSum.size(); const int w = corrSum[0].data.xdim; const int h = corrSum[0].data.ydim; const double cx = w/2; const double cy = h/2; std::vector out(fc); for (int f = 0; f < fc; f++) { d2Vector m = Interpolation::quadraticMaxXY(corrSum[f]); out[f] = d2Vector(m.x - cx, m.y - cy); } return out; } std::vector > MotionRefinement::computeInitialPositions( const std::vector > > &correlation) { const int pc = correlation.size(); if (pc == 0) return std::vector >(0); std::vector > corrSum = collectiveMotion(correlation); std::vector maxima = findMaxima(corrSum); std::vector> out(pc, maxima); return out; } std::vector > MotionRefinement::optimize( const std::vector > >& correlation, const std::vector& positions, const std::vector >& initial, double lambda, double mu, double sigma) { const int pc = correlation.size(); if (pc == 0) return std::vector >(0); const int fc = correlation[0].size(); std::vector > distWeights(pc); const double s2 = sigma * sigma; for (int p = 0; p < pc; p++) { distWeights[p] = std::vector(pc); for (int q = 0; q < pc; q++) { const double d2 = (positions[p] - positions[q]).norm2(); distWeights[p][q] = exp(-d2/s2); } } MotionFit mf(correlation, distWeights, lambda, mu); std::vector x0 = pack(initial); std::cout << "initial f = " << mf.f(x0, 0) << "\n"; std::cout << "initial f_data = " << mf.f_data(x0) << "\n"; /*NelderMead nm; std::vector final = nm.optimize( x0, mf, 1.0, 1e-5, 10000, 1.0, 2.0, 0.5, 0.5, true);*/ std::vector final = GradientDescent::optimize( x0, mf, 1.0, 1e-20, 100000, 0.0, 0.0, true); return unpack(final, pc, fc); } std::vector > > MotionRefinement::visualize( const std::vector >& positions, int pc, int fc, int w, int 
h) { std::vector > > out(pc); for (int p = 0; p < pc; p++) { out[p].resize(fc); for (int f = 0; f < fc; f++) { out[p][f] = Image(w,h); out[p][f].data.initZeros(); gravis::d2Vector pos(positions[p][f].x + w/2, positions[p][f].y + h/2); int xi = (int)pos.x; int yi = (int)pos.y; double xf = pos.x - xi; double yf = pos.y - yi; if (xi >= 0 && xi < w && yi >= 0 && yi < h) { DIRECT_A2D_ELEM(out[p][f].data, yi, xi) = (1.0 - xf) * (1.0 - yf); } if (xi+1 >= 0 && xi+1 < w && yi >= 0 && yi < h) { DIRECT_A2D_ELEM(out[p][f].data, yi, xi+1) = xf * (1.0 - yf); } if (xi >= 0 && xi < w && yi+1 >= 0 && yi+1 < h) { DIRECT_A2D_ELEM(out[p][f].data, yi+1, xi) = (1.0 - xf) * yf; } if (xi+1 >= 0 && xi+1 < w && yi+1 >= 0 && yi+1 < h) { DIRECT_A2D_ELEM(out[p][f].data, yi+1, xi+1) = xf * yf; } } } return out; } std::vector > MotionRefinement::collapsePaths( const std::vector > >& paths) { const int pc = paths.size(); if (pc == 0) return std::vector >(0); const int fc = paths[0].size(); std::vector > pathSum(pc); const int w = paths[0][0].data.xdim; const int h = paths[0][0].data.ydim; for (int p = 0; p < pc; p++) { pathSum[p] = Image(w,h); pathSum[p].data.initZeros(); } for (int p = 0; p < pc; p++) { for (int f = 0; f < fc; f++) { ImageOp::linearCombination(pathSum[p], paths[p][f], 1.0, 1.0, pathSum[p]); } } return pathSum; } std::vector > MotionRefinement::unpack(const std::vector &pos, int pc, int fc) { std::vector > out(pc); for (int p = 0; p < pc; p++) { out[p].resize(fc); for (int f = 0; f < fc; f++) { out[p][f] = gravis::d2Vector( pos[2*(p*fc + f) + 0], pos[2*(p*fc + f) + 1]); } } return out; } std::vector MotionRefinement::pack(const std::vector > &pos) { const int pc = pos.size(); if (pc == 0) return std::vector(0); const int fc = pos[0].size(); std::vector out(2*pc*fc); for (int p = 0; p < pc; p++) { for (int f = 0; f < fc; f++) { out[2*(p*fc + f) + 0] = pos[p][f].x; out[2*(p*fc + f) + 1] = pos[p][f].y; } } return out; } ParticleMotionFit::ParticleMotionFit(const std::vector > &correlation, RFLOAT lambda_vel, RFLOAT lambda_acc) : correlation(correlation), lambda_vel(lambda_vel), lambda_acc(lambda_acc) { } double ParticleMotionFit::f(const std::vector &x, void* tempStorage) const { const double cx = correlation[0]().xdim/2; const double cy = correlation[0]().ydim/2; const int ic = correlation.size(); double out = 0.0; for (int i = 0; i < ic; i++) { const double xi = x[2*i] + cx; const double yi = x[2*i + 1] + cy; //out -= Interpolation::linearXY(correlation[i], xi, yi, 0); out -= Interpolation::cubicXY(correlation[i], xi, yi, 0); if (i > 0) { const double xn = x[2*(i-1)] + cx; const double yn = x[2*(i-1)+1] + cy; const double dx = xi - xn; const double dy = yi - yn; out += lambda_vel * (dx*dx + dy*dy); } if (i > 0 && i < ic-1) { const double xp = x[2*(i-1)] + cx; const double yp = x[2*(i-1)+1] + cy; const double xn = x[2*(i+1)] + cx; const double yn = x[2*(i+1)+1] + cy; const double ax = xp + xn - 2.0 * xi; const double ay = yp + yn - 2.0 * yi; out += lambda_acc * (ax*ax + ay*ay); } } return out; } MotionFit::MotionFit( const std::vector > > &correlation, const std::vector > &distWeights, RFLOAT lambda, RFLOAT mu) : correlation(correlation), distWeights(distWeights), lambda(lambda), mu(mu) { } double MotionFit::f(const std::vector &x, void* tempStorage) const { const int pc = correlation.size(); if (pc == 0) return 0.0; const int fc = correlation[0].size(); if (fc == 0) return 0.0; const int w = correlation[0][0].data.xdim; const int h = correlation[0][0].data.ydim; const double mx = w/2; const double my = 
h/2; double e = 0.0; const double eps = 0.001; // cutoff at 2.62 std.dev. for (int p = 0; p < pc; p++) { for (int f = 0; f < fc; f++) { const double xpf = x[2*(p*fc + f) + 0]; const double ypf = x[2*(p*fc + f) + 1]; e -= Interpolation::cubicXY(correlation[p][f], xpf + mx, ypf + my, 0); if (f > 0 && f < fc-1) { const double xpfn = x[2*(p*fc + f - 1) + 0]; const double ypfn = x[2*(p*fc + f - 1) + 1]; const double xpfp = x[2*(p*fc + f + 1) + 0]; const double ypfp = x[2*(p*fc + f + 1) + 1]; const double cx = xpfn + xpfp - 2.0 * xpf; const double cy = ypfn + ypfp - 2.0 * ypf; e += lambda * (cx * cx + cy * cy); } if (f > 0) { const double xpfn = x[2*(p*fc + f - 1) + 0]; const double ypfn = x[2*(p*fc + f - 1) + 1]; for (int q = p+1; q < pc; q++) { if (distWeights[p][q] < eps) continue; const double xqf = x[2*(q*fc + f) + 0]; const double yqf = x[2*(q*fc + f) + 1]; const double xqfn = x[2*(q*fc + f - 1) + 0]; const double yqfn = x[2*(q*fc + f - 1) + 1]; const double cx = (xpf - xpfn) - (xqf - xqfn); const double cy = (ypf - ypfn) - (yqf - yqfn); e += mu * (cx * cx + cy * cy); } } } } return e; } double MotionFit::f_data(const std::vector &x) const { const int pc = correlation.size(); if (pc == 0) return 0.0; const int fc = correlation[0].size(); if (fc == 0) return 0.0; const int w = correlation[0][0].data.xdim; const int h = correlation[0][0].data.ydim; const double mx = w/2; const double my = h/2; double e = 0.0; const double eps = 0.001; // cutoff at 2.62 std.dev. for (int p = 0; p < pc; p++) { for (int f = 0; f < fc; f++) { const double xpf = x[2*(p*fc + f) + 0]; const double ypf = x[2*(p*fc + f) + 1]; e -= Interpolation::cubicXY(correlation[p][f], xpf + mx, ypf + my, 0); } } return e; } void MotionFit::grad(const std::vector &x, std::vector &gradDest, void* tempStorage) const { const int pc = correlation.size(); if (pc == 0) return; const int fc = correlation[0].size(); if (fc == 0) return; const int w = correlation[0][0].data.xdim; const int h = correlation[0][0].data.ydim; const double mx = w/2; const double my = h/2; const double eps = 0.001; // cutoff at 2.62 std.dev. 
for (int p = 0; p < pc; p++) for (int f = 0; f < fc; f++) { gradDest[2*(p*fc + f) + 0] = 0.0; gradDest[2*(p*fc + f) + 1] = 0.0; } for (int p = 0; p < pc; p++) { for (int f = 0; f < fc; f++) { const double xpf = x[2*(p*fc + f) + 0]; const double ypf = x[2*(p*fc + f) + 1]; //e -= Interpolation::cubicXY(correlation[p][f], xpf + mx, ypf + my, 0); gravis::t2Vector g = Interpolation::cubicXYgrad(correlation[p][f], xpf + mx, ypf + my, 0); gradDest[2*(p*fc + f) + 0] -= g.x; gradDest[2*(p*fc + f) + 1] -= g.y; if (f > 0 && f < fc-1) { const double xpfn = x[2*(p*fc + f - 1) + 0]; const double ypfn = x[2*(p*fc + f - 1) + 1]; const double xpfp = x[2*(p*fc + f + 1) + 0]; const double ypfp = x[2*(p*fc + f + 1) + 1]; const double cx = xpfn + xpfp - 2.0 * xpf; const double cy = ypfn + ypfp - 2.0 * ypf; //e += lambda * (cx * cx + cy * cy); gradDest[2*(p*fc + f - 1) + 0] += 2.0 * lambda * cx; gradDest[2*(p*fc + f - 1) + 1] += 2.0 * lambda * cy; gradDest[2*(p*fc + f) + 0] -= 4.0 * lambda * cx; gradDest[2*(p*fc + f) + 1] -= 4.0 * lambda * cy; gradDest[2*(p*fc + f + 1) + 0] += 2.0 * lambda * cx; gradDest[2*(p*fc + f + 1) + 1] += 2.0 * lambda * cy; } if (f > 0) { const double xpfn = x[2*(p*fc + f - 1) + 0]; const double ypfn = x[2*(p*fc + f - 1) + 1]; for (int q = p+1; q < pc; q++) { if (distWeights[p][q] < eps) continue; const double xqf = x[2*(q*fc + f) + 0]; const double yqf = x[2*(q*fc + f) + 1]; const double xqfn = x[2*(q*fc + f - 1) + 0]; const double yqfn = x[2*(q*fc + f - 1) + 1]; const double cx = (xpf - xpfn) - (xqf - xqfn); const double cy = (ypf - ypfn) - (yqf - yqfn); //e += mu * (cx * cx + cy * cy); gradDest[2*(p*fc + f - 1) + 0] -= 2.0 * mu * cx; gradDest[2*(p*fc + f - 1) + 1] -= 2.0 * mu * cy; gradDest[2*(p*fc + f) + 0] += 2.0 * mu * cx; gradDest[2*(p*fc + f) + 1] += 2.0 * mu * cy; gradDest[2*(q*fc + f - 1) + 0] += 2.0 * mu * cx; gradDest[2*(q*fc + f - 1) + 1] += 2.0 * mu * cy; gradDest[2*(q*fc + f) + 0] -= 2.0 * mu * cx; gradDest[2*(q*fc + f) + 1] -= 2.0 * mu * cy; } } } } } std::vector > MotionRefinement::readCollectivePaths(std::string filename) { std::ifstream is(filename); int pc = 0, fc = 0; std::vector > coords(0); while (is.good()) { std::string line; std::getline(is,line); if (line.length() == 0) break; int delims = 0; for (int i = 0; i < line.length(); i++) { if (line[i] == '[' || line[i] == ']' || line[i] == ',') { line[i] = ' '; delims++; } } int fcp = delims / 3; if (pc == 0) { fc = fcp; } else if (fcp != fc) { std::stringstream isss; isss << (pc+1); REPORT_ERROR("insufficient number of frames for particle "+isss.str()); } std::istringstream iss(line); coords.push_back(std::vector(fc)); int f = 0; while (iss.good()) { iss >> coords[pc][f].x; iss >> coords[pc][f].y; f++; } pc++; } return coords; } void MotionRefinement::writeCollectivePaths(const std::vector > &data, std::string filename) { const int pc = data.size(); const int fc = data[0].size(); std::ofstream ofs(filename); for (int p = 0; p < pc; p++) { for (int f = 0; f < fc; f++) { ofs << data[p][f] << " "; } ofs << "\n"; } } std::vector > > MotionRefinement::readPaths(std::string fn, int imgNum, int blockNum, int frameNum) { std::vector > > out(imgNum*blockNum); std::ifstream is(fn); for (int i = 0; i < imgNum; i++) { for (int j = 0; j < blockNum; j++) { std::string line; std::getline(is, line); int delims = 0; for (int k = 0; k < line.length(); k++) { if (line[k] == '[' || line[k] == ']' || line[k] == ',') { line[k] = ' '; delims++; } } int fcp = delims / 3; if (fcp < frameNum) { std::stringstream rst; rst << (blockNum*i + 
blockNum); REPORT_ERROR("not enough frames in "+fn+", line "+rst.str()); } out[blockNum*i + j] = std::make_pair(0, std::vector(frameNum)); std::istringstream iss(line); iss >> out[blockNum*i + j].first; for (int f = 0; f < frameNum; f++) { iss >> out[blockNum*i + j].second[f].x; iss >> out[blockNum*i + j].second[f].y; } } } return out; } std::vector> MotionRefinement::centerBlocks( std::string filenameFull, std::string filenameBlocks, int imgCount, int blockCount, int frameCount, std::vector& particleNumbers, std::vector &totalParticleNumbers) { std::vector > > full = readPaths( filenameFull, imgCount, 1, frameCount); std::vector > > blocks = readPaths( filenameBlocks, imgCount, blockCount, frameCount); std::vector> out(imgCount*blockCount); for (int i = 0; i < imgCount; i++) { for (int j = 0; j < blockCount; j++) { out[i*blockCount + j] = std::vector(frameCount); particleNumbers[i*blockCount + j] = blocks[i*blockCount + j].first; totalParticleNumbers[i*blockCount + j] = full[i].first; for (int f = 0; f < frameCount; f++) { out[i*blockCount + j][f] = blocks[i*blockCount + j].second[f] - full[i].second[f]; } } } return out; } relion-3.1.3/src/jaz/archive/motion_refinement.h000066400000000000000000000175051411340063500217010ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #ifndef MOTION_REFINEMENT_H #define MOTION_REFINEMENT_H #include #include #include #include #include #include #include #include #include #include #include #include class ParticleMotionFit : public Optimization { public: ParticleMotionFit( const std::vector >& correlation, RFLOAT lambda_vel, RFLOAT lambda_acc); double f(const std::vector& x, void* tempStorage) const; private: const std::vector >& correlation; RFLOAT lambda_vel, lambda_acc; }; class MotionFit : public DifferentiableOptimization { public: MotionFit( const std::vector > >& correlation, const std::vector >& distWeights, RFLOAT lambda, RFLOAT mu); double f(const std::vector& x, void* tempStorage) const; double f_data(const std::vector& x) const; void grad(const std::vector& x, std::vector& gradDest, void* tempStorage) const; private: const std::vector > >& correlation; const std::vector >& distWeights; RFLOAT lambda, mu; }; class MotionRefinement { public: static Image recompose(const std::vector >& obs, const std::vector& pos); static Image recompose(const std::vector >& obs, const std::vector& pos); static Image averageStack(const std::vector >& obs); static Image averageStack(const std::vector >& obs); static std::vector>> movieCC( Projector& projector0, Projector& projector1, const ObservationModel& obsModel, MetaDataTable& viewParams, const std::vector>>& movie, const std::vector& sigma2, const std::vector>& damageWeights, std::vector& fts, int threads); static std::vector getGlobalTrack( const std::vector>>& movieCC); static std::vector> addCCs( const std::vector>>& movieCC); static std::vector getGlobalTrack( const std::vector>& movieCcSum); static std::vector getGlobalOffsets( const std::vector>>& movieCC, const std::vector& globTrack, double sigma, int threads); static Image crossCorrelation2D(const Image& obs, const Image& predConj, const Image& wgh, const std::vector &sigma2); static Image crossCorrelation2D( const Image& obs, const Image& predConj, const std::vector &sigma2, bool probability = false, bool normalize = false); static void noiseNormalize( const Image& img, const std::vector &sigma2, Image& dest); static std::vector> readTrack( std::string fn, int pc, int fc); static void writeTracks( const std::vector>& tracks, std::string fn); static std::vector> readTracks( std::string fn); static gravis::d3Vector measureValueScaleReal( const Image& obs, const Image& ref); static gravis::d3Vector measureValueScale( const Image& obs, const Image& ref); static void testCC( const Image& obs, const Image& predConj, const std::vector &sigma2); static Image zeroPad( const Image& img, RFLOAT ratio = 2.0, RFLOAT taper = 0.1); static std::vector > collectiveMotion( const std::vector > >& correlation); static std::vector>> blockMotion( const std::vector > >& correlation, std::vector positions, int parts, int micrographWidth, std::vector& numbers); static std::vector findMaxima(std::vector > &corrSum); static std::vector > computeInitialPositions( const std::vector > >& correlation); static std::vector > optimize( const std::vector > >& correlation, const std::vector& positions, const std::vector >& initial, double lambda, double mu, double sigma); static std::vector > > visualize( const std::vector >& positions, int pc, int fc, int w, int h); static std::vector > collapsePaths( const std::vector > >& paths); static std::vector > unpack( const std::vector& pos, int pc, int fc); static std::vector pack( const std::vector >& pos); static std::vector > 
readCollectivePaths(std::string filename); static void writeCollectivePaths( const std::vector >& data, std::string filename); static std::vector > > readPaths(std::string fn, int imgNum, int blockNum, int frameNum); static std::vector> centerBlocks( std::string filenameFull, std::string filenameBlocks, int imgCount, int blockCount, int frameCount, std::vector &particleNumbers, std::vector &totalParticleNumbers); }; #endif relion-3.1.3/src/jaz/archive/refinement_program.cpp000066400000000000000000000464451411340063500224030ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include #include #include #include #include RefinementProgram::RefinementProgram(bool singleReference, bool doesMovies) : singleReference(singleReference), optStar(false), noStar(false), optReference(false), noReference(false), noTilt(false), doesMovies(doesMovies), hasCorrMic(false), last_gainFn("") { } int RefinementProgram::init(int argc, char *argv[]) { IOParser parser; try { parser.setCommandLine(argc, argv); parser.addSection("General options"); starFn = parser.getOption("--i", "Input STAR file with a list of particles", optStar? "" : "NULL"); if (!noReference) { if (singleReference) { reconFn0 = parser.getOption("--m", "Reference map", optReference? "" : "NULL"); } else { reconFn0 = parser.getOption("--m1", "Reference map, half 1", optReference? "" : "NULL"); reconFn1 = parser.getOption("--m2", "Reference map, half 2", optReference? "" : "NULL"); } maskFn = parser.getOption("--mask", "Reference mask", ""); fscFn = parser.getOption("--f", "Input STAR file with the FSC of the reference", ""); } else { maskFn = ""; fscFn = ""; } outPath = parser.getOption("--out", "Output path"); if (doesMovies) { imgPath = parser.getOption("--mov", "Path to movies", ""); corrMicFn = parser.getOption("--corr_mic", "List of uncorrected micrographs (e.g. 
corrected_micrographs.star)", ""); preextracted = parser.checkOption("--preex", "Preextracted movie stacks"); meta_path = parser.getOption("--meta", "Path to per-movie metadata star files", ""); gain_path = parser.getOption("--gain_path", "Path to gain references", ""); movie_ending = parser.getOption("--mov_end", "Ending of movie filenames", ""); movie_toReplace = parser.getOption("--mov_toReplace", "Replace this string in micrograph names...", ""); movie_replaceBy = parser.getOption("--mov_replaceBy", "..by this one", ""); movie_angpix = textToFloat(parser.getOption("--mps", "Pixel size of input movies (Angst/pix)", "-1")); coords_angpix = textToFloat(parser.getOption("--cps", "Pixel size of particle coordinates in star-file (Angst/pix)", "-1")); hotCutoff = textToFloat(parser.getOption("--hot", "Clip hot pixels to this max. value (-1 = off, TIFF only)", "-1")); firstFrame = textToInteger(parser.getOption("--first_frame", "", "1")) - 1; lastFrame = textToInteger(parser.getOption("--last_frame", "", "-1")) - 1; saveMem = parser.checkOption("--sbs", "Load movies slice-by-slice to save memory (slower)"); } else { imgPath = parser.getOption("--img", "Path to images", ""); } angpix = textToFloat(parser.getOption("--angpix", "Pixel resolution (angst/pix) - read from STAR file by default", "0.0")); if (!noReference) { Cs = textToFloat(parser.getOption("--Cs", "Spherical aberration - read from STAR file by default", "-1")); kV = textToFloat(parser.getOption("--kV", "Electron energy (keV) - read from STAR file by default", "-1")); paddingFactor = textToFloat(parser.getOption("--pad", "Padding factor", "2")); } else { Cs = -1; kV = -1; paddingFactor = 2; } if (noTilt) { beamtilt_x = 0.0; beamtilt_y = 0.0; applyTilt = false; beamtilt_xx = 1.0; beamtilt_xy = 0.0; beamtilt_yy = 1.0; } else { beamtilt_x = textToFloat(parser.getOption("--beamtilt_x", "Beamtilt in X-direction (in mrad)", "0.")); beamtilt_y = textToFloat(parser.getOption("--beamtilt_y", "Beamtilt in Y-direction (in mrad)", "0.")); applyTilt = ABS(beamtilt_x) > 0. 
|| ABS(beamtilt_y) > 0.; beamtilt_xx = textToFloat(parser.getOption("--beamtilt_xx", "Anisotropic beamtilt, XX-coefficient", "1.")); beamtilt_xy = textToFloat(parser.getOption("--beamtilt_xy", "Anisotropic beamtilt, XY-coefficient", "0.")); beamtilt_yy = textToFloat(parser.getOption("--beamtilt_yy", "Anisotropic beamtilt, YY-coefficient", "1.")); } anisoTilt = beamtilt_xx != 1.0 || beamtilt_xy != 0.0 || beamtilt_yy != 1.0; nr_omp_threads = textToInteger(parser.getOption("--jomp", "Number of OMP threads", "1")); minMG = textToInteger(parser.getOption("--min_MG", "First micrograph index", "0")); maxMG = textToInteger(parser.getOption("--max_MG", "Last micrograph index", "-1")); debug = parser.checkOption("--debug", "Write debugging data"); debugMov = parser.checkOption("--debug_mov", "Write debugging data for movie loading"); int rco = readMoreOptions(parser, argc, argv); if (argc == 1) { parser.writeUsage(std::cerr); return 1; } if (parser.checkForErrors()) return 1; if (rco != 0) return rco; bool allGood = true; if (doesMovies && movie_angpix <= 0 && corrMicFn == "") { std::cerr << "Movie pixel size (--mps) is required unless a corrected_micrographs.star (--corr_mic) is provided.\n"; allGood = false; } if (doesMovies && coords_angpix <= 0 && corrMicFn == "") { std::cerr << "Coordinates pixel size (--cps) is required unless a corrected_micrographs.star (--corr_mic) is provided.\n"; allGood = false; } if (!allGood) return 12; } catch (RelionError XE) { parser.writeUsage(std::cout); std::cerr << XE; exit(1); } bool allGood = true; if (!noReference) { try { maps[0].read(reconFn0); } catch (RelionError XE) { std::cerr << "Unable to read map: " << reconFn0 << "\n"; return 2; } if (!singleReference) { try { maps[1].read(reconFn1); } catch (RelionError XE) { std::cerr << "Unable to read map: " << reconFn1 << "\n"; return 3; } } if (maps[0].data.xdim != maps[0].data.ydim || maps[0].data.ydim != maps[0].data.zdim) { REPORT_ERROR(reconFn0 + " is not cubical.\n"); } if (!singleReference) { if (maps[1].data.xdim != maps[1].data.ydim || maps[1].data.ydim != maps[1].data.zdim) { REPORT_ERROR(reconFn1 + " is not cubical.\n"); } if ( maps[0].data.xdim != maps[1].data.xdim || maps[0].data.ydim != maps[1].data.ydim || maps[0].data.zdim != maps[1].data.zdim) { REPORT_ERROR(reconFn0 + " and " + reconFn1 + " are of unequal size.\n"); } } if (maskFn != "") { if (singleReference) std::cout << "masking reference...\n"; else std::cout << "masking references...\n"; Image mask, maskedRef; try { mask.read(maskFn); } catch (RelionError XE) { std::cout << "Unable to read mask: " << maskFn << "\n"; return 4; } mask.read(maskFn); ImageOp::multiply(mask, maps[0], maskedRef); maps[0] = maskedRef; if (!singleReference) { ImageOp::multiply(mask, maps[1], maskedRef); maps[1] = maskedRef; } } s = maps[0].data.xdim; sh = s/2 + 1; if (singleReference) std::cout << "transforming reference...\n"; else std::cout << "transforming references...\n"; projectors[0] = Projector(s, TRILINEAR, paddingFactor, 10, 2); projectors[0].computeFourierTransformMap(maps[0].data, powSpec[0].data, maps[0].data.xdim); if (!singleReference) { projectors[1] = Projector(s, TRILINEAR, paddingFactor, 10, 2); projectors[1].computeFourierTransformMap(maps[1].data, powSpec[1].data, maps[1].data.xdim); } } useFsc = fscFn != ""; MetaDataTable fscMdt; if (useFsc) { fscMdt.read(fscFn, "fsc"); if (!fscMdt.containsLabel(EMDL_SPECTRAL_IDX)) { std::cerr << fscFn << " does not contain a value for " << EMDL::label2Str(EMDL_SPECTRAL_IDX) << ".\n"; allGood = false; } if 
(!fscMdt.containsLabel(EMDL_POSTPROCESS_FSC_TRUE)) { std::cerr << fscFn << " does not contain a value for " << EMDL::label2Str(EMDL_POSTPROCESS_FSC_TRUE) << ".\n"; allGood = false; } } if (!allGood) { return 1; } if (!noStar) { std::cout << "reading " << starFn << "...\n"; mdt0.read(starFn); if (Cs < 0.0) { mdt0.getValue(EMDL_CTF_CS, Cs, 0); std::cout << " + Using spherical aberration from the input STAR file: " << Cs << "\n"; } else { setForAll(EMDL_CTF_CS, Cs); } if (kV < 0.0) { mdt0.getValue(EMDL_CTF_VOLTAGE, kV, 0); std::cout << " + Using voltage from the input STAR file: " << kV << " kV\n"; } else { setForAll(EMDL_CTF_VOLTAGE, kV); } if (angpix <= 0.0) { RFLOAT mag, dstep; mdt0.getValue(EMDL_CTF_MAGNIFICATION, mag, 0); mdt0.getValue(EMDL_CTF_DETECTOR_PIXEL_SIZE, dstep, 0); angpix = 10000 * dstep / mag; std::cout << " + Using pixel size calculated from magnification and detector pixel size in the input STAR file: " << angpix << "\n"; } if (doesMovies) { if (movie_toReplace != "") { std::string name; for (int i = 0; i < mdt0.numberOfObjects(); i++) { mdt0.getValue(EMDL_MICROGRAPH_NAME, name, i); if (i == 0) std::cout << name << " -> "; std::string::size_type pos0 = name.find(movie_toReplace); if (pos0 != std::string::npos) { std::string::size_type pos1 = pos0 + movie_toReplace.length(); std::string before = name.substr(0, pos0); std::string after = pos1 < name.length()? name.substr(pos1) : ""; name = before + movie_replaceBy + after; } if (i == 0) std::cout << name << "\n"; mdt0.setValue(EMDL_MICROGRAPH_NAME, name, i); } } } mdts = StackHelper::splitByStack(&mdt0); gc = maxMG >= 0? maxMG : mdts.size()-1; g0 = minMG; std::cout << "mg range: " << g0 << ".." << gc << "\n"; } obsModel = LegacyObservationModel(angpix, Cs, kV * 1e3); if (applyTilt && anisoTilt) { obsModel.setAnisoTilt(beamtilt_xx, beamtilt_xy, beamtilt_yy); } int rc0 = _init(); if (useFsc) { RefinementHelper::drawFSC(&fscMdt, freqWeight1D, freqWeight); } else if (!noReference) { freqWeight1D = std::vector(sh, 1.0); freqWeight = Image(sh,s); freqWeight.data.initConstant(1.0); } if (doesMovies && corrMicFn != "") { MetaDataTable corrMic; corrMic.read(corrMicFn); mic2meta.clear(); std::string micName, metaName; for (int i = 0; i < corrMic.numberOfObjects(); i++) { corrMic.getValueToString(EMDL_MICROGRAPH_NAME, micName, i); corrMic.getValueToString(EMDL_MICROGRAPH_METADATA_NAME, metaName, i); mic2meta[micName] = metaName; } hasCorrMic = true; } return rc0; } int RefinementProgram::run() { return _run(); } double RefinementProgram::angstToPixFreq(double a) { return 2.0*sh*angpix/a; } double RefinementProgram::pixToAngstFreq(double p) { return 2.0*sh*angpix/p; } void RefinementProgram::loadInitialMovieValues() { if (preextracted) { std::string name, fullName, movieName; mdts[0].getValue(EMDL_IMAGE_NAME, fullName, 0); mdts[0].getValue(EMDL_MICROGRAPH_NAME, movieName, 0); name = fullName.substr(fullName.find("@")+1); std::string finName; if (imgPath == "") { finName = name; } else { finName = imgPath + "/" + movieName.substr(movieName.find_last_of("/")+1); } Image stack0; stack0.read(finName, false); const int pc0 = mdts[0].numberOfObjects(); const bool zstack = stack0.data.zdim > 1; const int stackSize = zstack? 
stack0.data.zdim : stack0.data.ndim; if (lastFrame < 0) fc = stackSize / pc0 - firstFrame; else fc = lastFrame - firstFrame + 1; } else { if (hasCorrMic) { std::string mgFn; mdts[0].getValueToString(EMDL_MICROGRAPH_NAME, mgFn, 0); std::string metaFn = mic2meta[mgFn]; if (meta_path != "") { metaFn = meta_path + "/" + metaFn.substr(metaFn.find_last_of("/")+1); } micrograph = Micrograph(metaFn); if (movie_angpix <= 0) { movie_angpix = micrograph.angpix; std::cout << " + Using movie pixel size from " << metaFn << ": " << movie_angpix << " A\n"; } else { std::cout << " + Using movie pixel size from command line: " << movie_angpix << " A\n"; } if (coords_angpix <= 0) { coords_angpix = micrograph.angpix * micrograph.getBinningFactor(); std::cout << " + Using coord. pixel size from " << metaFn << ": " << coords_angpix << " A\n"; } else { std::cout << " + Using coord. pixel size from command line: " << coords_angpix << " A\n"; } if (lastFrame < 0) fc = micrograph.getNframes() - firstFrame; else fc = lastFrame - firstFrame + 1; } else { REPORT_ERROR("You can no longer use this program without micrograph metadata STAR files."); } } } std::vector>> RefinementProgram::loadMovie( int g, int pc, std::vector& fts) { std::vector>> movie; if (preextracted) { movie = StackHelper::loadMovieStackFS( &mdts[g], imgPath, false, nr_omp_threads, &fts, firstFrame, lastFrame); } else { std::string mgFn; mdts[g].getValueToString(EMDL_MICROGRAPH_NAME, mgFn, 0); if (hasCorrMic) { std::string metaFn = mic2meta[mgFn]; if (meta_path != "") { metaFn = meta_path + "/" + metaFn.substr(metaFn.find_last_of("/")+1); } micrograph = Micrograph(metaFn); std::string mgFn = micrograph.getMovieFilename(); std::string gainFn = micrograph.getGainFilename(); if (movie_ending != "") { mgFn.substr(0, mgFn.find_last_of(".")+1) + movie_ending; } if (imgPath != "") { mgFn = imgPath + "/" + mgFn.substr(mgFn.find_last_of("/")+1); } bool mgHasGain = false; if (gainFn != "") { if (gain_path != "") { gainFn = gain_path + "/" + gainFn.substr(gainFn.find_last_of("/")+1); } if (gainFn != last_gainFn) { lastGainRef.read(gainFn); last_gainFn = gainFn; } mgHasGain = true; } MultidimArray defectMask; bool hasDefect = (micrograph.fnDefect != "" || micrograph.hotpixelX.size() != 0); if (hasDefect) micrograph.fillDefectAndHotpixels(defectMask); movie = StackHelper::extractMovieStackFS( &mdts[g], mgHasGain? &lastGainRef : 0, hasDefect ? &defectMask : 0, mgFn, angpix, coords_angpix, movie_angpix, s, nr_omp_threads, true, firstFrame, lastFrame, hotCutoff, debugMov, saveMem); } else { REPORT_ERROR("You can no longer use this program without micrograph metadata STAR files."); } #pragma omp parallel for num_threads(nr_omp_threads) for (int p = 0; p < pc; p++) { StackHelper::varianceNormalize(movie[p], false); } } if (angpix < coords_angpix) { std::cerr << "WARNING: pixel size (--angpix) is greater than the AutoPick pixel size (--coords_angpix)\n"; if (coords_angpix < angpix + 0.01) { std::cerr << " This is probably a rounding error. It is recommended to set --angpix (" << angpix << ") to at least " << coords_angpix << "\n"; } } if (angpix < movie_angpix) { std::cerr << "WARNING: pixel size (--angpix) is greater than the movie pixel size (--movie_angpix)\n"; if (movie_angpix < angpix + 0.01) { std::cerr << " This is probably a rounding error. 
It is recommended to set --angpix (" << angpix << ") to at least " << movie_angpix << "\n"; } } return movie; } void RefinementProgram::setForAll(EMDLabel label, RFLOAT value) { for (int i = 0; i < mdt0.numberOfObjects(); i++) { mdt0.setValue(label, value, i); } } std::string RefinementProgram::getMicrographTag(int m) { std::string tag; mdts[m].getValue(EMDL_IMAGE_NAME, tag, 0); tag = tag.substr(0,tag.find_last_of('.')); tag = tag.substr(tag.find_first_of('@')+1); return tag; } relion-3.1.3/src/jaz/archive/refinement_program.h000066400000000000000000000065101411340063500220350ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef REFINEMENT_PROGRAM_H #define REFINEMENT_PROGRAM_H #include #include #include #include #include #include #include #include #include #include #include #include "src/micrograph_model.h" #include class RefinementProgram { public: RefinementProgram(bool singleReference = false, bool doesMovies = false); // options: bool singleReference, doesMovies, debug, debugMov, applyTilt, anisoTilt, useFsc, optStar, noStar, optReference, noReference, noTilt, preextracted, coordsAtMgRes, hasCorrMic, saveMem; long maxMG, minMG; int firstFrame, lastFrame; RFLOAT angpix, paddingFactor, beamtilt_x, beamtilt_y, beamtilt_xx, beamtilt_xy, beamtilt_yy, hotCutoff; int nr_omp_threads; double movie_angpix, coords_angpix; std::string starFn, reconFn0, reconFn1, maskFn, outPath, imgPath, fscFn, meta_path, movie_ending, movie_toReplace, movie_replaceBy, corrMicFn, gain_path, last_gainFn; std::map mic2meta; // data: Image maps[2]; Image powSpec[2]; Image freqWeight, lastGainRef; Projector projectors[2]; Micrograph micrograph; MetaDataTable mdt0; std::vector mdts; RFLOAT Cs, lambda, kV; LegacyObservationModel obsModel; int s, sh, fc; long g0, gc; std::vector freqWeight1D; int init(int argc, char *argv[]); int run(); virtual int readMoreOptions(IOParser& parser, int argc, char *argv[]) {return 0;} virtual int _init(){return 0;} virtual int _run() = 0; double angstToPixFreq(double a); double pixToAngstFreq(double p); void loadInitialMovieValues(); std::vector>> loadMovie( int g, int pc, std::vector& fts); void setForAll(EMDLabel label, RFLOAT value); std::string getMicrographTag(int m); }; #endif relion-3.1.3/src/jaz/complex_io.cpp000066400000000000000000000017751411340063500172320ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; 
either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include /* Everything is moved to .h for template functions ... */ relion-3.1.3/src/jaz/complex_io.h000066400000000000000000000043471411340063500166750ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef COMPLEX_IO_H #define COMPLEX_IO_H #include #include #include class ComplexIO { public: template static void write(const MultidimArray >& img, std::string fnBase, std::string fnSuffix) { Image temp(img.xdim, img.ydim, img.zdim, img.ndim); FOR_ALL_DIRECT_NZYX_ELEMENTS_IN_MULTIDIMARRAY(img) { DIRECT_NZYX_ELEM(temp.data, l, k, i, j) = DIRECT_NZYX_ELEM(img, l, k, i, j).real; } temp.write(fnBase + "_real" + fnSuffix); FOR_ALL_DIRECT_NZYX_ELEMENTS_IN_MULTIDIMARRAY(img) { DIRECT_NZYX_ELEM(temp.data, l, k, i, j) = DIRECT_NZYX_ELEM(img, l, k, i, j).imag; } temp.write(fnBase + "_imag" + fnSuffix); } template static void read(Image >& img, std::string fnBase, std::string fnSuffix) { Image temp; temp.read(fnBase + "_real" + fnSuffix); img = Image(temp.data.xdim, temp.data.ydim, temp.data.zdim, temp.data.ndim); FOR_ALL_DIRECT_NZYX_ELEMENTS_IN_MULTIDIMARRAY(img.data) { DIRECT_NZYX_ELEM(img.data, l, k, i, j).real = DIRECT_NZYX_ELEM(temp.data, l, k, i, j); } temp.read(fnBase + "_imag" + fnSuffix); FOR_ALL_DIRECT_NZYX_ELEMENTS_IN_MULTIDIMARRAY(img.data) { DIRECT_NZYX_ELEM(img.data, l, k, i, j).imag = DIRECT_NZYX_ELEM(temp.data, l, k, i, j); } } }; #endif relion-3.1.3/src/jaz/config.h000066400000000000000000000016731411340063500160030ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #define JAZ_USE_OPENMP 1 relion-3.1.3/src/jaz/convolution_helper.cpp000066400000000000000000000054571411340063500210130ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include Image ConvolutionHelper::convolve2D(Image &img0, Image &img1) { FourierTransformer ft; return convolve2D(img0, img1, ft); } Image ConvolutionHelper::convolve2D(Image &img0, Image &img1, FourierTransformer &ft) { int w = img0().xdim; int wf = w/2 + 1; int h = img0().ydim; Image I0, I1, P(wf,h); ft.FourierTransform(img0(), I0()); ft.FourierTransform(img1(), I1()); double sc = w*h; for (int y = 0; y < h; y++) for (int x = 0; x < wf; x++) { DIRECT_A2D_ELEM(P.data, y, x) = sc * DIRECT_A2D_ELEM(I0.data, y, x) * DIRECT_A2D_ELEM(I1.data, y, x).conj(); } Image out(w,h); ft.inverseFourierTransform(P(), out()); return out; } Image ConvolutionHelper::gaussianKernel2D(double sigma, int w, int h, bool normalize, bool centered, bool half) { Image out(w,h); const double s2 = sigma*sigma; double sum = 0.0; for (int yy = 0; yy < h; yy++) for (int xx = 0; xx < w; xx++) { double x, y; if (centered) { if (half) { x = xx; y = yy - h/2 - 1; } else { x = xx - w/2 - 1; y = yy - h/2 - 1; } } else { if (half) { x = xx; y = yy <= h/2 + 1? yy : yy - h; } else { x = xx <= w/2 + 1? xx : xx - w; y = yy <= h/2 + 1? yy : yy - h; } } out(yy,xx) = exp(-0.5*(x*x+y*y)/s2); sum += out(yy,xx); } if (normalize) { for (int yy = 0; yy < h; yy++) for (int xx = 0; xx < w; xx++) { out(yy,xx) /= sum; } } return out; } relion-3.1.3/src/jaz/convolution_helper.h000066400000000000000000000034171411340063500204520ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. 
Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef CONVOLUTION_HELPER_H #define CONVOLUTION_HELPER_H #include #include class ConvolutionHelper { public: static Image convolve2D(Image& img0, Image& img1, FourierTransformer &ft); static Image convolve2D(Image& img0, Image& img1); static Image gaussianKernel2D(double sigma, int w, int h, bool normalize = true, bool centered = false, bool half = false); inline static double sigmaFreq(double sigmaReal, int h) { return h/(2.0*PI*sigmaReal); } inline static double sigmaReal(double sigmaFreq, int h) { return h/(2.0*PI*sigmaFreq); } }; #endif relion-3.1.3/src/jaz/ctf/000077500000000000000000000000001411340063500151325ustar00rootroot00000000000000relion-3.1.3/src/jaz/ctf/aberration_estimator.cpp000066400000000000000000000255141411340063500220620ustar00rootroot00000000000000#include "aberration_estimator.h" #include "tilt_helper.h" #include "ctf_refiner.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include using namespace gravis; AberrationEstimator::AberrationEstimator() : ready(false) {} void AberrationEstimator::read(IOParser &parser, int argc, char *argv[]) { kmin = textToFloat(parser.getOption("--kmin_aberr", "Inner freq. threshold for symmetrical aberration estimation [Å]", "20.0")); std::string aberrToken = "--even_aberr_max_n"; aberr_n_max = textToInteger(parser.getOption(aberrToken, "Maximum degree of Zernike polynomials used to fit even (i.e. symmetrical) aberrations", "4")); xring0 = textToDouble(parser.getOption("--xr0_a", "Exclusion ring start [Å]", "-1")); xring1 = textToDouble(parser.getOption("--xr1_a", "Exclusion ring end [Å]", "-1")); } void AberrationEstimator::init( int verb, int nr_omp_threads, bool debug, bool diag, std::string outPath, ReferenceMap* reference, ObservationModel* obsModel) { this->verb = verb; this->nr_omp_threads = nr_omp_threads; this->debug = debug; this->diag = diag; this->outPath = outPath; this->reference = reference; this->obsModel = obsModel; angpix = obsModel->getPixelSizes(); obsModel->getBoxSizes(s, sh); ready = true; } void AberrationEstimator::processMicrograph( long g, MetaDataTable& mdt, const std::vector>& obs, const std::vector>& pred) { if (!ready) { REPORT_ERROR("ERROR: AberrationEstimator::processMicrograph: AberrationEstimator not initialized."); } std::vector>> particlesByOpticsGroup = obsModel->splitParticlesByOpticsGroup(mdt); for (int pog = 0; pog < particlesByOpticsGroup.size(); pog++) { const int og = particlesByOpticsGroup[pog].first; const std::vector& partIndices = particlesByOpticsGroup[pog].second; // TODO: SHWS 29mar2018: when data is CTF-premultiplied: do we need to change below?? if (obsModel->getCtfPremultiplied(og)) std::cerr << "TODO: check aberration estimation with CTF-premultiplied data!!" 
<< std::endl; const int pc = partIndices.size(); std::vector> Axx(nr_omp_threads, Image(sh[og],s[og])), Axy(nr_omp_threads, Image(sh[og],s[og])), Ayy(nr_omp_threads, Image(sh[og],s[og])), bx(nr_omp_threads, Image(sh[og],s[og])), by(nr_omp_threads, Image(sh[og],s[og])); const double as = (double)s[og] * angpix[og]; #pragma omp parallel for num_threads(nr_omp_threads) for (long pp = 0; pp < pc; pp++) { const int p = partIndices[pp]; CTF ctf; ctf.readByGroup(mdt, obsModel, p); int t = omp_get_thread_num(); for (int y = 0; y < s[og]; y++) for (int x = 0; x < sh[og]; x++) { const double xf = x; const double yf = y < sh[og]? y : y - s[og]; const double gamma_i = ctf.getGamma(xf/as, yf/as); const double cg = cos(gamma_i); const double sg = sin(gamma_i); Complex zobs = obs[p](y,x); Complex zprd = pred[p](y,x); double zz = zobs.real * zprd.real + zobs.imag * zprd.imag; double nr = zprd.norm(); Axx[t](y,x) += nr * sg * sg; Axy[t](y,x) += nr * cg * sg; Ayy[t](y,x) += nr * cg * cg; bx[t](y,x) -= zz * sg; by[t](y,x) -= zz * cg; } } // Combine the accumulated weights from all threads for this subset Image AxxSum(sh[og],s[og]), AxySum(sh[og],s[og]), AyySum(sh[og],s[og]), bxSum(sh[og],s[og]), bySum(sh[og],s[og]); for (int threadnum = 0; threadnum < nr_omp_threads; threadnum++) { ImageOp::linearCombination(AxxSum, Axx[threadnum], 1.0, 1.0, AxxSum); ImageOp::linearCombination(AxySum, Axy[threadnum], 1.0, 1.0, AxySum); ImageOp::linearCombination(AyySum, Ayy[threadnum], 1.0, 1.0, AyySum); ImageOp::linearCombination(bxSum, bx[threadnum], 1.0, 1.0, bxSum); ImageOp::linearCombination(bySum, by[threadnum], 1.0, 1.0, bySum); } // Write out the intermediate results per-micrograph: std::string outRoot = CtfRefiner::getOutputFilenameRoot(mdt, outPath); std::stringstream sts; sts << (og+1); AxxSum.write(outRoot+"_aberr-Axx_optics-group_" + sts.str() + ".mrc"); AxySum.write(outRoot+"_aberr-Axy_optics-group_" + sts.str() + ".mrc"); AyySum.write(outRoot+"_aberr-Ayy_optics-group_" + sts.str() + ".mrc"); bxSum.write(outRoot+"_aberr-bx_optics-group_" + sts.str() + ".mrc"); bySum.write(outRoot+"_aberr-by_optics-group_" + sts.str() + ".mrc"); } } void AberrationEstimator::parametricFit( const std::vector& mdts, MetaDataTable& optOut, std::vector &fn_eps) { if (!ready) { REPORT_ERROR("ERROR: AberrationEstimator::parametricFit: AberrationEstimator not initialized."); } if (verb > 0) { std::cout << " + Fitting symmetrical aberrations ..." 
<< std::endl; } const int gc = mdts.size(); const int ogc = obsModel->numberOfOpticsGroups(); std::vector groupUsed(ogc,false); #pragma omp parallel for num_threads(nr_omp_threads) for (int og = 0; og < ogc; og++) { std::stringstream sts; sts << og+1; std::string ogstr = sts.str(); Image AxxSum(sh[og],s[og]), AxySum(sh[og],s[og]), AyySum(sh[og],s[og]), bxSum(sh[og],s[og]), bySum(sh[og],s[og]); for (long g = 0; g < gc; g++) { std::string outRoot = CtfRefiner::getOutputFilenameRoot(mdts[g], outPath); if ( exists(outRoot+"_aberr-Axx_optics-group_" + ogstr + ".mrc") && exists(outRoot+"_aberr-Axy_optics-group_" + ogstr + ".mrc") && exists(outRoot+"_aberr-Ayy_optics-group_" + ogstr + ".mrc") && exists(outRoot+"_aberr-bx_optics-group_" + ogstr + ".mrc") && exists(outRoot+"_aberr-by_optics-group_" + ogstr + ".mrc")) { Image Axx(sh[og],s[og]), Axy(sh[og],s[og]), Ayy(sh[og],s[og]), bx(sh[og],s[og]), by(sh[og],s[og]); Axx.read(outRoot+"_aberr-Axx_optics-group_" + ogstr + ".mrc"); Axy.read(outRoot+"_aberr-Axy_optics-group_" + ogstr + ".mrc"); Ayy.read(outRoot+"_aberr-Ayy_optics-group_" + ogstr + ".mrc"); bx.read(outRoot+"_aberr-bx_optics-group_" + ogstr + ".mrc"); by.read(outRoot+"_aberr-by_optics-group_" + ogstr + ".mrc"); AxxSum() += Axx(); AxySum() += Axy(); AyySum() += Ayy(); bxSum() += bx(); bySum() += by(); groupUsed[og] = true; } } if (!groupUsed[og]) { continue; } Image wgh0(sh[og],s[og]), wgh(sh[og],s[og]), phase(sh[og],s[og]); Image optXY(sh[og],s[og]); wgh0 = reference->getHollowWeight(kmin, s[og], angpix[og]); for (int y = 0; y < s[og]; y++) for (int x = 0; x < sh[og]; x++) { d2Matrix A( AxxSum(y,x), AxySum(y,x), AxySum(y,x), AyySum(y,x)); d2Vector b(bxSum(y,x), bySum(y,x)); double det = A(0,0) * A(1,1) - A(1,0) * A(0,1); if (det != 0.0) { d2Matrix Ai = A; Ai.invert(); d2Vector opt = Ai * b; optXY(y,x) = Complex(opt.x, opt.y); phase(y,x) = std::abs(opt.x) > 0.0? atan2(opt.y, opt.x) : 0.0; wgh(y,x) = wgh0(y,x) * sqrt(std::abs(det)); } else { optXY(y,x) = 0.0; phase(y,x) = 0.0; wgh0(y,x) = 0.0; wgh(y,x) = 0.0; } } if (xring1 > 0.0) { for (int y = 0; y < s[og]; y++) for (int x = 0; x < sh[og]; x++) { double xx = x; double yy = y < sh[og]? y : y - s[og]; double rp = sqrt(xx*xx + yy*yy); double ra = s[og] * angpix[og] / rp; if (ra > xring0 && ra <= xring1) { wgh0(y,x) = 0.0; wgh(y,x) = 0.0; } } } if (debug) { Image full; FftwHelper::decenterDouble2D(wgh(), full()); ImageLog::write(full, outPath + "aberr_weight-full_optics-group_"+ogstr); } std::vector > imgs_for_eps; std::vector scales; std::vector labels; Image fit, phaseFull, fitFull; FftwHelper::decenterDouble2D(phase.data, phaseFull.data); ImageLog::write(phaseFull, outPath + "aberr_delta-phase_per-pixel_optics-group_"+ogstr); imgs_for_eps.push_back(phaseFull); scales.push_back(1.); labels.push_back("Symm. obs [-1, 1] "+obsModel->getGroupName(og)); imgs_for_eps.push_back(phaseFull); scales.push_back(PI); labels.push_back("Symm. 
obs [-pi, pi] "+obsModel->getGroupName(og)); { std::vector Zernike_coeffs = TiltHelper::fitEvenZernike( phase, wgh, angpix[og], obsModel->getMagMatrix(og), aberr_n_max, &fit); FftwHelper::decenterDouble2D(fit.data, fitFull.data); std::stringstream sts; sts << aberr_n_max; ImageLog::write(fitFull, outPath + "aberr_delta-phase_lin-fit_optics-group_" +ogstr+"_N-"+sts.str()); { Image residual; residual.data = phaseFull.data - fitFull.data; ImageLog::write(residual, outPath + "aberr_delta-phase_lin-fit_optics-group_" +ogstr+"_N-"+sts.str()+"_residual"); } std::vector Zernike_coeffs_opt = TiltHelper::optimiseEvenZernike( optXY, wgh0, AxxSum, AxySum, AyySum, angpix[og], obsModel->getMagMatrix(og), aberr_n_max, Zernike_coeffs, &fit); FftwHelper::decenterDouble2D(fit.data, fitFull.data); ImageLog::write(fitFull, outPath + "aberr_delta-phase_iter-fit_optics-group_" +ogstr+"_N-"+sts.str()); imgs_for_eps.push_back(fitFull); scales.push_back(1.); labels.push_back("Symm. (N="+sts.str()+") fit [-1, 1] "+obsModel->getGroupName(og)); imgs_for_eps.push_back(fitFull); scales.push_back(PI); labels.push_back("Symm. (N="+sts.str()+") fit [-pi, pi] "+obsModel->getGroupName(og)); // extract Q0, Cs, defocus and astigmatism? #pragma omp critical { optOut.setValue(EMDL_IMAGE_EVEN_ZERNIKE_COEFFS, Zernike_coeffs_opt, og); } } FileName fn_root = outPath + "symmetric_aberrations_optics-group_"+ ogstr; ColorHelper::writeSignedToEPS(fn_root, 2, imgs_for_eps, scales, labels); fn_eps.push_back(fn_root+".eps"); } } bool AberrationEstimator::isFinished(const MetaDataTable &mdt) { if (!ready) { REPORT_ERROR("ERROR: AberrationEstimator::isFinished: AberrationEstimator not initialized."); } std::string outRoot = CtfRefiner::getOutputFilenameRoot(mdt, outPath); bool allDone = true; std::vector ogs = obsModel->getOptGroupsPresent_oneBased(mdt); for (int i = 0; i < ogs.size(); i++) { const int og = ogs[i]; std::stringstream sts; sts << og; if ( !exists(outRoot+"_aberr-Axx_optics-group_" + sts.str() + ".mrc") || !exists(outRoot+"_aberr-Axy_optics-group_" + sts.str() + ".mrc") || !exists(outRoot+"_aberr-Ayy_optics-group_" + sts.str() + ".mrc") || !exists(outRoot+"_aberr-bx_optics-group_" + sts.str() + ".mrc") || !exists(outRoot+"_aberr-by_optics-group_" + sts.str() + ".mrc")) { allDone = false; break; } } return allDone; } relion-3.1.3/src/jaz/ctf/aberration_estimator.h000066400000000000000000000024251411340063500215230ustar00rootroot00000000000000#ifndef ABERRATION_ESTIMATOR_H #define ABERRATION_ESTIMATOR_H #include class IOParser; class ReferenceMap; class ObservationModel; class AberrationEstimator { public: AberrationEstimator(); void read(IOParser& parser, int argc, char *argv[]); void init( int verb, int nr_omp_threads, bool debug, bool diag, std::string outPath, ReferenceMap* reference, ObservationModel* obsModel); // Compute per-pixel information for one micrograph void processMicrograph( long g, MetaDataTable& mdt, const std::vector>& obs, const std::vector>& pred); // Sum up per-pixel information from all micrographs, // then fit beam-tilt model to the per-pixel fit void parametricFit( const std::vector& mdts, MetaDataTable& optOut, std::vector &fn_eps); // Has this mdt been processed already? bool isFinished(const MetaDataTable& mdt); private: // cmd. 
line options (see read()) double kmin; int aberr_n_max; double xring0, xring1; // parameters obtained through init() int verb, nr_omp_threads; bool debug, diag, ready; std::string outPath; std::vector s, sh; std::vector angpix; ReferenceMap* reference; ObservationModel* obsModel; }; #endif relion-3.1.3/src/jaz/ctf/bfactor_refiner.cpp000066400000000000000000000373511411340063500210010ustar00rootroot00000000000000#include "bfactor_refiner.h" #include "ctf_refiner.h" #include #include #include #include #include #include #include #include #include #include #include using namespace gravis; BFactorRefiner::BFactorRefiner() : ready(false) {} void BFactorRefiner::read(IOParser &parser, int argc, char *argv[]) { perMicrograph = parser.checkOption("--bfac_per_mg", "Estimate B-factors per micrograph, instead of per particle"); min_B = textToDouble(parser.getOption("--bfac_min_B", "Minimal allowed B-factor", "-30")); max_B = textToDouble(parser.getOption("--bfac_max_B", "Maximal allowed B-factor", "300")); min_scale = textToDouble(parser.getOption("--bfac_min_scale", "Minimal allowed scale-factor (essential for outlier rejection)", "0.2")); kmin = textToDouble(parser.getOption("--kmin_bfac", "Inner freq. threshold for B-factor estimation [Angst]", "30.0")); } void BFactorRefiner::init( int verb, int nr_omp_threads, bool debug, bool diag, std::string outPath, ReferenceMap *reference, ObservationModel *obsModel) { this->verb = verb; this->nr_omp_threads = nr_omp_threads; this->debug = debug; this->diag = diag; this->outPath = outPath; this->reference = reference; this->obsModel = obsModel; angpix = obsModel->getPixelSizes(); obsModel->getBoxSizes(s, sh); freqWeights.resize(angpix.size()); for (int i = 0; i < angpix.size(); i++) { freqWeights[i] = reference->getHollowWeight(kmin, s[i], angpix[i]); } ready = true; } void BFactorRefiner::processMicrograph( long g, MetaDataTable& mdt, const std::vector>& obs, const std::vector>& pred, bool do_ctf_padding) { if (!ready) { REPORT_ERROR("ERROR: BFactorRefiner::processMicrograph: BFactorRefiner not initialized."); } long pc = obs.size(); std::stringstream stsg; stsg << g; std::vector>> valsPerPart(nr_omp_threads); const int ogc = obsModel->numberOfOpticsGroups(); std::vector as(ogc), min_B_px(ogc), max_B_px(ogc); for (int og = 0; og < ogc; og++) { as[og] = s[og] * angpix[og]; min_B_px[og] = min_B / (as[og]*as[og]); max_B_px[og] = max_B / (as[og]*as[og]); } // search recursively numIters times, scanning the range at stepsPerIter points each time: const int stepsPerIter = 20; const int numIters = 5; if (perMicrograph) { std::vector> t_rad(nr_omp_threads), s_rad(nr_omp_threads); // find optics group of minimal pixel size present in this micrograph std::vector pogs = obsModel->getOptGroupsPresent_zeroBased(mdt); int ogRef = pogs[0]; double angpixMin = angpix[pogs[0]]; for (int i = 1; i < pogs.size(); i++) { const int og = pogs[i]; if (angpix[og] < angpixMin) { angpixMin = angpix[og]; ogRef = og; } } const int s_ref = s[ogRef]; const int sh_ref = sh[ogRef]; for (int t = 0; t < nr_omp_threads; t++) { t_rad[t] = std::vector(sh_ref, 0.0); s_rad[t] = std::vector(sh_ref, 0.0); } // Parallel loop over all particles in this micrograph #pragma omp parallel for num_threads(nr_omp_threads) for (long p = 0; p < pc; p++) { const int og = obsModel->getOpticsGroup(mdt, p); const double as_ref = s_ref * angpix[ogRef]; const double as_p = s[og] * angpix[og]; const int t = omp_get_thread_num(); CTF ctf; ctf.readByGroup(mdt, obsModel, p); Image ctfImg(sh[og],s[og]); 
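// The per-thread radial sums filled in below follow the model
//   obs ~ a * exp(-B * k^2 / 4) * CTF * pred :
// t_rad[t][r] accumulates sum of w * |CTF*pred|^2 and s_rad[t][r] accumulates
// sum of w * Re( conj(CTF*pred) * obs ), with the ring index r measured at the
// pixel size of the finest optics group present. After the threads are
// reduced, s_rad[r]/t_rad[r] is the least-squares scale of the prediction on
// ring r, and findBKRec1D() fits a and B to these ring averages by the
// recursive grid search over [min_B_px, max_B_px] described above (the scale a
// is solved in closed form for each trial B, clamped to min_scale).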
ctf.getFftwImage(ctfImg(), s[og], s[og], angpix[og], false, false, false, false, do_ctf_padding); for (int y = 0; y < s[og]; y++) for (int x = 0; x < sh[og]; x++) { const double xx = x; const double yy = (y + s[og]/2) % s[og] - s[og]/2; const int ri = (int)(as_ref * sqrt(xx*xx + yy*yy) / as_p + 0.5); if (ri < sh_ref) { const Complex zobs = obs[p](y,x); const Complex zpred = ctfImg(y,x) * pred[p](y,x); const double wp = freqWeights[og](y,x); t_rad[t][ri] += wp * (zpred.real * zpred.real + zpred.imag * zpred.imag); s_rad[t][ri] += wp * (zpred.real * zobs.real + zpred.imag * zobs.imag); } } } for (int t = 1; t < nr_omp_threads; t++) { for (int r = 0; r < sh_ref; r++) { t_rad[0][r] += t_rad[t][r]; s_rad[0][r] += s_rad[t][r]; } } d2Vector BK = BFactorRefiner::findBKRec1D( t_rad[0], s_rad[0], min_B_px[ogRef], max_B_px[ogRef], min_scale, stepsPerIter, numIters); for (long p = 0; p < pc; p++) { mdt.setValue(EMDL_CTF_BFACTOR, as[ogRef]*as[ogRef]*BK[0] - min_B, p); mdt.setValue(EMDL_CTF_SCALEFACTOR, BK[1], p); } writePerMicrographEPS(mdt, s_rad[0], t_rad[0], ogRef); } else { #pragma omp parallel for num_threads(nr_omp_threads) for (long p = 0; p < pc; p++) { const int og = obsModel->getOpticsGroup(mdt, p); CTF ctf; ctf.readByGroup(mdt, obsModel, p); Image ctfImg(sh[og],s[og]); ctf.getFftwImage(ctfImg(), s[og], s[og], angpix[og], false, false, false, false, do_ctf_padding); std::vector t_rad(sh[og], 0.0), s_rad(sh[og], 0.0); for (int y = 0; y < s[og]; y++) for (int x = 0; x < sh[og]; x++) { const double xx = x; const double yy = (y + s[og]/2) % s[og] - s[og]/2; const int ri = (int)(sqrt(xx*xx + yy*yy) + 0.5); if (ri < sh[og]) { const Complex zobs = obs[p](y,x); const Complex zpred = ctfImg(y,x) * pred[p](y,x); const double wp = freqWeights[og](y,x); t_rad[ri] += wp * (zpred.real * zpred.real + zpred.imag * zpred.imag); s_rad[ri] += wp * (zpred.real * zobs.real + zpred.imag * zobs.imag); } } // slower, but will be necessary for anisotropic B-factors: /* Image predCTF; ImageOp::multiply(ctfImg, pred[p], predCTF); d2Vector sigmaK = BFactorRefiner::findBKRec2D( obs[p], predCTF, freqWeight, min_B_px, max_B_px, min_scale, stepsPerIter, numIters); */ d2Vector BK = BFactorRefiner::findBKRec1D( t_rad, s_rad, min_B_px[og], max_B_px[og], min_scale, stepsPerIter, numIters); int threadnum = omp_get_thread_num(); valsPerPart[threadnum].push_back(std::make_pair(p, BK)); if (diag) writePerParticleDiagEPS(mdt, BK, s_rad, t_rad, p); } for (int t = 0; t < nr_omp_threads; t++) { for (int i = 0; i < valsPerPart[t].size(); i++) { int p = valsPerPart[t][i].first; d2Vector BK = valsPerPart[t][i].second; const int og = obsModel->getOpticsGroup(mdt, p); if (debug) { std::cout << p << ": " << as[og]*as[og]*BK[0] << " \t " << BK[1] << "\n"; } mdt.setValue(EMDL_CTF_BFACTOR, as[og]*as[og]*BK[0] - min_B, p); mdt.setValue(EMDL_CTF_SCALEFACTOR, BK[1], p); } } // Output a diagnostic Postscript file writePerParticleEPS(mdt); if (diag) { std::string outRoot = CtfRefiner::getOutputFilenameRoot(mdt, outPath); std::vector diagFns; for (int p = 0; p < pc; p++) { std::stringstream sts; sts << p; FileName fn_eps = outRoot + "_diag_particle_" + sts.str() + ".eps"; if (exists(fn_eps)) { diagFns.push_back(fn_eps); } } if (diagFns.size() > 0) { joinMultipleEPSIntoSinglePDF(outRoot + "_bfactors_per-particle.pdf", diagFns); } } } // Now write out STAR file with optimised values for this micrograph std::string outRoot = CtfRefiner::getOutputFilenameRoot(mdt, outPath); mdt.write(outRoot + "_bfactor_fit.star"); } void 
BFactorRefiner::writePerMicrographEPS( const MetaDataTable& mdt, const std::vector& s_rad, const std::vector& t_rad, int ogRef) { if (!ready) { REPORT_ERROR("ERROR: BFactorRefiner::writeEPS: BFactorRefiner not initialized."); } std::string outRoot = CtfRefiner::getOutputFilenameRoot(mdt, outPath); FileName fn_eps = outRoot + "_bfactor_fit.eps"; CPlot2D plot2D(fn_eps); plot2D.SetXAxisSize(600); plot2D.SetYAxisSize(600); plot2D.SetDrawLegend(false); plot2D.SetFlipY(false); RFLOAT B, a; mdt.getValue(EMDL_CTF_BFACTOR, B, 0); mdt.getValue(EMDL_CTF_SCALEFACTOR, a, 0); CDataSet curve; curve.SetDrawMarker(false); curve.SetDrawLine(true); curve.SetDatasetColor(0,0,0); const double as = s[ogRef] * angpix[ogRef]; double tMax = 0.0; for (int r = 0; r < sh[ogRef]; r++) { if (t_rad[r] > tMax) tMax = t_rad[r]; } for (int r = 0; r < sh[ogRef]; r++) { const double ra = r / as; double cval = a * exp(-(B+min_B) * ra*ra / 4.0); CDataPoint cp(r, cval); curve.AddDataPoint(cp); if (t_rad[r] > 1e-10 /*&& std::abs(s_rad[r] / t_rad[r]) < 5*/) { double pval = s_rad[r] / t_rad[r]; double ucert = 0.9*(1.0 - t_rad[r] / tMax); CDataSet dataPts; dataPts.SetDrawMarker(true); dataPts.SetDrawLine(false); dataPts.SetMarkerSize(10); dataPts.SetDatasetColor(ucert,ucert,ucert); CDataPoint dp(r, pval); dataPts.AddDataPoint(dp); plot2D.AddDataSet(dataPts); } } plot2D.AddDataSet(curve); std::string title = "CTF amplitude and B/k-factor fit"; plot2D.SetXAxisTitle(title); plot2D.OutputPostScriptPlot(fn_eps); } void BFactorRefiner::writePerParticleDiagEPS( const MetaDataTable& mdt, d2Vector BKpixels, const std::vector &s_rad, const std::vector &t_rad, int particle_index) { const int og = obsModel->getOpticsGroup(mdt, particle_index); std::string outRoot = CtfRefiner::getOutputFilenameRoot(mdt, outPath); std::stringstream sts; sts << particle_index; FileName fn_eps = outRoot + "_diag_particle_" + sts.str() + ".eps"; CPlot2D plot2D(fn_eps); plot2D.SetXAxisSize(600); plot2D.SetYAxisSize(600); plot2D.SetDrawLegend(false); plot2D.SetFlipY(false); CDataSet curve; curve.SetDrawMarker(false); curve.SetDrawLine(true); curve.SetDatasetColor(0,0,0); double tMax = 0.0; for (int r = 0; r < sh[og]; r++) { if (t_rad[r] > tMax) tMax = t_rad[r]; } for (int r = 0; r < sh[og]; r++) { double cval = BKpixels[1] * exp(-BKpixels[0] * r*r / 4.0); CDataPoint cp(r, cval); curve.AddDataPoint(cp); if (t_rad[r] > 1e-10 /*&& std::abs(s_rad[r] / t_rad[r]) < 5*/) { double pval = s_rad[r] / t_rad[r]; double ucert = 0.9*(1.0 - t_rad[r] / tMax); CDataSet dataPts; dataPts.SetDrawMarker(true); dataPts.SetDrawLine(false); dataPts.SetMarkerSize(10); dataPts.SetDatasetColor(ucert,ucert,ucert); CDataPoint dp(r, pval); dataPts.AddDataPoint(dp); plot2D.AddDataSet(dataPts); } } plot2D.AddDataSet(curve); std::string title = "CTF amplitude and B/k-factor fit"; plot2D.SetXAxisTitle(title); plot2D.OutputPostScriptPlot(fn_eps); } void BFactorRefiner::writePerParticleEPS(const MetaDataTable& mdt) { if (!ready) { REPORT_ERROR("ERROR: BFactorRefiner::writeEPS: BFactorRefiner not initialized."); } std::string outRoot = CtfRefiner::getOutputFilenameRoot(mdt, outPath); FileName fn_eps = outRoot + "_bfactor_fit.eps"; CPlot2D plot2D(fn_eps); plot2D.SetXAxisSize(600); plot2D.SetYAxisSize(600); plot2D.SetDrawLegend(false); plot2D.SetFlipY(true); const int pc = mdt.numberOfObjects(); for (int p = 0; p < pc; p++) { RFLOAT B, a; RFLOAT xcoor, ycoor; mdt.getValue(EMDL_IMAGE_COORD_X, xcoor, p); mdt.getValue(EMDL_IMAGE_COORD_Y, ycoor, p); mdt.getValue(EMDL_CTF_BFACTOR, B, p); 
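// In the scatter plot built below, each particle is drawn at its micrograph
// coordinates; the marker size encodes the refined B-factor
// (50 * (1.01 - (B - min_B)/(max_B - min_B)), i.e. smaller markers for larger
// B) and the gray level encodes the CTF scale factor (1 - a/2, i.e. darker
// markers for particles with a stronger scale).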
mdt.getValue(EMDL_CTF_SCALEFACTOR, a, p); RFLOAT aval = 1.0 - a/2.0; RFLOAT bval = 1.01 - (B - min_B) / (max_B - min_B); CDataSet dataSet; dataSet.SetDrawMarker(true); dataSet.SetDrawLine(false); dataSet.SetMarkerSize(50*bval); dataSet.SetDatasetColor(aval, aval, aval); CDataPoint point(xcoor, ycoor); dataSet.AddDataPoint(point); plot2D.AddDataSet(dataSet); } std::string title = "B-factor (size) and CTF-scale (intensity)"; plot2D.SetXAxisTitle(title); plot2D.OutputPostScriptPlot(fn_eps); } bool BFactorRefiner::isFinished(const MetaDataTable &mdt) { if (!ready) { REPORT_ERROR("ERROR: BFactorRefiner::isFinished: BFactorRefiner not initialized."); } std::string outRoot = CtfRefiner::getOutputFilenameRoot(mdt, outPath); return exists(outRoot + "_bfactor_fit.star"); } d2Vector BFactorRefiner::findBKRec1D( const std::vector& t_rad, const std::vector& s_rad, double B0, double B1, double min_scale, int steps, int depth) { double minErr = std::numeric_limits::max(); double bestB = B0; double bestA = 1.0; const double eps = 1e-10; const int sh = t_rad.size(); std::vector sigVals(sh); for (int st = 0; st < steps; st++) { const double B = B0 + st*(B1 - B0)/(steps-1); for (int r = 0; r < sh; r++) { sigVals[r] = exp(-B * r * r / 4.0); } // find optimal scale-factor for hypothetical B-factor double num = 0.0, denom = 0.0; for (int r = 0; r < sh; r++) { const double tr = t_rad[r]; const double sr = s_rad[r]; const double br = sigVals[r]; num += sr * br; denom += tr * br * br; } double a = denom > eps? num / denom : num / eps; if (a < min_scale) a = min_scale; double sum = 0.0; for (int r = 0; r < sh; r++) { const double tr = t_rad[r]; const double sr = s_rad[r]; const double br = sigVals[r]; // avoid the division by tr in: // const double er = a * br - sr / tr; // sum += tr * er * er; // by dropping the constant-over-br offset sr²/tr²: sum += tr * a * a * br * br - 2.0 * a * br * sr; } if (sum < minErr) { minErr = sum; bestB = B; bestA = a; } } if (depth > 0) { const double hrange = (B1 - B0) / (steps - 1.0); double Bnext0 = bestB - hrange; double Bnext1 = bestB + hrange; if (Bnext0 < B0) Bnext0 = B0; if (Bnext1 > B1) Bnext1 = B1; return findBKRec1D( t_rad, s_rad, Bnext0, Bnext1, min_scale, steps, depth - 1); } return d2Vector(bestB, bestA); } d2Vector BFactorRefiner::findBKRec2D( const Image &obs, const Image &pred, const Image &weight, double B0, double B1, double min_scale, int steps, int depth) { double minErr = std::numeric_limits::max(); double bestB = B0; double bestA = 1.0; const int s = obs.data.ydim; const int sh = s/2 + 1; std::vector sigVals(sh); for (int st = 0; st < steps; st++) { const double B = B0 + st*(B1 - B0)/(steps-1); for (int r = 0; r < sh; r++) { sigVals[r] = exp(-B * r * r / 4.0); } // find optimal scale-factor for hypothetical B-factor double num = 0.0, denom = 0.0; for (long y = 0; y < s; y++) for (long x = 0; x < sh; x++) { const int xx = x; const int yy = y < sh? y : y - s; const int r = (int) (sqrt(xx*xx + yy*yy) + 0.5); if (r >= sh) continue; Complex vx = DIRECT_A2D_ELEM(pred.data, y, x); const Complex vy = DIRECT_A2D_ELEM(obs.data, y, x); const double vw = DIRECT_A2D_ELEM(weight.data, y, x); const double vb = sigVals[r]; num += vw * vb * (vx.real * vy.real + vx.imag * vy.imag); denom += vw * vb * vb * (vx.real * vx.real + vx.imag * vx.imag); } const double eps = 1e-20; double a = denom > eps? num / denom : num / eps; if (a < min_scale) a = min_scale; double sum = 0.0; for (long y = 0; y < s; y++) for (long x = 0; x < sh; x++) { const int xx = x; const int yy = y < sh? 
y : y - s; const int r = (int) (sqrt(xx*xx + yy*yy) + 0.5); if (r >= sh) continue; Complex vx = DIRECT_A2D_ELEM(pred.data, y, x); const Complex vy = DIRECT_A2D_ELEM(obs.data, y, x); const double vw = DIRECT_A2D_ELEM(weight.data, y, x); const double vb = sigVals[r]; sum += vw * (vy - a * vb * vx).norm(); } if (sum < minErr) { minErr = sum; bestB = B; bestA = a; } } if (depth > 0) { const double hrange = (B1 - B0) / (steps - 1.0); double Bnext0 = bestB - hrange; double Bnext1 = bestB + hrange; if (Bnext0 < B0) Bnext0 = B0; if (Bnext1 > B1) Bnext1 = B1; return findBKRec2D( obs, pred, weight, Bnext0, Bnext1, min_scale, steps, depth - 1); } return d2Vector(bestB, bestA); } relion-3.1.3/src/jaz/ctf/bfactor_refiner.h000066400000000000000000000042051411340063500204360ustar00rootroot00000000000000#ifndef BFACTOR_REFINER_H #define BFACTOR_REFINER_H #include #include class IOParser; class ReferenceMap; class ObservationModel; class BFactorRefiner { public: BFactorRefiner(); void read(IOParser& parser, int argc, char *argv[]); void init( int verb, int nr_omp_threads, bool debug, bool diag, std::string outPath, ReferenceMap* reference, ObservationModel* obsModel); // Fit B-factors for all particles on one micrograph void processMicrograph( long g, MetaDataTable& mdt, const std::vector>& obs, const std::vector>& pred, bool do_ctf_padding = false); // Combine all .stars and .eps files std::vector merge(const std::vector& mdts); // Write PostScript file with per-particle B-factors plotted onto micrograph void writePerParticleEPS(const MetaDataTable &mdt); void writePerMicrographEPS( const MetaDataTable& mdt, const std::vector& s_rad, const std::vector& t_rad, int ogRef); void writePerParticleDiagEPS( const MetaDataTable& mdt, gravis::d2Vector BKpixels, const std::vector& s_rad, const std::vector& t_rad, int particle_index); // Has this mdt been processed already? bool isFinished(const MetaDataTable& mdt); private: // cmd. line options (see read()): double kmin, min_scale, min_B, max_B; bool perMicrograph; // set at init: int verb, nr_omp_threads; bool debug, diag; std::string outPath; std::vector s, sh; std::vector angpix; std::vector > freqWeights; ReferenceMap* reference; ObservationModel* obsModel; bool ready; static gravis::d2Vector findBKRec1D( const std::vector& t_rad, const std::vector& s_rad, double B0, double B1, double min_scale, int steps, int depth); static gravis::d2Vector findBKRec2D( const Image& obs, const Image& pred, const Image& weight, double B0, double B1, double min_scale, int steps, int depth); }; #endif relion-3.1.3/src/jaz/ctf/ctf_refiner.cpp000066400000000000000000000341351411340063500201320ustar00rootroot00000000000000/*************************************************************************** * * Authors: "Jasenko Zivanov & Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #include "ctf_refiner.h" #include "tilt_helper.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include using namespace gravis; CtfRefiner::CtfRefiner() {} void CtfRefiner::read(int argc, char **argv) { IOParser parser; parser.setCommandLine(argc, argv); int gen_section = parser.addSection("General options"); starFn = parser.getOption("--i", "Input STAR file containing the particles"); reference.read(parser, argc, argv); outPath = parser.getOption("--o", "Output directory, e.g. CtfRefine/job041/"); only_do_unfinished = parser.checkOption("--only_do_unfinished", "Skip those steps for which output files already exist."); do_ctf_padding = parser.checkOption("--ctf_pad", "Use larger box to calculate CTF and then downscale to mimic boxing operation in real space"); diag = parser.checkOption("--diag", "Write out diagnostic data (slower)"); int fit_section = parser.addSection("Defocus fit options"); do_defocus_fit = parser.checkOption("--fit_defocus", "Perform refinement of per-particle defocus values?"); defocusEstimator.read(parser, argc, argv); int bfac_section = parser.addSection("B-factor options"); do_bfac_fit = parser.checkOption("--fit_bfacs", "Estimate CTF B-factors"); bfactorEstimator.read(parser, argc, argv); int tilt_section = parser.addSection("Beam-tilt options"); do_tilt_fit = parser.checkOption("--fit_beamtilt", "Perform refinement of beamtilt"); tiltEstimator.read(parser, argc, argv); int aberr_section = parser.addSection("Symmetric aberrations options"); do_aberr_fit = parser.checkOption("--fit_aberr", "Estimate symmetric aberrations"); aberrationEstimator.read(parser, argc, argv); int aniso_section = parser.addSection("Anisotropic magnification options"); do_mag_fit = parser.checkOption("--fit_aniso", "Estimate anisotropic magnification"); magnificationEstimator.read(parser, argc, argv); int comp_section = parser.addSection("Computational options"); nr_omp_threads = textToInteger(parser.getOption("--j", "Number of (OMP) threads", "1")); minMG = textToInteger(parser.getOption("--min_MG", "First micrograph index", "0")); maxMG = textToInteger(parser.getOption("--max_MG", "Last micrograph index (default is to process all)", "-1")); debug = parser.checkOption("--debug", "Write debugging data"); verb = textToInteger(parser.getOption("--verb", "Verbosity", "1")); JazConfig::writeMrc = !debug; JazConfig::writeVtk = debug; // Check for errors in the command-line option if (parser.checkForErrors()) { REPORT_ERROR("Errors encountered on the command line (see above), exiting..."); } // Make sure outPath ends with a slash and exists if (outPath[outPath.length()-1] != '/') { outPath += "/"; std::string command = " mkdir -p " + outPath; int ret = system(command.c_str()); } } void CtfRefiner::init() { if (verb > 0) { std::cout << " + Reading " << starFn << "..." << std::endl; } // Make sure output directory ends in a '/' if (outPath[outPath.length()-1] != '/') { outPath += "/"; } ObservationModel::loadSafely(starFn, obsModel, mdt0); if (!ObservationModel::containsAllColumnsNeededForPrediction(mdt0)) { REPORT_ERROR_STR(starFn << " does not contain all columns needed for view prediction: \n" << "rlnOriginXAngst, rlnOriginYAngst, " << "rlnAngleRot, rlnAngleTilt, rlnAnglePsi and rlnRandomSubset"); } /* // TAKANORI: TODO Put it somewhere if (Cs <= 0.1 && verb > 0) { std::cerr << "WARNING: Your Cs value is very small. 
Beam tilt refinement might be unnecessary. Sometimes it gives unrealistically large tilts." << std::endl; } */ // after all the necessary changes to mdt0 have been applied // in ObservationModel::loadSafely(), split it by micrograph allMdts = StackHelper::splitByMicrographName(mdt0); // Only work on a user-specified subset of the micrographs if (maxMG < 0 || maxMG >= allMdts.size()) { maxMG = allMdts.size()-1; } if (minMG < 0 || minMG >= allMdts.size()) { minMG = 0; } if (minMG > 0 || maxMG < allMdts.size()-1) { if (verb > 0) { std::cout << " - Will only process micrographs in range: [" << minMG << "-" << maxMG << "]" << std::endl; } std::vector todo_mdts; for (long int g = minMG; g <= maxMG; g++ ) { todo_mdts.push_back(allMdts[g]); } allMdts = todo_mdts; } if (verb > 0) { std::cout << " + Reading references ..." << std::endl; } reference.load(verb, debug); // Get dimensions int s = reference.s; tiltEstimator.init(verb, nr_omp_threads, debug, diag, outPath, &reference, &obsModel); aberrationEstimator.init(verb, nr_omp_threads, debug, diag, outPath, &reference, &obsModel); defocusEstimator.init(verb && do_defocus_fit, nr_omp_threads, debug, diag, outPath, &reference, &obsModel); bfactorEstimator.init(verb, nr_omp_threads, debug, diag, outPath, &reference, &obsModel); magnificationEstimator.init(verb, nr_omp_threads, debug, diag, outPath, &reference, &obsModel); // check whether output files exist and skip the micrographs for which they do if (only_do_unfinished) { for (long int g = minMG; g <= maxMG; g++ ) { bool is_done = (!do_defocus_fit || defocusEstimator.isFinished(allMdts[g])) && (!do_bfac_fit || bfactorEstimator.isFinished(allMdts[g])) && (!do_tilt_fit || tiltEstimator.isFinished(allMdts[g])) && (!do_aberr_fit || aberrationEstimator.isFinished(allMdts[g])) && (!do_mag_fit || magnificationEstimator.isFinished(allMdts[g])); if (!is_done) { unfinishedMdts.push_back(allMdts[g]); } } if (verb > 0) { if (unfinishedMdts.size() < allMdts.size()) { std::cout << " - Will only process " << unfinishedMdts.size() << " unfinished (out of " << allMdts.size() << ") micrographs" << std::endl; } else { std::cout << " - Will process all " << unfinishedMdts.size() << " micrographs" << std::endl; } } } else { unfinishedMdts = allMdts; } } void CtfRefiner::processSubsetMicrographs(long g_start, long g_end) { int barstep; int my_nr_micrographs = g_end - g_start + 1; if (verb > 0) { std::cout << " + Performing loop over all micrographs ... " << std::endl; init_progress_bar(my_nr_micrographs); barstep = XMIPP_MAX(1, my_nr_micrographs/ 60); } std::vector fts(nr_omp_threads); long nr_done = 0; FileName prevdir = ""; for (long g = g_start; g <= g_end; g++) { // Abort through the pipeline_control system, TODO: check how this goes with MPI.... if (pipeline_control_check_abort_job()) exit(RELION_EXIT_ABORTED); std::vector > obs; // all CTF-refinement programs need the same observations obs = StackHelper::loadStackFS(unfinishedMdts[g], "", nr_omp_threads, true, &obsModel); // Make sure output directory exists FileName newdir = getOutputFilenameRoot(unfinishedMdts[g], outPath); newdir = newdir.beforeLastOf("/"); if (newdir != prevdir) { std::string command = " mkdir -p " + newdir; int res = system(command.c_str()); } std::vector> predSameT, // phase-demodulated (defocus) predOppNT, // not phase-demodulated (tilt) predOppT; // phase-demodulated (mag and aberr) // applyMtf is always true // Four booleans in predictAll are applyCtf, applyTilt, applyShift, applyMtf. 
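// Summary of the three prediction stacks computed below (a reader's overview of the predictAll
// calls that follow; all names are from the code below, nothing new is introduced here):
//
//   predSameT : own half-set,      phase-demodulated      -> defocus fit, B-factor fit
//   predOppNT : opposite half-set, not phase-demodulated  -> beam-tilt fit
//   predOppT  : opposite half-set, phase-demodulated      -> symmetric aberrations, magnification
//
// A particle's own half-set reference is only used for the per-particle defocus/B-factor fits
// (accepting some risk of overfitting); the opposite half-set is used for the optics-group-level fits.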
// use prediction from same half-set for defocus estimation (overfitting danger): if (do_defocus_fit || do_bfac_fit) { predSameT = reference.predictAll( unfinishedMdts[g], obsModel, ReferenceMap::Own, nr_omp_threads, false, true, false, true, do_ctf_padding); } // use predictions from opposite half-set otherwise: if (do_tilt_fit) { predOppNT = reference.predictAll( unfinishedMdts[g], obsModel, ReferenceMap::Opposite, nr_omp_threads, false, false, false, true, do_ctf_padding); } if (do_aberr_fit || do_mag_fit) { predOppT = reference.predictAll( unfinishedMdts[g], obsModel, ReferenceMap::Opposite, nr_omp_threads, false, true, false, true, do_ctf_padding); } if (do_defocus_fit) { defocusEstimator.processMicrograph(g, unfinishedMdts[g], obs, predSameT); } // B-factor fit is always performed after the defocus fit (so it can use the optimal CTFs) // The prediction is *not* CTF-weighted, so an up-to-date CTF can be used internally if (do_bfac_fit) { bfactorEstimator.processMicrograph(g, unfinishedMdts[g], obs, predSameT, do_ctf_padding); } if (do_tilt_fit) { tiltEstimator.processMicrograph(g, unfinishedMdts[g], obs, predOppNT, do_ctf_padding); } if (do_aberr_fit) { aberrationEstimator.processMicrograph(g, unfinishedMdts[g], obs, predOppT); } if (do_mag_fit) { std::vector>> predGradient = reference.predictAllComplexGradients( unfinishedMdts[g], obsModel, ReferenceMap::Opposite, nr_omp_threads, false, true, false, true, do_ctf_padding); magnificationEstimator.processMicrograph(g, unfinishedMdts[g], obs, predOppT, predGradient, do_ctf_padding); } nr_done++; if (verb > 0 && nr_done % barstep == 0) { progress_bar(nr_done); } } if (verb > 0) { progress_bar(my_nr_micrographs); } } void CtfRefiner::run() { if (do_defocus_fit || do_bfac_fit || do_tilt_fit || do_aberr_fit || do_mag_fit) { // The subsets will be used in openMPI parallelisation: // instead of over g0->gc, they will be over smaller subsets processSubsetMicrographs(0, unfinishedMdts.size()-1); } finalise(); } void CtfRefiner::finalise() { std::vector mdtOut; std::vector fn_eps, fn_eps_earlier, fn_eps_later; // Read back from disk the metadata-tables and eps-plots for the B-factor or defocus fit. // Note: only micrographs for which the defoci or B-factors were estimated (either now or before) // will end up in mdtOut - micrographs excluded through min_MG and max_MG will not. if (do_bfac_fit || do_defocus_fit) { mdtOut = merge(allMdts, fn_eps_later); } else { mdtOut = allMdts; } // ...and for the magnification fit if (do_mag_fit) { magnificationEstimator.parametricFit(mdtOut, obsModel.opticsMdt, fn_eps_earlier); } // Sum up the per-pixel beamtilt fits of all micrographs and fit a parametric model to them. // Then, write the beamtilt parameters into obsModel.opticsMdt if (do_tilt_fit) { tiltEstimator.parametricFit(mdtOut, obsModel.opticsMdt, fn_eps_earlier); } // Do the equivalent for the symmetrical aberrations... 
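// (As with the beam tilt above, the fitted symmetric-aberration parameters are written into obsModel.opticsMdt.)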
if (do_aberr_fit) { aberrationEstimator.parametricFit(mdtOut, obsModel.opticsMdt, fn_eps_earlier); } // Make sure to have the EPS from fn_eps_earlier before the ones from fn_eps_later // Sort earlier fn_eps, because openMP may have resulted in random order in optics groups std::sort(fn_eps_earlier.begin(), fn_eps_earlier.end()); fn_eps.reserve( fn_eps_earlier.size() + fn_eps_later.size() ); // preallocate memory fn_eps.insert( fn_eps.end(), fn_eps_earlier.begin(), fn_eps_earlier.end() ); fn_eps.insert( fn_eps.end(), fn_eps_later.begin(), fn_eps_later.end() ); if (fn_eps.size() > 0) { joinMultipleEPSIntoSinglePDF(outPath + "logfile.pdf", fn_eps); if (verb > 0) { std::cout << " + Written out : " << outPath << "logfile.pdf" << std::endl; } } MetaDataTable mdtOutAll = StackHelper::merge(mdtOut); obsModel.save(mdtOutAll, outPath + "particles_ctf_refine.star"); if (verb > 0) { std::cout << " + Done! Written out : " << outPath << "particles_ctf_refine.star" << std::endl; } } std::vector CtfRefiner::merge(const std::vector& mdts, std::vector &fn_eps ) { int gc = mdts.size(); int barstep; if (verb > 0) { std::cout << " + Combining data for all micrographs " << std::endl; init_progress_bar(gc); barstep = 1; } std::vector mdtOut; for (long g = 0; g < gc; g++) { std::string outRoot = getOutputFilenameRoot(mdts[g], outPath); MetaDataTable mdt; // If a B-factor fit has been performed, then this has been done after a potential defocus fit, // so the B-factor fit files are always more up-to-date. if (do_bfac_fit) { // Read in STAR file with B-factor fit data mdt.read(outRoot+"_bfactor_fit.star"); } else if (do_defocus_fit) { // Read in STAR file with defocus fit data mdt.read(outRoot+"_defocus_fit.star"); } mdtOut.push_back(mdt); if (exists(outRoot+"_ctf-refine_fit.eps")) { fn_eps.push_back(outRoot+"_ctf-refine_fit.eps"); } if (exists(outRoot+"_bfactor_fit.eps")) { fn_eps.push_back(outRoot+"_bfactor_fit.eps"); } if (verb > 0) { progress_bar(g); } } if (verb > 0) { progress_bar(gc); } return mdtOut; } int CtfRefiner::getVerbosityLevel() { return verb; } FileName CtfRefiner::getOutputFilenameRoot(const MetaDataTable &mdt, std::string outPath) { FileName fn_mic; mdt.getValue(EMDL_MICROGRAPH_NAME, fn_mic, 0); FileName fn_pre, fn_jobnr, fn_post; decomposePipelineFileName(fn_mic, fn_pre, fn_jobnr, fn_post); return outPath + fn_post.withoutExtension(); } relion-3.1.3/src/jaz/ctf/ctf_refiner.h000066400000000000000000000051771411340063500176030ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov & Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #ifndef CTF_REFINER_H #define CTF_REFINER_H #include #include #include #include "tilt_estimator.h" #include "defocus_estimator.h" #include "bfactor_refiner.h" #include "magnification_estimator.h" #include "aberration_estimator.h" class CtfRefiner { public: CtfRefiner(); void read(int argc, char **argv); void init(); void run(); void finalise(); int getVerbosityLevel(); static FileName getOutputFilenameRoot( const MetaDataTable& mdt, std::string outPath); protected: ObservationModel obsModel; ReferenceMap reference; TiltEstimator tiltEstimator; DefocusEstimator defocusEstimator; BFactorRefiner bfactorEstimator; AberrationEstimator aberrationEstimator; MagnificationEstimator magnificationEstimator; // Verbosity int verb; // Allow continuation of crashed jobs bool only_do_unfinished; // Whether to estimate defoci, B-factors, antisymmetric aberrations (incl. beam tilt), // symmetric aberrations and anisotropic magnification, respectively bool do_defocus_fit, do_bfac_fit, do_tilt_fit, do_aberr_fit, do_mag_fit, do_ctf_padding; bool debug, // write out debugging info diag; // write out diagnostic info long maxMG, minMG; int nr_omp_threads; std::string starFn, outPath; MetaDataTable mdt0; std::vector allMdts, unfinishedMdts; // Fit CTF parameters for all particles on a subset of the micrographs micrograph void processSubsetMicrographs(long g_start, long g_end); // Combine all .stars and .eps files std::vector merge(const std::vector& mdts, std::vector &fn_eps); }; #endif /* CTF_REFINER_H */ relion-3.1.3/src/jaz/ctf/ctf_refiner_mpi.cpp000066400000000000000000000041431411340063500207730ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov & Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include "ctf_refiner_mpi.h" void CtfRefinerMpi::read(int argc, char **argv) { // Define a new MpiNode node = new MpiNode(argc, argv); // First read in non-parallelisation-dependent variables CtfRefiner::read(argc, argv); // Don't put any output to screen for mpi followers verb = (node->isLeader()) ? 
verb : 0; // Possibly also read parallelisation-dependent variables here if (node->size < 2) { REPORT_ERROR_STR("CtfRefinerMpi::read ERROR: this program needs to be run " << "with at least two MPI processes!"); } // Print out MPI info printMpiNodesMachineNames(*node); } void CtfRefinerMpi::run() { // Parallel loop over micrographs long int total_nr_micrographs = unfinishedMdts.size(); // Each node does part of the work long int my_first_micrograph, my_last_micrograph; divide_equally(total_nr_micrographs, node->size, node->rank, my_first_micrograph, my_last_micrograph); if (do_defocus_fit || do_bfac_fit || do_tilt_fit || do_aberr_fit || do_mag_fit) { processSubsetMicrographs(my_first_micrograph, my_last_micrograph); } MPI_Barrier(MPI_COMM_WORLD); if (node->isLeader()) { finalise(); } } relion-3.1.3/src/jaz/ctf/ctf_refiner_mpi.h000066400000000000000000000026731411340063500204460ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov & Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef CTF_REFINER_MPI_H_ #define CTF_REFINER_MPI_H_ #include #include #include "ctf_refiner.h" class CtfRefinerMpi : public CtfRefiner { private: MpiNode *node; public: /** Destructor, calls MPI_Finalize */ ~CtfRefinerMpi() { delete node; } /** Read * This could take care of mpi-parallelisation-dependent variables */ void read(int argc, char **argv); // Parallelized run function void run(); }; #endif /* CTF_REFINER_MPI_H_ */ relion-3.1.3/src/jaz/ctf/defocus_estimator.cpp000066400000000000000000000230711411340063500213600ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved.
***************************************************************************/ #include "defocus_estimator.h" #include "defocus_helper.h" #include "ctf_refiner.h" #include #include #include #include #include #include #include #include #include #include using namespace gravis; DefocusEstimator::DefocusEstimator() : ready(false) {} void DefocusEstimator::read(IOParser &parser, int argc, char *argv[]) { fittingMode = parser.getOption("--fit_mode", "String of 5 characters describing whether to fit the phase shift (1), \n\ defocus (2), astigmatism (3), spherical aberration (4) and B-factors (5) \n\ per particle ('p'), per micrograph ('m') or to keep them fixed ('f')\n\ during the per-micrograph CTF refinement.", "fpmfm"); max_iters = textToInteger(parser.getOption("--max_defocus_iters", "Maximum number of iterations for CTF refinement.", "100")); bruteForcePre = parser.checkOption("--bf0", "Perform brute-force per-particle defocus search (as in RELION 3.0) prior \n\ to the per-micrograph CTF refinement."); bruteForcePost = parser.checkOption("--bf1", "Perform brute-force defocus search after CTF refinement."); bruteForceOnly = parser.checkOption("--bf_only", "Skip CTF refinement and only perform a brute-force defocus search."); defocusRange = textToDouble(parser.getOption("--bf_range", "Defocus scan range (in A) for brute-force search.", "2000.")); fitAstigmatism = parser.checkOption("--legacy_astig", "Estimate independent per-particle astigmatism (from RELION 3.0)"); kmin = textToFloat(parser.getOption("--kmin_defocus", "Inner freq. threshold for defocus estimation [Angst]", "30.0")); } void DefocusEstimator::init( int verb, int nr_omp_threads, bool debug, bool diag, std::string outPath, ReferenceMap *reference, ObservationModel *obsModel) { this->verb = verb; this->nr_omp_threads = nr_omp_threads; this->debug = debug; this->diag = diag; this->outPath = outPath; this->reference = reference; this->obsModel = obsModel; angpix = obsModel->getPixelSizes(); obsModel->getBoxSizes(s, sh); freqWeights.resize(angpix.size()); for (int i = 0; i < angpix.size(); i++) { freqWeights[i] = reference->getHollowWeight(kmin, s[i], angpix[i]); } if (verb > 0) { std::vector names{"per particle (p)", "per micrograph (m)", "fixed (f)" }; if (!ModularCtfOptimisation::validateModeString(fittingMode)) { REPORT_ERROR_STR("DefocusEstimator::init: illegal fitting mode string: " << fittingMode); } std::vector modes = ModularCtfOptimisation::decodeModes(fittingMode); std::cout << " + Defocus fitting-mode string: " << fittingMode << "\n"; std::cout << " => Estimating:" << "\n"; std::cout << " phase-shift: " << names[(int)modes[ModularCtfOptimisation::Phase]] << "\n"; std::cout << " defocus: " << names[(int)modes[ModularCtfOptimisation::Defocus]] << "\n"; std::cout << " astigmatism: " << names[(int)modes[ModularCtfOptimisation::Astigmatism1]] << "\n"; std::cout << " sph. 
aberration: " << names[(int)modes[ModularCtfOptimisation::SphericalAberration]] << "\n"; std::cout << " B/Scale factors: " << names[(int)modes[ModularCtfOptimisation::BFactor]] << std::endl; } ready = true; } void DefocusEstimator::processMicrograph( long g, MetaDataTable& mdt, const std::vector>& obs, const std::vector>& pred) { if (!ready) { REPORT_ERROR("ERROR: DefocusEstimator::processMicrograph: DefocusEstimator not initialized."); } if (bruteForcePre || bruteForceOnly) { bruteForceFit(g, mdt, obs, pred, "pre"); } if (!bruteForceOnly) { ModularCtfOptimisation mco(mdt, obsModel, obs, pred, freqWeights, fittingMode, nr_omp_threads); std::vector x0 = mco.encodeInitial(); std::vector x = LBFGS::optimize(x0, mco, debug, max_iters, 1e-9); mco.writeToTable(x); if (bruteForcePost) { bruteForceFit(g, mdt, obs, pred, "post"); } } // Output a diagnostic Postscript file writeEPS(mdt); // Now write out STAR file with optimised values for this micrograph std::string outRoot = CtfRefiner::getOutputFilenameRoot(mdt, outPath); mdt.write(outRoot + "_defocus_fit.star"); } void DefocusEstimator::writeEPS(const MetaDataTable& mdt) { if (!ready) { REPORT_ERROR("ERROR: DefocusEstimator::writeEPS: DefocusEstimator not initialized."); } std::string outRoot = CtfRefiner::getOutputFilenameRoot(mdt, outPath); FileName fn_eps = outRoot + "_ctf-refine_fit.eps"; CPlot2D plot2D(fn_eps); plot2D.SetXAxisSize(600); plot2D.SetYAxisSize(600); plot2D.SetDrawLegend(false); plot2D.SetFlipY(true); RFLOAT min_defocus = 99.e10; RFLOAT max_defocus = -99.e10; const int pc = mdt.numberOfObjects(); for (int p = 0; p < pc; p++) { RFLOAT defU, defV; mdt.getValue(EMDL_CTF_DEFOCUSU, defU, p); mdt.getValue(EMDL_CTF_DEFOCUSV, defV, p); defU = (defU + defV) / 2.; min_defocus = XMIPP_MIN(min_defocus, defU); max_defocus = XMIPP_MAX(max_defocus, defU); } for (int p = 0; p < pc; p++) { RFLOAT defU, defV; RFLOAT xcoor, ycoor; mdt.getValue(EMDL_IMAGE_COORD_X, xcoor, p); mdt.getValue(EMDL_IMAGE_COORD_Y, ycoor, p); mdt.getValue(EMDL_CTF_DEFOCUSU, defU, p); mdt.getValue(EMDL_CTF_DEFOCUSV, defV, p); defU = (defU + defV) / 2.; RFLOAT val = (defU - min_defocus) / (max_defocus - min_defocus); const RFLOAT eps = 1e-10; if (max_defocus - min_defocus < eps) val = 0.5; // to avoid NaN in color CDataSet dataSet; dataSet.SetDrawMarker(true); dataSet.SetDrawLine(false); dataSet.SetMarkerSize(10); dataSet.SetDatasetColor(val, 0.8 * val, 1.0 - val); CDataPoint point(xcoor, ycoor); dataSet.AddDataPoint(point); plot2D.AddDataSet(dataSet); } char title[256]; snprintf(title, 255, "Defocus range from blue to orange: %.0f A", max_defocus - min_defocus); plot2D.SetXAxisTitle(title); plot2D.OutputPostScriptPlot(fn_eps); } bool DefocusEstimator::isFinished(const MetaDataTable &mdt) { if (!ready) { REPORT_ERROR("ERROR: DefocusEstimator::isFinished: DefocusEstimator not initialized."); } std::string outRoot = CtfRefiner::getOutputFilenameRoot(mdt, outPath); return exists(outRoot + "_defocus_fit.star"); } void DefocusEstimator::bruteForceFit( long g, MetaDataTable &mdt, const std::vector > &obs, const std::vector > &pred, std::string tag) { long pc = obs.size(); std::stringstream stsg; stsg << g; if (diag) { std::ofstream ofst(outPath+"diag_m"+stsg.str()+"_bf-defocus-"+tag+"_cost.dat"); std::ofstream ofsto(outPath+"diag_m"+stsg.str()+"_bf-defocus-"+tag+"_opt.dat"); for (long p = 0; p < pc; p++) { const int og = obsModel->getOpticsGroup(mdt, p); CTF ctf0; ctf0.readByGroup(mdt, obsModel, p); std::vector cost = DefocusHelper::diagnoseDefocus( pred[p], obs[p], 
freqWeights[og], ctf0, angpix[og], defocusRange, 100, nr_omp_threads); double cMin = cost[0][1]; double dOpt = cost[0][0]; for (int i = 0; i < cost.size(); i++) { ofst << cost[i][0] << " " << cost[i][1] << "\n"; if (cost[i][1] < cMin) { cMin = cost[i][1]; dOpt = cost[i][0]; } } ofsto << dOpt << " " << cMin << "\n"; ofst << "\n"; } } // Parallel loop over all particles in this micrograph #pragma omp parallel for num_threads(nr_omp_threads) for (long p = 0; p < pc; p++) { const int og = obsModel->getOpticsGroup(mdt, p); std::stringstream stsp; stsp << p; CTF ctf0; ctf0.readByGroup(mdt, obsModel, p); if (fitAstigmatism) { double u, v, phi; DefocusHelper::findAstigmatismNM( pred[p], obs[p], freqWeights[og], ctf0, angpix[og], &u, &v, &phi); mdt.setValue(EMDL_CTF_DEFOCUSU, u, p); mdt.setValue(EMDL_CTF_DEFOCUSV, v, p); mdt.setValue(EMDL_CTF_DEFOCUS_ANGLE, phi, p); } else { double u, v; DefocusHelper::findDefocus1D( pred[p], obs[p], freqWeights[og], ctf0, angpix[og], &u, &v, defocusRange); /*if (debug) { double u0, v0; mdt.getValue(EMDL_CTF_DEFOCUSU, u0, p); mdt.getValue(EMDL_CTF_DEFOCUSV, v0, p); std::cout << u0 << " -> " << u << ", " << v0 << " -> " << v << "\n"; }*/ mdt.setValue(EMDL_CTF_DEFOCUSU, u, p); mdt.setValue(EMDL_CTF_DEFOCUSV, v, p); } } } relion-3.1.3/src/jaz/ctf/defocus_estimator.h000066400000000000000000000046441411340063500210320ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef DEFOCUS_ESTIMATOR_H #define DEFOCUS_ESTIMATOR_H #include class IOParser; class ReferenceMap; class ObservationModel; class DefocusEstimator { public: DefocusEstimator(); void read(IOParser& parser, int argc, char *argv[]); void init( int verb, int nr_omp_threads, bool debug, bool diag, std::string outPath, ReferenceMap* reference, ObservationModel* obsModel); // Fit defocus for all particles on one micrograph void processMicrograph( long g, MetaDataTable& mdt, const std::vector>& obs, const std::vector>& pred); // Write PostScript file with per-particle defocus // plotted onto micrograph in blue-red color scale void writeEPS(const MetaDataTable &mdt); // Has this mdt been processed already? bool isFinished(const MetaDataTable& mdt); private: // cmd. 
line options (see read()): double defocusRange, kmin; int max_iters; bool fitAstigmatism, bruteForcePre, bruteForcePost, bruteForceOnly; std::string fittingMode; // set at init: int verb, nr_omp_threads; bool debug, diag; std::string outPath; std::vector s, sh; std::vector angpix; std::vector > freqWeights; ReferenceMap* reference; ObservationModel* obsModel; bool ready; void bruteForceFit( long g, MetaDataTable& mdt, const std::vector>& obs, const std::vector>& pred, std::string tag); }; #endif relion-3.1.3/src/jaz/ctf/defocus_helper.cpp000066400000000000000000000247221411340063500206340ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include "defocus_helper.h" #include #include #include #include #include #include using namespace gravis; RFLOAT DefocusHelper::findDefocus1D( const Image &prediction, const Image &observation, const Image& weight, const CTF &ctf0, RFLOAT angpix, double *destU, double *destV, RFLOAT range, int steps, int recDepth, RFLOAT recScale) { const RFLOAT delta = ctf0.DeltafV - ctf0.DeltafU; CTF ctf(ctf0); double minErr = std::numeric_limits::max(); double bestU = ctf0.DeltafU; double bestV = ctf0.DeltafV; for (int s = -steps/2; s <= steps/2; s++) { const RFLOAT u = ctf0.DeltafU + s * range / (steps/2); const RFLOAT v = u + delta; ctf.DeltafU = u; ctf.DeltafV = v; ctf.initialise(); double err = RefinementHelper::squaredDiff(prediction, observation, ctf, angpix, weight); if (err < minErr) { minErr = err; bestU = u; bestV = v; } } if (recDepth > 0) { ctf.DeltafU = bestU; ctf.DeltafV = bestV; ctf.initialise(); findDefocus1D(prediction, observation, weight, ctf, angpix, destU, destV, range/recScale, steps, recDepth-1, recScale); } else { *destU = bestU; *destV = bestV; } return minErr; } void DefocusHelper::findAstigmatismNM( const Image &prediction, const Image &observation, const Image &weight, const CTF &ctf0, RFLOAT angpix, double *destU, double *destV, double *destPhi) { AstigmatismOptimizationAcc opt(prediction, observation, weight, ctf0, false, false, angpix); std::vector initial = opt.getInitialParams(); std::vector params = NelderMead::optimize(initial, opt, 50.0, 1.0, 1000); *destU = opt.getU(params); *destV = opt.getV(params); *destPhi = opt.getPhi(params); } void DefocusHelper::findAstigmatismAndPhaseNM( const std::vector>& prediction, const std::vector>& observation, const Image &weight, const CTF &ctf0, RFLOAT angpix, double *destU, double *destV, double *destPhi, double *destPhase) { AstigmatismOptimizationAcc opt(prediction, observation, weight, ctf0, true, false, angpix); std::vector initial = opt.getInitialParams(); std::vector params = NelderMead::optimize(initial, opt, 5.0, 0.01, 
100000); *destU = opt.getU(params); *destV = opt.getV(params); *destPhi = opt.getPhi(params); *destPhase = opt.getPhase(params); } void DefocusHelper::findAstigmatismPhaseAndCsNM( const std::vector>& prediction, const std::vector>& observation, const Image &weight, const CTF &ctf0, RFLOAT angpix, double *destU, double *destV, double *destPhi, double *destPhase, double* destCs) { AstigmatismOptimizationAcc opt(prediction, observation, weight, ctf0, true, true, angpix); std::vector initial = opt.getInitialParams(); std::vector params = NelderMead::optimize(initial, opt, 5.0, 0.01, 100000); *destU = opt.getU(params); *destV = opt.getV(params); *destPhi = opt.getPhi(params); *destPhase = opt.getPhase(params); *destCs = opt.getCs(params); } void DefocusHelper::findAstigmatismNM( const std::vector>& prediction, const std::vector>& observation, const Image &weight, const CTF &ctf0, RFLOAT angpix, double *destU, double *destV, double *destPhi) { AstigmatismOptimizationAcc opt(prediction, observation, weight, ctf0, false, false, angpix); std::vector initial = opt.getInitialParams(); std::vector params = NelderMead::optimize(initial, opt, 50.0, 1.0, 1000); *destU = opt.getU(params); *destV = opt.getV(params); *destPhi = opt.getPhi(params); } std::vector DefocusHelper::diagnoseDefocus( const Image &prediction, const Image &observation, const Image &weight, const CTF &ctf0, RFLOAT angpix, double range, int steps, int threads) { const RFLOAT delta = ctf0.DeltafV - ctf0.DeltafU; std::vector out(steps); #pragma omp parallel for num_threads(threads) for (int s = 0; s < steps; s++) { CTF ctf(ctf0); const RFLOAT u = ctf0.DeltafU + (s - steps/2) * range / (double)steps; const RFLOAT v = u + delta; ctf.DeltafU = u; ctf.DeltafV = v; ctf.initialise(); out[s][0] = u; out[s][1] = RefinementHelper::squaredDiff(prediction, observation, ctf, angpix, weight); } return out; } AstigmatismOptimization::AstigmatismOptimization( const Image& prediction, const Image& observation, const Image& weight, const CTF& ctf0, RFLOAT angpix) : prediction(prediction), observation(observation), weight(weight), ctf0(ctf0), angpix(angpix) { } double AstigmatismOptimization::f(const std::vector& x) const { CTF ctf(ctf0); ctf.DeltafU = x[0]; ctf.DeltafV = x[1]; ctf.azimuthal_angle = x[2]; ctf.initialise(); return RefinementHelper::squaredDiff(prediction, observation, ctf, angpix, weight); } AstigmatismOptimizationAcc::AstigmatismOptimizationAcc(const Image& prediction, const Image& observation, const Image& weight, const CTF& ctf0, bool phaseShift, bool spherAberr, RFLOAT angpix, RFLOAT phiScale, RFLOAT csScale) : ctf0(ctf0), phaseShift(phaseShift), spherAberr(spherAberr), angpix(angpix), phiScale(phiScale), csScale(csScale) { const long w = prediction.data.xdim; const long h = prediction.data.ydim; data = Image(w,h); for (long y = 0; y < h; y++) for (long x = 0; x < w; x++) { Complex vx = DIRECT_A2D_ELEM(prediction.data, y, x); const Complex vy = DIRECT_A2D_ELEM(observation.data, y, x); const RFLOAT vw = DIRECT_A2D_ELEM(weight.data, y, x); const RFLOAT x2 = vx.real*vx.real + vx.imag*vx.imag; const RFLOAT yxb = x2 > 0.0? 
(vy.real*vx.real + vy.imag*vx.imag)/x2 : 0.0; const RFLOAT wp = vw * x2; DIRECT_A2D_ELEM(data.data, y, x) = Complex(yxb, wp); } } AstigmatismOptimizationAcc::AstigmatismOptimizationAcc( const std::vector>& prediction, const std::vector>& observation, const Image& weight, const CTF& ctf0, bool phaseShift, bool spherAberr, RFLOAT angpix, RFLOAT phiScale, RFLOAT csScale) : ctf0(ctf0), phaseShift(phaseShift), spherAberr(spherAberr), angpix(angpix), phiScale(phiScale), csScale(csScale) { const long w = prediction[0].data.xdim; const long h = prediction[0].data.ydim; data = Image(w,h); data.data.initZeros(); for (int i = 0; i < prediction.size(); i++) { for (long y = 0; y < h; y++) for (long x = 0; x < w; x++) { Complex vx = DIRECT_A2D_ELEM(prediction[i].data, y, x); const Complex vy = DIRECT_A2D_ELEM(observation[i].data, y, x); const RFLOAT vw = DIRECT_A2D_ELEM(weight.data, y, x); const RFLOAT x2 = vx.real*vx.real + vx.imag*vx.imag; const RFLOAT yxb = x2 > 0.0? (vy.real*vx.real + vy.imag*vx.imag)/x2 : 0.0; const RFLOAT wp = vw * x2; DIRECT_A2D_ELEM(data.data, y, x) += Complex(yxb, wp); } } } double AstigmatismOptimizationAcc::f(const std::vector &x, void* tempStorage) const { CTF ctf(ctf0); ctf.DeltafU = x[0]; ctf.DeltafV = x[1]; ctf.azimuthal_angle = x[2]/phiScale; if (phaseShift) ctf.phase_shift = x[3]/phiScale; if (spherAberr) ctf.Cs = x[phaseShift?4:3]/csScale; ctf.initialise(); const long w = data.data.xdim; const long h = data.data.ydim; double out = 0.0; Image ctfImg(w,h); ctf.getFftwImage(ctfImg(), h, h, angpix); for (long y = 0; y < h; y++) for (long x = 0; x < w; x++) { Complex vd = DIRECT_A2D_ELEM(data.data, y, x); RFLOAT vm = ctfImg(y,x); RFLOAT dx = vd.real - vm; out += vd.imag * dx * dx; } return out; } double AstigmatismOptimizationAcc::getU(const std::vector &x) { return x[0]; } double AstigmatismOptimizationAcc::getV(const std::vector &x) { return x[1]; } double AstigmatismOptimizationAcc::getPhi(const std::vector &x) { return x[2] / phiScale; } double AstigmatismOptimizationAcc::getPhase(const std::vector &x) { return x[3] / phiScale; } double AstigmatismOptimizationAcc::getCs(const std::vector &x) { return x[phaseShift?4:3] / csScale; } std::vector AstigmatismOptimizationAcc::getInitialParams() { int num = 3; if (phaseShift) num++; if (spherAberr) num++; std::vector initial(num); initial[0] = ctf0.DeltafU; initial[1] = ctf0.DeltafU; initial[2] = phiScale * ctf0.azimuthal_angle; if (phaseShift) initial[3] = phiScale * ctf0.phase_shift; if (spherAberr) initial[phaseShift?4:3] = csScale * ctf0.Cs; return initial; } relion-3.1.3/src/jaz/ctf/defocus_helper.h000066400000000000000000000117311411340063500202750ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #ifndef DEFOCUS_REFINEMENT_H #define DEFOCUS_REFINEMENT_H #include #include #include #include #include #include #include class AstigmatismOptimization : public Optimization { public: AstigmatismOptimization( const Image& prediction, const Image& observation, const Image& weight, const CTF& ctf0, RFLOAT angpix); double f(const std::vector& x) const; private: const Image& prediction, observation; const Image& weight; const CTF& ctf0; RFLOAT angpix; }; class AstigmatismOptimizationAcc : public Optimization { public: AstigmatismOptimizationAcc( const Image& prediction, const Image& observation, const Image& weight, const CTF& ctf0, bool phaseShift, bool spherAberr, RFLOAT angpix, RFLOAT phiScale = 10, RFLOAT csScale = 10); AstigmatismOptimizationAcc( const std::vector>& prediction, const std::vector>& observation, const Image& weight, const CTF& ctf0, bool phaseShift, bool spherAberr, RFLOAT angpix, RFLOAT phiScale = 10, RFLOAT csScale = 100); double f(const std::vector& x, void* tempStorage) const; double getU(const std::vector &x); double getV(const std::vector &x); double getPhi(const std::vector &x); double getPhase(const std::vector &x); double getCs(const std::vector &x); std::vector getInitialParams(); private: Image data; const CTF& ctf0; bool phaseShift, spherAberr; RFLOAT angpix, phiScale, csScale; }; class DefocusHelper { public: static RFLOAT findDefocus1D(const Image& prediction, const Image& observation, const Image& weight, const CTF& ctf0, RFLOAT angpix, double* destU, double* destV, RFLOAT range = 1000.0, int steps = 11, int recDepth = 2, RFLOAT recScale = 10.0); static void findAstigmatismNM( const Image& prediction, const Image& observation, const Image& weight, const CTF& ctf0, RFLOAT angpix, double* destU, double* destV, double* destPhi); static void findAstigmatismAndPhaseNM( const std::vector>& prediction, const std::vector>& observation, const Image& weight, const CTF& ctf0, RFLOAT angpix, double* destU, double* destV, double* destPhi, double* destPhase); static void findAstigmatismPhaseAndCsNM( const std::vector>& prediction, const std::vector>& observation, const Image& weight, const CTF& ctf0, RFLOAT angpix, double* destU, double* destV, double* destPhi, double* destPhase, double* destCs); static void findAstigmatismNM( const std::vector>& prediction, const std::vector>& observation, const Image& weight, const CTF& ctf0, RFLOAT angpix, double* destU, double* destV, double* destPhi); static std::vector diagnoseDefocus(const Image& prediction, const Image& observation, const Image& weight, const CTF& ctf0, RFLOAT angpix, double range, int steps, int threads); }; #endif relion-3.1.3/src/jaz/ctf/delocalisation_helper.cpp000066400000000000000000000050461411340063500221740ustar00rootroot00000000000000#include "delocalisation_helper.h" #include using namespace gravis; void DelocalisationHelper::maskOutsideBox( const CTF &ctf, double radius, double angpix, int s_orig, MultidimArray &fftwCtfImg, double offsetx, double offsety) { const int s = fftwCtfImg.ydim; const int sh = fftwCtfImg.xdim; const double r2 = radius * radius; const double as = angpix * s; const t2Vector origin(offsetx, offsety); for (int y = 0; y < s; y++) for (int x = 0; x < sh; x++) { const double xx = x/as; const double yy = y < sh? y/as : (y - s)/as; t2Vector delocCent = RFLOAT(1.0 / (2 * angpix * PI)) * ctf.getGammaGrad(xx,yy); double out = 0.0; double cnt = 0.0; for (RFLOAT sign = (x == 0? 
1.0 : -1.0); sign <= 1.0; sign += 2.0) { t2Vector p = origin + sign * delocCent; double dx, dy; if (p.x > 0) dx = s_orig/2 - p.x; else dx = s_orig/2 + p.x; if (p.y > 0) dy = s_orig/2 - p.y; else dy = s_orig/2 +p.y; double fx, fy; if (dx < -radius) fx = 0.0; else if (dx < radius) fx = 1.0 - acos(dx/radius)/PI + dx * sqrt(r2 - dx*dx)/(PI * r2); else fx = 1.0; if (dy < -radius) fy = 0.0; else if (dy < radius) fy = 1.0 - acos(dy/radius)/PI + dy * sqrt(r2 - dy*dy)/(PI * r2); else fy = 1.0; const double f = fx * fy; out += f; cnt += 1; } DIRECT_A2D_ELEM(fftwCtfImg, y, x) *= out/cnt; } } Image DelocalisationHelper::plotDelocalisation( const CTF &ctf, Image &mask, double angpix) { const int s = mask.data.ydim; const int sh = mask.data.xdim; Image out(s,s); const double as = s * angpix; for (int y = 0; y < s; y++) for (int x = 0; x < s; x++) { double xx = x < sh? x/as : (x - s)/as; double yy = y < sh? y/as : (y - s)/as; t2Vector delocCent = RFLOAT(1.0 / (2 * angpix * PI)) * ctf.getGammaGrad(xx,yy); if (delocCent.x > -sh && delocCent.x < sh && delocCent.y > -sh && delocCent.y < sh) { int rx0 = (int)(delocCent.x + s)%s; int ry0 = (int)(delocCent.y + s)%s; int rx1 = (int)(delocCent.x + s + 1)%s; int ry1 = (int)(delocCent.y + s + 1)%s; double rxf = delocCent.x - std::floor(delocCent.x); double ryf = delocCent.y - std::floor(delocCent.y); int mx = x < sh? x : s - x; int my = x < sh? y : (s - y)%s; double mv = mask(my, mx); out(ry0,rx0) += mv * (1 - rxf) * (1 - ryf); out(ry0,rx1) += mv * rxf * (1 - ryf); out(ry1,rx0) += mv * (1 - rxf) * ryf; out(ry1,rx1) += mv * rxf * ryf; } } return out; } relion-3.1.3/src/jaz/ctf/delocalisation_helper.h000066400000000000000000000006441411340063500216400ustar00rootroot00000000000000#ifndef DELOCALISATION_HELPER_H #define DELOCALISATION_HELPER_H #include class DelocalisationHelper { public: static void maskOutsideBox( const CTF& ctf, double radius, double angpix, int s_orig, MultidimArray& fftwCtfImg, double offsetx, double offsety); static Image plotDelocalisation( const CTF& ctf, Image& mask, double angpix); }; #endif relion-3.1.3/src/jaz/ctf/equation2x2.cpp000066400000000000000000000017061411340063500200230ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ // This file intentionally left blab relion-3.1.3/src/jaz/ctf/equation2x2.h000066400000000000000000000024371411340063500174720ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef EQUATION_2X2_h #define EQUATION_2X2_h #include class Equation2x2 { public: Equation2x2(); double Axx, Axy, Ayy, bx, by; Equation2x2& operator += (const Equation2x2& arg) { Axx += arg.Axx; Axy += arg.Axy; Ayy += arg.Ayy; bx += arg.bx; by += arg.by; return *this; } }; #endif relion-3.1.3/src/jaz/ctf/magnification_estimator.cpp000066400000000000000000000231401411340063500225350ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include "magnification_estimator.h" #include "magnification_helper.h" #include "ctf_refiner.h" #include "equation2x2.h" #include #include #include #include #include #include using namespace gravis; MagnificationEstimator::MagnificationEstimator() { } void MagnificationEstimator::read( IOParser &parser, int argc, char *argv[]) { kmin = textToFloat(parser.getOption("--kmin_mag", "Inner freq. 
threshold for anisotropic magnification estimation [Angst]", "20.0")); adaptAstig = !parser.checkOption("--keep_astig", "Do not translate astigmatism into new coordinates"); perMgAstig = !parser.checkOption("--part_astig", "Allow astigmatism to vary among the particles of a micrograph"); } void MagnificationEstimator::init( int verb, int nr_omp_threads, bool debug, bool diag, std::string outPath, ReferenceMap* reference, ObservationModel* obsModel) { this->verb = verb; this->nr_omp_threads = nr_omp_threads; this->debug = debug; this->diag = diag; this->outPath = outPath; this->reference = reference; this->obsModel = obsModel; angpix = obsModel->getPixelSizes(); obsModel->getBoxSizes(s, sh); ready = true; } void MagnificationEstimator::processMicrograph( long g, MetaDataTable& mdt, const std::vector>& obs, const std::vector>& pred, const std::vector>>& predGradient, bool do_ctf_padding) { if (!ready) { REPORT_ERROR_STR("ERROR: MagnificationEstimator::processMicrograph: " << "MagnificationEstimator not initialized."); } std::vector>> particlesByOpticsGroup = obsModel->splitParticlesByOpticsGroup(mdt); for (int pog = 0; pog < particlesByOpticsGroup.size(); pog++) { const int og = particlesByOpticsGroup[pog].first; const std::vector& partIndices = particlesByOpticsGroup[pog].second; // TODO: SHWS 29mar2018: when data is CTF-premultiplied: do we need to change updateScaleFreq?? if (obsModel->getCtfPremultiplied(og)) std::cerr << "TODO: check magnification correction with CTF-premultiplied data!!" << std::endl; const int pc = partIndices.size(); std::vector> magEqs(nr_omp_threads); for (int i = 0; i < nr_omp_threads; i++) { magEqs[i] = Volume(sh[og],s[og],1); } #pragma omp parallel for num_threads(nr_omp_threads) for (long pp = 0; pp < pc; pp++) { const int p = partIndices[pp]; CTF ctf; ctf.readByGroup(mdt, obsModel, p); int threadnum = omp_get_thread_num(); MagnificationHelper::updateScaleFreq( pred[p], predGradient[p], obs[p], ctf, angpix[og], magEqs[threadnum], do_ctf_padding); } Volume magEq(sh[og], s[og],1); for (int threadnum = 0; threadnum < nr_omp_threads; threadnum++) { magEq += magEqs[threadnum]; } std::string outRoot = CtfRefiner::getOutputFilenameRoot(mdt, outPath); std::stringstream sts; sts << (og+1); MagnificationHelper::writeEQs(magEq, outRoot+"_mag_optics-group_" + sts.str()); } } void MagnificationEstimator::parametricFit( std::vector &mdts, MetaDataTable &optOut, std::vector &fn_eps) { if (!ready) { REPORT_ERROR_STR("ERROR: MagnificationEstimator::parametricFit: " << "MagnificationEstimator not initialized."); } if (verb > 0) { std::cout << " + Fitting anisotropic magnification ..." 
<< std::endl; } const int gc = mdts.size(); const int ogc = obsModel->numberOfOpticsGroups(); bool hasMagMatrices = optOut.labelExists(EMDL_IMAGE_MAG_MATRIX_00) && optOut.labelExists(EMDL_IMAGE_MAG_MATRIX_01) && optOut.labelExists(EMDL_IMAGE_MAG_MATRIX_10) && optOut.labelExists(EMDL_IMAGE_MAG_MATRIX_11); std::vector> mat_by_optGroup(ogc); #pragma omp parallel for num_threads(nr_omp_threads) for (int og = 0; og < ogc; og++) { Volume magEqs(sh[og],s[og],1), magEqsG(sh[og],s[og],1); std::stringstream sts; sts << (og+1); bool groupPresent = false; for (long g = 0; g < gc; g++) { std::string outRoot = CtfRefiner::getOutputFilenameRoot(mdts[g], outPath); std::string fn = outRoot + "_mag_optics-group_" + sts.str(); if (exists(fn+"_Axx.mrc") || exists(fn+"_Axy.mrc") || exists(fn+"_Ayy.mrc") || exists(fn+"_bx.mrc") || exists(fn+"_by.mrc")) { try { MagnificationHelper::readEQs(fn, magEqsG); magEqs += magEqsG; groupPresent = true; } catch (RelionError e) {} } } if (!groupPresent) { mat_by_optGroup[og] = Matrix2D(2,2); mat_by_optGroup[og].initIdentity(); continue; } std::vector > imgs_for_eps; std::vector scales; std::vector labels; Image flowx, flowy; MagnificationHelper::solvePerPixel(magEqs, flowx, flowy); Image flowxFull, flowyFull; FftwHelper::decenterUnflip2D(flowx.data, flowxFull.data); FftwHelper::decenterUnflip2D(flowy.data, flowyFull.data); ImageLog::write(flowxFull, outPath + "mag_disp_x_optics-group_" + sts.str()); ImageLog::write(flowyFull, outPath + "mag_disp_y_optics-group_" + sts.str()); imgs_for_eps.push_back(flowxFull); scales.push_back(1.); labels.push_back("X-disp obs [-1,+1] "+obsModel->getGroupName(og)); imgs_for_eps.push_back(flowyFull); scales.push_back(1.); labels.push_back("Y-disp obs [-1,+1] "+obsModel->getGroupName(og)); Image freqWght = reference->getHollowWeight(kmin, s[og], angpix[og]); Matrix2D mat = MagnificationHelper::solveLinearlyFreq(magEqs, freqWght, flowx, flowy); FftwHelper::decenterUnflip2D(flowx.data, flowxFull.data); FftwHelper::decenterUnflip2D(flowy.data, flowyFull.data); ImageLog::write(flowxFull, outPath + "mag_disp_x_fit_optics-group_" + sts.str()); ImageLog::write(flowyFull, outPath + "mag_disp_y_fit_optics-group_" + sts.str()); imgs_for_eps.push_back(flowxFull); scales.push_back(1.); labels.push_back("X-disp fit [-1,+1] "+obsModel->getGroupName(og)); imgs_for_eps.push_back(flowyFull); scales.push_back(1.); labels.push_back("Y-disp fit [-1,+1] "+obsModel->getGroupName(og)); #ifdef DEBUG std::ofstream os(outPath + "mag_matrix_optics-group_" + sts.str() + ".txt"); os << mat(0,0) << " " << mat(0,1) << "\n"; os << mat(1,0) << " " << mat(1,1) << "\n"; os.close(); #endif mat_by_optGroup[og] = mat; Matrix2D mat0 = obsModel->getMagMatrix(og); Matrix2D mat1 = mat * mat0; Matrix2D u, vh; Matrix1D eig; svdcmp(mat1, u, eig, vh); const RFLOAT mean_mag = (eig(0) + eig(1)) / 2; const RFLOAT aniso_mag = fabs(eig(0) - eig(1));// / mean_mag; #pragma omp critical { optOut.setValue(EMDL_IMAGE_MAG_MATRIX_00, mat1(0,0), og); optOut.setValue(EMDL_IMAGE_MAG_MATRIX_01, mat1(0,1), og); optOut.setValue(EMDL_IMAGE_MAG_MATRIX_10, mat1(1,0), og); optOut.setValue(EMDL_IMAGE_MAG_MATRIX_11, mat1(1,1), og); std::cout << " - Magnification anisotropy of optics group #" << (og + 1) << " named '" << obsModel->getGroupName(og) << "': " << (aniso_mag * 100.0) << " %" << std::endl; if (fabs(mean_mag - 1) > 0.005) { std::cerr << "WARNING: Overall magnification of optics group #" << (og + 1) << " (" << obsModel->getGroupName(og) << ") differs from the nominal pixel size by " << ((mean_mag - 1) * 
100) << " %.\n"; std::cerr << "WARNING: This overall difference changes the actual pixel size of the reconstruction!" << std::endl; } } obsModel->setMagMatrix(og, mat1); FileName fn_root = outPath + "mag_disp_optics-group_"+ sts.str(); ColorHelper::writeSignedToEPS(fn_root, 2, imgs_for_eps, scales, labels); fn_eps.push_back(fn_root+".eps"); } obsModel->hasMagMatrices = true; if (adaptAstig) { MagnificationHelper::adaptAstigmatism(mat_by_optGroup, mdts, !perMgAstig, obsModel); } } bool MagnificationEstimator::isFinished( const MetaDataTable &mdt) { if (!ready) { REPORT_ERROR("ERROR: TiltEstimator::isFinished: DefocusEstimator not initialized."); } std::string outRoot = CtfRefiner::getOutputFilenameRoot(mdt, outPath); bool allThere = true; std::vector ogp = obsModel->getOptGroupsPresent_zeroBased(mdt); for (int pog = 0; pog < ogp.size(); pog++) { const int og = ogp[pog]; std::stringstream sts; sts << (og+1); std::string ogstr = sts.str(); if( !exists(outRoot+"_mag_optics-group_"+ogstr+"_Axx.mrc") || !exists(outRoot+"_mag_optics-group_"+ogstr+"_Axy.mrc") || !exists(outRoot+"_mag_optics-group_"+ogstr+"_Ayy.mrc") || !exists(outRoot+"_mag_optics-group_"+ogstr+"_bx.mrc") || !exists(outRoot+"_mag_optics-group_"+ogstr+"_by.mrc")) { allThere = false; break; } } return allThere; } relion-3.1.3/src/jaz/ctf/magnification_estimator.h000066400000000000000000000046251411340063500222110ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef MAG_ESTIMATOR_H #define MAG_ESTIMATOR_H #include #include #include #include #include #include class IOParser; class ReferenceMap; class ObservationModel; class MetaDataTable; class MagnificationEstimator { public: MagnificationEstimator(); void read(IOParser& parser, int argc, char *argv[]); void init( int verb, int nr_omp_threads, bool debug, bool diag, std::string outPath, ReferenceMap* reference, ObservationModel* obsModel); // Compute per-pixel information for one micrograph void processMicrograph( long g, MetaDataTable& mdt, const std::vector>& obs, const std::vector>& pred, const std::vector>>& predGradient, bool do_ctf_padding = false); // Sum up per-pixel information from all micrographs, // then fit beam-tilt model to the per-pixel fit void parametricFit( std::vector& mdts, MetaDataTable& optOut, std::vector &fn_eps); // Has this mdt been processed already? bool isFinished(const MetaDataTable& mdt); private: // cmd. 
line options (see read()) double kmin; bool adaptAstig, perMgAstig; // parameters obtained through init() int verb, nr_omp_threads; bool debug, diag, ready; std::string outPath; std::vector s, sh; std::vector angpix; ReferenceMap* reference; ObservationModel* obsModel; }; #endif relion-3.1.3/src/jaz/ctf/magnification_helper.cpp000066400000000000000000000333671411340063500220210ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include "magnification_helper.h" #include #include #include #include #include #include using namespace gravis; Matrix2D MagnificationHelper::polarToMatrix( double scaleMajor, double scaleMinor, double angleDeg) { // based on definition by T. Nakane Matrix2D out(2,2); const double angle = DEG2RAD(angleDeg); const double si = sin(angle), co = cos(angle); const double si2 = si * si, co2 = co * co; /* Out = Rot(angle) * Diag(scale_major, scale_minor) * Rot(-angle), where Rot(angle) = [[cos(angle), -sin(angle)], [sin(angle), cos(angle)]] [ c s ] [ j 0 ] [ c -s ] [ -s c ] [ 0 n ] [ s c ] = [ c s ] [ jc -js ] [ -s c ] [ ns nc ] = [ jcc+nss -jcs+ncs ] [ -jcs+ncs jss+ncc ] */ out(0, 0) = scaleMajor * co2 + scaleMinor * si2; out(1, 1) = scaleMajor * si2 + scaleMinor * co2; out(0, 1) = (-scaleMajor + scaleMinor) * si * co; out(1, 0) = out(0, 1); return out; } void MagnificationHelper::matrixToPolar( const Matrix2D& mat, RFLOAT& scaleMajor, RFLOAT& scaleMinor, RFLOAT& angleDeg) { matrixToPolar( d2Matrix(mat(0,0), mat(0,1), mat(1,0), mat(1,1)), scaleMajor, scaleMinor, angleDeg); } void MagnificationHelper::matrixToPolar( const d2Matrix& mat, RFLOAT& scaleMajor, RFLOAT& scaleMinor, RFLOAT& angleDeg) { const double m00 = mat(0,0); const double m11 = mat(1,1); const double m01 = 0.5 * (mat(0,1) + mat(1,0)); double ev0, ev1, cs, sn; dsyev2(m00, m01, m11, &ev0, &ev1, &cs, &sn); scaleMajor = ev0; scaleMinor = ev1; angleDeg = RAD2DEG(atan2(sn,cs)); return; } void MagnificationHelper::updateScaleFreq( const Image &prediction, const Volume>& predGradient, const Image &observation, CTF &ctf, double angpix, Volume &eqs, bool do_ctf_padding) { const long w = prediction.data.xdim; const long h = prediction.data.ydim; /*Volume gradReal(w,h,1), gradImg(w,h,1); FilterHelper::centralGrad2D(prediction, gradReal, gradImg);*/ Image ctfImg(w,h); ctf.getFftwImage(ctfImg(), h, h, angpix, false, false, false, true, do_ctf_padding); for (long y = 0; y < h; y++) for (long x = 0; x < w; x++) { Complex vx = DIRECT_A2D_ELEM(prediction.data, y, x); Complex vy = DIRECT_A2D_ELEM(observation.data, y, x); double c = ctfImg(y,x); gravis::d2Vector gr(predGradient(x,y,0).x.real, predGradient(x,y,0).y.real); gravis::d2Vector 
gi(predGradient(x,y,0).x.imag, predGradient(x,y,0).y.imag); eqs(x,y,0).Axx += c * c * (gr.x * gr.x + gi.x * gi.x); eqs(x,y,0).Axy += c * c * (gr.x * gr.y + gi.x * gi.y); eqs(x,y,0).Ayy += c * c * (gr.y * gr.y + gi.y * gi.y); eqs(x,y,0).bx += c * (gr.x * (vy.real - c * vx.real) + gi.x * (vy.imag - c * vx.imag)); eqs(x,y,0).by += c * (gr.y * (vy.real - c * vx.real) + gi.y * (vy.imag - c * vx.imag)); } } void MagnificationHelper::updateScaleReal( const Image &prediction, const Image &observation, const Image& snr, CTF &ctf, double angpix, Volume &eqs, bool do_ctf_padding) { const long ww = 2*(observation.data.xdim - 1); const long w = prediction.data.xdim; const long h = prediction.data.ydim; Image pred2(w,h), obs2(w,h); Image realPred(ww, h), realObs(ww, h); Image ctfImg(w,h); ctf.getFftwImage(ctfImg(), h, h, angpix, false, false, false, true, do_ctf_padding); for (long y = 0; y < h; y++) for (long x = 0; x < w; x++) { Complex vx = DIRECT_A2D_ELEM(prediction.data, y, x); Complex vy = DIRECT_A2D_ELEM(observation.data, y, x); Complex sn = DIRECT_A2D_ELEM(snr.data, y, x); double c = ctfImg(y,x); DIRECT_A2D_ELEM(pred2.data, y, x) = sn * c * vx; DIRECT_A2D_ELEM(obs2.data, y, x) = sn * vy; } FourierTransformer ft0, ft1; ft0.inverseFourierTransform(pred2.data, realPred.data); ft1.inverseFourierTransform(obs2.data, realObs.data); Volume grad(ww,h,1); FilterHelper::centralGrad2D(realPred, grad); for (long y = 0; y < h; y++) for (long x = 0; x < ww; x++) { double vx = DIRECT_A2D_ELEM(realPred.data, y, x); double vy = DIRECT_A2D_ELEM(realObs.data, y, x); gravis::d2Vector g = grad(x,y,0); eqs(x,y,0).Axx += g.x * g.x; eqs(x,y,0).Axy += g.x * g.y; eqs(x,y,0).Ayy += g.y * g.y; eqs(x,y,0).bx += g.x * (vy - vx); eqs(x,y,0).by += g.y * (vy - vx); } } void MagnificationHelper::solvePerPixel( const Volume &eqs, Image &vx, Image &vy) { const long w = eqs.dimx; const long h = eqs.dimy; vx = Image(w,h); vy = Image(w,h); for (long y = 0; y < h; y++) for (long x = 0; x < w; x++) { Equation2x2 eq = eqs(x,y,0); gravis::d2Vector b(eq.bx, eq.by); gravis::d2Matrix A; A(0,0) = eq.Axx; A(0,1) = eq.Axy; A(1,0) = eq.Axy; A(1,1) = eq.Ayy; double det = A(0,0)*A(1,1) - A(0,1)*A(1,0); if (det == 0.0) { DIRECT_A2D_ELEM(vx.data, y, x) = 0.0; DIRECT_A2D_ELEM(vy.data, y, x) = 0.0; } else { gravis::d2Matrix Ai = A; Ai.invert(); gravis::d2Vector xx = Ai * b; DIRECT_A2D_ELEM(vx.data, y, x) = xx.x; DIRECT_A2D_ELEM(vy.data, y, x) = xx.y; } } } Matrix2D MagnificationHelper::solveLinearlyFreq( const Volume &eqs, const Image& snr, Image &vx, Image &vy) { Matrix2D mat(2,2); const long w = eqs.dimx; const long h = eqs.dimy; vx = Image(w,h); vy = Image(w,h); d4Vector b(0.0, 0.0, 0.0, 0.0); d4Matrix A(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0); for (long yi = 1; yi < h-1; yi++) for (long xi = 1; xi < w-1; xi++) { Equation2x2 eq = eqs(xi,yi,0); double wgh = DIRECT_A2D_ELEM(snr.data, yi, xi); const double x = xi; const double y = yi < w? 
yi : (yi - h); A(0,0) += wgh * x * x * eq.Axx; A(0,1) += wgh * x * y * eq.Axx; A(0,2) += wgh * x * x * eq.Axy; A(0,3) += wgh * x * y * eq.Axy; A(1,0) += wgh * x * y * eq.Axx; A(1,1) += wgh * y * y * eq.Axx; A(1,2) += wgh * x * y * eq.Axy; A(1,3) += wgh * y * y * eq.Axy; A(2,0) += wgh * x * x * eq.Axy; A(2,1) += wgh * x * y * eq.Axy; A(2,2) += wgh * x * x * eq.Ayy; A(2,3) += wgh * x * y * eq.Ayy; A(3,0) += wgh * x * y * eq.Axy; A(3,1) += wgh * y * y * eq.Axy; A(3,2) += wgh * x * y * eq.Ayy; A(3,3) += wgh * y * y * eq.Ayy; b[0] += wgh * x * eq.bx; b[1] += wgh * y * eq.bx; b[2] += wgh * x * eq.by; b[3] += wgh * y * eq.by; } d4Matrix Ai = A; Ai.invert(); d4Vector opt = Ai * b; mat(0,0) = opt[0] + 1.0; mat(0,1) = opt[1]; mat(1,0) = opt[2]; mat(1,1) = opt[3] + 1.0; // std::cout << opt[0] << ", " << opt[1] << "\n" // << opt[2] << ", " << opt[3] << "\n"; for (long yi = 0; yi < h; yi++) for (long xi = 0; xi < w; xi++) { const double x = xi; const double y = yi < w? yi : (yi - h); DIRECT_A2D_ELEM(vx.data, yi, xi) = opt[0] * x + opt[1] * y; DIRECT_A2D_ELEM(vy.data, yi, xi) = opt[2] * x + opt[3] * y; } return mat; } void MagnificationHelper::readEQs(std::string path, Volume &eqs) { Image Axx, Axy, Ayy, bx, by; Axx.read(path+"_Axx.mrc"); Axy.read(path+"_Axy.mrc"); Ayy.read(path+"_Ayy.mrc"); bx.read(path+"_bx.mrc"); by.read(path+"_by.mrc"); const long w = Axx.data.xdim; const long h = Axx.data.ydim; eqs.resize(w,h,1); for (long y = 0; y < h; y++) for (long x = 0; x < w; x++) { eqs(x,y,0).Axx = DIRECT_A2D_ELEM(Axx.data, y, x); eqs(x,y,0).Axy = DIRECT_A2D_ELEM(Axy.data, y, x); eqs(x,y,0).Ayy = DIRECT_A2D_ELEM(Ayy.data, y, x); eqs(x,y,0).bx = DIRECT_A2D_ELEM(bx.data, y, x); eqs(x,y,0).by = DIRECT_A2D_ELEM(by.data, y, x); } } void MagnificationHelper::writeEQs(const Volume &eqs, std::string path) { const long w = eqs.dimx; const long h = eqs.dimy; Image Axx(w,h); Image Axy(w,h); Image Ayy(w,h); Image bx(w,h); Image by(w,h); for (long y = 0; y < h; y++) for (long x = 0; x < w; x++) { Equation2x2 eq = eqs(x,y,0); DIRECT_A2D_ELEM(Axx.data, y, x) = eq.Axx; DIRECT_A2D_ELEM(Axy.data, y, x) = eq.Axy; DIRECT_A2D_ELEM(Ayy.data, y, x) = eq.Ayy; DIRECT_A2D_ELEM(bx.data, y, x) = eq.bx; DIRECT_A2D_ELEM(by.data, y, x) = eq.by; } Axx.write(path+"_Axx.mrc"); Axy.write(path+"_Axy.mrc"); Ayy.write(path+"_Ayy.mrc"); bx.write(path+"_bx.mrc"); by.write(path+"_by.mrc"); } void MagnificationHelper::updatePowSpec( const Image &prediction, const Image &observation, CTF &ctf, double angpix, Image &powSpecPred, Image &powSpecObs, bool do_ctf_padding) { const long w = prediction.data.xdim; const long h = prediction.data.ydim; Image ctfImg(w,h); ctf.getFftwImage(ctfImg(), h, h, angpix, false, false, false, true, do_ctf_padding); for (long y = 0; y < h; y++) for (long x = 0; x < w; x++) { const double xf = x; const double yf = y < w? 
y : y - h; Complex vx = DIRECT_A2D_ELEM(prediction.data, y, x); Complex vy = DIRECT_A2D_ELEM(observation.data, y, x); double c = ctfImg(y,x); DIRECT_A2D_ELEM(powSpecPred.data, y, x) += (c * vx).abs(); DIRECT_A2D_ELEM(powSpecObs.data, y, x) += vy.abs(); } } void MagnificationHelper::adaptAstigmatism( const std::vector>& dMs, std::vector& partMdts, bool perParticle, ObservationModel* obsModel) { const long int mc = partMdts.size(); const int ogc = dMs.size(); std::vector M(ogc), Mi(ogc), Mit(ogc); for (int og = 0; og < ogc; og++) { M[og] = d2Matrix(dMs[og](0,0), dMs[og](0,1), dMs[og](1,0), dMs[og](1,1)); Mi[og] = M[og]; Mi[og].invert(); Mit[og] = Mi[og]; Mit[og].transpose(); } for (long int m = 0; m < mc; m++) { const int pc = partMdts[m].numberOfObjects(); std::vector A(pc), D(pc), Q(pc); for (long int p = 0; p < pc; p++) { double deltafU, deltafV, phiDeg; partMdts[m].getValue(EMDL_CTF_DEFOCUSU, deltafU, p); partMdts[m].getValue(EMDL_CTF_DEFOCUSV, deltafV, p); partMdts[m].getValue(EMDL_CTF_DEFOCUS_ANGLE, phiDeg, p); const double phi = DEG2RAD(phiDeg); const double si = sin(phi); const double co = cos(phi); Q[p] = d2Matrix(co, si, -si, co); D[p] = d2Matrix(-deltafU, 0.0, 0.0, -deltafV); d2Matrix Qt(co, -si, si, co); A[p] = Qt * D[p] * Q[p]; } if (perParticle) { for (long int p = 0; p < pc; p++) { int og; partMdts[m].getValue(EMDL_IMAGE_OPTICS_GROUP, og, p); og--; d2Matrix A2 = Mit[og] * A[p] * Mi[og]; RFLOAT deltafU_neg, deltafV_neg, phiDeg; matrixToPolar(A2, deltafU_neg, deltafV_neg, phiDeg); partMdts[m].setValue(EMDL_CTF_DEFOCUSU, -deltafU_neg, p); partMdts[m].setValue(EMDL_CTF_DEFOCUSV, -deltafV_neg, p); partMdts[m].setValue(EMDL_CTF_DEFOCUS_ANGLE, phiDeg, p); } } else // keep difference between deltafU and deltafV, as well as the azimuth angle, // constant for all particles in the same micrograph and optics group { std::vector optGroups = obsModel->getOptGroupsPresent_oneBased(partMdts[m]); const int cc = optGroups.size(); std::vector groupToIndex(obsModel->numberOfOpticsGroups()+1, -1); for (int i = 0; i < cc; i++) { groupToIndex[optGroups[i]] = i; } for (int c = 0; c < cc; c++) { const int og = optGroups[c] - 1; d2Matrix A_mean(0.0, 0.0, 0.0, 0.0); for (long int p = 0; p < pc; p++) { int ogp; partMdts[m].getValue(EMDL_IMAGE_OPTICS_GROUP, ogp, p); ogp--; if (ogp == og) { A_mean += A[p] * (1.0/(double)pc); } } A_mean = Mit[og] * A_mean * Mi[og]; double deltafU_mean_neg, deltafV_mean_neg, co, si; dsyev2(A_mean(0,0), A_mean(1,0), A_mean(1,1), &deltafU_mean_neg, &deltafV_mean_neg, &co, &si); d2Matrix Q2(co, si, -si, co); d2Matrix Qt2(co, -si, si, co); double meanDef_mean = 0.5 * (deltafU_mean_neg + deltafV_mean_neg); for (long int p = 0; p < pc; p++) { int ogp; partMdts[m].getValue(EMDL_IMAGE_OPTICS_GROUP, ogp, p); ogp--; if (ogp == og) { d2Matrix Ap2 = Mit[og] * A[p] * Mi[og]; double deltafU_p_neg, deltafV_p_neg, cop, sip; dsyev2(Ap2(0,0), Ap2(1,0), Ap2(1,1), &deltafU_p_neg, &deltafV_p_neg, &cop, &sip); double meanDef_p = 0.5 * (deltafU_p_neg + deltafV_p_neg); d2Matrix Dp2(deltafU_mean_neg - meanDef_mean + meanDef_p, 0.0, 0.0, deltafV_mean_neg - meanDef_mean + meanDef_p); d2Matrix Apa2 = Qt2 * Dp2 * Q2; RFLOAT deltafU_pa_neg, deltafV_pa_neg, phiDeg; matrixToPolar(Apa2, deltafU_pa_neg, deltafV_pa_neg, phiDeg); partMdts[m].setValue(EMDL_CTF_DEFOCUSU, -deltafU_pa_neg, p); partMdts[m].setValue(EMDL_CTF_DEFOCUSV, -deltafV_pa_neg, p); partMdts[m].setValue(EMDL_CTF_DEFOCUS_ANGLE, phiDeg, p); } } } } } } Equation2x2::Equation2x2() : Axx(0.0), Axy(0.0), Ayy(0.0), bx(0.0), by(0.0) { } 
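/* [Editor's note] A hedged, standalone illustration of the per-pixel solve used by
   MagnificationHelper above: updateScaleFreq()/updateScaleReal() accumulate, for every
   pixel, the normal equations of a small weighted least-squares problem (find the
   displacement d that minimises |obs - CTF*(pred + grad(pred)*d)|^2), and solvePerPixel()
   then solves A d = b with A = [[Axx, Axy], [Axy, Ayy]] and b = (bx, by). The sketch
   below shows only that 2x2 solve in isolation; Eq2x2, solve2x2 and the sample numbers
   are illustrative and are not part of the RELION sources. */

#include <cstdio>

struct Eq2x2 { double Axx, Axy, Ayy, bx, by; };  // same fields as Equation2x2 above

// Solve the symmetric 2x2 system A d = b via the explicit inverse; solvePerPixel()
// does the equivalent and falls back to d = (0, 0) when A is singular.
static bool solve2x2(const Eq2x2& e, double& dx, double& dy)
{
	const double det = e.Axx * e.Ayy - e.Axy * e.Axy;

	if (det == 0.0)
	{
		dx = 0.0;
		dy = 0.0;
		return false;
	}

	dx = ( e.Ayy * e.bx - e.Axy * e.by) / det;
	dy = (-e.Axy * e.bx + e.Axx * e.by) / det;

	return true;
}

int main()
{
	Eq2x2 e = {4.0, 1.0, 3.0, 2.0, 1.0};  // hypothetical accumulated sums
	double dx, dy;

	if (solve2x2(e, dx, dy))
	{
		std::printf("per-pixel displacement: (%g, %g)\n", dx, dy);  // prints (5/11, 2/11)
	}

	return 0;
}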
relion-3.1.3/src/jaz/ctf/magnification_helper.h000066400000000000000000000057571411340063500214700ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef MAGNIFICATION_REFINEMENT_H #define MAGNIFICATION_REFINEMENT_H #include #include #include #include #include #include #include #include "equation2x2.h" class MagnificationHelper { public: static Matrix2D polarToMatrix( double scaleMajor = 1.0, double scaleMinor = 1.0, double angleDeg = 0.0); static void matrixToPolar( const Matrix2D& mat, RFLOAT& scaleMajor, RFLOAT& scaleMinor, RFLOAT& angleDeg); static void matrixToPolar( const gravis::d2Matrix& mat, RFLOAT& scaleMajor, RFLOAT& scaleMinor, RFLOAT& angleDeg); static void updateScaleFreq( const Image& prediction, const Volume>& predGradient, const Image& observation, CTF& ctf, double angpix, Volume& eqs, bool do_ctf_padding = false); static void updateScaleReal( const Image& prediction, const Image& observation, const Image& snr, CTF& ctf, double angpix, Volume& eqs, bool do_ctf_padding = false); static void solvePerPixel( const Volume& eqs, Image& vx, Image& vy); static Matrix2D solveLinearlyFreq( const Volume& eqs, const Image& snr, Image& vx, Image& vy); static void readEQs(std::string path, Volume& eqs); static void writeEQs(const Volume& eqs, std::string path); static void updatePowSpec( const Image& prediction, const Image& observation, CTF& ctf, double angpix, Image& powSpecPred, Image& powSpecObs, bool do_ctf_padding = false); static void adaptAstigmatism( const std::vector>& dMs, std::vector& partMdts, bool perParticle, ObservationModel* obsModel); }; #endif relion-3.1.3/src/jaz/ctf/modular_ctf_optimisation.cpp000066400000000000000000000407221411340063500227410ustar00rootroot00000000000000#include "modular_ctf_optimisation.h" #include #include #include using namespace gravis; #define DATA_PAD 512 ModularCtfOptimisation::ModularCtfOptimisation( MetaDataTable &mdt, ObservationModel* obsModel, const std::vector>& obs, const std::vector>& pred, const std::vector>& frqWghByGroup, std::string modeStr, int num_treads) : mdt(mdt), obsModel(obsModel), obs(obs), pred(pred), particle_count(mdt.numberOfObjects()), num_treads(num_treads), frqWghByGroup(frqWghByGroup) { initialValues.resize(CtfParamCount * particle_count); for (int p = 0; p < particle_count; p++) { CTF ctf; ctf.readByGroup(mdt, obsModel, p); std::vector K = ctf.getK(); const double Axx = K[1] * ctf.getAxx(); const double Axy = K[1] * ctf.getAxy(); const double Ayy = K[1] * ctf.getAyy(); const double avgDz = (Axx + Ayy) / 2.0; const double bfac = ctf.Bfac; const double kfac = ctf.scale; initialValues[CtfParamCount * p + Phase] = -K[5] - K[3]; 
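// [Editor's note] The slots filled here are the coefficients of the phase function that
// f() and grad() below evaluate per pixel,
//    gamma(x,y) = ph + (dz + a1)*x^2 + 2*a2*x*y + (dz - a1)*y^2 + cs*(x^2 + y^2)^2 + gammaOffset,
// with CTF value kf * exp(-bf*u^2/4) * (-sin(gamma)).
// Hence Phase = -K[5] - K[3] absorbs both the phase-plate shift and the amplitude-contrast
// term, Defocus is the mean of K[1]*Axx and K[1]*Ayy, Astigmatism1/2 hold the remaining
// (traceless) part of that defocus matrix, SphericalAberration is K[2], and
// writeToTable() further down applies the inverse mapping back to the STAR-file labels.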
initialValues[CtfParamCount * p + Defocus] = avgDz; initialValues[CtfParamCount * p + Astigmatism1] = Axx - avgDz; initialValues[CtfParamCount * p + Astigmatism2] = Axy; initialValues[CtfParamCount * p + SphericalAberration] = K[2]; initialValues[CtfParamCount * p + BFactor] = bfac; initialValues[CtfParamCount * p + ScaleFactor] = kfac; } modes = decodeModes(modeStr); int currOff = 0; for (int i = 0; i < CtfParamCount; i++) { switch (modes[i]) { case PerParticle: { paramOffset[i] = currOff; paramParticleStep[i] = 1; currOff += particle_count; break; } case PerMicrograph: { paramOffset[i] = currOff; paramParticleStep[i] = 0; currOff += 1; break; } case Fixed: { paramOffset[i] = 0; paramParticleStep[i] = 0; break; } } } param_count = currOff; angpix = obsModel->getPixelSizes(); if (obsModel->hasEvenZernike) { std::vector ogPres = obsModel->getOptGroupsPresent_zeroBased(mdt); aberrationByGroup.resize(obsModel->numberOfOpticsGroups()); for (int ogpi = 0; ogpi < ogPres.size(); ogpi++) { const int og = ogPres[ogpi]; aberrationByGroup[og] = obsModel->getGammaOffset(og, obsModel->getBoxSize(og)); } } } double ModularCtfOptimisation::f(const std::vector &x) const { double out = 0.0; for (int p = 0; p < particle_count; p++) { const double ph = readParam(Phase, x, p); const double dz = readParam(Defocus, x, p); const double a1 = readParam(Astigmatism1, x, p); const double a2 = readParam(Astigmatism2, x, p); const double cs = readParam(SphericalAberration, x, p); const double bf = readParam(BFactor, x, p); const double kf = readParam(ScaleFactor, x, p); const int s = obs[p].data.ydim; const int sh = s/2 + 1; const int og = obsModel->getOpticsGroup(mdt, p); const double as = s * angpix[og]; for (int yi = 0; yi < s; yi++) for (int xi = 0; xi < sh; xi++) { const double wght = (xi == 0)? 1.0 : 2.0; const double xx = xi/as; const double yy = (yi <= s/2)? yi/as : (yi - s)/as; double xu, yu; if (obsModel->hasMagMatrices) { const Matrix2D& M = obsModel->getMagMatrix(og); xu = M(0,0) * xx + M(0,1) * yy; yu = M(1,0) * xx + M(1,1) * yy; } else { xu = xx; yu = yy; } const double u2 = xu * xu + yu * yu; const double u4 = u2 * u2; const double gammaOffset = obsModel->hasEvenZernike? 
aberrationByGroup[og](yi,xi) : 0.0; const double freqWgh = frqWghByGroup[og](yi,xi); const double gamma = ph + (dz + a1) * xu * xu + 2.0 * a2 * xu * yu + (dz - a1) * yu * yu + cs * u4 + gammaOffset; const double ctfVal = kf * exp(-bf * u2 / 4.0) * (-sin(gamma)); out += freqWgh * wght * ( obs[p](yi,xi) - ctfVal * pred[p](yi,xi) ).norm(); } } return out; } double ModularCtfOptimisation::f(const std::vector &x, void *tempStorage) const { if (tempStorage == 0) return f(x); std::vector* out = (std::vector*) tempStorage; const int stride = param_count + DATA_PAD; #pragma omp parallel for num_threads(num_treads) for (int t = 0; t < num_treads; t++) { (*out)[t*stride] = 0.0; } #pragma omp parallel for num_threads(num_treads) for (int p = 0; p < particle_count; p++) { int t = omp_get_thread_num(); const double ph = readParam(Phase, x, p); const double dz = readParam(Defocus, x, p); const double a1 = readParam(Astigmatism1, x, p); const double a2 = readParam(Astigmatism2, x, p); const double cs = readParam(SphericalAberration, x, p); const double bf = readParam(BFactor, x, p); const double kf = readParam(ScaleFactor, x, p); const int s = obs[p].data.ydim; const int sh = s/2 + 1; const int og = obsModel->getOpticsGroup(mdt, p); const double as = s * angpix[og]; for (int yi = 0; yi < s; yi++) for (int xi = 0; xi < sh; xi++) { const double wght = (xi == 0)? 1.0 : 2.0; const double xx = xi/as; const double yy = (yi <= s/2)? yi/as : (yi - s)/as; double xu, yu; if (obsModel->hasMagMatrices) { const Matrix2D& M = obsModel->getMagMatrix(og); xu = M(0,0) * xx + M(0,1) * yy; yu = M(1,0) * xx + M(1,1) * yy; } else { xu = xx; yu = yy; } const double u2 = xu * xu + yu * yu; const double u4 = u2 * u2; const double gammaOffset = obsModel->hasEvenZernike? aberrationByGroup[og](yi,xi) : 0.0; const double freqWgh = frqWghByGroup[og](yi,xi); const double gamma = ph + (dz + a1) * xu * xu + 2.0 * a2 * xu * yu + (dz - a1) * yu * yu + cs * u4 + gammaOffset; const double ctfVal = kf * exp(-bf * u2 / 4.0) * (-sin(gamma)); (*out)[t*stride] += freqWgh * wght * ( obs[p](yi,xi) - ctfVal * pred[p](yi,xi) ).norm(); } } double out2 = 0.0; for (int t = 0; t < num_treads; t++) { out2 += (*out)[t*stride]; } return out2; } void ModularCtfOptimisation::grad( const std::vector &x, std::vector &gradDest) const { for (int i = 0; i < gradDest.size(); i++) { gradDest[i] = 0.0; } for (int p = 0; p < particle_count; p++) { const double ph = readParam(Phase, x, p); const double dz = readParam(Defocus, x, p); const double a1 = readParam(Astigmatism1, x, p); const double a2 = readParam(Astigmatism2, x, p); const double cs = readParam(SphericalAberration, x, p); const double bf = readParam(BFactor, x, p); const double kf = readParam(ScaleFactor, x, p); const int s = obs[p].data.ydim; const int sh = s/2 + 1; const int og = obsModel->getOpticsGroup(mdt, p); const double as = s * angpix[og]; for (int yi = 0; yi < s; yi++) for (int xi = 0; xi < sh; xi++) { const double wght = (xi == 0)? 1.0 : 2.0; const double xx = xi/as; const double yy = (yi <= s/2)? yi/as : (yi - s)/as; double xu, yu; if (obsModel->hasMagMatrices) { const Matrix2D& M = obsModel->getMagMatrix(og); xu = M(0,0) * xx + M(0,1) * yy; yu = M(1,0) * xx + M(1,1) * yy; } else { xu = xx; yu = yy; } const double u2 = xu * xu + yu * yu; const double u4 = u2 * u2; const double gammaOffset = obsModel->hasEvenZernike? 
aberrationByGroup[og](yi,xi) : 0.0; const double freqWgh = frqWghByGroup[og](yi,xi); const double gamma = ph + (dz + a1) * xu * xu + 2.0 * a2 * xu * yu + (dz - a1) * yu * yu + cs * u4 + gammaOffset; const double env = exp(-bf * u2 / 4.0); const double wave = -sin(gamma); const double ctfVal = kf * env * wave; const Complex z_obs = obs[p](yi,xi); const Complex z_pred = pred[p](yi,xi); const Complex z_err = ctfVal * z_pred - z_obs; const double dE_dCtfVal = 2.0 * freqWgh * wght * ( z_err.real * z_pred.real + z_err.imag * z_pred.imag); const double dE_dGamma = dE_dCtfVal * kf * env * (-cos(gamma)); const double dE_dPh = dE_dGamma; const double dE_dDz = dE_dGamma * (xu * xu + yu * yu); const double dE_dA1 = dE_dGamma * (xu * xu - yu * yu); const double dE_dA2 = dE_dGamma * 2.0 * xu * yu; const double dE_dCs = dE_dGamma * u4; const double dE_dBf = dE_dCtfVal * kf * wave * exp(-bf * u2 / 4.0) * (-u2/4.0); const double dE_dKf = dE_dCtfVal * env * wave; if (modes[Phase] != Fixed) { gradDest[paramOffset[Phase] + p * paramParticleStep[Phase]] += dE_dPh; } if (modes[Defocus] != Fixed) { gradDest[paramOffset[Defocus] + p * paramParticleStep[Defocus]] += dE_dDz; } if (modes[Astigmatism1] != Fixed) { gradDest[paramOffset[Astigmatism1] + p * paramParticleStep[Astigmatism1]] += dE_dA1; } if (modes[Astigmatism2] != Fixed) { gradDest[paramOffset[Astigmatism2] + p * paramParticleStep[Astigmatism2]] += dE_dA2; } if (modes[SphericalAberration] != Fixed) { gradDest[paramOffset[SphericalAberration] + p * paramParticleStep[SphericalAberration]] += dE_dCs; } if (modes[BFactor] != Fixed) { gradDest[paramOffset[BFactor] + p * paramParticleStep[BFactor]] += dE_dBf; } if (modes[ScaleFactor] != Fixed) { gradDest[paramOffset[ScaleFactor] + p * paramParticleStep[ScaleFactor]] += dE_dKf; } } } } void ModularCtfOptimisation::grad( const std::vector &x, std::vector &gradDest, void *tempStorage) const { if (tempStorage == 0) { grad(x, gradDest); return; } std::vector* out = (std::vector*) tempStorage; const int stride = param_count + DATA_PAD; #pragma omp parallel for num_threads(num_treads) for (int t = 0; t < num_treads; t++) for (int i = 0; i < param_count; i++) { (*out)[t*stride + i] = 0.0; } #pragma omp parallel for num_threads(num_treads) for (int p = 0; p < particle_count; p++) { int t = omp_get_thread_num(); const double ph = readParam(Phase, x, p); const double dz = readParam(Defocus, x, p); const double a1 = readParam(Astigmatism1, x, p); const double a2 = readParam(Astigmatism2, x, p); const double cs = readParam(SphericalAberration, x, p); const double bf = readParam(BFactor, x, p); const double kf = readParam(ScaleFactor, x, p); const int s = obs[p].data.ydim; const int sh = s/2 + 1; const int og = obsModel->getOpticsGroup(mdt, p); const double as = s * angpix[og]; for (int yi = 0; yi < s; yi++) for (int xi = 0; xi < sh; xi++) { const double wght = (xi == 0)? 1.0 : 2.0; const double xx = xi/as; const double yy = (yi <= s/2)? yi/as : (yi - s)/as; double xu, yu; if (obsModel->hasMagMatrices) { const Matrix2D& M = obsModel->getMagMatrix(og); xu = M(0,0) * xx + M(0,1) * yy; yu = M(1,0) * xx + M(1,1) * yy; } else { xu = xx; yu = yy; } const double u2 = xu * xu + yu * yu; const double u4 = u2 * u2; const double gammaOffset = obsModel->hasEvenZernike? 
aberrationByGroup[og](yi,xi) : 0.0; const double freqWgh = frqWghByGroup[og](yi,xi); const double gamma = ph + (dz + a1) * xu * xu + 2.0 * a2 * xu * yu + (dz - a1) * yu * yu + cs * u4 + gammaOffset; const double env = exp(-bf * u2 / 4.0); const double wave = -sin(gamma); const double ctfVal = kf * env * wave; const Complex z_obs = obs[p](yi,xi); const Complex z_pred = pred[p](yi,xi); const Complex z_err = ctfVal * z_pred - z_obs; const double dE_dCtfVal = 2.0 * freqWgh * wght * ( z_err.real * z_pred.real + z_err.imag * z_pred.imag); const double dE_dGamma = dE_dCtfVal * kf * env * (-cos(gamma)); const double dE_dPh = dE_dGamma; const double dE_dDz = dE_dGamma * (xu * xu + yu * yu); const double dE_dA1 = dE_dGamma * (xu * xu - yu * yu); const double dE_dA2 = dE_dGamma * 2.0 * xu * yu; const double dE_dCs = dE_dGamma * u4; const double dE_dBf = dE_dCtfVal * kf * wave * env * (-u2/4.0); const double dE_dKf = dE_dCtfVal * env * wave; if (modes[Phase] != Fixed) { (*out)[t*stride + paramOffset[Phase] + p * paramParticleStep[Phase]] += dE_dPh; } if (modes[Defocus] != Fixed) { (*out)[t*stride + paramOffset[Defocus] + p * paramParticleStep[Defocus]] += dE_dDz; } if (modes[Astigmatism1] != Fixed) { (*out)[t*stride + paramOffset[Astigmatism1] + p * paramParticleStep[Astigmatism1]] += dE_dA1; } if (modes[Astigmatism2] != Fixed) { (*out)[t*stride + paramOffset[Astigmatism2] + p * paramParticleStep[Astigmatism2]] += dE_dA2; } if (modes[SphericalAberration] != Fixed) { (*out)[t*stride + paramOffset[SphericalAberration] + p * paramParticleStep[SphericalAberration]] += dE_dCs; } if (modes[BFactor] != Fixed) { (*out)[t*stride + paramOffset[BFactor] + p * paramParticleStep[BFactor]] += dE_dBf; } if (modes[ScaleFactor] != Fixed) { (*out)[t*stride + paramOffset[ScaleFactor] + p * paramParticleStep[ScaleFactor]] += dE_dKf; } } } for (int i = 0; i < param_count; i++) { gradDest[i] = 0.0; for (int t = 0; t < num_treads; t++) { gradDest[i] += (*out)[t*stride + i]; } } } void *ModularCtfOptimisation::allocateTempStorage() const { return new std::vector((param_count + DATA_PAD) * num_treads); } void ModularCtfOptimisation::deallocateTempStorage(void *ts) const { delete (std::vector*) ts; } std::vector ModularCtfOptimisation::encodeInitial() { std::vector out(param_count, 0.0); for (int cp = 0; cp < CtfParamCount; cp++) { switch (modes[cp]) { case PerParticle: { for (int p = 0; p < particle_count; p++) { out[paramOffset[cp] + p] = initialValues[CtfParamCount * p + cp]; } break; } case PerMicrograph: { out[paramOffset[cp]] = initialValues[cp]; // returning value of first particle break; } case Fixed: { break; } } } return out; } void ModularCtfOptimisation::writeToTable(const std::vector &x) { for (int p = 0; p < particle_count; p++) { const double ph = readParam(Phase, x, p); const double dz = readParam(Defocus, x, p); const double a1 = readParam(Astigmatism1, x, p); const double a2 = readParam(Astigmatism2, x, p); const double cs = readParam(SphericalAberration, x, p); const double bf = readParam(BFactor, x, p); const double kf = readParam(ScaleFactor, x, p); CTF ctf; ctf.readByGroup(mdt, obsModel, p); std::vector K = ctf.getK(); d2Matrix A(-(dz+a1)/ K[1], -a2 / K[1], -a2 / K[1], -(dz-a1) / K[1]); RFLOAT defocusU, defocusV, angleDeg; MagnificationHelper::matrixToPolar(A, defocusU, defocusV, angleDeg); RFLOAT local_kV = ctf.kV * 1e3; const double lambda = 12.2643247 / sqrt(local_kV * (1. 
+ local_kV * 0.978466e-6)); // ph = -K[5] - K[3] = -DEG2RAD(phase_shift) - atan(Q0/sqrt(1-Q0*Q0)); // => phase_shift = RAD2DEG(-ph - K[3]) if (modes[Phase] != Fixed) mdt.setValue(EMDL_CTF_PHASESHIFT, RAD2DEG(-ph - K[3]), p); mdt.setValue(EMDL_CTF_DEFOCUSU, defocusU, p); mdt.setValue(EMDL_CTF_DEFOCUSV, defocusV, p); mdt.setValue(EMDL_CTF_DEFOCUS_ANGLE, angleDeg, p); // cs = K[2] = (PI / 2) * C_s * 1e7 * lambda^3 // => C_s = cs * (2/PI) / (1e7 / lambda^3) if (modes[SphericalAberration] != Fixed) mdt.setValue(EMDL_CTF_CS, 2 * cs / (1e7 * PI * lambda*lambda*lambda), p); if (modes[BFactor] != Fixed) mdt.setValue(EMDL_CTF_BFACTOR, bf, p); if (modes[ScaleFactor] != Fixed) mdt.setValue(EMDL_CTF_SCALEFACTOR, kf, p); } } bool ModularCtfOptimisation::validateModeString(std::string mode) { if (mode.length() != 5) return false; for (int i = 0; i < 5; i++) { if (mode[i] != 'p' && mode[i] != 'm' && mode[i] != 'f') { return false; } } return true; } std::vector ModularCtfOptimisation::decodeModes(std::string s) { std::vector out(CtfParamCount); std::vector charToMode(256, Fixed); charToMode[(unsigned char)'p'] = PerParticle; charToMode[(unsigned char)'m'] = PerMicrograph; out[Phase] = charToMode[(unsigned char)s[0]]; out[Defocus] = charToMode[(unsigned char)s[1]]; out[Astigmatism1] = charToMode[(unsigned char)s[2]]; out[Astigmatism2] = charToMode[(unsigned char)s[2]]; out[SphericalAberration] = charToMode[(unsigned char)s[3]]; out[BFactor] = charToMode[(unsigned char)s[4]]; out[ScaleFactor] = charToMode[(unsigned char)s[4]]; return out; } relion-3.1.3/src/jaz/ctf/modular_ctf_optimisation.h000066400000000000000000000052631411340063500224070ustar00rootroot00000000000000#ifndef DIFF_CTF_OPTIMISATION #define DIFF_CTF_OPTIMISATION #include #include #include #include #include class ModularCtfOptimisation : public DifferentiableOptimization { public: typedef enum { PerParticle = 0, PerMicrograph = 1, Fixed = 2, ModeCount = 3 } Mode; typedef enum { Phase = 0, Defocus = 1, Astigmatism1 = 2, Astigmatism2 = 3, SphericalAberration = 4, BFactor = 5, ScaleFactor = 6, CtfParamCount = 7 } CtfParam; ModularCtfOptimisation( MetaDataTable& mdt, ObservationModel* obsModel, const std::vector>& obs, const std::vector>& pred, const std::vector>& frqWghByGroup, std::string modeStr, // <- five characters (from {p,m,f}) indicating whether int num_treads); // the phase, defocus, astigmatism, Cs and B/k (in this order) // are to be estimated per [p]article, per [m]icrograph or // to be kept [f]ixed. 
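// [Editor's note] A hedged usage sketch (not part of the original header); the mode
// string "fpmff" is a hypothetical choice, handled by the static helpers declared
// further down:
//
//   std::string modeStr = "fpmff";  // phase fixed, defocus per particle,
//                                   // astigmatism per micrograph,
//                                   // Cs fixed, B-factor/scale fixed
//   if (ModularCtfOptimisation::validateModeString(modeStr))
//   {
//       auto modes = ModularCtfOptimisation::decodeModes(modeStr);  // one Mode per CtfParam
//   }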
double f(const std::vector &x) const; double f(const std::vector &x, void *tempStorage) const; void grad(const std::vector &x, std::vector &gradDest) const; void grad(const std::vector &x, std::vector &gradDest, void *tempStorage) const; void* allocateTempStorage() const; void deallocateTempStorage(void* ts) const; std::vector encodeInitial(); void writeToTable(const std::vector &x); static bool validateModeString(std::string mode); static std::vector decodeModes(std::string s); protected: MetaDataTable& mdt; ObservationModel* obsModel; const std::vector>& obs; const std::vector>& pred; int particle_count, param_count, num_treads; std::vector modes; double paramScale[CtfParamCount]; std::vector initialValues, angpix; int paramOffset[CtfParamCount], paramParticleStep[CtfParamCount]; std::vector> aberrationByGroup; const std::vector>& frqWghByGroup; inline double readParam(CtfParam param, const std::vector &x, int p) const; }; inline double ModularCtfOptimisation::readParam( CtfParam param, const std::vector &x, int p) const { if (modes[param] == Fixed) { return initialValues[CtfParamCount * p + param]; } else { return x[paramOffset[param] + p * paramParticleStep[param]]; } } #endif relion-3.1.3/src/jaz/ctf/tilt_estimator.cpp000066400000000000000000000267461411340063500207200ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include "tilt_estimator.h" #include "tilt_helper.h" #include "ctf_refiner.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include using namespace gravis; TiltEstimator::TiltEstimator() : ready(false) {} void TiltEstimator::read(IOParser &parser, int argc, char *argv[]) { kmin = textToFloat(parser.getOption("--kmin_tilt", "Inner freq. threshold for beamtilt estimation [Å]", "20.0")); std::string aberrToken = "--odd_aberr_max_n"; aberr_n_max = textToInteger(parser.getOption(aberrToken, "Maximum degree of Zernike polynomials used to fit odd (i.e. antisymmetrical) aberrations", "0")); xring0 = textToDouble(parser.getOption("--xr0_t", "Exclusion ring start [Å] - use to exclude dominant frequency (e.g. 
for helices)", "-1")); xring1 = textToDouble(parser.getOption("--xr1_t", "Exclusion ring end [Å]", "-1")); } void TiltEstimator::init( int verb, int nr_omp_threads, bool debug, bool diag, std::string outPath, ReferenceMap* reference, ObservationModel* obsModel) { this->verb = verb; this->nr_omp_threads = nr_omp_threads; this->debug = debug; this->diag = diag; this->outPath = outPath; this->reference = reference; this->obsModel = obsModel; angpix = obsModel->getPixelSizes(); obsModel->getBoxSizes(s, sh); ready = true; } void TiltEstimator::processMicrograph( long g, MetaDataTable& mdt, const std::vector>& obs, const std::vector>& pred, bool do_ctf_padding) { if (!ready) { REPORT_ERROR("ERROR: TiltEstimator::processMicrograph: TiltEstimator not initialized."); } std::vector>> particlesByOpticsGroup = obsModel->splitParticlesByOpticsGroup(mdt); for (int pog = 0; pog < particlesByOpticsGroup.size(); pog++) { const int og = particlesByOpticsGroup[pog].first; const std::vector& partIndices = particlesByOpticsGroup[pog].second; // TODO: SHWS 29mar2018: when data is CTF-premultiplied: do we need to change updateTiltShift?? if (obsModel->getCtfPremultiplied(og)) std::cerr << "TODO: check tilt estimation with CTF-premultiplied data!!" << std::endl; const int pc = partIndices.size(); std::vector> xyAcc(nr_omp_threads); std::vector> wAcc(nr_omp_threads); for (int i = 0; i < nr_omp_threads; i++) { xyAcc[i] = Image(sh[og],s[og]); xyAcc[i].data.initZeros(); wAcc[i] = Image(sh[og],s[og]); wAcc[i].data.initZeros(); } #pragma omp parallel for num_threads(nr_omp_threads) for (long pp = 0; pp < pc; pp++) { const int p = partIndices[pp]; CTF ctf; ctf.readByGroup(mdt, obsModel, p); int threadnum = omp_get_thread_num(); TiltHelper::updateTiltShift( pred[p], obs[p], ctf, angpix[og], xyAcc[threadnum], wAcc[threadnum], do_ctf_padding); } // Combine the accumulated weights from all threads for this subset, // store weighted sums in xyAccSum and wAccSum Image xyAccSum(sh[og], s[og]); Image wAccSum(sh[og], s[og]); for (int threadnum = 0; threadnum < nr_omp_threads; threadnum++) { ImageOp::linearCombination(xyAccSum, xyAcc[threadnum], 1.0, 1.0, xyAccSum); ImageOp::linearCombination(wAccSum, wAcc[threadnum], 1.0, 1.0, wAccSum); } // Write out the intermediate results for this micrograph: std::string outRoot = CtfRefiner::getOutputFilenameRoot(mdt, outPath); std::stringstream sts; sts << (og+1); ComplexIO::write(xyAccSum(), outRoot + "_xyAcc_optics-group_" + sts.str(), ".mrc"); wAccSum.write(outRoot+"_wAcc_optics-group_" + sts.str() + ".mrc"); } } void TiltEstimator::parametricFit( const std::vector& mdts, MetaDataTable& optOut, std::vector &fn_eps) { if (!ready) { REPORT_ERROR("ERROR: TiltEstimator::parametricFit: TiltEstimator not initialized."); } if (verb > 0) { std::cout << " + Fitting beam tilt ..." 
<< std::endl; } const int gc = mdts.size(); const int ogc = obsModel->numberOfOpticsGroups(); std::vector groupUsed(ogc, false); #pragma omp parallel for num_threads(nr_omp_threads) for (int og = 0; og < ogc; og++) { double Cs = obsModel->getSphericalAberration(og); double lambda = obsModel->getWavelength(og); std::stringstream sts; sts << (og+1); std::string ogstr = sts.str(); Image xyAccSum(sh[og], s[og]); Image wAccSum(sh[og], s[og]); xyAccSum.data.initZeros(); wAccSum.data.initZeros(); for (long g = 0; g < gc; g++) { std::string outRoot = CtfRefiner::getOutputFilenameRoot(mdts[g], outPath); if ( exists(outRoot+"_xyAcc_optics-group_"+ogstr+"_real.mrc") && exists(outRoot+"_xyAcc_optics-group_"+ogstr+"_imag.mrc") && exists(outRoot+ "_wAcc_optics-group_"+ogstr+".mrc")) { Image xyAcc; Image wAcc; wAcc.read(outRoot+"_wAcc_optics-group_"+ogstr+".mrc"); ComplexIO::read(xyAcc, outRoot+"_xyAcc_optics-group_"+ogstr, ".mrc"); xyAccSum() += xyAcc(); wAccSum() += wAcc(); groupUsed[og] = true; } } if (!groupUsed[og]) { continue; } Image wgh, phase, fit, phaseFull, fitFull; FilterHelper::getPhase(xyAccSum, phase); Image xyNrm(sh[og],s[og]); Image wgh0 = reference->getHollowWeight(kmin, s[og], angpix[og]); FilterHelper::multiply(wAccSum, wgh0, wgh); if (xring1 > 0.0) { for (int y = 0; y < s[og]; y++) for (int x = 0; x < sh[og]; x++) { double xx = x; double yy = y <= sh[og]? y : y - s[og]; double rp = sqrt(xx*xx + yy*yy); double ra = s[og] * angpix[og] / rp; if (ra > xring0 && ra <= xring1) { wgh(y,x) = 0.0; } } } for (int y = 0; y < s[og]; y++) for (int x = 0; x < sh[og]; x++) { xyNrm(y,x) = wAccSum(y,x) > 0.0? xyAccSum(y,x)/wAccSum(y,x) : Complex(0.0, 0.0); } if (debug) { Image wghFull; FftwHelper::decenterDouble2D(wgh(), wghFull()); ImageLog::write(wghFull, outPath + "beamtilt_weight-full_optics-group_"+ogstr); } FftwHelper::decenterUnflip2D(phase.data, phaseFull.data); ImageLog::write(phaseFull, outPath + "beamtilt_delta-phase_per-pixel_optics-group_"+ogstr); std::vector > imgs_for_eps; std::vector scales; std::vector labels; imgs_for_eps.push_back(phaseFull); scales.push_back(1.); labels.push_back("Asymm. obs [-1, 1] " +obsModel->getGroupName(og)); imgs_for_eps.push_back(phaseFull); scales.push_back(PI); labels.push_back("Asymm. 
obs [-pi, pi] " +obsModel->getGroupName(og)); //ColorHelper::writeAngleToPNG(phaseFull, // outPath + "beamtilt_delta-phase_per-pixel_optics-group_"+ogstr); //ColorHelper::writeAngleToEPS(phaseFull, // outPath + "beamtilt_delta-phase_per-pixel_optics-group_"+ogstr); double shift_x(0), shift_y(0), tilt_x(0), tilt_y(0); if (aberr_n_max < 3) { TiltHelper::fitTiltShift( phase, wgh, Cs, lambda, angpix[og], obsModel->getMagMatrix(og), &shift_x, &shift_y, &tilt_x, &tilt_y, &fit); FftwHelper::decenterUnflip2D(fit.data, fitFull.data); ImageLog::write(fitFull, outPath + "beamtilt_delta-phase_lin-fit_optics-group_"+ogstr); TiltHelper::optimizeTilt( xyNrm, wgh, Cs, lambda, angpix[og], obsModel->getMagMatrix(og), false, shift_x, shift_y, tilt_x, tilt_y, &shift_x, &shift_y, &tilt_x, &tilt_y, &fit); FftwHelper::decenterUnflip2D(fit.data, fitFull.data); ImageLog::write(fitFull, outPath+"beamtilt_delta-phase_iter-fit_optics-group_"+ogstr); imgs_for_eps.push_back(fitFull); scales.push_back(1.); labels.push_back("Beamtilt-only fit [-1, 1] " +obsModel->getGroupName(og)); imgs_for_eps.push_back(fitFull); scales.push_back(PI); labels.push_back("Beamtilt-only fit [-pi, pi] " +obsModel->getGroupName(og)); #pragma omp critical { optOut.setValue(EMDL_IMAGE_BEAMTILT_X, tilt_x, og); optOut.setValue(EMDL_IMAGE_BEAMTILT_Y, tilt_y, og); } } else { Image one(sh[og],s[og]); one.data.initConstant(1); std::vector Zernike_coeffs = TiltHelper::fitOddZernike( xyNrm, wgh, angpix[og], obsModel->getMagMatrix(og), aberr_n_max, &fit); FftwHelper::decenterUnflip2D(fit.data, fitFull.data); std::stringstream sts; sts << aberr_n_max; ImageLog::write(fitFull, outPath + "beamtilt_delta-phase_lin-fit_optics-group_"+ogstr+"_N-"+sts.str()); { Image residual; residual.data = phaseFull.data - fitFull.data; ImageLog::write(residual, outPath + "beamtilt_delta-phase_lin-fit_optics-group_" +ogstr+"_N-"+sts.str()+"_residual"); } std::vector Zernike_coeffs_opt = TiltHelper::optimiseOddZernike( xyNrm, wgh, angpix[og], obsModel->getMagMatrix(og), aberr_n_max, Zernike_coeffs, &fit); FftwHelper::decenterUnflip2D(fit.data, fitFull.data); ImageLog::write(fitFull, outPath + "beamtilt_delta-phase_iter-fit_optics-group_" +ogstr+"_N-"+sts.str()); imgs_for_eps.push_back(fitFull); scales.push_back(1.); labels.push_back("Asymm. fit (N="+sts.str()+") fit [-1, 1] " +obsModel->getGroupName(og)); imgs_for_eps.push_back(fitFull); scales.push_back(PI); labels.push_back("Asymm. 
fit (N="+sts.str()+") fit [-pi, pi] " +obsModel->getGroupName(og)); TiltHelper::extractTilt(Zernike_coeffs_opt, tilt_x, tilt_y, Cs, lambda); #pragma omp critical { optOut.setValue(EMDL_IMAGE_BEAMTILT_X, tilt_x, og); optOut.setValue(EMDL_IMAGE_BEAMTILT_Y, tilt_y, og); optOut.setValue(EMDL_IMAGE_ODD_ZERNIKE_COEFFS, Zernike_coeffs_opt, og); } } FileName fn_root = outPath + "asymmetric_aberrations_optics-group_"+ ogstr; ColorHelper::writeSignedToEPS(fn_root, 2, imgs_for_eps, scales, labels); fn_eps.push_back(fn_root+".eps"); } } bool TiltEstimator::isFinished(const MetaDataTable &mdt) { if (!ready) { REPORT_ERROR("ERROR: TiltEstimator::isFinished: TiltEstimator not initialized."); } std::string outRoot = CtfRefiner::getOutputFilenameRoot(mdt, outPath); bool allDone = true; std::vector ogp = obsModel->getOptGroupsPresent_zeroBased(mdt); for (int i = 0; i < ogp.size(); i++) { std::stringstream sts; sts << (ogp[i]+1); std::string ogs = sts.str(); if ( !exists(outRoot+"_xyAcc_optics-group_"+ogs+"_real.mrc") || !exists(outRoot+"_xyAcc_optics-group_"+ogs+"_imag.mrc") || !exists(outRoot+"_wAcc_optics-group_"+ogs+".mrc")) { allDone = false; break; } } return allDone; } relion-3.1.3/src/jaz/ctf/tilt_estimator.h000066400000000000000000000042771411340063500203600ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef TILT_ESTIMATOR_H #define TILT_ESTIMATOR_H #include class IOParser; class ReferenceMap; class ObservationModel; class TiltEstimator { public: TiltEstimator(); void read(IOParser& parser, int argc, char *argv[]); void init( int verb, int nr_omp_threads, bool debug, bool diag, std::string outPath, ReferenceMap* reference, ObservationModel* obsModel); // Compute per-pixel information for one micrograph void processMicrograph( long g, MetaDataTable& mdt, const std::vector>& obs, const std::vector>& pred, bool do_ctf_padding = false); // Sum up per-pixel information from all micrographs, // then fit beam-tilt model to the per-pixel fit void parametricFit( const std::vector& mdts, MetaDataTable& optOut, std::vector &fn_eps); // Has this mdt been processed already? bool isFinished(const MetaDataTable& mdt); private: // cmd. 
line options (see read()) double kmin; int aberr_n_max; double xring0, xring1; // parameters obtained through init() int verb, nr_omp_threads; bool debug, diag, ready; std::string outPath; std::vector s, sh; std::vector angpix; ReferenceMap* reference; ObservationModel* obsModel; }; #endif relion-3.1.3/src/jaz/ctf/tilt_helper.cpp000066400000000000000000000553101411340063500201550ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include "tilt_helper.h" #include #include #include #include using namespace gravis; void TiltHelper::updateTiltShift( const Image &prediction, const Image &observation, CTF& ctf, double angpix, Image& xyDest, Image& wDest, bool do_ctf_padding) { const long w = prediction.data.xdim; const long h = prediction.data.ydim; Image ctfImg(w,h); ctf.getFftwImage(ctfImg(), h, h, angpix, false, false, false, true, do_ctf_padding); for (long y = 0; y < h; y++) for (long x = 0; x < w; x++) { Complex vx = DIRECT_A2D_ELEM(prediction.data, y, x); Complex vy = DIRECT_A2D_ELEM(observation.data, y, x); RFLOAT c = ctfImg(y,x); DIRECT_A2D_ELEM(xyDest.data, y, x) += c * vx.conj() * vy; DIRECT_A2D_ELEM(wDest.data, y, x) += c * c * vx.norm(); } } void TiltHelper::updateTiltShiftPar( const Image &prediction, const Image &observation, CTF& ctf, double angpix, Image& xyDest, Image& wDest, bool do_ctf_padding) { const long w = prediction.data.xdim; const long h = prediction.data.ydim; Image ctfImg(w,h); ctf.getFftwImage(ctfImg(), h, h, angpix, false, false, false, true, do_ctf_padding); #pragma omp parallel for for (long y = 0; y < h; y++) for (long x = 0; x < w; x++) { Complex vx = DIRECT_A2D_ELEM(prediction.data, y, x); Complex vy = DIRECT_A2D_ELEM(observation.data, y, x); RFLOAT c = ctfImg(y,x); DIRECT_A2D_ELEM(xyDest.data, y, x) += c * vx.conj() * vy; DIRECT_A2D_ELEM(wDest.data, y, x) += c * c * vx.norm(); } } void TiltHelper::fitTiltShift( const Image& phase, const Image& weight, double Cs, double lambda, double angpix, const Matrix2D& mag, double* shift_x, double* shift_y, double* tilt_x, double* tilt_y, Image* fit) { const long w = phase.data.xdim; const long h = phase.data.ydim; double axx = 0.0, axy = 0.0, axz = 0.0,//axw == ayz, ayy = 0.0, ayz = 0.0, ayw = 0.0, azz = 0.0, azw = 0.0, aww = 0.0; double bx = 0.0, by = 0.0, bz = 0.0, bw = 0.0; const RFLOAT as = (RFLOAT)h * angpix; for (long yi = 0; yi < h; yi++) for (long xi = 0; xi < w; xi++) { const double x0 = xi; const double y0 = yi < w? 
yi : ((yi-h)); const double x = mag(0,0) * x0 + mag(0,1) * y0; const double y = mag(1,0) * x0 + mag(1,1) * y0; double q = x*x + y*y; double v = DIRECT_A2D_ELEM(phase.data, yi, xi); double g = DIRECT_A2D_ELEM(weight.data, yi, xi); axx += g * x * x; axy += g * x * y; axz += g * q * x * x; ayy += g * y * y; ayz += g * q * x * y; ayw += g * q * y * y; azz += g * q * q * x * x; azw += g * q * q * x * y; aww += g * q * q * y * y; bx += g * x * v; by += g * y * v; bz += g * q * x * v; bw += g * q * y * v; } gravis::d4Matrix A; gravis::d4Vector b(bx, by, bz, bw); A(0,0) = axx; A(0,1) = axy; A(0,2) = axz; A(0,3) = ayz; A(1,0) = axy; A(1,1) = ayy; A(1,2) = ayz; A(1,3) = ayw; A(2,0) = axz; A(2,1) = ayz; A(2,2) = azz; A(2,3) = azw; A(3,0) = ayz; A(3,1) = ayw; A(3,2) = azw; A(3,3) = aww; gravis::d4Matrix Ainv = A; Ainv.invert(); gravis::d4Vector opt = Ainv * b; //std::cout << opt[0] << ", " << opt[1] << ", " << opt[2] << ", " << opt[3] << "\n"; *shift_x = opt[0]; *shift_y = opt[1]; *tilt_x = -opt[2]*180.0/(0.360 * Cs * 10000000 * lambda * lambda * 3.141592654); *tilt_y = -opt[3]*180.0/(0.360 * Cs * 10000000 * lambda * lambda * 3.141592654); /*destA = gravis::d2Vector(opt[0], opt[1]).length(); destPhiA = (180.0/3.1416)*std::atan2(opt[1], opt[0]); destB = gravis::d2Vector(opt[2], opt[3]).length(); destPhiB = (180.0/3.1416)*std::atan2(opt[3], opt[2]); std::cout << "linear: " << destA << " @ " << destPhiA << "°\n"; std::cout << "cubic: " << destB << " @ " << destPhiB << "°\n"; std::cout << " = -" << destB << " @ " << (destPhiB + 180.0) << "°\n";*/ //std::cout << "tilt_x = " << *tilt_x << "\n"; //std::cout << "tilt_y = " << *tilt_y << "\n"; if (fit != 0) { *fit = Image(w,h); drawPhaseShift(opt[0], opt[1], opt[2], opt[3], w, h, as, mag, fit); } } void TiltHelper::optimizeTilt( const Image &xy, const Image &weight, double Cs, double lambda, double angpix, const Matrix2D& mag, bool L1, double shift0_x, double shift0_y, double tilt0_x, double tilt0_y, double *shift_x, double *shift_y, double *tilt_x, double *tilt_y, Image *fit) { TiltOptimization prob(xy, weight, angpix, mag, L1, false); double scale = 180.0/(0.360 * Cs * 10000000 * lambda * lambda * 3.141592654); std::vector initial{shift0_x, shift0_y, -tilt0_x/scale, -tilt0_y/scale}; std::vector opt = NelderMead::optimize( initial, prob, 0.01, 0.000001, 100000, 1.0, 2.0, 0.5, 0.5, false); *shift_x = opt[0]; *shift_y = opt[1]; *tilt_x = -opt[2]*scale; *tilt_y = -opt[3]*scale; //std::cout << opt[0] << ", " << opt[1] << ", " << opt[2] << ", " << opt[3] << "\n"; //std::cout << "tilt_x = " << *tilt_x << "\n"; //std::cout << "tilt_y = " << *tilt_y << "\n"; if (fit != 0) { const int w = xy.data.xdim; const int h = xy.data.ydim; const RFLOAT as = (RFLOAT)h * angpix; *fit = Image(w,h); drawPhaseShift(opt[0], opt[1], opt[2], opt[3], w, h, as, mag, fit); } } std::vector TiltHelper::fitOddZernike( const Image& xy, const Image& weight, double angpix, const Matrix2D& mag, int n_max, Image* fit) { const int w = xy.data.xdim; const int h = xy.data.ydim; const int cc = Zernike::numberOfOddCoeffs(n_max); std::vector> basis = TiltHelper::computeOddZernike(h, angpix, mag, n_max); std::vector out = fitBasisLin(xy, weight, basis); if (fit != 0) { *fit = Image(w,h); for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { for (int c = 0; c < cc; c++) { (*fit)(y,x) += out[c] * basis[c](y,x); } } } return out; } std::vector TiltHelper::optimiseOddZernike( const Image &xy, const Image &weight, double angpix, const Matrix2D& mag, int n_max, const std::vector &coeffs, Image *fit) { 
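// [Editor's note] This routine refines the linear estimate from fitOddZernike(): it
// rebuilds the same odd-Zernike basis and then minimises the weighted disagreement with
// the complex per-pixel data via optimiseBasis() (further down), starting from the given
// coeffs. The first-order beam tilt can afterwards be recovered from the optimised
// coefficients with extractTilt(), which uses Z3(x,y) = (3r^2 - 2)*{x,y}, so that
// tilt = -3 * Z3 / (Cs * 20000 * lambda^2 * PI).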
const int w = xy.data.xdim; const int h = xy.data.ydim; const int cc = Zernike::numberOfOddCoeffs(n_max); std::vector> basis = computeOddZernike(h, angpix, mag, n_max); std::vector opt = optimiseBasis(xy, weight, basis, coeffs); if (fit != 0) { *fit = Image(w,h); for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { for (int c = 0; c < cc; c++) { (*fit)(y,x) += opt[c] * basis[c](y,x); } } } return opt; } std::vector > TiltHelper::computeOddZernike( int s, double angpix, const Matrix2D& mag, int n_max) { const int cc = Zernike::numberOfOddCoeffs(n_max); const double as = (double)s * angpix; const int sh = s/2 + 1; std::vector> basis(cc); for (int c = 0; c < cc; c++) { basis[c] = Image(sh,s); int m, n; Zernike::oddIndexToMN(c, m, n); for (int y = 0; y < s; y++) for (int x = 0; x < sh; x++) { const double xx0 = x/as; const double yy0 = y < sh? y/as : (y-s)/as; const double xx = mag(0,0) * xx0 + mag(0,1) * yy0; const double yy = mag(1,0) * xx0 + mag(1,1) * yy0; basis[c](y,x) = Zernike::Z_cart(m, n, xx, yy); } } return basis; } Image TiltHelper::plotOddZernike( const std::vector& coeffs, int s, double angpix, const Matrix2D& mag) { Image out(s,s); const double as = (double)s * angpix; for (int y = 0; y < s; y++) for (int x = 0; x < s; x++) { const double xx0 = (x - s/2) / as; const double yy0 = (y - s/2) / as; const double xx = mag(0,0) * xx0 + mag(0,1) * yy0; const double yy = mag(1,0) * xx0 + mag(1,1) * yy0; for (int c = 0; c < coeffs.size(); c++) { int m, n; Zernike::oddIndexToMN(c, m, n); out(y,x) += coeffs[c] * Zernike::Z_cart(m, n, xx, yy); } } return out; } Image TiltHelper::plotTilt( double tx, double ty, int s, double angpix, const Matrix2D& mag, double Cs, double lambda) { Image out(s,s); /*double boxsize = angpix * s; double factor = 0.360 * Cs * 10000000 * lambda * lambda / (boxsize * boxsize * boxsize);*/ const double scale = Cs * 20000 * lambda * lambda * 3.141592654; const double as = (double)s * angpix; for (int y = 0; y < s; y++) for (int x = 0; x < s; x++) { /*const double xx = (x - s/2); const double yy = (y - s/2); out(y,x) = factor * (yy * yy + xx * xx) * (yy * ty + xx * tx);*/ const double xx0 = (x - s/2) / as; const double yy0 = (y - s/2) / as; const double xx = mag(0,0) * xx0 + mag(0,1) * yy0; const double yy = mag(1,0) * xx0 + mag(1,1) * yy0; out(y,x) = scale * (xx*xx + yy*yy) * (xx*tx + yy*ty); } return out; } std::vector TiltHelper::fitEvenZernike( const Image& phase, const Image& weight, double angpix, const Matrix2D& mag, int n_max, Image* fit) { const int w = phase.data.xdim; const int h = phase.data.ydim; const int cc = Zernike::numberOfEvenCoeffs(n_max); std::vector> basis = computeEvenZernike(h, angpix, mag, n_max); std::vector out = fitBasisLin(phase, weight, basis); if (fit != 0) { *fit = Image(w,h); for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { for (int c = 0; c < cc; c++) { (*fit)(y,x) += out[c] * basis[c](y,x); } } } return out; } std::vector TiltHelper::optimiseEvenZernike( const Image& xy, const Image& weight, double angpix, const Matrix2D& mag, int n_max, const std::vector& coeffs, Image* fit) { const int w = xy.data.xdim; const int h = xy.data.ydim; const int cc = Zernike::numberOfEvenCoeffs(n_max); std::vector> basis = computeEvenZernike(h, angpix, mag, n_max); std::vector opt = optimiseBasis(xy, weight, basis, coeffs); if (fit != 0) { *fit = Image(w,h); for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { for (int c = 0; c < cc; c++) { (*fit)(y,x) += opt[c] * basis[c](y,x); } } } return opt; } std::vector 
TiltHelper::optimiseEvenZernike( const Image& xy, const Image& weight0, const Image& Axx, const Image& Axy, const Image& Ayy, double angpix, const Matrix2D& mag, int n_max, const std::vector& coeffs, Image* fit) { const int w = xy.data.xdim; const int h = xy.data.ydim; const int cc = Zernike::numberOfEvenCoeffs(n_max); std::vector> basis = computeEvenZernike(h, angpix, mag, n_max); std::vector opt = optimiseBasis(xy, weight0, Axx, Axy, Ayy, basis, coeffs); if (fit != 0) { *fit = Image(w,h); for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { for (int c = 0; c < cc; c++) { (*fit)(y,x) += opt[c] * basis[c](y,x); } } } return opt; } std::vector > TiltHelper::computeEvenZernike( int s, double angpix, const Matrix2D& mag, int n_max) { const int cc = Zernike::numberOfEvenCoeffs(n_max); const double as = (double)s * angpix; const int sh = s/2 + 1; std::vector> basis(cc); for (int c = 0; c < cc; c++) { basis[c] = Image(sh,s); int m, n; Zernike::evenIndexToMN(c, m, n); for (int y = 0; y < s; y++) for (int x = 0; x < sh; x++) { const double xx0 = x/as; const double yy0 = y < sh? y/as : (y-s)/as; const double xx = mag(0,0) * xx0 + mag(0,1) * yy0; const double yy = mag(1,0) * xx0 + mag(1,1) * yy0; basis[c](y,x) = Zernike::Z_cart(m, n, xx, yy); } } return basis; } void TiltHelper::extractTilt( std::vector& oddZernikeCoeffs, double& tilt_x, double& tilt_y, double Cs, double lambda) { if (oddZernikeCoeffs.size() <= 5) oddZernikeCoeffs.resize(5, 0); const double scale = Cs * 20000 * lambda * lambda * 3.141592654; const double Z3x = oddZernikeCoeffs[4]; const double Z3y = oddZernikeCoeffs[3]; // p = Z1x x + Z3x (3r² - 2) x // = (Z1x - 2 Z3x) x + 3 Z3x r² x // = Z1x' x - tx r² x oddZernikeCoeffs[4] = 0.0; oddZernikeCoeffs[3] = 0.0; oddZernikeCoeffs[1] -= 2.0 * Z3x; oddZernikeCoeffs[0] -= 2.0 * Z3y; tilt_x = -3.0 * Z3x / scale; tilt_y = -3.0 * Z3y / scale; } void TiltHelper::insertTilt( std::vector& oddZernikeCoeffs, double tilt_x, double tilt_y, double Cs, double lambda) { if (oddZernikeCoeffs.size() <= 5) oddZernikeCoeffs.resize(5, 0); const double scale = Cs * 20000 * lambda * lambda * 3.141592654; const double Z3x = -scale * tilt_x / 3.0; const double Z3y = -scale * tilt_y / 3.0; oddZernikeCoeffs[1] += 2.0 * Z3x; oddZernikeCoeffs[0] += 2.0 * Z3y; oddZernikeCoeffs[4] = Z3x; oddZernikeCoeffs[3] = Z3y; } std::vector TiltHelper::fitBasisLin( const Image& xy, const Image& weight, const std::vector>& basis) { const int w = xy.data.xdim; const int h = xy.data.ydim; Image phase(w,h); for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { phase(y,x) = xy(y,x).arg(); } return fitBasisLin(phase, weight, basis); } std::vector TiltHelper::fitBasisLin( const Image& phase, const Image& weight, const std::vector>& basis) { const int cc = basis.size(); const int w = phase.data.xdim; const int h = phase.data.ydim; Matrix2D A(cc,cc); Matrix1D b(cc); for (int c1 = 0; c1 < cc; c1++) { for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { b(c1) += weight(y,x) * basis[c1](y,x) * phase(y,x); } for (int c2 = c1; c2 < cc; c2++) { for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { A(c1,c2) += weight(y,x) * basis[c1](y,x) * basis[c2](y,x); } } for (int c2 = 0; c2 < c1; c2++) { A(c1,c2) = A(c2,c1); } } const double tol = 1e-20; Matrix1D x(cc); solve(A, b, x, tol); std::vector out(cc); for (int c = 0; c < cc; c++) { out[c] = x(c); } return out; } std::vector TiltHelper::optimiseBasis( const Image& xy, const Image& weight, const std::vector>& basis, const std::vector& initial) { BasisOptimisation prob(xy, weight, 
basis, false); std::vector opt = NelderMead::optimize( initial, prob, 0.01, 0.000001, 100000, 1.0, 2.0, 0.5, 0.5, false); return opt; } std::vector TiltHelper::optimiseBasis( const Image& xy, const Image& weight0, const Image& Axx, const Image& Axy, const Image& Ayy, const std::vector>& basis, const std::vector& initial) { AnisoBasisOptimisation prob(xy, weight0, Axx, Axy, Ayy, basis, false); std::vector opt = NelderMead::optimize( initial, prob, 0.01, 0.000001, 100000, 1.0, 2.0, 0.5, 0.5, false); return opt; } void TiltHelper::optimizeAnisoTilt( const Image &xy, const Image &weight, double Cs, double lambda, double angpix, const Matrix2D& mag, bool L1, double shift0_x, double shift0_y, double tilt0_x, double tilt0_y, double *shift_x, double *shift_y, double *tilt_x, double *tilt_y, double* tilt_xx, double* tilt_xy, double* tilt_yy, Image *fit) { TiltOptimization prob(xy, weight, angpix, mag, L1, true); double scale = 180.0/(0.360 * Cs * 10000000 * lambda * lambda * 3.141592654); std::vector initial{shift0_x, shift0_y, -tilt0_x/scale, -tilt0_y/scale, 0.0, 1.0}; std::vector opt = NelderMead::optimize( initial, prob, 0.01, 0.000001, 100000, 1.0, 2.0, 0.5, 0.5, false); *shift_x = opt[0]; *shift_y = opt[1]; *tilt_x = -opt[2]*scale; *tilt_y = -opt[3]*scale; *tilt_xx = 1.0; *tilt_xy = opt[4]; *tilt_yy = opt[5]; std::cout << opt[0] << ", " << opt[1] << ", " << opt[2] << ", " << opt[3] << ", " << opt[4] << ", " << opt[5] << "\n"; std::cout << "tilt_x = " << *tilt_x << "\n"; std::cout << "tilt_y = " << *tilt_y << "\n"; std::cout << "tilt_xx = " << *tilt_xx << "\n"; std::cout << "tilt_xy = " << *tilt_xy << "\n"; std::cout << "tilt_yy = " << *tilt_yy << "\n"; if (fit != 0) { const int w = xy.data.xdim; const int h = xy.data.ydim; const RFLOAT as = (RFLOAT)h * angpix; *fit = Image(w,h); drawPhaseShift(opt[0], opt[1], opt[2], opt[3], 1.0, opt[4], opt[5], w, h, as, mag, fit); } } void TiltHelper::drawPhaseShift( double shift_x, double shift_y, double tilt_x, double tilt_y, int w, int h, double as, const Matrix2D& mag, Image *tgt) { for (long yi = 0; yi < h; yi++) for (long xi = 0; xi < w; xi++) { const double x0 = xi; const double y0 = yi < w? yi : yi-h; const double x = (mag(0,0) * x0 + mag(0,1) * y0) / as; const double y = (mag(1,0) * x0 + mag(1,1) * y0) / as; const double q = x*x + y*y; DIRECT_A2D_ELEM(tgt->data, yi, xi) = x * shift_x + y * shift_y + q * x * tilt_x + q * y * tilt_y; } } void TiltHelper::drawPhaseShift( double shift_x, double shift_y, double tilt_x, double tilt_y, double tilt_xx, double tilt_xy, double tilt_yy, int w, int h, double as, const Matrix2D& mag, Image *tgt) { for (long yi = 0; yi < h; yi++) for (long xi = 0; xi < w; xi++) { const double x0 = xi; const double y0 = yi < w? 
yi : yi-h; const double x = (mag(0,0) * x0 + mag(0,1) * y0) / as; const double y = (mag(1,0) * x0 + mag(1,1) * y0) / as; const double q = tilt_xx * x * x + 2.0 * tilt_xy * x * y + tilt_yy * y * y; DIRECT_A2D_ELEM(tgt->data, yi, xi) = x * shift_x + y * shift_y + q * x * tilt_x + q * y * tilt_y; } } TiltOptimization::TiltOptimization( const Image &xy, const Image &weight, double angpix, const Matrix2D &mag, bool L1, bool anisotropic) : xy(xy), weight(weight), angpix(angpix), mag(mag), L1(L1), anisotropic(anisotropic) { } double TiltOptimization::f(const std::vector &x, void* tempStorage) const { double out = 0.0; const int w = xy.data.xdim; const int h = xy.data.ydim; const double as = (double)h * angpix; for (long yi = 0; yi < h; yi++) for (long xi = 0; xi < w; xi++) { const double xd0 = xi/as; const double yd0 = yi < w? yi/as : (yi-h)/as; double rr; const double xd = mag(0,0) * xd0 + mag(0,1) * yd0; const double yd = mag(1,0) * xd0 + mag(1,1) * yd0; if (anisotropic) { rr = xd*xd + 2.0*x[4]*xd*yd + x[5]*yd*yd; } else { rr = xd*xd + yd*yd; } const double phi = x[0]*xd + x[1]*yd + rr*x[2]*xd + rr*x[3]*yd; const double e = (xy(yi,xi) - Complex(cos(phi), sin(phi))).norm(); if (L1) { out += weight(yi,xi) * sqrt(e); } else { out += weight(yi,xi) * e; } /*double d = phi - atan2(xy(yi,xi).imag, xy(yi,xi).real); out += weight(yi,xi) * d*d;*/ } return out; } BasisOptimisation::BasisOptimisation( const Image &xy, const Image &weight, const std::vector > &basis, bool L1) : w(xy.data.xdim), h(xy.data.ydim), cc(basis.size()), xy(xy), weight(weight), basis(basis), L1(L1) { } double BasisOptimisation::f(const std::vector &x, void *tempStorage) const { Image& recomb = *((Image*)tempStorage); recomb.data.initZeros(); for (int c = 0; c < cc; c++) for (int yp = 0; yp < h; yp++) for (int xp = 0; xp < w; xp++) { recomb(yp,xp) += x[c] * basis[c](yp,xp); } double sum = 0.0; for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { Complex zPred(cos(recomb(y,x)), sin(recomb(y,x))); sum += weight(y,x) * (zPred - xy(y,x)).norm(); } return sum; } void *BasisOptimisation::allocateTempStorage() const { return new Image(w,h); } void BasisOptimisation::deallocateTempStorage(void *ts) const { delete static_cast*>(ts); } AnisoBasisOptimisation::AnisoBasisOptimisation( const Image &xy, const Image &weight0, const Image& Axx, const Image& Axy, const Image& Ayy, const std::vector > &basis, bool L1) : w(xy.data.xdim), h(xy.data.ydim), cc(basis.size()), xy(xy), weight0(weight0), Axx(Axx), Axy(Axy), Ayy(Ayy), basis(basis), L1(L1) { } double AnisoBasisOptimisation::f(const std::vector &x, void *tempStorage) const { Image& recomb = *((Image*)tempStorage); recomb.data.initZeros(); for (int c = 0; c < cc; c++) for (int yp = 0; yp < h; yp++) for (int xp = 0; xp < w; xp++) { recomb(yp,xp) += x[c] * basis[c](yp,xp); } double sum = 0.0; for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { d2Vector e(cos(recomb(y,x)) - xy(y,x).real, sin(recomb(y,x)) - xy(y,x).imag); sum += weight0(y,x) * (Axx(y,x)*e.x*e.x + 2.0*Axy(y,x)*e.x*e.y + Ayy(y,x)*e.y*e.y); } return sum; } void* AnisoBasisOptimisation::allocateTempStorage() const { return new Image(w,h); } void AnisoBasisOptimisation::deallocateTempStorage(void *ts) const { delete static_cast*>(ts); } relion-3.1.3/src/jaz/ctf/tilt_helper.h000066400000000000000000000206501411340063500176210ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free 
software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef TILT_REFINEMENT_H #define TILT_REFINEMENT_H #include #include #include #include #include #include #include class TiltOptimization : public Optimization { public: TiltOptimization( const Image& xy, const Image& weight, double angpix, const Matrix2D& mag, bool L1 = false, bool anisotropic = false); double f(const std::vector& x, void* tempStorage) const; private: const Image& xy; const Image& weight; const double angpix; const Matrix2D& mag; const bool L1, anisotropic; }; class BasisOptimisation : public Optimization { public: BasisOptimisation( const Image& xy, const Image& weight, const std::vector>& basis, bool L1 = false); double f(const std::vector& x, void* tempStorage) const; void* allocateTempStorage() const; void deallocateTempStorage(void* ts) const; private: const int w, h, cc; const Image& xy; const Image& weight; const std::vector>& basis; const bool L1; }; class AnisoBasisOptimisation : public Optimization { public: AnisoBasisOptimisation( const Image& xy, const Image& weight0, const Image& Axx, const Image& Axy, const Image& Ayy, const std::vector>& basis, bool L1 = false); double f(const std::vector& x, void* tempStorage) const; void* allocateTempStorage() const; void deallocateTempStorage(void* ts) const; private: const int w, h, cc; const Image& xy; const Image& weight0, &Axx, &Axy, &Ayy; const std::vector>& basis; const bool L1; }; class TiltHelper { public: static void updateTiltShift( const Image& prediction, const Image& observation, CTF& ctf, double angpix, Image& xyDest, Image& wDest, bool do_ctf_padding = false); static void updateTiltShiftPar(const Image& prediction, const Image& observation, CTF &ctf, double angpix, Image& xyDest, Image& wDest, bool do_ctf_padding = false); static void fitTiltShift( const Image& phase, const Image& weight, double Cs, double lambda, double angpix, const Matrix2D& mag, double* shift_x, double* shift_y, double* tilt_x, double* tilt_y, Image* fit); static void optimizeTilt( const Image& xy, const Image& weight, double Cs, double lambda, double angpix, const Matrix2D& mag, bool L1, double shift0_x, double shift0_y, double tilt0_x, double tilt0_y, double* shift_x, double* shift_y, double* tilt_x, double* tilt_y, Image* fit); static std::vector fitOddZernike( const Image& xy, const Image& weight, double angpix, const Matrix2D& mag, int n_max, Image* fit = 0); static std::vector optimiseOddZernike( const Image& xy, const Image& weight, double angpix, const Matrix2D& mag, int n_max, const std::vector& coeffs, Image* fit); static std::vector> computeOddZernike( int s, double angpix, const Matrix2D& mag, int n_max); static Image plotOddZernike( const std::vector& coeffs, int s, double angpix, const Matrix2D& mag); static Image plotTilt( double tx, double ty, int s, double angpix, const Matrix2D& mag, double 
Cs, double lambda); static std::vector fitEvenZernike( const Image& phase, const Image& weight, double angpix, const Matrix2D& mag, int n_max, Image* fit = 0); static std::vector optimiseEvenZernike( const Image& xy, const Image& weight, double angpix, const Matrix2D& mag, int n_max, const std::vector& coeffs, Image* fit); static std::vector optimiseEvenZernike( const Image& xy, const Image& weight0, const Image& Axx, const Image& Axy, const Image& Ayy, double angpix, const Matrix2D& mag, int n_max, const std::vector& coeffs, Image* fit); static std::vector> computeEvenZernike( int s, double angpix, const Matrix2D& mag, int n_max); static void extractTilt( std::vector& oddZernikeCoeffs, double& tilt_x, double& tilt_y, double Cs, double lambda); static void insertTilt( std::vector& oddZernikeCoeffs, double tilt_x, double tilt_y, double Cs, double lambda); static std::vector fitBasisLin( const Image& xy, const Image& weight, const std::vector>& basis); static std::vector fitBasisLin( const Image& phase, const Image& weight, const std::vector>& basis); static std::vector optimiseBasis( const Image& xy, const Image& weight, const std::vector>& basis, const std::vector& initial); static std::vector optimiseBasis( const Image& xy, const Image& weight0, const Image& Axx, const Image& Axy, const Image& Ayy, const std::vector>& basis, const std::vector& initial); static void optimizeAnisoTilt( const Image& xy, const Image& weight, double Cs, double lambda, double angpix, const Matrix2D& mag, bool L1, double shift0_x, double shift0_y, double tilt0_x, double tilt0_y, double* shift_x, double* shift_y, double* tilt_x, double* tilt_y, double* tilt_xx, double* tilt_xy, double* tilt_yy, Image* fit); static void drawPhaseShift( double shift_x, double shift_y, double tilt_x, double tilt_y, int w, int h, double as, const Matrix2D& mag, Image* tgt); static void drawPhaseShift( double shift_x, double shift_y, double tilt_x, double tilt_y, double tilt_xx, double tilt_xy, double tilt_yy, int w, int h, double as, const Matrix2D& mag, Image* tgt); }; #endif relion-3.1.3/src/jaz/ctf_helper.cpp000066400000000000000000000077031411340063500172040ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #include #include #include #include #include #include std::vector CtfHelper :: loadCtffind4( std::string path, int imageCount, double voltage, double Cs, double Q0, double Bfac, double scale) { /* example: # Output from CTFFind version 4.1.5, run on 2017-03-30 15:12:45 # Input file: /beegfs/zivanov/tomograms/ts_05/frames/05_f32.mrc ; Number of micrographs: 1 # Pixel size: 1.000 Angstroms ; acceleration voltage: 300.0 keV ; spherical aberration: 2.70 mm ; amplitude contrast: 0.07 # Box size: 512 pixels ; min. res.: 30.0 Angstroms ; max. res.: 5.0 Angstroms ; min. def.: 5000.0 um; max. def. 50000.0 um # Columns: #1 - micrograph number; #2 - defocus 1 [Angstroms]; #3 - defocus 2; #4 - azimuth of astigmatism; #5 - additional phase shift [radians]; #6 - cross correlation; #7 - spacing (in Angstroms) up to which CTF rings were fit successfully 1.000000 10295.926758 10012.275391 -38.856349 0.000000 0.030650 5.279412 */ std::vector ctfs(imageCount); size_t ast = path.find_first_of('*'); if (ast == std::string::npos) { std::ifstream file(path); int currImg = 0; char text[4096]; while (file.getline(text, 4096)) { if (text[0] == '#') continue; std::stringstream line(text); ctfs[currImg] = setFromFile(line, voltage, Cs, Q0, Bfac, scale); currImg++; if (currImg >= imageCount) { break; } } if (currImg < imageCount) { REPORT_ERROR_STR("Insufficient number of CTFs found in " << path << ".\n" << imageCount << " requested, " << currImg << " found.\n"); } } else { std::string fnBase, fnEnd; fnBase = path.substr(0, ast); fnEnd = path.substr(ast+1); for (int i = 0; i < imageCount; i++) { std::stringstream sts; sts << i; std::string fnm; sts >> fnm; std::string fn = fnBase + fnm + fnEnd; std::ifstream file(fn.c_str()); if (!file.is_open()) { REPORT_ERROR("failed to open " + fn + '\n'); } char text[4096]; while (file.getline(text, 4096)) { if (text[0] == '#') continue; std::stringstream line(text); ctfs[i] = setFromFile(line, voltage, Cs, Q0, Bfac, scale); } } } return ctfs; } CTF CtfHelper::setFromFile(std::stringstream& line, double voltage, double Cs, double Q0, double Bfac, double scale) { /* #1 - micrograph number; #2 - defocus 1 [Angstroms]; #3 - defocus 2; #4 - azimuth of astigmatism; #5 - additional phase shift [radians]; #6 - cross correlation; #7 - spacing (in Angstroms) up to which CTF rings were fit successfully */ double imgNumber, defocus1, defocus2, azimuth, phaseShift, crossCorr, bestBefore; line >> imgNumber; line >> defocus1; line >> defocus2; line >> azimuth; line >> phaseShift; line >> crossCorr; line >> bestBefore; CTF ctf; ctf.setValues(defocus1, defocus2, azimuth, voltage, Cs, Q0, Bfac, scale, phaseShift); return ctf; } relion-3.1.3/src/jaz/ctf_helper.h000066400000000000000000000032011411340063500166360ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef CTF_HELPER_H #define CTF_HELPER_H #include #include #include #include #include #include #include class CtfHelper { public: static std::vector loadCtffind4(std::string path, int imageCount, double voltage = 300.0, double Cs = 2.2, double Q0 = 0.1, double Bfac = 0.0, double scale = 1.0); static CTF setFromFile( std::stringstream& line, double voltage, double Cs, double Q0, double Bfac, double scale); }; #endif relion-3.1.3/src/jaz/d3x3/000077500000000000000000000000001411340063500151375ustar00rootroot00000000000000relion-3.1.3/src/jaz/d3x3/copyright.txt000066400000000000000000000020441411340063500177100ustar00rootroot00000000000000// ---------------------------------------------------------------------------- // Numerical diagonalization of 3x3 matrcies // Copyright (C) 2006 Joachim Kopp // ---------------------------------------------------------------------------- // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA // ---------------------------------------------------------------------------- relion-3.1.3/src/jaz/d3x3/dsyev2.c000066400000000000000000000054351411340063500165260ustar00rootroot00000000000000// ---------------------------------------------------------------------------- // Numerical diagonalization of 3x3 matrcies // Copyright (C) 2006 Joachim Kopp // ---------------------------------------------------------------------------- // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA // ---------------------------------------------------------------------------- #include #include #include "dsyev2.h" // Macros #define SQR(x) ((x)*(x)) // x^2 #define SQR_ABS(x) (SQR(creal(x)) + SQR(cimag(x))) // |x|^2 // ---------------------------------------------------------------------------- inline void dsyev2(double A, double B, double C, double *rt1, double *rt2, double *cs, double *sn) // ---------------------------------------------------------------------------- // Calculates the eigensystem of a real symmetric 2x2 matrix // [ A B ] // [ B C ] // in the form // [ A B ] = [ cs -sn ] [ rt1 0 ] [ cs sn ] // [ B C ] [ sn cs ] [ 0 rt2 ] [ -sn cs ] // where rt1 >= rt2. Note that this convention is different from the one used // in the LAPACK routine DLAEV2, where |rt1| >= |rt2|. // ---------------------------------------------------------------------------- { double sm = A + C; double df = A - C; double rt = sqrt(SQR(df) + 4.0*B*B); double t; if (sm > 0.0) { *rt1 = 0.5 * (sm + rt); t = 1.0/(*rt1); *rt2 = (A*t)*C - (B*t)*B; } else if (sm < 0.0) { *rt2 = 0.5 * (sm - rt); t = 1.0/(*rt2); *rt1 = (A*t)*C - (B*t)*B; } else // This case needs to be treated separately to avoid div by 0 { *rt1 = 0.5 * rt; *rt2 = -0.5 * rt; } // Calculate eigenvectors if (df > 0.0) *cs = df + rt; else *cs = df - rt; if (fabs(*cs) > 2.0*fabs(B)) { t = -2.0 * B / *cs; *sn = 1.0 / sqrt(1.0 + SQR(t)); *cs = t * (*sn); } else if (fabs(B) == 0.0) { *cs = 1.0; *sn = 0.0; } else { t = -0.5 * (*cs) / B; *cs = 1.0 / sqrt(1.0 + SQR(t)); *sn = t * (*cs); } if (df > 0.0) { t = *cs; *cs = -(*sn); *sn = t; } } relion-3.1.3/src/jaz/d3x3/dsyev2.h000066400000000000000000000024071411340063500165270ustar00rootroot00000000000000// ---------------------------------------------------------------------------- // Numerical diagonalization of 3x3 matrcies // Copyright (C) 2006 Joachim Kopp // ---------------------------------------------------------------------------- // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA // ---------------------------------------------------------------------------- #ifndef __DSYEV3_H #define __DSYEV3_H #ifdef __cplusplus extern "C" { #endif void dsyev2(double A, double B, double C, double *rt1, double *rt2, double *cs, double *sn); #ifdef __cplusplus } #endif #endif relion-3.1.3/src/jaz/d3x3/dsyevc3.c000066400000000000000000000063011411340063500166630ustar00rootroot00000000000000// ---------------------------------------------------------------------------- // Numerical diagonalization of 3x3 matrcies // Copyright (C) 2006 Joachim Kopp // ---------------------------------------------------------------------------- // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA // ---------------------------------------------------------------------------- #include #include #include "dsyevc3.h" // Constants #define M_SQRT3 1.73205080756887729352744634151 // sqrt(3) // Macros #define SQR(x) ((x)*(x)) // x^2 // ---------------------------------------------------------------------------- int dsyevc3(double A[3][3], double w[3]) // ---------------------------------------------------------------------------- // Calculates the eigenvalues of a symmetric 3x3 matrix A using Cardano's // analytical algorithm. // Only the diagonal and upper triangular parts of A are accessed. The access // is read-only. // ---------------------------------------------------------------------------- // Parameters: // A: The symmetric input matrix // w: Storage buffer for eigenvalues // ---------------------------------------------------------------------------- // Return value: // 0: Success // -1: Error // ---------------------------------------------------------------------------- { double m, c1, c0; // Determine coefficients of characteristic poynomial. 
We write // | a d f | // A = | d* b e | // | f* e* c | double de = A[0][1] * A[1][2]; // d * e double dd = SQR(A[0][1]); // d^2 double ee = SQR(A[1][2]); // e^2 double ff = SQR(A[0][2]); // f^2 m = A[0][0] + A[1][1] + A[2][2]; c1 = (A[0][0]*A[1][1] + A[0][0]*A[2][2] + A[1][1]*A[2][2]) // a*b + a*c + b*c - d^2 - e^2 - f^2 - (dd + ee + ff); c0 = A[2][2]*dd + A[0][0]*ee + A[1][1]*ff - A[0][0]*A[1][1]*A[2][2] - 2.0 * A[0][2]*de; // c*d^2 + a*e^2 + b*f^2 - a*b*c - 2*f*d*e) double p, sqrt_p, q, c, s, phi; p = SQR(m) - 3.0*c1; q = m*(p - (3.0/2.0)*c1) - (27.0/2.0)*c0; sqrt_p = sqrt(fabs(p)); phi = 27.0 * ( 0.25*SQR(c1)*(p - c1) + c0*(q + 27.0/4.0*c0)); phi = (1.0/3.0) * atan2(sqrt(fabs(phi)), q); c = sqrt_p*cos(phi); s = (1.0/M_SQRT3)*sqrt_p*sin(phi); w[1] = (1.0/3.0)*(m - c); w[2] = w[1] + s; w[0] = w[1] + c; w[1] -= s; return 0; } relion-3.1.3/src/jaz/d3x3/dsyevc3.h000066400000000000000000000021771411340063500166770ustar00rootroot00000000000000// ---------------------------------------------------------------------------- // Numerical diagonalization of 3x3 matrcies // Copyright (C) 2006 Joachim Kopp // ---------------------------------------------------------------------------- // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA // ---------------------------------------------------------------------------- #ifndef __DSYEVC3_H #define __DSYEVC3_H int dsyevc3(double A[3][3], double w[3]); #endif relion-3.1.3/src/jaz/d3x3/dsyevd3.c000066400000000000000000000143361411340063500166730ustar00rootroot00000000000000// ---------------------------------------------------------------------------- // Numerical diagonalization of 3x3 matrcies // Copyright (C) 2006 Joachim Kopp // ---------------------------------------------------------------------------- // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA // ---------------------------------------------------------------------------- #include #include #include #include #include "dsyev2.h" #include "slvsec3.h" #include "dsytrd3.h" #include "dsyevd3.h" // Macros #define SQR(x) ((x)*(x)) // x^2 // ---------------------------------------------------------------------------- int dsyevd3(double A[3][3], double Q[3][3], double w[3]) // ---------------------------------------------------------------------------- // Calculates the eigenvalues and normalized eigenvectors of a symmetric 3x3 // matrix A using Cuppen's Divide & Conquer algorithm. // The function accesses only the diagonal and upper triangular parts of A. // The access is read-only. // ---------------------------------------------------------------------------- // Parameters: // A: The symmetric input matrix // Q: Storage buffer for eigenvectors // w: Storage buffer for eigenvalues // ---------------------------------------------------------------------------- // Return value: // 0: Success // -1: Error // ---------------------------------------------------------------------------- // Dependencies: // dsyev2(), slvsec3(), dsytrd3() // ---------------------------------------------------------------------------- { const int n = 3; double R[3][3]; // Householder transformation matrix double P[3][3]; // Unitary transformation matrix which diagonalizes D + w w^T double e[2]; // Off-diagonal elements after Householder transformation double d[3]; // Eigenvalues of split matrix in the "divide" step) double c, s; // Eigenvector of 2x2 block in the "divide" step double z[3]; // Numerators of secular equation / Updating vector double t; // Miscellaenous temporary stuff // Initialize Q #ifndef EVALS_ONLY memset(Q, 0.0, 9*sizeof(double)); #endif // Transform A to real tridiagonal form by the Householder method dsytrd3(A, R, w, e); // "Divide" // -------- // Detect matrices that factorize to avoid multiple eigenvalues in the Divide/Conquer algorithm for (int i=0; i < n-1; i++) { t = fabs(w[i]) + fabs(w[i+1]); if (fabs(e[i]) <= 8.0*DBL_EPSILON*t) { if (i == 0) { dsyev2(w[1], e[1], w[2], &d[1], &d[2], &c, &s); w[1] = d[1]; w[2] = d[2]; #ifndef EVALS_ONLY Q[0][0] = 1.0; for (int j=1; j < n; j++) { Q[j][1] = s*R[j][2] + c*R[j][1]; Q[j][2] = c*R[j][2] - s*R[j][1]; } #endif } else { dsyev2(w[0], e[0], w[1], &d[0], &d[1], &c, &s); w[0] = d[0]; w[1] = d[1]; #ifndef EVALS_ONLY Q[0][0] = c; Q[0][1] = -s; Q[1][0] = R[1][1]*s; Q[1][1] = R[1][1]*c; Q[1][2] = R[1][2]; Q[2][0] = R[2][1]*s; Q[2][1] = R[2][1]*c; Q[2][2] = R[2][2]; #endif } return 0; } } // Calculate eigenvalues and eigenvectors of 2x2 block dsyev2(w[1]-e[0], e[1], w[2], &d[1], &d[2], &c, &s); d[0] = w[0] - e[0]; // "Conquer" // --------- // Determine coefficients of secular equation z[0] = e[0]; z[1] = e[0] * SQR(c); z[2] = e[0] * SQR(s); // Call slvsec3 with d sorted in ascending order. We make // use of the fact that dsyev2 guarantees d[1] >= d[2]. 
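  // Pass the eigenvalue estimates to slvsec3() in ascending order: dsyev2()
  // guarantees d[1] >= d[2], so only the position of d[0] has to be tested.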
if (d[0] < d[2]) slvsec3(d, z, w, P, 0, 2, 1); else if (d[0] < d[1]) slvsec3(d, z, w, P, 2, 0, 1); else slvsec3(d, z, w, P, 2, 1, 0); #ifndef EVALS_ONLY // Calculate eigenvectors of matrix D + beta * z * z^t and store them in the // columns of P z[0] = sqrt(fabs(e[0])); z[1] = c * z[0]; z[2] = -s * z[0]; // Detect duplicate elements in d to avoid division by zero t = 8.0*DBL_EPSILON*(fabs(d[0]) + fabs(d[1]) + fabs(d[2])); if (fabs(d[1] - d[0]) <= t) { for (int j=0; j < n; j++) { if (P[0][j] * P[1][j] <= 0.0) { P[0][j] = z[1]; P[1][j] = -z[0]; P[2][j] = 0.0; } else for (int i=0; i < n; i++) P[i][j] = z[i]/P[i][j]; } } else if (fabs(d[2] - d[0]) <= t) { for (int j=0; j < n; j++) { if (P[0][j] * P[2][j] <= 0.0) { P[0][j] = z[2]; P[1][j] = 0.0; P[2][j] = -z[0]; } else for (int i=0; i < n; i++) P[i][j] = z[i]/P[i][j]; } } else { for (int j=0; j < n; j++) for (int i=0; i < n; i++) { if (P[i][j] == 0.0) { P[i][j] = 1.0; P[(i+1)%n][j] = 0.0; P[(i+2)%n][j] = 0.0; break; } else P[i][j] = z[i]/P[i][j]; } } // Normalize eigenvectors of D + beta * z * z^t for (int j=0; j < n; j++) { t = SQR(P[0][j]) + SQR(P[1][j]) + SQR(P[2][j]); t = 1.0 / sqrt(t); for (int i=0; i < n; i++) P[i][j] *= t; } // Undo diagonalization of 2x2 block for (int j=0; j < n; j++) { t = P[1][j]; P[1][j] = c*t - s*P[2][j]; P[2][j] = s*t + c*P[2][j]; } // Undo Householder transformation for (int j=0; j < n; j++) for (int k=0; k < n; k++) { t = P[k][j]; for (int i=0; i < n; i++) Q[i][j] += t * R[i][k]; } #endif return 0; } relion-3.1.3/src/jaz/d3x3/dsyevd3.h000066400000000000000000000022171411340063500166730ustar00rootroot00000000000000// ---------------------------------------------------------------------------- // Numerical diagonalization of 3x3 matrcies // Copyright (C) 2006 Joachim Kopp // ---------------------------------------------------------------------------- // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA // ---------------------------------------------------------------------------- #ifndef __DSYEVD3_H #define __DSYEVD3_H int dsyevd3(double A[3][3], double Q[3][3], double w[3]); #endif relion-3.1.3/src/jaz/d3x3/dsyevh3.c000066400000000000000000000117731411340063500167010ustar00rootroot00000000000000// ---------------------------------------------------------------------------- // Numerical diagonalization of 3x3 matrcies // Copyright (C) 2006 Joachim Kopp // ---------------------------------------------------------------------------- // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. 
// // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA // ---------------------------------------------------------------------------- #include #include #include #include #include "dsyevc3.h" #include "dsyevq3.h" #include "dsyevh3.h" // Macros #define SQR(x) ((x)*(x)) // x^2 // ---------------------------------------------------------------------------- int dsyevh3(double A[3][3], double Q[3][3], double w[3]) // ---------------------------------------------------------------------------- // Calculates the eigenvalues and normalized eigenvectors of a symmetric 3x3 // matrix A using Cardano's method for the eigenvalues and an analytical // method based on vector cross products for the eigenvectors. However, // if conditions are such that a large error in the results is to be // expected, the routine falls back to using the slower, but more // accurate QL algorithm. Only the diagonal and upper triangular parts of A need // to contain meaningful values. Access to A is read-only. // ---------------------------------------------------------------------------- // Parameters: // A: The symmetric input matrix // Q: Storage buffer for eigenvectors // w: Storage buffer for eigenvalues // ---------------------------------------------------------------------------- // Return value: // 0: Success // -1: Error // ---------------------------------------------------------------------------- // Dependencies: // dsyevc3(), dsytrd3(), dsyevq3() // ---------------------------------------------------------------------------- // Version history: // v1.1: Simplified fallback condition --> speed-up // v1.0: First released version // ---------------------------------------------------------------------------- { #ifndef EVALS_ONLY double norm; // Squared norm or inverse norm of current eigenvector // double n0, n1; // Norm of first and second columns of A double error; // Estimated maximum roundoff error double t, u; // Intermediate storage int j; // Loop counter #endif // Calculate eigenvalues dsyevc3(A, w); #ifndef EVALS_ONLY // n0 = SQR(A[0][0]) + SQR(A[0][1]) + SQR(A[0][2]); // n1 = SQR(A[0][1]) + SQR(A[1][1]) + SQR(A[1][2]); t = fabs(w[0]); if ((u=fabs(w[1])) > t) t = u; if ((u=fabs(w[2])) > t) t = u; if (t < 1.0) u = t; else u = SQR(t); error = 256.0 * DBL_EPSILON * SQR(u); // error = 256.0 * DBL_EPSILON * (n0 + u) * (n1 + u); Q[0][1] = A[0][1]*A[1][2] - A[0][2]*A[1][1]; Q[1][1] = A[0][2]*A[0][1] - A[1][2]*A[0][0]; Q[2][1] = SQR(A[0][1]); // Calculate first eigenvector by the formula // v[0] = (A - w[0]).e1 x (A - w[0]).e2 Q[0][0] = Q[0][1] + A[0][2]*w[0]; Q[1][0] = Q[1][1] + A[1][2]*w[0]; Q[2][0] = (A[0][0] - w[0]) * (A[1][1] - w[0]) - Q[2][1]; norm = SQR(Q[0][0]) + SQR(Q[1][0]) + SQR(Q[2][0]); // If vectors are nearly linearly dependent, or if there might have // been large cancellations in the calculation of A[i][i] - w[0], fall // back to QL algorithm // Note that this simultaneously ensures that multiple eigenvalues do // not cause problems: If w[0] = w[1], then A - w[0] * I has rank 1, // i.e. all columns of A - w[0] * I are linearly dependent. 
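  // Fall back to the slower but more accurate QL algorithm if the analytically
  // computed eigenvector is numerically unreliable; otherwise normalise it.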
if (norm <= error) return dsyevq3(A, Q, w); else // This is the standard branch { norm = sqrt(1.0 / norm); for (j=0; j < 3; j++) Q[j][0] = Q[j][0] * norm; } // Calculate second eigenvector by the formula // v[1] = (A - w[1]).e1 x (A - w[1]).e2 Q[0][1] = Q[0][1] + A[0][2]*w[1]; Q[1][1] = Q[1][1] + A[1][2]*w[1]; Q[2][1] = (A[0][0] - w[1]) * (A[1][1] - w[1]) - Q[2][1]; norm = SQR(Q[0][1]) + SQR(Q[1][1]) + SQR(Q[2][1]); if (norm <= error) return dsyevq3(A, Q, w); else { norm = sqrt(1.0 / norm); for (j=0; j < 3; j++) Q[j][1] = Q[j][1] * norm; } // Calculate third eigenvector according to // v[2] = v[0] x v[1] Q[0][2] = Q[1][0]*Q[2][1] - Q[2][0]*Q[1][1]; Q[1][2] = Q[2][0]*Q[0][1] - Q[0][0]*Q[2][1]; Q[2][2] = Q[0][0]*Q[1][1] - Q[1][0]*Q[0][1]; #endif return 0; } relion-3.1.3/src/jaz/d3x3/dsyevh3.h000066400000000000000000000022171411340063500166770ustar00rootroot00000000000000// ---------------------------------------------------------------------------- // Numerical diagonalization of 3x3 matrcies // Copyright (C) 2006 Joachim Kopp // ---------------------------------------------------------------------------- // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA // ---------------------------------------------------------------------------- #ifndef __DSYEVH3_H #define __DSYEVH3_H int dsyevh3(double A[3][3], double Q[3][3], double w[3]); #endif relion-3.1.3/src/jaz/d3x3/dsyevj3.c000066400000000000000000000113131411340063500166710ustar00rootroot00000000000000// ---------------------------------------------------------------------------- // Numerical diagonalization of 3x3 matrcies // Copyright (C) 2006 Joachim Kopp // ---------------------------------------------------------------------------- // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA // ---------------------------------------------------------------------------- #include #include #include "dsyevj3.h" // Macros #define SQR(x) ((x)*(x)) // x^2 // ---------------------------------------------------------------------------- int dsyevj3(double A[3][3], double Q[3][3], double w[3]) // ---------------------------------------------------------------------------- // Calculates the eigenvalues and normalized eigenvectors of a symmetric 3x3 // matrix A using the Jacobi algorithm. // The upper triangular part of A is destroyed during the calculation, // the diagonal elements are read but not destroyed, and the lower // triangular elements are not referenced at all. // ---------------------------------------------------------------------------- // Parameters: // A: The symmetric input matrix // Q: Storage buffer for eigenvectors // w: Storage buffer for eigenvalues // ---------------------------------------------------------------------------- // Return value: // 0: Success // -1: Error (no convergence) // ---------------------------------------------------------------------------- { const int n = 3; double sd, so; // Sums of diagonal resp. off-diagonal elements double s, c, t; // sin(phi), cos(phi), tan(phi) and temporary storage double g, h, z, theta; // More temporary storage double thresh; // Initialize Q to the identitity matrix #ifndef EVALS_ONLY for (int i=0; i < n; i++) { Q[i][i] = 1.0; for (int j=0; j < i; j++) Q[i][j] = Q[j][i] = 0.0; } #endif // Initialize w to diag(A) for (int i=0; i < n; i++) w[i] = A[i][i]; // Calculate SQR(tr(A)) sd = 0.0; for (int i=0; i < n; i++) sd += fabs(w[i]); sd = SQR(sd); // Main iteration loop for (int nIter=0; nIter < 50; nIter++) { // Test for convergence so = 0.0; for (int p=0; p < n; p++) for (int q=p+1; q < n; q++) so += fabs(A[p][q]); if (so == 0.0) return 0; if (nIter < 4) thresh = 0.2 * so / SQR(n); else thresh = 0.0; // Do sweep for (int p=0; p < n; p++) for (int q=p+1; q < n; q++) { g = 100.0 * fabs(A[p][q]); if (nIter > 4 && fabs(w[p]) + g == fabs(w[p]) && fabs(w[q]) + g == fabs(w[q])) { A[p][q] = 0.0; } else if (fabs(A[p][q]) > thresh) { // Calculate Jacobi transformation h = w[q] - w[p]; if (fabs(h) + g == fabs(h)) { t = A[p][q] / h; } else { theta = 0.5 * h / A[p][q]; if (theta < 0.0) t = -1.0 / (sqrt(1.0 + SQR(theta)) - theta); else t = 1.0 / (sqrt(1.0 + SQR(theta)) + theta); } c = 1.0/sqrt(1.0 + SQR(t)); s = t * c; z = t * A[p][q]; // Apply Jacobi transformation A[p][q] = 0.0; w[p] -= z; w[q] += z; for (int r=0; r < p; r++) { t = A[r][p]; A[r][p] = c*t - s*A[r][q]; A[r][q] = s*t + c*A[r][q]; } for (int r=p+1; r < q; r++) { t = A[p][r]; A[p][r] = c*t - s*A[r][q]; A[r][q] = s*t + c*A[r][q]; } for (int r=q+1; r < n; r++) { t = A[p][r]; A[p][r] = c*t - s*A[q][r]; A[q][r] = s*t + c*A[q][r]; } // Update eigenvectors #ifndef EVALS_ONLY for (int r=0; r < n; r++) { t = Q[r][p]; Q[r][p] = c*t - s*Q[r][q]; Q[r][q] = s*t + c*Q[r][q]; } #endif } } } return -1; } relion-3.1.3/src/jaz/d3x3/dsyevj3.h000066400000000000000000000022171411340063500167010ustar00rootroot00000000000000// ---------------------------------------------------------------------------- // Numerical diagonalization of 3x3 matrcies // Copyright (C) 2006 Joachim Kopp // ---------------------------------------------------------------------------- // This 
library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA // ---------------------------------------------------------------------------- #ifndef __DSYEVJ3_H #define __DSYEVJ3_H int dsyevj3(double A[3][3], double Q[3][3], double w[3]); #endif relion-3.1.3/src/jaz/d3x3/dsyevq3.c000066400000000000000000000101411411340063500166760ustar00rootroot00000000000000// ---------------------------------------------------------------------------- // Numerical diagonalization of 3x3 matrcies // Copyright (C) 2006 Joachim Kopp // ---------------------------------------------------------------------------- // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA // ---------------------------------------------------------------------------- #include #include #include "dsytrd3.h" #include "dsyevq3.h" // Macros #define SQR(x) ((x)*(x)) // x^2 // ---------------------------------------------------------------------------- int dsyevq3(double A[3][3], double Q[3][3], double w[3]) // ---------------------------------------------------------------------------- // Calculates the eigenvalues and normalized eigenvectors of a symmetric 3x3 // matrix A using the QL algorithm with implicit shifts, preceded by a // Householder reduction to tridiagonal form. // The function accesses only the diagonal and upper triangular parts of A. // The access is read-only. 
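// The eigenvalues are not returned in any particular order; each entry of w
// corresponds to the column of Q with the same index.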
// ---------------------------------------------------------------------------- // Parameters: // A: The symmetric input matrix // Q: Storage buffer for eigenvectors // w: Storage buffer for eigenvalues // ---------------------------------------------------------------------------- // Return value: // 0: Success // -1: Error (no convergence) // ---------------------------------------------------------------------------- // Dependencies: // dsytrd3() // ---------------------------------------------------------------------------- { const int n = 3; double e[3]; // The third element is used only as temporary workspace double g, r, p, f, b, s, c, t; // Intermediate storage int nIter; int m; // Transform A to real tridiagonal form by the Householder method dsytrd3(A, Q, w, e); // Calculate eigensystem of the remaining real symmetric tridiagonal matrix // with the QL method // // Loop over all off-diagonal elements for (int l=0; l < n-1; l++) { nIter = 0; while (1) { // Check for convergence and exit iteration loop if off-diagonal // element e(l) is zero for (m=l; m <= n-2; m++) { g = fabs(w[m])+fabs(w[m+1]); if (fabs(e[m]) + g == g) break; } if (m == l) break; if (nIter++ >= 30) return -1; // Calculate g = d_m - k g = (w[l+1] - w[l]) / (e[l] + e[l]); r = sqrt(SQR(g) + 1.0); if (g > 0) g = w[m] - w[l] + e[l]/(g + r); else g = w[m] - w[l] + e[l]/(g - r); s = c = 1.0; p = 0.0; for (int i=m-1; i >= l; i--) { f = s * e[i]; b = c * e[i]; if (fabs(f) > fabs(g)) { c = g / f; r = sqrt(SQR(c) + 1.0); e[i+1] = f * r; c *= (s = 1.0/r); } else { s = f / g; r = sqrt(SQR(s) + 1.0); e[i+1] = g * r; s *= (c = 1.0/r); } g = w[i+1] - p; r = (w[i] - g)*s + 2.0*c*b; p = s * r; w[i+1] = g + p; g = c*r - b; // Form eigenvectors #ifndef EVALS_ONLY for (int k=0; k < n; k++) { t = Q[k][i+1]; Q[k][i+1] = s*Q[k][i] + c*t; Q[k][i] = c*Q[k][i] - s*t; } #endif } w[l] -= p; e[l] = g; e[m] = 0.0; } } return 0; } relion-3.1.3/src/jaz/d3x3/dsyevq3.h000066400000000000000000000022171411340063500167100ustar00rootroot00000000000000// ---------------------------------------------------------------------------- // Numerical diagonalization of 3x3 matrcies // Copyright (C) 2006 Joachim Kopp // ---------------------------------------------------------------------------- // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA // ---------------------------------------------------------------------------- #ifndef __DSYEVQ3_H #define __DSYEVQ3_H int dsyevq3(double A[3][3], double Q[3][3], double w[3]); #endif relion-3.1.3/src/jaz/d3x3/dsyevv3.c000066400000000000000000000203711411340063500167110ustar00rootroot00000000000000// ---------------------------------------------------------------------------- // Numerical diagonalization of 3x3 matrcies // Copyright (C) 2006 Joachim Kopp // ---------------------------------------------------------------------------- // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA // ---------------------------------------------------------------------------- #include #include #include #include "dsyevc3.h" #include "dsyevv3.h" // Macros #define SQR(x) ((x)*(x)) // x^2 // ---------------------------------------------------------------------------- int dsyevv3(double A[3][3], double Q[3][3], double w[3]) // ---------------------------------------------------------------------------- // Calculates the eigenvalues and normalized eigenvectors of a symmetric 3x3 // matrix A using Cardano's method for the eigenvalues and an analytical // method based on vector cross products for the eigenvectors. // Only the diagonal and upper triangular parts of A need to contain meaningful // values. However, all of A may be used as temporary storage and may hence be // destroyed. 
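// The eigenvectors are obtained from cross products of columns of (A - w[i]*I);
// nearly linearly dependent or degenerate cases are handled by explicit fallbacks.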
// ---------------------------------------------------------------------------- // Parameters: // A: The symmetric input matrix // Q: Storage buffer for eigenvectors // w: Storage buffer for eigenvalues // ---------------------------------------------------------------------------- // Return value: // 0: Success // -1: Error // ---------------------------------------------------------------------------- // Dependencies: // dsyevc3() // ---------------------------------------------------------------------------- // Version history: // v1.1 (12 Mar 2012): Removed access to lower triangualr part of A // (according to the documentation, only the upper triangular part needs // to be filled) // v1.0: First released version // ---------------------------------------------------------------------------- { #ifndef EVALS_ONLY double norm; // Squared norm or inverse norm of current eigenvector double n0, n1; // Norm of first and second columns of A double n0tmp, n1tmp; // "Templates" for the calculation of n0/n1 - saves a few FLOPS double thresh; // Small number used as threshold for floating point comparisons double error; // Estimated maximum roundoff error in some steps double wmax; // The eigenvalue of maximum modulus double f, t; // Intermediate storage int i, j; // Loop counters #endif // Calculate eigenvalues dsyevc3(A, w); #ifndef EVALS_ONLY wmax = fabs(w[0]); if ((t=fabs(w[1])) > wmax) wmax = t; if ((t=fabs(w[2])) > wmax) wmax = t; thresh = SQR(8.0 * DBL_EPSILON * wmax); // Prepare calculation of eigenvectors n0tmp = SQR(A[0][1]) + SQR(A[0][2]); n1tmp = SQR(A[0][1]) + SQR(A[1][2]); Q[0][1] = A[0][1]*A[1][2] - A[0][2]*A[1][1]; Q[1][1] = A[0][2]*A[0][1] - A[1][2]*A[0][0]; Q[2][1] = SQR(A[0][1]); // Calculate first eigenvector by the formula // v[0] = (A - w[0]).e1 x (A - w[0]).e2 A[0][0] -= w[0]; A[1][1] -= w[0]; Q[0][0] = Q[0][1] + A[0][2]*w[0]; Q[1][0] = Q[1][1] + A[1][2]*w[0]; Q[2][0] = A[0][0]*A[1][1] - Q[2][1]; norm = SQR(Q[0][0]) + SQR(Q[1][0]) + SQR(Q[2][0]); n0 = n0tmp + SQR(A[0][0]); n1 = n1tmp + SQR(A[1][1]); error = n0 * n1; if (n0 <= thresh) // If the first column is zero, then (1,0,0) is an eigenvector { Q[0][0] = 1.0; Q[1][0] = 0.0; Q[2][0] = 0.0; } else if (n1 <= thresh) // If the second column is zero, then (0,1,0) is an eigenvector { Q[0][0] = 0.0; Q[1][0] = 1.0; Q[2][0] = 0.0; } else if (norm < SQR(64.0 * DBL_EPSILON) * error) { // If angle between A[0] and A[1] is too small, don't use t = SQR(A[0][1]); // cross product, but calculate v ~ (1, -A0/A1, 0) f = -A[0][0] / A[0][1]; if (SQR(A[1][1]) > t) { t = SQR(A[1][1]); f = -A[0][1] / A[1][1]; } if (SQR(A[1][2]) > t) f = -A[0][2] / A[1][2]; norm = 1.0/sqrt(1 + SQR(f)); Q[0][0] = norm; Q[1][0] = f * norm; Q[2][0] = 0.0; } else // This is the standard branch { norm = sqrt(1.0 / norm); for (j=0; j < 3; j++) Q[j][0] = Q[j][0] * norm; } // Prepare calculation of second eigenvector t = w[0] - w[1]; if (fabs(t) > 8.0 * DBL_EPSILON * wmax) { // For non-degenerate eigenvalue, calculate second eigenvector by the formula // v[1] = (A - w[1]).e1 x (A - w[1]).e2 A[0][0] += t; A[1][1] += t; Q[0][1] = Q[0][1] + A[0][2]*w[1]; Q[1][1] = Q[1][1] + A[1][2]*w[1]; Q[2][1] = A[0][0]*A[1][1] - Q[2][1]; norm = SQR(Q[0][1]) + SQR(Q[1][1]) + SQR(Q[2][1]); n0 = n0tmp + SQR(A[0][0]); n1 = n1tmp + SQR(A[1][1]); error = n0 * n1; if (n0 <= thresh) // If the first column is zero, then (1,0,0) is an eigenvector { Q[0][1] = 1.0; Q[1][1] = 0.0; Q[2][1] = 0.0; } else if (n1 <= thresh) // If the second column is zero, then (0,1,0) is an eigenvector { Q[0][1] = 
0.0; Q[1][1] = 1.0; Q[2][1] = 0.0; } else if (norm < SQR(64.0 * DBL_EPSILON) * error) { // If angle between A[0] and A[1] is too small, don't use t = SQR(A[0][1]); // cross product, but calculate v ~ (1, -A0/A1, 0) f = -A[0][0] / A[0][1]; if (SQR(A[1][1]) > t) { t = SQR(A[1][1]); f = -A[0][1] / A[1][1]; } if (SQR(A[1][2]) > t) f = -A[0][2] / A[1][2]; norm = 1.0/sqrt(1 + SQR(f)); Q[0][1] = norm; Q[1][1] = f * norm; Q[2][1] = 0.0; } else { norm = sqrt(1.0 / norm); for (j=0; j < 3; j++) Q[j][1] = Q[j][1] * norm; } } else { // For degenerate eigenvalue, calculate second eigenvector according to // v[1] = v[0] x (A - w[1]).e[i] // // This would really get to complicated if we could not assume all of A to // contain meaningful values. A[1][0] = A[0][1]; A[2][0] = A[0][2]; A[2][1] = A[1][2]; A[0][0] += w[0]; A[1][1] += w[0]; for (i=0; i < 3; i++) { A[i][i] -= w[1]; n0 = SQR(A[0][i]) + SQR(A[1][i]) + SQR(A[2][i]); if (n0 > thresh) { Q[0][1] = Q[1][0]*A[2][i] - Q[2][0]*A[1][i]; Q[1][1] = Q[2][0]*A[0][i] - Q[0][0]*A[2][i]; Q[2][1] = Q[0][0]*A[1][i] - Q[1][0]*A[0][i]; norm = SQR(Q[0][1]) + SQR(Q[1][1]) + SQR(Q[2][1]); if (norm > SQR(256.0 * DBL_EPSILON) * n0) // Accept cross product only if the angle between { // the two vectors was not too small norm = sqrt(1.0 / norm); for (j=0; j < 3; j++) Q[j][1] = Q[j][1] * norm; break; } } } if (i == 3) // This means that any vector orthogonal to v[0] is an EV. { for (j=0; j < 3; j++) if (Q[j][0] != 0.0) // Find nonzero element of v[0] ... { // ... and swap it with the next one norm = 1.0 / sqrt(SQR(Q[j][0]) + SQR(Q[(j+1)%3][0])); Q[j][1] = Q[(j+1)%3][0] * norm; Q[(j+1)%3][1] = -Q[j][0] * norm; Q[(j+2)%3][1] = 0.0; break; } } } // Calculate third eigenvector according to // v[2] = v[0] x v[1] Q[0][2] = Q[1][0]*Q[2][1] - Q[2][0]*Q[1][1]; Q[1][2] = Q[2][0]*Q[0][1] - Q[0][0]*Q[2][1]; Q[2][2] = Q[0][0]*Q[1][1] - Q[1][0]*Q[0][1]; #endif return 0; } relion-3.1.3/src/jaz/d3x3/dsyevv3.h000066400000000000000000000022171411340063500167150ustar00rootroot00000000000000// ---------------------------------------------------------------------------- // Numerical diagonalization of 3x3 matrcies // Copyright (C) 2006 Joachim Kopp // ---------------------------------------------------------------------------- // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA // ---------------------------------------------------------------------------- #ifndef __DSYEVV3_H #define __DSYEVV3_H int dsyevv3(double A[3][3], double Q[3][3], double w[3]); #endif relion-3.1.3/src/jaz/d3x3/dsytrd3.c000066400000000000000000000061651411340063500167060ustar00rootroot00000000000000// ---------------------------------------------------------------------------- // Numerical diagonalization of 3x3 matrices // Copyright (C) 2006 Joachim Kopp // ---------------------------------------------------------------------------- // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA // ---------------------------------------------------------------------------- #include #include #include "dsytrd3.h" // Macros #define SQR(x) ((x)*(x)) // x^2 // ---------------------------------------------------------------------------- inline void dsytrd3(double A[3][3], double Q[3][3], double d[3], double e[2]) // ---------------------------------------------------------------------------- // Reduces a symmetric 3x3 matrix to tridiagonal form by applying // (unitary) Householder transformations: // [ d[0] e[0] ] // A = Q . [ e[0] d[1] e[1] ] . Q^T // [ e[1] d[2] ] // The function accesses only the diagonal and upper triangular parts of // A. The access is read-only.
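//
// A minimal usage sketch (the matrix entries are illustrative only, not
// values used anywhere in RELION): dsytrd3() is the first stage of the QL
// solver dsyevq3() shown earlier, but it can also be called on its own:
//
//   double A[3][3] = { { 2.0, 1.0, 0.0 },
//                      { 1.0, 3.0, 1.0 },
//                      { 0.0, 1.0, 4.0 } };
//   double Q[3][3], d[3], e[2];
//   dsytrd3(A, Q, d, e);
//   // d[0..2] now hold the diagonal and e[0..1] the off-diagonal of the
//   // tridiagonal matrix T = Q^T . A . Q; since Q is orthogonal, T has
//   // the same eigenvalues as A.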
// --------------------------------------------------------------------------- { const int n = 3; double u[n], q[n]; double omega, f; double K, h, g; // Initialize Q to the identitity matrix #ifndef EVALS_ONLY for (int i=0; i < n; i++) { Q[i][i] = 1.0; for (int j=0; j < i; j++) Q[i][j] = Q[j][i] = 0.0; } #endif // Bring first row and column to the desired form h = SQR(A[0][1]) + SQR(A[0][2]); if (A[0][1] > 0) g = -sqrt(h); else g = sqrt(h); e[0] = g; f = g * A[0][1]; u[1] = A[0][1] - g; u[2] = A[0][2]; omega = h - f; if (omega > 0.0) { omega = 1.0 / omega; K = 0.0; for (int i=1; i < n; i++) { f = A[1][i] * u[1] + A[i][2] * u[2]; q[i] = omega * f; // p K += u[i] * f; // u* A u } K *= 0.5 * SQR(omega); for (int i=1; i < n; i++) q[i] = q[i] - K * u[i]; d[0] = A[0][0]; d[1] = A[1][1] - 2.0*q[1]*u[1]; d[2] = A[2][2] - 2.0*q[2]*u[2]; // Store inverse Householder transformation in Q #ifndef EVALS_ONLY for (int j=1; j < n; j++) { f = omega * u[j]; for (int i=1; i < n; i++) Q[i][j] = Q[i][j] - f*u[i]; } #endif // Calculate updated A[1][2] and store it in e[1] e[1] = A[1][2] - q[1]*u[2] - u[1]*q[2]; } else { for (int i=0; i < n; i++) d[i] = A[i][i]; e[1] = A[1][2]; } } relion-3.1.3/src/jaz/d3x3/dsytrd3.h000066400000000000000000000022351411340063500167060ustar00rootroot00000000000000// ---------------------------------------------------------------------------- // Numerical diagonalization of 3x3 matrcies // Copyright (C) 2006 Joachim Kopp // ---------------------------------------------------------------------------- // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA // ---------------------------------------------------------------------------- #ifndef __DSYTRD3_H #define __DSYTRD3_H void dsytrd3(double A[3][3], double Q[3][3], double d[3], double e[2]); #endif relion-3.1.3/src/jaz/d3x3/slvsec3.c000066400000000000000000000151611411340063500166710ustar00rootroot00000000000000// ---------------------------------------------------------------------------- // Numerical diagonalization of 3x3 matrcies // Copyright (C) 2006 Joachim Kopp // ---------------------------------------------------------------------------- // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA // ---------------------------------------------------------------------------- #include #include #include #include "slvsec3.h" // Constants #define M_SQRT3 1.73205080756887729352744634151 // sqrt(3) // Macros #define SQR(x) ((x)*(x)) // x^2 // ---------------------------------------------------------------------------- void slvsec3(double d[3], double z[3], double w[3], double R[3][3], int i0, int i1, int i2) // ---------------------------------------------------------------------------- // Finds the three roots w_j of the secular equation // f(w_j) = 1 + Sum[ z_i / (d_i - w_j) ] == 0. // It is assumed that d_0 <= d_1 <= d_2, and that all z_i have the same sign. // The arrays P_i will contain the information required for the calculation // of the eigenvectors: // P_ij = d_i - w_j. // These differences can be obtained with better accuracy from intermediate // results. // ---------------------------------------------------------------------------- { double a[4]; // Bounds of the intervals bracketing the roots double delta; // Shift of the d_i which ensures better accuracy double dd[3]; // Shifted coefficients dd_i = d_i - delta double xl, xh; // Interval which straddles the current root. f(xl) < 0, f(xh) > 0 double x; // Current estimates for the root double x0[3]; // Analytically calculated roots, used as starting values double F, dF; // Function value f(x) and derivative f'(x) double dx, dxold; // Current and last stepsizes double error; // Numerical error estimate, used for termination condition double t[3]; // Temporary storage used for evaluating f double alpha, beta, gamma; // Coefficients of polynomial f(x) * Product [ d_i - x ] double p, sqrt_p, q, c, s, phi; // Intermediate results of analytical calculation // Determine intervals which must contain the roots if (z[0] > 0) { a[0] = d[i0]; a[1] = d[i1]; a[2] = d[i2]; a[3] = fabs(d[0] + 3.0*z[0]) + fabs(d[1] + 3.0*z[1]) + fabs(d[2] + 3.0*z[2]); } else { a[0] = -fabs(d[0] + 3.0*z[0]) - fabs(d[1] + 3.0*z[1]) - fabs(d[2] + 3.0*z[2]); a[1] = d[i0]; a[2] = d[i1]; a[3] = d[i2]; } // Calculate roots of f(x) = 0 analytically (analogous to ZHEEVC3) t[0] = d[1]*d[2]; t[1] = d[0]*d[2]; t[2] = d[0]*d[1]; gamma = t[0]*d[0] + (z[0]*t[0] + z[1]*t[1] + z[2]*t[2]); // Coefficients beta = (z[0]*(d[1]+d[2]) + z[1]*(d[0]+d[2]) + z[2]*(d[0]+d[1])) + (t[0] + t[1] + t[2]); alpha = (z[0] + z[1] + z[2]) + (d[0] + d[1] + d[2]); p = SQR(alpha) - 3.0*beta; // Transformation that removes the x^2 term q = alpha*(p - (3.0/2.0)*beta) + (27.0/2.0)*gamma; sqrt_p = sqrt(fabs(p)); phi = 27.0 * ( 0.25*SQR(beta)*(p - beta) - gamma*(q - 27.0/4.0*gamma)); phi = (1.0/3.0) * atan2(sqrt(fabs(phi)), q); c = sqrt_p*cos(phi); s = (1.0/M_SQRT3)*sqrt_p*fabs(sin(phi)); x0[0] = x0[1] = x0[2] = (1.0/3.0)*(alpha - c); if (c > s) // Make sure the roots are in ascending order { x0[0] -= s; x0[1] += s; x0[2] += c; } else if (c < -s) { x0[0] += c; x0[1] -= s; x0[2] += s; } else { x0[0] -= s; x0[1] += c; x0[2] += s; } // Refine roots with a combined Bisection/Newton-Raphson method for (int i=0; i < 3; i++) { xl = a[i]; // Lower bound of bracketing interval xh = a[i+1]; // Upper bound of bracketing interval dx = dxold = 0.5 * (xh - xl); // Make sure that xl != xh if (dx == 0.0) { w[i] = xl; for (int j=0; j < 3; j++) R[j][i] = d[j] - xl; continue; } // Shift the root close 
to zero to achieve better accuracy if (x0[i] >= xh) { delta = xh; x = -dx; for (int j=0; j < 3; j++) { dd[j] = d[j] - delta; R[j][i] = dd[j] - x; } } else if (x0[i] <= xl) { delta = xl; x = dx; for (int j=0; j < 3; j++) { dd[j] = d[j] - delta; R[j][i] = dd[j] - x; } } else { delta = x0[i]; x = 0.0; for (int j=0; j < 3; j++) R[j][i] = dd[j] = d[j] - delta; } xl -= delta; xh -= delta; // Make sure that f(xl) < 0 and f(xh) > 0 if (z[0] < 0.0) { double t = xh; xh = xl; xl = t; } // Main iteration loop for (int nIter=0; nIter < 500; nIter++) { // Evaluate f and f', and calculate an error estimate F = 1.0; dF = 0.0; error = 1.0; for (int j=0; j < 3; j++) { t[0] = 1.0 / R[j][i]; t[1] = z[j] * t[0]; t[2] = t[1] * t[0]; F += t[1]; error += fabs(t[1]); dF += t[2]; } // Check for convergence if (fabs(F) <= DBL_EPSILON * (8.0 * error + fabs(x*dF))) break; // Adjust interval boundaries if (F < 0.0) xl = x; else xh = x; // Check, whether Newton-Raphson would converge fast enough. If so, // give it a try. If not, or if it would run out of bounds, use bisection if (fabs(2.0 * F) < fabs(dxold * dF)) { dxold = dx; dx = F / dF; x = x - dx; if ((x - xh) * (x - xl) >= 0.0) { dx = 0.5 * (xh - xl); x = xl + dx; } } else { dx = 0.5 * (xh - xl); x = xl + dx; } // Prepare next iteration for (int j=0; j < 3; j++) R[j][i] = dd[j] - x; } // Un-shift result w[i] = x + delta; } } relion-3.1.3/src/jaz/d3x3/slvsec3.h000066400000000000000000000023061411340063500166730ustar00rootroot00000000000000// ---------------------------------------------------------------------------- // Numerical diagonalization of 3x3 matrcies // Copyright (C) 2006 Joachim Kopp // ---------------------------------------------------------------------------- // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA // ---------------------------------------------------------------------------- #ifndef __SLVSEC3_H #define __SLVSEC3_H void slvsec3(double d[3], double z[3], double w[3], double R[3][3], int i0, int i1, int i2); #endif relion-3.1.3/src/jaz/damage_helper.cpp000066400000000000000000000544441411340063500176520ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. 
Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include #include #include #include #include using namespace gravis; void DamageHelper::fitDamage(const Image &frcData, const Image &frcWeight, std::vector &, std::vector &dec, int t0, double dosePerFrame, bool root) { const int kc = frcData.data.xdim; const int tc = frcData.data.ydim; amp.resize(kc); dec.resize(kc); amp[0] = 0; dec[0] = 1; std::vector snrData(tc), snrWeight(tc); const double eps = 1e-3; for (int k = 1; k < kc; k++) { double wSum = 0.0; double fSum = 0.0; for (int t = 0; t < tc; t++) { double frc = DIRECT_A2D_ELEM(frcData.data, t, k); if (frc > 1.0 - eps) frc = 1.0 - eps; if (frc < 0.0) frc = 0.0; if (root) { snrData[t] = sqrt(frc/(1.0 - frc)); } else { snrData[t] = 2.0 * frc/(1.0 - frc); } snrWeight[t] = DIRECT_A2D_ELEM(frcWeight.data, t, k); wSum += snrWeight[t]; fSum += snrData[t]; } if (wSum == 0.0) { amp[k] = 0.0; dec[k] = 0.0; } else { DamageFit df(snrData, snrWeight, k, t0, 1.0, 1.0); std::vector initial(2); initial[0] = 1.0; initial[1] = 1.0; std::vector opt = NelderMead::optimize(initial, df, 0.01, 0.000001, 100000); std::cout << k << ": " << opt[0] << ", " << dosePerFrame * opt[1] << "\n"; amp[k] = opt[0]; dec[k] = dosePerFrame * opt[1]; } } } Image DamageHelper::plotDamage(const std::vector &, const std::vector &dec, int frames) { Image out(amp.size(), frames); const int kc = amp.size(); const int tc = frames; for (int t = 0; t < tc; t++) for (int k = 0; k < kc; k++) { const double pred = amp[k] * exp(-t/dec[k]); DIRECT_A2D_ELEM(out.data, t, k) = pred; } return out; } Image DamageHelper::plotDamage(const std::vector &dec, int frames) { std::vector amp(dec.size(), 1.0); return plotDamage(amp, dec, frames); } Image DamageHelper::plotDamageFrc(const std::vector &, const std::vector &dec, int frames) { Image out(amp.size(), frames); const int kc = amp.size(); const int tc = frames; for (int t = 0; t < tc; t++) for (int k = 0; k < kc; k++) { const double pred = amp[k] * exp(-t/dec[k]); DIRECT_A2D_ELEM(out.data, t, k) = pred / (pred + 2.0); } return out; } Image DamageHelper::plotDamageFrc(const std::vector &dec, int frames) { std::vector amp(dec.size(), 1.0); return plotDamageFrc(amp, dec, frames); } void DamageHelper::fitGlobalDamage(const Image &frcData, const Image &frcWeight, std::vector &, double *a, double *b, double *c, int k0, int k1, int t0, double angpix, double dosePerFrame, bool L1) { const int kc = frcData.data.xdim; const int tc = frcData.data.ydim; amp.resize(kc); amp[0] = 0.0; Image snrData(kc,tc); const double eps = 1e-3; for (int k = 1; k < kc; k++) for (int t = 0; t < tc; t++) { double frc = DIRECT_A2D_ELEM(frcData.data, t, k); if (frc > 1.0 - eps) frc = 1.0 - eps; if (frc < 0.0) frc = 0.0; DIRECT_A2D_ELEM(snrData.data, t, k) = 2.0 * frc/(1.0 - frc); //DIRECT_A2D_ELEM(snrData.data, t, k) = frc; } GlobalDamageFit gdf(snrData, frcWeight, k0, k1, t0, L1); std::vector initial(3); initial[0] = 100.0; initial[1] = -1.0; initial[2] = 0.0; std::vector opt = NelderMead::optimize(initial, gdf, 0.001, 0.000001, 1000000); const double rho = 1.0 / (2.0 * (kc-1) * angpix); *a = dosePerFrame * opt[0] / pow(rho, opt[1]); *b = opt[1]; *c = dosePerFrame * opt[2]; std::cout << (*a) << ", " << (*b) << ", " << (*c) << "\n"; const double epsT = 1e-20; for (int k = 1; k < kc; k++) { double tau = opt[0] * pow(k,opt[1]) + opt[2]; if (tau < epsT) tau = epsT; amp[k] = gdf.getScale(k, tau); } } Image 
DamageHelper::plotGlobalDamage(double a, double b, double c, const std::vector &, int freqs, int frames, double angpix, double dosePerFrame, bool frc) { Image out(freqs, frames); const int kc = freqs; const int tc = frames; const double eps = 1e-20; const double rho = 1.0 / (2.0 * (kc-1) * angpix); for (int t = 0; t < tc; t++) for (int k = 0; k < kc; k++) { double tau = a * pow(rho*k,b) + c; if (tau < eps) tau = eps; const double pred = amp[k] * exp(-t*dosePerFrame/tau); if (frc) { DIRECT_A2D_ELEM(out.data, t, k) = pred / (pred + 2.0); } else { DIRECT_A2D_ELEM(out.data, t, k) = pred; } } return out; } Image DamageHelper::plotGlobalDamage(double a, double b, double c, int freqs, int frames, double angpix, double dosePerFrame, bool frc) { std::vector amp(freqs, 1.0); return plotGlobalDamage(a, b, c, amp, freqs, frames, angpix, dosePerFrame, frc); } std::vector > DamageHelper::damageWeights( int s, RFLOAT angpix, int f0, int fc, RFLOAT dosePerFrame, RFLOAT a, RFLOAT b, RFLOAT c) { std::vector > out(fc); for (long f = 0; f < fc; f++) { out[f] = damageWeight(s, angpix, (f+f0)*dosePerFrame, a, b, c); } return out; } Image DamageHelper::damageWeight( int s, RFLOAT angpix, RFLOAT dose, RFLOAT a, RFLOAT b, RFLOAT c) { const int kc = s/2 + 1; const double rho = 1.0 / (2.0 * (kc-1) * angpix); Image out(kc, s); for (long y = 0; y < s; y++) for (long x = 0; x < kc; x++) { const double xf = x; const double yf = y < kc? y : (y - s); double k = sqrt(xf*xf + yf*yf); double tau = a * pow(rho*k, b) + c; DIRECT_A2D_ELEM(out.data, y, x) = exp(-dose/tau); } return out; } RFLOAT DamageHelper::damage(double k, int kc, RFLOAT angpix, RFLOAT dose, RFLOAT a, RFLOAT b, RFLOAT c) { const double rho = 1.0 / (2.0 * (kc-1) * angpix); const double tau = a * pow(rho*k, b) + c; return exp(-dose/tau); } std::vector DamageHelper::fitBFactors(const Image &fcc, int k0, int k1, int verb) { const int kc = fcc.data.xdim; const int fc = fcc.data.ydim; double maxSumf = 0.0; int bestF = 0; for (int f = 0; f < fc; f++) { double sumf = 0.0; for (int k = 0; k < kc; k++) { sumf += fcc(f,k); } if (sumf > maxSumf) { maxSumf = sumf; bestF = f; } } if (verb > 0) std::cout << "best f: " << bestF << "\n"; std::vector scale(kc, 0.0); for (int k = 0; k < kc; k++) { scale[k] = fcc(bestF,k); } std::vector sig(fc); for (int it = 0; it < 5; it++) { for (int f = 0; f < fc; f++) { sig[f] = findSigmaRec(fcc, f, scale, 1, 10*kc, 20, 4, 0.1); } for (int k = 0; k < kc; k++) { double num = 0.0, denom = 0.0; for (int f = 0; f < fc; f++) { double p = fcc(f,k); double q = exp(-0.5 * k * k / (sig[f] * sig[f])); num += q*p; denom += q*q; } const double eps = 1e-20; scale[k] = denom > eps? 
num / denom : num / eps; } } Image debug(kc,fc); for (int k = 0; k < kc; k++) for (int f = 0; f < fc; f++) { debug(f,k) = scale[k] * exp(-0.5 * k * k / (sig[f] * sig[f])); } ImageLog::write(debug, "bfacs/debug"); return sig; } std::pair,std::vector> DamageHelper::fitBkFactors( const Image &fcc, int k0, int k1, int verb) { const int kc = fcc.data.xdim; const int fc = fcc.data.ydim; double maxSumf = 0.0; int bestF = 0; for (int f = 0; f < fc; f++) { double sumf = 0.0; for (int k = 0; k < kc; k++) { sumf += fcc(f,k); } if (sumf > maxSumf) { maxSumf = sumf; bestF = f; } } if (verb > 0) std::cout << "best f: " << bestF << "\n"; std::vector scale(kc, 0.0), wgh(kc, 1.0); for (int k = 0; k < kc; k++) { scale[k] = XMIPP_MAX(0.0, fcc(bestF,k)); } std::vector sig(fc); for (int it = 0; it < 5; it++) { for (int f = 0; f < fc; f++) { sig[f] = findSigmaKRec(fcc, f, scale, wgh, k0, k1, 1, 10*kc, 20, 4, 0.1); } for (int k = 0; k < kc; k++) { double num = 0.0, denom = 0.0; for (int f = 0; f < fc; f++) { double p = fcc(f,k); double q = sig[f].y * exp(-0.5 * k * k / (sig[f].x * sig[f].x)); num += q*p; denom += q*q; } const double eps = 1e-20; scale[k] = denom > eps? num / denom : num / eps; scale[k] = XMIPP_MAX(0.0, scale[k]); } } return std::make_pair(sig,scale); } std::vector DamageHelper::fitBkFactors( const Image &fcc, const Image &env, const Image &wgh, int k0, int k1) { const int kc = fcc.data.xdim; const int fc = fcc.data.ydim; std::vector envelope(kc), weight(kc); std::vector sig(fc); for (int f = 0; f < fc; f++) { for (int k = 0; k < kc; k++) { envelope[k] = XMIPP_MAX(0.0, env(f,k)); weight[k] = XMIPP_MAX(0.0, wgh(f,k)); } sig[f] = findSigmaKRec(fcc, f, envelope, weight, k0, k1, 1, 10*kc, 20, 4, 0.1); } return sig; } Image DamageHelper::renderBkFit( const std::pair,std::vector>& sigScale, int kc, int fc, bool noScale) { Image out(kc,fc); for (int k = 0; k < kc; k++) for (int f = 0; f < fc; f++) { const double sigma = sigScale.first[f].x; const double a = sigScale.first[f].y; const double scale = noScale? 
1.0 : sigScale.second[k]; out(f,k) = scale * a * exp(-0.5 * k * k / (sigma * sigma)); } return out; } Image DamageHelper::renderBkFit(std::vector sig, int kc, int fc) { Image out(kc,fc); for (int k = 0; k < kc; k++) for (int f = 0; f < fc; f++) { const double sigma = sig[f].x; const double a = sig[f].y; out(f,k) = a * exp(-0.5 * k * k / (sigma * sigma)); } return out; } double DamageHelper::findSigmaRec( const Image &fcc, int f, const std::vector &scale, double sig0, double sig1, int steps, int depth, double q) { const int kc = fcc.data.xdim; double minErr = std::numeric_limits::max(); double bestSig = sig0; const double eps = 1e-20; for (int s = 0; s < steps; s++) { const double sig = sig0 + s*(sig1 - sig0)/(steps-1); const double sig2 = sig*sig; if (sig2 < eps) continue; double sum = 0.0; for (int k = 0; k < kc; k++) { const double d = fcc(f,k) - scale[k] * exp(-0.5 * k * k / sig2); sum += d*d; } if (sum < minErr) { minErr = sum; bestSig = sig; } } if (depth > 0) { const double hrange = 0.5 * (sig1 - sig0); double snext0 = bestSig - q*hrange; double snext1 = bestSig + q*hrange; if (snext0 < eps) snext0 = eps; return findSigmaRec(fcc, f, scale, snext0, snext1, steps, depth - 1, q); } return bestSig; } d2Vector DamageHelper::findSigmaKRec( const Image &fcc, int f, const std::vector &envelope, const std::vector &weight, int k0, int k1, double sig0, double sig1, int steps, int depth, double q) { double minErr = std::numeric_limits::max(); double bestSig = sig0; double bestA = 1.0; const double eps = 1e-20; for (int s = 0; s < steps; s++) { const double sig = sig0 + s*(sig1 - sig0)/(steps-1); const double sig2 = sig*sig; if (sig2 < eps) continue; // find a double num = 0.0, denom = 0.0; for (int k = k0; k < k1; k++) { double p = fcc(f,k); double q = envelope[k] * exp(-0.5 * k * k / sig2); num += q*p; denom += q*q; } const double eps = 1e-20; double a = denom > eps? num / denom : num / eps; double sum = 0.0; for (int k = k0; k < k1; k++) { const double d = fcc(f,k) - envelope[k] * a * exp(-0.5 * k * k / sig2); sum += weight[k] * d*d; } if (sum < minErr) { minErr = sum; bestSig = sig; bestA = a; } } if (depth > 0) { const double hrange = 0.5 * (sig1 - sig0); double snext0 = bestSig - q*hrange; double snext1 = bestSig + q*hrange; if (snext0 < eps) snext0 = eps; return findSigmaKRec(fcc, f, envelope, weight, k0, k1, snext0, snext1, steps, depth - 1, q); } return d2Vector(bestSig, bestA); } std::vector> DamageHelper::computeWeights( const std::vector &bFacs, int kc, bool normalize) { const int fc = bFacs.size(); const int kc2 = 2 * (kc - 1); std::vector> out(fc); for (int f = 0; f < fc; f++) { out[f] = Image(kc,kc2); for (int y = 0; y < kc2; y++) for (int x = 0; x < kc; x++) { double yy = y < kc? y : y - kc2; double r2 = x*x + yy*yy; out[f](y,x) = exp(-0.5*r2/(bFacs[f]*bFacs[f])); } } if (normalize) { for (int y = 0; y < kc2; y++) for (int x = 0; x < kc; x++) { double sum = 0.0; for (int f = 0; f < fc; f++) { sum += out[f](y,x); } for (int f = 0; f < fc; f++) { out[f](y,x) /= sum; } } } return out; } std::vector> DamageHelper::computeWeights( const std::vector &bkFacs, int kc, bool normalize) { const int fc = bkFacs.size(); const int kc2 = 2 * (kc - 1); std::vector> out(fc); for (int f = 0; f < fc; f++) { out[f] = Image(kc,kc2); for (int y = 0; y < kc2; y++) for (int x = 0; x < kc; x++) { double yy = y < kc? 
y : y - kc2; double r2 = x*x + yy*yy; out[f](y,x) = bkFacs[f].y * exp(-0.5*r2/(bkFacs[f].x*bkFacs[f].x)); } } if (normalize) { for (int y = 0; y < kc2; y++) for (int x = 0; x < kc; x++) { double sum = 0.0; for (int f = 0; f < fc; f++) { sum += out[f](y,x); } for (int f = 0; f < fc; f++) { out[f](y,x) /= sum; } } } return out; } std::vector > DamageHelper::computeWeights( const std::vector &bkFacs, int kc, RFLOAT angpix, RFLOAT totalDose, RFLOAT dmga, RFLOAT dmgb, RFLOAT dmgc, bool normalize) { const int fc = bkFacs.size(); const int kc2 = 2 * (kc - 1); std::vector> out(fc); for (int f = 0; f < fc; f++) { out[f] = Image(kc,kc2); const double dose = totalDose*f/fc; for (int y = 0; y < kc2; y++) for (int x = 0; x < kc; x++) { double yy = y < kc? y : y - kc2; double r2 = x*x + yy*yy; out[f](y,x) = bkFacs[f].y * exp(-0.5*r2/(bkFacs[f].x*bkFacs[f].x)) * damage(sqrt(r2), kc, angpix, dose, dmga, dmgb, dmgc); } } if (normalize) { for (int y = 0; y < kc2; y++) for (int x = 0; x < kc; x++) { double sum = 0.0; for (int f = 0; f < fc; f++) { sum += out[f](y,x); } if (sum > 0.0) { for (int f = 0; f < fc; f++) { out[f](y,x) /= sum; } } } } return out; } std::vector> DamageHelper::computeWeights(const Image& fcc) { const int kc = fcc.data.xdim; const int fc = fcc.data.ydim; const int kc2 = 2 * (kc - 1); std::vector> out(fc); for (int f = 0; f < fc; f++) { out[f] = Image(kc,kc2); for (int y = 0; y < kc2; y++) for (int x = 0; x < kc; x++) { const double yy = y < kc? y : y - kc2; const double r2 = x*x + yy*yy; const double r = sqrt(r2); int idx = (int)r; if (idx >= kc) { out[f](y,x) = 0.0; } else if (idx == kc-1) { out[f](y,x) = XMIPP_MAX(fcc(f, idx), 0.0); } else { const double rf = r - idx; const double v0 = XMIPP_MAX(fcc(f, idx), 0.0); const double v1 = XMIPP_MAX(fcc(f, idx+1), 0.0); out[f](y,x) = rf * v1 + (1.0 - rf) * v0; } } } for (int y = 0; y < kc2; y++) for (int x = 0; x < kc; x++) { double sum = 0.0; for (int f = 0; f < fc; f++) { sum += out[f](y,x); } if (sum > 0.0) { for (int f = 0; f < fc; f++) { out[f](y,x) /= sum; } } } return out; } DamageFit::DamageFit(const std::vector &snrData, const std::vector &snrWeight, int k, int t0, double ampScale, double decScale) : snrData(snrData), snrWeight(snrWeight), k(k), t0(t0), ampScale(ampScale), decScale(decScale) {} double DamageFit::f(const std::vector &x, void* tempStorage) const { const int tc = snrData.size(); const double amp = ampScale * x[0]; const double dec = decScale * x[1]; double sum = 0.0; for (int t = t0; t < tc; t++) { const double pred = amp * exp(-t/dec); const double obs = snrData[t]; const double d = pred - obs; sum += snrWeight[t] * d*d; } return sum; } GlobalDamageFit::GlobalDamageFit(const Image &snrData, const Image &snrWeight, int k0, int k1, int t0, bool L1) : snrData(snrData), snrWeight(snrWeight), k0(k0), k1(k1), t0(t0), L1(L1) {} double GlobalDamageFit::f(const std::vector &x, void* tempStorage) const { const int tc = snrData.data.ydim; const double a = x[0]; const double b = x[1]; const double c = x[2]; double sum = 0.0; const double eps = 1e-20; for (int k = k0; k < k1; k++) { double tau = a * pow(k,b) + c; if (tau < eps) tau = eps; const double n = getScale(k, tau); for (int t = t0; t < tc; t++) { const double d = exp(-t/tau); const double e = DIRECT_A2D_ELEM(snrData.data, t, k) - n * d; if (L1) { sum += DIRECT_A2D_ELEM(snrWeight.data, t, k) * std::abs(e); } else { sum += DIRECT_A2D_ELEM(snrWeight.data, t, k) * e * e; } } } return sum; } double GlobalDamageFit::getScale(int k, double tau) const { double nn = 0.0; double nd 
= 0.0; const int tc = snrData.data.ydim; for (int t = t0; t < tc; t++) { const double d = exp(-t/tau); const double y = DIRECT_A2D_ELEM(snrData.data, t, k); const double w = DIRECT_A2D_ELEM(snrWeight.data, t, k); nn += w * y * d; nd += w * d * d; } return nd > 0.0? nn/nd : 0.0; } PerFrameBFactorFit::PerFrameBFactorFit(const Image &fcc, int k0, int k1) : fcc(fcc), kc(fcc.data.xdim), fc(fcc.data.ydim), k0(k0), k1(k1) { } double PerFrameBFactorFit::f(const std::vector &x) const { std::vector scale(k1-k0+1), bfac(fc); for (int f = 0; f < fc; f++) { bfac[f] = x[f]; } for (int k = 0; k < k1-k0+1; k++) { scale[k] = x[k+fc]; } double sum = 0.0; for (int f = 0; f < fc; f++) for (int k = k0; k <= k1; k++) { double d = fcc(f,k) - scale[k-k0] * exp(-0.5*f*f/(bfac[f]*bfac[f])); sum += d*d; } return sum; } relion-3.1.3/src/jaz/damage_helper.h000066400000000000000000000146451411340063500173160ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef DAMAGE_FIT_H #define DAMAGE_FIT_H #include #include #include #include #include class DamageFit : public Optimization { public: DamageFit(const std::vector& snrData, const std::vector& snrWeight, int k, int t0, double ampScale, double decScale); double f(const std::vector& x, void* tempStorage) const; private: const std::vector& snrData; const std::vector& snrWeight; int k, t0; double ampScale, decScale; }; class GlobalDamageFit : public Optimization { public: GlobalDamageFit(const Image& snrData, const Image& snrWeight, int k0, int k1, int t0, bool L1); double f(const std::vector& x, void* tempStorage) const; double getScale(int k, double tau) const; private: const Image& snrData; const Image& snrWeight; int k0, k1, t0; bool L1; }; class PerFrameBFactorFit : public Optimization { public: PerFrameBFactorFit(const Image& fcc, int k0, int k1); double f(const std::vector& x) const; private: const Image &fcc; int kc, fc, k0, k1; }; class DamageHelper { public: static void fitDamage(const Image& frcData, const Image& frcWeight, std::vector& amp, std::vector& dec, int t0, double dosePerFrame, bool root = false); static Image plotDamage( const std::vector& amp, const std::vector& dec, int frames); static Image plotDamage( const std::vector& dec, int frames); static Image plotDamageFrc( const std::vector& amp, const std::vector& dec, int frames); static Image plotDamageFrc( const std::vector& dec, int frames); static void fitGlobalDamage( const Image& frcData, const Image& frcWeight, std::vector& amp, double* a, double* b, double* c, int k0, int k1, int t0, double angpix, double dosePerFrame, bool L1 = false); static Image plotGlobalDamage( double a, double b, double c, const std::vector& amp, int 
freqs, int frames, double angpix, double dosePerFrame, bool frc = false); static Image plotGlobalDamage( double a, double b, double c, int freqs, int frames, double angpix, double dosePerFrame, bool frc = false); static std::vector> damageWeights( int s, RFLOAT angpix, int f0, int fc, RFLOAT dosePerFrame, RFLOAT a, RFLOAT b, RFLOAT c); static Image damageWeight( int s, RFLOAT angpix, RFLOAT dose, RFLOAT a, RFLOAT b, RFLOAT c); static RFLOAT damage( double k, int kc, RFLOAT angpix, RFLOAT dose, RFLOAT a, RFLOAT b, RFLOAT c); static std::vector fitBFactors(const Image& fcc, int k0, int k1, int verb = 0); static std::pair,std::vector> fitBkFactors(const Image& fcc, int k0, int k1, int verb = 0); static std::vector fitBkFactors(const Image& fcc, const Image& env, const Image& wgh, int k0, int k1); static Image renderBkFit( const std::pair,std::vector>& sigScale, int kc, int fc, bool noScale = false); static Image renderBkFit( std::vector sig, int kc, int fc); static double findSigmaRec(const Image& fcc, int f, const std::vector& scale, double sig0, double sig1, int steps, int depth, double q); static gravis::d2Vector findSigmaKRec( const Image& fcc, int f, const std::vector& envelope, const std::vector& weight, int k0, int k1, double sig0, double sig1, int steps, int depth, double q); static std::vector> computeWeights( const std::vector& bFacs, int kc, bool normalize = true); static std::vector> computeWeights( const std::vector& bkFacs, int kc, bool normalize = true); static std::vector> computeWeights( const std::vector& bkFacs, int kc, RFLOAT angpix, RFLOAT totalDose, RFLOAT dmga, RFLOAT dmgb, RFLOAT dmgc, bool normalize = true); static std::vector> computeWeights( const Image& fcc); }; #endif relion-3.1.3/src/jaz/distribution_helper.cpp000066400000000000000000000703031411340063500211430ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #include #include #include using namespace gravis; double DistributionHelper::sampleGauss(double mu, double sigma) { double u1 = rand()/(double)RAND_MAX; double u2 = rand()/(double)RAND_MAX; double z = sqrt(-2.0*log(u1)) * cos(2.0*PI*u2); return mu + sigma * z; } Complex DistributionHelper::sampleGauss(Complex mu, double sigma) { double u1r = rand()/(double)RAND_MAX; double u2r = rand()/(double)RAND_MAX; double u1i = rand()/(double)RAND_MAX; double u2i = rand()/(double)RAND_MAX; double xr = sqrt(-2.0*log(u1r)) * cos(2.0*PI*u2r); double xi = sqrt(-2.0*log(u1i)) * cos(2.0*PI*u2i); return mu + sigma * Complex(xr,xi); } std::vector > DistributionHelper::ringHistograms( const std::vector >& coords, int bins, double step) { int pc = coords.size(); int fc = coords[0].size(); std::vector > out(fc); for (int f = 0; f < fc; f++) { out[f] = std::vector(bins,0.0); } for (int f = 0; f < fc-1; f++) { for (int p = 0; p < pc; p++) { d2Vector p0 = coords[p][f]; d2Vector p1 = coords[p][f+1]; d2Vector v = p1 - p0; int vbin = (int)(v.length()/step); if (vbin < bins) { double ringArea = 2*vbin + 1; out[f][vbin] += 1.0/ringArea; } } } return out; } Image DistributionHelper::drawHistogramStack(const std::vector >& data, int height) { const int fc = data.size(); const int bins = data[0].size(); Image hist(bins,height,fc); hist.data.initZeros(); hist.data.xinit = 0; hist.data.yinit = 0; hist.data.zinit = 0; double bmax = 0.0; for (int f = 0; f < fc; f++) { for (int b = 0; b < bins; b++) { if (data[f][b] > bmax) bmax = data[f][b]; } } if (bmax == 0.0) return hist; double scale = (double)(height - 1)/bmax; for (int f = 0; f < fc; f++) { for (int b = 0; b < bins; b++) { double val = scale * data[f][b]; int vi = (int)val; if (vi > height) vi = height; for (int y = 0; y < vi; y++) { hist(f,y,b) = 1.0; } if (vi < height-1) { hist(f,vi,b) = val - vi; } } } return hist; } std::vector DistributionHelper::normalizeRing(const std::vector& histogram, double step) { const int s = histogram.size(); std::vector out(s,0.0); double sum = 0.0; for (int i = 0; i < s; i++) { double x0 = step*i; double x1 = step*(i+1); double a0 = 4.0*PI*x0*x0; double a1 = 4.0*PI*x1*x1; double ringArea = a1 - a0; sum += histogram[i]*ringArea; } if (sum > 0.0) { for (int i = 0; i < s; i++) { out[i] = histogram[i] / sum; } } return out; } std::vector DistributionHelper::toEnergy(const std::vector& probability) { const int s = probability.size(); std::vector out(s,0.0); for (int i = 0; i < s; i++) { out[i] = -log(probability[i]); } return out; } std::vector DistributionHelper::toProbability(const std::vector &energy) { const int s = energy.size(); std::vector out(s,0.0); for (int i = 0; i < s; i++) { out[i] = exp(-energy[i]); } return out; } void DistributionHelper::writeRing(const std::vector& values, double step, std::string filename, bool skipZero) { std::ofstream ofs(filename); const int s = values.size(); for (int i = 0; i < s; i++) { if (!(values[i] == values[i]) || std::isinf(values[i]) || skipZero && values[i] == 0.0) continue; ofs << (i + 0.5)*step << " " << values[i] << "\n"; } } std::vector DistributionHelper::corruptedExponentialProbability( double beta, double theta, double step, int bins, double area) { std::vector out(bins); const double twoPiBetaSq = 2.0*PI*beta*beta; for (int i = 0; i < bins; i++) { double x = (i+0.5)*step; out[i] = theta/area + (1.0 - theta)*exp(-x/beta)/twoPiBetaSq; } return out; } std::vector DistributionHelper::corruptedGaussProbability( 
double sigma, double theta, double area, double step, int bins) { std::vector out(bins); for (int i = 0; i < bins; i++) { double x = (i+0.5)*step; out[i] = theta/area + (1.0 - theta)*exp(-0.5*x*x/(sigma*sigma))/(2.0*PI*sigma*sigma); } return out; } std::vector DistributionHelper::bivariateStudentProbability(double sigma, double nu, double step, int bins) { std::vector out(bins); const double twoPiSigmaSq = 2.0*PI*sigma*sigma; const double nuSigmaSq = nu*sigma*sigma; for (int i = 0; i < bins; i++) { double x = (i+0.5)*step; out[i] = (1.0/twoPiSigmaSq) * pow(1.0 + x*x/nuSigmaSq, -(1.0 + nu/2.0)); } return out; } std::vector DistributionHelper::corruptedBivariateStudentProbability( double sigma, double nu, double theta, double rho, double step, int bins) { std::vector out(bins); const double twoPiSigmaSq = 2.0*PI*sigma*sigma; const double nuSigmaSq = nu*sigma*sigma; for (int i = 0; i < bins; i++) { double x = (i+0.5)*step; out[i] = theta * rho + (1.0 - theta) * (1.0/twoPiSigmaSq) * pow(1.0 + x*x/nuSigmaSq, -(1.0 + nu/2.0)); } return out; } std::vector DistributionHelper::bivariateGaussStudentProbability( double sigmaS, double sigmaG, double theta, double nu, double step, int bins) { std::vector out(bins); const double twoPiSigmaSSq = 2.0*PI*sigmaS*sigmaS; const double nuSigmaSSq = nu*sigmaS*sigmaS; const double twoPiSigmaGSq = 2.0*PI*sigmaG*sigmaG; for (int i = 0; i < bins; i++) { double x = (i+0.5)*step; out[i] = theta * (1.0/(twoPiSigmaGSq)) * exp(-0.5*x*x/(sigmaG*sigmaG)) + (1.0 - theta) * (1.0/twoPiSigmaSSq) * pow(1.0 + x*x/nuSigmaSSq, -(1.0 + nu/2.0)); } return out; } std::vector DistributionHelper::doubleStudentProbability( double sigma0, double sigmaS, double thetaS, double nu0, double nuS, double N, double area, double step, int bins) { std::vector out(bins); double sig2, nu, theta; if (N > 0) { const double Ninv = 1.0 / N; sig2 = sigma0 + Ninv * sigmaS; nu = nu0 + Ninv * nuS; theta = thetaS*Ninv; } else { sig2 = sigma0; nu = nu0; theta = 0.0; } const double twoPiSigmaSq = 2.0*PI*sig2; const double nuSigmaSq = nu*sig2; const double rho = 1.0 / area; for (int i = 0; i < bins; i++) { double x = (i+0.5)*step; out[i] = theta * rho + (1.0 - theta) * (1.0/twoPiSigmaSq) * pow(1.0 + x*x/nuSigmaSq, -(1.0 + nu/2.0)); } return out; } std::vector DistributionHelper::doubleGaussProbability( double sigma0, double sigmaS, double thetaS, double theta0, double thetaMin, bool relTheta, bool erfTheta, double N, double area, double step, int bins) { std::vector out(bins); double sig2, theta; if (N > 0) { const double Ninv = 1.0 / N; sig2 = sigma0 + Ninv * sigmaS; if (relTheta) { if (erfTheta) { theta = 1.0 - exp(log(0.5*(1.0 + sqrt(1.0 - exp(-N*thetaS)))) * area); } else { theta = theta0 + thetaS*Ninv; if (theta < thetaMin) { theta = thetaMin; } if (theta > 1.0) { theta = 1.0; } } } else { theta = thetaS; } if (theta > 1.0) theta = 1.0; } else { sig2 = sigma0; theta = thetaMin; } const double twoPiSigmaSq = 2.0*PI*sig2; const double rho = 1.0 / area; for (int i = 0; i < bins; i++) { double x = (i+0.5)*step; out[i] = theta * rho + (1.0 - theta) * (1.0/twoPiSigmaSq) * exp(-0.5*x*x/sig2); } return out; } std::vector DistributionHelper::multiStudentProbability( std::vector params, int N, int minPC, int maxPC, double area, double step, int bins) { std::vector out(bins); const double sig0 = params[0]; const double sigS = params[1]; const int pbc = maxPC - minPC + 1; int pb; double sig2; if (N == 0) { pb = maxPC - minPC; sig2 = sig0; } else { pb = N - minPC; sig2 = sig0 + sigS/(double)N; } if (pb >= pbc) pb = 
pbc; if (pb < 0) pb = 0; const double nu = params[2 + 2*pb]; const double theta = params[2 + 2*pb + 1]; const double twoPiSigmaSq = 2.0*PI*sig2*nu; const double nuSigmaSq = nu*sig2; const double rho = 1.0 / area; for (int i = 0; i < bins; i++) { double x = (i+0.5)*step; out[i] = theta * rho + (1.0 - theta) * (1.0/twoPiSigmaSq) * pow(1.0 + x*x/nuSigmaSq, -(1.0 + nu/2.0)); } return out; } gravis::d2Vector DistributionHelper::findCorrupted2DExponential( const std::vector > &coords, int f, double betaMax, double thetaMax, int betaSteps, int thetaSteps, double area, std::string outFilename) { int pc = coords.size(); double eMin = std::numeric_limits::max(); double bestBeta = 0.0, bestTheta = 0.0; Image eImg; bool drawPlot = outFilename != ""; if (drawPlot) { eImg = Image(thetaSteps, betaSteps); eImg.data.xinit = 0; eImg.data.yinit = 0; eImg.data.zinit = 0; } for (int i = 0; i < betaSteps; i++) for (int j = 0; j < thetaSteps; j++) { double beta = betaMax*(i+1)/(double)betaSteps; double beta2 = beta*beta; double theta = thetaMax*(j+1)/(double)thetaSteps; double e = 0.0; for (int p = 0; p < pc; p++) { d2Vector p0 = coords[p][f]; d2Vector p1 = coords[p][f+1]; d2Vector v = p1 - p0; double l1 = v.length(); e += -log(theta/area + (1.0 - theta)*exp(-l1/beta)/(2.0*PI*beta2)); } if (e < eMin) { eMin = e; bestBeta = beta; bestTheta = theta; } if (drawPlot) { eImg(i,j) = e; } } if (drawPlot) { VtkHelper::writeVTK(eImg, outFilename, 0, 0, 0, 100*thetaMax/(double)thetaSteps, betaMax/(double)betaSteps, 1.0); } return d2Vector(bestBeta, bestTheta); } gravis::d2Vector DistributionHelper::fitCorrupted2DExponential( const std::vector > &coords, int f, double area) { int pc = coords.size(); std::vector velocities(pc); for (int p = 0; p < pc; p++) { if (coords[p].size() < f+2) { std::stringstream sts0; sts0 << p; std::stringstream sts1; sts1 << (f+1); std::stringstream sts2; sts2 << (coords[p].size()); REPORT_ERROR("not enough data points for particle "+sts0.str()+"; required: "+sts1.str()+", present: "+sts2.str()+"\n"); } velocities[p] = (coords[p][f+1] - coords[p][f]).length(); } CorruptedExponentialFit cef(velocities, area); std::vector initial = {1.0, 0.0}; std::vector params = NelderMead::optimize(initial, cef, 0.1, 0.001, 10000); return d2Vector(params[0], params[1]); } d2Vector DistributionHelper::fitBivariateStudent( const std::vector > &coords, int f, double fixedNu) { int pc = coords.size(); std::vector velocities(pc); for (int p = 0; p < pc; p++) { velocities[p] = (coords[p][f+1] - coords[p][f]).length(); } if (fixedNu > 0.0) { BivariateStudentFit cef(velocities, fixedNu); std::vector initial = {1.0}; std::vector params = NelderMead::optimize(initial, cef, 0.1, 0.0001, 100000); return d2Vector(params[0], fixedNu); } else { BivariateStudentFit cef(velocities, -1); std::vector initial = {1.0, 2.0}; std::vector params = NelderMead::optimize(initial, cef, 0.1, 0.0001, 100000); return d2Vector(params[0], params[1]); } } d3Vector DistributionHelper::fitCorruptedBivariateStudent( const std::vector > &coords, int f, double area, double fixedNu) { int pc = coords.size(); std::vector velocities(pc); for (int p = 0; p < pc; p++) { velocities[p] = (coords[p][f+1] - coords[p][f]).length(); } CorruptedBivariateStudentFit csf(velocities, area, fixedNu); if (fixedNu > 0.0) { std::vector initial = {1.0, 0.01}; std::vector params = NelderMead::optimize(initial, csf, 0.1, 0.001, 10000); return d3Vector(params[0], fixedNu, params[1]); } else { std::vector initial = {1.0, 1.0, 0.01}; std::vector params = 
NelderMead::optimize(initial, csf, 0.1, 0.001, 10000); return d3Vector(params[0], params[1], params[2]); } } std::vector DistributionHelper::fitDoubleStudent( const std::vector >& coords, const std::vector& counts, int f, double area, bool fixedNu0, double nu0, bool fixedNuS, double nuS, bool relTheta) { int pc = coords.size(); std::vector velocities(pc); for (int p = 0; p < pc; p++) { velocities[p] = (coords[p][f+1] - coords[p][f]).length(); } DoubleStudentFit dsf(velocities, counts, nu0, fixedNu0, nuS, fixedNuS, area, relTheta); if (fixedNu0 && fixedNuS) { std::vector initial = {1.0, 0.0, 0.01}; std::vector params = NelderMead::optimize(initial, dsf, 0.1, 0.001, 10000); return std::vector{params[0], params[1], params[2], nu0, nuS}; } else if (fixedNu0 && !fixedNuS) // pathological { std::vector initial = {1.0, 0.0, 0.01, 0.0}; std::vector params = NelderMead::optimize(initial, dsf, 0.1, 0.001, 10000); return std::vector{params[0], params[1], params[2], nu0, params[3]}; } else if (!fixedNu0 && fixedNuS) { std::vector initial = {1.0, 0.0, 0.01, 2.0}; std::vector params = NelderMead::optimize(initial, dsf, 0.1, 0.001, 10000); return std::vector{params[0], params[1], params[2], params[3], nuS}; } else // (!fixedNu0 && !fixedNuS) { std::vector initial = {1.0, 0.0, 0.01, 2.0, 0.0}; std::vector params = NelderMead::optimize(initial, dsf, 0.1, 0.001, 10000); return std::vector{params[0], params[1], params[2], params[3], params[4]}; } } std::vector DistributionHelper::fitDoubleGauss( const std::vector > &coords, const std::vector &counts, const std::vector &totalCounts, int f, double area, bool centered, std::vector initial) { int pc = coords.size(); std::vector velocities(pc); for (int p = 0; p < pc; p++) { velocities[p] = (coords[p][f+1] - coords[p][f]).length(); } DoubleGaussFit dgf(velocities, counts, totalCounts, area, centered); std::vector params = NelderMead::optimize(initial, dgf, 0.02, 0.00001, 100000); return std::vector{params[0], params[1], params[2], params[3], params[4]}; } std::vector DistributionHelper::fitCorruptedGauss( const std::vector > &coords, int f, double area) { int pc = coords.size(); std::vector velocities(pc); for (int p = 0; p < pc; p++) { velocities[p] = (coords[p][f+1] - coords[p][f]).length(); } CorruptedGaussFit cgf(velocities, area); std::vector initial = {1.0, 0.01}; std::vector params = NelderMead::optimize(initial, cgf, 0.1, 0.001, 10000); return params; } std::vector DistributionHelper::fitMultiStudent( const std::vector > &coords, const std::vector &counts, int f, double area, int minPC, int maxPC) { int pc = coords.size(); std::vector velocities(pc); for (int p = 0; p < pc; p++) { velocities[p] = (coords[p][f+1] - coords[p][f]).length(); } MultiStudentFit msf(velocities, counts, minPC, maxPC, area); const int pbc = maxPC - minPC + 1; std::vector initial(2 + 2*pbc); initial[0] = 2.0; initial[1] = 0.0; for (int i = 0; i < pbc; i++) { initial[2 + 2*i] = 3.0; // nu initial[2 + 2*i + 1] = 0.2; // theta } std::vector params = NelderMead::optimize(initial, msf, 0.1, 0.001, 100000); return params; } d4Vector DistributionHelper::fitGaussStudent( const std::vector > &coords, int f, double fixedNu) { int pc = coords.size(); std::vector velocities(pc); for (int p = 0; p < pc; p++) { velocities[p] = (coords[p][f+1] - coords[p][f]).length(); } GaussStudentFit gsf(velocities, fixedNu); if (fixedNu > 0.0) { std::vector initial = {0.5, 2.0, 0.1}; std::vector params = NelderMead::optimize(initial, gsf, 0.1, 0.001, 10000); return d4Vector(params[0], params[1], params[2], 
fixedNu); } else { std::vector initial = {1.0, 1.0, 0.5, 2.0}; std::vector params = NelderMead::optimize(initial, gsf, 0.1, 0.001, 10000); return d4Vector(params[0], params[1], params[2], params[3]); } } CorruptedExponentialFit::CorruptedExponentialFit(const std::vector &velocities, double area) : velocities(velocities), area(area) {} double CorruptedExponentialFit::f(const std::vector &x, void* tempStorage) const { const int pc = velocities.size(); double e = 0.0; const double beta = x[0]; const double theta = x[1]; const double twoPiBetaSq = 2.0*PI*beta*beta; if (beta <= 0.0 || theta <= 0.0 || theta > 1.0) return std::numeric_limits::max(); for (int p = 0; p < pc; p++) { e += -log(theta/area + (1.0 - theta)*exp(-velocities[p]/beta)/twoPiBetaSq); } return e; } BivariateStudentFit::BivariateStudentFit(const std::vector &values, double fixedNu) : values(values), fixedNu(fixedNu) { } double BivariateStudentFit::f(const std::vector &x, void* tempStorage) const { const int pc = values.size(); double e = 0.0; const double sigma = x[0]; const double nu = fixedNu > 0.0? fixedNu : x[1]; const double logTwoPiSigmaSq = log(2.0*PI*sigma*sigma*nu); const double nuSigmaSq = nu*sigma*sigma; if (sigma <= 0.0 || nu <= 0.0) return std::numeric_limits::max(); for (int p = 0; p < pc; p++) { const double v = values[p]; e += logTwoPiSigmaSq + (1.0 + nu/2.0) * log(1.0 + v*v/nuSigmaSq); } return e; } CorruptedBivariateStudentFit::CorruptedBivariateStudentFit(const std::vector &values, double area, double fixedNu) : values(values), area(area), fixedNu(fixedNu) { } double CorruptedBivariateStudentFit::f(const std::vector &x, void* tempStorage) const { const int pc = values.size(); double e = 0.0; const double sigma = x[0]; const double nu = fixedNu > 0.0? fixedNu : x[1]; const double theta = fixedNu > 0.0? x[1] : x[2]; const double rho = 1.0/area; const double twoPiSigmaSq = 2.0*PI*sigma*sigma; const double nuSigmaSq = nu*sigma*sigma; if (sigma <= 0.0 || nu <= 0.0 || theta <= 0.0 || theta >= 1.0) return std::numeric_limits::max(); for (int p = 0; p < pc; p++) { const double y = values[p]; e += -log(theta * rho + (1.0 - theta) * (1.0/twoPiSigmaSq) * pow(1.0 + y*y/nuSigmaSq, -(1.0 + nu/2.0))); } return e; } GaussStudentFit::GaussStudentFit(const std::vector &values, double fixedNu) : values(values), fixedNu(fixedNu) { } double GaussStudentFit::f(const std::vector &x, void* tempStorage) const { const int pc = values.size(); double e = 0.0; const double sigmaS = x[0]; const double sigmaG = x[1]; const double theta = x[2]; const double nu = fixedNu > 0.0? 
fixedNu : x[3]; const double twoPiSigmaSSq = 2.0*PI*sigmaS*sigmaS; const double nuSigmaSSq = nu*sigmaS*sigmaS; const double twoPiSigmaGSq = 2.0*PI*sigmaG*sigmaG; if (sigmaG <= 0.0 || sigmaS <= 0.0 || nu <= 0.0 || theta <= 0.0 || theta >= 1.0) { return std::numeric_limits::max(); } for (int p = 0; p < pc; p++) { const double y = values[p]; e += -log(theta * (1.0/twoPiSigmaGSq) * exp(-0.5*y*y/(sigmaG*sigmaG)) + (1.0 - theta) * (1.0/twoPiSigmaSSq) * pow(1.0 + y*y/nuSigmaSSq, -(1.0 + nu/2.0))); } return e; } DoubleStudentFit::DoubleStudentFit(const std::vector &values, const std::vector &counts, double nu0, bool fixedNu0, double nuS, bool fixedNuS, double area, bool relTheta) : values(values), counts(counts), myNu0(nu0), myNuS(nuS), area(area), fixedNu0(fixedNu0), fixedNuS(fixedNuS), relTheta(relTheta) { } double DoubleStudentFit::f(const std::vector &x, void* tempStorage) const { const int pc = values.size(); double e = 0.0; const double sigma0 = x[0]; const double sigmaS = x[1]; const double thetaS = x[2]; double nu0, nuS; if (fixedNu0 && fixedNuS) { nu0 = myNu0; nuS = myNuS; } else if (!fixedNu0 && fixedNuS) { nu0 = x[3]; nuS = myNuS; } else if (fixedNu0 && !fixedNuS) // pathological { nu0 = myNu0; nuS = x[3]; } else if (!fixedNu0 && !fixedNuS) { nu0 = x[3]; nuS = x[4]; } const double rho = 1.0 / area; if (sigma0 <= 0.0 || nu0 <= 0.0 || thetaS <= 0.0) { return std::numeric_limits::max(); } for (int p = 0; p < pc; p++) { if (counts[p] <= 0) continue; const double Ninv = 1.0 / (double)counts[p]; const double sig2 = sigma0 + Ninv * sigmaS; const double nu = nu0 + Ninv * nuS; if (sig2 <= 0.0 || nu <= 0.0) { return std::numeric_limits::max(); } double theta = relTheta? thetaS*Ninv : thetaS; if (theta < 0.0) theta = 0.0; if (theta > 1.0) theta = 1.0; const double twoPiSigmaSq = 2.0*PI*sig2; const double nuSigmaSq = nu*sig2; const double y = values[p]; e += -log(theta * rho + (1.0 - theta) * (1.0/twoPiSigmaSq) * pow(1.0 + y*y/nuSigmaSq, -(1.0 + nu/2.0))); } return e; } MultiStudentFit::MultiStudentFit(const std::vector &values, const std::vector &counts, int minPN, int maxPN, double area) : values(values), counts(counts), minPN(minPN), maxPN(maxPN), area(area) { } double MultiStudentFit::f(const std::vector &x, void* tempStorage) const { const int pc = values.size(); double e = 0.0; const double sigma0 = x[0]; const double sigmaS = x[1]; if (sigma0 <= 0.0) { return std::numeric_limits::max(); } const double rho = 1.0 / area; const int pbn = maxPN - minPN + 1; std::vector nu(pbn); std::vector theta(pbn); for (int i = 0; i < pbn; i++) { nu[i] = x[2 + 2*i]; theta[i] = x[2 + 2*i + 1]; if (nu[i] <= 0.0 || theta[i] < 0.0 || theta[i] > 1.0) { return std::numeric_limits::max(); } } for (int p = 0; p < pc; p++) { const int pb = counts[p] - minPN; if (pb < 0 || pb >= pbn) continue; const double Ninv = 1.0 / (double)counts[p]; const double sig2 = sigma0 + Ninv * sigmaS; if (sig2 <= 0.0) { return std::numeric_limits::max(); } const double thetaP = theta[pb]; const double nuP = nu[pb]; const double twoPiSigmaSq = 2.0*PI*sig2; const double nuSigmaSq = nuP*sig2; const double y = values[p]; e += -log(thetaP * rho + (1.0 - thetaP) * (1.0/twoPiSigmaSq) * pow(1.0 + y*y/nuSigmaSq, -(1.0 + nuP/2.0))); } return e; } DoubleGaussFit::DoubleGaussFit(const std::vector &values, const std::vector &counts, const std::vector &totalCounts, double area, bool centered) : values(values), counts(counts), totalCounts(totalCounts), area(area), centered(centered) { } double DoubleGaussFit::f(const std::vector &x, void* tempStorage) 
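/* [Editor's note -- descriptive comment, not part of the original source.]
 * In DoubleStudentFit (and, for the variance term, also in MultiStudentFit below)
 * the distribution parameters depend on the number of particles N behind each
 * measurement: with Ninv = 1/N,
 *
 *     sig2 = sigma0 + Ninv * sigmaS,      nu = nu0 + Ninv * nuS,
 *
 * so sigma0 and nu0 describe the many-particle limit while sigmaS and nuS carry
 * the per-particle contribution.  Which of nu0/nuS are optimised (i.e. appear in
 * the vector x) is controlled by the fixedNu0/fixedNuS flags, mirroring the four
 * branches of DistributionHelper::fitDoubleStudent() above.
 */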
const { const int pc = values.size(); double e = 0.0; const double sigma0 = x[0]; const double sigmaS = x[1]; const double theta0 = x[2]; const double thetaS = x[3]; const double thetaMin = x[4]; const double rho = 1.0 / area; if (sigma0 <= 0.0 || sigmaS <= 0.0 || thetaS < 0.0 || thetaMin < 0.0) { return std::numeric_limits::max(); } for (int p = 0; p < pc; p++) { if (counts[p] <= 0) continue; const double Ninv = centered? 1.0 / (double)counts[p] + 1.0 / (double)totalCounts[p] : 1.0 / (double)counts[p]; const double correction = centered? (1.0 - (double)counts[p] / (double)totalCounts[p]) : 1.0; if (correction == 0) continue; const double sig2 = correction * (sigma0 + Ninv * sigmaS); if (sig2 <= 0.0) { return std::numeric_limits::max(); } double theta = theta0 + Ninv * thetaS; if (theta < thetaMin) theta = thetaMin; if (theta > 1.0) theta = 1.0; const double twoPiSigmaSq = 2.0*PI*sig2; const double y = values[p]; e += -log(theta * rho + (1.0 - theta) * (1.0/twoPiSigmaSq) * exp(-0.5*y*y/sig2)); } return e; } CorruptedGaussFit::CorruptedGaussFit(const std::vector &values, double area) : values(values), area(area) { } double CorruptedGaussFit::f(const std::vector &x, void* tempStorage) const { const int pc = values.size(); double e = 0.0; const double sigma = x[0]; const double theta = x[1]; const double rho = 1.0 / area; if (sigma <= 0.0 || theta < 0.0 || theta > 1.0) { return std::numeric_limits::max(); } const double sig2 = sigma * sigma; const double twoPiSigmaSq = 2.0*PI*sig2; for (int p = 0; p < pc; p++) { const double y = values[p]; e += -log(theta * rho + (1.0 - theta) * (1.0/twoPiSigmaSq) * exp(-0.5*y*y/sig2)); } return e; } relion-3.1.3/src/jaz/distribution_helper.h000066400000000000000000000235121411340063500206100ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #ifndef DISTRIBUTION_HELPER_H #define DISTRIBUTION_HELPER_H #include #include #include #include #include #include class CorruptedGaussFit : public Optimization { public: CorruptedGaussFit(const std::vector& values, double area); const std::vector& values; const double area; double f(const std::vector& x, void* tempStorage) const; }; class CorruptedExponentialFit : public Optimization { public: CorruptedExponentialFit(const std::vector& velocities, double area); const std::vector& velocities; const double area; double f(const std::vector& x, void* tempStorage) const; }; class BivariateStudentFit : public Optimization { public: BivariateStudentFit(const std::vector& velocities, double fixedNu); const std::vector& values; double fixedNu; double f(const std::vector& x, void* tempStorage) const; }; class GaussStudentFit : public Optimization { public: GaussStudentFit(const std::vector& values, double fixedNu); const std::vector& values; double fixedNu; double f(const std::vector& x, void* tempStorage) const; }; class DoubleStudentFit : public Optimization { public: DoubleStudentFit(const std::vector& values, const std::vector& counts, double nu0, bool fixedNu0, double nuS, bool fixedNuS, double area, bool relTheta); const std::vector& values; const std::vector& counts; double myNu0, myNuS, area; bool fixedNu0, fixedNuS, relTheta; double f(const std::vector& x, void* tempStorage) const; }; class DoubleGaussFit : public Optimization { public: DoubleGaussFit(const std::vector& values, const std::vector& counts, const std::vector& totalCounts, double area, bool centered); const std::vector& values; const std::vector& counts, &totalCounts; double area; bool centered; double f(const std::vector& x, void* tempStorage) const; }; class MultiStudentFit : public Optimization { public: MultiStudentFit( const std::vector& values, const std::vector& counts, int minPN, int maxPN, double area); const std::vector& values; const std::vector& counts; int minPN, maxPN; double area; double f(const std::vector& x, void* tempStorage) const; }; class CollectiveGaussStudentFit : public Optimization { public: CollectiveGaussStudentFit(const std::vector>& values, double fixedNu, double mGauss, double f0Gauss, double betaStudent); const std::vector>& values; double fixedNu; double mGauss, f0Gauss, betaStudent; double f(const std::vector& x, void* tempStorage) const; }; class CorruptedBivariateStudentFit : public Optimization { public: CorruptedBivariateStudentFit(const std::vector& values, double area, double fixedNu); const std::vector& values; const double area; const double fixedNu; double f(const std::vector& x, void* tempStorage) const; }; class DistributionHelper { public: static double sampleGauss(double mu, double sigma); static Complex sampleGauss(Complex mu, double sigma); static std::vector > ringHistograms( const std::vector >& coords, int bins, double step); static Image drawHistogramStack( const std::vector >& data, int height); static std::vector normalizeRing( const std::vector& histogram, double step); static std::vector toEnergy( const std::vector& probability); static std::vector toProbability( const std::vector& energy); static void writeRing( const std::vector& values, double step, std::string filename, bool skipZero); static std::vector corruptedExponentialProbability( double beta, double theta, double step, int bins, double area); static std::vector corruptedGaussProbability( double sigma, double theta, double area, double 
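/* [Editor's note -- illustrative sketch, not part of the original source; the class
 *  below is hypothetical and only shows the pattern shared by the *Fit classes.]
 * Every fit class in this header derives from Optimization, keeps const references
 * to the observed data, and implements f() as a negative log-likelihood, so that
 * NelderMead::optimize() can minimise it.  A minimal new model would look like:
 *
 *     class PlainGaussFit : public Optimization
 *     {
 *         public:
 *             PlainGaussFit(const std::vector<double>& values) : values(values) {}
 *             const std::vector<double>& values;
 *
 *             double f(const std::vector<double>& x, void* tempStorage) const
 *             {
 *                 const double sigma = x[0];
 *                 if (sigma <= 0.0) return std::numeric_limits<double>::max();
 *                 double e = 0.0;
 *                 for (int p = 0; p < (int)values.size(); p++)
 *                     e += log(2.0*PI*sigma*sigma) + 0.5*values[p]*values[p]/(sigma*sigma);
 *                 return e;
 *             }
 *     };
 */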
step, int bins); static std::vector bivariateStudentProbability( double sigma, double nu, double step, int bins); static std::vector corruptedBivariateStudentProbability( double sigma, double nu, double theta, double rho, double step, int bins); static std::vector bivariateGaussStudentProbability( double sigmaS, double sigmaG, double theta, double nu, double step, int bins); static std::vector doubleStudentProbability( double sigma0, double sigmaS, double thetaS, double nu0, double nuS, double N, double area, double step, int bins); static std::vector doubleGaussProbability( double sigma0, double sigmaS, double thetaS, double theta0, double thetaMin, bool relTheta, bool erfTheta, double N, double area, double step, int bins); static std::vector multiStudentProbability( std::vector params, int N, int minPC, int maxPC, double area, double step, int bins); static gravis::d2Vector findCorrupted2DExponential( const std::vector >& coords, int f, double betaMax, double thetaMax, int betaSteps, int thetaSteps, double area, std::string outFilename); static gravis::d2Vector fitCorrupted2DExponential( const std::vector >& coords, int f, double area); static gravis::d2Vector fitBivariateStudent( const std::vector >& coords, int f, double fixedNu = -1); static gravis::d3Vector fitCorruptedBivariateStudent( const std::vector >& coords, int f, double area, double fixedNu = -1); static std::vector fitDoubleStudent( const std::vector >& coords, const std::vector& counts, int f, double area, bool fixedNu0, double nu0, bool fixedNuS, double nuS, bool relTheta); static std::vector fitDoubleGauss(const std::vector >& coords, const std::vector& counts, const std::vector &totalCounts, int f, double area, bool centered, std::vector initial); static std::vector fitCorruptedGauss( const std::vector >& coords, int f, double area); static std::vector fitMultiStudent( const std::vector >& coords, const std::vector& counts, int f, double area, int minPC, int maxPC); static gravis::d4Vector fitGaussStudent(const std::vector >& coords, int f, double fixedNu = -1); }; #endif relion-3.1.3/src/jaz/fftw_helper.cpp000066400000000000000000000044671411340063500174020ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #include void FftwHelper::decenterUnflip2D(const MultidimArray &src, MultidimArray &dest) { const long int w = src.xdim; const long int h = src.ydim; dest.reshape(h, 2*(w - 1)); const long int yc = dest.ydim/2; for (long int y = 0; y < dest.ydim; y++) for (long int x = 0; x < dest.xdim; x++) { long int xs = x - w; if (xs < 0) { long int ys = (y + yc - 1) % dest.ydim; DIRECT_A2D_ELEM(dest, y, x) = -DIRECT_A2D_ELEM(src, h-ys-1, -xs-1); } else { long int ys = (y + yc) % dest.ydim; DIRECT_A2D_ELEM(dest, y, x) = DIRECT_A2D_ELEM(src, ys, xs); } } } void FftwHelper::decenterDouble2D(const MultidimArray &src, MultidimArray &dest) { const long int w = src.xdim; const long int h = src.ydim; dest.reshape(h, 2*(w - 1)); const long int yc = dest.ydim/2; for (long int y = 0; y < dest.ydim; y++) for (long int x = 0; x < dest.xdim; x++) { long int xs = x - w; if (xs < 0) { long int ys = (y + yc - 1) % dest.ydim; DIRECT_A2D_ELEM(dest, y, x) = DIRECT_A2D_ELEM(src, h-ys-1, -xs-1); } else { long int ys = (y + yc) % dest.ydim; DIRECT_A2D_ELEM(dest, y, x) = DIRECT_A2D_ELEM(src, ys, xs+1); } } } relion-3.1.3/src/jaz/fftw_helper.h000066400000000000000000000076051411340063500170440ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef FFTW_HELPER_H #define FFTW_HELPER_H #include class FftwHelper { public: template static void decenterHalf(const MultidimArray &src, MultidimArray &dest); template static void recenterHalf(const MultidimArray &src, MultidimArray &dest); template static void decenterFull(const MultidimArray &src, MultidimArray &dest); template static void recenterFull(const MultidimArray &src, MultidimArray &dest); static void decenterUnflip2D(const MultidimArray &src, MultidimArray &dest); static void decenterDouble2D(const MultidimArray &src, MultidimArray &dest); }; template void FftwHelper::decenterHalf(const MultidimArray &src, MultidimArray &dest) { dest.reshape(src); for (long int z = 0; z < dest.zdim; z++) for (long int y = 0; y < dest.ydim; y++) for (long int x = 0; x < dest.xdim; x++) { long int zp = z < dest.xdim? z : z - dest.zdim; long int yp = y < dest.xdim? 
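/* [Editor's note -- illustrative sketch, not part of the original source; spec and
 *  centred are hypothetical variables.]
 * These helpers move FFTW-style arrays between the "origin at index (0,0,0)"
 * layout produced by the transform and a centred layout convenient for display:
 * recenterHalf/recenterFull shift the origin to the array centre, decenterHalf and
 * decenterFull shift it back, and decenterUnflip2D/decenterDouble2D expand a real
 * 2D half-image (xdim = w/2 + 1, e.g. a power spectrum) to full width by mirroring,
 * with decenterUnflip2D negating the mirrored half.  For the square/cubic arrays
 * used here xdim equals zdim/2 + 1, which is why the wrap tests in decenterHalf
 * compare against dest.xdim.  Typical use:
 *
 *     Image<Complex> spec;      // half transform, (s/2 + 1) x s
 *     Image<Complex> centred;
 *     FftwHelper::recenterHalf(spec(), centred());
 *     FftwHelper::decenterHalf(centred(), spec());
 */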
y : y - dest.ydim; DIRECT_A3D_ELEM(dest, z, y, x) = DIRECT_A3D_ELEM(src, zp, yp, x); } } template void FftwHelper::recenterHalf(const MultidimArray &src, MultidimArray &dest) { dest.reshape(src); const long int zc = dest.zdim - dest.zdim/2; const long int yc = dest.ydim - dest.ydim/2; for (long int z = 0; z < dest.zdim; z++) for (long int y = 0; y < dest.ydim; y++) for (long int x = 0; x < dest.xdim; x++) { long int zs = (z + zc) % dest.zdim; long int ys = (y + yc) % dest.ydim; DIRECT_A3D_ELEM(dest, z, y, x) = DIRECT_A3D_ELEM(src, zs, ys, x); } } template void FftwHelper::decenterFull(const MultidimArray &src, MultidimArray &dest) { dest.reshape(src); const long int zc = dest.zdim/2; const long int yc = dest.ydim/2; const long int xc = dest.xdim/2; for (long int z = 0; z < dest.zdim; z++) for (long int y = 0; y < dest.ydim; y++) for (long int x = 0; x < dest.xdim; x++) { long int zs = (z + zc) % dest.zdim; long int ys = (y + yc) % dest.ydim; long int xs = (x + xc) % dest.xdim; DIRECT_A3D_ELEM(dest, z, y, x) = DIRECT_A3D_ELEM(src, zs, ys, xs); } } template void FftwHelper::recenterFull(const MultidimArray &src, MultidimArray &dest) { dest.reshape(src); const long int zc = dest.zdim - dest.zdim/2; const long int yc = dest.ydim - dest.ydim/2; const long int xc = dest.xdim - dest.xdim/2; for (long int z = 0; z < dest.zdim; z++) for (long int y = 0; y < dest.ydim; y++) for (long int x = 0; x < dest.xdim; x++) { long int zs = (z + zc) % dest.zdim; long int ys = (y + yc) % dest.ydim; long int xs = (x + xc) % dest.xdim; DIRECT_A3D_ELEM(dest, z, y, x) = DIRECT_A3D_ELEM(src, zs, ys, xs); } } #endif relion-3.1.3/src/jaz/fsc_helper.cpp000066400000000000000000000433711411340063500172040ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #include #include #include using namespace gravis; void FscHelper::computeFscTable( const std::vector > >& frames, const std::vector >& predictions, Image& table, Image& weight) { const int w = predictions[0].data.xdim; const int pc = frames.size(); const int fc = frames[0].size(); table = Image(w,fc); weight = Image(w,fc); table.data.initZeros(); weight.data.initZeros(); Image weight1 = Image(w,fc); Image weight2 = Image(w,fc); weight1.data.initZeros(); weight2.data.initZeros(); for (int p = 0; p < pc; p++) for (int f = 0; f < fc; f++) { FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM(frames[p][f]()) { int idx = ROUND(sqrt(kp*kp + ip*ip + jp*jp)); if (idx >= w) { continue; } Complex z1 = DIRECT_A3D_ELEM(frames[p][f](), k, i, j); Complex z2 = DIRECT_A3D_ELEM(predictions[p](), k, i, j); DIRECT_A2D_ELEM(table.data, f, idx) += z1.real * z2.real + z1.imag * z2.imag; DIRECT_A2D_ELEM(weight1.data, f, idx) += z1.norm(); DIRECT_A2D_ELEM(weight2.data, f, idx) += z2.norm(); } } for (int f = 0; f < fc; f++) for (int x = 0; x < w; x++) { RFLOAT w1 = DIRECT_A2D_ELEM(weight1.data, f, x); RFLOAT w2 = DIRECT_A2D_ELEM(weight2.data, f, x); RFLOAT ww = sqrt(w1 * w2); DIRECT_A2D_ELEM(weight.data, f, x) = ww; DIRECT_A2D_ELEM(table.data, f, x) /= ww; } } void FscHelper::computeFscRow( const MultidimArray &data0, const MultidimArray &data1, int row, Image &table, Image &weight) { const int w = data0.xdim; std::vector weight1(w, 0.0), weight2(w, 0.0), data(w, 0.0); FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM(data0) { int idx = ROUND(sqrt(kp*kp + ip*ip + jp*jp)); if (idx >= w) { continue; } Complex z1 = DIRECT_A3D_ELEM(data0, k, i, j); Complex z2 = DIRECT_A3D_ELEM(data1, k, i, j); data[idx] += z1.real * z2.real + z1.imag * z2.imag; weight1[idx] += z1.norm(); weight2[idx] += z2.norm(); } for (int x = 0; x < w; x++) { if (x >= table.data.xdim) continue; RFLOAT ww = sqrt(weight1[x] * weight2[x]); DIRECT_A2D_ELEM(table.data, row, x) = ww > 0.0? 
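/* [Editor's note -- descriptive comment with an illustrative snippet, not part of
 *  the original source; obs, pred, sh and fc are hypothetical.]
 * Both routines accumulate, for every resolution shell k,
 *
 *     FSC(k) = sum Re(F1 * conj(F2)) / sqrt( sum |F1|^2  x  sum |F2|^2 ),
 *
 * where the sums run over all Fourier voxels whose radius rounds to k
 * (z1.real*z2.real + z1.imag*z2.imag is exactly Re(z1 * conj(z2))).  Filling one
 * row of a per-frame table from two half transforms might look like:
 *
 *     Image<RFLOAT> table(sh, fc), weight(sh, fc);
 *     FscHelper::computeFscRow(obs(), pred(), f, table, weight);
 */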
data[x] / ww : 0.0; DIRECT_A2D_ELEM(weight.data, row, x) = ww; } } void FscHelper::initFscTable(int kc, int tc, Image &table, Image &weight0, Image &weight1) { table = Image(kc,tc); weight0 = Image(kc,tc); weight1 = Image(kc,tc); table.data.initZeros(); weight0.data.initZeros(); weight1.data.initZeros(); } void FscHelper::updateFscTable(const std::vector > &frames, const Image &prediction, double scale, Image &table, Image &weight0, Image &weight1) { const int fc = frames.size(); for (int f = 0; f < fc; f++) { updateFscTable(frames[f], f, prediction, scale, table, weight0, weight1); } } void FscHelper::updateFscTable(const Image &frame, int f, const Image &prediction, double scale, Image &table, Image &weight0, Image &weight1) { const int w = prediction.data.xdim; FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM(frame()) { int idx = ROUND(scale * sqrt(kp*kp + ip*ip + jp*jp)); if (idx >= w) { continue; } Complex z1 = DIRECT_A3D_ELEM(frame(), k, i, j); Complex z2 = DIRECT_A3D_ELEM(prediction(), k, i, j); DIRECT_A2D_ELEM(table.data, f, idx) += z1.real * z2.real + z1.imag * z2.imag; DIRECT_A2D_ELEM(weight0.data, f, idx) += z1.norm(); DIRECT_A2D_ELEM(weight1.data, f, idx) += z2.norm(); } } void FscHelper::updateFscTableVelWgh( const std::vector > &frames, const std::vector &velocities, const Image &prediction, Image &table, Image &weight0, Image &weight1) { const int w = prediction.data.xdim; const int fc = frames.size(); for (int f = 0; f < fc; f++) { FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM(frames[f]()) { int idx = ROUND(sqrt(kp*kp + ip*ip + jp*jp)); if (idx >= w) { continue; } double kv = (ip * velocities[f].y + jp * velocities[f].x)/(double)w; double wgh = kv < 1e-20? 1.0 : sin(PI*kv) / (PI*kv); //double wgh = exp(-0.5*kv*kv/0.5); Complex z1 = DIRECT_A3D_ELEM(frames[f](), k, i, j); Complex z2 = wgh * DIRECT_A3D_ELEM(prediction(), k, i, j); DIRECT_A2D_ELEM(table.data, f, idx) += z1.real * z2.real + z1.imag * z2.imag; DIRECT_A2D_ELEM(weight0.data, f, idx) += z1.norm(); DIRECT_A2D_ELEM(weight1.data, f, idx) += z2.norm(); } } } void FscHelper::updateVelFscTable( const std::vector > &frames, const std::vector &velocities, const Image &prediction, Image &table, Image &weight0, Image &weight1, int kmin, int kmax) { const int w = prediction.data.xdim; const int fc = frames.size(); if (kmax < 0) kmax = w; for (int f = 0; f < fc; f++) { FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM(frames[f]()) { int idx = ROUND(sqrt(kp*kp + ip*ip + jp*jp)); if (idx >= w || idx < kmin || idx > kmax) { continue; } double kv = ip * velocities[f].y + jp * velocities[f].x; int kvidx = ROUND(kv); if (kvidx < 0) kvidx = -kvidx; if (kvidx >= table().xdim) continue; Complex z1 = DIRECT_A3D_ELEM(frames[f](), k, i, j); Complex z2 = DIRECT_A3D_ELEM(prediction(), k, i, j); DIRECT_A2D_ELEM(table.data, f, kvidx) += z1.real * z2.real + z1.imag * z2.imag; DIRECT_A2D_ELEM(weight0.data, f, kvidx) += z1.norm(); DIRECT_A2D_ELEM(weight1.data, f, kvidx) += z2.norm(); } } } void FscHelper::mergeFscTables(const std::vector > &tables, const std::vector > &weights0, const std::vector > &weights1, Image &table, Image &weight) { const int w = tables[0].data.xdim; const int fc = tables[0].data.ydim; const int mgc = tables.size(); Image tableSum = Image(w,fc); Image weightSum0 = Image(w,fc); Image weightSum1 = Image(w,fc); tableSum.data.initZeros(); weightSum0.data.initZeros(); weightSum1.data.initZeros(); table = Image(w,fc); weight = Image(w,fc); for (int m = 0; m < mgc; m++) { for (int f = 0; f < fc; f++) for (int x = 0; x < w; x++) { DIRECT_A2D_ELEM(tableSum.data, f, x) += 
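/* [Editor's note -- descriptive comment, not part of the original source.]
 * The cross term and the two power sums are kept in separate accumulators
 * (table, weight0, weight1) precisely so that partial results from many
 * micrographs can be combined later: mergeFscTables() first sums all three over
 * the micrographs and only then forms table / sqrt(weight0 * weight1), which
 * would not be possible if each micrograph stored an already normalised FSC.
 * computeTsc() performs the same reduction over a frequency band [k0, k1) and
 * over all frames at once.
 */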
DIRECT_A2D_ELEM(tables[m].data, f, x); DIRECT_A2D_ELEM(weightSum0.data, f, x) += DIRECT_A2D_ELEM(weights0[m].data, f, x); DIRECT_A2D_ELEM(weightSum1.data, f, x) += DIRECT_A2D_ELEM(weights1[m].data, f, x); } } for (int f = 0; f < fc; f++) for (int x = 0; x < w; x++) { RFLOAT w1 = DIRECT_A2D_ELEM(weightSum0.data, f, x); RFLOAT w2 = DIRECT_A2D_ELEM(weightSum1.data, f, x); RFLOAT ww = sqrt(w1 * w2); DIRECT_A2D_ELEM(weight.data, f, x) = ww; if (ww > 0.0) { DIRECT_A2D_ELEM(table.data, f, x) = DIRECT_A2D_ELEM(tableSum.data, f, x) / ww; } else { DIRECT_A2D_ELEM(table.data, f, x) = 0.0; } } } double FscHelper::computeTsc( const std::vector> &tables, const std::vector> &weights0, const std::vector> &weights1, int k0, int k1) { //const int kc = tables[0].data.xdim; const int fc = tables[0].data.ydim; const int tc = tables.size(); double tSum = 0.0, w0Sum = 0.0, w1Sum = 0.0; for (int t = 0; t < tc; t++) for (int f = 0; f < fc; f++) for (int k = k0; k < k1; k++) { tSum += tables[t](f,k); w0Sum += weights0[t](f,k); w1Sum += weights1[t](f,k); } double ww = sqrt(w0Sum*w1Sum); if (ww > 0.0) { return tSum / ww; } else { return 0.0; } } void FscHelper::computeNoiseSq( std::vector > > frames, std::vector > predictions, Image &sigma2) { const int w = predictions[0].data.xdim; const int pc = frames.size(); const int fc = frames[0].size(); sigma2 = Image(w,fc); Image count(w,fc); sigma2.data.initZeros(); count.data.initZeros(); for (int p = 0; p < pc; p++) for (int f = 0; f < fc; f++) { FOR_ALL_ELEMENTS_IN_FFTW_TRANSFORM(frames[p][f]()) { int idx = ROUND(sqrt(kp*kp + ip*ip + jp*jp)); if (idx >= w) { continue; } Complex z1 = DIRECT_A3D_ELEM(frames[p][f](), k, i, j); Complex z2 = DIRECT_A3D_ELEM(predictions[p](), k, i, j); DIRECT_A2D_ELEM(sigma2.data, f, idx) += (z2 - z1).norm(); DIRECT_A2D_ELEM(count.data, f, idx) += 1.0; } } for (int f = 0; f < fc; f++) for (int x = 0; x < w; x++) { if (DIRECT_A2D_ELEM(count.data, f, x) > 0.0) { DIRECT_A2D_ELEM(sigma2.data, f, x) /= DIRECT_A2D_ELEM(count.data, f, x); } } } Image FscHelper::computeSignalSq(const Image &sigma2, const Image &frc) { const int kc = sigma2.data.xdim; const int fc = sigma2.data.ydim; Image out(kc,fc); const RFLOAT eps = 1e-3; for (int f = 0; f < fc; f++) for (int k = 0; k < kc; k++) { RFLOAT c = DIRECT_A2D_ELEM(frc.data, f, k); RFLOAT s2 = DIRECT_A2D_ELEM(sigma2.data, f, k); if (c < eps) { DIRECT_A2D_ELEM(out.data, f, k) = 0.0; } else { if (c > 1.0 - eps) c = 1.0 - eps; RFLOAT snr2 = c / (1.0 - c); DIRECT_A2D_ELEM(out.data, f, k) = snr2*s2; } } return out; } std::vector FscHelper::fitBfactorsNM(const Image &tau2, const Image &weight, int cutoff) { const int kc = tau2.data.xdim; const int ic = tau2.data.ydim; std::vector out(ic); const double Bw = 0.001, Cw = 10.0; Image tauRel(kc,ic); std::vector tauAvg(kc,0.0); for (int k = 0; k < kc; k++) { double ta = 0.0; double ts = 0.0; for (int f = 0; f < ic; f++) { double t2 = DIRECT_A2D_ELEM(tau2.data, f, k); if (t2 >= 0.0) { ta += t2; ts += 1.0; } } if (ts > 0.0) { tauAvg[k] = ta/ts; } for (int f = 0; f < ic; f++) { double t2 = DIRECT_A2D_ELEM(tau2.data, f, k); if (t2 >= 0.0) { DIRECT_A2D_ELEM(tauRel.data, f, k) = sqrt(t2)/tauAvg[k]; } else { DIRECT_A2D_ELEM(tauRel.data, f, k) = 0.0; } } } for (int f = 0; f < ic; f++) { std::cout << f << ": "; BFactorFit bf(tauRel, weight, f, cutoff, Bw, Cw); std::vector initial(2); // initial[0] = -0.001; //initial[1] = -10.0; initial[0] = -0.001/Bw; initial[1] = -10.0/Cw; std::vector opt = NelderMead::optimize(initial, bf, 0.01, 0.000001, 1000000); out[f][0] = opt[0]*Bw; 
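/* [Editor's note -- descriptive comment, not part of the original source.]
 * BFactorFit::f() (defined at the end of this file) evaluates the model
 * tau_rel(k) ~ exp(B*k*k/4 + C) with B = x[0]*Bscale and C = x[1]*Cscale, so the
 * simplex searches in rescaled coordinates (B/Bw, C/Cw); with Bw = 0.001 and
 * Cw = 10 this keeps the step sizes of the two variables comparable.  The two
 * assignments around this comment simply undo that scaling for the returned
 * per-frame (B, C) pairs.
 */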
out[f][1] = opt[1]*Cw; std::cout << out[f][0] << ", " << out[f][1] << "\n"; } return out; } std::vector FscHelper::fitBfactors(const Image &table, const Image &weight) { const int kc = table.data.xdim; const int ic = table.data.ydim; std::vector out(ic); std::vector avgFsc(kc, 0.0); for (int k = 0; k < kc; k++) { RFLOAT wsum = 0.0; for (int i = 0; i < ic; i++) { if (DIRECT_A2D_ELEM(table.data, i, k) < 0.0) continue; avgFsc[k] += DIRECT_A2D_ELEM(weight.data, i, k) * DIRECT_A2D_ELEM(table.data, i, k); wsum += DIRECT_A2D_ELEM(weight.data, i, k); } avgFsc[k] /= wsum; } const double eps = 1e-3; for (int i = 0; i < ic; i++) { d2Matrix A(0.0, 0.0, 0.0, 0.0); d2Vector b(0.0, 0.0); for (int k = 1; k < kc; k++) { RFLOAT fsc = DIRECT_A2D_ELEM(table.data, i, k); RFLOAT fscA = avgFsc[k]; if (fsc < eps || fscA < eps) continue; RFLOAT t = sqrt((fsc - fsc * fscA) / (fscA - fsc * fscA)); if (t < eps) continue; RFLOAT w = t * t * DIRECT_A2D_ELEM(weight.data, i, k); RFLOAT x = k * k; A(0,0) += w * x * x; A(1,0) += w * x; A(0,1) += w * x; A(1,1) += w; b[0] += w * x * log(t); b[1] += w * log(t); } A.invert(); d2Vector mq = A * b; out[i][0] = -4.0 * mq[0]; out[i][1] = exp(mq[1]); } return out; } Image FscHelper::tauRatio(const Image &table, const Image &weight) { const int kc = table.data.xdim; const int ic = table.data.ydim; Image out(kc,ic); std::vector avgFsc(kc, 0.0); for (int k = 0; k < kc; k++) { RFLOAT wsum = 0.0; for (int i = 0; i < ic; i++) { if (DIRECT_A2D_ELEM(table.data, i, k) < 0.0) continue; avgFsc[k] += DIRECT_A2D_ELEM(weight.data, i, k) * DIRECT_A2D_ELEM(table.data, i, k); wsum += DIRECT_A2D_ELEM(weight.data, i, k); } avgFsc[k] /= wsum; } const double eps = 1e-3; for (int i = 0; i < ic; i++) { for (int k = 0; k < kc; k++) { RFLOAT fsc = DIRECT_A2D_ELEM(table.data, i, k); RFLOAT fscA = avgFsc[k]; if (fsc < eps || fscA < eps) continue; DIRECT_A2D_ELEM(out.data, i, k) = (fsc - fsc * fscA) / (fscA - fsc * fscA); } } return out; } void FscHelper::computeBfactors(const std::vector& bfacs, Image &table) { const int kc = table.data.xdim; const int ic = table.data.ydim; for (int i = 0; i < ic; i++) for (int k = 0; k < kc; k++) { const double Bf = bfacs[i][0]; const double Cf = bfacs[i][1]; DIRECT_A2D_ELEM(table.data, i, k) = exp(Bf*k*k/4.0 + Cf); } } std::vector FscHelper::powerSpectrum3D(const Image &img) { const int sh = img.data.xdim; const int s = img.data.ydim; std::vector sum(sh, 0.0), wgh(sh, 0.0); for (int z = 0; z < img.data.zdim; z++) for (int y = 0; y < img.data.ydim; y++) for (int x = 0; x < img.data.xdim; x++) { const double xx = x; const double yy = y < sh? y : y - s; const double zz = z < sh? 
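/* [Editor's note -- descriptive comment, not part of the original source.]
 * fitBfactors() above is the closed-form counterpart of fitBfactorsNM(): for each
 * frame it converts the per-shell FSC into the SNR ratio relative to the
 * frame-averaged FSC (the same quantity tauRatio() tabulates), takes its square
 * root as a relative amplitude t(k), fits the line log t(k) = m*k^2 + q by
 * weighted least squares (the 2x2 normal equations assembled in A and b), and
 * returns (-4*m, exp(q)) per frame, i.e. a B factor and a relative scale.
 */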
z : z - s; const double r = sqrt(xx*xx + yy*yy + zz*zz); const int ri = ROUND(r); if (ri < sh) { sum[ri] += DIRECT_NZYX_ELEM(img.data, 0, z, y, x).norm(); wgh[ri] += 1.0; } } for (int i = 0; i < sh; i++) { if (wgh[i] > 0.0) { sum[i] /= wgh[i]; } } return sum; } BFactorFit::BFactorFit(const Image &tau2, const Image &weight, int frame, int cutoff, double Bscale, double Cscale) : tau2(tau2), weight(weight), frame(frame), cutoff(cutoff), Bscale(Bscale), Cscale(Cscale) { } double BFactorFit::f(const std::vector& x, void* tempStorage) const { double Bf = x[0]*Bscale; double Cf = x[1]*Cscale; double sum = 0.0; const int kc = tau2.data.xdim; for (int k = cutoff; k < kc; k++) { double w = DIRECT_A2D_ELEM(weight.data, frame, k); RFLOAT t2 = DIRECT_A2D_ELEM(tau2.data, frame, k); double pv = exp(Bf*k*k/4.0 + Cf); double e = pv - t2; sum += w * e*e; } return sum; } relion-3.1.3/src/jaz/fsc_helper.h000066400000000000000000000117241411340063500166460ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #ifndef FSC_HELPER_H #define FSC_HELPER_H #include #include #include #include #include class BFactorFit : public Optimization { public: BFactorFit( const Image& tau2, const Image& weight, int frame, int cutoff, double Bscale, double Cscale); double f(const std::vector& x, void* tempStorage) const; private: const Image& tau2; const Image& weight; int frame, cutoff; double Bscale, Cscale; }; class FscHelper { public: static void computeFscTable( const std::vector > >& frames, const std::vector >& predictions, Image& table, Image& weight); static void computeFscRow( const MultidimArray& data0, const MultidimArray& data1, int row, Image& table, Image& weight); static void initFscTable( int kc, int tc, Image& table, Image& weight0, Image& weight1); static void updateFscTable( const std::vector >& frames, const Image& predictions, double scale, Image& table, Image& weight0, Image& weight1); static void updateFscTable( const Image& frame, int f, const Image& prediction, double scale, Image& table, Image& weight0, Image& weight1); static void updateFscTableVelWgh( const std::vector > &frames, const std::vector &velocities, const Image &prediction, Image &table, Image &weight0, Image &weight1); static void updateVelFscTable( const std::vector >& frames, const std::vector& velocities, const Image& prediction, Image& table, Image& weight0, Image& weight1, int kmin = 0, int kmax = -1); static void mergeFscTables( const std::vector>& tables0, const std::vector>& weights0, const std::vector>& weights1, Image& table, Image& weight); static double computeTsc( const std::vector>& tables, const std::vector>& weights0, const std::vector>& weights1, int k0, int k1); static void computeNoiseSq( std::vector > > frames, std::vector > predictions, Image& sigma2); static Image computeSignalSq( const Image& sigma2, const Image& frc); static std::vector fitBfactorsNM( const Image& tau2, const Image& weight, int cutoff); static std::vector fitBfactors( const Image& table, const Image& weight); static Image tauRatio( const Image& table, const Image& weight); static void computeBfactors( const std::vector& bfacs, Image& table); static std::vector powerSpectrum3D(const Image& img); }; #endif relion-3.1.3/src/jaz/gravis/000077500000000000000000000000001411340063500156515ustar00rootroot00000000000000relion-3.1.3/src/jaz/gravis/.svn/000077500000000000000000000000001411340063500165355ustar00rootroot00000000000000relion-3.1.3/src/jaz/gravis/.svn/all-wcprops000066400000000000000000000111241411340063500207220ustar00rootroot00000000000000K 25 svn:wc:ra_dav:version-url V 57 /repos/gravis/!svn/ver/23256/libs/libGravis/trunk/include END tMesh.h K 25 svn:wc:ra_dav:version-url V 65 /repos/gravis/!svn/ver/22689/libs/libGravis/trunk/include/tMesh.h END tVarMatrix_blas.hxx K 25 svn:wc:ra_dav:version-url V 77 /repos/gravis/!svn/ver/22187/libs/libGravis/trunk/include/tVarMatrix_blas.hxx END matrix_blas_reference.h K 25 svn:wc:ra_dav:version-url V 81 /repos/gravis/!svn/ver/22187/libs/libGravis/trunk/include/matrix_blas_reference.h END tGray_A.h K 25 svn:wc:ra_dav:version-url V 67 /repos/gravis/!svn/ver/22187/libs/libGravis/trunk/include/tGray_A.h END CMakeLists.txt K 25 svn:wc:ra_dav:version-url V 72 /repos/gravis/!svn/ver/23256/libs/libGravis/trunk/include/CMakeLists.txt END lapack.h K 25 svn:wc:ra_dav:version-url V 66 /repos/gravis/!svn/ver/22187/libs/libGravis/trunk/include/lapack.h END Mesh.h K 25 svn:wc:ra_dav:version-url V 64 
400 Exception.h file 2012-06-29T11:51:38.755096Z 68bf4325052e2de3c5550099164c71d0 2012-06-28T11:28:09.901604Z 22187 sandro 2883 tRGB_A.h file 2013-11-05T12:28:37.995683Z f22eb4459929feb74b3a772cc912c893 2012-11-15T14:16:27.425453Z 22720 sandro 7641 tRGB.h file 2013-11-05T12:28:37.971683Z cad616aff1fdefd174d0db850b854d21 2012-11-15T14:16:27.425453Z 22720 sandro 8314 tBGR.h file 2012-06-29T11:51:38.747096Z 4a41d1395c9512a8433277e6921686ae 2012-06-28T11:28:09.901604Z 22187 sandro 5524 tVarMatrix.h file 2012-06-29T11:51:38.783096Z dba5557da17d754487827de386197219 2012-06-28T11:28:09.901604Z 22187 sandro 48280 OBJWriter.h file 2012-06-29T11:51:38.798096Z 0af6f657fe08dd968c5f4999acf44756 2012-06-28T11:28:09.901604Z 22187 sandro 400 NTuple.h file 2012-06-29T11:51:38.791096Z 093425456a383549795b133d3d48fb03 2012-06-28T11:28:09.901604Z 22187 sandro 3390 colour dir tMesh.h file 2013-11-05T12:28:38.031684Z 05426a15618a43744d7738b4df2c0740 2012-11-08T15:31:28.965203Z 22689 sandro 15931 tImage dir tVarMatrix_blas.hxx file 2012-06-29T11:51:38.814096Z adf16c791f766783023deba1a61c5cb3 2012-06-28T11:28:09.901604Z 22187 sandro 22312 matrix_blas_reference.h file 2012-06-29T11:51:38.824096Z d96dbd36efd9925d9b86d7c52e2c8935 2012-06-28T11:28:09.901604Z 22187 sandro 3207 tGray_A.h file 2012-06-29T11:51:38.834096Z c1b02f05562f6326c85b79ebd92078a1 2012-06-28T11:28:09.901604Z 22187 sandro 4961 CMakeLists.txt file 2013-11-05T12:28:38.063684Z bbc09993e1e0b418777013dc4bd25c4c 2013-04-12T18:37:50.523323Z 23256 forster 811 lapack.h file 2012-06-29T11:51:38.853096Z 30a22da3ff1aa28df7796cfb00e72cda 2012-06-28T11:28:09.901604Z 22187 sandro 5244 Mesh.h file 2012-06-29T11:51:38.861096Z dd63137f948fc3afd5d3bf291837ba74 2012-06-28T11:28:09.901604Z 22187 sandro 8973 relion-3.1.3/src/jaz/gravis/.svn/text-base/000077500000000000000000000000001411340063500204315ustar00rootroot00000000000000relion-3.1.3/src/jaz/gravis/.svn/text-base/CMakeLists.txt.svn-base000066400000000000000000000014531411340063500247110ustar00rootroot00000000000000add_subdirectory(colour) add_subdirectory(private) add_subdirectory(tImage) add_subdirectory(io) set( install_files Exception.h lapack.h matrix_blas_reference.h Mesh.h NMesh.h NTuple.h OBJReader.h OBJWriter.h program_options.h StringFormat.h t2Matrix.h t2Vector.h t3Matrix.h t3Vector.h t4Matrix.h t4Vector.h tArray.h tBGR.h tDefaultVector.h tGray_A.h tImageAlgorithm.h tImage.h Timer.hpp tLab.h tMatrix.h tMesh.h tMM.h tQuaternion.h tRGB_A.h tRGBA.h tRGB.h Tuple.h tVarMatrix_blas.h tVarMatrix_blas.hxx tVarMatrix.h tYCbCr.h PushPull.h ) INSTALL(FILES ${install_files} DESTINATION ${CMAKE_INSTALL_PREFIX}/include/${LIBTITLE}-${LIBVERSION}/${LIBTITLE}) relion-3.1.3/src/jaz/gravis/.svn/text-base/Exception.h.svn-base000066400000000000000000000055031411340063500242600ustar00rootroot00000000000000#ifndef __LIBGRAVIS_EXCEPTION_H__ #define __LIBGRAVIS_EXCEPTION_H__ /****************************************************************************** ** Title: Exception.h ** Description: Base class for exceptions in libgravis. 
** ** Author: Jean-Sebastien Pierrard, 2005 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #define GRAVIS_CHECK(condition, r) if (!(condition)) GRAVIS_THROW3(gravis::Exception, "Assertion failed", #condition) #define GRAVIS_THROW(e) throw e(std::string(__FILE__),__LINE__) #define GRAVIS_THROW2(e,r) throw e(std::string(__FILE__),__LINE__,(r)) #define GRAVIS_THROW3(e,r,arg) throw e(std::string(__FILE__),__LINE__,(r),(arg)) #include #include #include #include #include namespace gravis { class Exception : public std::runtime_error { public: Exception (const std::string& src, const int line, const std::string& dtl = "", const std::string& arg = "") : std::runtime_error( std::string("gravis exception: ") + src + ", " + dtl + " (" + arg + ")" ), _source(src), _detail(dtl), _arg(arg), _line(line) {} Exception(const Exception& e) : std::runtime_error( e.what() ), _source(e._source), _detail(e._detail), _arg(e._arg), _line(e._line) { } virtual ~Exception() throw() {} virtual const char* getClassName () const { return "Exception"; } const char* detail() const { return _detail.c_str(); } const char* argument() const { return _arg.c_str(); } const char* source() const { return _source.c_str(); } const int& line() const { return _line; } bool hasDetail() const { return _detail.length() > 0; } bool hasArgument() const { return _arg.length() > 0; } /* do not need that since ctor of runtime_error took the message already virtual const char* what() const throw() { std::string strError( _source + " " + _detail + " " + _arg ); char* pLostMem = new char[ strError.size() + 1 ]; for( size_t i = 0; i < strError.size(); i++ ) pLostMem[i] = strError[i]; pLostMem[ strError.size() ] = '\0'; return pLostMem; }*/ protected: std::string _source; std::string _detail; std::string _arg; int _line; }; inline std::ostream& operator << (std::ostream& os, const Exception& ex) { os << ex.getClassName() << " in " << ex.source() << ", line " << ex.line(); if (ex.hasDetail()) os << ": " << ex.detail(); if (ex.hasArgument()) os << " (" << ex.argument() << ")"; return os; } } /* Close Namespace "gravis" */ #endif relion-3.1.3/src/jaz/gravis/.svn/text-base/Mesh.h.svn-base000066400000000000000000000214151411340063500232160ustar00rootroot00000000000000#ifndef __LIBGRAVIS_MESH_H__ #define __LIBGRAVIS_MESH_H__ /****************************************************************************** ** Title: Mesh.h ** Description: Mesh representation. ** ** Author: Jean-Sebastien Pierrard, 2005 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #include "tArray.h" #include "tRGBA.h" #include "t2Vector.h" #include "t3Vector.h" #include "tImage.h" #include "Tuple.h" #include namespace gravis { class Material { public: Material (std::string n="") : name(n), ambient(0.1,1.0), diffuse(0.5,1.0), specular(1.0,1.0), shininess(10.0), hasTexture(false), textureName(), hasEnvMap(false), envMapName(), hasNormalMap(false), normalMapName() {} std::string name; fRGBA ambient; fRGBA diffuse; fRGBA specular; float shininess; /*!< \brief Phong exponent. */ bool hasTexture; /*!< \brief whether a (diffuse) texture is defined for this material. */ std::string textureName; /*!< \brief Filename of the (diffuse) texture. */ bool hasEnvMap; std::string envMapName; bool hasNormalMap; std::string normalMapName; }; /*! \brief Mesh data structure. 
* * A Mesh contains vertex, normal, texture coordinate (uvw) and material information. * For the three types of primitives (triangle, line, point) there are index arrays * referencing above information. For example for lines, lvi indexes into * vertex, and lti into texture coordinates. The vertices and colors * for the 4th lines in the mesh are then vertex[lvi[3][0]], vertex[lvi[3][1]], * color[lci[3][0]] and color[lci[3][1]]. * * tvi.size(), lvi.size() and pvi.size() implicitly specify how many triangles, lines * and points there are in the mesh. All other index arrays must either be of the * same length as the corresponding vertex index array, or of length 0. * * How is missing information handled? If for example no normals are assigned to * any triangles, tni.size() would be zero. If normals are assigned for some triangles, * but not for others, the tni-tuples for the respective triangles must have entries * of -1 (which is the 'invalid index'). */ class Mesh { public: tArray vertex; /*!< \brief Vertex array. */ tArray normal; /*!< \brief Normal array. */ tArray texcrd; /*!< \brief Texture coordinate array. */ tArray color; /*!< \brief Color array. */ std::vector material; /*!< \brief Material array. */ tArray tvi; /*!< \brief Triangle vertex indices. */ tArray tni; /*!< \brief Triangle normal indices. */ tArray tti; /*!< \brief Triangle texcrd indices. */ tArray tci; /*!< \brief Triangle color indices. */ tArray tmi; /*!< \brief Triangle material indices. */ tArray lvi; /*!< \brief Line vertex indices. */ tArray lti; /*!< \brief Line texcrd indices. */ tArray lci; /*!< \brief Line texcrd indices. */ tArray pvi; /*!< \brief Point vertex indices. */ tArray pci; /*!< \brief Point color indices. */ tArray adjacent; /*!< \brief Adjacency list. See generateAdjacencyList(). 
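[Editor's note -- descriptive addition, not in the original header:] For each
triangle t, adjacent[t] holds the indices of the three neighbouring triangles,
where entry e corresponds to the edge between vertices e and (e+1)%3 of t and is
-1 if no triangle shares that edge; the list is filled in by
generateAdjacencyList().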
*/ Mesh() : vertex(), normal(), texcrd(), color(), material(), tvi(), tni(), tti(), tci(), tmi(), lvi(), lti(), lci(), pvi(), pci(), adjacent() {} // Create a deep copy of the mesh void clone(Mesh& out) const { out.vertex = vertex.clone(); out.normal = normal.clone(); out.texcrd = texcrd.clone(); out.color = color.clone(); //out.material = material.save_clone(); out.material = material; out.tvi = tvi.clone(); out.tni = tni.clone(); out.tti = tti.clone(); out.tci = tci.clone(); out.tmi = tmi.clone(); out.lvi = lvi.clone(); out.lti = lti.clone(); out.lci = lci.clone(); out.pvi = pvi.clone(); out.pci = pci.clone(); out.adjacent = adjacent.clone(); } void generateNormals() { const int numFaces = tvi.size(); tni.setSize(numFaces); normal.setSize(numFaces); for (int i = 0; i < numFaces; i++) { f3Vector a = (vertex[tvi[i][1]] - vertex[tvi[i][0]]); f3Vector b = (vertex[tvi[i][2]] - vertex[tvi[i][0]]); normal[i] = cross(a, b).normalize(); tni[i] = Tuple3(i, i, i); } } void generatePerVertexNormals(unsigned int propagations=0) { tArray ncount; f3Vector norm; const int numFaces = tvi.size(); tni.setSize(numFaces); normal.setSize(vertex.size()); ncount.setSize(vertex.size()); for (unsigned int i = 0; i < ncount.size(); i++) ncount[i] = 0; for (unsigned int i = 0; i < normal.size(); i++) normal[i] = f3Vector(0.0f,0.0f,0.0f); for (int i = 0; i < numFaces; i++) { if(tvi[i].c0 < 0 || tvi[i].c1 < 0 || tvi[i].c2 < 0) continue; f3Vector a = (vertex[tvi[i][1]] - vertex[tvi[i][0]]); f3Vector b = (vertex[tvi[i][2]] - vertex[tvi[i][0]]); norm = cross(a, b).normalize(); tni[i] = tvi[i]; normal[tvi[i][0]] += norm; normal[tvi[i][1]] += norm; normal[tvi[i][2]] += norm; ncount[tvi[i][0]]++; ncount[tvi[i][1]]++; ncount[tvi[i][2]]++; } for (unsigned int i = 0; i < normal.size(); i++) { if(ncount[i] != 0) normal[i] /= ncount[i]; normal[i] = normal[i].normalize(); } tArray nnormal; nnormal.setSize(ncount.size()); for(unsigned int j=0; j nodeFaces(numVert); for (int i = 0; i < numFaces; i++) { for (int j = 0; j < 3; j++) { nodeFaces[tvi[i][j]].addFace(Tuple2(i, j)); } } // foreach face for (int f = 0; f < numFaces; f++) { Tuple3& ft = tvi[f]; Tuple3& at = adjacent[f]; // foreach edge for (int e = 0; e < 3; e++) { // already found adjacent face for this edge? if (at[e] >= 0) continue; // vertices for this edge int v1 = ft[e]; int v2 = ft[(e+1)%3]; // faces using these vertices Node& node1 = nodeFaces[v1]; Node& node2 = nodeFaces[v2]; for (int i = 0; i < node1.count; i++) { int f1 = node1.faces[i][0]; if (f1 == f) continue; // self for (int j = 0; j < node2.count; j++) { if (f1 == node2.faces[j][0]) { adjacent[f][e] = f1; adjacent[f1][node2.faces[j][1]] = f; } } } } } } }; } // namespace gravis #endif relion-3.1.3/src/jaz/gravis/.svn/text-base/NMesh.h.svn-base000066400000000000000000000076701411340063500233430ustar00rootroot00000000000000#ifndef __LIBGRAVIS_NMESH_H__ #define __LIBGRAVIS_NMESH_H__ /****************************************************************************** ** Title: NMesh.h ** Description: N-Sided Mesh representation. ** ** Author: Jean-Sebastien Pierrard, 2005 ** Brian Amberg ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #include #include "tArray.h" #include "tRGBA.h" #include "t2Vector.h" #include "t3Vector.h" #include "tImage.h" #include "NTuple.h" #include "Mesh.h" namespace gravis { /*! \brief N-Mesh data structure. 
* * The below is more or less correct, but we may now have double * precision values and faces of arbitrary (but equal) length. (Usefull * for multicube) * * A Mesh contains vertex, normal, texture coordinate (uvw) and material information. * For the three types of primitives (triangle, line, point) there are index arrays * referencing above information. For example for lines, lvi indexes into * vertex, and lti into texture coordinates. The vertices and colors * for the 4th lines in the mesh are then vertex[lvi[3][0]], vertex[lvi[3][1]], * color[lci[3][0]] and color[lci[3][1]]. * * fvi.size(), lvi.size() and pvi.size() implicitly specify how many triangles, lines * and points there are in the mesh. All other index arrays must either be of the * same length as the corresponding vertex index array, or of length 0. * * How is missing information handled? If for example no normals are assigned to * any triangles, fni.size() would be zero. If normals are assigned for some triangles, * but not for others, the fni-tuples for the respective triangles must have entries * of -1 (which is the 'invalid index'). */ template class NMesh { typedef t3Vector Vector; typedef NTuple Tuple; public: tArray vertex; /*!< \brief Vertex array. */ tArray normal; /*!< \brief Normal array. */ tArray texcrd; /*!< \brief Texture coordinate array. */ tArray color; /*!< \brief Color array. */ std::vector material; /*!< \brief Material array. */ tArray fvi; /*!< \brief Face vertex indices. */ tArray fni; /*!< \brief Face normal indices. */ tArray fti; /*!< \brief Face texcrd indices. */ tArray fci; /*!< \brief Face color indices. */ tArray fmi; /*!< \brief Face material indices. */ tArray lvi; /*!< \brief Line vertex indices. */ tArray lti; /*!< \brief Line texcrd indices. */ tArray lci; /*!< \brief Line texcrd indices. */ tArray pvi; /*!< \brief Point vertex indices. */ tArray pci; /*!< \brief Point color indices. */ tArray adjacent; /*!< \brief Adjacency list. See generateAdjacencyList(). */ // Create a deep copy of the mesh void clone(Mesh& out) const { out.vertex = vertex.clone(); out.normal = normal.clone(); out.texcrd = texcrd.clone(); out.color = color.clone(); //out.material = material.save_clone(); out.material = material; out.fvi = fvi.clone(); out.fni = fni.clone(); out.fti = fti.clone(); out.fci = fci.clone(); out.fmi = fmi.clone(); out.lvi = lvi.clone(); out.lti = lti.clone(); out.lci = lci.clone(); out.pvi = pvi.clone(); out.pci = pci.clone(); out.adjacent = adjacent.clone(); } }; typedef NMesh d3Mesh; typedef NMesh d4Mesh; typedef NMesh f3Mesh; typedef NMesh f4Mesh; // This should work nicely, as we have binary compatibility const Mesh& convert(const NMesh &in) { return *reinterpret_cast(&in); } } // namespace gravis #endif relion-3.1.3/src/jaz/gravis/.svn/text-base/NTuple.h.svn-base000066400000000000000000000064761411340063500235430ustar00rootroot00000000000000#ifndef __LIBGRAVIS_NTUPLE_H__ #define __LIBGRAVIS_NTUPLE_H__ /****************************************************************************** ** Title: NNTuple.h ** Description: N-NTuples (templated) ** ** Author: Brian Amberg 2006 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ namespace gravis { /*! \brief N-NTuple, typically used with Ieger datatypes for multi-index. */ template class NTuple { I m[N]; public: /*! \brief Construct a NTuple with entries of -1. */ NTuple() { for (size_t i=0; i= I(0)) (*this)[i] += o; } /*! 
\brief Whether all entries are non-negative. */ bool allValid() const { bool r = (*this)[0] >= I(0); for (size_t i=1; i= I(0)); return r; } //! Lexical Ordering for NTuples inline bool operator==(const NTuple& o) const { bool r = (*this)[0] == o[0]; for (size_t i=1; i(const NTuple& o) const { return (*this != o) && !(*this < o); } //! Lexical Ordering for NTuples inline bool operator<=(const NTuple& o) const { return (*this < o) || (*this == o); } //! Lexical Ordering for NTuples inline bool operator>=(const NTuple& o) const { return (*this > o) || (*this == o); } }; // class NTuple template inline std::ostream& operator<< (std::ostream& os, const NTuple& arg) { os << "["; for (int i=0; i NTuple nTuple(const I& c0) { NTuple r; r[0] = c0; return r; }; template NTuple nTuple(const I& c0, const I& c1) { NTuple r; r[0] = c0; r[1] = c1; return r; }; template NTuple nTuple(const I& c0, const I& c1, const I& c2) { NTuple r; r[0] = c0; r[1] = c1; r[2] = c2; return r; }; template NTuple nTuple(const I& c0, const I& c1, const I& c2, const I& c3) { NTuple r; r[0] = c0; r[1] = c1; r[2] = c2; r[3] = c3; return r; }; typedef NTuple I1Tuple; typedef NTuple I2Tuple; typedef NTuple I3Tuple; typedef NTuple I4Tuple; } // namespace gravis #endif relion-3.1.3/src/jaz/gravis/.svn/text-base/OBJReader.h.svn-base000066400000000000000000000006201411340063500240520ustar00rootroot00000000000000#ifndef __LIBGRAVIS_OBJREADER_H__ #define __LIBGRAVIS_OBJREADER_H__ #include "io/mesh/OBJReader.h" #warning io has been restructured and bundled into gravis/io/ \ You should preferably use the header and included routines, \ unless you rely on some special use of OBJReader/OBJWriter. \ These are still available but should be included as gravis/io/OBJ {Reader,Writer} .h #endif relion-3.1.3/src/jaz/gravis/.svn/text-base/OBJWriter.h.svn-base000066400000000000000000000006201411340063500241240ustar00rootroot00000000000000#ifndef __LIBGRAVIS_OBJWRITER_H__ #define __LIBGRAVIS_OBJWRITER_H__ #include "io/mesh/OBJWriter.h" #warning io has been restructured and bundled into gravis/io/ \ You should preferably use the header and included routines, \ unless you rely on some special use of OBJReader/OBJWriter. 
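// --- Illustrative sketch (an addition for clarity; not part of the original gravis tree) ---
// Typical use of the NTuple multi-index and its nTuple() factory functions.
// ntuple_demo() is a hypothetical helper name.
#include <iostream>
#include "NTuple.h"

inline void ntuple_demo()
{
  using namespace gravis;

  I3Tuple face = nTuple(0, 1, 2);   // vertex indices of one triangle
  I3Tuple unset;                    // default constructed: all entries are -1

  // allValid() is true only if every entry is non-negative,
  // i.e. the multi-index really references data.
  std::cout << face  << " valid: " << face.allValid()  << std::endl;
  std::cout << unset << " valid: " << unset.allValid() << std::endl;

  // offset() shifts only the non-negative entries, e.g. when two meshes are merged.
  face.offset(10);                  // face is now [10, 11, 12]
}
// --- end of sketch ---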
\ These are still available but should be included as gravis/io/OBJ {Reader,Writer} .h #endif relion-3.1.3/src/jaz/gravis/.svn/text-base/PushPull.h.svn-base000066400000000000000000000131441411340063500240760ustar00rootroot00000000000000/*============================================================================*/ /** * @file PushPull.h * * @brief Push-Pull interpolation class * * @date 1 Aug 2012 * @authors Jasenko Zivanov\n * jasenko.zivanov@unibas.ch\n * University of Basel, Switzerland */ /*============================================================================*/ #ifndef PUSH_PULL_H #define PUSH_PULL_H #include #include "tImage.h" namespace gravis { /** Push-Pull interpolation, interpolation on the image pyramid filling holes in an image */ class PushPull { public: /** Execute a push-pull interpolation (image pyramid hole filler) on the image * \param img Image to interpolate, do not use an alpha channel, use the mask to specify holes * \param mask Mask indicating which regions are holes (0: fill, 1: keep - as in the alpha channel) */ template static gravis::tImage interpolate( gravis::tImage img, gravis::tImage mask, int minSize = 1); private: template static gravis::tImage shrink2(gravis::tImage img); template static gravis::tImage shrink2(gravis::tImage img, gravis::tImage mask); template static gravis::tImage grow2(gravis::tImage img); template static gravis::tImage blur3x3(gravis::tImage img); }; template gravis::tImage PushPull :: interpolate( gravis::tImage img, gravis::tImage mask, int minSize) { const int w = img.cols(); const int h = img.rows(); gravis::tImage out(w,h); std::vector > pyramid(0); std::vector > maskPyramid(0); pyramid.push_back(img); maskPyramid.push_back(mask); gravis::tImage ci = img; gravis::tImage cm = mask; while ((int)ci.rows() > minSize && (int)ci.cols() > minSize) { ci = shrink2(ci,cm); cm = shrink2(cm); pyramid.push_back(ci); maskPyramid.push_back(cm); } maskPyramid[pyramid.size()-2].fill(1.f); for (int i = (int)pyramid.size() - 2; i >= 0; i--) { gravis::tImage pi0 = pyramid[i]; gravis::tImage pi1 = grow2(pyramid[i+1]); gravis::tImage pm = maskPyramid[i]; for (int y = 0; y < (int)pi0.rows(); y++) for (int x = 0; x < (int)pi0.cols(); x++) { pi0(x,y) = pm(x,y)*pi0(x,y) + (1.f - pm(x,y))*pi1(x,y); } } return pyramid[0]; } template gravis::tImage PushPull :: shrink2(gravis::tImage img, gravis::tImage mask) { const int w = img.cols(); const int h = img.rows(); const int w2 = (int)(ceil(w/2.f)); const int h2 = (int)(ceil(h/2.f)); gravis::tImage out(w2,h2); gravis::tImage weight(w2,h2); out.fill(T(0.f)); weight.fill(0.f); for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { out(x/2, y/2) += mask(x,y) * img(x,y); weight(x/2, y/2) += mask(x,y); } for (int i = 0; i < (w/2)*(h/2); i++) { if (weight[i] > 0.f) out[i] /= weight[i]; } return out; } template gravis::tImage PushPull :: shrink2(gravis::tImage img) { const int w = img.cols(); const int h = img.rows(); const int w2 = (int)(ceil(w/2.f)); const int h2 = (int)(ceil(h/2.f)); gravis::tImage out(w2,h2); gravis::tImage weight(w2,h2); out.fill(T(0.f)); weight.fill(0.f); for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { out(x/2, y/2) += img(x,y); weight(x/2, y/2)++; } for (int i = 0; i < w2*h2; i++) { if (weight[i] > 0.f) out[i] /= weight[i]; } return out; } template gravis::tImage PushPull :: grow2(gravis::tImage img) { const int w = img.cols(); const int h = img.rows(); gravis::tImage out(2*w,2*h); for (int y = 0; y < 2*h; y++) for (int x = 0; x < 2*w; x++) { out(x, y) = img(x/2,y/2); } return 
blur3x3(out); } template gravis::tImage PushPull :: blur3x3(gravis::tImage img) { const int w = img.cols(); const int h = img.rows(); gravis::tImage out(w,h); gravis::tImage weight(w,h); out.fill(T(0.f)); weight.fill(0.f); for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { if (x > 0 && y > 0) { out(x,y) += img(x-1,y-1)/4.f; weight(x,y) += 1/4.f; } if (x > 0) { out(x,y) += img(x-1,y)/2.f; weight(x,y) += 1/2.f; } if (x > 0 && y < h-1) { out(x,y) += img(x-1,y+1)/4.f; weight(x,y) += 1/4.f; } if (y > 0) { out(x,y) += img(x,y-1)/2.f; weight(x,y) += 1/2.f; } { out(x,y) += img(x,y); weight(x,y)++; } if (y < h-1) { out(x,y) += img(x,y+1)/2.f; weight(x,y) += 1/2.f; } if (x < w-1 && y > 0) { out(x,y) += img(x+1,y-1)/4.f; weight(x,y) += 1/4.f; } if (x < w-1) { out(x,y) += img(x+1,y)/2.f; weight(x,y) += 1/2.f; } if (x < w-1 && y < h-1) { out(x,y) += img(x+1,y+1)/4.f; weight(x,y) += 1/4.f; } } for (int i = 0; i < w*h; i++) { out[i] /= weight[i]; } return out; } } #endif relion-3.1.3/src/jaz/gravis/.svn/text-base/StringFormat.h.svn-base000066400000000000000000000042251411340063500247410ustar00rootroot00000000000000#ifndef __LIBGRAVIS_STRING_FORMAT_H__ #define __LIBGRAVIS_STRING_FORMAT_H__ #include #include #include namespace gravis { /** * Usage * std::string name = StringFormat("Dies sind ")(12)(" Zahlen."); * std::string name = StringFormat("Dies sind ")(12, 4, '0')(" Zahlen."); **/ class StringFormat { private: std::stringstream s; public: StringFormat(const StringFormat& start) : s() { s << start.string(); }; const char* c_str() const { return s.str().c_str(); } std::string string() const { return s.str(); } operator std::string() const { return s.str(); } bool operator==(const StringFormat& o) const { return o.string()==string(); } bool operator!=(const StringFormat& o) const { return o.string()!=string(); } bool operator==(const std::string& o) const { return o==string(); } bool operator!=(const std::string& o) const { return o!=string(); } StringFormat() : s() { } template explicit StringFormat(const T& e) : s() { s << e; } template StringFormat(const T& e, std::streamsize w) : s() { s << std::setw(w) << e; } template StringFormat(const T& e, std::streamsize w, char fill) : s() { s << std::setw(w) << std::setfill(fill) << e; } template inline StringFormat& operator()(const T& e) { s << e; return *this; } template inline StringFormat& operator()(const T& e, int w) { s << std::setw(w) << e; return *this; } template inline StringFormat& operator()(const T& e, int w, char fill) { s << std::setw(w) << std::setfill(fill) << e; return *this; } }; } inline std::ostream& operator<< (std::ostream& os, const gravis::StringFormat& arg) { os << arg.string(); return os; } #endif relion-3.1.3/src/jaz/gravis/.svn/text-base/Timer.hpp.svn-base000066400000000000000000000063411411340063500237430ustar00rootroot00000000000000#ifndef __LIBGRAVIS_TIMER_HPP__ #define __LIBGRAVIS_TIMER_HPP__ #ifndef WIN32 #include #else #include #endif #include #include namespace gravis { class Timer { friend std::ostream& operator<<(std::ostream& os, Timer& t); private: clock_t start_clock; #ifndef WIN32 timeval start_time; #endif public: Timer() : start_clock(), start_time() { restart(); } // Copy and assignment are fine double wall_time() const { #ifndef WIN32 timeval current; gettimeofday(¤t, 0); return (current.tv_sec - start_time.tv_sec) + (current.tv_usec - start_time.tv_usec)*1e-6; #else return -1; #endif } inline double cpu_time() const { return ticks_to_seconds(ticks()); } static inline double ticks_to_seconds(const 
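// --- Illustrative sketch (an addition for clarity; not part of the original gravis tree) ---
// Filling holes in an image with PushPull::interpolate().  Pixels where the mask
// is 0 are reconstructed from the image pyramid, pixels where it is 1 are kept,
// as described in the PushPull class documentation above.  fill_holes() is a
// hypothetical helper name.
#include "tImage.h"
#include "PushPull.h"

inline gravis::tImage<float> fill_holes(const gravis::tImage<float>& img,
                                        const gravis::tImage<float>& mask)
{
  // img and mask must have the same dimensions; minSize keeps its default of 1.
  return gravis::PushPull::interpolate(img, mask);
}
// --- end of sketch ---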
clock_t& ticks) { return double(ticks) / double(CLOCKS_PER_SEC); } inline clock_t ticks() const { return (clock() - start_clock); } inline void restart() { #ifndef WIN32 gettimeofday(&start_time, 0); #endif start_clock = clock(); } ~Timer() { }; }; } //=========================================================================== // Allow timers to be printed to ostreams using the syntax 'os << t' // for an ostream 'os' and a timer 't'. For example, "cout << t" will // print out the total amount of time 't' has been "running". inline std::ostream& operator<<(std::ostream& os, gravis::Timer& t) { double wall_time = double(t.wall_time()); double cpu_time = t.cpu_time(); double min_time=wall_time; if (cpu_time #include #include namespace gravis { /*! \brief Tuple of 2 integers, typically used for multi-index. */ class Tuple2 { public: int c0, c1; /*! \brief Construct a Tuple2 with entries of -1. */ Tuple2() : c0(-1), c1(-1) {} Tuple2(int a, int b) : c0(a), c1(b) {} int operator[] (int i) const { return *(&c0 + i); } int& operator[] (int i) { return *(&c0 + i); } unsigned int size() const { return 2; } /*! \brief Offset all non-negative entries. */ void offset(int o) { if (c0 >= 0) c0 += o; if (c1 >= 0) c1 += o; } /*! \brief Whether all entries are non-negative. */ bool allValid() const { return c0 >= 0 && c1 >= 0; } //! Lexical Ordering for Tuples inline bool operator==(const Tuple2& o) const { return ((c0 == o.c0) && (c1 == o.c1)); } //! Lexical Ordering for Tuples inline bool operator!=(const Tuple2& o) const { return !(*this == o); } //! Lexical Ordering for Tuples inline bool operator<(const Tuple2& o) const { return ((c0 < o.c0) || ((c0 == o.c0) && (c1 < o.c1))); } //! Lexical Ordering for Tuples inline bool operator>(const Tuple2& o) const { return (*this != o) && !(*this < o); } //! Lexical Ordering for Tuples inline bool operator<=(const Tuple2& o) const { return (*this < o) || (*this == o); } //! Lexical Ordering for Tuples inline bool operator>=(const Tuple2& o) const { return (*this > o) || (*this == o); } }; // class Tuple2 /*! \brief Tuple of three integers, typically used for multi-index. */ class Tuple3 { public: int c0, c1, c2; /*! \brief Construct a Tuple3 with entries of -1. */ Tuple3() : c0(-1), c1(-1), c2(-1) {} Tuple3(int a, int b, int c) : c0(a), c1(b), c2(c) {} int operator[] (int i) const { return *(&c0 + i); } int& operator[] (int i) { return *(&c0 + i); } unsigned int size() const { return 3; } /*! brief Offset all non-negative entries. */ void offset(int o) { if (c0 >= 0) c0 += o; if (c1 >= 0) c1 += o; if (c2 >= 0) c2 += o; } /*! \brief Whether all entries are non-negative. */ bool allValid() const { return c0 >= 0 && c1 >= 0 && c2 >= 0; } //! Lexical Ordering for Tuples inline bool operator==(const Tuple3& o) const { return ((c0 == o.c0) && (c1 == o.c1) && (c2 == o.c2)); } //! Lexical Ordering for Tuples inline bool operator!=(const Tuple3& o) const { return !(*this == o); } //! Lexical Ordering for Tuples inline bool operator<(const Tuple3& o) const { return ((c0 < o.c0) || ((c0 == o.c0) && (c1 < o.c1)) || ((c0 == o.c0) && (c1 == o.c1) && (c2 < o.c2))); } //! Lexical Ordering for Tuples inline bool operator>(const Tuple3& o) const { return (*this != o) && !(*this < o); } //! Lexical Ordering for Tuples inline bool operator<=(const Tuple3& o) const { return (*this < o) || (*this == o); } //! 
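// --- Illustrative sketch (an addition for clarity; not part of the original gravis tree) ---
// Timing a piece of work with gravis::Timer.  The timer starts measuring when it
// is constructed; timer_demo() is a hypothetical helper name.
#include <iostream>
#include "Timer.hpp"

inline void timer_demo()
{
  gravis::Timer t;                       // starts both the wall clock and the CPU clock

  double s = 0.0;
  for (int i = 0; i < 1000000; ++i) s += 1e-6 * i;   // some work to measure

  std::cout << "result " << s
            << ", cpu seconds: "  << t.cpu_time()
            << ", wall seconds: " << t.wall_time() << std::endl;

  t.restart();                           // reset both clocks for the next measurement
}
// --- end of sketch ---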
Lexical Ordering for Tuples inline bool operator>=(const Tuple3& o) const { return (*this > o) || (*this == o); } }; // class Tuple3 inline std::ostream& operator<< (std::ostream& os, const Tuple3& arg) { os << "[" << arg.c0 << ", " << arg.c1 << ", " << arg.c2 << "]"; return os; } inline std::ostream& operator<< (std::ostream& os, const Tuple2& arg) { os << "[" << arg.c0 << ", " << arg.c1 << "]"; return os; } // Inverse of operator<< inline std::istream& operator>> (std::istream& is, Tuple3& arg) { char c = ' '; is >> c; if (is.eof()) return is; if (c != '[') throw std::runtime_error("Tuple should start with an opening ["); std::stringstream values; int v = 0; while ((is >> c) && (c != ']')) { if (c == ',') { v++; if (v >= 3) throw std::runtime_error("Tuple3 contains more than three elements"); values << " "; } else if (c != ' ') values << c; } if (c != ']') { throw std::runtime_error("Tuple3 should end with a ]"); } values >> arg.c0 >> arg.c1 >> arg.c2; return is; } // Inverse of operator<< inline std::istream& operator>> (std::istream& is, Tuple2& arg) { char c = ' '; is >> c; if (is.eof()) return is; if (c != '[') throw std::runtime_error("Tuple should start with an opening ["); std::stringstream values; int v = 0; while ((is >> c) && (c != ']')) { if (c == ',') { v++; if (v >= 2) throw std::runtime_error("Tuple2 contains more than three elements"); values << " "; } else if (c != ' ') values << c; } if (c != ']') { throw std::runtime_error("Tuple2 should end with a ]"); } values >> arg.c0 >> arg.c1; return is; } } // namespace gravis #endif relion-3.1.3/src/jaz/gravis/.svn/text-base/lapack.h.svn-base000066400000000000000000000121741411340063500235570ustar00rootroot00000000000000#ifndef __LIBGRAVIS_LAPACK_H__ #define __LIBGRAVIS_LAPACK_H__ /****************************************************************************** ** Title: lapack.h ** Description: Connect our matrix classes to lapack. 
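// --- Illustrative sketch (an addition for clarity; not part of the original gravis tree) ---
// Round-tripping a Tuple3 through a stream with the operators defined above;
// tuples are written and parsed in the form "[a, b, c]".  The header name
// "Tuple.h" and tuple_io_demo() are assumptions made for this sketch.
#include <iostream>
#include <sstream>
#include "Tuple.h"

inline void tuple_io_demo()
{
  gravis::Tuple3 t(3, 1, 2);
  std::cout << t << std::endl;          // prints: [3, 1, 2]

  std::istringstream in("[7, 8, 9]");
  gravis::Tuple3 u;
  in >> u;                              // u is now (7, 8, 9); malformed input throws
}
// --- end of sketch ---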
** ** Author: Brian Amberg, 2007 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #include "Exception.h" #include "t2Matrix.h" #include "t3Matrix.h" #include "t4Matrix.h" // Lapack Header extern "C" { void sgesvd_(const char& jobu, const char& jobvt, const int& m, const int& n, float* a, const int& lda, float* s, float* u, const int& ldu, float* vt, const int& ldvt, float* work, const int& lwork, int& info ); void dgesvd_(const char& jobu, const char& jobvt, const int& m, const int& n, double* a, const int& lda, double* s, double* u, const int& ldu, double* vt, const int& ldvt, double* work, const int& lwork, int& info ); } namespace gravis { /** *Use lapack to calculate an svd of a 2x2 matrix **/ void svd(f2Matrix& U, f2Vector& S, f2Matrix& VT, const f2Matrix& A) { f2Matrix _A(A); float WORK[16]; int INFO; sgesvd_('A', 'A', 2, 2, &(_A[0]), 2, &(S[0]), &(U[0]), 2, &(VT[0]), 2, WORK, 16, INFO); if (INFO < 0) GRAVIS_THROW2(gravis::Exception, "The i'th argument had an invalid value."); if (INFO > 0) GRAVIS_THROW2(gravis::Exception, "SBDSQR did not converge to zero."); } /** *Use lapack to calculate an svd of a 2x2 matrix **/ void svd(d2Matrix& U, d2Vector& S, d2Matrix& VT, const d2Matrix& A) { d2Matrix _A(A); double WORK[16]; int INFO; dgesvd_('A', 'A', 2, 2, &(_A[0]), 2, &(S[0]), &(U[0]), 2, &(VT[0]), 2, WORK, 16, INFO); if (INFO < 0) GRAVIS_THROW2(gravis::Exception, "The i'th argument had an invalid value."); if (INFO > 0) GRAVIS_THROW2(gravis::Exception, "SBDSQR did not converge to zero."); } /** *Use lapack to calculate an svd of a 3x3 matrix **/ void svd(f3Matrix& U, f3Vector& S, f3Matrix& VT, const f3Matrix& A) { f3Matrix _A(A); float WORK[32]; int INFO; sgesvd_('A', 'A', 3, 3, &(_A[0]), 3, &(S[0]), &(U[0]), 3, &(VT[0]), 3, WORK, 32, INFO); if (INFO < 0) GRAVIS_THROW2(gravis::Exception, "The i'th argument had an invalid value."); if (INFO > 0) GRAVIS_THROW2(gravis::Exception, "SBDSQR did not converge to zero."); } /** *Use lapack to calculate an svd of a 3x3 matrix **/ void svd(d3Matrix& U, d3Vector& S, d3Matrix& VT, const d3Matrix& A) { d3Matrix _A(A); double WORK[32]; int INFO; dgesvd_('A', 'A', 3, 3, &(_A[0]), 3, &(S[0]), &(U[0]), 3, &(VT[0]), 3, WORK, 32, INFO); if (INFO < 0) GRAVIS_THROW2(gravis::Exception, "The i'th argument had an invalid value."); if (INFO > 0) GRAVIS_THROW2(gravis::Exception, "SBDSQR did not converge to zero."); } /** *Use lapack to calculate an svd of a 4x4 matrix **/ void svd(f4Matrix& U, f4Vector& S, f4Matrix& VT, const f4Matrix& A) { f4Matrix _A(A); float WORK[64]; int INFO; sgesvd_('A', 'A', 4, 4, &(_A[0]), 4, &(S[0]), &(U[0]), 4, &(VT[0]), 4, WORK, 64, INFO); if (INFO < 0) GRAVIS_THROW2(gravis::Exception, "The i'th argument had an invalid value."); if (INFO > 0) GRAVIS_THROW2(gravis::Exception, "SBDSQR did not converge to zero."); } /** *Use lapack to calculate an svd of a 4x4 matrix **/ void svd(d4Matrix& U, d4Vector& S, d4Matrix& VT, const d4Matrix& A) { d4Matrix _A(A); double WORK[64]; int INFO; dgesvd_('A', 'A', 4, 4, &(_A[0]), 4, &(S[0]), &(U[0]), 4, &(VT[0]), 4, WORK, 64, INFO); if (INFO < 0) GRAVIS_THROW2(gravis::Exception, "The i'th argument had an invalid value."); if (INFO > 0) GRAVIS_THROW2(gravis::Exception, "SBDSQR did not converge to zero."); } int rank(const f2Matrix& A, const float accuracy = 1e-10) { f2Matrix U; f2Vector S; f2Matrix VT; svd(U, S, VT, A); int r = 0; while (r<2 && (S[r] >= accuracy || S[r] <= -accuracy)) ++r; return r; } int rank(const 
d2Matrix& A, const double accuracy = 1e-10) { d2Matrix U; d2Vector S; d2Matrix VT; svd(U, S, VT, A); int r = 0; while (r<2 && (S[r] >= accuracy || S[r] <= -accuracy)) ++r; return r; } int rank(const f3Matrix& A, const float accuracy = 1e-10) { f3Matrix U; f3Vector S; f3Matrix VT; svd(U, S, VT, A); int r = 0; while (r<3 && (S[r] >= accuracy || S[r] <= -accuracy)) ++r; return r; } int rank(const d3Matrix& A, const double accuracy = 1e-10) { d3Matrix U; d3Vector S; d3Matrix VT; svd(U, S, VT, A); int r = 0; while (r<3 && (S[r] >= accuracy || S[r] <= -accuracy)) ++r; return r; } int rank(const f4Matrix& A, const float accuracy = 1e-10) { f4Matrix U; f4Vector S; f4Matrix VT; svd(U, S, VT, A); int r = 0; while (r<4 && (S[r] >= accuracy || S[r] <= -accuracy)) ++r; return r; } int rank(const d4Matrix& A, const double accuracy = 1e-10) { d4Matrix U; d4Vector S; d4Matrix VT; svd(U, S, VT, A); int r = 0; while (r<4 && (S[r] >= accuracy || S[r] <= -accuracy)) ++r; return r; } } #endif relion-3.1.3/src/jaz/gravis/.svn/text-base/matrix_blas_reference.h.svn-base000066400000000000000000000062071411340063500266470ustar00rootroot00000000000000// v = v+M*x inline static void addmult(tVectorView<__GMBD_REAL> &v, const tConstMatrixView<__GMBD_REAL> &M, const tConstVectorView<__GMBD_REAL> &x) { GRAVIS_CHECK( v.size() == M.h, "v and M are incompatible"); GRAVIS_CHECK( x.size() == M.w, "M and x are incompatible"); for (size_t j=0; j &v, const tConstVectorView<__GMBD_REAL> &a, const tConstMatrixView<__GMBD_REAL> &M, const tConstVectorView<__GMBD_REAL> &x) { GRAVIS_CHECK( v.size() == a.size(), "v and a are incompatible"); // Addition v = a; addmult(v, M, x); } // v = M*x inline static void mult(tVectorView<__GMBD_REAL> &v, const tConstMatrixView<__GMBD_REAL> &M, const tConstVectorView<__GMBD_REAL> &x) { ::gravis::matrix::clear(v); addmult(v, M, x); } // v = v+(x^T M)^T inline static void addmult(tVectorView<__GMBD_REAL> &v, const tConstVectorView<__GMBD_REAL> &x, const tConstMatrixView<__GMBD_REAL> &M) { GRAVIS_CHECK( v.size() == M.w, "v and M are incompatible"); GRAVIS_CHECK( x.size() == M.h, "M and x are incompatible"); for (size_t i=0; i &v, const tConstVectorView<__GMBD_REAL> &a, const tConstVectorView<__GMBD_REAL> &x, const tConstMatrixView<__GMBD_REAL> &M) { GRAVIS_CHECK( v.size() == a.size(), "v and a are incompatible"); // Addition v = a; addmult(v, M, x); } // v = (x^T M)^T inline static void mult(tVectorView<__GMBD_REAL> &v, const tConstVectorView<__GMBD_REAL> &x, const tConstMatrixView<__GMBD_REAL> &M) { ::gravis::matrix::clear(v); addmult(v, M, x); } /** * Squared l2 norm **/ inline static __GMBD_REAL normL2sqr(const tConstVectorView<__GMBD_REAL> &v) { if (v.size() == 0) return 0; __GMBD_REAL result = v[0]*v[0]; for (size_t i=1; i &v) { if (v.size() == 0) return 0; __GMBD_REAL result = v[0]*v[0]; for (size_t i=1; i &v) { return __GMBD_REAL(sqrt(normL2sqr(v))); } /** forbenius norm **/ inline static __GMBD_REAL normL2(const tConstMatrixView<__GMBD_REAL> &v) { return __GMBD_REAL(sqrt(normL2sqr(v))); } // v = v+M*x inline static void addmult(tVarVector<__GMBD_REAL> &v, const tConstMatrixView<__GMBD_REAL> &M, const tConstVectorView<__GMBD_REAL> &x) { tVectorView<__GMBD_REAL> vv(v); addmult(vv, M, x); } // v = a+M*x inline static void addmult(tVarVector<__GMBD_REAL> &v, const tConstVectorView<__GMBD_REAL> &a, const tConstMatrixView<__GMBD_REAL> &M, const tConstVectorView<__GMBD_REAL> &x) { tVectorView<__GMBD_REAL> vv(v); addmult(vv, a, M, x); } // v = M*x inline static void mult(tVarVector<__GMBD_REAL> &v, 
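// --- Illustrative sketch (an addition for clarity; not part of the original gravis tree) ---
// Computing the singular value decomposition of a 3x3 matrix through the LAPACK
// wrappers above, so that A = U * diag(S) * VT.  Requires linking against LAPACK;
// svd_demo() is a hypothetical helper name.
#include <iostream>
#include "lapack.h"

inline void svd_demo()
{
  gravis::d3Matrix A(2.0, 0.0, 0.0,
                     0.0, 1.0, 0.0,
                     0.0, 0.0, 0.5);

  gravis::d3Matrix U, VT;
  gravis::d3Vector S;
  gravis::svd(U, S, VT, A);                                   // throws gravis::Exception on failure

  std::cout << "singular values: " << S << std::endl;         // 2, 1 and 0.5
  std::cout << "rank(A) = " << gravis::rank(A) << std::endl;  // 3
}
// --- end of sketch ---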
const tConstMatrixView<__GMBD_REAL> &M, const tConstVectorView<__GMBD_REAL> &x) { tVectorView<__GMBD_REAL> vv(v); addmult(vv, M, x); } relion-3.1.3/src/jaz/gravis/.svn/text-base/program_options.h.svn-base000066400000000000000000000043471411340063500255510ustar00rootroot00000000000000#ifndef __LIBGRAVIS_PROGRAM_OPTIONS_H__ #define __LIBGRAVIS_PROGRAM_OPTIONS_H__ /** * \file * This headers includes the boost program options header, disabling the warnings such that we can still have our programs compile with -Werror * Also it defines the MACROS PO_SWITCH and PO_VALUE, which make option definitions much more readable. **/ #if defined __GNUC__ #pragma GCC system_header #elif defined __SUNPRO_CC #pragma disable_warn #elif defined _MSC_VER #pragma warning(push, 1) #endif #include #include #include #include namespace boost { namespace program_options { template std::string vec2string(const std::vector &v) { std::stringstream s; for (size_t i=0; idefault_value(opt), desc) /** * Shortcut to define a boost program option with a value (e.g. a string or an integer) * * int number = false; * * po::options_description desc("Example Options"); * desc.add_options() * PO_VALUE("number,n", number, "Set the number of foos to use") * ; **/ #define PO_VALUE( key, opt, desc) (key, boost::program_options::value(&opt)->default_value(opt), desc) /** * Shortcut to define a boost program option with a vector value (e.g. a vector of strings) * * std::vector vec; vec.push_back(1); vec.push_back(2); * * po::options_description desc("Example Options"); * desc.add_options() * PO_VECTOR("vec,v", vec, "List of foos") * ; **/ #define PO_VECTOR(key, opt, desc) (key, boost::program_options::value(&opt)->default_value(opt, boost::program_options::vec2string(opt)), desc) #if defined __SUNPRO_CC #pragma enable_warn #elif defined _MSC_VER #pragma warning(pop) #endif #endif relion-3.1.3/src/jaz/gravis/.svn/text-base/t2Matrix.h.svn-base000066400000000000000000000133761411340063500240430ustar00rootroot00000000000000#ifndef __LIBGRAVIS_T2MATRIX_H__ #define __LIBGRAVIS_T2MATRIX_H__ /****************************************************************************** ** Title: t2Matrix.h ** Description: Represents a 2x2 matrix with column-major memory layout. 
** ** Author: Jean-Sebastien Pierrard, 2005 ** Brian Amberg 2006 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #include #include "t2Vector.h" namespace gravis { /** * A 2x2 matrix with column-major memory layout **/ template struct t2Matrix { T m[4]; t2Matrix () { loadIdentity(); }; t2Matrix (const t2Matrix& mat); explicit t2Matrix (const T& val) { m[0] = val; m[1] = val; m[2] = val; m[3] = val; }; explicit t2Matrix (const T* v_ptr) { m[ 0] = v_ptr[ 0]; m[ 1] = v_ptr[ 1]; m[ 2] = v_ptr[ 2]; m[ 3] = v_ptr[ 3]; }; t2Matrix (T m0, T m2, T m1, T m3); void set (T m0, T m2, T m1, T m3); const T& operator[] (int idx) const; T& operator[] (int idx); const T& operator() (int row, int col) const { return m[(col << 1) + row]; } T& operator() (int row, int col) { return m[(col << 1) + row]; } t2Vector operator* (const t2Vector&) const; t2Matrix operator* (const t2Matrix&) const; t2Matrix& operator*= (const t2Matrix&); /** * Element Wise Addition (Inplace) **/ inline t2Matrix& operator+= (const t2Matrix& rhs) { for (size_t i=0; i<4; ++i) m[i] += rhs.m[i]; return *this; } /** * Element Wise Subtraction (Inplace) **/ inline t2Matrix& operator-= (const t2Matrix& rhs) { for (size_t i=0; i<4; ++i) m[i] -= rhs.m[i]; return *this; } /** * Element Wise Addition **/ inline t2Matrix operator+ (const t2Matrix& rhs) const { t2Matrix result(*this); return(result += rhs); } /** * Element Wise Subtraction **/ inline t2Matrix operator- (const t2Matrix& rhs) const { t2Matrix result(*this); return(result -= rhs); } /** * Matrix Norm (2 Norm) **/ inline T norm2() const { return m[0]*m[0] + m[1]*m[1] + m[2]*m[2] + m[3]*m[3]; } /** * Matrix Trace (sum(diag(M))) **/ T trace() const; void transpose (); void invert(); void loadIdentity (); static t2Matrix identity(); static t2Matrix scale (const t2Vector& s) { return t2Matrix( s[0], T(0), T(0) , s[1]); }; static t2Matrix scale (const T& s) { return scale(t2Vector(s,s)); } /** * Create a 2x2 rotation matrix for a clockwise rotation around a rad. 
**/ static t2Matrix rotation(const T& a) { return t2Matrix( cos(a), -sin(a), sin(a), cos(a)); } }; template inline t2Matrix::t2Matrix (const t2Matrix& mat) { m[0] = mat.m[0]; m[1] = mat.m[1]; m[2] = mat.m[2]; m[3] = mat.m[3]; } template inline t2Matrix::t2Matrix (T m0, T m2, T m1, T m3) { m[ 0] = m0; m[ 1] = m1; m[ 2] = m2; m[ 3] = m3; } template inline void t2Matrix::set (T m0, T m2, T m1, T m3) { m[ 0] = m0; m[ 1] = m1; m[ 2] = m2; m[ 3] = m3; } template inline const T& t2Matrix::operator[] (int idx) const { #ifdef _GRAVIS_DEBUG_RANGECHECKING_ assert((idx >= 0) && (idx < 4)); #endif return m[idx]; } template inline T& t2Matrix::operator[] (int idx) { #ifdef _GRAVIS_DEBUG_RANGECHECKING_ assert((idx >= 0) && (idx < 4)); #endif return m[idx]; } template inline t2Matrix& t2Matrix::operator*= (const t2Matrix& op) { *this = this->operator*(op); return *this; } template inline t2Matrix t2Matrix::operator* (const t2Matrix& op) const { return t2Matrix( m[0]*op.m[ 0] + m[2]*op.m[ 1], m[0]*op.m[ 2] + m[2]*op.m[3], m[1]*op.m[ 0] + m[3]*op.m[ 1], m[1]*op.m[ 2] + m[3]*op.m[3]); } template inline t2Vector t2Matrix::operator* (const t2Vector& op) const { return t2Vector( m[ 0]*op.x + m[ 2]*op.y, m[ 1]*op.x + m[ 3]*op.y); } template inline void t2Matrix::loadIdentity () { m[ 0] = T(1); m[ 2] = T(0); m[ 1] = T(0); m[ 3] = T(1); } template inline void t2Matrix::transpose () { swap(m[1], m[2]); } template inline void t2Matrix::invert() { t2Matrix A = *this; T di = 1.0/(A[0]*A[3]-A[1]*A[2]); m[0]= A[3]*di; m[1]=-A[1]*di; m[2]=-A[2]*di; m[3]= A[0]*di; } template inline t2Matrix t2Matrix::identity () { return t2Matrix( T(1), T(0), T(0), T(1)); } template inline T t2Matrix::trace() const { return ( m[0] + m[3]); } template inline std::ostream& operator<< (std::ostream& os, const t2Matrix& arg) { os << "[ " << arg[ 0] << " " << arg[ 2] << " ]\n"; os << "[ " << arg[ 1] << " " << arg[ 3] << " ]\n"; return os; } template inline std::istream& operator>> ( std::istream& is, t2Matrix& arg) { std::string dummy; is >> dummy >> arg[ 0] >> arg[ 2] >> dummy; is >> dummy >> arg[ 1] >> arg[ 3] >> dummy; return is; } typedef t2Matrix f2Matrix; typedef t2Matrix d2Matrix; } /* Close Namespace "gravis" */ #endif relion-3.1.3/src/jaz/gravis/.svn/text-base/t2Vector.h.svn-base000066400000000000000000000150241411340063500240310ustar00rootroot00000000000000#ifndef __LIBGRAVIS_T2VECTOR_H__ #define __LIBGRAVIS_T2VECTOR_H__ /****************************************************************************** ** Title: t2Vector.h ** Description: Represents a two dimensional vector. ** ** Author: Pascal Paysan, 2005 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #include #include #include #include #include #include namespace gravis { template class t2Vector { public: T x, y; typedef T scalar_type; t2Vector() : x(T(0)), y(T(0)) { } explicit t2Vector(T _v) : x(_v), y(_v) { } t2Vector(T _x, T _y) : x(_x), y(_y) { } template explicit t2Vector(const t2Vector& vec) : x(vec.x), y(vec.y) {} static t2Vector unitX() { return t2Vector(T(1), T(0)); } static t2Vector unitY() { return t2Vector(T(0), T(1)); } void set (T _v) { x = y= _v; } void set (T _x, T _y) { x = _x; y = _y; } T length () const { return T(::sqrt(x*x + y*y)); } //! Beware: This is not the 2 norm but the square of the two norm. T norm2 () const { return (x*x + y*y); } //! \f$l_1\f$ Norm: \f$\sum_i |v_i|\f$ T normL1 () const { return (std::abs(x) + std::abs(y)); } //! 
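// --- Illustrative sketch (an addition for clarity; not part of the original gravis tree) ---
// Rotating a 2D point with the t2Matrix::rotation() factory defined above; the
// angle is given in radians.  rotate2d_demo() is a hypothetical helper name.
#include <iostream>
#include "t2Matrix.h"

inline void rotate2d_demo()
{
  const double pi = 3.14159265358979323846;

  gravis::d2Matrix R = gravis::d2Matrix::rotation(0.5 * pi);   // quarter turn
  gravis::d2Vector p(1.0, 0.0);

  std::cout << R * p << std::endl;     // approximately [0, 1]
}
// --- end of sketch ---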
\f$l_2\f$ Norm: \f$\sqrt{\sum_i |v_i|^2}\f$ T normL2 () const { return sqrt(x*x + y*y); } //! \f$l_2\f$ Norm: \f$\sqrt{\sum_i |v_i|^2}\f$ T normL2sqr () const { return x*x + y*y; } //! \f$l_\infty\f$ Norm: \f$\max{ |v_i|\,|\, \forall i }\f$ T normLInf() const { return std::max(std::abs(x), std::abs(y)); } t2Vector& normalize (T f=1.0) { T norm = f / ::sqrt(x*x + y*y); x *= norm; y *= norm; return *this; } const T& operator[] (int idx) const { #ifdef _GRAVIS_DEBUG_RANGECHECKING_ assert((idx >= 0) && (idx < 2)); #endif return (&x)[idx]; } T& operator[] (int idx) { #ifdef _GRAVIS_DEBUG_RANGECHECKING_ assert((idx >= 0) && (idx < 2)); #endif return (&x)[idx]; } bool operator == ( const t2Vector& arg ) const { return ( x == arg.x && y == arg.y); } bool operator != ( const t2Vector& arg ) const { return !(*this == arg); } t2Vector& operator += (const t2Vector& arg) { x += arg.x; y += arg.y; return *this; } t2Vector& operator -= (const t2Vector& arg) { x -= arg.x; y -= arg.y; return *this; } t2Vector& operator += (const T& scalar) { x += scalar; y += scalar; return *this; } t2Vector& operator -= (const T& scalar) { x -= scalar; y -= scalar; return *this; } t2Vector& operator *= (const T& arg) { x *= arg; y *= arg; return *this; } t2Vector& operator /= (const T& arg) { x /= arg; y /= arg; return *this; } //! Check if the entries of the other vector differ by less than epsilon. // It is better to use this than to use operator== for comparision, if it is // not the same vertex. bool isClose( const t2Vector& o, const T epsilon) const { return ((std::fabs(x-o.x) < epsilon) and (std::fabs(y-o.y) < epsilon)); } static t2Vector normalize (const t2Vector& v1, T f=1.0f) { T norm = f / T(::sqrt(v1.x*v1.x + v1.y*v1.y)); return t2Vector(v1.x * norm, v1.y * norm); } t2Vector operator / (const T& arg) const { t2Vector r(*this); r /= arg; return r; } }; template inline t2Vector operator + (const t2Vector& v1, const t2Vector& v2) { return t2Vector(v1.x + v2.x, v1.y + v2.y); } template inline t2Vector operator - (const t2Vector& v1) { return t2Vector(-v1.x, -v1.y); } template inline t2Vector operator - (const t2Vector& v1, const t2Vector& v2) { return t2Vector(v1.x - v2.x, v1.y - v2.y); } template inline t2Vector operator + (const T& s, const t2Vector& v2) { return t2Vector(s + v2.x, s + v2.y); } template inline t2Vector operator - (const T& s, const t2Vector& v2) { return t2Vector(s - v2.x, s - v2.y); } template inline t2Vector operator + (const t2Vector& v, const T& s) { return t2Vector(v.x + s, v.y + s); } template inline t2Vector operator - (const t2Vector& v, const T& s) { return t2Vector(v.x - s, v.y - s); } template inline t2Vector operator * (T f, const t2Vector& v) { return t2Vector(f * v.x, f * v.y); } template inline t2Vector operator * (const t2Vector& v, const T& f) { return t2Vector(f * v.x, f * v.y); } template inline t2Vector operator * (const t2Vector& v, const t2Vector& f) { return t2Vector(v.x * f.x, v.y * f.y); } template inline std::ostream& operator<< (std::ostream& os, const t2Vector& arg) { os << "[" << arg.x << ", " << arg.y << "]"; return os; } template inline T dot (const t2Vector& v1, const t2Vector& v2) { return (v1.x*v2.x + v1.y*v2.y); } // Inverse of operator<< template inline std::istream& operator>> (std::istream& is, t2Vector& arg) { char c = ' '; is >> c; if (is.eof()) return is; if (c != '[') throw std::runtime_error("Vector should start with an opening ["); std::stringstream values; int v = 0; while ((is >> c) && (c != ']')) { if (c == ',') { v++; if (v >= 2) throw 
std::runtime_error("Vector contains more than three elements"); values << " "; } else if (c != ' ') values << c; } if (c != ']') { throw std::runtime_error("Vector should end with a ]"); } values >> arg.x >> arg.y; return is; } typedef t2Vector f2Vector; typedef t2Vector d2Vector; } /* Close Namespace "gravis" */ #endif relion-3.1.3/src/jaz/gravis/.svn/text-base/t3Matrix.h.svn-base000066400000000000000000000350341411340063500240370ustar00rootroot00000000000000#ifndef __LIBGRAVIS_T3MATRIX_H__ #define __LIBGRAVIS_T3MATRIX_H__ /****************************************************************************** ** Title: t3Matrix.h ** Description: Represents a 3x3 matrix with column-major memory layout. ** ** Author: Michael Keller, 2005 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #include #include #include "t3Vector.h" #include "tRGB.h" #include "private/tDeterminants.h" namespace gravis { template class t4Matrix; /*! \brief A 3x3 matrix class. * * There is no operator*=, because some people expect it to be a left-multiplication * and others a right-multiplication. To avoid confusion we only provide the explicit * methods lmul() and rmul(). */ template class t3Matrix { public: T m[9]; t3Matrix(); explicit t3Matrix(T v); t3Matrix(const T* v_ptr); t3Matrix(const t3Matrix& mat); t3Matrix(T m0, T m3, T m6, T m1, T m4, T m7, T m2, T m5, T m8); template explicit t3Matrix (const t3Matrix& mat) { for(int i=0; i<9; ++i) m[i] = static_cast(mat.m[i]); } void set(T m0, T m3, T m6, T m1, T m4, T m7, T m2, T m5, T m8); /*! \brief Return indexed entry (column major). */ const T& operator[](int idx) const { return m[idx]; } /*! \brief Return reference to indexed entry (column major). */ T& operator[](int idx) { return m[idx]; } /*! \brief Return entry in row i and column j. */ const T& operator()(int row, int col) const { return m[col * 3 + row]; } /*! \brief Return reference to entry in row i and column j. */ T& operator()(int row, int col) { return m[col * 3 + row]; } //! Check if the entries of the other vector differ by less than epsilon. // It is better to use this than to use operator== for comparision, if it is // not the same vertex. 
bool isClose( const t3Matrix& o, const T epsilon) const { for (int i=0; i<9; i++) if (std::fabs(m[i]-o.m[i]) >= epsilon) return false; return true; } bool operator==(const t3Matrix& o) const { for (int i=0; i<9; i++) if (m[i] != o.m[i]) return false; return true; } bool operator!=(const t3Matrix& o) const { return !(*this == o); } t3Matrix operator*(T f) const; t3Matrix& operator*=(const T& f); t3Matrix& operator/=(const T& f); t3Vector operator*(const t3Vector&) const; tRGB operator*(const tRGB&) const; t3Matrix operator*(const t3Matrix&) const; t3Matrix& operator+=(const t3Matrix&); t3Matrix& operator-=(const t3Matrix&); t3Matrix operator+(const t3Matrix&) const; t3Matrix operator-(const t3Matrix&) const; t3Matrix operator-() const; t3Matrix& lmul(const t3Matrix& m); t3Matrix& rmul(const t3Matrix& m); T trace() const; T det() const; t3Matrix adjugate() const; t3Matrix& transpose(); t3Matrix& invert(); t3Matrix& loadIdentity(); t3Vector getAxis() const; static t3Matrix extract(const t4Matrix& mat); static t3Matrix scale(const t3Vector&); static t3Matrix scale (const T& s) { return scale(t3Vector(s,s,s)); } static t3Matrix rotation(const t3Vector& u, const t3Vector& v); static t3Matrix rotation(const t3Vector& axis, float angle); static t3Matrix rotationX(T angle); static t3Matrix rotationY(T angle); static t3Matrix rotationZ(T angle); }; /*! \brief Constructs an identity matrix. */ template inline t3Matrix::t3Matrix() { loadIdentity(); } /*! \brief Constructs a matrix with all entries set to val. */ template inline t3Matrix::t3Matrix(T val) { for (int i = 0; i < 9; i++) m[i] = val; } /*! \brief Constructs a matrix with entries taken from an array. * * \param v_ptr array must be of appropriate length and in column-major layout */ template inline t3Matrix::t3Matrix(const T* v_ptr) { for (int i = 0; i < 9; i++) m[i] = v_ptr[i]; } /*! \brief Copy constructor. */ template inline t3Matrix::t3Matrix(const t3Matrix& mat) { for (int i = 0; i < 9; i++) m[i] = mat.m[i]; } /*! \brief Constructs a matrix from the given entries (row major). */ template inline t3Matrix::t3Matrix(T m0, T m3, T m6, T m1, T m4, T m7, T m2, T m5, T m8) { m[0] = m0; m[1] = m1; m[2] = m2; m[3] = m3; m[4] = m4; m[5] = m5; m[6] = m6; m[7] = m7; m[8] = m8; } /*! \brief Overwrites this matrix with the given entries (row major). */ template inline void t3Matrix::set(T m0, T m3, T m6, T m1, T m4, T m7, T m2, T m5, T m8) { m[0] = m0; m[1] = m1; m[2] = m2; m[3] = m3; m[4] = m4; m[5] = m5; m[6] = m6; m[7] = m7; m[8] = m8; } /*! \brief Scalar times matrix. */ template inline t3Matrix operator*(T f, const t3Matrix& mat) { t3Matrix out(mat); out *= f; return out; } /*! \brief Matrix times scalar. */ template inline t3Matrix t3Matrix::operator*(T f) const { t3Matrix out(*this); out *= f; return out; } /*! \brief Multiply this matrix with a scalar. */ template inline t3Matrix& t3Matrix::operator*=(const T& f) { for (int i = 0; i < 9; i++) m[i] *= f; return *this; } /*! \brief Divide this matrix by a scalar. */ template inline t3Matrix& t3Matrix::operator/=(const T& f) { for (int i = 0; i < 9; i++) m[i] /= f; return *this; } /*! \brief Matrix times vector. */ template inline t3Vector t3Matrix::operator* (const t3Vector& op) const { return t3Vector( m[0]*op.x + m[3]*op.y + m[6]*op.z, m[1]*op.x + m[4]*op.y + m[7]*op.z, m[2]*op.x + m[5]*op.y + m[8]*op.z ); } /*! \brief Matrix times vector. 
*/ template inline tRGB t3Matrix::operator* (const tRGB& op) const { return tRGB( m[0]*op.r + m[3]*op.g + m[6]*op.b, m[1]*op.r + m[4]*op.g + m[7]*op.b, m[2]*op.r + m[5]*op.g + m[8]*op.b ); } /*! \brief Matrix times matrix. */ template inline t3Matrix t3Matrix::operator* (const t3Matrix& op) const { return t3Matrix(m[0]*op.m[0] + m[3]*op.m[1] + m[6]*op.m[2], m[0]*op.m[3] + m[3]*op.m[4] + m[6]*op.m[5], m[0]*op.m[6] + m[3]*op.m[7] + m[6]*op.m[8], m[1]*op.m[0] + m[4]*op.m[1] + m[7]*op.m[2], m[1]*op.m[3] + m[4]*op.m[4] + m[7]*op.m[5], m[1]*op.m[6] + m[4]*op.m[7] + m[7]*op.m[8], m[2]*op.m[0] + m[5]*op.m[1] + m[8]*op.m[2], m[2]*op.m[3] + m[5]*op.m[4] + m[8]*op.m[5], m[2]*op.m[6] + m[5]*op.m[7] + m[8]*op.m[8] ); } /*! \brief Adds other matrix to this matrix. */ template inline t3Matrix& t3Matrix::operator+=(const t3Matrix& op) { for (int i = 0; i < 9; i++) m[i] += op.m[i]; return *this; } /*! \brief Subtracts other matrix from this matrix. */ template inline t3Matrix& t3Matrix::operator-=(const t3Matrix& op) { *this += -op; return *this; } /*! \brief Matrix plus matrix. */ template inline t3Matrix t3Matrix::operator+(const t3Matrix& op) const { t3Matrix out(*this); return out += op; } /*! \brief Matrix minus matrix. */ template inline t3Matrix t3Matrix::operator-(const t3Matrix& op) const { t3Matrix out(*this); return out += -op; } /*! \brief Return additive inverse of this matrix. */ template inline t3Matrix t3Matrix::operator-() const { t3Matrix out(*this); for (int i = 0; i < 9; i++) out[i] = -out[i]; return out; } /*! \brief Right-multiply m to this matrix (*this = *this * m). */ template inline t3Matrix& t3Matrix::rmul(const t3Matrix& m) { *this = *this * m; return *this; } /*! \brief Left-multiply m to this matrix (*this = m * *this). */ template inline t3Matrix& t3Matrix::lmul(const t3Matrix& m) { *this = m * *this; return *this; } /*! \brief Return the trace of this matrix (\f$a_{11} + a_{22} + a_{33}\f$). */ template inline T t3Matrix::trace() const { return ( m[0] + m[4] + m[8] ); } /*! \brief Return the determinant of this matrix. */ template inline T t3Matrix::det() const { return det3x3(m[0], m[3], m[6], m[1], m[4], m[7], m[2], m[5], m[8]); } /*! \brief Return the adjugate of this matrix. */ template inline t3Matrix t3Matrix::adjugate() const { // transpose of cofactor matrix return t3Matrix( det2x2(m[4],m[7],m[5],m[8]), det2x2(m[5],m[8],m[3],m[6]), det2x2(m[3],m[6],m[4],m[7]), det2x2(m[7],m[1],m[8],m[2]), det2x2(m[8],m[2],m[6],m[0]), det2x2(m[6],m[0],m[7],m[1]), det2x2(m[1],m[4],m[2],m[5]), det2x2(m[2],m[5],m[0],m[3]), det2x2(m[0],m[3],m[1],m[4])); } /*! \brief Transpose this matrix. * Attention: Although innocent looking this is an inplace operation **/ template inline t3Matrix& t3Matrix::transpose() { std::swap(m[1],m[3]); std::swap(m[2],m[6]); std::swap(m[5],m[7]); return *this; } /*! \brief Invert this matrix. * Attention: Although innocent looking this is an inplace operation **/ template inline t3Matrix& t3Matrix::invert() { *this = (T(1)/det())*adjugate(); return *this; } /*! \brief Overwrite this matrix with an identity matrix. */ template inline t3Matrix& t3Matrix::loadIdentity () { m[0] = T(1); m[1] = T(0); m[2] = T(0); m[3] = T(0); m[4] = T(1); m[5] = T(0); m[6] = T(0); m[7] = T(0); m[8] = T(1); return *this; } /*! \brief Retrieves the (not normalized) axis of rotation, * assuming this matrix describes a rotation. 
*/ template inline t3Vector t3Matrix::getAxis() const { // gemaess Artin, "Algebra", Kapitel 4, Aufgabe 14 float a0 = m[5] + m[7]; // (2,3) + (3,2) float a1 = m[2] + m[6]; // (1,3) + (3,1) float a2 = m[1] + m[3]; // (1,2) + (2,1) if (a0 == 0) return t3Vector(T(1), T(0), T(0)); else if (a1 == 0) return t3Vector(T(0), T(1), T(0)); else if (a2 == 0) return t3Vector(T(0), T(0), T(1)); else return t3Vector(T(1)/a0, T(1)/a1, T(1)/a2); } /*! \brief Return the upper left 3x3 matrix from mat. */ template inline t3Matrix t3Matrix::extract(const t4Matrix& mat) { return t3Matrix(mat.m[0], mat.m[4], mat.m[8], mat.m[1], mat.m[5], mat.m[9], mat.m[2], mat.m[6], mat.m[10]); } /*! \brief Return a matrix representing a scaling by s. */ template inline t3Matrix t3Matrix::scale(const t3Vector& s) { return t3Matrix( s.x, T(0), T(0), T(0), s.y, T(0), T(0), T(0), s.z ); } /*! Return a matrix that will rotate u into v. */ template inline t3Matrix t3Matrix::rotation(const t3Vector& u, const t3Vector& v) { T phi; T h; T lambda; t3Vector w; w = u.cross(v); phi = u.dot(v); lambda = w.dot(w); if (lambda > 1e-10) h = ((T)1.0 - phi) / lambda; else h = lambda; T hxy = w.x * w.y * h; T hxz = w.x * w.z * h; T hyz = w.y * w.z * h; t3Matrix out(phi + w.x * w.x * h, hxy + w.z, hxz - w.y, hxy - w.z, phi + w.y * w.y * h, hyz + w.x, hxz + w.y, hyz - w.x, phi + w.z * w.z * h); return out; } /*! \brief Return a matrix that rotates by specified angle (in degrees) around specified axis. */ template inline t3Matrix t3Matrix::rotation(const t3Vector& axis, float angle) { // formula copied form GL specification t3Vector n(axis); n.normalize(); // convert to radians angle *= (float)(3.1415927/180.); t3Matrix s(0, -n.z, n.y, n.z, 0, -n.x, -n.y, n.x, 0); t3Matrix nnt(n.x*n.x, n.x*n.y, n.x*n.z, n.y*n.x, n.y*n.y, n.y*n.z, n.z*n.x, n.z*n.y, n.z*n.z); return nnt + T(cos(angle))*(t3Matrix() - nnt) + T(sin(angle))*s; } template inline std::ostream& operator<< (std::ostream& os, const t3Matrix& arg) { os << "[ " << std::setw(10) << arg[0] << " " << std::setw(10) << arg[3] << " " << std::setw(10) << arg[6] << " ]\n"; os << "| " << std::setw(10) << arg[1] << " " << std::setw(10) << arg[4] << " " << std::setw(10) << arg[7] << " |\n"; os << "| " << std::setw(10) << arg[2] << " " << std::setw(10) << arg[5] << " " << std::setw(10) << arg[8] << " |\n"; return os; } template inline std::istream& operator>> ( std::istream& is, t3Matrix& arg) { std::string dummy; is >> dummy >> arg[0] >> arg[3] >> arg[6] >> dummy; is >> dummy >> arg[1] >> arg[4] >> arg[7] >> dummy; is >> dummy >> arg[2] >> arg[5] >> arg[8] >> dummy; return is; } template inline t3Matrix t3Matrix::rotationX (T a) { return t3Matrix( T(1), T(0), T(0), T(0), T(cos(a)), T(-sin(a)), T(0), T(sin(a)), T(cos(a)) ); } template inline t3Matrix t3Matrix::rotationY (T a) { return t3Matrix( T(cos(a)), T(0), T(-sin(a)), T(0), T(1), T(0), T(sin(a)), T(0), T(cos(a)) ); } template inline t3Matrix t3Matrix::rotationZ (T a) { return t3Matrix( T(cos(a)), T(-sin(a)), T(0), T(sin(a)), T(cos(a)), T(0), T(0), T(0), T(1) ); } typedef t3Matrix f3Matrix; typedef t3Matrix d3Matrix; } // namespace gravis #endif relion-3.1.3/src/jaz/gravis/.svn/text-base/t3Vector.h.svn-base000066400000000000000000000272701411340063500240400ustar00rootroot00000000000000#ifndef __LIBGRAVIS_T3VECTOR_H__ #define __LIBGRAVIS_T3VECTOR_H__ /****************************************************************************** ** Title: t3Vector.h ** Description: ** ******************************************************************************/ 
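// --- Illustrative sketch (an addition for clarity; not part of the original gravis tree) ---
// Rotating a 3D vector about an axis with the t3Matrix factories defined above.
// Note that t3Matrix::rotation(axis, angle) expects the angle in degrees (it
// converts to radians internally), whereas rotationX/Y/Z take radians.
// rotate3d_demo() is a hypothetical helper name.
#include <iostream>
#include "t3Matrix.h"

inline void rotate3d_demo()
{
  gravis::d3Vector axis(0.0, 0.0, 1.0);
  gravis::d3Matrix R = gravis::d3Matrix::rotation(axis, 90.0f);   // 90 degrees about z

  gravis::d3Vector p(1.0, 0.0, 0.0);
  std::cout << R * p << std::endl;     // approximately [0, 1, 0]
}
// --- end of sketch ---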
#include #include #include #include #include #include namespace gravis { template class t4Vector; template class t2Vector; template class tRGBA; template class t3Vector { public: typedef T scalar_type; t3Vector () : x(T(0)), y(T(0)), z(T(0)) {} explicit t3Vector (T _v) : x(_v), y(_v), z(_v) {} t3Vector (T _x, T _y, T _z) : x(_x), y(_y), z(_z) {} t3Vector (const t3Vector& vec) : x(vec.x), y(vec.y), z(vec.z) {} // Generalized Copy Constructor (allows e.g. conversion from double to float) template explicit t3Vector (const t3Vector& vec) : x(vec.x), y(vec.y), z(vec.z) {} // Initialization from Array t3Vector (const T* vecd) : x(vecd[0]), y(vecd[1]), z(vecd[2]) {} explicit t3Vector (const t4Vector& vec) : x(vec.x/vec.w), y(vec.y/vec.w), z(vec.z/vec.w) {} //rk /* Deprecated. Should use t4Vector::toVector3() which has more appropriate logic - this here makes only sense under the (wrong) assumption that all 4D vectors represent finite points - but homogeneous coordinates can also be directions, displacements or points at infinity, all with w == 0. - in any case, too much logic for a cast - BAD (mk) */ static t3Vector unitX () { return t3Vector(T(1), T(0), T(0)); } static t3Vector unitY () { return t3Vector(T(0), T(1), T(0)); } static t3Vector unitZ () { return t3Vector(T(0), T(0), T(1)); } void set (T _v) { x = y = z = _v; } void set (T _x, T _y, T _z) { x = _x; y = _y; z = _z; } T length () const { return ::sqrt(x*x + y*y + z*z); } //! Beware: This is not the 2 norm but the square of the two norm. T norm2 () const { return (x*x + y*y + z*z); } //! Squared L2 Norm T normL2sqr () const { return (x*x + y*y + z*z); } //! \f$l_1\f$ Norm: \f$\sum_i |v_i|\f$ T normL1 () const { return (std::abs(x) + std::abs(y) + std::abs(z)); } //! \f$l_2\f$ Norm: \f$\sqrt{\sum_i |v_i|^2}\f$ T normL2 () const { return sqrt(x*x + y*y + z*z); } //! \f$l_\infty\f$ Norm: \f$\max{ |v_i|\,|\, \forall i }\f$ T normLInf() const { return std::max(std::max(std::abs(x), std::abs(y)), std::abs(z)); } t3Vector findOrthogonal() const; void invert () { x = -x; y = -y; z = -z; } T dot (const t3Vector& arg) const { return (x*arg.x + y*arg.y + z*arg.z); } t3Vector cross (const t3Vector& arg) const { return t3Vector( y*arg.z - z*arg.y, z*arg.x - x*arg.z, x*arg.y - y*arg.x ); } /** * Inplace normalization **/ t3Vector& normalize (T f=T(1)) { if (f == T(0)) set(T(0), T(0), T(0)); T norm = length()/f; if (norm != T(0)) { *this /= norm; } return *this; } /*! \brief Component wise multiplication (matlab ".*"). */ t3Vector cmul(const t3Vector& v) const { return t3Vector(x * v.x, y * v.y, z * v.z); } /*! \brief Component wise division (matlab "./"). */ t3Vector cdiv(const t3Vector& v) const { return t3Vector(x / v.x, y / v.y, z / v.z); } /*! \brief Interpolate three values, using this vector as barycentric coordinates. 
*/ template Value interpolate(const Value& a, const Value& b, const Value& c) const { return x * a + y * b + z * c; } const T& operator[] (int idx) const { return (&x)[idx]; } T& operator[] (int idx) { return (&x)[idx]; } bool operator == ( const t3Vector& arg ) const { return ( x == arg.x && y == arg.y && z == arg.z ); } bool operator != ( const t3Vector& arg ) const { return !(*this == arg); } t3Vector& operator += (const t3Vector& arg) { x += arg.x; y += arg.y; z += arg.z; return *this; } t3Vector& operator -= (const t3Vector& arg) { x -= arg.x; y -= arg.y; z -= arg.z; return *this; } t3Vector& operator += (const T& scalar) { x += scalar; y += scalar; z += scalar; return *this; } t3Vector& operator -= (const T& scalar) { x -= scalar; y -= scalar; z -= scalar; return *this; } t3Vector& operator *= (T arg) { x *= arg; y *= arg; z *= arg; return *this; } t3Vector operator * (T arg) const { return t3Vector(x * arg, y * arg, z * arg); } t3Vector& operator /= (T arg) { x /= arg; y /= arg; z /= arg; return *this; } t3Vector operator / (T arg) const { return t3Vector(x / arg, y / arg, z / arg); } T dist2( const t3Vector& v ) const { return ((x-v.x)*(x-v.x)+(y-v.y)*(y-v.y)+(z-v.z)*(z-v.z)); } T dist( const t3Vector& v ) const { return ::sqrt( dist2( v ) ); } //! Check if the entries of the other vector differ by less than epsilon. // It is better to use this than to use operator== for comparision, if it is // not the same vertex. bool isClose( const t3Vector& o, const T epsilon) const { return ((std::fabs(x-o.x) < epsilon) and (std::fabs(y-o.y) < epsilon) and (std::fabs(z-o.z) < epsilon)); } static t3Vector normalize (const t3Vector& v1, T f=T(1)) { return t3Vector(v1).normalize(f); } static T dot (const t3Vector& v1, const t3Vector& v2) { return (v1.x*v2.x + v1.y*v2.y + v1.z*v2.z); } static t3Vector cross (const t3Vector& v1, const t3Vector& v2) { return t3Vector( v1.y*v2.z - v1.z*v2.y, v1.z*v2.x - v1.x*v2.z, v1.x*v2.y - v1.y*v2.x ); } public: T x, y, z; }; /*! \brief Returns minimal components of two vectors. * * This is useful for quick and dirty calculations of boundings boxes * for a set of vectors. * * TODO: This should be a static function of t3Vector */ template inline t3Vector lowerBound(const t3Vector v1, const t3Vector v2) { return t3Vector(min(v1.x, v2.x), min(v1.y, v2.y), min(v1.z, v2. z)); } /*! \brief Returns maximal components of two vectors. * * This is useful for quick and dirty calculations of boundings boxes * for a set of vectors. * * TODO: This should be a static function of t3Vector */ template inline t3Vector upperBound(const t3Vector v1, const t3Vector v2) { return t3Vector(max(v1.x, v2.x), max(v1.y, v2.y), max(v1.z, v2. z)); } /*! \brief Returns a vector orthogonal to this vector. * * E.g. for ||y|| < ||x|| and ||y|| < ||z||, the returned * vector is (z, 0, -x). 
*/ template inline t3Vector t3Vector::findOrthogonal() const { if (std::abs(y) < std::abs(z)) { // y < z if (std::abs(x) < std::abs(y)) { // x smallest return t3Vector(0, z, -y); } else { // y smallest return t3Vector(z, 0, -x); } } else { // z < y if (std::abs(x) < std::abs(z)) { // x smallest return t3Vector(0, z, -y); } else { // z smallest return t3Vector(y, -x, 0); } } } template inline t3Vector operator ~ (const t3Vector& v1) { return t3Vector(-v1.x, -v1.y, -v1.z); } template inline t3Vector operator + (const t3Vector& v1, const t3Vector& v2) { return t3Vector( v1.x + v2.x, v1.y + v2.y, v1.z + v2.z ); } template inline t3Vector operator - (const t3Vector& v1) { return t3Vector(-v1.x, -v1.y, -v1.z); } template inline t3Vector operator + (const T& s, const t3Vector& v2) { return t3Vector(s + v2.x, s + v2.y, s + v2.z); } template inline t3Vector operator - (const T& s, const t3Vector& v2) { return t3Vector(s - v2.x, s - v2.y, s - v2.z); } template inline t3Vector operator + (const t3Vector& v, const T& s) { return t3Vector(v.x + s, v.y + s, v.z + s); } template inline t3Vector operator - (const t3Vector& v, const T& s) { return t3Vector(v.x - s, v.y - s, v.z - s); } template inline t3Vector operator - (const t3Vector& v1, const t3Vector& v2) { return t3Vector( v1.x - v2.x, v1.y - v2.y, v1.z - v2.z ); } template inline t3Vector operator * (T f, const t3Vector& v) { return t3Vector(f * v.x, f * v.y, f * v.z); } template inline t3Vector operator * (const t3Vector& v, T f) { return t3Vector(f * v.x, f * v.y, f * v.z); } template inline t3Vector operator / (const t3Vector& v, T f) { return t3Vector( v.x/f, v.y/f, v.z/f ); } template inline bool operator < (const t3Vector& v1, const t3Vector& v2) { return ((v1.x < v2.x) || ((v1.x == v2.x) && (v1.y < v2.y)) || ((v1.x == v2.x) && (v1.y == v2.y) && (v1.z < v2.z))); } template inline bool operator > (const t3Vector& v1, const t3Vector& v2) { return (!((v1==v2) || (v1 inline T dot (const t3Vector& v1, const t3Vector& v2) { return (v1.x*v2.x + v1.y*v2.y + v1.z*v2.z); } template inline t3Vector cross (const t3Vector& v1, const t3Vector& v2) { return t3Vector( v1.y*v2.z - v1.z*v2.y, v1.z*v2.x - v1.x*v2.z, v1.x*v2.y - v1.y*v2.x ); } template inline std::ostream& operator<< (std::ostream& os, const t3Vector& arg) { os << std::setprecision(17) << "[" << std::setw(8) << arg.x << ", " << std::setw(8) << arg.y << ", " << std::setw(8) << arg.z << "]"; return os; } // Inverse of operator<< template inline std::istream& operator>> (std::istream& is, t3Vector& arg) { char c = ' '; is >> c; if (is.eof()) return is; if (c != '[') throw std::runtime_error("Vector should start with an opening ["); std::stringstream values; int v = 0; while ((is >> c) && (c != ']')) { if (c == ',') { v++; if (v >= 3) throw std::runtime_error("Vector contains more than three elements"); values << " "; } else if (c != ' ') values << c; } if (c != ']') { throw std::runtime_error("Vector should end with a ]"); } values >> arg.x >> arg.y >> arg.z; return is; } typedef t3Vector f3Vector; typedef t3Vector d3Vector; } // namespace gravis #endif relion-3.1.3/src/jaz/gravis/.svn/text-base/t4Matrix.h.svn-base000066400000000000000000000426261411340063500240450ustar00rootroot00000000000000#ifndef __LIBGRAVIS_T4MATRIX_H__ #define __LIBGRAVIS_T4MATRIX_H__ /****************************************************************************** ** Title: t4Matrix.h ** Description: Represents a 4x4 matrix with column-major memory layout. 
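// --- Illustrative sketch (an addition for clarity; not part of the original gravis tree) ---
// A few common t3Vector operations: dot and cross products, in-place
// normalization, and findOrthogonal(), which returns some vector orthogonal to
// the given one.  vector3_demo() is a hypothetical helper name.
#include <iostream>
#include "t3Vector.h"

inline void vector3_demo()
{
  gravis::d3Vector a(1.0, 0.0, 0.0);
  gravis::d3Vector b(0.0, 2.0, 0.0);

  std::cout << dot(a, b)   << std::endl;   // 0
  std::cout << cross(a, b) << std::endl;   // [0, 0, 2]

  gravis::d3Vector c = b;
  c.normalize();                           // in place; c is now [0, 1, 0]

  gravis::d3Vector o = a.findOrthogonal();
  std::cout << dot(a, o) << std::endl;     // 0 by construction
}
// --- end of sketch ---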
** ** Author: Jean-Sebastien Pierrard, 2005 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #include #include "t4Vector.h" #include "private/tDeterminants.h" #include "t3Matrix.h" namespace gravis { /*! \brief A 4x4 matrix class. * * There is no operator*=, because some people expect it to be a left-multiplication * and others a right-multiplication. To avoid confusion we only provide the explicit * methods lmul() and rmul(). */ template class t4Matrix { public: T m[16]; typedef T scalar_type; t4Matrix(); explicit t4Matrix(T val); t4Matrix(const T* v_ptr); t4Matrix(const t4Matrix& mat); t4Matrix(const t3Matrix& mat); t4Matrix(T m0, T m4, T m8, T m12, T m1, T m5, T m9, T m13, T m2, T m6, T m10, T m14, T m3, T m7, T m11, T m15); template explicit t4Matrix (const t4Matrix& mat) { for(int i=0; i<16; ++i) m[i] = static_cast(mat.m[i]); } void set( T m0, T m4, T m8, T m12, T m1, T m5, T m9, T m13, T m2, T m6, T m10, T m14, T m3, T m7, T m11, T m15); //! Check if the entries of the other vector differ by less than epsilon. // It is better to use this than to use operator== for comparision, if it is // not the same vertex. bool isClose( const t4Matrix& o, const T epsilon) const { for (int i=0; i<16; i++) if (std::fabs(m[i]-o.m[i]) >= epsilon) return false; return true; } bool operator==(const t4Matrix& o) const { for (int i=0; i<16; i++) if (m[i] != o.m[i]) return false; return true; } bool operator!=(const t3Matrix &o) const { return !(*this == o); } /*! \brief Return indexed entry (column major). */ const T& operator[] (int idx) const { return m[idx]; } /*! \brief Return reference to indexed entry (column major). */ T& operator[] (int idx) { return m[idx]; } /*! \brief Return entry in row i and column j. */ const T& operator() (int row, int col) const { return m[col * 4 + row]; } /*! \brief Return reference to entry in row i and column j. */ T& operator() (int row, int col) { return m[col * 4 + row]; } t4Matrix operator*(T f) const; t4Matrix& operator*=(T f); t4Matrix operator/(T f) const; t4Matrix& operator/=(T f); t4Vector operator*(const t4Vector&) const; t4Matrix operator*(const t4Matrix&) const; t4Matrix& operator+=(const t4Matrix&); t4Matrix& operator-=(const t4Matrix&); t4Matrix operator+(const t4Matrix&) const; t4Matrix operator-(const t4Matrix&) const; t4Matrix operator-() const; t4Matrix& lmul(const t4Matrix& m); t4Matrix& rmul(const t4Matrix& m); T trace() const; T det() const; t4Matrix& transpose(); t4Matrix& invert(); t4Matrix& loadIdentity(); t4Matrix& copy(const t3Matrix& mat); static t4Matrix translation(const t3Vector&); static t4Matrix scale(const t3Vector&); static t4Matrix scale (const T& s) { return scale(t3Vector(s,s,s)); } static t4Matrix rotation(const t3Vector& u, const t3Vector& v); static t4Matrix rotation(const t3Vector& axis, float angle); static t4Matrix rotationX(T angle); static t4Matrix rotationY(T angle); static t4Matrix rotationZ(T angle); }; /*! \brief Constructs an identity matrix. */ template inline t4Matrix::t4Matrix() { loadIdentity(); } /*! \brief Constructs a matrix with all entries set to val. */ template inline t4Matrix::t4Matrix(T val) { for (int i = 0; i < 16; i++) m[i] = val; } /*! \brief Constructs a matrix with entries taken from an array. * * \param v_ptr array must be of appropriate length and in column-major layout */ template inline t4Matrix::t4Matrix(const T* v_ptr) { for (int i = 0; i < 16; i++) m[i] = v_ptr[i]; } /*! \brief Copy constructor. 
*/ template inline t4Matrix::t4Matrix(const t4Matrix& mat) { for (int i = 0; i < 16; i++) m[i] = mat.m[i]; } /*! \brief Copy constructor. */ template inline t4Matrix::t4Matrix(const t3Matrix& mat) { m[ 0] = mat[0]; m[ 1] = mat[1]; m[ 2] = mat[2]; m[ 3] = 0.f; m[ 4] = mat[3]; m[ 5] = mat[4]; m[ 6] = mat[5]; m[ 7] = 0.f; m[ 8] = mat[6]; m[ 9] = mat[7]; m[10] = mat[8]; m[11] = 0.f; m[12] = 0.f; m[13] = 0.f; m[14] = 0.f; m[15] = 1.f; } /*! \brief Constructs a matrix from the given entries (row major). */ template inline t4Matrix::t4Matrix (T m0, T m4, T m8, T m12, T m1, T m5, T m9, T m13, T m2, T m6, T m10, T m14, T m3, T m7, T m11, T m15) { m[ 0] = m0; m[ 1] = m1; m[ 2] = m2; m[ 3] = m3; m[ 4] = m4; m[ 5] = m5; m[ 6] = m6; m[ 7] = m7; m[ 8] = m8; m[ 9] = m9; m[10] = m10; m[11] = m11; m[12] = m12; m[13] = m13; m[14] = m14; m[15] = m15; } /*! \brief Overwrites this matrix with the given entries (row major). */ template inline void t4Matrix::set (T m0, T m4, T m8, T m12, T m1, T m5, T m9, T m13, T m2, T m6, T m10, T m14, T m3, T m7, T m11, T m15 ) { m[ 0] = m0; m[ 1] = m1; m[ 2] = m2; m[ 3] = m3; m[ 4] = m4; m[ 5] = m5; m[ 6] = m6; m[ 7] = m7; m[ 8] = m8; m[ 9] = m9; m[10] = m10; m[11] = m11; m[12] = m12; m[13] = m13; m[14] = m14; m[15] = m15; } /*! \brief Scalar times matrix. */ template inline t4Matrix operator*(T f, const t4Matrix& mat) { t4Matrix out(mat); out *= f; return out; } /*! \brief Matrix times scalar. */ template inline t4Matrix t4Matrix::operator*(T f) const { t4Matrix out(*this); out *= f; return out; } /*! \brief Multiply this matrix with a scalar. */ template inline t4Matrix& t4Matrix::operator*=(T f) { for (int i = 0; i < 16; i++) m[i] *= f; return *this; } /*! \brief Matrix divided by scalar. */ template inline t4Matrix t4Matrix::operator/(const T f) const { t4Matrix out(*this); out /= f; return out; } /*! \brief Divide this matrix by a scalar. */ template inline t4Matrix& t4Matrix::operator/=(const T f) { for (int i = 0; i < 16; i++) m[i] /= f; return *this; } /*! \brief Matrix times vector. */ template inline t4Vector t4Matrix::operator*(const t4Vector& op) const { return t4Vector( m[ 0]*op.x + m[ 4]*op.y + m[ 8]*op.z + m[12]*op.w, m[ 1]*op.x + m[ 5]*op.y + m[ 9]*op.z + m[13]*op.w, m[ 2]*op.x + m[ 6]*op.y + m[10]*op.z + m[14]*op.w, m[ 3]*op.x + m[ 7]*op.y + m[11]*op.z + m[15]*op.w ); } /*! \brief Matrix times matrix. */ template inline t4Matrix t4Matrix::operator* (const t4Matrix& op) const { return t4Matrix( m[0]*op.m[ 0] + m[4]*op.m[ 1] + m[8]*op.m[ 2] + m[12]*op.m[ 3], // ROW 1 m[0]*op.m[ 4] + m[4]*op.m[ 5] + m[8]*op.m[ 6] + m[12]*op.m[ 7], m[0]*op.m[ 8] + m[4]*op.m[ 9] + m[8]*op.m[10] + m[12]*op.m[11], m[0]*op.m[12] + m[4]*op.m[13] + m[8]*op.m[14] + m[12]*op.m[15], m[1]*op.m[ 0] + m[5]*op.m[ 1] + m[9]*op.m[ 2] + m[13]*op.m[ 3], // ROW 2 m[1]*op.m[ 4] + m[5]*op.m[ 5] + m[9]*op.m[ 6] + m[13]*op.m[ 7], m[1]*op.m[ 8] + m[5]*op.m[ 9] + m[9]*op.m[10] + m[13]*op.m[11], m[1]*op.m[12] + m[5]*op.m[13] + m[9]*op.m[14] + m[13]*op.m[15], m[2]*op.m[ 0] + m[6]*op.m[ 1] + m[10]*op.m[ 2] + m[14]*op.m[ 3], // ROW 3 m[2]*op.m[ 4] + m[6]*op.m[ 5] + m[10]*op.m[ 6] + m[14]*op.m[ 7], m[2]*op.m[ 8] + m[6]*op.m[ 9] + m[10]*op.m[10] + m[14]*op.m[11], m[2]*op.m[12] + m[6]*op.m[13] + m[10]*op.m[14] + m[14]*op.m[15], m[3]*op.m[ 0] + m[7]*op.m[ 1] + m[11]*op.m[ 2] + m[15]*op.m[ 3], // ROW 4 m[3]*op.m[ 4] + m[7]*op.m[ 5] + m[11]*op.m[ 6] + m[15]*op.m[ 7], m[3]*op.m[ 8] + m[7]*op.m[ 9] + m[11]*op.m[10] + m[15]*op.m[11], m[3]*op.m[12] + m[7]*op.m[13] + m[11]*op.m[14] + m[15]*op.m[15] ); } /*! 
\brief Adds other matrix to this matrix. */ template inline t4Matrix& t4Matrix::operator+=(const t4Matrix& op) { for (int i = 0; i < 16; i++) m[i] += op.m[i]; return *this; } /*! \brief Subtracts other matrix from this matrix. */ template inline t4Matrix& t4Matrix::operator-=(const t4Matrix& op) { *this += -op; return *this; } /*! \brief Matrix plus matrix. */ template inline t4Matrix t4Matrix::operator+(const t4Matrix& op) const { t4Matrix out(*this); return out += op; } /*! \brief Matrix minus matrix. */ template inline t4Matrix t4Matrix::operator-(const t4Matrix& op) const { t4Matrix out(*this); return out += -op; } /*! \brief Return additive inverse of this matrix. */ template inline t4Matrix t4Matrix::operator-() const { t4Matrix out(*this); for (int i = 0; i < 16; i++) out[i] = -out[i]; return out; } /*! \brief Right-multiply m to this matrix (*this = *this * m). */ template inline t4Matrix& t4Matrix::rmul(const t4Matrix& m) { *this = *this * m; return *this; } /*! \brief Left-multiply m to this matrix (*this = m * *this). */ template inline t4Matrix& t4Matrix::lmul(const t4Matrix& m) { *this = m * *this; return *this; } /*! \brief Return the trace of this matrix (\f$a_{11} + a_{22} + a_{33} + a_{44}\f$). */ template inline T t4Matrix::trace() const { return ( m[0] + m[5] + m[10] + m[15] ); } /*! \brief Return the determinant of this matrix. */ template inline T t4Matrix::det() const { return det4x4(m[ 0], m[ 4], m[ 8], m[12], m[ 1], m[ 5], m[ 9], m[13], m[ 2], m[ 6], m[10], m[14], m[ 3], m[ 7], m[11], m[15]); } /*! \brief Transpose this matrix. * Attention: Although innocent looking this is an inplace operation **/ template inline t4Matrix& t4Matrix::transpose () { std::swap(m[1], m[4]); std::swap(m[2], m[8]); std::swap(m[3], m[12]); std::swap(m[6], m[9]); std::swap(m[7], m[13]); std::swap(m[11], m[14]); return *this; } /*! \brief Invert this matrix. 
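*
*  Added illustrative sketch (not from the original comment): because the
*  inversion happens in place, keep a copy if the original matrix is still
*  needed. Assumes the d4Matrix/d4Vector/d3Vector typedefs from these headers.
*
*    d4Matrix M = d4Matrix::translation(d3Vector(1.0, 2.0, 3.0));
*    d4Matrix Minv = M;             // copy first ...
*    Minv.invert();                 // ... then invert the copy in place
*    d4Vector p(4.0, 5.0, 6.0, 1.0);
*    d4Vector q = Minv * (M * p);   // q is numerically close to p again
*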
* Attention: Although innocent looking this is an inplace operation **/ template inline t4Matrix& t4Matrix::invert() { T det, oodet; t4Matrix A = *this; (*this)(0,0) = det3x3(A(1,1), A(2,1), A(3,1), A(1,2), A(2,2), A(3,2), A(1,3), A(2,3), A(3,3)); (*this)(1,0) = -det3x3(A(1,0), A(2,0), A(3,0), A(1,2), A(2,2), A(3,2), A(1,3), A(2,3), A(3,3)); (*this)(2,0) = det3x3(A(1,0), A(2,0), A(3,0), A(1,1), A(2,1), A(3,1), A(1,3), A(2,3), A(3,3)); (*this)(3,0) = -det3x3(A(1,0), A(2,0), A(3,0), A(1,1), A(2,1), A(3,1), A(1,2), A(2,2), A(3,2)); (*this)(0,1) = -det3x3(A(0,1), A(2,1), A(3,1), A(0,2), A(2,2), A(3,2), A(0,3), A(2,3), A(3,3)); (*this)(1,1) = det3x3(A(0,0), A(2,0), A(3,0), A(0,2), A(2,2), A(3,2), A(0,3), A(2,3), A(3,3)); (*this)(2,1) = -det3x3(A(0,0), A(2,0), A(3,0), A(0,1), A(2,1), A(3,1), A(0,3), A(2,3), A(3,3)); (*this)(3,1) = det3x3(A(0,0), A(2,0), A(3,0), A(0,1), A(2,1), A(3,1), A(0,2), A(2,2), A(3,2)); (*this)(0,2) = det3x3(A(0,1), A(1,1), A(3,1), A(0,2), A(1,2), A(3,2), A(0,3), A(1,3), A(3,3)); (*this)(1,2) = -det3x3(A(0,0), A(1,0), A(3,0), A(0,2), A(1,2), A(3,2), A(0,3), A(1,3), A(3,3)); (*this)(2,2) = det3x3(A(0,0), A(1,0), A(3,0), A(0,1), A(1,1), A(3,1), A(0,3), A(1,3), A(3,3)); (*this)(3,2) = -det3x3(A(0,0), A(1,0), A(3,0), A(0,1), A(1,1), A(3,1), A(0,2), A(1,2), A(3,2)); (*this)(0,3) = -det3x3(A(0,1), A(1,1), A(2,1), A(0,2), A(1,2), A(2,2), A(0,3), A(1,3), A(2,3)); (*this)(1,3) = det3x3(A(0,0), A(1,0), A(2,0), A(0,2), A(1,2), A(2,2), A(0,3), A(1,3), A(2,3)); (*this)(2,3) = -det3x3(A(0,0), A(1,0), A(2,0), A(0,1), A(1,1), A(2,1), A(0,3), A(1,3), A(2,3)); (*this)(3,3) = det3x3(A(0,0), A(1,0), A(2,0), A(0,1), A(1,1), A(2,1), A(0,2), A(1,2), A(2,2)); det = (A(0,0) * (*this)(0,0)) + (A(0,1) * (*this)(1,0)) + (A(0,2) * (*this)(2,0)) + (A(0,3) * (*this)(3,0)); oodet = T(1) / det; *this *= oodet; return *this; } /*! \brief Overwrite this matrix with an identity matrix. */ template inline t4Matrix& t4Matrix::loadIdentity () { m[ 0] = T(1); m[ 1] = T(0); m[ 2] = T(0); m[ 3] = T(0); m[ 4] = T(0); m[ 5] = T(1); m[ 6] = T(0); m[ 7] = T(0); m[ 8] = T(0); m[ 9] = T(0); m[10] = T(1); m[11] = T(0); m[12] = T(0); m[13] = T(0); m[14] = T(0); m[15] = T(1); return *this; } /*! \brief Copies the 3x3 matrix into the upper left corner of this * instance. */ template inline t4Matrix& t4Matrix::copy(const t3Matrix& mat) { for (int j = 0; j < 3; j++) for (int i = 0; i < 3; i++) m[4 * j + i] = mat.m[3 * j + i]; return *this; } /*! \brief Return a matrix representing a translation by t. */ template inline t4Matrix t4Matrix::translation(const t3Vector& t) { return t4Matrix(T(1), T(0), T(0), t.x, T(0), T(1), T(0), t.y, T(0), T(0), T(1), t.z, T(0), T(0), T(0), T(1) ); } /*! \brief Return a matrix represnting a scaling by s. */ template inline t4Matrix t4Matrix::scale (const t3Vector& s) { return t4Matrix( s.x, T(0), T(0), T(0), T(0), s.y, T(0), T(0), T(0), T(0), s.z, T(0), T(0), T(0), T(0), T(1) ); } /*! Return a matrix that will rotate u into v. */ template inline t4Matrix t4Matrix::rotation(const t3Vector& u, const t3Vector& v) { t4Matrix out; out.copy(t3Matrix::rotation(u, v)); return out; } /*! \brief Return a matrix that rotates by specified angle (in degrees) around specified axis. 
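*
*  Added illustrative sketch (not part of the original comment), assuming the
*  angle is indeed interpreted in degrees as stated above and that
*  t3Matrix::rotation uses the usual right-handed convention:
*
*    d4Matrix R = d4Matrix::rotation(d3Vector(0.0, 0.0, 1.0), 90.0f);
*    d4Vector p = R * d4Vector(1.0, 0.0, 0.0, 1.0);
*    // p should now be roughly (0, 1, 0, 1): the x axis rotated onto the y axis
*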
*/ template inline t4Matrix t4Matrix::rotation(const t3Vector& axis, float angle) { t4Matrix out; out.copy(t3Matrix::rotation(axis, angle)); return out; } template inline t4Matrix t4Matrix::rotationX (T a) { return t4Matrix( T(1), T(0), T(0), T(0), T(0), T(cos(a)), T(-sin(a)), T(0), T(0), T(sin(a)), T(cos(a)), T(0), T(0), T(0), T(0), T(1) ); } template inline t4Matrix t4Matrix::rotationY (T a) { // ATTENTION!!! This is actually wrong!, -sin is in the first column // but this could disrupt everything!!! // Sandro Schoenborn, 2013-04-09, sandro.schoenborn@unibas.ch // Clemens Blumer, 2013-04-09, clemens.blumer@unibas.ch return t4Matrix( T(cos(a)), T(0), T(-sin(a)), T(0), T(0), T(1), T(0), T(0), T(sin(a)), T(0), T(cos(a)), T(0), T(0), T(0), T(0), T(1) ); } template inline t4Matrix t4Matrix::rotationZ (T a) { return t4Matrix( T(cos(a)), T(-sin(a)), T(0), T(0), T(sin(a)), T(cos(a)), T(0), T(0), T(0), T(0), T(1), T(0), T(0), T(0), T(0), T(1) ); } // TODO: Set Fixed Precision template inline std::ostream& operator<< (std::ostream& os, const t4Matrix& arg) { os << "[ " << arg[ 0] << " " << arg[ 4] << " " << arg[ 8] << " " << arg[12] << " ]\n"; os << "| " << arg[ 1] << " " << arg[ 5] << " " << arg[ 9] << " " << arg[13] << " |\n"; os << "| " << arg[ 2] << " " << arg[ 6] << " " << arg[10] << " " << arg[14] << " |\n"; os << "[ " << arg[ 3] << " " << arg[ 7] << " " << arg[11] << " " << arg[15] << " ]\n"; return os; } template inline std::istream& operator>> ( std::istream& is, t4Matrix& arg) { std::string dummy; is >> dummy >> arg[ 0] >> arg[ 4] >> arg[ 8] >> arg[12] >> dummy; is >> dummy >> arg[ 1] >> arg[ 5] >> arg[ 9] >> arg[13] >> dummy; is >> dummy >> arg[ 2] >> arg[ 6] >> arg[10] >> arg[14] >> dummy; is >> dummy >> arg[ 3] >> arg[ 7] >> arg[11] >> arg[15] >> dummy; return is; } typedef t4Matrix f4Matrix; typedef t4Matrix d4Matrix; } // namespace gravis #endif relion-3.1.3/src/jaz/gravis/.svn/text-base/t4Vector.h.svn-base000066400000000000000000000171721411340063500240410ustar00rootroot00000000000000#ifndef __LIBGRAVIS_T4VECTOR_H__ #define __LIBGRAVIS_T4VECTOR_H__ /****************************************************************************** ** Title: t4Vector.h ** Description: Represents a four dimensional vector (3D+homogeneous comp.). ** ** Author: Jean-Sebastien Pierrard, 2005 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #include #include #include #include namespace gravis { template class t3Vector; template class t4Vector { public: T x, y, z, w; typedef T scalar_type; t4Vector () : x(T(0)), y(T(0)), z(T(0)), w(T(1)) { } explicit t4Vector (T _v) : x(_v), y(_v), z(_v), w(_v) { } t4Vector (T _x, T _y, T _z, T _w=T(1)) : x(_x), y(_y), z(_z), w(_w) { } /*! \brief Construct a 4D vector with w = 1. */ explicit t4Vector (const t3Vector& vec) : x(vec.x), y(vec.y), z(vec.z), w(1.0) { } template explicit t4Vector (const t4Vector& vec) : x(vec.x), y(vec.y), z(vec.z), w(vec.w) {} t4Vector (const t4Vector& vec) : x(vec.x), y(vec.y), z(vec.z), w(vec.w) { } static t4Vector unitX () { return t4Vector(T(1), T(0), T(0), T(1)); } static t4Vector unitY () { return t4Vector(T(0), T(1), T(0), T(1)); } static t4Vector unitZ () { return t4Vector(T(0), T(0), T(1), T(1)); } void set (T _v) { x = y = z = _v; w = T(1); } void set (T _x, T _y, T _z, T _w=T(1)) { x = _x; y = _y; z = _z; w = _w; } //! Beware: This is not the 2 norm but the square of the two norm. T norm2 () const { return (x*x + y*y + z*z + w*w); } //! 
\f$l_1\f$ Norm: \f$\sum_i |v_i|\f$ T normL1 () const { return (std::abs(x) + std::abs(y) + std::abs(z) + std::abs(w)); } //! \f$l_2\f$ Norm: \f$\sqrt{\sum_i |v_i|^2}\f$ T normL2 () const { return sqrt(x*x + y*y + z*z + w*w); } //! \f$l_\infty\f$ Norm: \f$\max{ |v_i|\,|\, \forall i }\f$ T normLInf() const { return std::max(std::max(std::max(std::abs(x), std::abs(y)), std::abs(z)), std::abs(w)); } void invert () { x = -x; y = -y; z = -z; w = -w; } T dot (const t4Vector& arg) const { return (x*arg.x + y*arg.y + z*arg.z + w*arg.w); } void divideW () { x /= w; y /= w; z /= w; w = T(1); } /*! \brief Return a 3D vector corresponding to this 4D vector. * * If the w coordinate is 0, the vector is considered a direction or displacement, * and (x,y,z) is returned. Otherwise, the vector is considered a point, and * (x/w, y/w, z/w) is returned. */ t3Vector toVector3() const { if (w == 0) return t3Vector(x, y, z); else return t3Vector(x/w, y/w, z/w); } /*! \brief Return the euclidian norm of this 4D vector. * * Note, that there is no special treatment of the w-coordinate. * The result is simply \f$\sqrt{x^2+y^2+z^2+w^2}\f$. */ T length () const { return T(::sqrt(x*x + y*y + z*z + w*w)); } t4Vector& normalize (T f=T(1)) { if (f == T(0)) set(T(0), T(0), T(0), T(0)); T norm = length()/f; if (norm != T(0)) { *this /= norm; } return *this; } const T& operator[] (int idx) const { #ifdef _GRAVIS_DEBUG_RANGECHECKING_ assert((idx >= 0) && (idx < 4)); #endif return (&x)[idx]; } T& operator[] (int idx) { #ifdef _GRAVIS_DEBUG_RANGECHECKING_ assert((idx >= 0) && (idx < 4)); #endif return (&x)[idx]; } bool operator == ( const t4Vector& arg ) const { return ( x == arg.x && y == arg.y && z == arg.z && w == arg.w); } bool operator != ( const t4Vector& arg ) const { return !(*this == arg); } t4Vector& operator += (const t4Vector& arg) { x += arg.x; y += arg.y; z += arg.z; w += arg.w; return *this; } t4Vector& operator -= (const t4Vector& arg) { x -= arg.x; y -= arg.y; z -= arg.z; w -= arg.w; return *this; } t4Vector& operator += (const T& scalar) { x += scalar; y += scalar; z += scalar; w += scalar; return *this; } t4Vector& operator -= (const T& scalar) { x -= scalar; y -= scalar; z -= scalar; w -= scalar; return *this; } t4Vector& operator *= (const T& arg) { x *= arg; y *= arg; z *= arg; w *= arg; return *this; } t4Vector& operator /= (const T& arg) { x /= arg; y /= arg; z /= arg; w /= arg; return *this; } //! Check if the entries of the other vector differ by less than epsilon. // It is better to use this than to use operator== for comparision, if it is // not the same vertex. 
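// Added illustrative sketch (not from the original comment); small rounding
// errors make exact == comparisons unreliable after floating-point arithmetic:
//   f4Vector a(0.1f, 0.2f, 0.3f, 1.0f);
//   f4Vector b = 0.7f * a + 0.3f * a;   // mathematically equal to a
//   bool same = a.isClose(b, 1e-6f);    // true, whereas (a == b) may fail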
bool isClose( const t4Vector& o, const T epsilon) const { return ((std::fabs(x-o.x) < epsilon) and (std::fabs(y-o.y) < epsilon) and (std::fabs(z-o.z) < epsilon) and (std::fabs(w-o.w) < epsilon)); } static t4Vector normalize (const t4Vector& v1, T f=T(1)) { return t4Vector(v1).normalize(); } static T dot (const t4Vector& v1, const t4Vector& v2) { return (v1.x*v2.x + v1.y*v2.y + v1.z*v2.z + v1.w*v2.w); } }; template inline t4Vector operator + (const t4Vector& v1, const t4Vector& v2) { return t4Vector( v1.x + v2.x, v1.y + v2.y, v1.z + v2.z, v1.w + v2.w ); } template inline t4Vector operator - (const t4Vector& v1) { return t4Vector(-v1.x, -v1.y, -v1.z, -v1.w); } template inline t4Vector operator - (const t4Vector& v1, const t4Vector& v2) { return t4Vector( v1.x - v2.x, v1.y - v2.y, v1.z - v2.z, v1.w - v2.w ); } template inline t4Vector operator + (const T& s, const t4Vector& v2) { return t4Vector(s + v2.x, s + v2.y, s + v2.z, s + v2.w); } template inline t4Vector operator - (const T& s, const t4Vector& v2) { return t4Vector(s - v2.x, s - v2.y, s - v2.z, s - v2.w); } template inline t4Vector operator + (const t4Vector& v, const T& s) { return t4Vector(v.x + s, v.y + s, v.z + s, v.w + s); } template inline t4Vector operator - (const t4Vector& v, const T& s) { return t4Vector(v.x - s, v.y - s, v.z - s, v.w - s); } template inline t4Vector operator * (T f, const t4Vector& v) { return t4Vector(f * v.x, f * v.y, f * v.z, f * v.w); } template inline t4Vector operator * (const t4Vector& v, T f) { return t4Vector(f * v.x, f * v.y, f * v.z, f * v.w); } template inline std::ostream& operator<< (std::ostream& os, const t4Vector& arg) { os << "[" << arg.x << ", " << arg.y << ", " << arg.z << ", " << arg.w << "]"; return os; } typedef t4Vector f4Vector; typedef t4Vector d4Vector; } /* Close Namespace "gravis" */ #endif relion-3.1.3/src/jaz/gravis/.svn/text-base/tArray.h.svn-base000066400000000000000000000245241411340063500235700ustar00rootroot00000000000000#ifndef __LIBGRAVIS_T_ARRAY_H__ #define __LIBGRAVIS_T_ARRAY_H__ /****************************************************************************** ** Title: tArray.h ** Description: Implements a one dimensional array with reference counting. ** ** Author: Jean-Sebastien Pierrard, 2005 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #include #include #include #include #include "private/tRefCPtr.h" /*! ** \file tArray.h */ namespace gravis { /*! ** \class tArray ** \brief Implements a one dimensional array with reference counting. */ template class tArray { public: typedef T value_type; tArray (); tArray (size_t); tArray (T* data, size_t nel, bool deleteData); tArray (const tArray&); tArray& operator=(const tArray&); tArray (const std::vector&); ~tArray (); tArray clone() const; tArray safeClone() const; //! Deprecating this, as it is not standard. Use resize() instead. tArray& setSize (size_t); //! Useful alias to setSize. At some point we should switch completely to the std library. tArray& resize (size_t s) { return setSize(s); }; void fill (T); void fill (T, size_t, size_t); size_t size () const; const T& operator[] (size_t) const; T& operator[] (size_t); const T* data () const; T* data (); bool operator==(const tArray& other) const; bool operator!=(const tArray& other) const; operator std::vector() const { const tArray &self = *this; const size_t l=size(); std::vector result(l); for (size_t i=0; i p_smp; T* p_data; size_t length; }; /*! 
** \class tConstArray ** \brief Read-only wrapper for tArray. * * Since tArray is really a pointer, "const tArray" does protect the data, * but also protects the pointer! Assume I want a class that keeps a * pointer to data (= tArray), and needs only read access. We also want to * change the pointer once in a while. * \code * class X { * // tArray readOnly; // BAD! can manipulate data * // const tArray readOnly; // cannot manipulate data, but cannot change readOnly * tConstArray readOnly; // solution * public: * void setArray(tConstArray a) { * readOnly = a; * } * }; * \endcode */ template class tConstArray { private: tArray ta; public: tConstArray() {} tConstArray(tArray& ta) : ta(ta) {} tArray clone() const { return ta.clone(); } tArray safeClone() const { return ta.safeClone(); } size_t size() const { return ta.size(); } const T& operator[](size_t i) const { return ta[i]; } const T* data() const { return ta.data(); } bool operator==(const tArray& other) const { return ta == other; } bool operator!=(const tArray& other) const { return ta != other; } bool operator==(const tConstArray& other) const { return ta == other.ta; } bool operator!=(const tConstArray& other) const { return ta == other.ta; } const tConstArray& operator=(tArray& ta) { this->ta = ta; return *this; } const tConstArray& operator=(const tConstArray& other) { this->ta = other.ta; return *this; } }; /*! ** \brief Default constructor */ template inline tArray::tArray () : p_smp (), p_data(), length() { this->allocArray(0); } /*! ** \brief Constructor. ** \param nel Number of elements to allocate for this tArray. */ template inline tArray::tArray (size_t nel) : p_smp (), p_data(), length() { this->allocArray(nel); } template inline tArray::tArray (T* data, size_t nel, bool deleteData) : p_smp(), p_data(data), length(nel) { if (deleteData) p_smp = priv::tRefCPtr(p_data, priv::tRefCPtr::ALLOC_ARRAY, 1); else p_smp = priv::tRefCPtr(p_data, priv::tRefCPtr::ALLOC_ARRAY, 2); } /*! ** \brief Copy-constructor ** ** The copy-constructor has reference-semantic, i.e. the managed data is not ** copied. Instead a new handle to the same data is created. ** ** \param rhs The array to be copied */ template inline tArray::tArray (const tArray& rhs) : p_smp (rhs.p_smp), p_data(rhs.p_data), length(rhs.length) { } /*! ** \brief Assignment ** ** The assignment has reference-semantic, i.e. the managed data is not ** copied. Instead a new handle to the same data is created. ** ** \param rhs The array to be assigned */ template inline tArray &tArray::operator=(const tArray& rhs) { p_smp = rhs.p_smp; p_data = rhs.p_data; length = rhs.length; return *this; } /*! ** \brief Construct from std vector ** ** \param rhs The std vector from which the data is copied. This construction does not create a reference, but actually copies the data. */ template inline tArray::tArray (const std::vector& rhs) { this->allocArray(rhs.size()); for (size_t i=0; i inline tArray::~tArray () { } /*! ** \brief Create a deep-copy of managed data. ** \return A new tArray object. ** ** Use this version of clone unless your datatype is simple ** (e.g. tVector, size_t, Tuple2...) */ template inline tArray tArray::safeClone() const { tArray lhs(length); for (size_t i=0; i object. ** ** \warning This method creates a byte-wise copy of the managed data. When ** applied to compound types (e.g. T=std::vector) or reference counted ** types like std::string it will create crashes use save_clone() unless your datatype is simple. 
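**
** Added illustrative sketch (not part of the original comment): the difference
** between handle assignment, clone() and safeClone(), assuming a simple element
** type such as int.
**
**   tArray<int> a(3);
**   a.fill(1);
**   tArray<int> b = a;               // reference semantics: b shares a's data
**   b[0] = 7;                        // a[0] is now 7 as well
**   tArray<int> c = a.clone();       // byte-wise deep copy, fine for plain types
**   tArray<int> d = a.safeClone();   // element-wise deep copy, safe for any type
**   c[1] = 9;                        // a is unaffected
**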
*/ template inline tArray tArray::clone () const { tArray lhs(length); memcpy(lhs.p_data, p_data, length*sizeof(T)); return lhs; } /*! ** \brief Fill array with constant value. ** ** \param value Value to fill with. */ template inline void tArray::fill (T value) { const T* end_ptr = p_data + length; for (T* t_ptr=p_data; t_ptr inline void tArray::fill (T value, size_t from, size_t to) { if (from >= length) from = length; if (to >= length) to = length; T* end_ptr = p_data + to; for (T* t_ptr=p_data+from; t_ptr inline tArray& tArray::setSize (size_t nel) { this->allocArray(nel); return *this; } /*! ** \brief Get number of elements. ** ** \return Number of T-elements in array. */ template inline size_t tArray::size () const { return length; } /*! ** \brief Access i-th element. ** \param i Index into array. ** \return const-Reference to i-th element. */ template inline const T& tArray::operator[] (size_t i) const { #ifdef _GRAVIS_DEBUG_RANGECHECKING_ assert( i < length ); #endif return p_data[i]; } /*! ** \brief Access i-th element. ** \param i Index into array. ** \return Reference to i-th element. */ template inline T& tArray::operator[] (size_t i) { #ifdef _GRAVIS_DEBUG_RANGECHECKING_ assert( i < length ); #endif return p_data[i]; } /*! ** \brief Perform element-by-element comparison. */ template inline bool tArray::operator==(const tArray& other) const { return !(*this != other); } /*! ** \brief Perform element-by-element comparison. */ template inline bool tArray::operator!=(const tArray& other) const { if (p_data == other.p_data) return false; else if (length != other.length) return true; else { for (size_t i = 0; i < length; i++) { if (p_data[i] != other.p_data[i]) return true; } } return false; } /*! ** \brief Get pointer to managed data. ** \return const-Pointer to first element of managed data. */ template inline const T* tArray::data () const { return p_data; } /*! ** \brief Get pointer to managed data. ** \return Pointer to first element of managed data. */ template inline T* tArray::data () { return p_data; } template inline void tArray::allocArray (size_t nel) { if (nel <= 0) { p_data = 0; length = 0; p_smp = priv::tRefCPtr(p_data, priv::tRefCPtr::ALLOC_ARRAY); } else { // ATTENTION! Bug: this leaks!! ... delete old memory // Sandro Schoenborn, 2013-04-09, sandro.schoenborn@unibas.ch // Tobias Maier, 2013-04-09, tobias.maier@unibas.ch p_data = new T[nel]; length = nel; p_smp = priv::tRefCPtr(p_data, priv::tRefCPtr::ALLOC_ARRAY); } } } /* Close namespace "gravis" */ #endif relion-3.1.3/src/jaz/gravis/.svn/text-base/tBGR.h.svn-base000066400000000000000000000126241411340063500231220ustar00rootroot00000000000000#ifndef __LIBGRAVIS_T_BGR_H__ #define __LIBGRAVIS_T_BGR_H__ /****************************************************************************** ** Title: tBGR.h ** Description: Represents an BGR color tupel. 
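**
**              Added illustrative note (not in the original header): the
**              constructor takes the channels in b, g, r order, matching the
**              memory layout of the struct.
**
**                fBGR c(0.9f, 0.5f, 0.1f);   // b = 0.9, g = 0.5, r = 0.1
**                float y = c.grayValue();    // 0.30*r + 0.59*g + 0.11*b
**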
** ** Author: ** ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #include namespace gravis { template struct tBGR { T b, g, r; typedef T scalar_type; tBGR () : b(T(0)), g(T(0)), r(T(0)) { } tBGR (T _b, T _g, T _r) : b(_b), g(_g), r(_r) { } tBGR (T gray) : b(gray), g(gray), r(gray) { } void set (T _b, T _g, T _r) { r = _r; g = _g; b = _b; } void set (T gray) { r = gray; b = gray; g = gray; } void add (T _r, T _g, T _b) { r += _r; g += _g; b += _b; } void add (T gray) { r += gray; g += gray; b += gray; } T grayValue () const { return (T)(0.30f*r + 0.59f*g + 0.11f*b); } T minValue () const { if (r < g) { if (r < b) return r; else return b; } else { if (g < b) return g; else return b; } } T maxValue () const { if (r > g) { if (r > b) return r; else return b; } else { if (g > b) return g; else return b; } } tBGR& operator += (const tBGR& c) { r += c.r; g += c.g; b += c.b; return *this; } tBGR& operator += (const T gray) { r += gray; g += gray; b += gray; return *this; } tBGR& operator -= (const tBGR& c) { r -= c.r; g -= c.g; b -= c.b; return *this; } tBGR& operator -= (const T gray) { r -= gray; g -= gray; b -= gray; return *this; } tBGR& operator *= (const tBGR& c) { r *= c.r; g *= c.g; b *= c.b; return *this; } tBGR& operator *= (const T factor) { r *= factor; g *= factor; b *= factor; return *this; } tBGR& operator /= (const tBGR& c) { r /= c.r; g /= c.g; b /= c.b; return *this; } tBGR& operator /= (const T factor) { r /= factor; g /= factor; b /= factor; return *this; } //! Unary minus inline tBGR operator - () const { return tBGR(-r, -g, -b); }; //! Addition of a scalar (analog to -=) inline tBGR operator + (const T& c) const { return tBGR(r+c, g+c, b+c); }; //! Subtraction of a scalar (analog to +=) inline tBGR operator - (const T& c) const { return tBGR(r-c, g-c, b-c); }; //! Multiplication of a scalar (analog to *=) inline tBGR operator * (const T& c) const { return tBGR(r*c, g*c, b*c); }; //! Division by a scalar (analog to /=) inline tBGR operator / (const T& c) const { return tBGR(r/c, g/c, b/c); }; bool operator == (const tBGR& arg) { return ((arg.r == r) && (arg.g == g) && (arg.b == b)); } }; template inline tBGR operator + (const tBGR& c1, const tBGR& c2) { tBGR result = c1; return (result += c2); } template inline tBGR operator - (const tBGR& c1, const tBGR& c2) { tBGR result = c1; return (result -= c2); } template inline tBGR operator * (const tBGR& c1, const tBGR& c2) { tBGR result(c1.r * c2.r, c1.g * c2.g, c1.b * c2.b); return result; } template inline tBGR operator * (const tBGR& c, T factor) { tBGR result(c.r * factor, c.g * factor, c.b * factor); return result; } template inline tBGR operator * (T factor, const tBGR& c) { tBGR result(c.r * factor, c.g * factor, c.b * factor); return result; } template inline tBGR operator / (const tBGR& c1, const tBGR& c2) { tBGR result(c1.r / c2.r, c1.g / c2.g, c1.b / c2.b); return result; } template inline tBGR operator / (const tBGR& c, T factor) { tBGR result(c.r / factor, c.g / factor, c.b / factor); return result; } template inline bool operator < (const tBGR& c1, const tBGR& c2) { T gray1 = c1.grayValue(); T gray2 = c2.grayValue(); return (gray1 < gray2); } template inline tBGR operator ! (const tBGR& c) { tBGR result = tBGR::White(); return (result -= c); } // Absolute of every color channel template inline tBGR abs(const tBGR& c) { return tBGR(c.r < T(0) ? -c.r : c.r, c.g < T(0) ? -c.g : c.g, c.b < T(0) ? 
-c.b : c.b); } template inline std::ostream& operator << (std::ostream& os, const tBGR& c) { os << "(" << c.r << " " << c.g << " " << c.b << ")"; return os; } template <> inline std::ostream& operator << (std::ostream& os, const tBGR& c) { os << "(" << (int)c.r << " " << (int)c.g << " " << (int)c.b << ")"; return os; } typedef tBGR cBGR; typedef tBGR bBGR; typedef tBGR fBGR; typedef tBGR dBGR; } /* Close Namespace "gravis" */ #endif relion-3.1.3/src/jaz/gravis/.svn/text-base/tDefaultVector.h.svn-base000066400000000000000000000127421411340063500252600ustar00rootroot00000000000000#ifndef __LIBGRAVIS_T_DEFAULT_VECTOR_H__ #define __LIBGRAVIS_T_DEFAULT_VECTOR_H__ #include #include "tArray.h" namespace gravis { /** * Like a std::vector, but with a default value returned when accessing [-1]. * * This situation is checked extremely efficiently by positioning the default * element at position [-1] in memory, so no check has to be done. * * This replacement for tVector does not offer reference counting. It makes * more sense to take a complete array structure and wrap it into a * boost::shared_ptr **/ template class tDefaultVector { private: typedef typename std::vector Vector; Vector data; T* data_ptr; public: typedef typename Vector::iterator iterator; typedef typename Vector::const_iterator const_iterator; typedef typename Vector::reverse_iterator reverse_iterator; typedef typename Vector::const_reverse_iterator const_reverse_iterator; typedef typename Vector::reference reference; typedef typename Vector::const_reference const_reference; /** * Create a new vector, optionally specifying a default value. If no default value is specified T() is used **/ tDefaultVector(const size_t size=0, const T& def=T()) { data.resize(size+1); data_ptr = &data[1]; data[0] = def; } /** * Copy data from the other vector **/ tDefaultVector(const tDefaultVector& other) : data(other.data), data_ptr(&data[1]) {}; /** * Copy data from the other vector **/ tDefaultVector(const tArray &other) : data(other.size()+1), data_ptr(&data[1]) { for (size_t i=0; i &other) : data(other.size()+1), data_ptr(&data[1]) { for (size_t i=0; i inline void swap(gravis::tDefaultVector<_Tp>& __x, gravis::tDefaultVector<_Tp>& __y) { __x.swap(__y); } } #endif relion-3.1.3/src/jaz/gravis/.svn/text-base/tGray_A.h.svn-base000066400000000000000000000115411411340063500236470ustar00rootroot00000000000000#ifndef __LIBGRAVIS_T_GRAY_A_H__ #define __LIBGRAVIS_T_GRAY_A_H__ /****************************************************************************** ** Title: tGray_A.h ** Description: Represents an RGB+Alpha color tupel. ** ** Author: Jean-Sebastien Pierrard, 2005 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #include namespace gravis { template class tGray_A { public: T g, a; typedef T scalar_type; tGray_A () : g(T(0)), a(T(1.0)) { } tGray_A (T _g) : g(_g) , a(T(1.0)) { } tGray_A (T _g, T _a) : g(_g) , a(_a) { } void set (T _g) { g = _g; } void set (T _g, T _a) { g = _g; a = _a; } T grayValue () const { return g; } T minValue () const { return g; } T maxValue () const { return g; } /*! \brief All color components, including alpha are clamped to [0,1]. 
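*
* Added illustrative sketch (not in the original comment):
*   fGray_A g(1.7f, 0.5f);
*   g.clamp();              // the gray value is clamped into [0,1]
*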
* * \return self */ tGray_A& clamp() { g = std::min(std::max(g, T(0)), T(1)); return *this; } bool operator != (const tGray_A& c) const { return g != c.g || a != c.a; } bool operator == (const tGray_A& c) const { return g == c.g && a == c.a; } tGray_A& operator += (const tGray_A& c) { g += c.g; return *this; } tGray_A& operator += (const T gray) { g += gray; return *this; } tGray_A& operator -= (const tGray_A& c) { g -= c.g; return *this; } tGray_A& operator -= (const T gray) { g -= gray; return *this; } tGray_A& operator *= (const tGray_A& c) { g *= c.g; return *this; } tGray_A& operator *= (const float factor) { g *= factor; return *this; } tGray_A& operator /= (const tGray_A& c) { g /= c.g; return *this; } tGray_A& operator /= (const float factor) { g /= factor; return *this; } //! Unary minus inline tGray_A operator - () const { return tGray_A(-g, a); }; //! Addition of a scalar (analog to -=) inline tGray_A operator + (const T& c) const { return tGray_A(g+c, a); }; //! Subtraction of a scalar (analog to +=) inline tGray_A operator - (const T& c) const { return tGray_A(g-c, a); }; //! Multiplication of a scalar (analog to *=) inline tGray_A operator * (const T& c) const { return tGray_A(g*c, a); }; //! Division by a scalar (analog to /=) inline tGray_A operator / (const T& c) const { return tGray_A(g/c, a); }; }; template inline tGray_A operator+ (const tGray_A& c1, const tGray_A& c2) { tGray_A result(c1); return (result += c2); } template inline tGray_A operator- (const tGray_A& c1, const tGray_A& c2) { tGray_A result(c1); return (result -= c2); } template inline tGray_A operator* (const tGray_A& c1, const tGray_A& c2) { tGray_A result(c1); return (result *= c2); } template inline tGray_A operator* (const tGray_A& c, T factor) { tGray_A result(c); return (result *= factor); } template inline tGray_A operator* (T factor, const tGray_A& c) { tGray_A result(c); return (result *= factor); } template inline tGray_A operator / (const tGray_A& c1, const tGray_A& c2) { tGray_A result(c1); return (result /= c2); } template inline tGray_A operator / (const tGray_A& c, T factor) { tGray_A result(c); return (result /= factor); } template inline bool operator < (const tGray_A& c1, const tGray_A& c2) { return (c1.grayValue() < c2.grayValue()); } template inline tGray_A operator ! (const tGray_A& c) { tGray_A result = tGray_A::White; return (result -= c); } template inline std::ostream& operator << (std::ostream& os, const tGray_A& c) { os << "(" << c.g << " " << c.a << ")"; return os; } template <> inline std::ostream& operator << (std::ostream& os, const tGray_A& c) { os << "(" << (int)c.g << " " << (int)c.a << ")"; return os; } typedef tGray_A bGray_A; typedef tGray_A fGray_A; typedef tGray_A dGray_A; } /* Close Namespace "gravis" */ #endif relion-3.1.3/src/jaz/gravis/.svn/text-base/tImage.h.svn-base000066400000000000000000000424421411340063500235330ustar00rootroot00000000000000#ifndef __LIBGRAVIS_T_IMAGE_H__ #define __LIBGRAVIS_T_IMAGE_H__ /****************************************************************************** ** Title: tImage.h ** Description: Implements two dimensional array with row-major memory layout. ** ** Author: Jean-Sebastien Pierrard, 2009 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #include #include #include "tRGB.h" #include "tBGR.h" #include "tRGBA.h" #include "tRGB_A.h" #include "tGray_A.h" #include "tArray.h" #include #include "tImage/traits.h" /*! 
** \file tImage.h */ namespace gravis { template class tImage; } #include "tImage/access.hxx" #include "tImage/interpolation.hxx" namespace gravis { /*! ** \class tImage ** \brief Implements two dimensional array with row-major memory layout. ** ** This class represents an image of arbitrary pixel type. TODO ** ** For operations on images look at tImage/operators.h, tImage/???.h. */ template class tImage { inline static bool has_ending(const std::string& filename, const std::string& ending) { if (filename.size() < ending.size()) return false; for (size_t i=0; i::Scalar_t scalar_type; typedef T* iterator; tImage (); tImage (size_t, size_t, std::string=""); tImage (size_t, size_t, T const& value ); tImage (const tImage&); tImage& operator=(const tImage&); ~tImage (); tImage clone () const; tImage& setSize (size_t, size_t); tImage& resize (size_t, size_t); tImage& setName (std::string); tImage& fill (T); std::string name () const; size_t cols () const; size_t rows () const; size_t size () const; /** Returns the number of components per pixel **/ size_t components () const; const T& operator() (size_t, size_t) const; T& operator() (size_t, size_t); /** Returns the component specified by column, row and, component number (channel) **/ const scalar_type& operator() (size_t, size_t, size_t) const; scalar_type& operator() (size_t, size_t, size_t); const T& operator [] (size_t) const; T& operator [] (size_t); /** Returns the component specified by index (as [] operator) and, component number (channel) **/ const scalar_type& comp(size_t, size_t) const; scalar_type& comp(size_t, size_t); iterator begin () const; iterator end () const; const T* data () const; T* data (); const T* data (size_t, size_t) const; T* data (size_t, size_t); void read (const std::string&); /** * Detect the filetype from the ending. **/ void write(const std::string&) const; void writePNM (const std::string&) const; void writePNG (const std::string&) const; void writeJPG (const std::string&, int quality=100) const; /** * Interpolated access to the image * * Usage * * image.interpolate(x, y) * image.interpolate(x, y) * image.interpolate(x, y) * * See interpolation:: namespace for other methods. * * Beware: * if using this inside of a templated function or class, you have to write * image.template interpolate(x, y), which is quite * awfull. **/ template inline T interpolate(const Float& x, const Float& y) const { return InterpolationMethod::getPixel(*this, x, y); } /** * Default interpolation mode is Cubic **/ template inline T interpolate(const Float& x, const Float& y) const { return interpolation::Cubic::getPixel(*this, x, y); } /** * Checked access to the image, with configurable behaviour. * * Usage * * image.access(x, y) * image.access(x, y) * image.access(x, y) * image.access(x, y) * * Beware: * if using this inside of a templated function or class, you have to write * image.template access(x, y), which is quite * awfull. * **/ template inline T access(const int& x, const int& y) const { return AccessMethod::getPixel(*this, x, y); } /** * Default access mode is access::Repeat **/ inline T access(const int& x, const int& y) const { return access::Repeat::getPixel(*this, x, y); } /** * tImage Convolution using the access specified access method. 
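*
* Added illustrative sketch (not part of the original comment): a 3x3 box blur
* on a single-channel float image. The kernel element type is the image's
* Float_t, which for a plain float image should simply be float; "img" stands
* for an already loaded tImage<float>.
*
*   tImage<float> kernel(3, 3);
*   kernel.fill(1.0f / 9.0f);
*   tImage<float> smoothed = img.convolve(kernel);   // default border handling: access::Repeat
*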
* * The access methods include: * AccessZero * AccessRepeat * AccessWrapped * AccessMirrored * * Usage: * * tImage result = image.convolve< access::AccessMirrored >(kernel); * * * Beware: * if using this inside of a templated function or class, you have to write * image.template convolve(kernel), which is quite * awfull. **/ template tImage convolve(const tImage< typename tImageTraits::Float_t >& kernel) const { int klmargin, ktmargin; if ((kernel.cols() % 2) == 0) { klmargin = (kernel.cols() >> 1) - 1; } else { klmargin = (kernel.cols() >> 1); } if ((kernel.rows() % 2) == 0) { ktmargin = (kernel.rows() >> 1) - 1; } else { ktmargin = (kernel.rows() >> 1); } tImage lhs(cols(), rows()); for (int r=0; r<(int)rows(); ++r) { for (int c=0; c<(int)cols(); ++c) { T sum = T(0); for (int ky=0; ky<(int)kernel.rows(); ++ky) { for (int kx=0; kx<(int)kernel.cols(); ++kx) { sum += kernel(kx, ky) * access(kx-klmargin+c, ky-ktmargin+r); } } lhs(c, r) = sum; } } return lhs; } /** * Default access method is Repeat **/ tImage convolve(const tImage< typename tImageTraits::Float_t >& kernel) const { return (*this).template convolve(kernel); } /** Clamp an image by calling the clamp() method on each element **/ void clamp() { for (size_t i=0; i image; tArray accel; iterator p_begin; iterator p_end; }; } /* Close namespace "gravis" */ /****************************************************************************** ** tImage implementation ******************************************************************************/ #include "Exception.h" #include "private/tImageIO.hxx" #include "private/tImageConverter.hxx" #include "private/tImageIO_PNM.hxx" #include "private/tImageIO_PNG.hxx" #include "private/tImageIO_JPG.hxx" namespace gravis { /*! ** \brief Default constructor. */ template inline tImage::tImage () : p_name(""), wd(0), ht(0), image(), accel(), p_begin(), p_end() { } /*! ** \brief Constructor. ** ** \param width Set number of columns. ** \param height Set number of rows. ** \param name Sets a name for the image (\em optional). */ template inline tImage::tImage (size_t width, size_t height, std::string name) : p_name(name), wd(width), ht(height), // Allocate space for channel data and indexing accelerators image(width* height), accel(height), p_begin(image.data()), p_end(image.data()+image.size()) { // Compute pointers to beginning of each line for (size_t y=0; y inline tImage::tImage (size_t width, size_t height, T const& value) : p_name(""), wd(width), ht(height), // Allocate space for channel data and indexing accelerators image(width* height), accel(height), p_begin(image.data()), p_end(image.data()+image.size()) { // Compute pointers to beginning of each line for (size_t y=0; y inline tImage::tImage (const tImage& rhs) : p_name (rhs.p_name), wd (rhs.wd), ht (rhs.ht), image (rhs.image), accel (rhs.accel), p_begin(rhs.p_begin), p_end (rhs.p_end) { } /*! ** \brief Reference Semantic Assignemnt ** ** The assignmment has reference-semantic, i.e. the image data is not actually ** copied. Instead a new handle to the same data is created. ** ** \param rhs */ template inline tImage &tImage::operator =(const tImage& rhs) { p_name = rhs.p_name; wd = rhs.wd; ht = rhs.ht; image = rhs.image; accel = rhs.accel; p_begin = rhs.p_begin; p_end = rhs.p_end; return *this; } /*! ** \brief Destructor. ** ** Destroy the object(handle). The image data is \em only deleted if no other ** instance of this class holds a reference to it. */ template inline tImage::~tImage () { } /*! ** \brief Create a deep-copy of the image data. 
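**
** Added illustrative sketch (not part of the original comment): assignment only
** copies the handle, clone() duplicates the pixel data.
**
**   tImage<float> a(4, 4);
**   a.fill(0.0f);
**   tImage<float> b = a;           // handle copy: b and a share the same pixels
**   b(0, 0) = 1.0f;                // also visible through a
**   tImage<float> c = a.clone();   // independent copy (byte-wise, see warning below)
**   c(1, 1) = 2.0f;                // a is unaffected
**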
** ** \return A new tImage object. ** ** \warning This method creates a byte-wise copy of the image data. When ** applied to compound types (e.g. T=std::vector) it is very likely to ** cause serious problems. */ template tImage tImage::clone () const { // Allocate new image with same name and dimensions tImage result(wd, ht, p_name); // Copy the data memcpy(result.data(), data(), wd*ht*sizeof(T)); return result; } /*! ** \brief Resize image. ** ** \param nwd Number of columns in resized image. ** \param nht Number of rows in resized image. ** ** \return ** \warning The original data is not copied TODO?? */ template inline tImage& tImage::resize (size_t nwd, size_t nht) { if ((nwd != wd) || (nht != ht)) *this = tImage(nwd, nht, p_name); return *this; } /*! ** \brief Resize image. ** ** \param nwd Number of columns in resized image. ** \param nht Number of rows in resized image. ** ** \return ** \warning The original data is not copied TODO?? */ template inline tImage& tImage::setSize (size_t nwd, size_t nht) { return resize(nwd, nht); } template inline size_t tImage::rows () const { return ht; } template inline size_t tImage::cols () const { return wd; } template inline size_t tImage::size () const { return image.size(); } template inline std::string tImage::name () const { return p_name; } template inline size_t tImage::components() const { return tImageTraits::components(); } template inline tImage& tImage::setName (std::string name) { p_name = name; return *this; } template inline tImage& tImage::fill (T value) { image.fill(value); return *this; } template inline const T& tImage::operator() (size_t x, size_t y) const { #ifdef _GRAVIS_DEBUG_RANGECHECKING_ assert( x >= 0 && x < cols() ); assert( y >= 0 && y < rows() ); #endif return (accel[y])[x]; } template inline T& tImage::operator() (size_t x, size_t y) { #ifdef _GRAVIS_DEBUG_RANGECHECKING_ assert( x >= 0 && x < cols() ); assert( y >= 0 && y < rows() ); #endif return (accel[y])[x]; } template inline const typename tImage::scalar_type& tImage::operator() (size_t x, size_t y, size_t c) const { #ifdef _GRAVIS_DEBUG_RANGECHECKING_ assert( x >= 0 && x < cols() ); assert( y >= 0 && y < rows() ); assert( c >= 0 && c < tImageTraits::components() ); #endif const scalar_type* p = reinterpret_cast(p_begin); return p[(y*cols() + x) * tImageTraits::components() + c]; } template inline typename tImage::scalar_type& tImage::operator() (size_t x, size_t y, size_t c) { #ifdef _GRAVIS_DEBUG_RANGECHECKING_ assert( x >= 0 && x < cols() ); assert( y >= 0 && y < rows() ); assert( c >= 0 && c < tImageTraits::components() ); #endif scalar_type* p = reinterpret_cast(p_begin); return p[(y*cols() + x) * tImageTraits::components() + c]; } template inline const T& tImage::operator[] (size_t n) const { #ifdef _GRAVIS_DEBUG_RANGECHECKING_ assert((n >= 0) && (n < image.size())); #endif return *(p_begin + n); } template inline T& tImage::operator[] (size_t n) { #ifdef _GRAVIS_DEBUG_RANGECHECKING_ assert((n >= 0) && (n < image.size())); #endif return *(p_begin + n); } template inline const typename tImage::scalar_type& tImage::comp(size_t n, size_t c) const { #ifdef _GRAVIS_DEBUG_RANGECHECKING_ assert((n >= 0) && (n < image.size())); assert( c >= 0 && c < tImageTraits::components() ); #endif const scalar_type* p = reinterpret_cast(p_begin); return *(p + n*tImageTraits::components() + c); } template inline typename tImage::scalar_type& tImage::comp(size_t n, size_t c) { #ifdef _GRAVIS_DEBUG_RANGECHECKING_ assert((n >= 0) && (n < image.size())); assert( c >= 0 && c < 
tImageTraits::components() ); #endif scalar_type* p = reinterpret_cast(p_begin); return *(p + n*tImageTraits::components() + c); } template inline typename tImage::iterator tImage::begin () const { return p_begin; } template inline typename tImage::iterator tImage::end () const { return p_end; } template inline const T* tImage::data () const { return image.data(); } template inline T* tImage::data () { return image.data(); } template inline const T* tImage::data (size_t x, size_t y) const { return accel[y] + x; } template inline T* tImage::data (size_t x, size_t y) { return accel[y] + x; } template inline void tImage::read (const std::string& filename) { if (priv::JPGImageReader::canHandle(filename)) { priv::JPGImageReader reader; reader.read(*this, filename); return; } char header[512]; std::ifstream is(filename.c_str(), std::ios::in | std::ios::binary); if (!is.good()) { GRAVIS_THROW3(Exception, "Unable to open file", filename); } is.read(&header[0], sizeof(header)); is.close(); if (priv::PNMImageReader::canHandle(header)) { priv::PNMImageReader reader; reader.read(*this, filename.c_str()); return; } if (priv::PNGImageReader::canHandle(header)) { priv::PNGImageReader reader; reader.read(*this, filename.c_str()); return; } GRAVIS_THROW3(gravis::Exception, "Can't handle this file.", filename); } template inline void tImage::write(const std::string& filename) const { if (has_ending(filename, "jpg") || has_ending(filename, "jpeg")) writeJPG(filename); else if (has_ending(filename, "png")) writePNG(filename); else if (has_ending(filename, "pnm")) writePNM(filename); else GRAVIS_THROW3(gravis::Exception, "Could not determine filetype from filename: ", filename); } template inline void tImage::writePNM (const std::string& filename) const { priv::PNMImageWriter writer; writer.write(*this, filename.c_str()); } template inline void tImage::writePNG (const std::string& filename) const { priv::PNGImageWriter writer; writer.write(*this, filename.c_str()); } template inline void tImage::writeJPG (const std::string& filename, int quality) const { priv::JPGImageWriter writer; writer.write(*this, filename.c_str(), quality); } } /* Close namespace "gravis" */ #endif relion-3.1.3/src/jaz/gravis/.svn/text-base/tImageAlgorithm.h.svn-base000066400000000000000000000011331411340063500253720ustar00rootroot00000000000000#ifndef __LIBGRAVIS_T_IMAGE_ALGORITHM_H__ #define __LIBGRAVIS_T_IMAGE_ALGORITHM_H__ /****************************************************************************** ** Title: tImageAlgorithm.h ** Description: Collection of standalone image algorithms. ** ** Author: Jean-Sebastien Pierrard, 2005 ** Brian Schroeder 2006 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #include "tImage/convolution.h" #include "tImage/normalization.h" #include "tImage/operators.h" #endif relion-3.1.3/src/jaz/gravis/.svn/text-base/tLab.h.svn-base000066400000000000000000000142171411340063500232060ustar00rootroot00000000000000#ifndef __LIBGRAVIS_T_LAB_H__ #define __LIBGRAVIS_T_LAB_H__ /****************************************************************************** ** Title: tLab.h ** Description: Represents an L*a*b* color tupel. ** ******************************************************************************/ #include namespace gravis { template class tLab { /*! 
* Private helper functions, wrapped into an additional struct in case that we want to use the names **/ struct priv { static inline const T& min(const T& a, const T& b) { return ab ? a : b; } }; public: typedef T scalar_type; T L, a, b; tLab () : L(T(0)), a(T(0)), b(T(0)) { } tLab (T L, T a, T b) : L(L), a(a), b(b) { } // tLab (T gray) : (gray), g(gray), b(gray) { } void set (T _y, T _cb, T _cr) { L = _y; a = _cb; b = _cr; } // void add (T _r, T _g, T _b) { // r += _r; g += _g; b += _b; // } T intensity () const { return L; } /* bool operator != (const tLab& c) const { return r != c.r || g != c.g || b != c.b; } bool operator == (const tLab& c) const { return r == c.r && g == c.g && b == c.b; } */ tLab& operator += (const tLab& c) { L += c.L; a += c.a; b += c.b; return *this; } /* tLab& operator += (const T gray) { r += gray; g += gray; b += gray; return *this; } */ tLab& operator -= (const tLab& c) { L -= c.L; a -= c.a; b -= c.b; return *this; } // tLab& operator -= (const T gray) { // r -= gray; g -= gray; b -= gray; // return *this; // } tLab& operator *= (const tLab& c) { L *= c.L; a *= c.a; b *= c.b; return *this; } tLab& operator *= (const T factor) { L *= factor; a *= factor; b *= factor; return *this; } /* tLab& operator /= (const tLab& c) { r /= c.r; g /= c.g; b /= c.b; return *this; } tLab& operator /= (const T factor) { r /= factor; g /= factor; b /= factor; return *this; } * \brief All color components are clamped to [0,1]. This function works inplace. * * \return self tLab& clamp() { r = priv::min(priv::max(r, 0), 1); g = priv::min(priv::max(g, 0), 1); b = priv::min(priv::max(b, 0), 1); return *this; } //! Unary minus inline tLab operator - () const { return tLab(-r, -g, -b); }; //! Addition of a scalar (analog to -=) inline tLab operator + (const T& c) const { return tLab(r+c, g+c, b+c); }; //! Subtraction of a scalar (analog to +=) inline tLab operator - (const T& c) const { return tLab(r-c, g-c, b-c); }; */ //! Multiplication of a scalar (analog to *=) inline tLab operator * (const T& c) const { return tLab(L*c, a*c, b*c); }; /* //! Division by a scalar (analog to /=) inline tLab operator / (const T& c) const { return tLab(r/c, g/c, b/c); }; bool operator == (const tLab& arg) { return ((arg.r == r) && (arg.g == g) && (arg.b == b)); } const T &operator [](const size_t &i) const { return (&r)[i]; } T &operator [](const size_t &i) { return (&r)[i]; } */ }; template inline tLab operator + (const tLab& c1, const tLab& c2) { tLab result = c1; return (result += c2); } template inline tLab operator - (const tLab& c1, const tLab& c2) { tLab result = c1; return (result -= c2); } /* template inline tLab operator * (const tLab& c1, const tLab& c2) { tLab result(c1.r * c2.r, c1.g * c2.g, c1.b * c2.b); return result; } */ template inline tLab operator * (const tLab& c, T factor) { tLab result(c.L * factor, c.a * factor, c.b * factor); return result; } template inline tLab operator * (T factor, const tLab& c) { tLab result(c.L * factor, c.a * factor, c.b * factor); return result; } /* template inline tLab operator / (const tLab& c1, const tLab& c2) { tLab result(c1.r / c2.r, c1.g / c2.g, c1.b / c2.b); return result; } template inline tLab operator / (const tLab& c, T factor) { tLab result(c.r / factor, c.g / factor, c.b / factor); return result; } template inline bool operator < (const tLab& c1, const tLab& c2) { T gray1 = c1.grayValue(); T gray2 = c2.grayValue(); return (gray1 < gray2); } template inline tLab operator ! 
(const tLab& c) { tLab result = tLab::White(); return (result -= c); } // Absolute of every color channel template inline tLab abs(const tLab& c) { return tLab(c.r < T(0) ? -c.r : c.r, c.g < T(0) ? -c.g : c.g, c.b < T(0) ? -c.b : c.b); } template inline std::ostream& operator << (std::ostream& os, const tLab& c) { os << "(" << c.r << " " << c.g << " " << c.b << ")"; return os; } template <> inline std::ostream& operator << (std::ostream& os, const tLab& c) { os << "(" << (int)c.r << " " << (int)c.g << " " << (int)c.b << ")"; return os; } template inline T dot (const tLab& v1, const tLab& v2) { return (v1.r*v2.r + v1.g*v2.g + v1.b*v2.b); } */ //typedef tLab bRGB; typedef tLab fLab; typedef tLab dLab; } #endif relion-3.1.3/src/jaz/gravis/.svn/text-base/tMM.h.svn-base000066400000000000000000000403331411340063500230170ustar00rootroot00000000000000#ifndef __LIBGRAVIS_T_MM_H__ #define __LIBGRAVIS_T_MM_H__ #include "tVarMatrix.h" #include #include namespace gravis { template class tConstMM; template class tMM { public: typedef tMatrix Vector; std::string title; tVarVector< Vector > nu; tVarMatrix< Vector > D; const size_t& m() const { return D.h; }; // Number of vertices const size_t& k() const { return D.w; }; // Number of paramters tMM(const std::string& title="Morphable Model") : title(title), nu(title+"::nu"), D(title+"::D") { }; inline void evaluate(tVectorView< Vector > &v, const tConstVectorView< T > &a) const { tConstMM MM(*this); MM.evaluate(v, a); } inline void evaluate(tVarVector< Vector > &v, const tConstVectorView< T > &a) const { tConstMM MM(*this); MM.evaluate(v, a); } inline void resize(size_t h, size_t w) { D.resize(h,w); nu.resize(h); }; inline void clear() { matrix::clear(D); matrix::clear(nu); }; // Check if the file to load has the right datatype bool load_is_compatible(const std::string& fn) { char mmid0[33] = "GRAVIS_MORPHABLE_MODEL "; char mmid1[33] = " "; std::ifstream stream(fn.c_str(), std::ifstream::binary); uint8_t uint32_size; uint8_t T_size; uint32_t m,k; uint16_t endianness; stream.read(mmid1, 32); stream.read((char*)&endianness, 2); stream.read((char*)&uint32_size, 1); stream.read((char*)&T_size, 1); stream.read((char*)&m, sizeof(m)); stream.read((char*)&k, sizeof(k)); GRAVIS_CHECK( 0 == strncmp( mmid0, mmid1, 31 ), "Not a gravis morphable model file" ); GRAVIS_CHECK( endianness == 0x0001, "Wrong endianness"); if (uint32_size != 4) { std::cerr << "Uint 32 size is " << uint32_size << std::endl; } GRAVIS_CHECK( uint32_size == 4, "Wrong uint32_size size"); return( T_size == sizeof(T) ); } void load(const std::string& fn) { char mmid0[33] = "GRAVIS_MORPHABLE_MODEL "; char mmid1[33] = " "; std::ifstream stream(fn.c_str(), std::ifstream::binary); uint8_t uint32_size; uint8_t T_size; uint32_t m,k; uint16_t endianness; stream.read(mmid1, 32); stream.read((char*)&endianness, 2); stream.read((char*)&uint32_size, 1); stream.read((char*)&T_size, 1); stream.read((char*)&m, sizeof(m)); stream.read((char*)&k, sizeof(k)); GRAVIS_CHECK( 0 == strncmp( mmid0, mmid1, 31 ), "Not a gravis morphable model file" ); GRAVIS_CHECK( endianness == 0x0001, "Wrong endianness"); if (uint32_size != 4) { std::cerr << "Uint 32 size is " << uint32_size << std::endl; } GRAVIS_CHECK( uint32_size == 4, "Wrong uint32_size size"); GRAVIS_CHECK( T_size == sizeof(T), "Wrong type in model file"); resize(m, k); clear(); stream.read((char*)D.data, sizeof(Vector)*D.size()); stream.read((char*)nu.data, sizeof(Vector)*nu.size()); char mmid2[33] = " "; stream.read(mmid2, 32); GRAVIS_CHECK( 0 == strncmp( mmid0, 
mmid2, 31 ), "File did not end with the end marker" ); } void save(const std::string& fn) { tConstMM cm(*this); cm.save(fn); } // Create a new interpolated model from barycentric coordinates into the old model inline void interpolate(tMM &out, const tConstMatrixView &idx, const tConstMatrixView &weight) const { tConstMM cm(*this); cm.interpolate(out, idx, weight); } // Create a new model from chosen lines of the old model inline void submodel(tMM &out, const tConstVectorView &chosen) const { tConstMM cm(*this); cm.submodel(out, chosen); } // Create a new model from chosen lines of the old model inline void submodel(tMM &out, const tConstVectorView &chosen) const { tConstMM cm(*this); cm.submodel(out, chosen); } // Create a new model from chosen lines of the old model inline void submodel(tMM &out, const std::vector &chosen) const { tConstMM cm(*this); cm.submodel(out, chosen); } // Create a new model from chosen lines of the old model inline void submodel(tMM &out, const std::vector &chosen) const { tConstMM cm(*this); cm.submodel(out, chosen); } // Create a new interpolated model from barycentric coordinates into the old model inline void interpolate(tMM &out, const tConstMatrixView &idx, const tConstMatrixView &weight, const size_t& n_coeff) const { tConstMM cm(*this); cm.interpolate(out, idx, weight, n_coeff); } // Create a new interpolated model from barycentric coordinates into the old model inline void interpolate(tMM &out, const tConstMatrixView &idx, const tConstMatrixView &weight, const int& n_coeff) const { tConstMM cm(*this); cm.interpolate(out, idx, weight, n_coeff); } // Create a new model from chosen lines of the old model inline void submodel(tMM &out, const tConstVectorView &chosen, const size_t& n_coeff) const { tConstMM cm(*this); cm.submodel(out, chosen, n_coeff); } // Create a new model from chosen lines of the old model inline void submodel(tMM &out, const tConstVectorView &chosen, const size_t& n_coeff) const { tConstMM cm(*this); cm.submodel(out, chosen, n_coeff); } // Create a new model from chosen lines of the old model inline void submodel(tMM &out, const std::vector &chosen, const size_t& n_coeff) const { tConstMM cm(*this); cm.submodel(out, chosen, n_coeff); } // Create a new model from chosen lines of the old model inline void submodel(tMM &out, const std::vector &chosen, const int& n_coeff) const { tConstMM cm(*this); cm.submodel(out, chosen, n_coeff); } }; template class tConstMM { public: typedef tMatrix Vector; const tConstVectorView< Vector > nu; const tConstMatrixView< Vector > D; const size_t& m() const { return D.h; }; // Number of vertices const size_t& k() const { return D.w; }; // Number of paramters tConstMM(const tConstVectorView &nu, const tConstMatrixView &D) : nu(nu), D(D) { GRAVIS_CHECK( nu.h == D.h, "Morphable model is inconsistent" ); }; tConstMM(const tMM &o) : nu(o.nu), D(o.D) {}; tConstMM(const tConstMM &o) : nu(o.nu), D(o.D) {}; #ifdef MATLAB tConstMM(const tmxConstMatrixView &_nu, const tmxConstMatrixView &_D) : nu((Vector*)_nu.data, _nu.dims[0]/3) , D((Vector*)_D.data, _D.dims[0]/3, _D.dims[1]) { GRAVIS_CHECK( nu.h == D.h, "Morphable model is inconsistent" ); }; #endif // If a is too short assumes zeros for the unset coefficents // If a is too long assumes zeros for the missing principal components inline void evaluate(tVectorView< Vector > &v, const tConstVectorView< T > &a) const { size_t K=std::min(k(), a.size()); // GRAVIS_CHECK( a.size() == k(), "a and D are incompatible"); GRAVIS_CHECK( v.size() == m(), "v and nu are 
incompatible"); GRAVIS_CHECK( nu.size() == m(), "k and nu are incompatible"); // Apply the morphable model #if 0 v = nu; for (size_t j=0; j vv((T*)v.data, 3*v.size()); tConstVectorView< T > vnu((T*)nu.data, 3*nu.size()); tConstMatrixView< T > vD((T*)D.data, 3*D.h, K); tConstVectorView< T > va(a.data, K); matrix::addmult(vv, vnu, vD, va); // USING BLAS } inline void evaluate(tVarVector< Vector > &v, const tConstVectorView< T > &a) const { v.resize(m()); tVectorView< tMatrix > vv(v); evaluate(vv, a); } void save(const std::string& fn) { char mmid[33] = "GRAVIS_MORPHABLE_MODEL "; std::ofstream stream(fn.c_str(), std::ofstream::binary); uint8_t uint32_size = sizeof(uint32_t); if (uint32_size != 4) { std::cerr << "Uint 32 size is " << uint32_size << std::endl; } uint8_t T_size = sizeof(T); uint32_t m_ = m(), k_ = k(); uint16_t endianness = 0x0001; stream.write(mmid, 32); stream.write((char*)&endianness, 2); stream.write((char*)&uint32_size, 1); stream.write((char*)&T_size, 1); stream.write((char*)&(m_), sizeof(m_)); stream.write((char*)&(k_), sizeof(k_)); stream.write((char*)D.data, sizeof(Vector)*D.size()); stream.write((char*)nu.data, sizeof(Vector)*nu.size()); stream.write(mmid, 32); } // Create a new interpolated model from barycentric coordinates into the old model inline void interpolate(tMM &out, const tConstMatrixView &idx, const tConstMatrixView &weight) const { const tConstMM &model = *this; GRAVIS_CHECK( idx.w == weight.w && idx.h == weight.h, "idx and weight should be kxn and kxn"); out.resize(idx.w, model.k()); const size_t& n = idx.w; const size_t& t = idx.h; const size_t& K = model.k(); // Initialize to zero out.clear(); // Write out.nu for (size_t i=0; i &out, const tConstVectorView &chosen) const { const tConstMM &model = *this; GRAVIS_CHECK( chosen.h == model.m(), "Chosen and model are incompatible"); size_t n = 0; for (size_t i=0; i &out, const tConstVectorView &chosen) const { const tConstMM &model = *this; size_t n = chosen.h; out.resize(n, model.k()); for (size_t I=0; I &out, const tConstVectorView &chosen) const { const tConstMM &model = *this; int n = chosen.h; out.resize(n, model.k()); for (int I=0; I &out, const std::vector &chosen) const { tConstVectorView vchosen(&chosen[0], chosen.size()); submodel(out, vchosen); } // Create a new model from chosen lines of the old model inline void submodel(tMM &out, const std::vector &chosen) const { tConstVectorView vchosen(&chosen[0], chosen.size()); submodel(out, vchosen); } // Create a new interpolated model from barycentric coordinates into the old model template inline void interpolate(tMM &out, const tConstMatrixView &idx, const tConstMatrixView &weight, const Int& n_coeff) const { const tConstMM &model = *this; GRAVIS_CHECK( idx.w == weight.w && idx.h == weight.h, "idx and weight should be kxn and kxn"); out.resize(idx.w, n_coeff); const Int& n = idx.w; const Int& t = idx.h; const Int K = std::min(n_coeff, model.k()); // Initialize to zero out.clear(); // Write out.nu for (Int i=0; i &out, const tConstVectorView &chosen, const size_t& n_coeff) const { const tConstMM &model = *this; GRAVIS_CHECK( chosen.h == model.m(), "Chosen and model are incompatible"); size_t n = 0; for (size_t i=0; i inline void submodel(tMM &out, const tConstVectorView &chosen, const Int& n_coeff) const { const tConstMM &model = *this; Int n = chosen.h; out.resize(n, n_coeff); const Int copy_coeff = std::min(n_coeff, model.k()); for (Int I=0; I inline void submodel(tMM &out, const std::vector &chosen, const Int& n_coeff) const { tConstVectorView 
vchosen(&chosen[0], chosen.size()); submodel(out, vchosen, n_coeff); } }; } #endif relion-3.1.3/src/jaz/gravis/.svn/text-base/tMatrix.h.svn-base000066400000000000000000000503271411340063500237560ustar00rootroot00000000000000#ifndef __LIBGRAVIS_T_MATRIX_H__ #define __LIBGRAVIS_T_MATRIX_H__ /****************************************************************************** ** Title: matrix.h ** Description: Templated fixed size dense matrices, which are a ** complement to the fixed size t{2,3,4}{Vector,Matrix} classes. ** ** Author: Brian Amberg, 2007 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #include #include #include #include #include #include "StringFormat.h" #include "Exception.h" #ifdef DEBUG #define checkAccess1( i, h ) { if(!( (i)<(h) )) GRAVIS_THROW2(gravis::Exception, "Access out of bounds " #i "<" #h); } #define checkAccess2( i,j, h,w ) { if(!( (i)<(h) && (j)<(w) )) GRAVIS_THROW2(gravis::Exception, "Access out of bounds " #i "<" #h " && "#j"<" #w); } #define checkAccess3( i,j,k, h,w,d ) { if(!( (i)<(h) && (j)<(w) && (k)<(d))) GRAVIS_THROW2(gravis::Exception, "Access out of bounds " #i "<" #h " && "#j"<" #w " && " #k "<" #d); } #else #define checkAccess1( i, h ) { } #define checkAccess2( i,j, h,w ) { } #define checkAccess3( i,j,k, h,w,d ) { } #endif namespace gravis { template class tMatrix; namespace tMatrixPrivateConstructorTrick { struct CheckIfRightSize { template static void has_2_elements( const tMatrix &m ) {} template static void has_2_elements( const tMatrix &m ) {} template static void has_3_elements( const tMatrix &m ) {} template static void has_3_elements( const tMatrix &m ) {} template static void has_4_elements( const tMatrix &m ) {} template static void has_4_elements( const tMatrix &m ) {} template static void has_4_elements( const tMatrix &m ) {} template static void has_9_elements( const tMatrix &m ) {} template static void has_16_elements( const tMatrix &m ) {} }; } /** * Small Matrix Of Arbitrary size held completely in memory in consecutive positions. * The data is in row major order. 
**/ template class tMatrix { public: typedef T scalar; T data[h* w]; /** * The data is not initialized **/ tMatrix() {}; /** * Copy constructor **/ tMatrix(const tMatrix &o) { memcpy( data, o.data, h*w*sizeof(T) ); } /** * Fill with copies of v values **/ explicit tMatrix(const T& v) { fill(v); } /** * Copy data from another matrix **/ tMatrix& operator=(const tMatrix& o) { memcpy( data, o.data, h*w*sizeof(T) ); return *this; } /** * Inplace negation **/ inline void negate() { tMatrix &m = *this; for (size_t i=0; i tMatrix operator*(const tMatrix &right) const { tMatrix out(0); const tMatrix& self(*this); for (size_t j=0; j0) memset( &data[0], 0, sizeof(data[0])*size()); } /** * Convenience function to clamp all elements of **/ inline void clamp(const T& min, const T& max) { for (size_t i=0; i static inline void cross(tMatrix &result, const tMatrix& a, const tMatrix& b) { result[0] = a[1]*b[2] - a[2]*b[1]; result[1] = a[2]*b[0] - a[0]*b[2]; result[2] = a[0]*b[1] - a[1]*b[0]; } } /** * Matrix Scalar Addition **/ template inline tMatrix operator-(const T& o, const tMatrix &self) { tMatrix r; for (size_t i=0; i inline tMatrix operator+(const T& o, const tMatrix &self) { tMatrix r; for (size_t i=0; i inline tMatrix operator*(const T& o, const tMatrix &self) { tMatrix r; for (size_t i=0; i inline static void addmult(tMatrix &out, const tMatrix &left, const tMatrix &right) { for (size_t j=0; j inline static void submult(tMatrix &out, const tMatrix &left, const tMatrix &right) { for (size_t j=0; j inline static void negate(tMatrix &m) { for (size_t i=0; i inline static void mult(const T& scalar, tMatrix &m) { for (size_t i=0; i inline static void mult(tMatrix &m, const T& scalar) { for (size_t i=0; i inline static void mult(tMatrix &out, const tMatrix &m, const T& scalar) { for (size_t i=0; i inline static void mult(tMatrix &out, const T& scalar, const tMatrix &m) { for (size_t i=0; i inline void add(tMatrix &self, const tMatrix &right) { for (size_t i=0; i inline void add(tMatrix &out, const tMatrix &self, const tMatrix &right) { for (size_t i=0; i inline void sub(tMatrix &self, const tMatrix &right) { for (size_t i=0; i inline void sub(tMatrix &out, const tMatrix &self, const tMatrix &right) { for (size_t i=0; i inline static void mult(tMatrix &out, const tMatrix &self, const tMatrix &right) { out.zeros(); for (size_t i=0; i inline static void mult(T& out, const tMatrix &self, const tMatrix &right) { out = self[0] * right[0]; for (size_t i=1; i inline static void mult(tMatrix &self, const tMatrix &right) { tMatrix tmp; mult(tmp, self, right); self = tmp; } /** Convenience Constructors **/ template inline static tMatrix tVector1(const T& a) { tMatrix v; v[0]=a; return v; } /** Convenience Constructors **/ template inline static tMatrix tVector2(const T& a, const T& b) { tMatrix v; v[0]=a; v[1]=b; return v; } /** Convenience Constructors **/ template inline static tMatrix tVector3(const T& a, const T& b, const T& c) { tMatrix v; v[0]=a; v[1]=b; v[2]=c; return v; } /** Convenience Constructors **/ template inline static tMatrix tVector4(const T& a, const T& b, const T& c, const T& d) { tMatrix v; v[0]=a; v[1]=b; v[2]=c; v[3]=d; return v; } /** Convenience Constructors **/ template inline static tMatrix tMatrix3( const T& a, const T& b, const T& c, const T& d, const T& e, const T& f, const T& g, const T& i, const T& h) { tMatrix m; m(0,0)=a; m(0,1)=b; m(0,2)=c; m(1,0)=d; m(1,1)=e; m(1,2)=f; m(2,0)=g; m(2,1)=i; m(2,2)=h; return m; } /** * Write fixed size matrices to a stream **/ template inline 
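/* Illustrative usage sketch (not part of the original gravis headers).
 * tMatrix<T,h,w> is a dense, fixed-size, row-major matrix; the convenience
 * constructors tMatrix3() and tVector3() above build a 3x3 matrix and a 3x1
 * column vector, and operator* is the usual matrix product. The concrete
 * template arguments below are assumptions for the example.
 *
 *   gravis::tMatrix<float,3,3> R = gravis::tMatrix3<float>(
 *       1.f, 0.f,  0.f,
 *       0.f, 0.f, -1.f,
 *       0.f, 1.f,  0.f);                         // 90 degree rotation about x
 *   gravis::tMatrix<float,3,1> v = gravis::tVector3<float>(0.f, 1.f, 0.f);
 *   gravis::tMatrix<float,3,1> w = R * v;        // w = (0, 0, 1)^T
 *   float z = w(2,0);                            // element access is (row, column)
 */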
std::ostream& operator<< (std::ostream& os, const tMatrix& arg) { if ((h>1) && (w>1)) { os << "Matrix: " << h << "x" << w << std::endl; for (size_t i=0; i1) { os << "[ "; for (size_t j=0; j inline std::istream& operator>> (std::istream& is, tMatrix& arg) { std::string t; if ((h>1) && (w>1)) { is >> t >> t; for (size_t i=0; i> t; for (size_t j=0; j> arg(i,j); is >> t; } } else if (w==1 && h>1) { is >> t; if (t != "[") GRAVIS_THROW3(gravis::Exception, "Unexpected token. A vector should start with [", t); for (size_t j=0; j> arg[j]; is >> t; if (t != "]^T") GRAVIS_THROW3(gravis::Exception, "Unexpected token. A column vector should end with ]^T", t); } else { is >> t; if (t != "[") GRAVIS_THROW3(gravis::Exception, "Unexpected token. A vector should start with [", t); for (size_t j=0; j> arg[j]; is >> t; if (t != "]") GRAVIS_THROW3(gravis::Exception, "Unexpected token. A row vector should end with ]", t); } return is; } } #endif relion-3.1.3/src/jaz/gravis/.svn/text-base/tMesh.h.svn-base000066400000000000000000000370731411340063500234110ustar00rootroot00000000000000#ifndef __LIBGRAVIS_T_MESH_H__ #define __LIBGRAVIS_T_MESH_H__ /****************************************************************************** ** Title: tMesh.h ** Description: Templated mesh representation using std:;vector. ** ** Author: Jean-Sebastien Pierrard, 2005 ** Brian Amberg, 2006 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #define ATT_PURE __attribute__ ((pure)) #include "tRGBA.h" #include "t2Vector.h" #include "t3Vector.h" #include "tImage.h" #include "Tuple.h" #include #include #include "Mesh.h" #include namespace gravis { template class tMaterial { public: /** * Helper class with lazy loading images with an associated filename. * It may also contain no image, for a textureless mesh. **/ class ImageFile { private: mutable bool loaded; mutable tImage< tRGBA > image; boost::filesystem::path filename; public: ImageFile() : loaded(false), image(), filename() {}; ImageFile(const std::string& fn) : loaded(false), image(), filename(fn) {}; inline const bool& isLoaded() const { return loaded; } /** * Load the image into memory, if it is not yet loaded **/ void load() const { if (!loaded) { reload(); } }; /** * Load the image into memory, even if it is already loaded **/ void reload() const { if (isSet()) { loaded = true; image.read( getFilenameNative() ); } }; /** * Load the given image **/ void load(const std::string& filename) { setFilename(filename); reload(); }; /** * Change the name of the image file * * The empty filename resets the image. * * Convenience function, assuming the filename is in native format. **/ void setFilename(const char* filename) { setFilename(boost::filesystem::path(filename)); }; /** * Change the name of the image file * * The empty filename resets the image. * * Convenience function, assuming the filename is in native format. **/ void setFilename(const std::string& filename) { setFilename(boost::filesystem::path(filename)); }; /** * Change the name of the image file * * The empty filename resets the image. **/ void setFilename(const boost::filesystem::path& filename) { this->filename = filename; }; /** * Return the image filename in native format. 
**/ const std::string getFilenameNative() const { return filename.string(); } /** * Return the image filename **/ const boost::filesystem::path& getFilename() const { return filename; } /** * Delete the texture **/ void reset() { filename = ""; image.resize(0,0); } /** * Do we represent the NULL image **/ ATT_PURE bool isSet() const { return filename != ""; } /** * Associate a texture from a tImage **/ void set(const std::string& filename, const tImage > &image) { loaded = true; this->filename = filename; this->image = image; } /** * Access the image. * There seems to be a problem with changing the image. **/ tImage< tRGBA > &getImage() { if (!loaded) load(); return image; } /** * Access the image **/ const tImage< tRGBA > &getImage() const { if (!loaded) load(); return image; } /** * Set the image **/ void setImage(const tImage > &img) { image=img; }; }; public: tMaterial(std::string n="") : name(n), ambient(T(0.1),T(1.0)), diffuse(T(0.9),T(1.0)), specular(T(0.6),T(1.0)), shininess(T(25.0)), texture(), envMap(), normalMap() {} tMaterial(const Material& o) : name(o.name), ambient(o.ambient), diffuse(o.diffuse), specular(o.specular), shininess(o.shininess), texture(o.textureName), envMap(o.envMapName), normalMap(o.normalMapName) { } tMaterial(const tMaterial& o) : name(o.name), ambient(o.ambient), diffuse(o.diffuse), specular(o.specular), shininess(o.shininess), texture(o.texture), envMap(o.envMap), normalMap(o.normalMap) { } tMaterial& operator=(const tMaterial& o) { name= o.name; ambient=o.ambient; diffuse=o.diffuse; specular=o.specular; shininess=o.shininess; texture=o.texture; envMap=o.envMap; normalMap=o.normalMap; return *this; } std::string name; tRGBA ambient; tRGBA diffuse; tRGBA specular; T shininess; /*!< \brief Phong exponent. */ ImageFile texture; ImageFile envMap; ImageFile normalMap; /// convert this brian-material to a gravis material Material getGravisMaterial() const { Material gravisMaterial; gravisMaterial.name = name; gravisMaterial.ambient = ambient; gravisMaterial.diffuse = diffuse; gravisMaterial.specular = specular; gravisMaterial.shininess = shininess; gravisMaterial.hasTexture = texture.isSet(); gravisMaterial.textureName = texture.getFilenameNative(); gravisMaterial.hasEnvMap = envMap.isSet(); gravisMaterial.envMapName = envMap.getFilenameNative(); gravisMaterial.hasNormalMap = normalMap.isSet(); gravisMaterial.normalMapName = normalMap.getFilenameNative(); return gravisMaterial; } }; /*! \brief Mesh data structure. * * A Mesh contains vertex, normal, texture coordinate (uvw) and material information. * For the three types of primitives (triangle, line, point) there are index arrays * referencing above information. For example for lines, lvi indexes into * vertex, and lti into texture coordinates. The vertices and colors * for the 4th lines in the mesh are then vertex[lvi[3][0]], vertex[lvi[3][1]], * color[lci[3][0]] and color[lci[3][1]]. * * tvi.size(), lvi.size() and pvi.size() implicitly specify how many triangles, lines * and points there are in the mesh. All other index arrays must either be of the * same length as the corresponding vertex index array, or of length 0. * * How is missing information handled? If for example no normals are assigned to * any triangles, tni.size() would be zero. If normals are assigned for some triangles, * but not for others, the tni-tuples for the respective triangles must have entries * of -1 (which is the 'invalid index' pointing to the default entry in the * corresponding defaultVectors). 
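 *
 * Illustrative sketch (not part of the original comment) of how the index
 * arrays are meant to be used; tMesh<float> and the loop below are
 * assumptions for the example:
 *
 *   gravis::tMesh<float> mesh;                        // filled elsewhere
 *   for (size_t t = 0; t < mesh.tvi.size(); ++t) {    // one entry per triangle
 *     const gravis::Tuple3& vi = mesh.tvi[t];
 *     const gravis::t3Vector<float>& a = mesh.vertex[vi[0]];
 *     const gravis::t3Vector<float>& b = mesh.vertex[vi[1]];
 *     const gravis::t3Vector<float>& c = mesh.vertex[vi[2]];
 *     // ... e.g. compute the face normal from a, b and c ...
 *     // Per-corner colours are optional: tci may be empty, and an index of -1
 *     // means "no colour assigned", falling back to the default entry.
 *   }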
*/ template class tMesh { public: std::vector< t3Vector > vertex; /*!< \brief Vertex array. */ std::vector< t3Vector > normal; /*!< \brief Normal array. */ std::vector< t3Vector > texcrd; /*!< \brief Texture coordinate array. */ std::vector< tRGBA > color; /*!< \brief Color array. */ std::vector< tMaterial > material; /*!< \brief Material array. */ std::vector tvi; /*!< \brief Triangle vertex indices. */ std::vector tni; /*!< \brief Triangle normal indices. */ std::vector tti; /*!< \brief Triangle texcrd indices. */ std::vector tci; /*!< \brief Triangle color indices. */ std::vector tmi; /*!< \brief Triangle material indices. */ std::vector lvi; /*!< \brief Line vertex indices. */ std::vector lti; /*!< \brief Line texcrd indices. */ std::vector lci; /*!< \brief Line colour indices. */ std::vector pvi; /*!< \brief Point vertex indices. */ std::vector pci; /*!< \brief Point color indices. */ std::vector adjacent; /*!< \brief Adjacency list. See generateAdjacencyList(). */ tMesh() : vertex(0,t3Vector(std::numeric_limits::quiet_NaN())), normal(), texcrd(), color(0, tRGBA(1)), material(), tvi(0, Tuple3(-1,-1,-1)), tni(0, Tuple3(-1,-1,-1)), tti(0, Tuple3(-1,-1,-1)), tci(0, Tuple3(-1,-1,-1)), tmi(0, -1), lvi(0, Tuple2(-1,-1)), lti(0, Tuple2(-1,-1)), lci(0, Tuple2(-1,-1)), pvi(0, -1), pci(0, -1), adjacent(0,Tuple3(-1,-1,-1)) { } tMesh(const tMesh& o) : vertex(o.vertex), normal(o.normal), texcrd(o.texcrd), color(o.color), material(o.material), tvi(o.tvi), tni(o.tni), tti(o.tti), tci(o.tci), tmi(o.tmi), lvi(o.lvi), lti(o.lti), lci(o.lci), pvi(o.pvi), pci(o.pci), adjacent(o.adjacent) { } tMesh(const Mesh& o) : vertex(o.vertex), normal(o.normal), texcrd(o.texcrd), color(o.color), material(o.material.size()), tvi(o.tvi), tni(o.tni), tti(o.tti), tci(o.tci), tmi(o.tmi), lvi(o.lvi), lti(o.lti), lci(o.lci), pvi(o.pvi), pci(o.pci), adjacent(o.adjacent) { for (size_t i=0; i(o.material[i]); } /** * Exception safe swap operator **/ void swap(tMesh& o) { vertex.swap(o.vertex); normal.swap(o.normal); texcrd.swap(o.texcrd); color.swap(o.color); material.swap(o.material); tvi.swap(o.tvi); tni.swap(o.tni); tti.swap(o.tti); tci.swap(o.tci); tmi.swap(o.tmi); lvi.swap(o.lvi); lti.swap(o.lti); lci.swap(o.lci); pvi.swap(o.pvi); pci.swap(o.pci); adjacent.swap(o.adjacent); } /** * Exception safe assignment operator **/ tMesh& operator=(const tMesh& o) { tMesh tmp(o); tmp.swap(*this); return *this; } /// Generate a gravis mesh from this brian mesh Mesh getGravisMesh() const { Mesh gravisMesh; gravisMesh.vertex = vertex; gravisMesh.normal = normal; gravisMesh.texcrd = texcrd; gravisMesh.color = color; gravisMesh.tvi = tvi; gravisMesh.tni = tni; gravisMesh.tti = tti; gravisMesh.tci = tci; gravisMesh.tmi = tmi; gravisMesh.lvi = lvi; gravisMesh.lti = lti; gravisMesh.lci = lci; gravisMesh.pvi = pvi; gravisMesh.pci = pci; gravisMesh.adjacent = adjacent; gravisMesh.material.resize( material.size() ); for (size_t i=0; i a = (vertex[tvi[i][1]] - vertex[tvi[i][0]]); t3Vector b = (vertex[tvi[i][2]] - vertex[tvi[i][0]]); normal[i] = cross(a, b).normalize(); tni[i] = Tuple3(i, i, i); } } void generatePerVertexNormals() { std::vector ncount; t3Vector norm; const int numFaces = int(tvi.size()); tni.resize(numFaces); normal.resize(vertex.size()); ncount.resize(vertex.size()); for (unsigned int i = 0; i < ncount.size(); i++) { ncount[i] = 0; normal[i] = t3Vector(T(0)); } for (int i = 0; i < numFaces; i++) { t3Vector a = (vertex[tvi[i][1]] - vertex[tvi[i][0]]); t3Vector b = (vertex[tvi[i][2]] - vertex[tvi[i][0]]); norm = cross(a, 
b).normalize(); tni[i] = tvi[i]; normal[tvi[i][0]] += norm; normal[tvi[i][1]] += norm; normal[tvi[i][2]] += norm; ncount[tvi[i][0]]++; ncount[tvi[i][1]]++; ncount[tvi[i][2]]++; } for (unsigned int i = 0; i < normal.size(); i++) { if(ncount[i] != 0) normal[i] /= T(ncount[i]); normal[i] = normal[i].normalize(); } } class Node { public: int count; Tuple2 faces[20]; Node() : count(0) {} void addFace(const Tuple2& t) { if (count == 20) GRAVIS_THROW2(Exception, "Node in mesh has cardinality greater than 20!"); faces[count++] = t; } }; /*! \brief Generate the adjacency list. * * The adjacency list (adjacent) contains entries for each triangle. * Each entry specifies the adjacent triangle for each edge. * * The complexity of the algorithm is linear in the number of faces. * * \throw gravis::Exception if any vertex has a cardinality greater 20 */ void generateAdjacencyList() { const int numFaces = tvi.size(); const int numVert = vertex.size(); adjacent.resize(numFaces); std::vector nodeFaces(numVert); for (int i = 0; i < numFaces; i++) { for (int j = 0; j < 3; j++) { nodeFaces[tvi[i][j]].addFace(Tuple2(i, j)); } } // foreach face for (int f = 0; f < numFaces; f++) { Tuple3& ft = tvi[f]; Tuple3& at = adjacent[f]; // foreach edge for (int e = 0; e < 3; e++) { // already found adjacent face for this edge? if (at[e] >= 0) continue; // vertices for this edge int v1 = ft[e]; int v2 = ft[(e+1)%3]; // faces using these vertices Node& node1 = nodeFaces[v1]; Node& node2 = nodeFaces[v2]; for (int i = 0; i < node1.count; i++) { int f1 = node1.faces[i][0]; if (f1 == f) continue; // self for (int j = 0; j < node2.count; j++) { if (f1 == node2.faces[j][0]) { adjacent[f][e] = f1; adjacent[f1][node2.faces[j][1]] = f; } } } } } } }; /// See tMesh::swap(). template inline void swap(tMesh& __x, tMesh& __y) { __x.swap(__y); } typedef tMaterial fMaterial; typedef tMaterial dMaterial; typedef tMesh fMesh; typedef tMesh dMesh; } // namespace gravis #endif relion-3.1.3/src/jaz/gravis/.svn/text-base/tQuaternion.h.svn-base000066400000000000000000000070461411340063500246370ustar00rootroot00000000000000#ifndef __LIBGRAVIS_T_QUATERNION_H__ #define __LIBGRAVIS_T_QUATERNION_H__ /****************************************************************************** ** Title: tQuaternion.h ** Description: Represents a quaternion useful for rotation and scaling ** ** Author: Reinhard Knothe ** Brian Amberg ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #include "t4Matrix.h" namespace gravis { /** * A tQuaternion class useful for rotation+scaling operations **/ template class tQuaternion { public: T s,v1,v2,v3; tQuaternion(const T& s=0, const T& v1=1, const T& v2=0, const T& v3=0) : s(s), v1(v1), v2(v2), v3(v3) {}; tQuaternion(const tQuaternion& q) : s(q.s), v1(q.v1), v2(q.v2), v3(q.v3) {}; tQuaternion(const T* q) : s(q[0]), v1(q[1]), v2(q[2]), v3(q[3]) {}; tQuaternion(const T& phi, const gravis::t3Vector &axis) : s(cos(phi/T(2))), v1(axis.x* sin(phi/T(2))), v2(axis.y* sin(phi/T(2))), v3(axis.z* sin(phi/T(2))) {}; bool operator==(const tQuaternion& q) const { return (s==q.s) && (v1==q.v1) && (v2==q.v2) && (v3==q.v3); }; bool operator!=(const tQuaternion& q) const { return !(*this == q); } tQuaternion operator + (const tQuaternion& q) const { return tQuaternion(q.s+s, q.v1+v1, q.v2+v2, q.v3+v3 ); } tQuaternion operator - (const tQuaternion& q) const { return tQuaternion(s-q.s, v1-q.v1, v2-q.v2, v3-q.v3 ); }; tQuaternion operator * (const tQuaternion& q) 
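/* Illustrative usage sketch (not part of the original gravis headers).
 * The (phi, axis) constructor above builds a unit quaternion for a rotation
 * of phi radians about the given axis, operator* composes rotations, and
 * getMatrix3()/getMatrix4() below convert to the equivalent rotation matrix.
 * f3Vector and f3Matrix are assumed to be the float typedefs from the
 * companion t3Vector.h and t3Matrix.h headers.
 *
 *   const float phi = 1.5707963f;                     // 90 degrees in radians
 *   gravis::fQuaternion qx(phi, gravis::f3Vector(1.f, 0.f, 0.f));
 *   gravis::fQuaternion qz(phi, gravis::f3Vector(0.f, 0.f, 1.f));
 *   gravis::fQuaternion q = qz * qx;                  // rotate about x first, then about z
 *   q.normalize();                                    // guard against rounding drift
 *   gravis::f3Matrix R = q.getMatrix3();              // the same rotation as a 3x3 matrix
 */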
const { t3Vector v00 (v1,v2,v3); t3Vector v10 (q.v1,q.v2,q.v3); T s2 = s * q.s - dot(v00,v10); t3Vector v2 = cross(v00,v10); t3Vector v3 = v10; v3 *= s; t3Vector v4 = v00; v4 *= q.s; t3Vector v5 = v2+v3+v4; return tQuaternion(s2, v5.x, v5.y, v5.z); } /** * Norm **/ T length() const { return sqrt(s*s + v1*v1 + v2*v2 + v3*v3); } /** * Inplace normalization **/ void normalize() { T l = length(); s /= l; v1 /= l; v2 /= l; v3 /= l; } t3Matrix getMatrix3() const { return t3Matrix( T(1)-T(2)*(v2*v2 + v3*v3), T(2)*(v1*v2 - v3*s), T(2)*(v3*v1 + v2*s), T(2) * (v1*v2 + v3*s), T(1) - T(2) * (v3*v3 + v1*v1), T(2) * (v2*v3 - v1*s), T(2) * (v3*v1 - v2*s), T(2) * (v2*v3 + v1*s), T(1) - T(2) * (v2*v2 + v1*v1)); } t4Matrix getMatrix4() const { return t4Matrix( T(1)-T(2)*(v2*v2 + v3*v3), T(2)*(v1*v2 - v3*s), T(2)*(v3*v1 + v2*s), T(0), T(2) * (v1*v2 + v3*s), T(1) - T(2) * (v3*v3 + v1*v1), T(2) * (v2*v3 - v1*s), T(0), T(2) * (v3*v1 - v2*s), T(2) * (v2*v3 + v1*s), T(1) - T(2) * (v2*v2 + v1*v1), T(0), T(0), T(0), T(0), T(1) ); } }; template inline std::ostream& operator<< (std::ostream& os, const tQuaternion& arg) { os << "[" << arg.s << "; " << arg.v1 << ", " << arg.v2 << ", " << arg.v3 << "]"; return os; } typedef gravis::tQuaternion fQuaternion; typedef gravis::tQuaternion dQuaternion; } #endif relion-3.1.3/src/jaz/gravis/.svn/text-base/tRGB.h.svn-base000066400000000000000000000201721411340063500231170ustar00rootroot00000000000000#ifndef __LIBGRAVIS_T_RGB_H__ #define __LIBGRAVIS_T_RGB_H__ /****************************************************************************** ** Title: tRGB.h ** Description: Represents an RGB color tupel. ** ** Author: Jean-Sebastien Pierrard, 2005 ** Brian Amberg, 2005-2006 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #include #include #include namespace gravis { template class tRGBA; template class tRGB { /*! * Private helper functions, wrapped into an additional struct in case that we want to use the names **/ struct priv { static inline const T& min(const T& a, const T& b) { return ab ? a : b; } }; public: typedef T scalar_type; T r, g, b; tRGB () : r(T(0)), g(T(0)), b(T(0)) { } tRGB (T _r, T _g, T _b) : r(_r), g(_g), b(_b) { } tRGB (T gray) : r(gray), g(gray), b(gray) { } explicit tRGB (const tRGBA& c) : r(c.r), g(c.g), b(c.b) {} void set (T _r, T _g, T _b) { r = _r; g = _g; b = _b; } void set (T gray) { r = gray; b = gray; g = gray; } void add (T _r, T _g, T _b) { r += _r; g += _g; b += _b; } void add (T gray) { r += gray; g += gray; b += gray; } /** * Deprecated, use intensity() instead **/ T grayValue () const { return T(0.30*r + 0.59*g + 0.11*b); } T intensity () const { return grayValue(); } /*! * Return minimum of the tupel. * * TODO: Is this really necessary in here. It could be a utility function. **/ T const& minValue () const { return std::min(std::min(r, g), b); } /*! * Return maximum of the tupel. * * TODO: Is this really necessary in here. It could be a utility function. 
**/ T const& maxValue () const { return std::max(std::max(r, g), b); } bool operator != (const tRGB& c) const { return r != c.r || g != c.g || b != c.b; } bool operator == (const tRGB& c) const { return r == c.r && g == c.g && b == c.b; } tRGB& operator += (const tRGB& c) { r += c.r; g += c.g; b += c.b; return *this; } tRGB& operator += (const T gray) { r += gray; g += gray; b += gray; return *this; } tRGB& operator -= (const tRGB& c) { r -= c.r; g -= c.g; b -= c.b; return *this; } tRGB& operator -= (const T gray) { r -= gray; g -= gray; b -= gray; return *this; } tRGB& operator *= (const tRGB& c) { r *= c.r; g *= c.g; b *= c.b; return *this; } tRGB& operator *= (const T factor) { r *= factor; g *= factor; b *= factor; return *this; } tRGB& operator /= (const tRGB& c) { r /= c.r; g /= c.g; b /= c.b; return *this; } tRGB& operator /= (const T factor) { r /= factor; g /= factor; b /= factor; return *this; } /*! * \brief All color components are clamped to [0,1]. This function works inplace. * * \return self */ tRGB& clamp() { r = std::min(std::max(r, T(0)), T(1)); g = std::min(std::max(g, T(0)), T(1)); b = std::min(std::max(b, T(0)), T(1)); return *this; } //! Unary minus inline tRGB operator - () const { return tRGB(-r, -g, -b); }; //! Addition of a scalar (analog to -=) inline tRGB operator + (const T& c) const { return tRGB(r+c, g+c, b+c); }; //! Subtraction of a scalar (analog to +=) inline tRGB operator - (const T& c) const { return tRGB(r-c, g-c, b-c); }; //! Multiplication of a scalar (analog to *=) inline tRGB operator * (const T& c) const { return tRGB(r*c, g*c, b*c); }; //! Division by a scalar (analog to /=) inline tRGB operator / (const T& c) const { return tRGB(r/c, g/c, b/c); }; bool operator == (const tRGB& arg) { return ((arg.r == r) && (arg.g == g) && (arg.b == b)); } const T& operator [](const size_t& i) const { return (&r)[i]; } T& operator [](const size_t& i) { return (&r)[i]; } }; template inline tRGB operator + (const tRGB& c1, const tRGB& c2) { tRGB result = c1; return (result += c2); } template inline tRGB operator - (const tRGB& c1, const tRGB& c2) { tRGB result = c1; return (result -= c2); } template inline tRGB operator * (const tRGB& c1, const tRGB& c2) { tRGB result(c1.r * c2.r, c1.g * c2.g, c1.b * c2.b); return result; } template inline tRGB operator * (const tRGB& c, T factor) { tRGB result(c.r * factor, c.g * factor, c.b * factor); return result; } template inline tRGB operator * (T factor, const tRGB& c) { tRGB result(c.r * factor, c.g * factor, c.b * factor); return result; } template inline tRGB operator / (const tRGB& c1, const tRGB& c2) { tRGB result(c1.r / c2.r, c1.g / c2.g, c1.b / c2.b); return result; } template inline tRGB operator / (const tRGB& c, T factor) { tRGB result(c.r / factor, c.g / factor, c.b / factor); return result; } template inline bool operator < (const tRGB& c1, const tRGB& c2) { T gray1 = c1.grayValue(); T gray2 = c2.grayValue(); return (gray1 < gray2); } template inline tRGB operator ! (const tRGB& c) { tRGB result = tRGB::White(); return (result -= c); } // Absolute of every color channel template inline tRGB abs(const tRGB& c) { return tRGB(c.r < T(0) ? -c.r : c.r, c.g < T(0) ? -c.g : c.g, c.b < T(0) ? 
-c.b : c.b); } template inline std::ostream& operator << (std::ostream& os, const tRGB& c) { os << "(" << c.r << ", " << c.g << ", " << c.b << ")"; return os; } template <> inline std::ostream& operator << (std::ostream& os, const tRGB& c) { os << "(" << (int)c.r << ", " << (int)c.g << ", " << (int)c.b << ")"; return os; } // Inverse of operator<< template inline std::istream& operator>> (std::istream& is, tRGB& arg) { char c = ' '; is >> c; if (is.eof()) return is; if (c != '(') throw std::runtime_error("tRGB should start with an opening ("); std::string values; int v = 0; while ((is.get(c)) && (c != ')')) { if (c == ',') { v++; if (v >= 3) throw std::runtime_error("tRGB contains more than three elements"); values.push_back(' '); } else values.push_back(c); } if (c != ')') { throw std::runtime_error("tRGB should end with a )"); } if ( v < 2 ) { throw std::runtime_error("tRGB has not enough color values"); } std::stringstream valueReader(values); valueReader >> arg.r >> arg.g >> arg.b; return is; } template inline T dot (const tRGB& v1, const tRGB& v2) { return (v1.r*v2.r + v1.g*v2.g + v1.b*v2.b); } typedef tRGB bRGB; typedef tRGB fRGB; typedef tRGB dRGB; } /* Close Namespace "gravis" */ #endif relion-3.1.3/src/jaz/gravis/.svn/text-base/tRGBA.h.svn-base000066400000000000000000000173341411340063500232260ustar00rootroot00000000000000#ifndef __LIBGRAVIS_T_RGBA_H__ #define __LIBGRAVIS_T_RGBA_H__ /****************************************************************************** ** Title: tRGBA.h ** Description: Represents an RGB+Alpha color tupel. ** ** Author: Jean-Sebastien Pierrard, 2005 ** Brian Amberg, 2005-2006 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #include #include #include #include namespace gravis { template class tRGBA { public: T r, g, b, a; typedef T scalar_type; //! Default constructs a black, translucent pixel tRGBA () : r(T(0)), g(T(0)), b(T(0)), a(T(0)) { } tRGBA (const T& r, const T& g, const T& b, const T& a=T(1)) : r(r), g(g), b(b), a(a) { } tRGBA (const T& gray, const T& alpha=T(1)) : r(gray), g(gray), b(gray), a(alpha) { } explicit tRGBA (const tRGB& c, const T& a=T(1)) : r(c.r), g(c.g), b(c.b), a(a) { } void set (T _r, T _g, T _b, T _a) { r = _r; g = _g; b = _b; a = _a; } void set (T gray) { r = gray; b = gray; g = gray; a = T(1); } /*! * Conversion to a gray pixel * * TODO: This should be put in an external conversion file, together with cie, hsv, etc... **/ T grayValue () const { return T(0.30*r + 0.59*g + 0.11*b); } T intensity () const { return grayValue(); } /*! * Return minimum of the tupel, ignoring the alpha channel. * * TODO: Is this really necessary in here. It could be a utility function. **/ T const& minValue () const { return std::min(std::min(r, g), b); } /*! * Return maximum of the tupel, ignoring the alpha channel. * * TODO: Is this really necessary in here. It could be a utility function. **/ T const& maxValue () const { return std::max(std::max(r, g), b); } const T& operator [] (const size_t& i) const { return (&r)[i]; } T& operator [] (const size_t& i) { return (&r)[i]; } /*! * \brief All color components, including alpha are clamped to [0,1]. This function works inplace. 
* * \return self */ tRGBA& clamp() { r = std::min(std::max(r, T(0)), T(1)); g = std::min(std::max(g, T(0)), T(1)); b = std::min(std::max(b, T(0)), T(1)); a = std::min(std::max(a, T(0)), T(1)); return *this; } bool operator != (const tRGBA& c) const { return r != c.r || g != c.g || b != c.b || a != c.a; } bool operator == (const tRGBA& c) const { return r == c.r && g == c.g && b == c.b && a == c.a; } tRGBA& operator += (const tRGBA& c) { r += c.r; g += c.g; b += c.b; a += c.a; return *this; } // tRGBA& operator += (const T gray) // { // r += gray; // g += gray; // b += gray; // return *this; // } tRGBA& operator -= (const tRGBA& c) { r -= c.r; g -= c.g; b -= c.b; a -= c.a; return *this; } // tRGBA& operator -= (const T gray) // { // r -= gray; // g -= gray; // b -= gray; // return *this; // } tRGBA& operator *= (const tRGBA& c) { r *= c.r; g *= c.g; b *= c.b; a *= c.a; return *this; } tRGBA& operator *= (const float factor) { r *= factor; g *= factor; b *= factor; a *= factor; return *this; } tRGBA& operator /= (const tRGBA& c) { r /= c.r; g /= c.g; b /= c.b; a /= c.a; return *this; } tRGBA& operator /= (const float factor) { r /= factor; g /= factor; b /= factor; a /= factor; return *this; } //! Unary minus inline tRGBA operator - () const { return tRGBA(-r, -g, -b, -a); }; //! Addition of a scalar (analog to -=) // inline // tRGBA operator + (const T& c) const // { // return tRGBA(r+c, g+c, b+c, a); // }; //! Subtraction of a scalar (analog to +=) // inline // tRGBA operator - (const T& c) const // { // return tRGBA(r-c, g-c, b-c, a); // }; //! Multiplication of a scalar (analog to *=) inline tRGBA operator * (const T& c) const { return tRGBA(r*c, g*c, b*c, a*c); }; //! Division by a scalar (analog to /=) inline tRGBA operator / (const T& c) const { return tRGBA(r/c, g/c, b/c, a/c); }; }; template inline tRGBA operator+ (const tRGBA& c1, const tRGBA& c2) { tRGBA result(c1); return (result += c2); } template inline tRGBA operator- (const tRGBA& c1, const tRGBA& c2) { tRGBA result(c1); return (result -= c2); } template inline tRGBA operator* (const tRGBA& c1, const tRGBA& c2) { tRGBA result(c1); result *= c2; return result; } template inline tRGBA operator* (const tRGBA& c, T factor) { tRGBA result(c); return (result *= factor); } template inline tRGBA operator* (T factor, const tRGBA& c) { tRGBA result(c); return (result *= factor); } template inline tRGBA operator / (const tRGBA& c1, const tRGBA& c2) { tRGBA result(c1.r / c2.r, c1.g / c2.g, c1.b / c2.b); return result; } template inline tRGBA operator / (const tRGBA& c, T factor) { tRGBA result(c.r / factor, c.g / factor, c.b / factor); return result; } template inline bool operator < (const tRGBA& c1, const tRGBA& c2) { T gray1 = c1.grayValue(); T gray2 = c2.grayValue(); return (gray1 < gray2); } template inline tRGBA operator ! 
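/* Illustrative usage sketch (not part of the original gravis headers).
 * The tRGB stream operators above write a colour as "(r, g, b)" and parse the
 * same format back, so colours round-trip through text streams; fRGB and the
 * literal values are assumptions for the example.
 *
 *   gravis::fRGB c(0.25f, 0.5f, 1.0f);
 *   std::stringstream ss;
 *   ss << c;                                 // ss now holds "(0.25, 0.5, 1)"
 *   gravis::fRGB d;
 *   ss >> d;                                 // d equals c again
 *   d *= 0.5f;                               // channel-wise scaling
 *   d.clamp();                               // clamp all channels to [0, 1]
 *
 * Note that the tRGBA overloads further down write the channels separated by
 * spaces while the parser counts commas, so the same round trip does not
 * appear to hold for tRGBA.
 */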
(const tRGBA& c) { tRGBA result = tRGBA::White; return (result -= c); } template inline std::ostream& operator << (std::ostream& os, const tRGBA& c) { os << "(" << c.r << " " << c.g << " " << c.b << " " << c.a << ")"; return os; } template <> inline std::ostream& operator << (std::ostream& os, const tRGBA& c) { os << "(" << (int)c.r << " " << (int)c.g << " " << (int)c.b << " " << (int)c.a << ")"; return os; } // Inverse of operator<< template inline std::istream& operator>> (std::istream& is, tRGBA& arg) { char c = ' '; is >> c; if (is.eof()) return is; if (c != '(') throw std::runtime_error("tRGBA should start with an opening ("); std::stringstream values; int v = 0; while ((is >> c) && (c != ')')) { if (c == ',') { v++; if (v >= 4) throw std::runtime_error("tRGBA contains more than four elements"); values << " "; } else if (c != ' ') values << c; } if (c != ')') { throw std::runtime_error("tRGBA should end with a )"); } if ( v < 3 ) { throw std::runtime_error("tRGBA has not enough color values"); } values >> arg.r >> arg.g >> arg.b >> arg.a; return is; } typedef tRGBA bRGBA; typedef tRGBA fRGBA; typedef tRGBA dRGBA; } /* Close Namespace "gravis" */ #endif relion-3.1.3/src/jaz/gravis/.svn/text-base/tRGB_A.h.svn-base000066400000000000000000000167311411340063500233650ustar00rootroot00000000000000#ifndef __LIBGRAVIS_T_RGB_A_H__ #define __LIBGRAVIS_T_RGB_A_H__ /****************************************************************************** ** Title: tRGB_A.h ** Description: Represents an RGB+Alpha color tupel. ** ** Author: Jean-Sebastien Pierrard, 2005 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #include #include #include #include namespace gravis { template class tRGB_A { public: T r, g, b, a; tRGB_A () : r(T(0)), g(T(0)), b(T(0)), a(T(1.0)) { } tRGB_A (T _r, T _g, T _b, T _a=T(1.0)) : r(_r), g(_g), b(_b), a(_a) { } tRGB_A (T gray) : r(gray), g(gray), b(gray), a(T(1.0)) { } tRGB_A (T gray, T alpha) : r(gray), g(gray), b(gray), a(alpha) { } explicit tRGB_A (const tRGBA& c) : r(c.r), g(c.g), b(c.b), a(c.a) { } explicit tRGB_A (const tRGB& c, T _a=T(1.0)) : r(c.r), g(c.g), b(c.b), a(_a) { } void set (T _r, T _g, T _b, T _a) { r = _r; g = _g; b = _b; a = _a; } void set (T _r, T _g, T _b) { r = _r; g = _g; b = _b; } void set (T gray) { r = gray; b = gray; g = gray; } void set (T gray, T alpha) { r = gray; b = gray; g = gray; a = alpha; } T grayValue () const { return (T)(0.30*r + 0.59*g + 0.11*b); } T minValue () const { if (r < g) { if (r < b) return r; else return b; } else { if (g < b) return g; else return b; } } T maxValue () const { if (r > g) { if (r > b) return r; else return b; } else { if (g > b) return g; else return b; } } /*! \brief All color components, including alpha are clamped to [0,1]. 
* * \return self */ tRGB_A& clamp() { r = std::min(std::max(r, T(0)), T(1)); g = std::min(std::max(g, T(0)), T(1)); b = std::min(std::max(b, T(0)), T(1)); return *this; } bool operator != (const tRGB_A& c) const { return r != c.r || g != c.g || b != c.b || a != c.a; } bool operator == (const tRGB_A& c) const { return r == c.r && g == c.g && b == c.b && a == c.a; } tRGB_A& operator += (const tRGB_A& c) { r += c.r; g += c.g; b += c.b; return *this; } tRGB_A& operator += (const T gray) { r += gray; g += gray; b += gray; return *this; } tRGB_A& operator -= (const tRGB_A& c) { r -= c.r; g -= c.g; b -= c.b; return *this; } tRGB_A& operator -= (const T gray) { r -= gray; g -= gray; b -= gray; return *this; } tRGB_A& operator *= (const tRGB_A& c) { r *= c.r; g *= c.g; b *= c.b; return *this; } tRGB_A& operator *= (const float factor) { r *= factor; g *= factor; b *= factor; return *this; } tRGB_A& operator /= (const tRGB_A& c) { r /= c.r; g /= c.g; b /= c.b; return *this; } tRGB_A& operator /= (const float factor) { r /= factor; g /= factor; b /= factor; return *this; } //! Unary minus inline tRGB_A operator - () const { return tRGB_A(-r, -g, -b, a); }; //! Addition of a scalar (analog to -=) inline tRGB_A operator + (const T& c) const { return tRGB_A(r+c, g+c, b+c, a); }; //! Subtraction of a scalar (analog to +=) inline tRGB_A operator - (const T& c) const { return tRGB_A(r-c, g-c, b-c, a); }; //! Multiplication of a scalar (analog to *=) inline tRGB_A operator * (const T& c) const { return tRGB_A(r*c, g*c, b*c, a); }; //! Division by a scalar (analog to /=) inline tRGB_A operator / (const T& c) const { return tRGB_A(r/c, g/c, b/c, a); }; }; template inline tRGB_A operator+ (const tRGB_A& c1, const tRGB_A& c2) { tRGB_A result(c1); return (result += c2); } template inline tRGB_A operator- (const tRGB_A& c1, const tRGB_A& c2) { tRGB_A result(c1); return (result -= c2); } template inline tRGB_A operator* (const tRGB_A& c1, const tRGB_A& c2) { // tRGB_A result(c1.r * c2.r, c1.g * c2.g, c1.b * c2.b, c1.a * c2.a); tRGB_A result(c1); result *= c2; return result; } template inline tRGB_A operator* (const tRGB_A& c, T factor) { // tRGB_A result(c.r * factor, c.g * factor, c.b * factor, c.a); tRGB_A result(c); return (result *= factor); } template inline tRGB_A operator* (T factor, const tRGB_A& c) { // tRGB_A result(c.r * factor, c.g * factor, c.b * factor, c.a); tRGB_A result(c); return (result *= factor); } template inline tRGB_A operator / (const tRGB_A& c1, const tRGB_A& c2) { tRGB_A result(c1.r / c2.r, c1.g / c2.g, c1.b / c2.b); return result; } template inline tRGB_A operator / (const tRGB_A& c, T factor) { tRGB_A result(c.r / factor, c.g / factor, c.b / factor); return result; } template inline bool operator < (const tRGB_A& c1, const tRGB_A& c2) { T gray1 = c1.grayValue(); T gray2 = c2.grayValue(); return (gray1 < gray2); } template inline tRGB_A operator ! 
(const tRGB_A& c) { tRGB_A result = tRGB_A::White; return (result -= c); } template inline std::ostream& operator << (std::ostream& os, const tRGB_A& c) { os << "(" << c.r << " " << c.g << " " << c.b << " " << c.a << ")"; return os; } template <> inline std::ostream& operator << (std::ostream& os, const tRGB_A& c) { os << "(" << (int)c.r << " " << (int)c.g << " " << (int)c.b << " " << (int)c.a << ")"; return os; } // Inverse of operator<< template inline std::istream& operator>> (std::istream& is, tRGB_A& arg) { char c = ' '; is >> c; if (is.eof()) return is; if (c != '(') throw std::runtime_error("tRGB_A should start with an opening ("); std::stringstream values; int v = 0; while ((is >> c) && (c != ')')) { if (c == ' ') { v++; if (v >= 4) throw std::runtime_error("tRGB_A contains more than four elements"); values << " "; } else if (c != ' ') values << c; } if (c != ')') { throw std::runtime_error("tRGB_A should end with a )"); } if ( v < 3 ) { throw std::runtime_error("tRGB_A has not enough color values"); } values >> arg.r >> arg.g >> arg.b >> arg.a; return is; } typedef tRGB_A bRGB_A; typedef tRGB_A fRGB_A; typedef tRGB_A dRGB_A; } /* Close Namespace "gravis" */ #endif relion-3.1.3/src/jaz/gravis/.svn/text-base/tVarMatrix.h.svn-base000066400000000000000000001362301411340063500244250ustar00rootroot00000000000000#ifndef __LIBGRAVIS_T_VAR_MATRIX_H__ #define __LIBGRAVIS_T_VAR_MATRIX_H__ /****************************************************************************** ** Title: matrix.h ** Description: Templated variable size dense matrices, with a blas/lapack ** connector. ** ** Author: Brian Amberg, 2007 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #include #include #include #include #include #include #include #include #include #include "tMatrix.h" #include "Exception.h" #include "t2Matrix.h" #include "t3Matrix.h" #include "t4Matrix.h" namespace gravis { //////////////////////////////////////////////////////////////////// // The Matrix classes. We distinguish between vectors and matrices // and allow for views of full matrices. No spacing magic is used, these are // simple dense matrices in column-first order template class tVarVector; template class tVectorView; template class tConstVectorView; template class tVarMatrix; template class tMatrixView; template class tConstMatrixView; // // IMPLEMENTATION // namespace matrix { /** * Set all to zero **/ template inline static void clear(VectorOrMatrix& v) { if (v.size()>0) memset( &v[0], 0, sizeof(v[0])*v.size()); } /** * Fill with equal elements **/ template inline static void fill(VectorOrMatrix& v, const typename VectorOrMatrix::scalar& value) { for(size_t i=0; i static inline void clamp(VectorOrMatrix& v, const typename VectorOrMatrix::scalar& min, const typename VectorOrMatrix::scalar& max) { for (size_t i=0; i inline static T* alloc_arr(const std::string& title, const size_t h, const size_t w=1) { T* r = new T[h*w]; return r; } template inline static void free_arr(const T* p) { delete [] p; } template inline static void copy_arr(T* t, const T* s, const size_t sz) { if (sz>0) memcpy(t, s, sz*sizeof(T)); } /////////////////////// HELPER ////////////////////// /** * Clamp a value **/ template static inline void clamp(T& v, const T& min, const T& max) { if (v inline T sqr(const T& v) { return v*v; } } } /** * A thin c++ matrix wrapper around a slice of memory * * These matrix classes allow easy access of the blas/lapack functions. 
* * They are relatively rough, as they try to be as simple as possible. In my * view it is not good to make c++ behave like matlab, as the only advantage * of c++ over matlab is more control. These classes give maximum control. **/ template class tVectorView { public: typedef T scalar; size_t h; T* const data; tVectorView(T* data, size_t h) : h(h), data(data) { } /** * Create a view of the other matrix **/ tVectorView(tVectorView &o) : h(o.h), data(o.data) {} tVectorView(tVarVector &o) : h(o.h), data(o.data) {} template tVectorView(tMatrix &m) : h(mh), data(&m[0]) {} /** * Copy another vector into this vector. **/ tVectorView& operator=(const tConstVectorView &o) { GRAVIS_CHECK(o.h==h, "Incompatible size"); //for (size_t i=0; i &o) { GRAVIS_CHECK(o.h==h, "Incompatible size"); //for (size_t i=0; i &o) { GRAVIS_CHECK(o.h==h, "Incompatible size"); //for (size_t i=0; i class tConstVectorView { public: typedef T scalar; size_t h; const T* const data; tConstVectorView(const T* data, size_t h) : h(h), data(data) {} tConstVectorView(const tConstVectorView& o) : h(o.h), data(o.data) {} tConstVectorView(const tVectorView &o) : h(o.h), data(o.data) {} tConstVectorView(const tVarVector &o) : h(o.h), data(o.data) {} template tConstVectorView(const tMatrix &m) : h(mh), data(&m[0]) {} inline const T& operator[](size_t i) const { checkAccess1( i, h ); return data[i]; } inline const T& operator()(size_t i) const { checkAccess1( i, h ); return data[i]; } inline const T& clampedAccess(int i) const { matrix::priv::clamp(i, 0, int(h)-1); return operator()(i); } inline size_t size() const { return h; } }; /** * A matrix with memory allocated on the heap. * * The semantic of operations on this vector is different from the vector * views. Assigning something to this vector is a copy operation, while for the * views it is just a pointer assignment. * * They are relatively rough, as they try to be as simple as possible. In my * view it is not good to make c++ behave like matlab, as the only advantage * of c++ over matlab is more control. These classes give maximum control. 
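 *
 * Illustrative sketch (not part of the original comment) of the owning versus
 * view distinction described above; the float instantiation and the sizes are
 * assumptions for the example:
 *
 *   gravis::tVarVector<float> a(4, "a");      // owns 4 floats on the heap
 *   a.fill(1.0f);
 *
 *   gravis::tVectorView<float> va(a);         // only a pointer into a's memory
 *   gravis::matrix::fill(va, 5.0f);           // changes a as well: the view shares storage
 *
 *   gravis::tVarVector<float> b(a, "b");      // copy construction allocates new memory
 *   b[0] = 0.0f;                              // a[0] stays 5.0f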
**/ template class tVarVector { public: typedef T scalar; size_t h; T* data; std::string title; tVarVector(size_t h, const std::string& title="UNNAMED:VECTOR") : h(h), data(matrix::priv::alloc_arr(title, h)), title(title) {} tVarVector(const std::string& title="UNNAMED:VECTOR") : h(0), data(matrix::priv::alloc_arr(title, h)), title(title) {} ~tVarVector() { matrix::priv::free_arr(data); }; /** * Copy another vector into this vector **/ tVarVector(const tConstVectorView &o, const std::string& title="UNNAMED:VECTOR") : h(o.h), data(matrix::priv::alloc_arr(title, h)), title(title) { //for (size_t i=0; i &o, const std::string& title="UNNAMED:VECTOR") : h(o.h), data(matrix::priv::alloc_arr(title, h)), title(title) { //for (size_t i=0; i(title, h)), title(title) { //for (size_t i=0; i tVarVector(const tMatrix &o, const std::string& title="UNNAMED:VECTOR") : h(mh), data(matrix::priv::alloc_arr(title, h)), title(title) { //for (size_t i=0; i &o) { resize(o.h); //for (size_t i=0; i &o) { resize(o.h); //for (size_t i=0; i &o) { resize(o.h); //for (size_t i=0; ih) return; if (h > this->h) { T* new_data = matrix::priv::alloc_arr(title, h); std::swap(data, new_data); matrix::priv::free_arr(new_data); } this->h = h; } inline T& operator[](size_t i) { checkAccess1( i, h ); return data[i]; }; inline const T& operator[](size_t i) const { checkAccess1( i, h ); return data[i]; }; inline T& operator()(size_t i) { checkAccess1( i, h ); return data[i]; }; inline const T& operator()(size_t i) const { checkAccess1( i, h ); return data[i]; }; inline T& clampedAccess(int i) { matrix::priv::clamp(i, 0, int(h)-1); return operator()(i); } inline const T& clampedAccess(int i) const { matrix::priv::clamp(i, 0, int(h)-1); return operator()(i); } inline size_t size() const { return h; } /** * Convenience function to clear a matrix **/ inline void clear() { gravis::matrix::clear(*this); } /** * Convenience functions to fill a matrix **/ inline void fill(const T& e) { gravis::matrix::fill(*this, e); } /** * Convenience functions to clamp all elements of a matrix **/ inline void clamp(const T& min, const T& max) { gravis::matrix::clamp(*this, min, max); } }; /** * A thin c++ matrix wrapper around a slice of memory **/ template class tMatrixView { public: typedef T scalar; size_t h, w; T* const data; tMatrixView(T* data, size_t h, size_t w) : h(h), w(w), data(data) {} tMatrixView(tMatrixView &o) : h(o.h), w(o.w), data(o.data) {} tMatrixView(tVarMatrix &o) : h(o.h), w(o.w), data(o.data) {} template tMatrixView(tMatrix &m) : h(mh), w(mw), data(&m[0]) {} /** * Copy another vector into this vector. 
**/ tMatrixView& operator=(const tConstMatrixView &o) { GRAVIS_CHECK(o.h==h && o.w==w, "Incompatible size"); //for (size_t i=0; i &o) { GRAVIS_CHECK(o.h==h && o.w==w, "Incompatible size"); //for (size_t i=0; i &o) { GRAVIS_CHECK(o.h==h && o.w==w, "Incompatible size"); //for (size_t i=0; i class tConstMatrixView { public: typedef T scalar; size_t h, w; const T* const data; tConstMatrixView(const T* data, size_t h, size_t w) : h(h), w(w), data(data) {} tConstMatrixView(const tConstMatrixView& o) : h(o.h), w(o.w), data(o.data) {} tConstMatrixView(const tMatrixView &o) : h(o.h), w(o.w), data(o.data) {} tConstMatrixView(const tVarMatrix &o) : h(o.h), w(o.w), data(o.data) {} template tConstMatrixView(const tMatrix &m) : h(mh), w(mw), data(&m[0]) {} inline const T& operator[](size_t i ) const { checkAccess1( i, h*w ); return data[i]; } inline const T& operator()(size_t i, size_t j) const { checkAccess2( i,j, h, w ); return data[i + j*h]; } inline const T& clampedAccess(int i, int j) const { matrix::priv::clamp(i, 0, int(h)-1); matrix::priv::clamp(j, 0, int(w)-1); return operator()(i,j); } inline size_t size() const { return h*w; } }; /** * A matrix with memory allocated on the heap **/ template class tVarMatrix { public: typedef T scalar; size_t h,w; T* data; std::string title; tVarMatrix(size_t h, size_t w, const std::string& title="UNNAMED:MATRIX") : h(h), w(w), data(matrix::priv::alloc_arr(title, h, w)), title(title) {} tVarMatrix(const std::string& title="UNNAMED:MATRIX") : h(0), w(0), data(matrix::priv::alloc_arr(title, h, w)), title(title) {} ~tVarMatrix() { matrix::priv::free_arr(data); }; /** * Copy another matrix into this matrix **/ tVarMatrix(const tConstMatrixView &o, const std::string& title="UNNAMED:MATRIX") : h(o.h), w(o.w), data(matrix::priv::alloc_arr(title, o.h, o.w)), title(title) { matrix::priv::copy_arr(data, o.data, size()); } /** * Copy another matrix into this matrix **/ tVarMatrix(const tMatrixView &o, const std::string& title="UNNAMED:MATRIX") : h(o.h), w(o.w), data(matrix::priv::alloc_arr(title, o.h, o.w)), title(title) { matrix::priv::copy_arr(data, o.data, size()); } /** * Copy another matrix into this matrix **/ tVarMatrix(const tVarMatrix &o, const std::string& title="UNNAMED:MATRIX") : h(o.h), w(o.w), data(matrix::priv::alloc_arr(title, o.h, o.w)), title(title) { matrix::priv::copy_arr(data, o.data, size()); } /** * Copy another matrix into this matrix **/ template tVarMatrix(const tMatrix &o, const std::string& title="UNNAMED:MATRIX") : h(mh), w(mw), data(matrix::priv::alloc_arr(title, o.h, o.w)), title(title) { matrix::priv::copy_arr(data, o.data, size()); } /** * Copy another matrix into this matrix **/ tVarMatrix(const t2Matrix &o, const std::string& title="UNNAMED:MATRIX") : h(2), w(2), data(matrix::priv::alloc_arr(title, h, w)), title(title) { matrix::priv::copy_arr(data, o.m, size()); } /** * Copy another matrix into this matrix **/ tVarMatrix(const t3Matrix &o, const std::string& title="UNNAMED:MATRIX") : h(3), w(3), data(matrix::priv::alloc_arr(title, h, w)), title(title) { matrix::priv::copy_arr(data, o.m, size()); } /** * Copy another matrix into this matrix **/ tVarMatrix(const t4Matrix &o, const std::string& title="UNNAMED:MATRIX") : h(4), w(4), data(matrix::priv::alloc_arr(title, h, w)), title(title) { matrix::priv::copy_arr(data, o.m, size()); } /** * Copy another matrix into this matrix. * Will loose old data reference, beware. 
**/ tVarMatrix& operator=(const tConstMatrixView &o) { resize(o.h,o.w); //for (size_t i=0; i &o) { resize(o.h,o.w); //for (size_t i=0; i &o) { resize(o.h,o.w); //for (size_t i=0; ih && w == this->w) return; if (h*w>size()) { T* new_data = matrix::priv::alloc_arr(title, h, w); std::swap(data, new_data); matrix::priv::free_arr(new_data); } this->h = h; this->w = w; } inline T& operator[](size_t i) { checkAccess1( i, h*w ); return data[i]; }; inline const T& operator[](size_t i) const { checkAccess1( i, h*w ); return data[i]; }; inline T& operator()(size_t i, size_t j) { checkAccess2( i, j, h, w ); return data[i+j*h]; }; inline const T& operator()(size_t i, size_t j) const { checkAccess2( i, j, h, w ); return data[i+j*h]; }; inline T& clampedAccess(int i, int j) { matrix::priv::clamp(i, 0, int(h)-1); matrix::priv::clamp(j, 0, int(w)-1); return operator()(i,j); } inline const T& clampedAccess(int i, int j) const { matrix::priv::clamp(i, 0, int(h)-1); matrix::priv::clamp(j, 0, int(w)-1); return operator()(i,j); } inline size_t size() const { return h*w; } /** * Convenience function to clear a matrix **/ inline void clear() { gravis::matrix::clear(*this); } /** * Convenience functions to fill a matrix **/ inline void fill(const T& e) { gravis::matrix::fill(*this, e); } /** * Convenience functions to clamp all elements of a matrix **/ inline void clamp(const T& min, const T& max) { gravis::matrix::clamp(*this, min, max); } }; /** * Matrix and vector operations **/ namespace matrix { template inline static void display( const tConstVectorView &v) { std::cout << "Vector: " << v.h << std::endl; for (size_t i=0; i inline static void display( const tVarVector &v) { display( tConstVectorView(v) ); } template inline static void display( const tVectorView &v) { display( tConstVectorView(v) ); } template inline static void display( const tConstMatrixView &v) { std::cout << "Matrix: " << v.h << "x" << v.w << std::endl; for (size_t i=0; i inline static void display( const tVarMatrix &v) { display( tConstMatrixView(v) ); } template inline static void display( const tMatrixView &v) { display( tConstMatrixView(v) ); } /** * Find the largest element **/ template inline static typename VectorOrMatrix::scalar max(const VectorOrMatrix& v) { size_t mi = 0; for (size_t i=1; i v[mi]) mi = i; return v[mi]; } /** * Find the smallest element **/ template inline static typename VectorOrMatrix::scalar min(const VectorOrMatrix& v) { size_t mi = 0; for (size_t i=1; i inline static void sub(tVectorView &v, const T& s) { for(size_t i=0; i inline static void sub(tVarVector &v, const T& s) { for(size_t i=0; i inline static void sub(tMatrixView &v, const T& s) { for(size_t i=0; i inline static void sub(tVarMatrix &v, const T& s) { for(size_t i=0; i inline static void sub(tMatrix &v, const T& s) { for(size_t i=0; i inline static void add(tVectorView &v, const T& s) { for(size_t i=0; i inline static void add(tVarVector &v, const T& s) { for(size_t i=0; i inline static void add(tMatrixView &v, const T& s) { for(size_t i=0; i inline static void add(tVarMatrix &v, const T& s) { for(size_t i=0; i inline static void add(tMatrix &v, const T& s) { for(size_t i=0; i inline static void mult(tVectorView &v, const T& s) { for(size_t i=0; i inline static void mult(tVarVector &v, const T& s) { for(size_t i=0; i inline static void mult(tMatrixView &v, const T& s) { for(size_t i=0; i inline static void mult(tVarMatrix &v, const T& s) { for(size_t i=0; i inline static void mult(tMatrix &v, const T& s) { for(size_t i=0; i inline static void 
div(tVectorView &v, const T& s) { for(size_t i=0; i inline static void div(tVarVector &v, const T& s) { for(size_t i=0; i inline static void div(tMatrixView &v, const T& s) { for(size_t i=0; i inline static void div(tVarMatrix &v, const T& s) { for(size_t i=0; i inline static void div(tMatrix &v, const T& s) { for(size_t i=0; i inline static void negate(tVectorView &v) { for(size_t i=0; i inline static void negate(tVarVector &v) { for(size_t i=0; i inline static void negate(tMatrixView &v) { for(size_t i=0; i inline static void negate(tVarMatrix &v) { for(size_t i=0; i inline static void add(tVectorView &v, const tConstVectorView& v2) { GRAVIS_CHECK(v.size() != v2.size, "Size must be equal for addition"); for(size_t i=0; i inline static void add(tVarVector &v, const tConstVectorView& v2) { GRAVIS_CHECK(v.size() != v2.size, "Size must be equal for addition"); for(size_t i=0; i inline static void add(tMatrixView &v, const tConstMatrixView& v2) { GRAVIS_CHECK(v.size() != v2.size, "Size must be equal for addition"); for(size_t i=0; i inline static void add(tVarMatrix &v, const tConstMatrixView& v2) { GRAVIS_CHECK(v.size() != v2.size, "Size must be equal for addition"); for(size_t i=0; i inline static void add(tMatrix &v, const tMatrix& v2) { for(size_t i=0; i inline static void sub(tVectorView &v, const tConstVectorView& v2) { GRAVIS_CHECK(v.size() != v2.size, "Size must be equal for addition"); for(size_t i=0; i inline static void sub(tVarVector &v, const tConstVectorView& v2) { GRAVIS_CHECK(v.size() != v2.size, "Size must be equal for addition"); for(size_t i=0; i inline static void sub(tMatrixView &v, const tConstMatrixView& v2) { GRAVIS_CHECK(v.size() != v2.size, "Size must be equal for addition"); for(size_t i=0; i inline static void sub(tVarMatrix &v, const tConstMatrixView& v2) { GRAVIS_CHECK(v.size() != v2.size(), "Size must be equal for addition"); for(size_t i=0; i inline static void sub(tMatrix &v, const tMatrix& v2) { for(size_t i=0; i inline static void elmul(tVectorView &v, const tConstVectorView& v2) { GRAVIS_CHECK(v.size() != v2.size(), "Size must be equal for addition"); for(size_t i=0; i inline static void elmul(tVarVector &v, const tConstVectorView& v2) { GRAVIS_CHECK(v.size() != v2.size(), "Size must be equal for addition"); for(size_t i=0; i inline static void elmul(tMatrixView &v, const tConstMatrixView& v2) { GRAVIS_CHECK(v.size() != v2.size(), "Size must be equal for addition"); for(size_t i=0; i inline static void elmul(tVarMatrix &v, const tConstMatrixView& v2) { GRAVIS_CHECK(v.size() != v2.size(), "Size must be equal for addition"); for(size_t i=0; i inline static void elmul(tMatrix &v, const tMatrix& v2) { for(size_t i=0; i inline static void eldiv(tVectorView &v, const tConstVectorView& v2) { GRAVIS_CHECK(v.size() == v2.size(), "Size must be equal for addition"); for(size_t i=0; i inline static void eldiv(tVarVector &v, const tConstVectorView& v2) { GRAVIS_CHECK(v.size() == v2.size(), "Size must be equal for addition"); for(size_t i=0; i inline static void eldiv(tMatrixView &v, const tConstMatrixView& v2) { GRAVIS_CHECK(v.size() == v2.size(), "Size must be equal for addition"); for(size_t i=0; i inline static void eldiv(tVarMatrix &v, const tConstMatrixView& v2) { GRAVIS_CHECK(v.size() == v2.size(), "Size must be equal for addition"); for(size_t i=0; i inline static void eldiv(tVarMatrix &v, const tVarMatrix& v2) { GRAVIS_CHECK(v.size() == v2.size(), "Size must be equal for addition"); for(size_t i=0; i inline static void eldiv(tMatrix &v, const tMatrix& v2) { 
for(size_t i=0; i inline static void cmpLarger( OutMatrix& Mout, const InMatrix& M, const typename InMatrix::scalar& t) { GRAVIS_CHECK(Mout.h == M.h && Mout.w == M.w, "Incompatible sizes"); const int S=Mout.size(); for (int i=0; it ? '\xFF' : '\x00'; } /** * Per element smaller than test for scalars **/ template inline static void cmpSmaller( OutMatrix& Mout, const InMatrix& M, const typename InMatrix::scalar& t) { GRAVIS_CHECK(Mout.h == M.h && Mout.w == M.w, "Incompatible sizes"); int i; const int S=Mout.size(); for (int i=0; i inline static void cmpEqual( OutMatrix& Mout, const InMatrix& M, const typename InMatrix::scalar& t) { GRAVIS_CHECK(Mout.h == M.h && Mout.w == M.w, "Incompatible sizes"); int i; const int S=Mout.size(); for (int i=0; i inline static void inset( OutMatrix& Out, const InMatrix& In, const size_t row, const size_t col=0) { if ((In.h == 0) || (In.w == 0) || (col>=Out.w) || (row>=Out.h)) return; size_t h=std::min(In.h, Out.h-row); size_t w=std::min(In.w, Out.w-col); for (size_t j=0; j inline static void inset( tVarVector &Out, const tConstVectorView &In, const size_t row) { if ((In.h == 0) || (row>=Out.h)) return; size_t h=std::min(In.h, Out.h-row); memcpy( &Out(row, 0), &In(0, 0), sizeof(In[0])*h ); } /** * Inset one matrix into another **/ template inline static void inset( tVarVector &Out, const tVarVector &In, const size_t row) { if ((In.h == 0) || (row>=Out.h)) return; size_t h=std::min(In.h, Out.h-row); memcpy( &Out[row], &In[0], sizeof(In[0])*h ); } /** * Matrix Convolution * * TODO: Do not use checked access in the main region of the image, use it * only on the borders **/ template inline static void conv2( OutMatrix& Iout, const InMatrixImg& I, const InMatrixMask& F ) { GRAVIS_CHECK( Iout.w == I.w && Iout.h == I.h, "Matrix sizes are not compatible" ); Iout.clear(); const int ox(F.w/2); const int oy(F.h/2); const int W=I.w; int j; #ifdef _OPENMP #pragma omp parallel for default(none) private(j) shared(I,Iout,F) #endif for (j=0; j inline static void erode( Matrix& m) { const tVarMatrix M(m); // Make a copy int j; const int W=M.w; #ifdef _OPENMP #pragma omp parallel for default(none) private(j) shared(m) #endif for (j=1; j inline static void mult_elementwise(OutMatrix& m, const InMatrix1& m1, const InMatrix2& m2) { GRAVIS_CHECK( m.size() == m1.size(), "Matrix sizes incompatible"); GRAVIS_CHECK( m.size() == m2.size(), "Matrix sizes incompatible"); const size_t s = m.size(); for (size_t i=0; i inline static T sum(tConstMatrixView &v) { if (v.size()==0) return T(0); T r=v[0]; for(size_t i=1; i inline static T sum(tConstVectorView &v) { if (v.size()==0) return T(0); T r=v[0]; for(size_t i=1; i inline static T sum(tMatrixView &v) { if (v.size()==0) return T(0); T r=v[0]; for(size_t i=1; i inline static T sum(tVectorView &v) { if (v.size()==0) return T(0); T r=v[0]; for(size_t i=1; i inline static T sum(tVarMatrix &v) { if (v.size()==0) return T(0); T r=v[0]; for(size_t i=1; i inline static T sum(tVarVector &v) { if (v.size()==0) return T(0); T r=v[0]; for(size_t i=1; i static void load(tVarMatrix &M, const std::string& fn) { char mmid0[33] = "GRAVIS_VAR_MATRIX "; char mmid1[33] = "GRAVIS_VAR_MATRIX "; std::ifstream stream(fn.c_str(), std::ifstream::binary); uint8_t uint32_size; uint8_t T_size; uint32_t h,w; uint16_t endianness; stream.read(mmid1, 32); stream.read((char*)&endianness, 2); stream.read((char*)&uint32_size, 1); stream.read((char*)&T_size, 1); stream.read((char*)&h, sizeof(h)); stream.read((char*)&w, sizeof(w)); GRAVIS_CHECK( 0==strncmp( mmid0, mmid1, 31 
),"Not a gravis var matrix file" ); GRAVIS_CHECK( endianness == 0x0001, "Wrong endianness"); GRAVIS_CHECK( uint32_size == 4, "Wrong size_t size"); GRAVIS_CHECK( T_size == sizeof(T), "Wrong type in matrix file"); M.resize(h,w); stream.read((char*)M.data, sizeof(T)*M.size()); } template static void save(const std::string& fn, const tConstMatrixView &v) { char mmid[33] = "GRAVIS_VAR_MATRIX "; std::ofstream stream(fn.c_str(), std::ofstream::binary); uint8_t uint32_size = sizeof(uint32_t); uint8_t T_size = sizeof(T); uint32_t h = v.h, w = v.w; uint16_t endianness = 0x0001; stream.write(mmid, 32); stream.write((char*)&endianness, 2); stream.write((char*)&uint32_size, 1); stream.write((char*)&T_size, 1); stream.write((char*)&h, sizeof(h)); stream.write((char*)&w, sizeof(w)); stream.write((char*)v.data, sizeof(T)*v.size()); } template static void load(tVarVector &v, const std::string& fn) { char mmid0[33] = "GRAVIS_VAR_VECTOR "; char mmid1[33] = "GRAVIS_VAR_VECTOR "; std::ifstream stream(fn.c_str(), std::ifstream::binary); uint8_t uint32_size; uint8_t T_size; uint32_t k; uint16_t endianness; stream.read(mmid1, 32); stream.read((char*)&endianness, 2); stream.read((char*)&uint32_size, 1); stream.read((char*)&T_size, 1); stream.read((char*)&k, sizeof(k)); GRAVIS_CHECK( 0 == strncmp( mmid0, mmid1, 31 ), "Not a gravis var vector file" ); GRAVIS_CHECK( endianness == 0x0001, "Wrong endianness"); GRAVIS_CHECK( uint32_size == 4, "Wrong uint32 size"); GRAVIS_CHECK( T_size == sizeof(T), "Wrong type in model file"); v.resize(k); stream.read((char*)v.data, sizeof(T)*v.size()); } template static void save(const std::string& fn, const tConstVectorView &v) { char mmid[33] = "GRAVIS_VAR_VECTOR "; std::ofstream stream(fn.c_str(), std::ofstream::binary); uint8_t uint32_size = sizeof(uint32_t); uint8_t T_size = sizeof(T); uint16_t endianness = 0x0001; uint32_t k = v.size(); stream.write(mmid, 32); stream.write((char*)&endianness, 2); stream.write((char*)&uint32_size, 1); stream.write((char*)&T_size, 1); stream.write((char*)&k, sizeof(k)); stream.write((char*)v.data, sizeof(T)*v.size()); } template static inline void clamp(tMatrixView &v, const T& min, const T& max) { for (size_t i=0; i static inline void clamp(tVarMatrix &v, const T& min, const T& max) { for (size_t i=0; i static inline void clamp(tVectorView &v, const T& min, const T& max) { for (size_t i=0; i static inline void clamp(tVarVector &v, const T& min, const T& max) { for (size_t i=0; i inline std::istream& operator>> (std::istream& is, tVectorView& arg) { size_t h = arg.h; std::string t; is >> t; if (t != "[") GRAVIS_THROW3(gravis::Exception, "Unexpected token. A vector should start with [", t); for (size_t j=0; j> arg[j]; is >> t; if (t != "]") GRAVIS_THROW3(gravis::Exception, "Unexpected token. A vector should end with ]", t); return is; } /** * Read Variable size matrices from a stream **/ template inline std::istream& operator>> (std::istream& is, tVarVector& arg) { std::string t; std::vector v; is >> t; if (t != "[") GRAVIS_THROW3(gravis::Exception, "Unexpected token. 
A vector should start with [", t); while (is) { is >> t; if (t == "]") break; std::stringstream st(t); T tt; st >> tt; v.push_back(tt); } arg.resize(v.size()); size_t h = arg.h; for (size_t j=0; j inline std::ostream& operator<< (std::ostream& os, const tConstVectorView& arg) { size_t h = arg.h; os << "["; for (size_t j=0; j inline std::ostream& operator<< (std::ostream& os, const tConstMatrixView& arg) { size_t h = arg.h; size_t w = arg.w; if ((h>1) && (w>1)) { os << "Matrix: " << h << "x" << w << std::endl; for (size_t i=0; i1) { os << "["; for (size_t j=0; j inline std::ostream& operator<< (std::ostream& os, const tVarVector& arg) { tConstVectorView mv(arg); os << mv; return os; } /** * Write Variable size matrices to a stream **/ template inline std::ostream& operator<< (std::ostream& os, const tVarMatrix& arg) { tConstMatrixView mv(arg); os << mv; return os; } } #include "tVarMatrix_blas.h" #endif relion-3.1.3/src/jaz/gravis/.svn/text-base/tVarMatrix_blas.h.svn-base000066400000000000000000000007261411340063500254260ustar00rootroot00000000000000#ifndef __LIBGRAVIS_T_VAR_MATRIX_BLAS_H__ #define __LIBGRAVIS_T_VAR_MATRIX_BLAS_H__ #include "tVarMatrix.h" namespace gravis { namespace matrix { // Single and double implementations #define __GRAVIS__MATRIX__BLAS__DATATYPE__SINGLE__ #include "tVarMatrix_blas.hxx" #undef __GRAVIS__MATRIX__BLAS__DATATYPE__SINGLE__ #define __GRAVIS__MATRIX__BLAS__DATATYPE__DOUBLE__ #include "tVarMatrix_blas.hxx" #undef __GRAVIS__MATRIX__BLAS__DATATYPE__DOUBLE__ } } #endif relion-3.1.3/src/jaz/gravis/.svn/text-base/tVarMatrix_blas.hxx.svn-base000066400000000000000000000534501411340063500260100ustar00rootroot00000000000000/** * Included multiple times from matrix_blas.hpp for different combinations of float, double varmatrix and matrixview * * Never include directly **/ #ifdef __GRAVIS__MATRIX__BLAS__DATATYPE__SINGLE__ #define __GMBD_REAL float #define __GMBD_xGEMV sgemv_ #define __GMBD_xNRM2 snrm2_ #define __GMBD_xSCAL sscal_ #define __GMBD_xAXPY saxpy_ #define __GMBD_xGESVD sgesvd_ #define __GMBD_xGESDD sgesdd_ #define __GMBD_xDOT sdot_ #else #ifdef __GRAVIS__MATRIX__BLAS__DATATYPE__DOUBLE__ #define __GMBD_REAL double #define __GMBD_xGEMV dgemv_ #define __GMBD_xNRM2 dnrm2_ #define __GMBD_xSCAL dscal_ #define __GMBD_xAXPY daxpy_ #define __GMBD_xGESVD dgesvd_ #define __GMBD_xGESDD dgesdd_ #define __GMBD_xDOT ddot_ #else #error( "Never include directly, this is included only from within matrix_blas.hpp" ) #endif #endif namespace reference { //#include "tVarMatrix_blas_reference.h" } // Blas Header extern "C" { void __GMBD_xGEMV(const char* const trans, const size_t& m, const size_t& n, const __GMBD_REAL& alpha, const __GMBD_REAL* const M, const size_t& m1, const __GMBD_REAL* const x, const size_t& xs, const __GMBD_REAL& beta, __GMBD_REAL* const v, const size_t& vs); __GMBD_REAL __GMBD_xNRM2(const size_t& n, const __GMBD_REAL* const x, const size_t& inc); void __GMBD_xSCAL(const size_t& n, const __GMBD_REAL& alpha, __GMBD_REAL* const x, const size_t& inc); void __GMBD_xAXPY(const size_t& n, const __GMBD_REAL& alpha, const __GMBD_REAL* const x, const size_t& incx, __GMBD_REAL* const y, const size_t& incy); __GMBD_REAL __GMBD_xDOT(const size_t& n, const __GMBD_REAL* dx, const size_t& incx, const __GMBD_REAL* dy, const size_t& incy); } /// Lapack Header extern "C" { void __GMBD_xGESVD(const char& jobu, const char& jobvt, const int& m, const int& n, __GMBD_REAL* a, const int& lda, __GMBD_REAL* s, __GMBD_REAL* u, const int& ldu, __GMBD_REAL* vt, const int& ldvt, 
__GMBD_REAL* work, const int& lwork, int& info ); void __GMBD_xGESDD(const char& jobz, const int& m, const int& n, __GMBD_REAL* a, const int& lda, __GMBD_REAL* s, __GMBD_REAL* u, const int& ldu, __GMBD_REAL* vt, const int& ldvt, __GMBD_REAL* work, const int& lwork, int* iwork, int& info ); } /** * Inplace SVD for small matrices. * Replaces the input matrix A with its left eigenvectors U **/ inline static void svd_inplace_u(tMatrixView<__GMBD_REAL> &IN_A_OUT_U, tVectorView<__GMBD_REAL> &S, tMatrixView<__GMBD_REAL> &VT) { int info; tVarVector<__GMBD_REAL> work(1); __GMBD_xGESVD('O', 'A', IN_A_OUT_U.h, IN_A_OUT_U.w, IN_A_OUT_U.data, IN_A_OUT_U.h, S.data, NULL, IN_A_OUT_U.h, VT.data, VT.h, work.data, -1, info); if (info < 0) GRAVIS_THROW3(gravis::Exception, "The i'th argument had an invalid value.", StringFormat(info)); if (info > 0) GRAVIS_THROW3(gravis::Exception, "SBDSQR did not converge to zero.", StringFormat(info)); work.resize(int(work[0])); __GMBD_xGESVD('O', 'A', IN_A_OUT_U.h, IN_A_OUT_U.w, IN_A_OUT_U.data, IN_A_OUT_U.h, S.data, NULL, IN_A_OUT_U.h, VT.data, VT.h, work.data, work.h, info); } /** * Inplace SVD for small matrices * Replaces the input matrix A with its left eigenvectors U **/ inline static void svd_inplace_u(tVarMatrix<__GMBD_REAL> &IN_A_OUT_U, tVarVector<__GMBD_REAL> &S, tVarMatrix<__GMBD_REAL> &VT) { int info; tVarVector<__GMBD_REAL> work(1); __GMBD_xGESVD('O', 'A', IN_A_OUT_U.h, IN_A_OUT_U.w, IN_A_OUT_U.data, IN_A_OUT_U.h, S.data, NULL, IN_A_OUT_U.h, VT.data, VT.h, work.data, -1, info); if (info < 0) GRAVIS_THROW3(gravis::Exception, "The i'th argument had an invalid value.", StringFormat(info)); if (info > 0) GRAVIS_THROW3(gravis::Exception, "SBDSQR did not converge to zero.", StringFormat(info)); work.resize(int(work[0])); __GMBD_xGESVD('O', 'A', IN_A_OUT_U.h, IN_A_OUT_U.w, IN_A_OUT_U.data, IN_A_OUT_U.h, S.data, NULL, IN_A_OUT_U.h, VT.data, VT.h, work.data, work.h, info); } /** * Inplace SVD for large matrices using a divide and conquer algorithm * Replaces the input matrix A with its left eigenvectors U **/ inline static void svd_inplace_u_dc(tMatrixView<__GMBD_REAL> &IN_A_OUT_U, tVectorView<__GMBD_REAL> &S, tMatrixView<__GMBD_REAL> &VT) { int info; tVarVector<__GMBD_REAL> work(1); tVarVector iwork(8*std::min(IN_A_OUT_U.h, IN_A_OUT_U.w)); __GMBD_xGESDD('O', IN_A_OUT_U.h, IN_A_OUT_U.w, IN_A_OUT_U.data, IN_A_OUT_U.h, S.data, NULL, IN_A_OUT_U.h, VT.data, VT.h, work.data, -1, iwork.data, info); if (info < 0) GRAVIS_THROW3(gravis::Exception, "The i'th argument had an invalid value.", StringFormat(info)); if (info > 0) GRAVIS_THROW3(gravis::Exception, "SBDSQR did not converge to zero.", StringFormat(info)); work.resize(int(work[0])); __GMBD_xGESDD('O', IN_A_OUT_U.h, IN_A_OUT_U.w, IN_A_OUT_U.data, IN_A_OUT_U.h, S.data, NULL, IN_A_OUT_U.h, VT.data, VT.h, work.data, work.h, iwork.data, info); } /** * Inplace SVD for large matrices using a divide and conquer algorithm * Replaces the input matrix A with its left eigenvectors U **/ inline static void svd_inplace_u_dc(tVarMatrix<__GMBD_REAL> &IN_A_OUT_U, tVarVector<__GMBD_REAL> &S, tVarMatrix<__GMBD_REAL> &VT) { int info; tVarVector<__GMBD_REAL> work(1); tVarVector iwork(8*std::min(IN_A_OUT_U.h, IN_A_OUT_U.w)); __GMBD_xGESDD('O', IN_A_OUT_U.h, IN_A_OUT_U.w, IN_A_OUT_U.data, IN_A_OUT_U.h, S.data, NULL, IN_A_OUT_U.h, VT.data, VT.h, work.data, -1, iwork.data, info); if (info < 0) GRAVIS_THROW3(gravis::Exception, "The i'th argument had an invalid value.", StringFormat(info)); if (info > 0) GRAVIS_THROW3(gravis::Exception, "SBDSQR did 
not converge to zero.", StringFormat(info)); work.resize(int(work[0])); __GMBD_xGESDD('O', IN_A_OUT_U.h, IN_A_OUT_U.w, IN_A_OUT_U.data, IN_A_OUT_U.h, S.data, NULL, IN_A_OUT_U.h, VT.data, VT.h, work.data, work.h, iwork.data, info); } /** * SVD for small matrices **/ inline static void svd(tMatrixView<__GMBD_REAL> &U, tVectorView<__GMBD_REAL> &S, tMatrixView<__GMBD_REAL> &VT, const tConstMatrixView<__GMBD_REAL> &A) { int info; tVarMatrix<__GMBD_REAL> _A(A); tVarVector<__GMBD_REAL> work(1); __GMBD_xGESVD('A', 'A', A.h, A.w, _A.data, A.h, S.data, U.data, U.h, VT.data, VT.h, work.data, -1, info); if (info < 0) GRAVIS_THROW3(gravis::Exception, "The i'th argument had an invalid value.", StringFormat(info)); if (info > 0) GRAVIS_THROW3(gravis::Exception, "SBDSQR did not converge to zero.", StringFormat(info)); work.resize(int(work[0])); __GMBD_xGESVD('A', 'A', A.h, A.w, _A.data, A.h, S.data, U.data, U.h, VT.data, VT.h, work.data, work.h, info); } /** * SVD for small matrices **/ inline static void svd(tVarMatrix<__GMBD_REAL> &U, tVarVector<__GMBD_REAL> &S, tVarMatrix<__GMBD_REAL> &VT, const tConstMatrixView<__GMBD_REAL> &A) { int info; tVarMatrix<__GMBD_REAL> _A(A); tVarVector<__GMBD_REAL> work(1); __GMBD_xGESVD('A', 'A', A.h, A.w, _A.data, A.h, S.data, U.data, U.h, VT.data, VT.h, work.data, -1, info); if (info < 0) GRAVIS_THROW3(gravis::Exception, "The i'th argument had an invalid value.", StringFormat(info)); if (info > 0) GRAVIS_THROW3(gravis::Exception, "SBDSQR did not converge to zero.", StringFormat(info)); work.resize(int(work[0])); __GMBD_xGESVD('A', 'A', A.h, A.w, _A.data, A.h, S.data, U.data, U.h, VT.data, VT.h, work.data, work.h, info); } /** * SVD for large matrices using a divide and conquer algorithm **/ inline static void svd_dc(tMatrixView<__GMBD_REAL> &U, tVectorView<__GMBD_REAL> &S, tMatrixView<__GMBD_REAL> &VT, const tConstMatrixView<__GMBD_REAL> &A) { int info; tVarMatrix<__GMBD_REAL> _A(A); tVarVector<__GMBD_REAL> work(1); tVarVector iwork(8*std::min(A.h, A.w)); __GMBD_xGESDD('A', A.h, A.w, _A.data, A.h, S.data, U.data, U.h, VT.data, VT.h, work.data, -1, iwork.data, info); if (info < 0) GRAVIS_THROW3(gravis::Exception, "The i'th argument had an invalid value.", StringFormat(info)); if (info > 0) GRAVIS_THROW3(gravis::Exception, "SBDSQR did not converge to zero.", StringFormat(info)); work.resize(int(work[0])); __GMBD_xGESDD('A', A.h, A.w, _A.data, A.h, S.data, U.data, U.h, VT.data, VT.h, work.data, work.h, iwork.data, info); } /** * SVD for large matrices using a divide and conquer algorithm **/ inline static void svd_dc(tVarMatrix<__GMBD_REAL> &U, tVarVector<__GMBD_REAL> &S, tVarMatrix<__GMBD_REAL> &VT, const tConstMatrixView<__GMBD_REAL> &A) { int info; tVarMatrix<__GMBD_REAL> _A(A); tVarVector<__GMBD_REAL> work(1); tVarVector iwork(8*std::min(A.h, A.w)); __GMBD_xGESDD('A', A.h, A.w, _A.data, A.h, S.data, U.data, U.h, VT.data, VT.h, work.data, -1, iwork.data, info); if (info < 0) GRAVIS_THROW3(gravis::Exception, "The i'th argument had an invalid value.", StringFormat(info)); if (info > 0) GRAVIS_THROW3(gravis::Exception, "SBDSQR did not converge to zero.", StringFormat(info)); work.resize(int(work[0])); __GMBD_xGESDD('A', A.h, A.w, _A.data, A.h, S.data, U.data, U.h, VT.data, VT.h, work.data, work.h, iwork.data, info); } //// Multiplications // Not Transposed /** * v = alpha*M*x + beta*v **/ inline static void addmult(tVectorView<__GMBD_REAL> &v, const __GMBD_REAL& alpha, const tConstMatrixView<__GMBD_REAL> &M, const tConstVectorView<__GMBD_REAL> &x, const __GMBD_REAL& beta) 
{ GRAVIS_CHECK( v.size() == M.h, "v and M are incompatible"); GRAVIS_CHECK( x.size() == M.w, "M and x are incompatible"); if (M.h > 0) __GMBD_xGEMV("N", M.h, M.w, alpha, M.data, M.h, x.data, 1, beta, v.data, 1); } /** * v = alpha*M*x + beta*v * Will not resize v, as this would not make sense **/ inline static void addmult(tVarVector<__GMBD_REAL> &v, const __GMBD_REAL& alpha, const tConstMatrixView<__GMBD_REAL> &M, const tConstVectorView<__GMBD_REAL> &x, const __GMBD_REAL& beta) { tVectorView<__GMBD_REAL> vv(v); addmult(vv, alpha, M, x, beta); } /** * v = v+M*x **/ inline static void addmult(tVectorView<__GMBD_REAL> &v, const tConstMatrixView<__GMBD_REAL> &M, const tConstVectorView<__GMBD_REAL> &x) { addmult(v, __GMBD_REAL(1), M, x, __GMBD_REAL(1)); } /** * v = v+M*x * Will not resize v, as this would not make sense **/ inline static void addmult(tVarVector<__GMBD_REAL> &v, const tConstMatrixView<__GMBD_REAL> &M, const tConstVectorView<__GMBD_REAL> &x) { addmult(v, __GMBD_REAL(1), M, x, __GMBD_REAL(1)); } /** * v = a+M*x **/ inline static void addmult(tVectorView<__GMBD_REAL> &v, const tConstVectorView<__GMBD_REAL> &a, const tConstMatrixView<__GMBD_REAL> &M, const tConstVectorView<__GMBD_REAL> &x) { v = a; addmult(v, M, x); } /** * v = a+M*x **/ inline static void addmult(tVarVector<__GMBD_REAL> &v, const tConstVectorView<__GMBD_REAL> &a, const tConstMatrixView<__GMBD_REAL> &M, const tConstVectorView<__GMBD_REAL> &x) { v = a; addmult(v, M, x); } /** * v = alpha*M*x **/ inline static void mult(tVectorView<__GMBD_REAL> &v, const __GMBD_REAL& alpha, const tConstMatrixView<__GMBD_REAL> &M, const tConstVectorView<__GMBD_REAL> &x) { ::gravis::matrix::clear(v); addmult(v, alpha, M, x, 1); } /** * v = alpha*M*x * Will not resize v, as this would not make sense **/ inline static void mult(tVarVector<__GMBD_REAL> &v, const __GMBD_REAL& alpha, const tConstMatrixView<__GMBD_REAL> &M, const tConstVectorView<__GMBD_REAL> &x) { ::gravis::matrix::clear(v); addmult(v, alpha, M, x, 1); } /** * v = M*x **/ inline static void mult(tVectorView<__GMBD_REAL> &v, const tConstMatrixView<__GMBD_REAL> &M, const tConstVectorView<__GMBD_REAL> &x) { mult(v, 1, M, x); } /** * v = M*x **/ inline static void mult(tVarVector<__GMBD_REAL> &v, const tConstMatrixView<__GMBD_REAL> &M, const tConstVectorView<__GMBD_REAL> &x) { mult(v, 1, M, x); } // TRANSPOSED VERSIONS /** * v = (alpha*x^T M)^T + beta*v **/ inline static void addmult(tVectorView<__GMBD_REAL> &v, const __GMBD_REAL& alpha, const tConstVectorView<__GMBD_REAL> &x, const tConstMatrixView<__GMBD_REAL> &M, const __GMBD_REAL& beta) { GRAVIS_CHECK( v.size() == M.w, "v and M are incompatible"); GRAVIS_CHECK( x.size() == M.h, "M and x are incompatible"); if (M.h > 0) __GMBD_xGEMV("T", M.h, M.w, alpha, M.data, M.h, x.data, 1, beta, v.data, 1); } /** * v = (alpha*x^T M)^T + beta*v **/ inline static void addmult(tVarVector<__GMBD_REAL> &v, const __GMBD_REAL& alpha, const tConstVectorView<__GMBD_REAL> &x, const tConstMatrixView<__GMBD_REAL> &M, const __GMBD_REAL& beta) { tVectorView<__GMBD_REAL> vv(v); addmult(vv, alpha, x, M, beta); } /** * v = v+M*x **/ inline static void addmult(tVectorView<__GMBD_REAL> &v, const tConstVectorView<__GMBD_REAL> &x, const tConstMatrixView<__GMBD_REAL> &M) { addmult(v, __GMBD_REAL(1), x, M, __GMBD_REAL(1)); } /** * v = v+M*x * Will not resize v, as this would not make sense **/ inline static void addmult(tVarVector<__GMBD_REAL> &v, const tConstVectorView<__GMBD_REAL> &x, const tConstMatrixView<__GMBD_REAL> &M) { addmult(v, __GMBD_REAL(1), x, M, 
__GMBD_REAL(1)); } /** * v = a+(x^T*M)^T **/ inline static void addmult(tVectorView<__GMBD_REAL> &v, const tConstVectorView<__GMBD_REAL> &a, const tConstVectorView<__GMBD_REAL> &x, const tConstMatrixView<__GMBD_REAL> &M) { v = a; addmult(v, x, M); } /** * v = a+(x^T*M)^T **/ inline static void addmult(tVarVector<__GMBD_REAL> &v, const tConstVectorView<__GMBD_REAL> &a, const tConstVectorView<__GMBD_REAL> &x, const tConstMatrixView<__GMBD_REAL> &M) { v = a; addmult(v, x, M); } /** * v = alpha*x^T*M **/ inline static void mult(tVectorView<__GMBD_REAL> &v, const __GMBD_REAL& alpha, const tConstVectorView<__GMBD_REAL> &x, const tConstMatrixView<__GMBD_REAL> &M) { ::gravis::matrix::clear(v); addmult(v, alpha, x, M, 1); } /** * v = alpha*x^T*M * Will not resize v, as this would not make sense **/ inline static void mult(tVarVector<__GMBD_REAL> &v, const __GMBD_REAL& alpha, const tConstVectorView<__GMBD_REAL> &x, const tConstMatrixView<__GMBD_REAL> &M) { ::gravis::matrix::clear(v); addmult(v, alpha, x, M, 1); } /** * v = x^T*M **/ inline static void mult(tVectorView<__GMBD_REAL> &v, const tConstVectorView<__GMBD_REAL> &x, const tConstMatrixView<__GMBD_REAL> &M) { mult(v, 1, x, M); } /** * v = x^T*M **/ inline static void mult(tVarVector<__GMBD_REAL> &v, const tConstVectorView<__GMBD_REAL> &x, const tConstMatrixView<__GMBD_REAL> &M) { mult(v, 1, x, M); } static inline __GMBD_REAL abs(const __GMBD_REAL& a) { return a< __GMBD_REAL(0) ? -a : a; } //// Norms /** l1 norm **/ inline static __GMBD_REAL normL1(const tConstVectorView<__GMBD_REAL> &v) { if (v.size()==0) return 0; __GMBD_REAL result = abs(v[0]); for (size_t i=1; i &v) { if (v.size()==0) return 0; __GMBD_REAL result = abs(v[0]); for (size_t i=1; i &v) { return v.size()==0 ? 0 : __GMBD_xNRM2(v.size(), v.data, 1); } /** Frobenius norm **/ inline static __GMBD_REAL normL2(const tConstMatrixView<__GMBD_REAL> &v) { return v.size()==0 ? 
0 : __GMBD_xNRM2(v.size(), v.data, 1); } /** Squared l2 norm **/ inline static __GMBD_REAL normL2sqr(const tConstVectorView<__GMBD_REAL> &v) { return ::gravis::matrix::priv::sqr(normL2(v)); } /** Squared Frobenius norm **/ inline static __GMBD_REAL normL2sqr(const tConstMatrixView<__GMBD_REAL> &v) { return ::gravis::matrix::priv::sqr(normL2(v)); } /** linf norm **/ inline static __GMBD_REAL normLinf(const tConstVectorView<__GMBD_REAL> &v) { if (v.size()==0) return 0; __GMBD_REAL result = abs(v[0]); for (size_t i=1; i &v) { if (v.size()==0) return 0; __GMBD_REAL result = abs(v[0]); for (size_t i=1; i &v, const __GMBD_REAL& s, const tConstVectorView<__GMBD_REAL> &u) { __GMBD_xAXPY(v.size(), s, u.data, 1, v.data, 1); } /** v += u **/ inline static void add(tVarVector<__GMBD_REAL> &v, const __GMBD_REAL& s, const tConstVectorView<__GMBD_REAL> &u) { __GMBD_xAXPY(v.size(), s, u.data, 1, v.data, 1); } /** V += U **/ inline static void add(tMatrixView<__GMBD_REAL> &V, const __GMBD_REAL& s, const tConstMatrixView<__GMBD_REAL> &U) { __GMBD_xAXPY(V.size(), s, U.data, 1, V.data, 1); } /** V += U **/ inline static void add(tVarMatrix<__GMBD_REAL> &V, const __GMBD_REAL& s, const tConstMatrixView<__GMBD_REAL> &U) { __GMBD_xAXPY(V.size(), s, U.data, 1, V.data, 1); } /** v += u **/ inline static void add(tVectorView<__GMBD_REAL> &v, const tConstVectorView<__GMBD_REAL> &u) { add(v, 1, u); } /** v += u **/ inline static void add(tVarVector<__GMBD_REAL> &v, const tConstVectorView<__GMBD_REAL> &u) { add(v, 1, u); } /** V += U **/ inline static void add(tMatrixView<__GMBD_REAL> &V, const tConstMatrixView<__GMBD_REAL> &U) { add(V, 1, U); } /** V += U **/ inline static void add(tVarMatrix<__GMBD_REAL> &V, const tConstMatrixView<__GMBD_REAL> &U) { add(V, 1, U); } /** r = v + u **/ inline static void add(tVectorView<__GMBD_REAL> &r, const tConstVectorView<__GMBD_REAL> &v, const tConstVectorView<__GMBD_REAL> &u) { r = v; add(r,u); } /** r = v + u **/ inline static void add(tVarVector<__GMBD_REAL> &r, const tConstVectorView<__GMBD_REAL> &v, const tConstVectorView<__GMBD_REAL> &u) { r = v; add(r,u); } /** R = V + U **/ inline static void add(tMatrixView<__GMBD_REAL> &R, const tConstMatrixView<__GMBD_REAL> &V, const tConstMatrixView<__GMBD_REAL> &U) { R = V; add(R,U); } /** R = V + U **/ inline static void add(tVarMatrix<__GMBD_REAL> &R, const tConstMatrixView<__GMBD_REAL> &V, const tConstMatrixView<__GMBD_REAL> &U) { R = V; add(R,U); } /** v -= u **/ inline static void sub(tVectorView<__GMBD_REAL> &v, const tConstVectorView<__GMBD_REAL> &u) { add(v, -1, u); } /** v -= u **/ inline static void sub(tVarVector<__GMBD_REAL> &v, const tConstVectorView<__GMBD_REAL> &u) { add(v, -1, u); } /** V -= U **/ inline static void sub(tMatrixView<__GMBD_REAL> &V, const tConstMatrixView<__GMBD_REAL> &U) { add(V, -1, U); } /** V -= U **/ inline static void sub(tVarMatrix<__GMBD_REAL> &V, const tConstMatrixView<__GMBD_REAL> &U) { add(V, -1, U); } /** v -= su **/ inline static void sub(tVectorView<__GMBD_REAL> &v, const __GMBD_REAL& s, const tConstVectorView<__GMBD_REAL> &u) { add(v, -s, u); } /** v -= su **/ inline static void sub(tVarVector<__GMBD_REAL> &v, const __GMBD_REAL& s, const tConstVectorView<__GMBD_REAL> &u) { add(v, -s, u); } /** V -= sU **/ inline static void sub(tMatrixView<__GMBD_REAL> &V, const __GMBD_REAL& s, const tConstMatrixView<__GMBD_REAL> &U) { add(V, -s, U); } /** V -= sU **/ inline static void sub(tVarMatrix<__GMBD_REAL> &V, const __GMBD_REAL& s, const tConstMatrixView<__GMBD_REAL> &U) { add(V, -s, U); } //// 
Matrix Scalar Operations /** Arithmethic operations with scalars *= **/ inline static void mult(tVectorView<__GMBD_REAL> &o, const tConstVectorView<__GMBD_REAL> &v, const __GMBD_REAL& scalar) { o=v; __GMBD_xSCAL(v.size(), scalar, o.data, 1); } /** Arithmethic operations with scalars *= **/ inline static void mult(tVarVector<__GMBD_REAL> &o, const tConstVectorView<__GMBD_REAL> &v, const __GMBD_REAL& scalar) { o=v; __GMBD_xSCAL(v.size(), scalar, o.data, 1); } /** Arithmethic operations with scalars *= **/ inline static void mult(tMatrixView<__GMBD_REAL> &o, const tConstMatrixView<__GMBD_REAL> &v, const __GMBD_REAL& scalar) { o=v; __GMBD_xSCAL(v.size(), scalar, o.data, 1); } /** Arithmethic operations with scalars *= **/ inline static void mult(tVarMatrix<__GMBD_REAL> &o, const tConstMatrixView<__GMBD_REAL> &v, const __GMBD_REAL& scalar) { o=v; __GMBD_xSCAL(v.size(), scalar, o.data, 1); } /** Arithmethic operations with scalars = * **/ inline static void mult(tVectorView<__GMBD_REAL> &v, const __GMBD_REAL& scalar) { __GMBD_xSCAL(v.size(), scalar, v.data, 1); } /** Arithmethic operations with scalars = * **/ inline static void mult(tVarVector<__GMBD_REAL> &v, const __GMBD_REAL& scalar) { __GMBD_xSCAL(v.size(), scalar, v.data, 1); } /** Arithmethic operations with scalars = * **/ inline static void mult(tMatrixView<__GMBD_REAL> &v, const __GMBD_REAL& scalar) { __GMBD_xSCAL(v.size(), scalar, v.data, 1); } /** Arithmethic operations with scalars = * **/ inline static void mult(tVarMatrix<__GMBD_REAL> &v, const __GMBD_REAL& scalar) { __GMBD_xSCAL(v.size(), scalar, v.data, 1); } /** Dotproduct * **/ inline static __GMBD_REAL dot(const tVarVector<__GMBD_REAL> &u, const tVarVector<__GMBD_REAL> &v) { return __GMBD_xDOT(u.size(),u.data,1,v.data,1); } inline static __GMBD_REAL dot(const tVectorView<__GMBD_REAL> &u, const tVectorView<__GMBD_REAL> &v) { return __GMBD_xDOT(u.size(),u.data,1,v.data,1); } inline static void pinv(tVarMatrix<__GMBD_REAL> &A) { tVarMatrix<__GMBD_REAL> U(A.h,A.h); tVarVector<__GMBD_REAL> s(std::min(A.w,A.h)); tVarMatrix<__GMBD_REAL> VT(A.w,A.w); svd_dc(U, s, VT, A); for (unsigned int i = 0; i < s.h; ++i) { if(s[i] != 0) s[i] = 1 / s[i]; } //GEMM(VT,A) // UT = diag(s) * UT for (unsigned int i = 0; i < U.w; ++i) { for (unsigned int j = 0; j < U.h; ++j) { U(j,i) = (i < s.h) ? s(i) * U(j,i) : 0; } } // A = V * UT //tVarMatrix<__GMBD_REAL> X(VT.w, U.h); A.resize(A.w,A.h); for (unsigned int i = 0; i < A.h; ++i) { for (unsigned int j = 0; j < A.w; ++j) { A(i,j) = 0; for (unsigned int k = 0; k < std::min(VT.h,U.h); ++k) { A(i,j) += VT(k,i) * U(j,k); } } } //A = X; } #undef __GMBD_REAL #undef __GMBD_xGEMV #undef __GMBD_xNRM2 #undef __GMBD_xSCAL #undef __GMBD_xAXPY #undef __GMBD_xDOT #undef __GMBD_xGESVD #undef __GMBD_xGESDD relion-3.1.3/src/jaz/gravis/.svn/text-base/tYCbCr.h.svn-base000066400000000000000000000145121411340063500234500ustar00rootroot00000000000000#ifndef __LIBGRAVIS_T_YCB_CR_H__ #define __LIBGRAVIS_T_YCB_CR_H__ /****************************************************************************** ** Title: tYCbCr.h ** Description: Represents an CIE Y/Cb/Cr color tupel. ** ******************************************************************************/ #include namespace gravis { template class tYCbCr { /*! * Private helper functions, wrapped into an additional struct in case that we want to use the names **/ struct priv { static inline const T& min(const T& a, const T& b) { return ab ? 
a : b; } }; public: typedef T scalar_type; T y, cb, cr; tYCbCr () : y(T(0)), cb(T(0)), cr(T(0)) { } tYCbCr (T y, T cb, T cr) : y(y), cb(cb), cr(cr) { } // tYCbCr (T gray) : (gray), g(gray), b(gray) { } void set (T _y, T _cb, T _cr) { y = _y; cb = _cb; cr = _cr; } // void add (T _r, T _g, T _b) { // r += _r; g += _g; b += _b; // } T intensity () const { return y(); } /* bool operator != (const tYCbCr& c) const { return r != c.r || g != c.g || b != c.b; } bool operator == (const tYCbCr& c) const { return r == c.r && g == c.g && b == c.b; } */ tYCbCr& operator += (const tYCbCr& c) { y += c.y; cb += c.cb; cr += c.cr; return *this; } /* tYCbCr& operator += (const T gray) { r += gray; g += gray; b += gray; return *this; } */ tYCbCr& operator -= (const tYCbCr& c) { y -= c.y; cb -= c.cb; cr -= c.cr; return *this; } // tYCbCr& operator -= (const T gray) { // r -= gray; g -= gray; b -= gray; // return *this; // } tYCbCr& operator *= (const tYCbCr& c) { y *= c.y; cb *= c.cb; cr *= c.cr; return *this; } tYCbCr& operator *= (const T factor) { y *= factor; cb *= factor; cr *= factor; return *this; } /* tYCbCr& operator /= (const tYCbCr& c) { r /= c.r; g /= c.g; b /= c.b; return *this; } tYCbCr& operator /= (const T factor) { r /= factor; g /= factor; b /= factor; return *this; } * \brief All color components are clamped to [0,1]. This function works inplace. * * \return self tYCbCr& clamp() { r = std::min(std::max(r, 0), 1); g = std::min(std::max(g, 0), 1); b = std::min(std::max(b, 0), 1); return *this; } //! Unary minus inline tYCbCr operator - () const { return tYCbCr(-r, -g, -b); }; //! Addition of a scalar (analog to -=) inline tYCbCr operator + (const T& c) const { return tYCbCr(r+c, g+c, b+c); }; //! Subtraction of a scalar (analog to +=) inline tYCbCr operator - (const T& c) const { return tYCbCr(r-c, g-c, b-c); }; */ //! Multiplication of a scalar (analog to *=) inline tYCbCr operator * (const T& c) const { return tYCbCr(y*c, cb*c, cr*c); }; /* //! Division by a scalar (analog to /=) inline tYCbCr operator / (const T& c) const { return tYCbCr(r/c, g/c, b/c); }; bool operator == (const tYCbCr& arg) { return ((arg.r == r) && (arg.g == g) && (arg.b == b)); } const T &operator [](const size_t &i) const { return (&r)[i]; } T &operator [](const size_t &i) { return (&r)[i]; } */ }; template inline tYCbCr operator + (const tYCbCr& c1, const tYCbCr& c2) { tYCbCr result = c1; return (result += c2); } template inline tYCbCr operator - (const tYCbCr& c1, const tYCbCr& c2) { tYCbCr result = c1; return (result -= c2); } /* template inline tYCbCr operator * (const tYCbCr& c1, const tYCbCr& c2) { tYCbCr result(c1.r * c2.r, c1.g * c2.g, c1.b * c2.b); return result; } */ template inline tYCbCr operator * (const tYCbCr& c, T factor) { tYCbCr result(c.y * factor, c.cb * factor, c.cr * factor); return result; } template inline tYCbCr operator * (T factor, const tYCbCr& c) { tYCbCr result(c.y * factor, c.cb * factor, c.cr * factor); return result; } /* template inline tYCbCr operator / (const tYCbCr& c1, const tYCbCr& c2) { tYCbCr result(c1.r / c2.r, c1.g / c2.g, c1.b / c2.b); return result; } template inline tYCbCr operator / (const tYCbCr& c, T factor) { tYCbCr result(c.r / factor, c.g / factor, c.b / factor); return result; } template inline bool operator < (const tYCbCr& c1, const tYCbCr& c2) { T gray1 = c1.grayValue(); T gray2 = c2.grayValue(); return (gray1 < gray2); } template inline tYCbCr operator ! 
(const tYCbCr& c) { tYCbCr result = tYCbCr::White(); return (result -= c); } // Absolute of every color channel template inline tYCbCr abs(const tYCbCr& c) { return tYCbCr(c.r < T(0) ? -c.r : c.r, c.g < T(0) ? -c.g : c.g, c.b < T(0) ? -c.b : c.b); } template inline std::ostream& operator << (std::ostream& os, const tYCbCr& c) { os << "(" << c.r << " " << c.g << " " << c.b << ")"; return os; } template <> inline std::ostream& operator << (std::ostream& os, const tYCbCr& c) { os << "(" << (int)c.r << " " << (int)c.g << " " << (int)c.b << ")"; return os; } template inline T dot (const tYCbCr& v1, const tYCbCr& v2) { return (v1.r*v2.r + v1.g*v2.g + v1.b*v2.b); } */ //typedef tYCbCr bRGB; typedef tYCbCr fYCbCr; typedef tYCbCr dYCbCr; } #endif relion-3.1.3/src/jaz/gravis/CMakeLists.txt000066400000000000000000000014531411340063500204140ustar00rootroot00000000000000add_subdirectory(colour) add_subdirectory(private) add_subdirectory(tImage) add_subdirectory(io) set( install_files Exception.h lapack.h matrix_blas_reference.h Mesh.h NMesh.h NTuple.h OBJReader.h OBJWriter.h program_options.h StringFormat.h t2Matrix.h t2Vector.h t3Matrix.h t3Vector.h t4Matrix.h t4Vector.h tArray.h tBGR.h tDefaultVector.h tGray_A.h tImageAlgorithm.h tImage.h Timer.hpp tLab.h tMatrix.h tMesh.h tMM.h tQuaternion.h tRGB_A.h tRGBA.h tRGB.h Tuple.h tVarMatrix_blas.h tVarMatrix_blas.hxx tVarMatrix.h tYCbCr.h PushPull.h ) INSTALL(FILES ${install_files} DESTINATION ${CMAKE_INSTALL_PREFIX}/include/${LIBTITLE}-${LIBVERSION}/${LIBTITLE}) relion-3.1.3/src/jaz/gravis/Exception.h000066400000000000000000000055031411340063500177630ustar00rootroot00000000000000#ifndef __LIBGRAVIS_EXCEPTION_H__ #define __LIBGRAVIS_EXCEPTION_H__ /****************************************************************************** ** Title: Exception.h ** Description: Base class for exceptions in libgravis. 
** ** Author: Jean-Sebastien Pierrard, 2005 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #define GRAVIS_CHECK(condition, r) if (!(condition)) GRAVIS_THROW3(gravis::Exception, "Assertion failed", #condition) #define GRAVIS_THROW(e) throw e(std::string(__FILE__),__LINE__) #define GRAVIS_THROW2(e,r) throw e(std::string(__FILE__),__LINE__,(r)) #define GRAVIS_THROW3(e,r,arg) throw e(std::string(__FILE__),__LINE__,(r),(arg)) #include #include #include #include #include namespace gravis { class Exception : public std::runtime_error { public: Exception (const std::string& src, const int line, const std::string& dtl = "", const std::string& arg = "") : std::runtime_error( std::string("gravis exception: ") + src + ", " + dtl + " (" + arg + ")" ), _source(src), _detail(dtl), _arg(arg), _line(line) {} Exception(const Exception& e) : std::runtime_error( e.what() ), _source(e._source), _detail(e._detail), _arg(e._arg), _line(e._line) { } virtual ~Exception() throw() {} virtual const char* getClassName () const { return "Exception"; } const char* detail() const { return _detail.c_str(); } const char* argument() const { return _arg.c_str(); } const char* source() const { return _source.c_str(); } const int& line() const { return _line; } bool hasDetail() const { return _detail.length() > 0; } bool hasArgument() const { return _arg.length() > 0; } /* do not need that since ctor of runtime_error took the message already virtual const char* what() const throw() { std::string strError( _source + " " + _detail + " " + _arg ); char* pLostMem = new char[ strError.size() + 1 ]; for( size_t i = 0; i < strError.size(); i++ ) pLostMem[i] = strError[i]; pLostMem[ strError.size() ] = '\0'; return pLostMem; }*/ protected: std::string _source; std::string _detail; std::string _arg; int _line; }; inline std::ostream& operator << (std::ostream& os, const Exception& ex) { os << ex.getClassName() << " in " << ex.source() << ", line " << ex.line(); if (ex.hasDetail()) os << ": " << ex.detail(); if (ex.hasArgument()) os << " (" << ex.argument() << ")"; return os; } } /* Close Namespace "gravis" */ #endif relion-3.1.3/src/jaz/gravis/Mesh.h000066400000000000000000000214151411340063500167210ustar00rootroot00000000000000#ifndef __LIBGRAVIS_MESH_H__ #define __LIBGRAVIS_MESH_H__ /****************************************************************************** ** Title: Mesh.h ** Description: Mesh representation. ** ** Author: Jean-Sebastien Pierrard, 2005 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #include "tArray.h" #include "tRGBA.h" #include "t2Vector.h" #include "t3Vector.h" #include "tImage.h" #include "Tuple.h" #include namespace gravis { class Material { public: Material (std::string n="") : name(n), ambient(0.1,1.0), diffuse(0.5,1.0), specular(1.0,1.0), shininess(10.0), hasTexture(false), textureName(), hasEnvMap(false), envMapName(), hasNormalMap(false), normalMapName() {} std::string name; fRGBA ambient; fRGBA diffuse; fRGBA specular; float shininess; /*!< \brief Phong exponent. */ bool hasTexture; /*!< \brief whether a (diffuse) texture is defined for this material. */ std::string textureName; /*!< \brief Filename of the (diffuse) texture. */ bool hasEnvMap; std::string envMapName; bool hasNormalMap; std::string normalMapName; }; /*! \brief Mesh data structure. 
* * A Mesh contains vertex, normal, texture coordinate (uvw) and material information. * For the three types of primitives (triangle, line, point) there are index arrays * referencing above information. For example for lines, lvi indexes into * vertex, and lti into texture coordinates. The vertices and colors * for the 4th lines in the mesh are then vertex[lvi[3][0]], vertex[lvi[3][1]], * color[lci[3][0]] and color[lci[3][1]]. * * tvi.size(), lvi.size() and pvi.size() implicitly specify how many triangles, lines * and points there are in the mesh. All other index arrays must either be of the * same length as the corresponding vertex index array, or of length 0. * * How is missing information handled? If for example no normals are assigned to * any triangles, tni.size() would be zero. If normals are assigned for some triangles, * but not for others, the tni-tuples for the respective triangles must have entries * of -1 (which is the 'invalid index'). */ class Mesh { public: tArray vertex; /*!< \brief Vertex array. */ tArray normal; /*!< \brief Normal array. */ tArray texcrd; /*!< \brief Texture coordinate array. */ tArray color; /*!< \brief Color array. */ std::vector material; /*!< \brief Material array. */ tArray tvi; /*!< \brief Triangle vertex indices. */ tArray tni; /*!< \brief Triangle normal indices. */ tArray tti; /*!< \brief Triangle texcrd indices. */ tArray tci; /*!< \brief Triangle color indices. */ tArray tmi; /*!< \brief Triangle material indices. */ tArray lvi; /*!< \brief Line vertex indices. */ tArray lti; /*!< \brief Line texcrd indices. */ tArray lci; /*!< \brief Line texcrd indices. */ tArray pvi; /*!< \brief Point vertex indices. */ tArray pci; /*!< \brief Point color indices. */ tArray adjacent; /*!< \brief Adjacency list. See generateAdjacencyList(). 
*/ Mesh() : vertex(), normal(), texcrd(), color(), material(), tvi(), tni(), tti(), tci(), tmi(), lvi(), lti(), lci(), pvi(), pci(), adjacent() {} // Create a deep copy of the mesh void clone(Mesh& out) const { out.vertex = vertex.clone(); out.normal = normal.clone(); out.texcrd = texcrd.clone(); out.color = color.clone(); //out.material = material.save_clone(); out.material = material; out.tvi = tvi.clone(); out.tni = tni.clone(); out.tti = tti.clone(); out.tci = tci.clone(); out.tmi = tmi.clone(); out.lvi = lvi.clone(); out.lti = lti.clone(); out.lci = lci.clone(); out.pvi = pvi.clone(); out.pci = pci.clone(); out.adjacent = adjacent.clone(); } void generateNormals() { const int numFaces = tvi.size(); tni.setSize(numFaces); normal.setSize(numFaces); for (int i = 0; i < numFaces; i++) { f3Vector a = (vertex[tvi[i][1]] - vertex[tvi[i][0]]); f3Vector b = (vertex[tvi[i][2]] - vertex[tvi[i][0]]); normal[i] = cross(a, b).normalize(); tni[i] = Tuple3(i, i, i); } } void generatePerVertexNormals(unsigned int propagations=0) { tArray ncount; f3Vector norm; const int numFaces = tvi.size(); tni.setSize(numFaces); normal.setSize(vertex.size()); ncount.setSize(vertex.size()); for (unsigned int i = 0; i < ncount.size(); i++) ncount[i] = 0; for (unsigned int i = 0; i < normal.size(); i++) normal[i] = f3Vector(0.0f,0.0f,0.0f); for (int i = 0; i < numFaces; i++) { if(tvi[i].c0 < 0 || tvi[i].c1 < 0 || tvi[i].c2 < 0) continue; f3Vector a = (vertex[tvi[i][1]] - vertex[tvi[i][0]]); f3Vector b = (vertex[tvi[i][2]] - vertex[tvi[i][0]]); norm = cross(a, b).normalize(); tni[i] = tvi[i]; normal[tvi[i][0]] += norm; normal[tvi[i][1]] += norm; normal[tvi[i][2]] += norm; ncount[tvi[i][0]]++; ncount[tvi[i][1]]++; ncount[tvi[i][2]]++; } for (unsigned int i = 0; i < normal.size(); i++) { if(ncount[i] != 0) normal[i] /= ncount[i]; normal[i] = normal[i].normalize(); } tArray nnormal; nnormal.setSize(ncount.size()); for(unsigned int j=0; j nodeFaces(numVert); for (int i = 0; i < numFaces; i++) { for (int j = 0; j < 3; j++) { nodeFaces[tvi[i][j]].addFace(Tuple2(i, j)); } } // foreach face for (int f = 0; f < numFaces; f++) { Tuple3& ft = tvi[f]; Tuple3& at = adjacent[f]; // foreach edge for (int e = 0; e < 3; e++) { // already found adjacent face for this edge? if (at[e] >= 0) continue; // vertices for this edge int v1 = ft[e]; int v2 = ft[(e+1)%3]; // faces using these vertices Node& node1 = nodeFaces[v1]; Node& node2 = nodeFaces[v2]; for (int i = 0; i < node1.count; i++) { int f1 = node1.faces[i][0]; if (f1 == f) continue; // self for (int j = 0; j < node2.count; j++) { if (f1 == node2.faces[j][0]) { adjacent[f][e] = f1; adjacent[f1][node2.faces[j][1]] = f; } } } } } } }; } // namespace gravis #endif relion-3.1.3/src/jaz/gravis/NMesh.h000066400000000000000000000076701411340063500170460ustar00rootroot00000000000000#ifndef __LIBGRAVIS_NMESH_H__ #define __LIBGRAVIS_NMESH_H__ /****************************************************************************** ** Title: NMesh.h ** Description: N-Sided Mesh representation. ** ** Author: Jean-Sebastien Pierrard, 2005 ** Brian Amberg ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #include #include "tArray.h" #include "tRGBA.h" #include "t2Vector.h" #include "t3Vector.h" #include "tImage.h" #include "NTuple.h" #include "Mesh.h" namespace gravis { /*! \brief N-Mesh data structure. 
* * The below is more or less correct, but we may now have double * precision values and faces of arbitrary (but equal) length. (Usefull * for multicube) * * A Mesh contains vertex, normal, texture coordinate (uvw) and material information. * For the three types of primitives (triangle, line, point) there are index arrays * referencing above information. For example for lines, lvi indexes into * vertex, and lti into texture coordinates. The vertices and colors * for the 4th lines in the mesh are then vertex[lvi[3][0]], vertex[lvi[3][1]], * color[lci[3][0]] and color[lci[3][1]]. * * fvi.size(), lvi.size() and pvi.size() implicitly specify how many triangles, lines * and points there are in the mesh. All other index arrays must either be of the * same length as the corresponding vertex index array, or of length 0. * * How is missing information handled? If for example no normals are assigned to * any triangles, fni.size() would be zero. If normals are assigned for some triangles, * but not for others, the fni-tuples for the respective triangles must have entries * of -1 (which is the 'invalid index'). */ template class NMesh { typedef t3Vector Vector; typedef NTuple Tuple; public: tArray vertex; /*!< \brief Vertex array. */ tArray normal; /*!< \brief Normal array. */ tArray texcrd; /*!< \brief Texture coordinate array. */ tArray color; /*!< \brief Color array. */ std::vector material; /*!< \brief Material array. */ tArray fvi; /*!< \brief Face vertex indices. */ tArray fni; /*!< \brief Face normal indices. */ tArray fti; /*!< \brief Face texcrd indices. */ tArray fci; /*!< \brief Face color indices. */ tArray fmi; /*!< \brief Face material indices. */ tArray lvi; /*!< \brief Line vertex indices. */ tArray lti; /*!< \brief Line texcrd indices. */ tArray lci; /*!< \brief Line texcrd indices. */ tArray pvi; /*!< \brief Point vertex indices. */ tArray pci; /*!< \brief Point color indices. */ tArray adjacent; /*!< \brief Adjacency list. See generateAdjacencyList(). */ // Create a deep copy of the mesh void clone(Mesh& out) const { out.vertex = vertex.clone(); out.normal = normal.clone(); out.texcrd = texcrd.clone(); out.color = color.clone(); //out.material = material.save_clone(); out.material = material; out.fvi = fvi.clone(); out.fni = fni.clone(); out.fti = fti.clone(); out.fci = fci.clone(); out.fmi = fmi.clone(); out.lvi = lvi.clone(); out.lti = lti.clone(); out.lci = lci.clone(); out.pvi = pvi.clone(); out.pci = pci.clone(); out.adjacent = adjacent.clone(); } }; typedef NMesh d3Mesh; typedef NMesh d4Mesh; typedef NMesh f3Mesh; typedef NMesh f4Mesh; // This should work nicely, as we have binary compatibility const Mesh& convert(const NMesh &in) { return *reinterpret_cast(&in); } } // namespace gravis #endif relion-3.1.3/src/jaz/gravis/NTuple.h000066400000000000000000000064761411340063500172460ustar00rootroot00000000000000#ifndef __LIBGRAVIS_NTUPLE_H__ #define __LIBGRAVIS_NTUPLE_H__ /****************************************************************************** ** Title: NNTuple.h ** Description: N-NTuples (templated) ** ** Author: Brian Amberg 2006 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ namespace gravis { /*! \brief N-NTuple, typically used with Ieger datatypes for multi-index. */ template class NTuple { I m[N]; public: /*! \brief Construct a NTuple with entries of -1. */ NTuple() { for (size_t i=0; i= I(0)) (*this)[i] += o; } /*! \brief Whether all entries are non-negative. 
*/ bool allValid() const { bool r = (*this)[0] >= I(0); for (size_t i=1; i= I(0)); return r; } //! Lexical Ordering for NTuples inline bool operator==(const NTuple& o) const { bool r = (*this)[0] == o[0]; for (size_t i=1; i(const NTuple& o) const { return (*this != o) && !(*this < o); } //! Lexical Ordering for NTuples inline bool operator<=(const NTuple& o) const { return (*this < o) || (*this == o); } //! Lexical Ordering for NTuples inline bool operator>=(const NTuple& o) const { return (*this > o) || (*this == o); } }; // class NTuple template inline std::ostream& operator<< (std::ostream& os, const NTuple& arg) { os << "["; for (int i=0; i NTuple nTuple(const I& c0) { NTuple r; r[0] = c0; return r; }; template NTuple nTuple(const I& c0, const I& c1) { NTuple r; r[0] = c0; r[1] = c1; return r; }; template NTuple nTuple(const I& c0, const I& c1, const I& c2) { NTuple r; r[0] = c0; r[1] = c1; r[2] = c2; return r; }; template NTuple nTuple(const I& c0, const I& c1, const I& c2, const I& c3) { NTuple r; r[0] = c0; r[1] = c1; r[2] = c2; r[3] = c3; return r; }; typedef NTuple I1Tuple; typedef NTuple I2Tuple; typedef NTuple I3Tuple; typedef NTuple I4Tuple; } // namespace gravis #endif relion-3.1.3/src/jaz/gravis/OBJReader.h000066400000000000000000000006201411340063500175550ustar00rootroot00000000000000#ifndef __LIBGRAVIS_OBJREADER_H__ #define __LIBGRAVIS_OBJREADER_H__ #include "io/mesh/OBJReader.h" #warning io has been restructured and bundled into gravis/io/ \ You should preferably use the header and included routines, \ unless you rely on some special use of OBJReader/OBJWriter. \ These are still available but should be included as gravis/io/OBJ {Reader,Writer} .h #endif relion-3.1.3/src/jaz/gravis/OBJWriter.h000066400000000000000000000006201411340063500176270ustar00rootroot00000000000000#ifndef __LIBGRAVIS_OBJWRITER_H__ #define __LIBGRAVIS_OBJWRITER_H__ #include "io/mesh/OBJWriter.h" #warning io has been restructured and bundled into gravis/io/ \ You should preferably use the header and included routines, \ unless you rely on some special use of OBJReader/OBJWriter. 
\ These are still available but should be included as gravis/io/OBJ {Reader,Writer} .h #endif relion-3.1.3/src/jaz/gravis/PushPull.h000066400000000000000000000131441411340063500176010ustar00rootroot00000000000000/*============================================================================*/ /** * @file PushPull.h * * @brief Push-Pull interpolation class * * @date 1 Aug 2012 * @authors Jasenko Zivanov\n * jasenko.zivanov@unibas.ch\n * University of Basel, Switzerland */ /*============================================================================*/ #ifndef PUSH_PULL_H #define PUSH_PULL_H #include #include "tImage.h" namespace gravis { /** Push-Pull interpolation, interpolation on the image pyramid filling holes in an image */ class PushPull { public: /** Execute a push-pull interpolation (image pyramid hole filler) on the image * \param img Image to interpolate, do not use an alpha channel, use the mask to specify holes * \param mask Mask indicating which regions are holes (0: fill, 1: keep - as in the alpha channel) */ template static gravis::tImage interpolate( gravis::tImage img, gravis::tImage mask, int minSize = 1); private: template static gravis::tImage shrink2(gravis::tImage img); template static gravis::tImage shrink2(gravis::tImage img, gravis::tImage mask); template static gravis::tImage grow2(gravis::tImage img); template static gravis::tImage blur3x3(gravis::tImage img); }; template gravis::tImage PushPull :: interpolate( gravis::tImage img, gravis::tImage mask, int minSize) { const int w = img.cols(); const int h = img.rows(); gravis::tImage out(w,h); std::vector > pyramid(0); std::vector > maskPyramid(0); pyramid.push_back(img); maskPyramid.push_back(mask); gravis::tImage ci = img; gravis::tImage cm = mask; while ((int)ci.rows() > minSize && (int)ci.cols() > minSize) { ci = shrink2(ci,cm); cm = shrink2(cm); pyramid.push_back(ci); maskPyramid.push_back(cm); } maskPyramid[pyramid.size()-2].fill(1.f); for (int i = (int)pyramid.size() - 2; i >= 0; i--) { gravis::tImage pi0 = pyramid[i]; gravis::tImage pi1 = grow2(pyramid[i+1]); gravis::tImage pm = maskPyramid[i]; for (int y = 0; y < (int)pi0.rows(); y++) for (int x = 0; x < (int)pi0.cols(); x++) { pi0(x,y) = pm(x,y)*pi0(x,y) + (1.f - pm(x,y))*pi1(x,y); } } return pyramid[0]; } template gravis::tImage PushPull :: shrink2(gravis::tImage img, gravis::tImage mask) { const int w = img.cols(); const int h = img.rows(); const int w2 = (int)(ceil(w/2.f)); const int h2 = (int)(ceil(h/2.f)); gravis::tImage out(w2,h2); gravis::tImage weight(w2,h2); out.fill(T(0.f)); weight.fill(0.f); for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { out(x/2, y/2) += mask(x,y) * img(x,y); weight(x/2, y/2) += mask(x,y); } for (int i = 0; i < (w/2)*(h/2); i++) { if (weight[i] > 0.f) out[i] /= weight[i]; } return out; } template gravis::tImage PushPull :: shrink2(gravis::tImage img) { const int w = img.cols(); const int h = img.rows(); const int w2 = (int)(ceil(w/2.f)); const int h2 = (int)(ceil(h/2.f)); gravis::tImage out(w2,h2); gravis::tImage weight(w2,h2); out.fill(T(0.f)); weight.fill(0.f); for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { out(x/2, y/2) += img(x,y); weight(x/2, y/2)++; } for (int i = 0; i < w2*h2; i++) { if (weight[i] > 0.f) out[i] /= weight[i]; } return out; } template gravis::tImage PushPull :: grow2(gravis::tImage img) { const int w = img.cols(); const int h = img.rows(); gravis::tImage out(2*w,2*h); for (int y = 0; y < 2*h; y++) for (int x = 0; x < 2*w; x++) { out(x, y) = img(x/2,y/2); } return blur3x3(out); } 
template gravis::tImage PushPull :: blur3x3(gravis::tImage img) { const int w = img.cols(); const int h = img.rows(); gravis::tImage out(w,h); gravis::tImage weight(w,h); out.fill(T(0.f)); weight.fill(0.f); for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { if (x > 0 && y > 0) { out(x,y) += img(x-1,y-1)/4.f; weight(x,y) += 1/4.f; } if (x > 0) { out(x,y) += img(x-1,y)/2.f; weight(x,y) += 1/2.f; } if (x > 0 && y < h-1) { out(x,y) += img(x-1,y+1)/4.f; weight(x,y) += 1/4.f; } if (y > 0) { out(x,y) += img(x,y-1)/2.f; weight(x,y) += 1/2.f; } { out(x,y) += img(x,y); weight(x,y)++; } if (y < h-1) { out(x,y) += img(x,y+1)/2.f; weight(x,y) += 1/2.f; } if (x < w-1 && y > 0) { out(x,y) += img(x+1,y-1)/4.f; weight(x,y) += 1/4.f; } if (x < w-1) { out(x,y) += img(x+1,y)/2.f; weight(x,y) += 1/2.f; } if (x < w-1 && y < h-1) { out(x,y) += img(x+1,y+1)/4.f; weight(x,y) += 1/4.f; } } for (int i = 0; i < w*h; i++) { out[i] /= weight[i]; } return out; } } #endif relion-3.1.3/src/jaz/gravis/StringFormat.h000066400000000000000000000042251411340063500204440ustar00rootroot00000000000000#ifndef __LIBGRAVIS_STRING_FORMAT_H__ #define __LIBGRAVIS_STRING_FORMAT_H__ #include #include #include namespace gravis { /** * Usage * std::string name = StringFormat("Dies sind ")(12)(" Zahlen."); * std::string name = StringFormat("Dies sind ")(12, 4, '0')(" Zahlen."); **/ class StringFormat { private: std::stringstream s; public: StringFormat(const StringFormat& start) : s() { s << start.string(); }; const char* c_str() const { return s.str().c_str(); } std::string string() const { return s.str(); } operator std::string() const { return s.str(); } bool operator==(const StringFormat& o) const { return o.string()==string(); } bool operator!=(const StringFormat& o) const { return o.string()!=string(); } bool operator==(const std::string& o) const { return o==string(); } bool operator!=(const std::string& o) const { return o!=string(); } StringFormat() : s() { } template explicit StringFormat(const T& e) : s() { s << e; } template StringFormat(const T& e, std::streamsize w) : s() { s << std::setw(w) << e; } template StringFormat(const T& e, std::streamsize w, char fill) : s() { s << std::setw(w) << std::setfill(fill) << e; } template inline StringFormat& operator()(const T& e) { s << e; return *this; } template inline StringFormat& operator()(const T& e, int w) { s << std::setw(w) << e; return *this; } template inline StringFormat& operator()(const T& e, int w, char fill) { s << std::setw(w) << std::setfill(fill) << e; return *this; } }; } inline std::ostream& operator<< (std::ostream& os, const gravis::StringFormat& arg) { os << arg.string(); return os; } #endif relion-3.1.3/src/jaz/gravis/Timer.hpp000066400000000000000000000063411411340063500174460ustar00rootroot00000000000000#ifndef __LIBGRAVIS_TIMER_HPP__ #define __LIBGRAVIS_TIMER_HPP__ #ifndef WIN32 #include #else #include #endif #include #include namespace gravis { class Timer { friend std::ostream& operator<<(std::ostream& os, Timer& t); private: clock_t start_clock; #ifndef WIN32 timeval start_time; #endif public: Timer() : start_clock(), start_time() { restart(); } // Copy and assignment are fine double wall_time() const { #ifndef WIN32 timeval current; gettimeofday(¤t, 0); return (current.tv_sec - start_time.tv_sec) + (current.tv_usec - start_time.tv_usec)*1e-6; #else return -1; #endif } inline double cpu_time() const { return ticks_to_seconds(ticks()); } static inline double ticks_to_seconds(const clock_t& ticks) { return double(ticks) / double(CLOCKS_PER_SEC); } 
inline clock_t ticks() const { return (clock() - start_clock); } inline void restart() { #ifndef WIN32 gettimeofday(&start_time, 0); #endif start_clock = clock(); } ~Timer() { }; }; } //=========================================================================== // Allow timers to be printed to ostreams using the syntax 'os << t' // for an ostream 'os' and a timer 't'. For example, "cout << t" will // print out the total amount of time 't' has been "running". inline std::ostream& operator<<(std::ostream& os, gravis::Timer& t) { double wall_time = double(t.wall_time()); double cpu_time = t.cpu_time(); double min_time=wall_time; if (cpu_time #include #include namespace gravis { /*! \brief Tuple of 2 integers, typically used for multi-index. */ class Tuple2 { public: int c0, c1; /*! \brief Construct a Tuple2 with entries of -1. */ Tuple2() : c0(-1), c1(-1) {} Tuple2(int a, int b) : c0(a), c1(b) {} int operator[] (int i) const { return *(&c0 + i); } int& operator[] (int i) { return *(&c0 + i); } unsigned int size() const { return 2; } /*! \brief Offset all non-negative entries. */ void offset(int o) { if (c0 >= 0) c0 += o; if (c1 >= 0) c1 += o; } /*! \brief Whether all entries are non-negative. */ bool allValid() const { return c0 >= 0 && c1 >= 0; } //! Lexical Ordering for Tuples inline bool operator==(const Tuple2& o) const { return ((c0 == o.c0) && (c1 == o.c1)); } //! Lexical Ordering for Tuples inline bool operator!=(const Tuple2& o) const { return !(*this == o); } //! Lexical Ordering for Tuples inline bool operator<(const Tuple2& o) const { return ((c0 < o.c0) || ((c0 == o.c0) && (c1 < o.c1))); } //! Lexical Ordering for Tuples inline bool operator>(const Tuple2& o) const { return (*this != o) && !(*this < o); } //! Lexical Ordering for Tuples inline bool operator<=(const Tuple2& o) const { return (*this < o) || (*this == o); } //! Lexical Ordering for Tuples inline bool operator>=(const Tuple2& o) const { return (*this > o) || (*this == o); } }; // class Tuple2 /*! \brief Tuple of three integers, typically used for multi-index. */ class Tuple3 { public: int c0, c1, c2; /*! \brief Construct a Tuple3 with entries of -1. */ Tuple3() : c0(-1), c1(-1), c2(-1) {} Tuple3(int a, int b, int c) : c0(a), c1(b), c2(c) {} int operator[] (int i) const { return *(&c0 + i); } int& operator[] (int i) { return *(&c0 + i); } unsigned int size() const { return 3; } /*! brief Offset all non-negative entries. */ void offset(int o) { if (c0 >= 0) c0 += o; if (c1 >= 0) c1 += o; if (c2 >= 0) c2 += o; } /*! \brief Whether all entries are non-negative. */ bool allValid() const { return c0 >= 0 && c1 >= 0 && c2 >= 0; } //! Lexical Ordering for Tuples inline bool operator==(const Tuple3& o) const { return ((c0 == o.c0) && (c1 == o.c1) && (c2 == o.c2)); } //! Lexical Ordering for Tuples inline bool operator!=(const Tuple3& o) const { return !(*this == o); } //! Lexical Ordering for Tuples inline bool operator<(const Tuple3& o) const { return ((c0 < o.c0) || ((c0 == o.c0) && (c1 < o.c1)) || ((c0 == o.c0) && (c1 == o.c1) && (c2 < o.c2))); } //! Lexical Ordering for Tuples inline bool operator>(const Tuple3& o) const { return (*this != o) && !(*this < o); } //! Lexical Ordering for Tuples inline bool operator<=(const Tuple3& o) const { return (*this < o) || (*this == o); } //! 
Lexical Ordering for Tuples inline bool operator>=(const Tuple3& o) const { return (*this > o) || (*this == o); } }; // class Tuple3 inline std::ostream& operator<< (std::ostream& os, const Tuple3& arg) { os << "[" << arg.c0 << ", " << arg.c1 << ", " << arg.c2 << "]"; return os; } inline std::ostream& operator<< (std::ostream& os, const Tuple2& arg) { os << "[" << arg.c0 << ", " << arg.c1 << "]"; return os; } // Inverse of operator<< inline std::istream& operator>> (std::istream& is, Tuple3& arg) { char c = ' '; is >> c; if (is.eof()) return is; if (c != '[') throw std::runtime_error("Tuple should start with an opening ["); std::stringstream values; int v = 0; while ((is >> c) && (c != ']')) { if (c == ',') { v++; if (v >= 3) throw std::runtime_error("Tuple3 contains more than three elements"); values << " "; } else if (c != ' ') values << c; } if (c != ']') { throw std::runtime_error("Tuple3 should end with a ]"); } values >> arg.c0 >> arg.c1 >> arg.c2; return is; } // Inverse of operator<< inline std::istream& operator>> (std::istream& is, Tuple2& arg) { char c = ' '; is >> c; if (is.eof()) return is; if (c != '[') throw std::runtime_error("Tuple should start with an opening ["); std::stringstream values; int v = 0; while ((is >> c) && (c != ']')) { if (c == ',') { v++; if (v >= 2) throw std::runtime_error("Tuple2 contains more than three elements"); values << " "; } else if (c != ' ') values << c; } if (c != ']') { throw std::runtime_error("Tuple2 should end with a ]"); } values >> arg.c0 >> arg.c1; return is; } } // namespace gravis #endif relion-3.1.3/src/jaz/gravis/colour/000077500000000000000000000000001411340063500171545ustar00rootroot00000000000000relion-3.1.3/src/jaz/gravis/colour/.svn/000077500000000000000000000000001411340063500200405ustar00rootroot00000000000000relion-3.1.3/src/jaz/gravis/colour/.svn/all-wcprops000066400000000000000000000005551411340063500222330ustar00rootroot00000000000000K 25 svn:wc:ra_dav:version-url V 64 /repos/gravis/!svn/ver/22187/libs/libGravis/trunk/include/colour END CMakeLists.txt K 25 svn:wc:ra_dav:version-url V 79 /repos/gravis/!svn/ver/21790/libs/libGravis/trunk/include/colour/CMakeLists.txt END rgb_hsv.h K 25 svn:wc:ra_dav:version-url V 74 /repos/gravis/!svn/ver/22187/libs/libGravis/trunk/include/colour/rgb_hsv.h END relion-3.1.3/src/jaz/gravis/colour/.svn/entries000066400000000000000000000010261411340063500214330ustar00rootroot0000000000000010 dir 23800 https://svn.cs.unibas.ch:443/repos/gravis/libs/libGravis/trunk/include/colour https://svn.cs.unibas.ch:443/repos/gravis 2012-06-28T11:28:09.901604Z 22187 sandro b127c190-6edf-0310-8e64-ec95285ab742 CMakeLists.txt file 2012-06-29T11:51:37.246097Z 799e51faeec4f64c2dd7bf7a168db19f 2012-03-09T15:30:47.393511Z 21790 forster 177 rgb_hsv.h file 2012-06-29T11:51:37.238097Z d913ff90d0423161a9c7cf09dedc7443 2012-06-28T11:28:09.901604Z 22187 sandro 2163 relion-3.1.3/src/jaz/gravis/colour/.svn/text-base/000077500000000000000000000000001411340063500217345ustar00rootroot00000000000000relion-3.1.3/src/jaz/gravis/colour/.svn/text-base/CMakeLists.txt.svn-base000066400000000000000000000002611411340063500262100ustar00rootroot00000000000000set( install_files rgb_hsv.h ) INSTALL(FILES ${install_files} DESTINATION ${CMAKE_INSTALL_PREFIX}/include/${LIBTITLE}-${LIBVERSION}/${LIBTITLE}/colour) relion-3.1.3/src/jaz/gravis/colour/.svn/text-base/rgb_hsv.h.svn-base000066400000000000000000000041631411340063500252600ustar00rootroot00000000000000#ifndef __GRAVIS__COLOUR__HSV__ #define __GRAVIS__COLOUR__HSV__ #include 
"../tRGBA.h" // Author: Reinhard Knothe // Extracted: Brian Schroeder /**********************************************************************/ /* hsv2rgb: */ /**********************************************************************/ /* <-- rgb coordinates of hsv color, rgb in [0,1]^3 */ /* --> hsv color coordinates h in [0, 360), s in [0,1], v in [0,1] */ /**********************************************************************/ /* Converts hsv to rgb */ /**********************************************************************/ template gravis::tRGBA hsv2rgb(T h, T s, T v, T a = T(1)) { if (s==0 && h==-1) return gravis::tRGBA(v, v, v, a); else { if (h==360.0) h = 0.0; h /= 60.f; int i = (int)floor(h); T f = h - (T)i; T m = v * (1. - s); T n = v * (1. - s*f); T k = v * (1. - s*(1. - f)); if (i==0) return gravis::tRGBA(v, k, m, a); if (i==1) return gravis::tRGBA(n, v, m, a); if (i==2) return gravis::tRGBA(m, v, k, a); if (i==3) return gravis::tRGBA(m, n, v, a); if (i==4) return gravis::tRGBA(k, m, v, a); if (i==5) return gravis::tRGBA(v, m, n, a); } return gravis::tRGBA(v); } template gravis::tRGBA hsvScale(T dist, T min = 0.0, T max = 1.0, T a = 1.0) { T h = 240.0 - (300.0 * (dist-min)/(max-min)); if (h>360.0) h = 360.0; if (h<0.0) h = 0.0; return gravis::tRGBA(hsv2rgba(h,(T) 1,(T) 1,a)); } template gravis::tRGBA hsvBlueToRed(T dist, T min = 0.0, T max = 1.0, T a = 1.0) { T d = (dist-min)/(max-min); if (d<0.0) d = 0.0; if (d>1.0) d = 1.0; T h = 240 - (240.0 * d); return gravis::tRGBA(hsv2rgba(h,T(1), T(1), a)); } template gravis::tRGBA hsvGreenToRed(T dist, T min = 0.0, T max = 1.0, T a = 1.0) { T d = (dist-min)/(max-min); if (d<0.0) d = 0.0; if (d>1.0) d = 1.0; T h = (240.0 * d + 120); return gravis::tRGBA(hsv2rgba(h,T(1), T(1), a)); } #endif relion-3.1.3/src/jaz/gravis/colour/CMakeLists.txt000066400000000000000000000002611411340063500217130ustar00rootroot00000000000000set( install_files rgb_hsv.h ) INSTALL(FILES ${install_files} DESTINATION ${CMAKE_INSTALL_PREFIX}/include/${LIBTITLE}-${LIBVERSION}/${LIBTITLE}/colour) relion-3.1.3/src/jaz/gravis/colour/rgb_hsv.h000066400000000000000000000041631411340063500207630ustar00rootroot00000000000000#ifndef __GRAVIS__COLOUR__HSV__ #define __GRAVIS__COLOUR__HSV__ #include "../tRGBA.h" // Author: Reinhard Knothe // Extracted: Brian Schroeder /**********************************************************************/ /* hsv2rgb: */ /**********************************************************************/ /* <-- rgb coordinates of hsv color, rgb in [0,1]^3 */ /* --> hsv color coordinates h in [0, 360), s in [0,1], v in [0,1] */ /**********************************************************************/ /* Converts hsv to rgb */ /**********************************************************************/ template gravis::tRGBA hsv2rgb(T h, T s, T v, T a = T(1)) { if (s==0 && h==-1) return gravis::tRGBA(v, v, v, a); else { if (h==360.0) h = 0.0; h /= 60.f; int i = (int)floor(h); T f = h - (T)i; T m = v * (1. - s); T n = v * (1. - s*f); T k = v * (1. - s*(1. 
- f)); if (i==0) return gravis::tRGBA(v, k, m, a); if (i==1) return gravis::tRGBA(n, v, m, a); if (i==2) return gravis::tRGBA(m, v, k, a); if (i==3) return gravis::tRGBA(m, n, v, a); if (i==4) return gravis::tRGBA(k, m, v, a); if (i==5) return gravis::tRGBA(v, m, n, a); } return gravis::tRGBA(v); } template gravis::tRGBA hsvScale(T dist, T min = 0.0, T max = 1.0, T a = 1.0) { T h = 240.0 - (300.0 * (dist-min)/(max-min)); if (h>360.0) h = 360.0; if (h<0.0) h = 0.0; return gravis::tRGBA(hsv2rgba(h,(T) 1,(T) 1,a)); } template gravis::tRGBA hsvBlueToRed(T dist, T min = 0.0, T max = 1.0, T a = 1.0) { T d = (dist-min)/(max-min); if (d<0.0) d = 0.0; if (d>1.0) d = 1.0; T h = 240 - (240.0 * d); return gravis::tRGBA(hsv2rgba(h,T(1), T(1), a)); } template gravis::tRGBA hsvGreenToRed(T dist, T min = 0.0, T max = 1.0, T a = 1.0) { T d = (dist-min)/(max-min); if (d<0.0) d = 0.0; if (d>1.0) d = 1.0; T h = (240.0 * d + 120); return gravis::tRGBA(hsv2rgba(h,T(1), T(1), a)); } #endif relion-3.1.3/src/jaz/gravis/gravis/000077500000000000000000000000001411340063500171445ustar00rootroot00000000000000relion-3.1.3/src/jaz/gravis/gravis/.svn/000077500000000000000000000000001411340063500200305ustar00rootroot00000000000000relion-3.1.3/src/jaz/gravis/gravis/.svn/all-wcprops000066400000000000000000000001511411340063500222130ustar00rootroot00000000000000K 25 svn:wc:ra_dav:version-url V 64 /repos/gravis/!svn/ver/22136/libs/libGravis/trunk/include/gravis END relion-3.1.3/src/jaz/gravis/gravis/.svn/entries000066400000000000000000000003501411340063500214220ustar00rootroot0000000000000010 dir 23800 https://svn.cs.unibas.ch:443/repos/gravis/libs/libGravis/trunk/include/gravis https://svn.cs.unibas.ch:443/repos/gravis 2012-06-20T09:25:07.220971Z 22136 forster b127c190-6edf-0310-8e64-ec95285ab742 relion-3.1.3/src/jaz/gravis/io/000077500000000000000000000000001411340063500162605ustar00rootroot00000000000000relion-3.1.3/src/jaz/gravis/io/.svn/000077500000000000000000000000001411340063500171445ustar00rootroot00000000000000relion-3.1.3/src/jaz/gravis/io/.svn/all-wcprops000066400000000000000000000007201411340063500213310ustar00rootroot00000000000000K 25 svn:wc:ra_dav:version-url V 60 /repos/gravis/!svn/ver/22702/libs/libGravis/trunk/include/io END CMakeLists.txt K 25 svn:wc:ra_dav:version-url V 75 /repos/gravis/!svn/ver/22306/libs/libGravis/trunk/include/io/CMakeLists.txt END mesh.h K 25 svn:wc:ra_dav:version-url V 67 /repos/gravis/!svn/ver/22187/libs/libGravis/trunk/include/io/mesh.h END array.h K 25 svn:wc:ra_dav:version-url V 68 /repos/gravis/!svn/ver/22670/libs/libGravis/trunk/include/io/array.h END relion-3.1.3/src/jaz/gravis/io/.svn/entries000066400000000000000000000013041411340063500205360ustar00rootroot0000000000000010 dir 23800 https://svn.cs.unibas.ch:443/repos/gravis/libs/libGravis/trunk/include/io https://svn.cs.unibas.ch:443/repos/gravis 2012-11-10T07:22:40.381365Z 22702 sandro b127c190-6edf-0310-8e64-ec95285ab742 mesh dir CMakeLists.txt file 2013-11-05T12:28:36.343653Z 269f8cacf6c889b7fa8858b7b811c747 2012-07-17T14:56:31.209755Z 22306 sandro 254 array dir image dir mesh.h file 2012-06-29T11:51:38.365096Z d5e8508f6cb1107a8672737187e5c051 2012-06-28T11:28:09.901604Z 22187 sandro 3951 array.h file 2013-11-05T12:28:36.423655Z 246b56809f7fa40317235a5cf1cc5404 2012-11-05T16:14:58.915140Z 22670 sandro 4596 
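/* Illustrative usage sketch for colour/rgb_hsv.h above (added annotation, not part of the
   original gravis sources; the helper name blueToRed() is arbitrary). hsv2rgb() maps h in
   [0,360), s and v in [0,1] and an optional alpha to a gravis::tRGBA<T>. The ramp helpers
   hsvScale(), hsvBlueToRed() and hsvGreenToRed() forward to an hsv2rgba() overload that is
   not defined in that header, so they only compile where such an overload is visible; an
   equivalent ramp can be built directly on hsv2rgb():

       #include "colour/rgb_hsv.h"

       // Map a value in [lo, hi] onto a blue (h = 240) to red (h = 0) hue ramp,
       // full saturation, full value, opaque alpha.
       template <class T>
       gravis::tRGBA<T> blueToRed(T value, T lo, T hi)
       {
           T d = (value - lo) / (hi - lo);
           if (d < T(0)) d = T(0);          // clamp below
           if (d > T(1)) d = T(1);          // clamp above
           return hsv2rgb(T(240) * (T(1) - d), T(1), T(1));
       }
*/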
relion-3.1.3/src/jaz/gravis/io/.svn/text-base/000077500000000000000000000000001411340063500210405ustar00rootroot00000000000000relion-3.1.3/src/jaz/gravis/io/.svn/text-base/CMakeLists.txt.svn-base000066400000000000000000000003761411340063500253230ustar00rootroot00000000000000add_subdirectory(array) add_subdirectory(image) add_subdirectory(mesh) set( install_files mesh.h array.h ) INSTALL(FILES ${install_files} DESTINATION ${CMAKE_INSTALL_PREFIX}/include/${LIBTITLE}-${LIBVERSION}/${LIBTITLE}/io) relion-3.1.3/src/jaz/gravis/io/.svn/text-base/array.h.svn-base000066400000000000000000000107641411340063500240540ustar00rootroot00000000000000/****************************************************************************** ** Title: gravis/io/array.h ** Description: Implements reader/writer for different array file formats. ** ** Author: Sandro Schoenborn ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #ifndef __GRAVIS_IO_ARRAY__ #define __GRAVIS_IO_ARRAY__ #include "array/a.h" #include "array/ArrayStreamIO.h" #include "array/le.h" #include #include #include #include namespace gravis { namespace io { /** * The main functionality of this class is to forward the commands to * ArrayA, ArrayLittleEndian, ArrayStreamIO based on the file contents. * **/ class Array { private: static inline bool has_ending(const std::string& filename, const std::string& ending) { return boost::algorithm::ends_with( filename, ending ); } static inline bool is_readable_format( std::string const& filename ) { if ( has_ending( filename, ".gz" ) ) { // compressed format -- so far only readable in this format return true; } else { // uncompressed format std::ifstream ifIn( filename.c_str() ); std::string strLine; std::getline( ifIn, strLine ); if ( strLine.empty() ) return false; boost::algorithm::erase_all( strLine, " " ); size_t nP1 = strLine.find("[0]"); // first index is alway 0 if ( nP1 == 0 ) return true; else return false; } } static inline bool is_header_format( std::string const& filename ) { std::ifstream ifIn( filename.c_str(), std::ios_base::binary ); std::string strMagic = ArrayA::magic(); std::vector vCheck( strMagic.size() ); if ( ifIn.read( &vCheck[0], vCheck.size() ) ) { std::string strCheck( vCheck.begin(), vCheck.end() ); if ( strCheck == strMagic ) return true; else return false; } else return false; } public: /** * Load vector from a file. The filetype is determined automatically **/ template static inline std::vector load( const std::string& filename ) { std::vector vec; load( vec, filename ); return std::move(vec); } /** * Load vector from a file. The filetype is determined automatically **/ template static inline void load( std::vector& out, const std::string& filename, size_t count = 0 ) { if ( is_readable_format( filename ) ) ArrayStreamIO::load( out, filename ); else if ( is_header_format( filename ) ) ArrayA::load( out, filename ); else { tArray tout; ArrayLittleEndian::load( tout, filename, count ); out = std::vector( tout ); } } /** * save array to a file, standard format is ArrayStreamIO (readable format) **/ template static inline void save(const std::string& filename, const std::vector& array ) { ArrayStreamIO::save( filename, array ); } /** * Load vector from a file. 
The filetype is determined automatically **/ template static inline void load( gravis::tArray& out, const std::string& filename, size_t count = 0 ) { if ( is_readable_format( filename ) ) ArrayStreamIO::load( out, filename ); else if ( is_header_format( filename ) ) ArrayA::load( out, filename ); else ArrayLittleEndian::load( out, filename, count ); } /** * save array to a file, standard format is ArrayStreamIO (readable format) **/ template static inline void save(const std::string& filename, const gravis::tArray& array ) { ArrayStreamIO::save( filename, array ); } }; } } #endif relion-3.1.3/src/jaz/gravis/io/.svn/text-base/mesh.h.svn-base000066400000000000000000000075571411340063500237000ustar00rootroot00000000000000/****************************************************************************** ** Title: gravis/io/mesh/obj.h ** Description: Implements reader/writer for different mesh file formats. ** ** Author: Brian Amberg ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #ifndef __GRAVIS_IO_MESH__ #define __GRAVIS_IO_MESH__ #include "mesh/obj.h" #include "mesh/msh.h" namespace gravis { namespace io { /** * The main functionality of this class is to forward the commands to * MeshMSH and MeshOBJ based on the file ending. * * Using this instead of MeshMSH and MeshOBJ makes your program a lot more * flexible. **/ class Mesh { private: static inline bool has_ending(const std::string& filename, const std::string& ending) { return (filename.size() >= ending.size()) && (filename.substr(filename.size() - ending.size()) == ending); } static inline bool is_obj(const std::string& filename) { return (has_ending(filename, ".obj") || has_ending(filename, ".obj.gz")); } static inline bool is_msh(const std::string& filename) { return (has_ending(filename, ".msh") || has_ending(filename, ".msh.gz")); } public: /** * Load mesh from a file. The filetype is determined automatically **/ static inline void load(gravis::Mesh& out, const std::string& filename) { if (is_msh(filename)) MeshMSH::load(out, filename); else MeshOBJ::load(out, filename); } /** * Load mesh from a file. The filetype is determined automatically **/ static inline void load(gravis::fMesh& out, const std::string& filename) { if (is_msh(filename)) MeshMSH::load(out, filename); else { MeshOBJ::load(out, filename); } } /** * save mesh to a file. The filetype is determined from the ending **/ static inline void save(const std::string& filename, const gravis::Mesh& mesh) { if (is_msh(filename)) MeshMSH::save(filename, mesh); else MeshOBJ::save(filename, mesh); } /** * save mesh to a file. The filetype is determined from the ending **/ static inline void save(const std::string& filename, const gravis::fMesh& mesh) { if (is_msh(filename)) MeshMSH::save(filename, mesh); else MeshOBJ::save(filename, mesh); } /** * save mesh to a file. The filetype is determined from the ending. * Any texture files are copied into the same directory as the output * file unless they already exist. **/ static inline void save_complete(const std::string& filename, const gravis::Mesh& mesh) { if (is_msh(filename)) MeshMSH::save_complete(filename, mesh); else MeshOBJ::save_complete(filename, mesh); } /** * save mesh to a file. The filetype is determined from the ending. * Any texture files are copied into the same directory as the output * file unless they already exist. 
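*
* An illustrative usage sketch for this front-end (added annotation, not part of the original
* header; the file names are arbitrary): because io::Mesh picks MeshMSH or MeshOBJ from the
* file ending, converting between the two formats is just a load followed by a save:
*
*     gravis::fMesh m;
*     gravis::io::Mesh::load(m, "model.obj");           // .obj / .obj.gz handled by MeshOBJ
*     gravis::io::Mesh::save_complete("model.msh", m);  // .msh / .msh.gz handled by MeshMSH,
*                                                       // textures copied next to the output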
**/ static inline void save_complete(const std::string& filename, const gravis::fMesh& mesh) { if (is_msh(filename)) MeshMSH::save_complete(filename, mesh); else MeshOBJ::save_complete(filename, mesh); } }; } } #endif relion-3.1.3/src/jaz/gravis/io/CMakeLists.txt000066400000000000000000000003761411340063500210260ustar00rootroot00000000000000add_subdirectory(array) add_subdirectory(image) add_subdirectory(mesh) set( install_files mesh.h array.h ) INSTALL(FILES ${install_files} DESTINATION ${CMAKE_INSTALL_PREFIX}/include/${LIBTITLE}-${LIBVERSION}/${LIBTITLE}/io) relion-3.1.3/src/jaz/gravis/io/array.h000066400000000000000000000107641411340063500175570ustar00rootroot00000000000000/****************************************************************************** ** Title: gravis/io/array.h ** Description: Implements reader/writer for different array file formats. ** ** Author: Sandro Schoenborn ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #ifndef __GRAVIS_IO_ARRAY__ #define __GRAVIS_IO_ARRAY__ #include "array/a.h" #include "array/ArrayStreamIO.h" #include "array/le.h" #include #include #include #include namespace gravis { namespace io { /** * The main functionality of this class is to forward the commands to * ArrayA, ArrayLittleEndian, ArrayStreamIO based on the file contents. * **/ class Array { private: static inline bool has_ending(const std::string& filename, const std::string& ending) { return boost::algorithm::ends_with( filename, ending ); } static inline bool is_readable_format( std::string const& filename ) { if ( has_ending( filename, ".gz" ) ) { // compressed format -- so far only readable in this format return true; } else { // uncompressed format std::ifstream ifIn( filename.c_str() ); std::string strLine; std::getline( ifIn, strLine ); if ( strLine.empty() ) return false; boost::algorithm::erase_all( strLine, " " ); size_t nP1 = strLine.find("[0]"); // first index is alway 0 if ( nP1 == 0 ) return true; else return false; } } static inline bool is_header_format( std::string const& filename ) { std::ifstream ifIn( filename.c_str(), std::ios_base::binary ); std::string strMagic = ArrayA::magic(); std::vector vCheck( strMagic.size() ); if ( ifIn.read( &vCheck[0], vCheck.size() ) ) { std::string strCheck( vCheck.begin(), vCheck.end() ); if ( strCheck == strMagic ) return true; else return false; } else return false; } public: /** * Load vector from a file. The filetype is determined automatically **/ template static inline std::vector load( const std::string& filename ) { std::vector vec; load( vec, filename ); return std::move(vec); } /** * Load vector from a file. The filetype is determined automatically **/ template static inline void load( std::vector& out, const std::string& filename, size_t count = 0 ) { if ( is_readable_format( filename ) ) ArrayStreamIO::load( out, filename ); else if ( is_header_format( filename ) ) ArrayA::load( out, filename ); else { tArray tout; ArrayLittleEndian::load( tout, filename, count ); out = std::vector( tout ); } } /** * save array to a file, standard format is ArrayStreamIO (readable format) **/ template static inline void save(const std::string& filename, const std::vector& array ) { ArrayStreamIO::save( filename, array ); } /** * Load vector from a file. 
The filetype is determined automatically **/ template static inline void load( gravis::tArray& out, const std::string& filename, size_t count = 0 ) { if ( is_readable_format( filename ) ) ArrayStreamIO::load( out, filename ); else if ( is_header_format( filename ) ) ArrayA::load( out, filename ); else ArrayLittleEndian::load( out, filename, count ); } /** * save array to a file, standard format is ArrayStreamIO (readable format) **/ template static inline void save(const std::string& filename, const gravis::tArray& array ) { ArrayStreamIO::save( filename, array ); } }; } } #endif relion-3.1.3/src/jaz/gravis/io/array/000077500000000000000000000000001411340063500173765ustar00rootroot00000000000000relion-3.1.3/src/jaz/gravis/io/array/.svn/000077500000000000000000000000001411340063500202625ustar00rootroot00000000000000relion-3.1.3/src/jaz/gravis/io/array/.svn/all-wcprops000066400000000000000000000017541411340063500224570ustar00rootroot00000000000000K 25 svn:wc:ra_dav:version-url V 66 /repos/gravis/!svn/ver/22702/libs/libGravis/trunk/include/io/array END ArrayStreamIO.h K 25 svn:wc:ra_dav:version-url V 82 /repos/gravis/!svn/ver/22702/libs/libGravis/trunk/include/io/array/ArrayStreamIO.h END CMakeLists.txt K 25 svn:wc:ra_dav:version-url V 81 /repos/gravis/!svn/ver/21790/libs/libGravis/trunk/include/io/array/CMakeLists.txt END networkByteOrder.h K 25 svn:wc:ra_dav:version-url V 85 /repos/gravis/!svn/ver/22187/libs/libGravis/trunk/include/io/array/networkByteOrder.h END raw.h K 25 svn:wc:ra_dav:version-url V 72 /repos/gravis/!svn/ver/22187/libs/libGravis/trunk/include/io/array/raw.h END a.h K 25 svn:wc:ra_dav:version-url V 70 /repos/gravis/!svn/ver/22306/libs/libGravis/trunk/include/io/array/a.h END le.h K 25 svn:wc:ra_dav:version-url V 71 /repos/gravis/!svn/ver/22187/libs/libGravis/trunk/include/io/array/le.h END matlab.h K 25 svn:wc:ra_dav:version-url V 75 /repos/gravis/!svn/ver/22187/libs/libGravis/trunk/include/io/array/matlab.h END relion-3.1.3/src/jaz/gravis/io/array/.svn/entries000066400000000000000000000024011411340063500216530ustar00rootroot0000000000000010 dir 23800 https://svn.cs.unibas.ch:443/repos/gravis/libs/libGravis/trunk/include/io/array https://svn.cs.unibas.ch:443/repos/gravis 2012-11-10T07:22:40.381365Z 22702 sandro b127c190-6edf-0310-8e64-ec95285ab742 networkByteOrder.h file 2012-06-29T11:51:38.262096Z aa7d001ad0656cb063cd78e1a9822f30 2012-06-28T11:28:09.901604Z 22187 sandro 2890 raw.h file 2012-06-29T11:51:38.272096Z ed3bb3c1a17fdcc6ee1cef7e63c7ac4e 2012-06-28T11:28:09.901604Z 22187 sandro 1745 a.h file 2013-11-05T12:28:35.935646Z 81e10a3ef376d80f6c7fbd0a6f78285b 2012-07-17T14:56:31.209755Z 22306 sandro 9548 le.h file 2012-06-29T11:51:38.287096Z 7f7f2308314616c431bf9de2280ffe64 2012-06-28T11:28:09.901604Z 22187 sandro 2340 matlab.h file 2012-06-29T11:51:38.295096Z 8fc0c7ca2f698e19c6a447950bb8f7d7 2012-06-28T11:28:09.901604Z 22187 sandro 12376 ArrayStreamIO.h file 2013-11-05T12:28:36.011648Z c5f654b91f95de522c46d190768d3024 2012-11-10T07:22:40.381365Z 22702 sandro 7109 CMakeLists.txt file 2012-06-29T11:51:38.309096Z 26bc4410ba3d4626f9c4683c0b2c4985 2012-03-09T15:30:47.393511Z 21790 forster 247 relion-3.1.3/src/jaz/gravis/io/array/.svn/text-base/000077500000000000000000000000001411340063500221565ustar00rootroot00000000000000relion-3.1.3/src/jaz/gravis/io/array/.svn/text-base/ArrayStreamIO.h.svn-base000066400000000000000000000157051411340063500265360ustar00rootroot00000000000000/*============================================================================*/ /** * @file 
ClearTextIO.h * * @brief Gravis Arrays with >> and << reader and writer * * @date 06/30/2011 02:18:07 PM * @authors Sandro Schoenborn (ses)\n * sandro.schoenborn@unibas.ch\n * University of Basel, Switzerland */ /*============================================================================*/ #ifndef ARRAYSTREAMIO_INC #define ARRAYSTREAMIO_INC #include "../../tArray.h" #include #include #include #include #include #include #include #include #include #include #include namespace gravis { /** Stream output operator for vectors, each index is explicitely written */ template std::ostream& operator<< ( std::ostream& ostr, std::vector const& in ) { //ostr << std::scientific; ostr.precision(17); // 16 might be just enough for ( size_t i = 0; i < in.size(); i++ ) ostr << "[" << i << "]=" << in[i] << "\n"; return ostr; } /** Stream input operator for vectors, each index is explicitely expected, format: "[i]=value\n", * if \param in already has a size, it is only filled, not enlarged! */ template std::istream& operator>> ( std::istream& istr, std::vector& in ) { std::string strLine; size_t nArrPos = 0; size_t nMaxSize = std::numeric_limits::max(); if ( !in.empty() ) { nMaxSize = in.size(); nArrPos = in.size(); } while ( std::getline( istr, strLine ) && nArrPos <= nMaxSize ) { size_t nPos = 0; T tCont; // Do not parse empty lines if ( strLine.empty() ) continue; // expect format: [index]=value size_t nEq = strLine.find('='); size_t nB1 = strLine.find('['); size_t nB2 = strLine.find(']'); if ( nEq == std::string::npos || nB1 == std::string::npos || nB2 == std::string::npos || nEq < nB1 || nEq < nB2 || nB2 < nB1 ) throw std::runtime_error("ArrayStreamIO: format is invalid! expect: \"[index]=value\" per line"); std::string strIndex = strLine.substr( nB1+1, nB2-nB1-1 ); boost::algorithm::trim( strIndex ); std::string strContent = strLine.substr( nEq+1 ); // Index number std::stringstream( strIndex ) >> nPos; // Content std::stringstream( strContent ) >> tCont; if ( nPos == nArrPos ) { in.push_back( tCont ); // deque push_back should not be too costly } else if ( nPos < nArrPos ) { in[nPos] = tCont; } else { //std::cout << "Larger index, resizing to " << nPos << std::endl; in.resize( nPos+1 ); in[nPos] = tCont; } nArrPos = in.size(); } if ( in.size() > nMaxSize && nMaxSize < std::numeric_limits::max() ) in.resize( nMaxSize ); return istr; } namespace io { /** Provides vector (and tArray) IO via streams, allows for gzip compression */ class ArrayStreamIO { public: /** Load an array (vector) from a file, using stream operator>>, able to decompress gzip files (filename ends with gz)*/ template static void load( std::vector& out, std::string const& filename); /** Save an array (vector) to a file, using stream operator<<, able to compress gzip files (filename ends with gz)*/ template static void save( std::string const& filename, std::vector const& in, bool compression = false ); /** Load an array (tArray) from a file, using stream operator>>, able to decompress gzip files (filename ends with gz)*/ template static void load( tArray& out, std::string const& filename); /** Save an array (tArray) to a file, using stream operator<<, able to compress gzip files (filename ends with gz)*/ template static void save( std::string const& filename, tArray const& in, bool compression = false ); }; // ------ Implementation ------ template void ArrayStreamIO::load(std::vector& out, std::string const& filename ) { if ( boost::algorithm::ends_with( filename, "gz" ) ) { // compressed file read with gzip filter std::ifstream 
ifInput( filename.c_str(), std::ios_base::in | std::ios_base::binary ); if (!ifInput) throw std::runtime_error("Cannot open '" + filename + "'!"); boost::iostreams::filtering_streambuf bufIn; bufIn.push( boost::iostreams::gzip_decompressor() ); bufIn.push( ifInput ); std::stringstream ssData; boost::iostreams::copy( bufIn, ssData ); ssData >> out; } else { // normal readable file std::ifstream ifInput( filename.c_str(), std::ios_base::in ); if (!ifInput) throw std::runtime_error("Cannot open '" + filename + "'!"); ifInput >> out; } } template void ArrayStreamIO::save(std::string const& filename, std::vector const& in, bool compression ) { // if (!count) count = in.size(); if ( compression || boost::algorithm::ends_with( filename, "gz" ) ) { std::string strFile( filename ); if ( !boost::algorithm::ends_with( strFile, "gz" ) ) strFile += ".gz"; std::ofstream ofOut( strFile.c_str(), std::ios_base::out | std::ios_base::binary ); if ( !ofOut ) throw std::runtime_error( "Could not open file for writing: " + strFile ); boost::iostreams::filtering_streambuf bufOut; bufOut.push( boost::iostreams::gzip_compressor() ); // write a gzip compressed file std::stringstream ssData; ssData << in; // via stream operator bufOut.push( ssData ); boost::iostreams::copy( bufOut, ofOut ); } else { std::ofstream ofOut( filename.c_str(), std::ios_base::out ); if ( !ofOut ) throw std::runtime_error( "Could not open file for writing: " + filename ); ofOut << in; // via stream operator } } // tArray compatibility stuff template void ArrayStreamIO::load(tArray& out, std::string const& filename ) { std::ifstream ifInput( filename.c_str(), std::ios_base::in ); if (!ifInput) { throw std::runtime_error("Cannot open '" + filename + "'!"); } std::vector vOut( out.size() ); load( vOut, filename ); out = tArray( vOut ); } template void ArrayStreamIO::save(std::string const& filename, tArray const& in, bool compression ) { std::vector vArr( in ); save( filename, vArr, compression ); } } } #endif // ----- #ifndef ARRAYSTREAMIO_INC ----- relion-3.1.3/src/jaz/gravis/io/array/.svn/text-base/CMakeLists.txt.svn-base000066400000000000000000000003671411340063500264410ustar00rootroot00000000000000set( install_files a.h ArrayStreamIO.h le.h matlab.h networkByteOrder.h raw.h ) INSTALL(FILES ${install_files} DESTINATION ${CMAKE_INSTALL_PREFIX}/include/${LIBTITLE}-${LIBVERSION}/${LIBTITLE}/io/array) relion-3.1.3/src/jaz/gravis/io/array/.svn/text-base/a.h.svn-base000066400000000000000000000225141411340063500242700ustar00rootroot00000000000000/****************************************************************************** ** Title: io/array/a.h ** Description: Input/Output for tArrays. 
** ** Author: Brian Amberg, 2006 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #ifndef __GRAVIS_IO_ARRAY_A__ #define __GRAVIS_IO_ARRAY_A__ #include "../../tArray.h" #include "../../Exception.h" #include #include #include #include #include "../../Tuple.h" #include "../../t2Vector.h" #include "../../t3Vector.h" #include "../../t4Vector.h" #include "../../StringFormat.h" namespace gravis { namespace io { /** This should be in ArrayA in the private section, but that is not allowed **/ namespace ArrayA_private { template struct TName { static std::string name() { return "UNKNOWN"; } }; template <> struct TName { static std::string name() { return "float"; } }; template <> struct TName { static std::string name() { return "double"; } }; template <> struct TName { static std::string name() { return "char"; } }; template <> struct TName { static std::string name() { return "int8"; } }; template <> struct TName { static std::string name() { return "int16"; } }; template <> struct TName { static std::string name() { return "int32"; } }; template <> struct TName { static std::string name() { return "int64"; } }; template <> struct TName { static std::string name() { return "uint8"; } }; template <> struct TName { static std::string name() { return "uint16"; } }; template <> struct TName { static std::string name() { return "uint32"; } }; template <> struct TName { static std::string name() { return "uint64"; } }; template <> struct TName { static std::string name() { return "uint32"; } }; template <> struct TName { static std::string name() { return "uint64"; } }; template struct TName > { static std::string name() { return std::string("vector2:") + TName::name(); } }; template struct TName > { static std::string name() { return std::string("vector3:") + TName::name(); } }; template struct TName > { static std::string name() { return std::string("vector4:") + TName::name(); } }; } /** * Simple Array format with a header describing the array type **/ class ArrayA { private: static const uint8_t version = 1; public: static const std::string magic() { return "GVS:ARR"; }; public: struct ArrayHeader { char magic[8]; // 0 char type[16]; // 8 uint16_t type_size; // 24 uint32_t length; // 26 uint8_t version; // 30 char reserved1[1]; // 31 char reserved2[32]; // 32 }; /** * Load an array from an array file. Type checking is done for a subset * of supported types. More types can be added by changing the private * part of this class. Unknown types are saved and loaded, but not type checked. **/ template static bool is_a(const std::string& filename) { std::ifstream is(filename.c_str(), std::ios_base::binary); ArrayHeader h; is.read((char*)(&h), sizeof(h)); if (!is.good()) GRAVIS_THROW3(::gravis::Exception, "Could not read file", filename); if (std::string(h.magic) != magic()) GRAVIS_THROW3(::gravis::Exception, "Not a gravis array file", filename); if (h.version > version) GRAVIS_THROW3(::gravis::Exception, "Can't read this gravis array version", filename); if (sizeof(T) != h.type_size) return false; if (ArrayA_private::TName::name() != h.type) return false; return true; } /** * Load an array from an array file. Type checking is done for a subset * of supported types. More types can be added by changing the private * part of this class. Unknown types are saved and loaded, but not type checked. 
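*
* Illustrative sketch (added annotation, not part of the original header; the file name and
* element type are arbitrary): is_a() can be used to check whether a gravis array file stores
* the expected element type before loading it (it throws if the file is not a gravis array
* file at all):
*
*     std::vector<float> v;
*     if (gravis::io::ArrayA::is_a<float>("weights.gvs"))
*         gravis::io::ArrayA::load(v, "weights.gvs");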
**/ template static void load(std::vector &out, const std::string& filename) { std::ifstream is(filename.c_str(), std::ios_base::binary); ArrayHeader h; is.read((char*)(&h), sizeof(h)); if (!is.good()) GRAVIS_THROW3(::gravis::Exception, "Could not read file", filename); if (std::string(h.magic) != magic()) GRAVIS_THROW3(::gravis::Exception, "Not a gravis array file", filename); if (h.version > version) GRAVIS_THROW3(::gravis::Exception, "Can't read this gravis array version", filename); if ((ArrayA_private::TName::name() != h.type) || (sizeof(T) != h.type_size)) GRAVIS_THROW3(::gravis::Exception, ::gravis::StringFormat("Wrong type in array. Expected ")(ArrayA_private::TName::name())(" of size ")(sizeof(T))(" but got ")(h.type)(" of size ")(h.type_size), filename); out.resize(h.length); is.read((char*)(&out[0]), sizeof(out[0]) * h.length); } /** * Load an array from an array file. Type checking is done for a subset * of supported types. More types can be added by changing the private * part of this class. Unknown types are saved and loaded, but not type checked. **/ template static void load(tArray &out, const std::string& filename) { std::ifstream is(filename.c_str(), std::ios_base::binary); ArrayHeader h; is.read((char*)(&h), sizeof(h)); if (!is.good()) GRAVIS_THROW3(::gravis::Exception, "Could not read file", filename); if (std::string(h.magic) != magic()) GRAVIS_THROW3(::gravis::Exception, "Not a gravis array file", filename); if (h.version > version) GRAVIS_THROW3(::gravis::Exception, "Can't read this gravis array version", filename); if ((ArrayA_private::TName::name() != h.type) || (sizeof(T) != h.type_size)) GRAVIS_THROW3(::gravis::Exception, ::gravis::StringFormat("Wrong type in array. Expected ")(ArrayA_private::TName::name())(" of size ")(sizeof(T))(" but got ")(h.type)(" of size ")(h.type_size), filename); out.resize(h.length); is.read((char*)(&out[0]), sizeof(out[0]) * h.length); } /** * Save an array to an array file. Type checking is done for a subset * of supported types. More types can be added by changing the private * part of this class **/ template static void save(const std::string& filename, const tArray &in) { std::ofstream os(filename.c_str(), std::ios_base::binary); ArrayHeader h; std::memset((char*)(&h), 0x0, sizeof(h)); std::memcpy(h.magic, magic().data(), magic().size()); std::memcpy(h.type, ArrayA_private::TName::name().data(), ArrayA_private::TName::name().size()); h.type_size = sizeof(T); h.length = in.size(); h.version = version; os.write((char*)(&h), sizeof(h)); os.write((char*)(&in[0]), sizeof(T) * in.size()); } /** * Save an array to an array file. Type checking is done for a subset * of supported types. 
More types can be added by changing the private * part of this class **/ template static void save(const std::string& filename, const std::vector &in) { std::ofstream os(filename.c_str(), std::ios_base::binary); ArrayHeader h; std::memset((char*)(&h), 0x0, sizeof(h)); std::memcpy(h.magic, magic().data(), magic().size()); std::memcpy(h.type, ArrayA_private::TName::name().data(), ArrayA_private::TName::name().size()); h.type_size = sizeof(T); h.length = in.size(); h.version = version; os.write((char*)(&h), sizeof(h)); os.write((char*)(&in[0]), sizeof(T) * in.size()); } }; } } #endif relion-3.1.3/src/jaz/gravis/io/array/.svn/text-base/le.h.svn-base000066400000000000000000000044441411340063500244520ustar00rootroot00000000000000/****************************************************************************** ** Title: tArrayIO.h ** Description: Input/Output for tArrays. ** ** Author: Michael Keller, 2006 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #ifndef _T_LE_H_ #define _T_LE_H_ #include #include #include #include namespace gravis { namespace io { class ArrayLittleEndian { public: template static tArray load(std::string filename, size_t count=0); template static void load(tArray& out, std::string filename, size_t count=0); template static void save(std::string filename, const tArray in, size_t count=0); }; // end class template tArray ArrayLittleEndian::load(std::string filename, size_t count) { tArray out; load(out,filename,count); return out; } template void ArrayLittleEndian::load(tArray& out, std::string filename, size_t count) { FILE* in = fopen(filename.c_str(), "rb"); if (!in) { throw std::runtime_error("Cannot open '" + filename + "'!"); } // determine size if (!count) { fseek(in, 0, SEEK_END); count = (int)(ftell(in)/sizeof(T)); fseek(in, 0, SEEK_SET); } // write out.setSize(count); if (fread(out.data(), sizeof(T), count, in) != count) { fclose(in); out.setSize(0); throw std::runtime_error("Error reading '" + filename + "'!"); } fclose(in); } template void ArrayLittleEndian::save(std::string filename, const tArray in, size_t count) { if (!count) count = in.size(); FILE* out = fopen(filename.c_str(), "wb"); if (!out) { throw std::runtime_error("Cannot open '" + filename + "'!"); } if (fwrite(in.data(), sizeof(T), count, out) != count) { fclose(out); unlink(filename.c_str()); throw std::runtime_error("Error writing '" + filename + "'!"); } fclose(out); } } // end namespace } // end namespace #endif relion-3.1.3/src/jaz/gravis/io/array/.svn/text-base/matlab.h.svn-base000066400000000000000000000301301411340063500253010ustar00rootroot00000000000000/****************************************************************************** ** Title: matlab.h ** Description: Read and write tArrays to/from matlab files. 
** ** Author: Brian Amberg, 2007 ** Computer Science Department, University Basel (CH) ** ** Linking: ** When using these functions you should link with the following commandline: ** ** -I$(MATLAB)/extern/include -L$(MATLAB)/bin/em64 -L$(MATLAB)/bin/glnx86 \ ** -lmat -lmx -lut -licui18n -licuio -licuuc -licudata -lhdf5 \ ** -D__GRAVIS__MATLAB__ ** ** And make shure that $(MATLAB)/bin/glnx86 is in your LD_LIBRARY_PATH ** ******************************************************************************/ #ifndef __GRAVIS__IO__ARRAY__MATLAB__ #define __GRAVIS__IO__ARRAY__MATLAB__ #include #include #ifdef __GRAVIS__MATLAB__ #include #include #include #include "../../StringFormat.h" #include "../../tRGB.h" #include "../../tRGBA.h" #include "../../t2Vector.h" #include "../../t3Vector.h" #include "../../t4Vector.h" #include "../../Tuple.h" #include "../matlabFileIO.h" namespace gravis { namespace io { class ArrayMatlab: public MatlabFileIO { private: // Setup knowledge about how to make an tArray out of this mxArray type #define DEFINE_MX_ARRAY_CONVERSION(type, classid) \ static inline void fillArray(tArray &a, const mxArray *mx) \ { const size_t channels=sizeof(a[0])/sizeof(type); \ if (mxGetClassID(mx) != classid) GRAVIS_THROW3(gravis::Exception, "Incompatible datatype", StringFormat("Found ")(mxGetClassName(mx))(" and expected ")( #classid )); \ if (mxGetM(mx) != channels) GRAVIS_THROW3(gravis::Exception, "Array has the wrong number of rows.", StringFormat(mxGetM(mx))(" found and ")(channels)(" expected")); \ a.resize(mxGetN(mx)); memcpy(&a[0], mxGetPr(mx), channels*sizeof(type) * a.size()); } \ static inline void fillArray(tArray< t2Vector< type > > &a, const mxArray *mx) \ { const size_t channels=sizeof(a[0])/sizeof(type); \ if (mxGetClassID(mx) != classid) GRAVIS_THROW3(gravis::Exception, "Incompatible datatype", StringFormat("Found ")(mxGetClassName(mx))(" and expected ")( #classid )); \ if (mxGetM(mx) != channels) GRAVIS_THROW3(gravis::Exception, "Array has the wrong number of rows.", StringFormat(mxGetM(mx))(" found and ")(channels)(" expected")); \ a.resize(mxGetN(mx)); memcpy(&a[0], mxGetPr(mx), channels*sizeof(type) * a.size()); } \ static inline void fillArray(tArray< t3Vector< type > > &a, const mxArray *mx) \ { const size_t channels=sizeof(a[0])/sizeof(type); \ if (mxGetClassID(mx) != classid) GRAVIS_THROW3(gravis::Exception, "Incompatible datatype", StringFormat("Found ")(mxGetClassName(mx))(" and expected ")( #classid )); \ if (mxGetM(mx) != channels) GRAVIS_THROW3(gravis::Exception, "Array has the wrong number of rows.", StringFormat(mxGetM(mx))(" found and ")(channels)(" expected")); \ a.resize(mxGetN(mx)); memcpy(&a[0], mxGetPr(mx), channels*sizeof(type) * a.size()); } \ static inline void fillArray(tArray< t4Vector< type > > &a, const mxArray *mx) \ { const size_t channels=sizeof(a[0])/sizeof(type); \ if (mxGetClassID(mx) != classid) GRAVIS_THROW3(gravis::Exception, "Incompatible datatype", StringFormat("Found ")(mxGetClassName(mx))(" and expected ")( #classid )); \ if (mxGetM(mx) != channels) GRAVIS_THROW3(gravis::Exception, "Array has the wrong number of rows.", StringFormat(mxGetM(mx))(" found and ")(channels)(" expected")); \ a.resize(mxGetN(mx)); memcpy(&a[0], mxGetPr(mx), channels*sizeof(type) * a.size()); } \ static inline void fillArray(tArray< tRGB< type > > &a, const mxArray *mx) \ { const size_t channels=sizeof(a[0])/sizeof(type); \ if (mxGetClassID(mx) != classid) GRAVIS_THROW3(gravis::Exception, "Incompatible datatype", StringFormat("Found ")(mxGetClassName(mx))(" 
and expected ")( #classid )); \ if (mxGetM(mx) != channels) GRAVIS_THROW3(gravis::Exception, "Array has the wrong number of rows.", StringFormat(mxGetM(mx))(" found and ")(channels)(" expected")); \ a.resize(mxGetN(mx)); memcpy(&a[0], mxGetPr(mx), channels*sizeof(type) * a.size()); } \ static inline void fillArray(tArray< tRGBA< type > > &a, const mxArray *mx) \ { const size_t channels=sizeof(a[0])/sizeof(type); \ if (mxGetClassID(mx) != classid) GRAVIS_THROW3(gravis::Exception, "Incompatible datatype", StringFormat("Found ")(mxGetClassName(mx))(" and expected ")( #classid )); \ if (mxGetM(mx) != channels) GRAVIS_THROW3(gravis::Exception, "Array has the wrong number of rows.", StringFormat(mxGetM(mx))(" found and ")(channels)(" expected")); \ a.resize(mxGetN(mx)); memcpy(&a[0], mxGetPr(mx), channels*sizeof(type) * a.size()); } DEFINE_MX_ARRAY_CONVERSION(float, mxSINGLE_CLASS); DEFINE_MX_ARRAY_CONVERSION(double, mxDOUBLE_CLASS); DEFINE_MX_ARRAY_CONVERSION(int8_t, mxINT8_CLASS); DEFINE_MX_ARRAY_CONVERSION(int16_t, mxINT16_CLASS); DEFINE_MX_ARRAY_CONVERSION(int32_t, mxINT32_CLASS); DEFINE_MX_ARRAY_CONVERSION(int64_t, mxINT64_CLASS); DEFINE_MX_ARRAY_CONVERSION(uint8_t, mxUINT8_CLASS); DEFINE_MX_ARRAY_CONVERSION(uint16_t, mxUINT16_CLASS); DEFINE_MX_ARRAY_CONVERSION(uint32_t, mxUINT32_CLASS); DEFINE_MX_ARRAY_CONVERSION(uint64_t, mxUINT64_CLASS); #undef DEFINE_MX_ARRAY_CONVERSION // Setup knowledge about how to make an mxArray out of the gravis datatypes #define DEFINE_MX_ARRAY_CONVERSION(type, classid) \ static inline mxArray *mxFromArray(const tArray &a) { mxArray *mx = mxCreateNumericMatrix(1, a.size(), classid, mxREAL); memcpy(mxGetPr(mx), &a[0], sizeof(type) * a.size()); return mx; } \ static inline mxArray *mxFromArray(const tArray< t2Vector< type > > &a) { mxArray *mx = mxCreateNumericMatrix(2, a.size(), classid, mxREAL); memcpy(mxGetPr(mx), &a[0], 2*sizeof(type) * a.size()); return mx; } \ static inline mxArray *mxFromArray(const tArray< t3Vector< type > > &a) { mxArray *mx = mxCreateNumericMatrix(3, a.size(), classid, mxREAL); memcpy(mxGetPr(mx), &a[0], 3*sizeof(type) * a.size()); return mx; } \ static inline mxArray *mxFromArray(const tArray< t4Vector< type > > &a) { mxArray *mx = mxCreateNumericMatrix(4, a.size(), classid, mxREAL); memcpy(mxGetPr(mx), &a[0], 4*sizeof(type) * a.size()); return mx; } \ static inline mxArray *mxFromArray(const tArray< tRGB< type > > &a) { mxArray *mx = mxCreateNumericMatrix(3, a.size(), classid, mxREAL); memcpy(mxGetPr(mx), &a[0], 3*sizeof(type) * a.size()); return mx; } \ static inline mxArray *mxFromArray(const tArray< tRGBA< type > > &a) { mxArray *mx = mxCreateNumericMatrix(4, a.size(), classid, mxREAL); memcpy(mxGetPr(mx), &a[0], 4*sizeof(type) * a.size()); return mx; } static inline mxArray* mxFromArray(const tArray< Tuple2 > &a) { mxArray* mx = mxCreateNumericMatrix(2, a.size(), mxINT32_CLASS, mxREAL); memcpy(mxGetPr(mx), &a[0], 2*sizeof(int) * a.size()); return mx; } static inline mxArray* mxFromArray(const tArray< Tuple3 > &a) { mxArray* mx = mxCreateNumericMatrix(3, a.size(), mxINT32_CLASS, mxREAL); memcpy(mxGetPr(mx), &a[0], 3*sizeof(int) * a.size()); return mx; } DEFINE_MX_ARRAY_CONVERSION(float, mxSINGLE_CLASS); DEFINE_MX_ARRAY_CONVERSION(double, mxDOUBLE_CLASS); DEFINE_MX_ARRAY_CONVERSION(int8_t, mxINT8_CLASS); DEFINE_MX_ARRAY_CONVERSION(int16_t, mxINT16_CLASS); DEFINE_MX_ARRAY_CONVERSION(int32_t, mxINT32_CLASS); DEFINE_MX_ARRAY_CONVERSION(int64_t, mxINT64_CLASS); DEFINE_MX_ARRAY_CONVERSION(uint8_t, mxUINT8_CLASS); 
DEFINE_MX_ARRAY_CONVERSION(uint16_t, mxUINT16_CLASS); DEFINE_MX_ARRAY_CONVERSION(uint32_t, mxUINT32_CLASS); DEFINE_MX_ARRAY_CONVERSION(uint64_t, mxUINT64_CLASS); #undef DEFINE_MX_ARRAY_CONVERSION public: using MatlabFileIO::get; using MatlabFileIO::put; using MatlabFileIO::hasVar; using MatlabFileIO::getNDim; using MatlabFileIO::getDimensions; using MatlabFileIO::getClassID; ArrayMatlab():MatlabFileIO() {} ArrayMatlab(const std::string& filename, const std::string& mode):MatlabFileIO(filename, mode) {}; /** * Load an array from an matlab file. **/ template void get(tArray &out, const std::string& varname = "gravis_array") { if(!pmat) GRAVIS_THROW2(gravis::Exception, "Error file not open! Call open first."); /* * Read in each array we just wrote */ mxArray* pa = matGetVariable(pmat, varname.c_str()); if (pa == NULL) { matClose(pmat); GRAVIS_THROW3(gravis::Exception, "Did not find variable in file.", varname); } fillArray(out, pa); /* clean up before exit */ mxDestroyArray(pa); } /** * Load an array from an matlab file. **/ template static void read(tArray &out, const std::string& filename, const std::string& varname = "gravis_array") { ArrayMatlab am(filename,"r"); am.get(out,varname); } /** * Save an array to a matlab file. This should work with float and double data. **/ template void put(const tArray &in, const std::string& varname = "gravis_array") { if(!pmat) GRAVIS_THROW2(gravis::Exception, "Error file not open! Call open first."); mxArray* pa = mxFromArray(in); if (pa == NULL) GRAVIS_THROW3(gravis::Exception, "Could not convert to mxArray. Can not save: ", varname); int status; status = matPutVariable(pmat, varname.c_str(), pa); if (status != 0) { mxDestroyArray(pa); matClose(pmat); GRAVIS_THROW3(gravis::Exception, "Could not put variable into file. Matlab error code: ", StringFormat(status)); } mxDestroyArray(pa); } /** * Save an array to a matlab file. This should work with float and double data. **/ template static void write(const std::string& filename, const tArray &in, const std::string& varname = "gravis_array") { ArrayMatlab am(filename,"w"); am.put(in,varname); } }; } } // end namespace #endif #endif relion-3.1.3/src/jaz/gravis/io/array/.svn/text-base/networkByteOrder.h.svn-base000066400000000000000000000055121411340063500273600ustar00rootroot00000000000000/****************************************************************************** ** Title: tArrayIO.h ** Description: Input/Output for tArrays. 
** ** Author: Reinhard Knothe / Michael Keller, 2006 ** Brian Amberg, 2007 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #ifndef _T_ARRAY_IO_NETWORK_BYTE_ORDER_H_ #define _T_ARRAY_IO_NETWORK_BYTE_ORDER_H_ #include #include #include #include #include namespace gravis { namespace io { class ArrayNetworkByteOrder { public: template static tArray load(const std::string& filename, size_t count=0) { tArray ret; load(ret,filename,count); return ret; } template static void load(tArray &out, const std::string& filename, size_t count=0) { FILE* in = fopen(filename.c_str(), "rb"); if (!in) { GRAVIS_THROW3(Exception, "Unable to open file", filename); } // determine size if (!count) { fseek(in, 0, SEEK_END); count = (int)(ftell(in)/sizeof(T)); fseek(in, 0, SEEK_SET); } // write int size = sizeof(T)*count/sizeof(int); out.resize(count); int* outh = (int*)(out.data()); int* outn = new int[size]; if (fread(outn, sizeof(int), size, in) != size_t(size)) { fclose(in); out.setSize(0); delete outn; GRAVIS_THROW3(Exception, "Error reading file", filename); } fclose(in); // copy for (int i=0; i static void save(const std::string& filename, const tArray &in, size_t count=0) { if (!count) count = in.size(); FILE* out = fopen(filename.c_str(), "wb"); if (!out) { GRAVIS_THROW3(Exception, "Unable to open file", filename); } int* inh = (int*)(in.data()); int size = sizeof(T)*count/sizeof(int); int* inn = new int[size]; for (int i=0; i #include namespace gravis { namespace io { /** * Raw dump of array contents **/ class ArrayRaw { public: /** * Load an Array from a raw file. * The only checking possible is for consistent filesize **/ template static void load(tArray &out, const std::string& filename) { std::ifstream is(filename.c_str(), std::ios_base::binary); // get length of file: is.seekg(0, std::ios::end); size_t length = is.tellg(); is.seekg(0, std::ios::beg); if (length / sizeof(T) * sizeof(T) != length) GRAVIS_THROW3(Exception, "Invalid array file. 
The length does not fit.", filename); length = length / sizeof(T); // Load data out.setSize(length); is.read((char*)(&out[0]), sizeof(T) * length); } /** * Dump an array to a raw file **/ template static void save(const std::string& filename, const tArray &in) { std::ofstream os(filename.c_str(), std::ios_base::binary); os.write((char*)(&in[0]), sizeof(T) * in.size()); } }; } } #endif relion-3.1.3/src/jaz/gravis/io/array/ArrayStreamIO.h000066400000000000000000000157051411340063500222410ustar00rootroot00000000000000/*============================================================================*/ /** * @file ClearTextIO.h * * @brief Gravis Arrays with >> and << reader and writer * * @date 06/30/2011 02:18:07 PM * @authors Sandro Schoenborn (ses)\n * sandro.schoenborn@unibas.ch\n * University of Basel, Switzerland */ /*============================================================================*/ #ifndef ARRAYSTREAMIO_INC #define ARRAYSTREAMIO_INC #include "../../tArray.h" #include #include #include #include #include #include #include #include #include #include #include namespace gravis { /** Stream output operator for vectors, each index is explicitely written */ template std::ostream& operator<< ( std::ostream& ostr, std::vector const& in ) { //ostr << std::scientific; ostr.precision(17); // 16 might be just enough for ( size_t i = 0; i < in.size(); i++ ) ostr << "[" << i << "]=" << in[i] << "\n"; return ostr; } /** Stream input operator for vectors, each index is explicitely expected, format: "[i]=value\n", * if \param in already has a size, it is only filled, not enlarged! */ template std::istream& operator>> ( std::istream& istr, std::vector& in ) { std::string strLine; size_t nArrPos = 0; size_t nMaxSize = std::numeric_limits::max(); if ( !in.empty() ) { nMaxSize = in.size(); nArrPos = in.size(); } while ( std::getline( istr, strLine ) && nArrPos <= nMaxSize ) { size_t nPos = 0; T tCont; // Do not parse empty lines if ( strLine.empty() ) continue; // expect format: [index]=value size_t nEq = strLine.find('='); size_t nB1 = strLine.find('['); size_t nB2 = strLine.find(']'); if ( nEq == std::string::npos || nB1 == std::string::npos || nB2 == std::string::npos || nEq < nB1 || nEq < nB2 || nB2 < nB1 ) throw std::runtime_error("ArrayStreamIO: format is invalid! 
expect: \"[index]=value\" per line"); std::string strIndex = strLine.substr( nB1+1, nB2-nB1-1 ); boost::algorithm::trim( strIndex ); std::string strContent = strLine.substr( nEq+1 ); // Index number std::stringstream( strIndex ) >> nPos; // Content std::stringstream( strContent ) >> tCont; if ( nPos == nArrPos ) { in.push_back( tCont ); // deque push_back should not be too costly } else if ( nPos < nArrPos ) { in[nPos] = tCont; } else { //std::cout << "Larger index, resizing to " << nPos << std::endl; in.resize( nPos+1 ); in[nPos] = tCont; } nArrPos = in.size(); } if ( in.size() > nMaxSize && nMaxSize < std::numeric_limits::max() ) in.resize( nMaxSize ); return istr; } namespace io { /** Provides vector (and tArray) IO via streams, allows for gzip compression */ class ArrayStreamIO { public: /** Load an array (vector) from a file, using stream operator>>, able to decompress gzip files (filename ends with gz)*/ template static void load( std::vector& out, std::string const& filename); /** Save an array (vector) to a file, using stream operator<<, able to compress gzip files (filename ends with gz)*/ template static void save( std::string const& filename, std::vector const& in, bool compression = false ); /** Load an array (tArray) from a file, using stream operator>>, able to decompress gzip files (filename ends with gz)*/ template static void load( tArray& out, std::string const& filename); /** Save an array (tArray) to a file, using stream operator<<, able to compress gzip files (filename ends with gz)*/ template static void save( std::string const& filename, tArray const& in, bool compression = false ); }; // ------ Implementation ------ template void ArrayStreamIO::load(std::vector& out, std::string const& filename ) { if ( boost::algorithm::ends_with( filename, "gz" ) ) { // compressed file read with gzip filter std::ifstream ifInput( filename.c_str(), std::ios_base::in | std::ios_base::binary ); if (!ifInput) throw std::runtime_error("Cannot open '" + filename + "'!"); boost::iostreams::filtering_streambuf bufIn; bufIn.push( boost::iostreams::gzip_decompressor() ); bufIn.push( ifInput ); std::stringstream ssData; boost::iostreams::copy( bufIn, ssData ); ssData >> out; } else { // normal readable file std::ifstream ifInput( filename.c_str(), std::ios_base::in ); if (!ifInput) throw std::runtime_error("Cannot open '" + filename + "'!"); ifInput >> out; } } template void ArrayStreamIO::save(std::string const& filename, std::vector const& in, bool compression ) { // if (!count) count = in.size(); if ( compression || boost::algorithm::ends_with( filename, "gz" ) ) { std::string strFile( filename ); if ( !boost::algorithm::ends_with( strFile, "gz" ) ) strFile += ".gz"; std::ofstream ofOut( strFile.c_str(), std::ios_base::out | std::ios_base::binary ); if ( !ofOut ) throw std::runtime_error( "Could not open file for writing: " + strFile ); boost::iostreams::filtering_streambuf bufOut; bufOut.push( boost::iostreams::gzip_compressor() ); // write a gzip compressed file std::stringstream ssData; ssData << in; // via stream operator bufOut.push( ssData ); boost::iostreams::copy( bufOut, ofOut ); } else { std::ofstream ofOut( filename.c_str(), std::ios_base::out ); if ( !ofOut ) throw std::runtime_error( "Could not open file for writing: " + filename ); ofOut << in; // via stream operator } } // tArray compatibility stuff template void ArrayStreamIO::load(tArray& out, std::string const& filename ) { std::ifstream ifInput( filename.c_str(), std::ios_base::in ); if (!ifInput) { throw 
std::runtime_error("Cannot open '" + filename + "'!"); } std::vector vOut( out.size() ); load( vOut, filename ); out = tArray( vOut ); } template void ArrayStreamIO::save(std::string const& filename, tArray const& in, bool compression ) { std::vector vArr( in ); save( filename, vArr, compression ); } } } #endif // ----- #ifndef ARRAYSTREAMIO_INC ----- relion-3.1.3/src/jaz/gravis/io/array/CMakeLists.txt000066400000000000000000000003671411340063500221440ustar00rootroot00000000000000set( install_files a.h ArrayStreamIO.h le.h matlab.h networkByteOrder.h raw.h ) INSTALL(FILES ${install_files} DESTINATION ${CMAKE_INSTALL_PREFIX}/include/${LIBTITLE}-${LIBVERSION}/${LIBTITLE}/io/array) relion-3.1.3/src/jaz/gravis/io/array/a.h000066400000000000000000000225141411340063500177730ustar00rootroot00000000000000/****************************************************************************** ** Title: io/array/a.h ** Description: Input/Output for tArrays. ** ** Author: Brian Amberg, 2006 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #ifndef __GRAVIS_IO_ARRAY_A__ #define __GRAVIS_IO_ARRAY_A__ #include "../../tArray.h" #include "../../Exception.h" #include #include #include #include #include "../../Tuple.h" #include "../../t2Vector.h" #include "../../t3Vector.h" #include "../../t4Vector.h" #include "../../StringFormat.h" namespace gravis { namespace io { /** This should be in ArrayA in the private section, but that is not allowed **/ namespace ArrayA_private { template struct TName { static std::string name() { return "UNKNOWN"; } }; template <> struct TName { static std::string name() { return "float"; } }; template <> struct TName { static std::string name() { return "double"; } }; template <> struct TName { static std::string name() { return "char"; } }; template <> struct TName { static std::string name() { return "int8"; } }; template <> struct TName { static std::string name() { return "int16"; } }; template <> struct TName { static std::string name() { return "int32"; } }; template <> struct TName { static std::string name() { return "int64"; } }; template <> struct TName { static std::string name() { return "uint8"; } }; template <> struct TName { static std::string name() { return "uint16"; } }; template <> struct TName { static std::string name() { return "uint32"; } }; template <> struct TName { static std::string name() { return "uint64"; } }; template <> struct TName { static std::string name() { return "uint32"; } }; template <> struct TName { static std::string name() { return "uint64"; } }; template struct TName > { static std::string name() { return std::string("vector2:") + TName::name(); } }; template struct TName > { static std::string name() { return std::string("vector3:") + TName::name(); } }; template struct TName > { static std::string name() { return std::string("vector4:") + TName::name(); } }; } /** * Simple Array format with a header describing the array type **/ class ArrayA { private: static const uint8_t version = 1; public: static const std::string magic() { return "GVS:ARR"; }; public: struct ArrayHeader { char magic[8]; // 0 char type[16]; // 8 uint16_t type_size; // 24 uint32_t length; // 26 uint8_t version; // 30 char reserved1[1]; // 31 char reserved2[32]; // 32 }; /** * Load an array from an array file. Type checking is done for a subset * of supported types. More types can be added by changing the private * part of this class. 
Unknown types are saved and loaded, but not type checked. **/ template static bool is_a(const std::string& filename) { std::ifstream is(filename.c_str(), std::ios_base::binary); ArrayHeader h; is.read((char*)(&h), sizeof(h)); if (!is.good()) GRAVIS_THROW3(::gravis::Exception, "Could not read file", filename); if (std::string(h.magic) != magic()) GRAVIS_THROW3(::gravis::Exception, "Not a gravis array file", filename); if (h.version > version) GRAVIS_THROW3(::gravis::Exception, "Can't read this gravis array version", filename); if (sizeof(T) != h.type_size) return false; if (ArrayA_private::TName::name() != h.type) return false; return true; } /** * Load an array from an array file. Type checking is done for a subset * of supported types. More types can be added by changing the private * part of this class. Unknown types are saved and loaded, but not type checked. **/ template static void load(std::vector &out, const std::string& filename) { std::ifstream is(filename.c_str(), std::ios_base::binary); ArrayHeader h; is.read((char*)(&h), sizeof(h)); if (!is.good()) GRAVIS_THROW3(::gravis::Exception, "Could not read file", filename); if (std::string(h.magic) != magic()) GRAVIS_THROW3(::gravis::Exception, "Not a gravis array file", filename); if (h.version > version) GRAVIS_THROW3(::gravis::Exception, "Can't read this gravis array version", filename); if ((ArrayA_private::TName::name() != h.type) || (sizeof(T) != h.type_size)) GRAVIS_THROW3(::gravis::Exception, ::gravis::StringFormat("Wrong type in array. Expected ")(ArrayA_private::TName::name())(" of size ")(sizeof(T))(" but got ")(h.type)(" of size ")(h.type_size), filename); out.resize(h.length); is.read((char*)(&out[0]), sizeof(out[0]) * h.length); } /** * Load an array from an array file. Type checking is done for a subset * of supported types. More types can be added by changing the private * part of this class. Unknown types are saved and loaded, but not type checked. **/ template static void load(tArray &out, const std::string& filename) { std::ifstream is(filename.c_str(), std::ios_base::binary); ArrayHeader h; is.read((char*)(&h), sizeof(h)); if (!is.good()) GRAVIS_THROW3(::gravis::Exception, "Could not read file", filename); if (std::string(h.magic) != magic()) GRAVIS_THROW3(::gravis::Exception, "Not a gravis array file", filename); if (h.version > version) GRAVIS_THROW3(::gravis::Exception, "Can't read this gravis array version", filename); if ((ArrayA_private::TName::name() != h.type) || (sizeof(T) != h.type_size)) GRAVIS_THROW3(::gravis::Exception, ::gravis::StringFormat("Wrong type in array. Expected ")(ArrayA_private::TName::name())(" of size ")(sizeof(T))(" but got ")(h.type)(" of size ")(h.type_size), filename); out.resize(h.length); is.read((char*)(&out[0]), sizeof(out[0]) * h.length); } /** * Save an array to an array file. Type checking is done for a subset * of supported types. More types can be added by changing the private * part of this class **/ template static void save(const std::string& filename, const tArray &in) { std::ofstream os(filename.c_str(), std::ios_base::binary); ArrayHeader h; std::memset((char*)(&h), 0x0, sizeof(h)); std::memcpy(h.magic, magic().data(), magic().size()); std::memcpy(h.type, ArrayA_private::TName::name().data(), ArrayA_private::TName::name().size()); h.type_size = sizeof(T); h.length = in.size(); h.version = version; os.write((char*)(&h), sizeof(h)); os.write((char*)(&in[0]), sizeof(T) * in.size()); } /** * Save an array to an array file. 
Type checking is done for a subset * of supported types. More types can be added by changing the private * part of this class **/ template static void save(const std::string& filename, const std::vector &in) { std::ofstream os(filename.c_str(), std::ios_base::binary); ArrayHeader h; std::memset((char*)(&h), 0x0, sizeof(h)); std::memcpy(h.magic, magic().data(), magic().size()); std::memcpy(h.type, ArrayA_private::TName::name().data(), ArrayA_private::TName::name().size()); h.type_size = sizeof(T); h.length = in.size(); h.version = version; os.write((char*)(&h), sizeof(h)); os.write((char*)(&in[0]), sizeof(T) * in.size()); } }; } } #endif relion-3.1.3/src/jaz/gravis/io/array/le.h000066400000000000000000000044441411340063500201550ustar00rootroot00000000000000/****************************************************************************** ** Title: tArrayIO.h ** Description: Input/Output for tArrays. ** ** Author: Michael Keller, 2006 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #ifndef _T_LE_H_ #define _T_LE_H_ #include #include #include #include namespace gravis { namespace io { class ArrayLittleEndian { public: template static tArray load(std::string filename, size_t count=0); template static void load(tArray& out, std::string filename, size_t count=0); template static void save(std::string filename, const tArray in, size_t count=0); }; // end class template tArray ArrayLittleEndian::load(std::string filename, size_t count) { tArray out; load(out,filename,count); return out; } template void ArrayLittleEndian::load(tArray& out, std::string filename, size_t count) { FILE* in = fopen(filename.c_str(), "rb"); if (!in) { throw std::runtime_error("Cannot open '" + filename + "'!"); } // determine size if (!count) { fseek(in, 0, SEEK_END); count = (int)(ftell(in)/sizeof(T)); fseek(in, 0, SEEK_SET); } // write out.setSize(count); if (fread(out.data(), sizeof(T), count, in) != count) { fclose(in); out.setSize(0); throw std::runtime_error("Error reading '" + filename + "'!"); } fclose(in); } template void ArrayLittleEndian::save(std::string filename, const tArray in, size_t count) { if (!count) count = in.size(); FILE* out = fopen(filename.c_str(), "wb"); if (!out) { throw std::runtime_error("Cannot open '" + filename + "'!"); } if (fwrite(in.data(), sizeof(T), count, out) != count) { fclose(out); unlink(filename.c_str()); throw std::runtime_error("Error writing '" + filename + "'!"); } fclose(out); } } // end namespace } // end namespace #endif relion-3.1.3/src/jaz/gravis/io/array/matlab.h000066400000000000000000000301301411340063500210040ustar00rootroot00000000000000/****************************************************************************** ** Title: matlab.h ** Description: Read and write tArrays to/from matlab files. 
** ** Author: Brian Amberg, 2007 ** Computer Science Department, University Basel (CH) ** ** Linking: ** When using these functions you should link with the following commandline: ** ** -I$(MATLAB)/extern/include -L$(MATLAB)/bin/em64 -L$(MATLAB)/bin/glnx86 \ ** -lmat -lmx -lut -licui18n -licuio -licuuc -licudata -lhdf5 \ ** -D__GRAVIS__MATLAB__ ** ** And make shure that $(MATLAB)/bin/glnx86 is in your LD_LIBRARY_PATH ** ******************************************************************************/ #ifndef __GRAVIS__IO__ARRAY__MATLAB__ #define __GRAVIS__IO__ARRAY__MATLAB__ #include #include #ifdef __GRAVIS__MATLAB__ #include #include #include #include "../../StringFormat.h" #include "../../tRGB.h" #include "../../tRGBA.h" #include "../../t2Vector.h" #include "../../t3Vector.h" #include "../../t4Vector.h" #include "../../Tuple.h" #include "../matlabFileIO.h" namespace gravis { namespace io { class ArrayMatlab: public MatlabFileIO { private: // Setup knowledge about how to make an tArray out of this mxArray type #define DEFINE_MX_ARRAY_CONVERSION(type, classid) \ static inline void fillArray(tArray &a, const mxArray *mx) \ { const size_t channels=sizeof(a[0])/sizeof(type); \ if (mxGetClassID(mx) != classid) GRAVIS_THROW3(gravis::Exception, "Incompatible datatype", StringFormat("Found ")(mxGetClassName(mx))(" and expected ")( #classid )); \ if (mxGetM(mx) != channels) GRAVIS_THROW3(gravis::Exception, "Array has the wrong number of rows.", StringFormat(mxGetM(mx))(" found and ")(channels)(" expected")); \ a.resize(mxGetN(mx)); memcpy(&a[0], mxGetPr(mx), channels*sizeof(type) * a.size()); } \ static inline void fillArray(tArray< t2Vector< type > > &a, const mxArray *mx) \ { const size_t channels=sizeof(a[0])/sizeof(type); \ if (mxGetClassID(mx) != classid) GRAVIS_THROW3(gravis::Exception, "Incompatible datatype", StringFormat("Found ")(mxGetClassName(mx))(" and expected ")( #classid )); \ if (mxGetM(mx) != channels) GRAVIS_THROW3(gravis::Exception, "Array has the wrong number of rows.", StringFormat(mxGetM(mx))(" found and ")(channels)(" expected")); \ a.resize(mxGetN(mx)); memcpy(&a[0], mxGetPr(mx), channels*sizeof(type) * a.size()); } \ static inline void fillArray(tArray< t3Vector< type > > &a, const mxArray *mx) \ { const size_t channels=sizeof(a[0])/sizeof(type); \ if (mxGetClassID(mx) != classid) GRAVIS_THROW3(gravis::Exception, "Incompatible datatype", StringFormat("Found ")(mxGetClassName(mx))(" and expected ")( #classid )); \ if (mxGetM(mx) != channels) GRAVIS_THROW3(gravis::Exception, "Array has the wrong number of rows.", StringFormat(mxGetM(mx))(" found and ")(channels)(" expected")); \ a.resize(mxGetN(mx)); memcpy(&a[0], mxGetPr(mx), channels*sizeof(type) * a.size()); } \ static inline void fillArray(tArray< t4Vector< type > > &a, const mxArray *mx) \ { const size_t channels=sizeof(a[0])/sizeof(type); \ if (mxGetClassID(mx) != classid) GRAVIS_THROW3(gravis::Exception, "Incompatible datatype", StringFormat("Found ")(mxGetClassName(mx))(" and expected ")( #classid )); \ if (mxGetM(mx) != channels) GRAVIS_THROW3(gravis::Exception, "Array has the wrong number of rows.", StringFormat(mxGetM(mx))(" found and ")(channels)(" expected")); \ a.resize(mxGetN(mx)); memcpy(&a[0], mxGetPr(mx), channels*sizeof(type) * a.size()); } \ static inline void fillArray(tArray< tRGB< type > > &a, const mxArray *mx) \ { const size_t channels=sizeof(a[0])/sizeof(type); \ if (mxGetClassID(mx) != classid) GRAVIS_THROW3(gravis::Exception, "Incompatible datatype", StringFormat("Found ")(mxGetClassName(mx))(" 
and expected ")( #classid )); \ if (mxGetM(mx) != channels) GRAVIS_THROW3(gravis::Exception, "Array has the wrong number of rows.", StringFormat(mxGetM(mx))(" found and ")(channels)(" expected")); \ a.resize(mxGetN(mx)); memcpy(&a[0], mxGetPr(mx), channels*sizeof(type) * a.size()); } \ static inline void fillArray(tArray< tRGBA< type > > &a, const mxArray *mx) \ { const size_t channels=sizeof(a[0])/sizeof(type); \ if (mxGetClassID(mx) != classid) GRAVIS_THROW3(gravis::Exception, "Incompatible datatype", StringFormat("Found ")(mxGetClassName(mx))(" and expected ")( #classid )); \ if (mxGetM(mx) != channels) GRAVIS_THROW3(gravis::Exception, "Array has the wrong number of rows.", StringFormat(mxGetM(mx))(" found and ")(channels)(" expected")); \ a.resize(mxGetN(mx)); memcpy(&a[0], mxGetPr(mx), channels*sizeof(type) * a.size()); } DEFINE_MX_ARRAY_CONVERSION(float, mxSINGLE_CLASS); DEFINE_MX_ARRAY_CONVERSION(double, mxDOUBLE_CLASS); DEFINE_MX_ARRAY_CONVERSION(int8_t, mxINT8_CLASS); DEFINE_MX_ARRAY_CONVERSION(int16_t, mxINT16_CLASS); DEFINE_MX_ARRAY_CONVERSION(int32_t, mxINT32_CLASS); DEFINE_MX_ARRAY_CONVERSION(int64_t, mxINT64_CLASS); DEFINE_MX_ARRAY_CONVERSION(uint8_t, mxUINT8_CLASS); DEFINE_MX_ARRAY_CONVERSION(uint16_t, mxUINT16_CLASS); DEFINE_MX_ARRAY_CONVERSION(uint32_t, mxUINT32_CLASS); DEFINE_MX_ARRAY_CONVERSION(uint64_t, mxUINT64_CLASS); #undef DEFINE_MX_ARRAY_CONVERSION // Setup knowledge about how to make an mxArray out of the gravis datatypes #define DEFINE_MX_ARRAY_CONVERSION(type, classid) \ static inline mxArray *mxFromArray(const tArray &a) { mxArray *mx = mxCreateNumericMatrix(1, a.size(), classid, mxREAL); memcpy(mxGetPr(mx), &a[0], sizeof(type) * a.size()); return mx; } \ static inline mxArray *mxFromArray(const tArray< t2Vector< type > > &a) { mxArray *mx = mxCreateNumericMatrix(2, a.size(), classid, mxREAL); memcpy(mxGetPr(mx), &a[0], 2*sizeof(type) * a.size()); return mx; } \ static inline mxArray *mxFromArray(const tArray< t3Vector< type > > &a) { mxArray *mx = mxCreateNumericMatrix(3, a.size(), classid, mxREAL); memcpy(mxGetPr(mx), &a[0], 3*sizeof(type) * a.size()); return mx; } \ static inline mxArray *mxFromArray(const tArray< t4Vector< type > > &a) { mxArray *mx = mxCreateNumericMatrix(4, a.size(), classid, mxREAL); memcpy(mxGetPr(mx), &a[0], 4*sizeof(type) * a.size()); return mx; } \ static inline mxArray *mxFromArray(const tArray< tRGB< type > > &a) { mxArray *mx = mxCreateNumericMatrix(3, a.size(), classid, mxREAL); memcpy(mxGetPr(mx), &a[0], 3*sizeof(type) * a.size()); return mx; } \ static inline mxArray *mxFromArray(const tArray< tRGBA< type > > &a) { mxArray *mx = mxCreateNumericMatrix(4, a.size(), classid, mxREAL); memcpy(mxGetPr(mx), &a[0], 4*sizeof(type) * a.size()); return mx; } static inline mxArray* mxFromArray(const tArray< Tuple2 > &a) { mxArray* mx = mxCreateNumericMatrix(2, a.size(), mxINT32_CLASS, mxREAL); memcpy(mxGetPr(mx), &a[0], 2*sizeof(int) * a.size()); return mx; } static inline mxArray* mxFromArray(const tArray< Tuple3 > &a) { mxArray* mx = mxCreateNumericMatrix(3, a.size(), mxINT32_CLASS, mxREAL); memcpy(mxGetPr(mx), &a[0], 3*sizeof(int) * a.size()); return mx; } DEFINE_MX_ARRAY_CONVERSION(float, mxSINGLE_CLASS); DEFINE_MX_ARRAY_CONVERSION(double, mxDOUBLE_CLASS); DEFINE_MX_ARRAY_CONVERSION(int8_t, mxINT8_CLASS); DEFINE_MX_ARRAY_CONVERSION(int16_t, mxINT16_CLASS); DEFINE_MX_ARRAY_CONVERSION(int32_t, mxINT32_CLASS); DEFINE_MX_ARRAY_CONVERSION(int64_t, mxINT64_CLASS); DEFINE_MX_ARRAY_CONVERSION(uint8_t, mxUINT8_CLASS); 
DEFINE_MX_ARRAY_CONVERSION(uint16_t, mxUINT16_CLASS); DEFINE_MX_ARRAY_CONVERSION(uint32_t, mxUINT32_CLASS); DEFINE_MX_ARRAY_CONVERSION(uint64_t, mxUINT64_CLASS); #undef DEFINE_MX_ARRAY_CONVERSION public: using MatlabFileIO::get; using MatlabFileIO::put; using MatlabFileIO::hasVar; using MatlabFileIO::getNDim; using MatlabFileIO::getDimensions; using MatlabFileIO::getClassID; ArrayMatlab():MatlabFileIO() {} ArrayMatlab(const std::string& filename, const std::string& mode):MatlabFileIO(filename, mode) {}; /** * Load an array from an matlab file. **/ template void get(tArray &out, const std::string& varname = "gravis_array") { if(!pmat) GRAVIS_THROW2(gravis::Exception, "Error file not open! Call open first."); /* * Read in each array we just wrote */ mxArray* pa = matGetVariable(pmat, varname.c_str()); if (pa == NULL) { matClose(pmat); GRAVIS_THROW3(gravis::Exception, "Did not find variable in file.", varname); } fillArray(out, pa); /* clean up before exit */ mxDestroyArray(pa); } /** * Load an array from an matlab file. **/ template static void read(tArray &out, const std::string& filename, const std::string& varname = "gravis_array") { ArrayMatlab am(filename,"r"); am.get(out,varname); } /** * Save an array to a matlab file. This should work with float and double data. **/ template void put(const tArray &in, const std::string& varname = "gravis_array") { if(!pmat) GRAVIS_THROW2(gravis::Exception, "Error file not open! Call open first."); mxArray* pa = mxFromArray(in); if (pa == NULL) GRAVIS_THROW3(gravis::Exception, "Could not convert to mxArray. Can not save: ", varname); int status; status = matPutVariable(pmat, varname.c_str(), pa); if (status != 0) { mxDestroyArray(pa); matClose(pmat); GRAVIS_THROW3(gravis::Exception, "Could not put variable into file. Matlab error code: ", StringFormat(status)); } mxDestroyArray(pa); } /** * Save an array to a matlab file. This should work with float and double data. **/ template static void write(const std::string& filename, const tArray &in, const std::string& varname = "gravis_array") { ArrayMatlab am(filename,"w"); am.put(in,varname); } }; } } // end namespace #endif #endif relion-3.1.3/src/jaz/gravis/io/array/networkByteOrder.h000066400000000000000000000055121411340063500230630ustar00rootroot00000000000000/****************************************************************************** ** Title: tArrayIO.h ** Description: Input/Output for tArrays. 
** ** Author: Reinhard Knothe / Michael Keller, 2006 ** Brian Amberg, 2007 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #ifndef _T_ARRAY_IO_NETWORK_BYTE_ORDER_H_ #define _T_ARRAY_IO_NETWORK_BYTE_ORDER_H_ #include #include #include #include #include namespace gravis { namespace io { class ArrayNetworkByteOrder { public: template static tArray load(const std::string& filename, size_t count=0) { tArray ret; load(ret,filename,count); return ret; } template static void load(tArray &out, const std::string& filename, size_t count=0) { FILE* in = fopen(filename.c_str(), "rb"); if (!in) { GRAVIS_THROW3(Exception, "Unable to open file", filename); } // determine size if (!count) { fseek(in, 0, SEEK_END); count = (int)(ftell(in)/sizeof(T)); fseek(in, 0, SEEK_SET); } // write int size = sizeof(T)*count/sizeof(int); out.resize(count); int* outh = (int*)(out.data()); int* outn = new int[size]; if (fread(outn, sizeof(int), size, in) != size_t(size)) { fclose(in); out.setSize(0); delete outn; GRAVIS_THROW3(Exception, "Error reading file", filename); } fclose(in); // copy for (int i=0; i static void save(const std::string& filename, const tArray &in, size_t count=0) { if (!count) count = in.size(); FILE* out = fopen(filename.c_str(), "wb"); if (!out) { GRAVIS_THROW3(Exception, "Unable to open file", filename); } int* inh = (int*)(in.data()); int size = sizeof(T)*count/sizeof(int); int* inn = new int[size]; for (int i=0; i #include namespace gravis { namespace io { /** * Raw dump of array contents **/ class ArrayRaw { public: /** * Load an Array from a raw file. * The only checking possible is for consistent filesize **/ template static void load(tArray &out, const std::string& filename) { std::ifstream is(filename.c_str(), std::ios_base::binary); // get length of file: is.seekg(0, std::ios::end); size_t length = is.tellg(); is.seekg(0, std::ios::beg); if (length / sizeof(T) * sizeof(T) != length) GRAVIS_THROW3(Exception, "Invalid array file. 
The length does not fit.", filename); length = length / sizeof(T); // Load data out.setSize(length); is.read((char*)(&out[0]), sizeof(T) * length); } /** * Dump an array to a raw file **/ template static void save(const std::string& filename, const tArray &in) { std::ofstream os(filename.c_str(), std::ios_base::binary); os.write((char*)(&in[0]), sizeof(T) * in.size()); } }; } } #endif relion-3.1.3/src/jaz/gravis/io/image/000077500000000000000000000000001411340063500173425ustar00rootroot00000000000000relion-3.1.3/src/jaz/gravis/io/image/.svn/000077500000000000000000000000001411340063500202265ustar00rootroot00000000000000relion-3.1.3/src/jaz/gravis/io/image/.svn/all-wcprops000066400000000000000000000005471411340063500224220ustar00rootroot00000000000000K 25 svn:wc:ra_dav:version-url V 66 /repos/gravis/!svn/ver/22187/libs/libGravis/trunk/include/io/image END f.h K 25 svn:wc:ra_dav:version-url V 70 /repos/gravis/!svn/ver/22187/libs/libGravis/trunk/include/io/image/f.h END CMakeLists.txt K 25 svn:wc:ra_dav:version-url V 81 /repos/gravis/!svn/ver/21790/libs/libGravis/trunk/include/io/image/CMakeLists.txt END relion-3.1.3/src/jaz/gravis/io/image/.svn/entries000066400000000000000000000010221411340063500216150ustar00rootroot0000000000000010 dir 23800 https://svn.cs.unibas.ch:443/repos/gravis/libs/libGravis/trunk/include/io/image https://svn.cs.unibas.ch:443/repos/gravis 2012-06-28T11:28:09.901604Z 22187 sandro b127c190-6edf-0310-8e64-ec95285ab742 f.h file 2012-06-29T11:51:37.870096Z abf782aef4b3caa66e7e11ab3fc5e8f9 2012-06-28T11:28:09.901604Z 22187 sandro 5265 CMakeLists.txt file 2012-06-29T11:51:37.879096Z 6e6dc980c5133a02bd0612e692376ad0 2012-03-09T15:30:47.393511Z 21790 forster 173 relion-3.1.3/src/jaz/gravis/io/image/.svn/text-base/000077500000000000000000000000001411340063500221225ustar00rootroot00000000000000relion-3.1.3/src/jaz/gravis/io/image/.svn/text-base/CMakeLists.txt.svn-base000066400000000000000000000002551411340063500264010ustar00rootroot00000000000000set( install_files f.h ) INSTALL(FILES ${install_files} DESTINATION ${CMAKE_INSTALL_PREFIX}/include/${LIBTITLE}-${LIBVERSION}/${LIBTITLE}/io/image) relion-3.1.3/src/jaz/gravis/io/image/.svn/text-base/f.h.svn-base000066400000000000000000000122211411340063500242330ustar00rootroot00000000000000/****************************************************************************** ** Title: gravis/io/image/f.h ** Description: Implements reader/writer for the .f (vector) file format ** ** Author: Pascal Paysan ** Brian Amberg ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #ifndef __GRAVIS__IO_IMAGE_F__ #define __GRAVIS__IO_IMAGE_F__ #include #include #include #include "../../t2Vector.h" #include "../../t3Vector.h" #include "../../t4Vector.h" #include #include namespace gravis { namespace io { static const unsigned int FI_VERSION = 1u; static const float FI_MAGIC_NUMBER = 3.141592653f*3.141592653f; /** * * saves and load .f displacement fields into images. 
* **/ class ImageF { private: typedef struct FImageHeader { char name[128]; unsigned int width; unsigned int height; unsigned int depth; unsigned int channels; unsigned int version; //20 byte float magicNo; //4 byte char reserved[360]; }; static inline void displayHeader(const FImageHeader& fih) { std::cout << "Name: " << fih.name << std::endl; std::cout << "Width: " << fih.width << std::endl; std::cout << "Height: " << fih.height << std::endl; std::cout << "Depth: " << fih.depth << std::endl; std::cout << "Version: " << fih.version << std::endl; std::cout << "MagicNo: " << fih.magicNo << std::endl; std::cout << "Channels: " << fih.channels << std::endl; }; public: template static inline void load(::gravis::tImage &image, const std::string& filename) { typedef typename tImageTraits< T >::Scalar_t T_scalar; const size_t channels = sizeof(T) / sizeof(T_scalar); gzFile fin = gzopen(filename.c_str(), "rb"); try { if (0 == fin) { GRAVIS_THROW3(Exception, "Unable to open file: ", filename.c_str()); } FImageHeader fih; gzread(fin, (char*)(&fih), sizeof(fih)); if (!fih.magicNo == FI_MAGIC_NUMBER) GRAVIS_THROW3(Exception, "Not a .f file: ", filename); if(fih.magicNo != FI_MAGIC_NUMBER) GRAVIS_THROW3(Exception, "File is not an FImage: ", filename); if(fih.depth != 1) GRAVIS_THROW3(Exception, "Unable to open .f file with depth greater one: ", filename); if(fih.version > FI_VERSION) { std::stringstream s; s << "Unable to FImage file with version greater " << FI_VERSION << ". This file has FI_VERSION: " << fih.version; GRAVIS_THROW3(Exception, s.str(), filename); } if(fih.channels != channels) { std::stringstream s; s << "The image datatype has " << channels << " dimensions, while the .f file has " << fih.channels << " dimensions." << " Can not load this .f file into the image."; GRAVIS_THROW3(Exception, s.str(), filename); } image.setName(fih.name); image.setSize(fih.width, fih.height); T_scalar* image_data = reinterpret_cast(&image[0]); std::vector fbuf(fih.width*fih.height*fih.channels); gzread(fin, (char*)(&fbuf[0]), sizeof(fbuf[0])*fbuf.size()); for(size_t i=0; i static inline void save(const std::string& filename, const ::gravis::tImage &image) { typedef typename tImageTraits< T >::Scalar_t T_scalar; const size_t channels = sizeof(T) / sizeof(T_scalar); std::ofstream of(filename.c_str(), std::ofstream::binary); if (!of.good()) { GRAVIS_THROW3(Exception, "Unable to open/create .f file: ", filename); } FImageHeader fih; strncpy(fih.name, image.name().c_str(), 128); fih.width = image.cols(); fih.height = image.rows(); fih.depth = 1; fih.version = FI_VERSION; fih.magicNo = FI_MAGIC_NUMBER; fih.channels = channels; of.write((char*)(&fih),sizeof(FImageHeader)); const T_scalar* image_data = reinterpret_cast(&image[0]); std::vector fbuf(fih.width*fih.height*fih.channels); for(size_t i=0; i #include #include #include "../../t2Vector.h" #include "../../t3Vector.h" #include "../../t4Vector.h" #include #include namespace gravis { namespace io { static const unsigned int FI_VERSION = 1u; static const float FI_MAGIC_NUMBER = 3.141592653f*3.141592653f; /** * * saves and load .f displacement fields into images. 
* **/ class ImageF { private: typedef struct FImageHeader { char name[128]; unsigned int width; unsigned int height; unsigned int depth; unsigned int channels; unsigned int version; //20 byte float magicNo; //4 byte char reserved[360]; }; static inline void displayHeader(const FImageHeader& fih) { std::cout << "Name: " << fih.name << std::endl; std::cout << "Width: " << fih.width << std::endl; std::cout << "Height: " << fih.height << std::endl; std::cout << "Depth: " << fih.depth << std::endl; std::cout << "Version: " << fih.version << std::endl; std::cout << "MagicNo: " << fih.magicNo << std::endl; std::cout << "Channels: " << fih.channels << std::endl; }; public: template static inline void load(::gravis::tImage &image, const std::string& filename) { typedef typename tImageTraits< T >::Scalar_t T_scalar; const size_t channels = sizeof(T) / sizeof(T_scalar); gzFile fin = gzopen(filename.c_str(), "rb"); try { if (0 == fin) { GRAVIS_THROW3(Exception, "Unable to open file: ", filename.c_str()); } FImageHeader fih; gzread(fin, (char*)(&fih), sizeof(fih)); if (!fih.magicNo == FI_MAGIC_NUMBER) GRAVIS_THROW3(Exception, "Not a .f file: ", filename); if(fih.magicNo != FI_MAGIC_NUMBER) GRAVIS_THROW3(Exception, "File is not an FImage: ", filename); if(fih.depth != 1) GRAVIS_THROW3(Exception, "Unable to open .f file with depth greater one: ", filename); if(fih.version > FI_VERSION) { std::stringstream s; s << "Unable to FImage file with version greater " << FI_VERSION << ". This file has FI_VERSION: " << fih.version; GRAVIS_THROW3(Exception, s.str(), filename); } if(fih.channels != channels) { std::stringstream s; s << "The image datatype has " << channels << " dimensions, while the .f file has " << fih.channels << " dimensions." << " Can not load this .f file into the image."; GRAVIS_THROW3(Exception, s.str(), filename); } image.setName(fih.name); image.setSize(fih.width, fih.height); T_scalar* image_data = reinterpret_cast(&image[0]); std::vector fbuf(fih.width*fih.height*fih.channels); gzread(fin, (char*)(&fbuf[0]), sizeof(fbuf[0])*fbuf.size()); for(size_t i=0; i static inline void save(const std::string& filename, const ::gravis::tImage &image) { typedef typename tImageTraits< T >::Scalar_t T_scalar; const size_t channels = sizeof(T) / sizeof(T_scalar); std::ofstream of(filename.c_str(), std::ofstream::binary); if (!of.good()) { GRAVIS_THROW3(Exception, "Unable to open/create .f file: ", filename); } FImageHeader fih; strncpy(fih.name, image.name().c_str(), 128); fih.width = image.cols(); fih.height = image.rows(); fih.depth = 1; fih.version = FI_VERSION; fih.magicNo = FI_MAGIC_NUMBER; fih.channels = channels; of.write((char*)(&fih),sizeof(FImageHeader)); const T_scalar* image_data = reinterpret_cast(&image[0]); std::vector fbuf(fih.width*fih.height*fih.channels); for(size_t i=0; i= ending.size()) && (filename.substr(filename.size() - ending.size()) == ending); } static inline bool is_obj(const std::string& filename) { return (has_ending(filename, ".obj") || has_ending(filename, ".obj.gz")); } static inline bool is_msh(const std::string& filename) { return (has_ending(filename, ".msh") || has_ending(filename, ".msh.gz")); } public: /** * Load mesh from a file. The filetype is determined automatically **/ static inline void load(gravis::Mesh& out, const std::string& filename) { if (is_msh(filename)) MeshMSH::load(out, filename); else MeshOBJ::load(out, filename); } /** * Load mesh from a file. 
The filetype is determined automatically **/ static inline void load(gravis::fMesh& out, const std::string& filename) { if (is_msh(filename)) MeshMSH::load(out, filename); else { MeshOBJ::load(out, filename); } } /** * save mesh to a file. The filetype is determined from the ending **/ static inline void save(const std::string& filename, const gravis::Mesh& mesh) { if (is_msh(filename)) MeshMSH::save(filename, mesh); else MeshOBJ::save(filename, mesh); } /** * save mesh to a file. The filetype is determined from the ending **/ static inline void save(const std::string& filename, const gravis::fMesh& mesh) { if (is_msh(filename)) MeshMSH::save(filename, mesh); else MeshOBJ::save(filename, mesh); } /** * save mesh to a file. The filetype is determined from the ending. * Any texture files are copied into the same directory as the output * file unless they already exist. **/ static inline void save_complete(const std::string& filename, const gravis::Mesh& mesh) { if (is_msh(filename)) MeshMSH::save_complete(filename, mesh); else MeshOBJ::save_complete(filename, mesh); } /** * save mesh to a file. The filetype is determined from the ending. * Any texture files are copied into the same directory as the output * file unless they already exist. **/ static inline void save_complete(const std::string& filename, const gravis::fMesh& mesh) { if (is_msh(filename)) MeshMSH::save_complete(filename, mesh); else MeshOBJ::save_complete(filename, mesh); } }; } } #endif relion-3.1.3/src/jaz/gravis/io/mesh/000077500000000000000000000000001411340063500172145ustar00rootroot00000000000000relion-3.1.3/src/jaz/gravis/io/mesh/.svn/000077500000000000000000000000001411340063500201005ustar00rootroot00000000000000relion-3.1.3/src/jaz/gravis/io/mesh/.svn/all-wcprops000066400000000000000000000015561411340063500222750ustar00rootroot00000000000000K 25 svn:wc:ra_dav:version-url V 65 /repos/gravis/!svn/ver/22187/libs/libGravis/trunk/include/io/mesh END obj.h K 25 svn:wc:ra_dav:version-url V 71 /repos/gravis/!svn/ver/22187/libs/libGravis/trunk/include/io/mesh/obj.h END OBJReader.h K 25 svn:wc:ra_dav:version-url V 77 /repos/gravis/!svn/ver/22187/libs/libGravis/trunk/include/io/mesh/OBJReader.h END absolute_paths.h K 25 svn:wc:ra_dav:version-url V 82 /repos/gravis/!svn/ver/22187/libs/libGravis/trunk/include/io/mesh/absolute_paths.h END CMakeLists.txt K 25 svn:wc:ra_dav:version-url V 80 /repos/gravis/!svn/ver/21790/libs/libGravis/trunk/include/io/mesh/CMakeLists.txt END OBJWriter.h K 25 svn:wc:ra_dav:version-url V 77 /repos/gravis/!svn/ver/22187/libs/libGravis/trunk/include/io/mesh/OBJWriter.h END msh.h K 25 svn:wc:ra_dav:version-url V 71 /repos/gravis/!svn/ver/22187/libs/libGravis/trunk/include/io/mesh/msh.h END relion-3.1.3/src/jaz/gravis/io/mesh/.svn/entries000066400000000000000000000021601411340063500214730ustar00rootroot0000000000000010 dir 23800 https://svn.cs.unibas.ch:443/repos/gravis/libs/libGravis/trunk/include/io/mesh https://svn.cs.unibas.ch:443/repos/gravis 2012-06-28T11:28:09.901604Z 22187 sandro b127c190-6edf-0310-8e64-ec95285ab742 obj.h file 2012-06-29T11:51:38.034096Z c2dcd67347797f5cef18241a550c14cf 2012-06-28T11:28:09.901604Z 22187 sandro 13627 OBJReader.h file 2012-06-29T11:51:38.042096Z 2580f85cb3d096be00e1af75566def56 2012-06-28T11:28:09.901604Z 22187 sandro 3099 absolute_paths.h file 2012-06-29T11:51:38.050096Z 1f6ffb494e91455bbdd6bc08bed573ec 2012-06-28T11:28:09.901604Z 22187 sandro 3401 CMakeLists.txt file 2012-06-29T11:51:38.059096Z 5e9672ea96707eb915621d016ebab336 2012-03-09T15:30:47.393511Z 21790 
* **/ class MeshOBJ { static inline void write(ofstream& ofs, const gravis::fMesh& mesh, string mtlfn) { ofs << "mtllib " << mtlfn << endl; // Write vertices for (size_t i = 0; i < mesh.vertex.size(); ++i ) { ofs << "v " << mesh.vertex[i].x << " " << mesh.vertex[i].y << " " << mesh.vertex[i].z << endl; } ofs << "# " << mesh.vertex.size() << " vertices." << endl << endl; // Write texture coordinates for (size_t i = 0; i < mesh.texcrd.size(); ++i ) { ofs << "vt " << mesh.texcrd[i].x << " " << mesh.texcrd[i].y << " " << mesh.texcrd[i].z << endl; } ofs << "# " << mesh.texcrd.size() << " texture coordinates." << endl << endl; // Write texture normals for (size_t i = 0; i < mesh.normal.size(); ++i ) { ofs << "vn " << mesh.normal[i].x << " " << mesh.normal[i].y << " " << mesh.normal[i].z << " " << endl; } ofs << "# " << mesh.normal.size() << " normals." << endl << endl; // Write faces // NOTE: the vertex indices in OBJ format start from 1 int current_mtl = -1; for (size_t i = 0; i < mesh.tvi.size(); ++i ) { if(i=mesh.tti.size()) ofs << "/"; ofs << "/" << (mesh.tni[i][c])+1 ; } ofs << " "; } ofs << endl; } ofs << "# " << mesh.tvi.size() << " faces." << endl << endl; } static inline void writemtl( ofstream& ofs, const fMesh& mesh, const boost::filesystem::path objdir ) { for (size_t i=0; i=mesh.tti.size()) ofs << "/"; ofs << "/" << (mesh.tni[i][c])+1 ; } ofs << " "; } ofs << endl; } ofs << "# " << mesh.tvi.size() << " faces." << endl << endl; } static inline void writemtl( ofstream& ofs, const Mesh& mesh, const boost::filesystem::path objdir ) { for (size_t i=0; i #include #include "../../Mesh.h" namespace gravis { /*! \brief Class for reading Wavefront OBJ files. * * Features: * - Currently only reads polygonal information (no points, no lines, * and especially no NURBS, bezier patches etc.) * - The old maplib and usemap directives are ignored; texture maps must be * specified in materials (with map_Kd) * - map_Ks, map_d, map_refl and map_bump are crrently not read * - all grouping stuff is ignored (and consequently smoothing groups as * a way to implicitly specify normals) * - illum is ignored (it's a rendering setting) * - many other things in materials are ignored (Td, Ni, sharpness...) */ class OBJReader { public: struct Texcoord { Texcoord():u(0.0),v(0.0),w(0.0) {} Texcoord(float u,float v,float w):u(u),v(v),w(w) {} float u, v, w; }; struct Vertex { Vertex () : vidx(-1), nidx(-1), tidx(-1) { } int vidx, nidx, tidx; }; struct Face { int smggroup; int mtlgroup; std::vector corner; }; struct Group { Group (std::string n) : name(n) { } std::string name; std::vector fidx_v; }; OBJReader (); void read (std::string); void buildMesh (Mesh&) const; /*! * Convenience function to load a file into a mesh. 
**/ static void load(Mesh& m, const std::string& fn) { OBJReader objr; objr.read(fn); objr.buildMesh(m); } protected: void toLowerCase (char*); void parseFile (std::string); void parseLine (std::vector&); static const unsigned int OBJ_MAXLINELEN = 512; static const unsigned int OBJ_MAXARGVLEN = 32; static const char* errUnexpectedArgs () { return "Unexpected #arguments for directive: "; } std::vector vertex_v; std::vector color_v; std::vector normal_v; std::vector texcrd_v; std::vector face_v; std::vector group_v; std::vector mtl_v; // whether indices to normals were found bool foundNormals; // whether indices to texture coordinates were found bool foundTexCrds; std::string objpath; int active_smggroup; int active_mtlgroup; std::vector active_objgroup; double my_atof(const char* str); }; #include "../../private/OBJReader.hxx" } // namespace gravis; #endif relion-3.1.3/src/jaz/gravis/io/mesh/OBJWriter.h000066400000000000000000000074401411340063500212010ustar00rootroot00000000000000/****************************************************************************** ** Title: ** Description: ******************************************************************************/ #ifndef _OBJWriter_H_ #define _OBJWriter_H_ #include #include #include "../../Mesh.h" #include "../../tMesh.h" #include namespace gravis { class OBJWriter { static std::string textureFilename(std::string basename, int i) { std::stringstream s; s << basename << "_" << i << ".png"; return s.str(); } // TODO: These function belong into libFoundation, but I do not want to have xerces in everything #ifdef WIN32 #define PATH_SEPARATOR "\\" #else #define PATH_SEPARATOR "/" #endif static void path_split(std::string& dir, std::string& filename, const std::string& path) { size_t p = path.rfind(PATH_SEPARATOR); if (p == std::string::npos) { dir = "."PATH_SEPARATOR; filename = path; } else { dir = path.substr(0, p+1); filename = path.substr(p+1); } } static std::string path_filename(const std::string& path) { std::string dir, filename; path_split(dir, filename, path); return filename; }; static void copyfile(const std::string& out, const std::string& in) { std::ifstream is(in.c_str(), std::ios_base::binary); std::ofstream os(out.c_str(), std::ios_base::binary); os << is.rdbuf(); } static bool file_exists(const std::string& filename) { bool res = false; FILE* f = fopen(filename.c_str(),"r"); if(f) { res = true; fclose(f); } return res; } public: OBJWriter(); void write(std::string filepath, const gravis::Mesh& mesh); // It writes the gGenericaMesh data into OBJ format. void write( std::ofstream& ofs, const gravis::Mesh* mesh, std::string mtlfn ); // It writes the material file of an OBJ. void writemtl( std::string path, std::string mtlfn, const gravis::Mesh* mesh ); /*! * Convenience function to write a mesh to file without explicitly constructing an OBJWriter **/ static void save( const std::string& fn, const gravis::Mesh& mesh) { OBJWriter ow; ow.write(fn, mesh); } /*! * Convenience function to write a mesh with its textures to file without explicitly constructing an OBJWriter **/ static void save_complete( const std::string& fn, const gravis::Mesh& mesh) { std::cout << " Writing " << fn << std::endl; gravis::Mesh outmesh; mesh.clone(outmesh); std::string dir, name; path_split(dir, name, fn); for (size_t i=0; i #include #include "../../tMesh.h" #include "../../Mesh.h" namespace gravis { namespace io { namespace mesh_helper { /** * Find the relative path to p given base. 
**/ static inline boost::filesystem::path makeRelative(const boost::filesystem::path& p, const boost::filesystem::path& base) { using namespace boost::filesystem; if (p.string() == "") return path(); path p2(absolute(base)); if (base.string() == "") p2 = initial_path(); path p1(absolute(p, p2)); // Remove initial equal parts path::iterator ip1 = p1.begin(); path::iterator ip2 = p2.begin(); while((ip1!=p1.end()) && (ip2 != p2.end()) && (*ip1 == *ip2)) { ++ip1; ++ip2; }; path relative; for (; ip2!=p2.end(); ++ip2) relative /= ".."; for (; ip1!=p1.end(); ++ip1) relative /= *ip1; return relative; } /** * After loading, all paths have to be absolute, otherwise we loose track * of the files. To do this we check if we can find the file by any means * (from obj directory or from current directory) and expand accordingly **/ static inline void makePathsAbsolute(gravis::fMesh& m, const std::string& filename) { using namespace boost::filesystem; path dir(path(filename).branch_path()); for (size_t i=0; i(); } if (!dir.is_complete()) { dir = absolute(dir); } path complete_path = absolute( texturefn, dir ); if (exists( complete_path )) m.material[i].texture.setFilename( complete_path ); } } } } /** * After loading, all paths have to be absolute, otherwise we loose track * of the files. To do this we check if we can find the file by any means * (from obj directory or from current directory) and expand accordingly **/ static inline void makePathsAbsolute(gravis::Mesh& m, const std::string& filename) { using namespace boost::filesystem; path dir(path(filename).branch_path()); for (size_t i=0; i(); } if (!dir.is_complete()) { dir = absolute(dir); } path complete_path = absolute( texturefn, dir ); if (exists( complete_path )) { m.material[i].textureName = complete_path.string(); } } } } } } } } #endif relion-3.1.3/src/jaz/gravis/io/mesh/msh.h000066400000000000000000000344721411340063500201660ustar00rootroot00000000000000/****************************************************************************** ** Title: gravis/io/mesh/msh.h ** Description: Implements reader/writer for the .msh mesh file format ** ** Author: Brian Amberg ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #ifndef __GRAVIS_IO__MESH_MSH_H__ #define __GRAVIS_IO__MESH_MSH_H__ #include #include "../../Mesh.h" #include "../../tMesh.h" #include #include #ifdef WIN32 #include #define alloca _alloca #else #include #endif #include "absolute_paths.h" #include #define MEM_ALLOC(type, n, size) ((type)alloca((n)*(size))) namespace gravis { namespace io { namespace MSH_CONSTANTS { static const std::string id_start("GRAVIS::MSH2::BINARY::START\n"); static const std::string id_end("\nGRAVIS::MSH2::BINARY::END\n"); } /** * Saves meshes in msh format * The msh format is a direct reflection of our Mesh datastructure, which * is extremely fast to save and load while being a lot more accurate and * compact than meshes in obj format. * * When implementing loading and saving it is best to use the gravis::io::Mesh * and gravis::io::Mesh, which determine the filetype automatically. 
**/ class MeshMSH { private: /*** Functions to load and save data ***/ template static inline void get(T& v, gzFile& fin) { if (gzread(fin, (char*)(&v), sizeof(v)) <= 0) GRAVIS_THROW2(gravis::Exception, "File ends prematurely"); } template static inline void put(std::ostream& os, const T& v) { os.write((char*)(&v), sizeof(v)); } static inline size_t get_size(gzFile& fin) { uint32_t size; get(size, fin); return size; } static inline void put_size(std::ostream& os, size_t size) { uint32_t sz = size; os.write((char*)(&sz), sizeof(sz)); } template static inline void get(std::vector &out, gzFile& fin) { size_t size = get_size(fin); out.resize(size); if (size > 0) if (gzread(fin, (char*)&(out[0]), sizeof(out[0]) * size) <= 0) GRAVIS_THROW2(gravis::Exception, "File ends prematurely"); } template static inline void get(gravis::tArray &out, gzFile& fin) { size_t size = get_size(fin); out.resize(size); if (size > 0) if (gzread(fin, (char*)&(out[0]), sizeof(out[0]) * size) <= 0) GRAVIS_THROW2(gravis::Exception, "File ends prematurely"); } template static inline void put(std::ostream& os, const std::vector &in) { put_size(os, in.size()); if ( in.size() > 0 ) os.write((char*)&(in[0]), sizeof(in[0]) * in.size()); } template static inline void put(std::ostream& os, const gravis::tArray &in) { put_size(os, in.size()); if ( in.size() > 0 ) // invalid access of in[0] if omitted!! os.write((char*)&(in[0]), sizeof(in[0]) * in.size()); } static inline void get(std::string& out, gzFile& fin) { size_t size = get_size(fin); out.resize(size); if (size == 0) return; char* temp = MEM_ALLOC(char*,size+1,1); temp[size] = 0; if (gzread(fin, temp, size) <= 0) GRAVIS_THROW2(gravis::Exception, "File ends prematurely"); out=temp; } static inline void put(std::ostream& os, const std::string& in) { put_size(os, in.size()); os.write(in.data(), in.size()); } public: /** * Load a mesh from the binary msh format **/ static inline void load(gravis::fMesh& mesh, const std::string& filename) { gzFile fin = gzopen(filename.c_str(), "rb"); if (0 == fin) { GRAVIS_THROW3(Exception, "Unable to open file: ", filename.c_str()); return; } try { char _id_start[MSH_CONSTANTS::id_start.size()+1]; char _id_end[MSH_CONSTANTS::id_end.size()+1]; _id_end[MSH_CONSTANTS::id_end.size()] = 0; _id_start[MSH_CONSTANTS::id_start.size()] = 0; gzread(fin, _id_start, MSH_CONSTANTS::id_start.size()); if (MSH_CONSTANTS::id_start != std::string(_id_start)) GRAVIS_THROW3(Exception, "File is not in gravis msh format.", filename.c_str()); mesh.material.resize(get_size(fin)); for (unsigned int i=0; i #include #include #include #include #include "../../tImage.h" #include "../../Exception.h" #include "absolute_paths.h" #if defined(HAVE_LIBZ) # include #endif namespace gravis { namespace io { /** * * saves and load meshes in/from obj format * The obj format is a text based represenation for triangle meshes, that * can hold a subset of the information in a Mesh object. * * It is useful to interface different applications, as it is a common format. * * For internal use the msh format (gravis::io::MeshMSH) might be a better * choice, as it is faster, more accurate and covers all of the * capabilities of the Mesh datastructure. * * When implementing loading and saving it is best to use the gravis::io::Mesh * and gravis::io::Mesh, which determine the filetype automatically. 
* **/ class MeshOBJ { static inline void write(ofstream& ofs, const gravis::fMesh& mesh, string mtlfn) { ofs << "mtllib " << mtlfn << endl; // Write vertices for (size_t i = 0; i < mesh.vertex.size(); ++i ) { ofs << "v " << mesh.vertex[i].x << " " << mesh.vertex[i].y << " " << mesh.vertex[i].z << endl; } ofs << "# " << mesh.vertex.size() << " vertices." << endl << endl; // Write texture coordinates for (size_t i = 0; i < mesh.texcrd.size(); ++i ) { ofs << "vt " << mesh.texcrd[i].x << " " << mesh.texcrd[i].y << " " << mesh.texcrd[i].z << endl; } ofs << "# " << mesh.texcrd.size() << " texture coordinates." << endl << endl; // Write texture normals for (size_t i = 0; i < mesh.normal.size(); ++i ) { ofs << "vn " << mesh.normal[i].x << " " << mesh.normal[i].y << " " << mesh.normal[i].z << " " << endl; } ofs << "# " << mesh.normal.size() << " normals." << endl << endl; // Write faces // NOTE: the vertex indices in OBJ format start from 1 int current_mtl = -1; for (size_t i = 0; i < mesh.tvi.size(); ++i ) { if(i=mesh.tti.size()) ofs << "/"; ofs << "/" << (mesh.tni[i][c])+1 ; } ofs << " "; } ofs << endl; } ofs << "# " << mesh.tvi.size() << " faces." << endl << endl; } static inline void writemtl( ofstream& ofs, const fMesh& mesh, const boost::filesystem::path objdir ) { for (size_t i=0; i=mesh.tti.size()) ofs << "/"; ofs << "/" << (mesh.tni[i][c])+1 ; } ofs << " "; } ofs << endl; } ofs << "# " << mesh.tvi.size() << " faces." << endl << endl; } static inline void writemtl( ofstream& ofs, const Mesh& mesh, const boost::filesystem::path objdir ) { for (size_t i=0; i 0) GRAVIS_THROW2(gravis::Exception, "SBDSQR did not converge to zero."); } /** *Use lapack to calculate an svd of a 2x2 matrix **/ void svd(d2Matrix& U, d2Vector& S, d2Matrix& VT, const d2Matrix& A) { d2Matrix _A(A); double WORK[16]; int INFO; dgesvd_('A', 'A', 2, 2, &(_A[0]), 2, &(S[0]), &(U[0]), 2, &(VT[0]), 2, WORK, 16, INFO); if (INFO < 0) GRAVIS_THROW2(gravis::Exception, "The i'th argument had an invalid value."); if (INFO > 0) GRAVIS_THROW2(gravis::Exception, "SBDSQR did not converge to zero."); } /** *Use lapack to calculate an svd of a 3x3 matrix **/ void svd(f3Matrix& U, f3Vector& S, f3Matrix& VT, const f3Matrix& A) { f3Matrix _A(A); float WORK[32]; int INFO; sgesvd_('A', 'A', 3, 3, &(_A[0]), 3, &(S[0]), &(U[0]), 3, &(VT[0]), 3, WORK, 32, INFO); if (INFO < 0) GRAVIS_THROW2(gravis::Exception, "The i'th argument had an invalid value."); if (INFO > 0) GRAVIS_THROW2(gravis::Exception, "SBDSQR did not converge to zero."); } /** *Use lapack to calculate an svd of a 3x3 matrix **/ void svd(d3Matrix& U, d3Vector& S, d3Matrix& VT, const d3Matrix& A) { d3Matrix _A(A); double WORK[32]; int INFO; dgesvd_('A', 'A', 3, 3, &(_A[0]), 3, &(S[0]), &(U[0]), 3, &(VT[0]), 3, WORK, 32, INFO); if (INFO < 0) GRAVIS_THROW2(gravis::Exception, "The i'th argument had an invalid value."); if (INFO > 0) GRAVIS_THROW2(gravis::Exception, "SBDSQR did not converge to zero."); } /** *Use lapack to calculate an svd of a 4x4 matrix **/ void svd(f4Matrix& U, f4Vector& S, f4Matrix& VT, const f4Matrix& A) { f4Matrix _A(A); float WORK[64]; int INFO; sgesvd_('A', 'A', 4, 4, &(_A[0]), 4, &(S[0]), &(U[0]), 4, &(VT[0]), 4, WORK, 64, INFO); if (INFO < 0) GRAVIS_THROW2(gravis::Exception, "The i'th argument had an invalid value."); if (INFO > 0) GRAVIS_THROW2(gravis::Exception, "SBDSQR did not converge to zero."); } /** *Use lapack to calculate an svd of a 4x4 matrix **/ void svd(d4Matrix& U, d4Vector& S, d4Matrix& VT, const d4Matrix& A) { d4Matrix _A(A); double WORK[64]; int 
INFO; dgesvd_('A', 'A', 4, 4, &(_A[0]), 4, &(S[0]), &(U[0]), 4, &(VT[0]), 4, WORK, 64, INFO); if (INFO < 0) GRAVIS_THROW2(gravis::Exception, "The i'th argument had an invalid value."); if (INFO > 0) GRAVIS_THROW2(gravis::Exception, "SBDSQR did not converge to zero."); } int rank(const f2Matrix& A, const float accuracy = 1e-10) { f2Matrix U; f2Vector S; f2Matrix VT; svd(U, S, VT, A); int r = 0; while (r<2 && (S[r] >= accuracy || S[r] <= -accuracy)) ++r; return r; } int rank(const d2Matrix& A, const double accuracy = 1e-10) { d2Matrix U; d2Vector S; d2Matrix VT; svd(U, S, VT, A); int r = 0; while (r<2 && (S[r] >= accuracy || S[r] <= -accuracy)) ++r; return r; } int rank(const f3Matrix& A, const float accuracy = 1e-10) { f3Matrix U; f3Vector S; f3Matrix VT; svd(U, S, VT, A); int r = 0; while (r<3 && (S[r] >= accuracy || S[r] <= -accuracy)) ++r; return r; } int rank(const d3Matrix& A, const double accuracy = 1e-10) { d3Matrix U; d3Vector S; d3Matrix VT; svd(U, S, VT, A); int r = 0; while (r<3 && (S[r] >= accuracy || S[r] <= -accuracy)) ++r; return r; } int rank(const f4Matrix& A, const float accuracy = 1e-10) { f4Matrix U; f4Vector S; f4Matrix VT; svd(U, S, VT, A); int r = 0; while (r<4 && (S[r] >= accuracy || S[r] <= -accuracy)) ++r; return r; } int rank(const d4Matrix& A, const double accuracy = 1e-10) { d4Matrix U; d4Vector S; d4Matrix VT; svd(U, S, VT, A); int r = 0; while (r<4 && (S[r] >= accuracy || S[r] <= -accuracy)) ++r; return r; } } #endif relion-3.1.3/src/jaz/gravis/matrix_blas_reference.h000066400000000000000000000062071411340063500223520ustar00rootroot00000000000000// v = v+M*x inline static void addmult(tVectorView<__GMBD_REAL> &v, const tConstMatrixView<__GMBD_REAL> &M, const tConstVectorView<__GMBD_REAL> &x) { GRAVIS_CHECK( v.size() == M.h, "v and M are incompatible"); GRAVIS_CHECK( x.size() == M.w, "M and x are incompatible"); for (size_t j=0; j &v, const tConstVectorView<__GMBD_REAL> &a, const tConstMatrixView<__GMBD_REAL> &M, const tConstVectorView<__GMBD_REAL> &x) { GRAVIS_CHECK( v.size() == a.size(), "v and a are incompatible"); // Addition v = a; addmult(v, M, x); } // v = M*x inline static void mult(tVectorView<__GMBD_REAL> &v, const tConstMatrixView<__GMBD_REAL> &M, const tConstVectorView<__GMBD_REAL> &x) { ::gravis::matrix::clear(v); addmult(v, M, x); } // v = v+(x^T M)^T inline static void addmult(tVectorView<__GMBD_REAL> &v, const tConstVectorView<__GMBD_REAL> &x, const tConstMatrixView<__GMBD_REAL> &M) { GRAVIS_CHECK( v.size() == M.w, "v and M are incompatible"); GRAVIS_CHECK( x.size() == M.h, "M and x are incompatible"); for (size_t i=0; i &v, const tConstVectorView<__GMBD_REAL> &a, const tConstVectorView<__GMBD_REAL> &x, const tConstMatrixView<__GMBD_REAL> &M) { GRAVIS_CHECK( v.size() == a.size(), "v and a are incompatible"); // Addition v = a; addmult(v, M, x); } // v = (x^T M)^T inline static void mult(tVectorView<__GMBD_REAL> &v, const tConstVectorView<__GMBD_REAL> &x, const tConstMatrixView<__GMBD_REAL> &M) { ::gravis::matrix::clear(v); addmult(v, M, x); } /** * Squared l2 norm **/ inline static __GMBD_REAL normL2sqr(const tConstVectorView<__GMBD_REAL> &v) { if (v.size() == 0) return 0; __GMBD_REAL result = v[0]*v[0]; for (size_t i=1; i &v) { if (v.size() == 0) return 0; __GMBD_REAL result = v[0]*v[0]; for (size_t i=1; i &v) { return __GMBD_REAL(sqrt(normL2sqr(v))); } /** forbenius norm **/ inline static __GMBD_REAL normL2(const tConstMatrixView<__GMBD_REAL> &v) { return __GMBD_REAL(sqrt(normL2sqr(v))); } // v = v+M*x inline static void 
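      // A minimal usage sketch for these reference BLAS helpers (not part of the original
      // header); names follow the view/vector types used above, and __GMBD_REAL is the scalar
      // type selected by the including header:
      //
      //   tVarVector<__GMBD_REAL> v(M.h);   // result vector, one entry per matrix row (assumed ctor)
      //   mult(v, M, x);                    // v = M*x
      //   addmult(v, M, x);                 // v = v + M*x  (accumulate a second product)
      //   __GMBD_REAL len = normL2(v);      // Euclidean length of the result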
addmult(tVarVector<__GMBD_REAL> &v, const tConstMatrixView<__GMBD_REAL> &M, const tConstVectorView<__GMBD_REAL> &x) { tVectorView<__GMBD_REAL> vv(v); addmult(vv, M, x); } // v = a+M*x inline static void addmult(tVarVector<__GMBD_REAL> &v, const tConstVectorView<__GMBD_REAL> &a, const tConstMatrixView<__GMBD_REAL> &M, const tConstVectorView<__GMBD_REAL> &x) { tVectorView<__GMBD_REAL> vv(v); addmult(vv, a, M, x); } // v = M*x inline static void mult(tVarVector<__GMBD_REAL> &v, const tConstMatrixView<__GMBD_REAL> &M, const tConstVectorView<__GMBD_REAL> &x) { tVectorView<__GMBD_REAL> vv(v); /* mult() clears the target view first, so the result is exactly M*x rather than v + M*x */ mult(vv, M, x); } relion-3.1.3/src/jaz/gravis/private/000077500000000000000000000000001411340063500173235ustar00rootroot00000000000000
<< endl << endl; } inline void OBJWriter::writemtl( string path, string mtlfn, const Mesh* mesh ) { #ifdef DEBUG cout <<"Writing material file: "<< string(path).append(mtlfn).c_str() << endl; #endif ofstream ofs( string(path).append(mtlfn).c_str(), ios::out ); //int current_mtl = -1; vector written; for (size_t i = 0; i < mesh->tmi.size(); ++i ) { int mtl = mesh->tmi[i]; //if (current_mtl != mtl && current_mtl != 0) { if (find(written.begin(),written.end(),mtl) == written.end() ) { const Material* material = &(mesh->material[mtl]); ofs << "newmtl " << material->name << endl // << "d " << material->opacity << endl << "ns " << material->shininess << endl << "ka " << material->ambient.r << " " << material->ambient.g << " " << material->ambient.b << " " << endl << "kd " << material->diffuse.r << " " << material->diffuse.g << " " << material->diffuse.b << " " << endl << "ks " << material->specular.r << " " << material->specular.g << " " << material->specular.b << " " << endl; if ( material->hasTexture) { string dmap_file = material->textureName; int pos = dmap_file.rfind(PATH_SEPARATOR); if (pos <= 0) pos = -1; std::string base = dmap_file.substr(pos+1, dmap_file.size()); ofs << "map_kd " << base << endl; } if ( material->hasEnvMap) { string dmap_file = material->envMapName; int pos = dmap_file.rfind(PATH_SEPARATOR); if (pos <= 0) pos = -1; std::string base = dmap_file.substr(pos+1, dmap_file.size()); ofs << "map_refl " << base << endl; } if ( material->hasNormalMap) { string dmap_file = material->normalMapName; int pos = dmap_file.rfind(PATH_SEPARATOR); if (pos <= 0) pos = -1; std::string base = dmap_file.substr(pos+1, dmap_file.size()); ofs << "map_norm " << base << endl; } written.push_back( mtl ); std::cout << "material " << i << std::endl; } //current_mtl = mtl; //} } } relion-3.1.3/src/jaz/gravis/private/CMakeLists.txt000066400000000000000000000005551411340063500220700ustar00rootroot00000000000000set( install_files OBJReader.hxx OBJWriter.hxx tDeterminants.h tImageConverter.hxx tImageConvolution.hxx tImageIO.hxx tImageIO_JPG.hxx tImageIO_PNG.hxx tImageIO_PNM.hxx tRefCPtr.h ) INSTALL(FILES ${install_files} DESTINATION ${CMAKE_INSTALL_PREFIX}/include/${LIBTITLE}-${LIBVERSION}/${LIBTITLE}/private) relion-3.1.3/src/jaz/gravis/private/OBJReader.hxx000066400000000000000000000434501411340063500216170ustar00rootroot00000000000000/****************************************************************************** ** Title: OBJReader.hxx ** Description: Class to import a .obj file info a Mesh. 
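// Editor's note: a minimal usage sketch for the OBJWriter working copy shown above.
// It assumes the class declaration from OBJWriter.h (not part of this excerpt) and an
// already-filled gravis::Mesh. The output filename must contain "obj", because the
// matching material filename is derived by replacing that substring with "mtl".
// Illustrative only; guarded out so it is not compiled.
#if 0
static void writeMeshExample(const gravis::Mesh& mesh)
{
    OBJWriter writer;                      // declaration assumed to come from OBJWriter.h
    writer.write("example.obj", mesh);     // writes example.obj; this working-copy version
                                           // also emits example.mtl via writemtl()
}
#endif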
** ** Author: Jean-Sebastien Pierrard, 2005 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #include #include #include "../Exception.h" #define HAVE_LIBZ #if defined(HAVE_LIBZ) # include #endif #ifdef __APPLE__ #include inline double OBJReader::my_atof(const char* str) { std::istringstream s(str); double d = 0; s >> d; return d; } #else inline double OBJReader::my_atof(const char* str) { return atof(str); } #endif inline OBJReader::OBJReader () : foundNormals(false), foundTexCrds(false) { /* ** Create 'fallback' material */ Material fb_mtl("_fallback_"); mtl_v.push_back(fb_mtl); /* ** Create 'default' object group */ Group fb_grp("_default_"); group_v.push_back(fb_grp); active_smggroup = 0; // 0 is the default ('no smoothing') group active_mtlgroup = 0; // Point to fallback } inline void OBJReader::read (std::string filename) { std::string::size_type n = filename.rfind('/'); if (n == std::string::npos) { objpath = "./"; } else { objpath = filename.substr(0, n); objpath += "/"; } parseFile(filename); } inline void OBJReader::toLowerCase (char* str) { while (*str != '\0') { if ((*str >= 'A') && (*str <= 'Z')) { *str = (char)((int)(*str) - (int)'A' + (int)'a'); } ++str; } } inline void OBJReader::parseFile (std::string filename) { #ifdef HAVE_LIBZ gzFile fin = gzopen (filename.c_str(), "rb"); if (0 == fin) { GRAVIS_THROW3(Exception, "Unable to open file: ", filename); return; } #else FILE* fin = fopen(filename.c_str(), "rt"); if (0 == fin) { GRAVIS_THROW3(Exception, "Unable to open file: ", filename); return; } #endif int linecount = 0; char line[OBJ_MAXLINELEN]; do { #ifdef HAVE_LIBZ if (Z_NULL == gzgets(fin, &line[0], OBJ_MAXLINELEN)) break; #else if (0 == fgets(&line[0], OBJ_MAXLINELEN, fin)) break; #endif int linelen = strlen(line); ++linecount; // Ignore comments and empty lines if ((line[0] == '#' ) || (line[0] == '\n')) continue; // Check for VERY long input lines, // TODO: this should be handled differently if (linelen == (OBJ_MAXLINELEN-1)) { std::cerr << "possible buffer overflow" << std::endl; continue; } // Tokenize line into argc,argv style int i=0; std::vector argv; while ((linelen > 0) && (i < linelen)) { while (isspace(line[i])) ++i; // skip leading spaces if (i < linelen) { argv.push_back(&line[i]); while (!isspace(line[i])) ++i; // read over sequence of non-spaces line[i++] = '\0'; // terminate each sequ. of non-spaces } } // Check on parse errors try { parseLine(argv); } catch (Exception& e) { std::cerr << "Parse error in '" << filename << "', line " << linecount << ": " << e.detail() << e.argument() << std::endl; } } while (true); #ifdef HAVE_LIBZ gzclose(fin); #else fclose(fin); #endif } inline void OBJReader::parseLine (std::vector& argv) { int argc = argv.size(); if (argc <= 0) return; // return on empty lines // Transform argv[0] (datatype) to lower-case and // derive integer key from result (max. 
first 4 letters) char* argv0=argv[0]; int tkey=0, tpos=0; toLowerCase(argv0); while (*argv0 != '\0') { if (tpos < 4) { tkey |= ((int)*argv0) << (24-8*tpos); ++tpos; } ++argv0; } switch (tkey) { case 0x76000000 : // tkey = "v", vertex coordinates { if (argc < 4) GRAVIS_THROW3(Exception, errUnexpectedArgs(), argv[0]); float x = my_atof(argv[1]); float y = my_atof(argv[2]); float z = my_atof(argv[3]); vertex_v.push_back(f3Vector(x, y, z)); if (argc == 7 || argc == 8) { float r = my_atof(argv[4])/255.0; float g = my_atof(argv[5])/255.0; float b = my_atof(argv[6])/255.0; float a = 0; if(argc == 8) a = my_atof(argv[7]); color_v.push_back(fRGBA(r, g, b,a)); } break; } case 0x766E0000 : // tkey = "vn", vertex normal { if (argc < 4) GRAVIS_THROW3(Exception, errUnexpectedArgs(), argv[0]); float x = my_atof(argv[1]); float y = my_atof(argv[2]); float z = my_atof(argv[3]); normal_v.push_back(f3Vector(x, y, z)); break; } case 0x76740000 : // tkey = "vt", texture coordinates { if (argc < 3) GRAVIS_THROW3(Exception, errUnexpectedArgs(), argv[0]); Texcoord uvw; if(argc < 4) uvw = Texcoord(my_atof(argv[1]), my_atof(argv[2]), 0.0); else uvw = Texcoord(atof(argv[1]), my_atof(argv[2]), my_atof(argv[3])); texcrd_v.push_back(uvw); break; } case 0x66000000 : // tkey = "(f)ace", polygon { Face face; face.smggroup = active_smggroup; face.mtlgroup = active_mtlgroup; // Number of vertices for this face(polygon) = argc - 1 // Parse each section separately for vertex/texture/normal for (int c=0; c sls; // pointers to the positions of the slashes in *cs Vertex vertex; vertex.vidx = atoi(cs); // Parse each section for the field separator "/" while (*cs != '\0') { if (*cs == '/') { sls.push_back(cs+1); *cs = '\0'; } ++cs; } switch (sls.size()) { case 0 : // no slashes: only vertex index defined { vertex.tidx = 0; vertex.nidx = 0; break; } case 1 : // one slash: vertex and texcrd index { foundTexCrds = true; vertex.tidx = atoi(sls[0]); vertex.nidx = 0; break; } case 2 : // two slashes: vertex, texcrd and normal index { foundNormals = true; if (sls[0] == (sls[1]-1)) // texcrd index ommited ?? { vertex.tidx = 0; vertex.nidx = atoi(sls[1]); } else { foundTexCrds = true; vertex.tidx = atoi(sls[0]); vertex.nidx = atoi(sls[1]); } break; } default : { GRAVIS_THROW3(Exception, "Unsupported face-format", argv[c+1]); } } // switch(number of slashes) // negative references must be remapped relative to current position if (vertex.vidx < 0) vertex.vidx += vertex_v.size() + 1; if (vertex.nidx < 0) vertex.nidx += normal_v.size() + 1; if (vertex.tidx < 0) vertex.tidx += texcrd_v.size() + 1; // OBJs first index is 1, C indexing start with 0 // non-set indices become -1, which is good! vertex.vidx -= 1; vertex.nidx -= 1; vertex.tidx -= 1; // Error(range) checking if ((vertex.vidx < 0) || (vertex.vidx >= int(vertex_v.size()))) GRAVIS_THROW3(Exception, "Vertex index out of range", argv[c+1]); if ((vertex.tidx <-1) || (vertex.tidx >= int(texcrd_v.size()))) GRAVIS_THROW3(Exception, "Texture index out of range", sls[0]); if ((vertex.nidx <-1) || (vertex.nidx >= int(normal_v.size()))) vertex.nidx = -1; face.corner.push_back(vertex); } //for(each corner) face_v.push_back(face); // Add face to each active group for (int gid=0; gid 0 && color_v.size() == vertex_v.size(); mesh.material.resize(mtl_v.size()); for (int i=0; i < (int)mtl_v.size(); ++i) { mesh.material[i] = mtl_v[i]; } /* ** Transfer triangle data ------------------------------------------------ */ // Compute number of triangles after breaking down non-triangular faces. 
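// Editor's note: the triangle count and the copy loop below implement a simple triangle
// fan: an n-sided face contributes n - 2 triangles, all anchored at its first corner.
// A self-contained sketch of the same idea (hypothetical helper, not part of gravis):
#if 0
#include <vector>
#include <array>

// Fan-triangulate a convex polygon given as a list of vertex indices:
// (v0, v1, ..., v_{n-1}) yields triangles (v0, v_{1+c}, v_{2+c}) for c = 0 .. n-3.
static std::vector< std::array<int, 3> > fanTriangulate(const std::vector<int>& face)
{
    std::vector< std::array<int, 3> > tris;
    for (int c = 0; c + 2 < (int)face.size(); ++c)
        tris.push_back({ face[0], face[c + 1], face[c + 2] });
    return tris;   // a quad (a, b, c, d) becomes (a, b, c) and (a, c, d)
}
#endif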
int tri_faces = face_v.size(); for (int i=0; i<(int)face_v.size(); ++i) { tri_faces += face_v[i].corner.size() - 3; } mesh.tvi.resize(tri_faces); if (foundNormals) mesh.tni.resize(tri_faces); if (foundTexCrds) mesh.tti.resize(tri_faces); if (foundColor) mesh.tci.resize(tri_faces); mesh.tmi.resize(tri_faces); int t = 0; for (int i=0; i<(int)face_v.size(); ++i) { const Face& face = face_v[i]; for (int c=0; c<=((int)face.corner.size()-3); ++c) { mesh.tvi[t].c0 = face.corner[0 ].vidx; mesh.tvi[t].c1 = face.corner[1 + c].vidx; mesh.tvi[t].c2 = face.corner[2 + c].vidx; if (foundNormals) { mesh.tni[t].c0 = face.corner[0 ].nidx; mesh.tni[t].c1 = face.corner[1 + c].nidx; mesh.tni[t].c2 = face.corner[2 + c].nidx; } if (foundTexCrds) { mesh.tti[t].c0 = face.corner[0 ].tidx; mesh.tti[t].c1 = face.corner[1 + c].tidx; mesh.tti[t].c2 = face.corner[2 + c].tidx; } if (foundColor) { mesh.tci[t].c0 = face.corner[0 ].vidx; mesh.tci[t].c1 = face.corner[1 + c].vidx; mesh.tci[t].c2 = face.corner[2 + c].vidx; } mesh.tmi[t] = face.mtlgroup; ++t; } } /* ** Transfer vertex data -------------------------------------------------- */ mesh.vertex.resize(vertex_v.size()); mesh.normal.resize(normal_v.size()); mesh.texcrd.resize(texcrd_v.size()); memcpy( mesh.vertex.data(), &(vertex_v[0]), 3*sizeof(float)*vertex_v.size() ); memcpy( mesh.normal.data(), &(normal_v[0]), 3*sizeof(float)*normal_v.size() ); memcpy( mesh.texcrd.data(), &(texcrd_v[0]), 3*sizeof(float)*texcrd_v.size() ); if(foundColor) { mesh.color.resize(color_v.size()); memcpy(mesh.color.data(), &(color_v[0]), 4*sizeof(float)*color_v.size()); } } relion-3.1.3/src/jaz/gravis/private/OBJWriter.hxx000066400000000000000000000131111411340063500216600ustar00rootroot00000000000000 #include #include #include #include #include #include "../tImage.h" #include "../Exception.h" #if defined(HAVE_LIBZ) # include #endif #ifdef WIN32 #define PATH_SEPARATOR "\\" #else #define PATH_SEPARATOR "/" #endif using namespace std; inline OBJWriter::OBJWriter() { } inline void OBJWriter::write(string filename, const gravis::Mesh& mesh) { // File::addSearchPath(objfile.pathName()); // File::addSearchPath("."); int pos = filename.rfind(PATH_SEPARATOR); std::string base = ""; string path = "."PATH_SEPARATOR; if(pos<=0) pos=-1; if(pos>0) path = filename.substr(0,pos+1); base = filename.substr(pos+1,filename.size()); #ifdef DEBUG cout << "Writing OBJ File: " << filename << endl; cout << "-- pathname: " << path << endl; cout << "-- basename: " << base << endl; #endif ofstream ofs( filename.c_str(), ios::out ); if(!ofs) GRAVIS_THROW3(Exception,"Can not open file for writing!",std::string("File: ")+filename); base.replace(base.find("obj"), 3, "mtl"); write(ofs, &mesh, base); //writemtl(path, base, &mesh); } /* void OBJWriter::write( ofstream& ofs, const gVAMesh* mesh ) { // Write vertices for ( size_t i = 0; i < mesh->getNofVertices(); ++i ) { ofs << "v " << mesh->vertex_v[i].x << " " << mesh->vertex_v[i].y << " " << mesh->vertex_v[i].z << endl; } ofs << "# " << mesh->getNofVertices() << " vertices." << endl << endl; // Write faces // NOTE: the vertex indices in OBJ format start from 1 for ( size_t i = 0; i < mesh->getNofTriangles(); ++i ) { ofs << "f " << mesh->triang_v[i].index[0]+1 << " " << mesh->triang_v[i].index[1]+1 << " " << mesh->triang_v[i].index[2]+1 << endl; } ofs << "# " << mesh->getNofTriangles() << " triangles." 
<< endl << endl; } */ inline void OBJWriter::write(ofstream& ofs, const gravis::Mesh* mesh, string mtlfn) { //ofs << "mtllib " << mtlfn << endl; // Write vertices for (size_t i = 0; i < mesh->vertex.size(); ++i ) { ofs << "v " << mesh->vertex[i].x << " " << mesh->vertex[i].y << " " << mesh->vertex[i].z << endl; } ofs << "# " << mesh->vertex.size() << " vertices." << endl << endl; // Write texture coordinates for (size_t i = 0; i < mesh->texcrd.size(); ++i ) { ofs << "vt " << mesh->texcrd[i].x << " " << mesh->texcrd[i].y << " " << mesh->texcrd[i].z << endl; } ofs << "# " << mesh->texcrd.size() << " texture coordinates." << endl << endl; // Write texture normals for (size_t i = 0; i < mesh->normal.size(); ++i ) { ofs << "vn " << mesh->normal[i].x << " " << mesh->normal[i].y << " " << mesh->normal[i].z << " " << endl; } ofs << "# " << mesh->normal.size() << " normals." << endl << endl; // Write faces // NOTE: the vertex indices in OBJ format start from 1 int current_mtl = -1; for (size_t i = 0; i < mesh->tvi.size(); ++i ) { if(itmi.size()) { int mtl = mesh->tmi[i]; if (current_mtl != mtl) { ofs << "usemtl " << mesh->material[mtl].name << endl; current_mtl = mtl; } } ofs << "f "; for (size_t c=0 ; c < 3; ++c ) { ofs << (mesh->tvi[i][c])+1; if(itti.size()) { ofs << "/" << (mesh->tti[i][c])+1; } if(itni.size()) { if (i>=mesh->tti.size()) ofs << "/"; ofs << "/" << (mesh->tni[i][c])+1 ; } ofs << " "; } ofs << endl; } ofs << "# " << mesh->tvi.size() << " faces." << endl << endl; } inline void OBJWriter::writemtl( string path, string mtlfn, const Mesh* mesh ) { #ifdef DEBUG cout <<"Writing material file: "<< string(path).append(mtlfn).c_str() << endl; #endif ofstream ofs( string(path).append(mtlfn).c_str(), ios::out ); //int current_mtl = -1; vector written; for (size_t i = 0; i < mesh->tmi.size(); ++i ) { int mtl = mesh->tmi[i]; //if (current_mtl != mtl && current_mtl != 0) { if (find(written.begin(),written.end(),mtl) == written.end() ) { const Material* material = &(mesh->material[mtl]); ofs << "newmtl " << material->name << endl // << "d " << material->opacity << endl << "ns " << material->shininess << endl << "ka " << material->ambient.r << " " << material->ambient.g << " " << material->ambient.b << " " << endl << "kd " << material->diffuse.r << " " << material->diffuse.g << " " << material->diffuse.b << " " << endl << "ks " << material->specular.r << " " << material->specular.g << " " << material->specular.b << " " << endl; if ( material->hasTexture) { string dmap_file = material->textureName; int pos = dmap_file.rfind(PATH_SEPARATOR); if (pos <= 0) pos = -1; std::string base = dmap_file.substr(pos+1, dmap_file.size()); ofs << "map_kd " << base << endl; } if ( material->hasEnvMap) { string dmap_file = material->envMapName; int pos = dmap_file.rfind(PATH_SEPARATOR); if (pos <= 0) pos = -1; std::string base = dmap_file.substr(pos+1, dmap_file.size()); ofs << "map_refl " << base << endl; } if ( material->hasNormalMap) { string dmap_file = material->normalMapName; int pos = dmap_file.rfind(PATH_SEPARATOR); if (pos <= 0) pos = -1; std::string base = dmap_file.substr(pos+1, dmap_file.size()); ofs << "map_norm " << base << endl; } written.push_back( mtl ); std::cout << "material " << i << std::endl; } //current_mtl = mtl; //} } } relion-3.1.3/src/jaz/gravis/private/tDeterminants.h000066400000000000000000000036001411340063500223140ustar00rootroot00000000000000/****************************************************************************** ** Title: tDeterminants.h ** Description: Templated functions 
for 2D, 3D and 4D determinants. ** ** Author: Michael Keller, 2005 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #ifndef _TDETERMINANTS_H_ #define _TDETERMINANTS_H_ namespace gravis { template inline T det2x2(T a1, T a2, T b1, T b2) { return + a1 * b2 - b1 * a2; } template inline T det3x3(T a1, T a2, T a3, T b1, T b2, T b3, T c1, T c2, T c3) { return + a1 * det2x2(b2, b3, c2, c3) - b1 * det2x2(a2, a3, c2, c3) + c1 * det2x2(a2, a3, b2, b3); } template inline T det4x4(T a1, T a2, T a3, T a4, T b1, T b2, T b3, T b4, T c1, T c2, T c3, T c4, T d1, T d2, T d3, T d4) { return + a1 * det3x3(b2, b3, b4, c2, c3, c4, d2, d3, d4) - b1 * det3x3(c2, c3, c4, d2, d3, d4, a2, a3, a4) + c1 * det3x3(d2, d3, d4, a2, a3, a4, b2, b3, b4) - d1 * det3x3(a2, a3, a4, b2, b3, b4, c2, c3, c4) - a2 * det3x3(b3, b4, b1, c3, c4, c1, d3, d4, d1) + b2 * det3x3(c3, c4, c1, d3, d4, d1, a3, a4, a1) - c2 * det3x3(d3, d4, d1, a3, a4, a1, b3, b4, b1) + d2 * det3x3(a3, a4, a1, b3, b4, b1, c3, c4, c1) + a3 * det3x3(b4, b1, b2, c4, c1, c2, d4, d1, d2) - b3 * det3x3(c4, c1, c2, d4, d1, d2, a4, a1, a2) + c3 * det3x3(d4, d1, d2, a4, a1, a2, b4, b1, b2) - d3 * det3x3(a4, a1, a2, b4, b1, b2, c4, c1, c2) - a4 * det3x3(b1, b2, b3, c1, c2, c3, d1, d2, d3) + b4 * det3x3(c1, c2, c3, d1, d2, d3, a1, a2, a3) - c4 * det3x3(d1, d2, d3, a1, a2, a3, b1, b2, b3) + d4 * det3x3(a1, a2, a3, b1, b2, b3, c1, c2, c3); } } #endif relion-3.1.3/src/jaz/gravis/private/tImageConverter.hxx000066400000000000000000000310231411340063500231510ustar00rootroot00000000000000/****************************************************************************** ** Title: tImageConverter.hxx ** Description: Traits and functions used to convert different pixel types. ** Required by reader/writer classes. INTERNAL USE ONLY ! 
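// Editor's note: a small sanity check for the det2x2/det3x3 helpers defined in
// tDeterminants.h above. det3x3(a1,a2,a3, b1,b2,b3, c1,c2,c3) takes the matrix rows
// (a1,a2,a3), (b1,b2,b3), (c1,c2,c3) and expands along the first column using det2x2.
// Illustrative only; not part of the library.
#if 0
#include <cassert>

static void checkDet3x3()
{
    // | 1 2  3 |
    // | 4 5  6 |   ->  determinant is exactly -3
    // | 7 8 10 |
    double d = gravis::det3x3(1.0, 2.0, 3.0,
                              4.0, 5.0, 6.0,
                              7.0, 8.0, 10.0);
    assert(d == -3.0);   // all intermediate products are small integers, so the test is exact
}
#endif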
** ** Author: Jean-Sebastien Pierrard, 2005 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ namespace gravis { namespace priv { template struct IPT_Traits { }; template <> struct IPT_Traits { typedef unsigned char Value_t; typedef int Promote_t; typedef float RealPromote_t; static inline Value_t fullValue () { return 255; } static inline Value_t zeroValue () { return 0; } }; template <> struct IPT_Traits { typedef float Value_t; typedef float Promote_t; typedef float RealPromote_t; static inline Value_t fullValue () { return 1.0f; } static inline Value_t zeroValue () { return 0.0f; } }; template <> struct IPT_Traits { typedef double Value_t; typedef double Promote_t; typedef double RealPromote_t; static inline Value_t fullValue () { return 1.0; } static inline Value_t zeroValue () { return 0.0; } }; // Integral Pixel Type Traits, used to convert pixel components template struct IPC_Traits { }; #define _DeclareIPConversion(FROM_T, TO_T, CONVERSION) \ template<> struct IPC_Traits { \ static inline TO_T convert (FROM_T value) { \ return (TO_T)(CONVERSION); \ } \ } _DeclareIPConversion(unsigned char, unsigned char, value); _DeclareIPConversion(unsigned char, float, (1.0f/255.0f)*float(value)); _DeclareIPConversion(unsigned char, double, (1.0/255.0)*double(value)); _DeclareIPConversion(double, unsigned char, 255.0f*value); _DeclareIPConversion(double, float, value); _DeclareIPConversion(float, unsigned char, 255.0f*value); _DeclareIPConversion(float, double, value); _DeclareIPConversion(float, float, value); _DeclareIPConversion(double, double, value); template struct pixelTypeConverter_1 { static inline void f (const T& src, T& dst) { dst = src; } }; template struct pixelTypeConverter_2 { static inline void f (const S& src, D& dst) { dst = IPC_Traits::convert(src); }; }; template struct pixelTypeConverter_2, tRGBA > { static inline void f (const tRGBA& src, tRGBA& dst) { dst = tRGBA( IPC_Traits::convert(src.r), IPC_Traits::convert(src.g), IPC_Traits::convert(src.b), IPC_Traits::convert(src.a) ); } }; template struct pixelTypeConverter_2, tRGB_A > { static inline void f (const tRGB_A& src, tRGB_A& dst) { dst.set( IPC_Traits::convert(src.r), IPC_Traits::convert(src.g), IPC_Traits::convert(src.b), IPC_Traits::convert(src.a) ); } }; template struct pixelTypeConverter_2, tRGB_A > { static inline void f (const tRGBA& src, tRGB_A& dst) { dst.set( IPC_Traits::convert(src.r), IPC_Traits::convert(src.g), IPC_Traits::convert(src.b), IPC_Traits::convert(src.a) ); } }; template struct pixelTypeConverter_2, tRGBA > { static inline void f (const tGray_A& src, tRGBA& dst) { dst.set( IPC_Traits::convert(src.g), IPC_Traits::convert(src.g), IPC_Traits::convert(src.g), IPC_Traits::convert(src.a) ); } }; template struct pixelTypeConverter_2, tRGB > { static inline void f (const tGray_A& src, tRGB& dst) { dst.set( IPC_Traits::convert(src.g), IPC_Traits::convert(src.g), IPC_Traits::convert(src.g) ); } }; template struct pixelTypeConverter_2, tRGBA > { static inline void f (const tRGB_A& src, tRGBA& dst) { dst.set( IPC_Traits::convert(src.r), IPC_Traits::convert(src.g), IPC_Traits::convert(src.b), IPC_Traits::convert(src.a) ); } }; template struct pixelTypeConverter_2, tRGB > { static inline void f (const tRGB& src, tRGB& dst) { dst = tRGB( IPC_Traits::convert(src.r), IPC_Traits::convert(src.g), IPC_Traits::convert(src.b) ); } }; template struct pixelTypeConverter_2, tRGBA > { static inline void f (const tRGB& src, tRGBA& dst) { 
dst = tRGBA( IPC_Traits::convert(src.r), IPC_Traits::convert(src.g), IPC_Traits::convert(src.b), IPT_Traits::fullValue() ); } }; template struct pixelTypeConverter_2, tRGB_A > { static inline void f (const tRGB& src, tRGB_A& dst) { dst.set( IPC_Traits::convert(src.r), IPC_Traits::convert(src.g), IPC_Traits::convert(src.b), IPT_Traits::fullValue() ); } }; template struct pixelTypeConverter_2, tRGB > { static inline void f (const tRGBA& src, tRGB& dst) { dst = tRGB( IPC_Traits::convert(src.r), IPC_Traits::convert(src.g), IPC_Traits::convert(src.b) ); } }; template struct pixelTypeConverter_2, tRGB > { static inline void f (const tRGB_A& src, tRGB& dst) { dst.set( IPC_Traits::convert(src.r), IPC_Traits::convert(src.g), IPC_Traits::convert(src.b) ); } }; template struct pixelTypeConverter_2 > { static inline void f (const S& src, tRGBA& dst) { dst = tRGBA( IPC_Traits::convert(src), IPT_Traits::fullValue() ); } }; template struct pixelTypeConverter_2 > { static inline void f (const S& src, tRGB_A& dst) { dst.set( IPC_Traits::convert(src), IPT_Traits::fullValue() ); } }; template struct pixelTypeConverter_2 > { static inline void f (const S& src, tRGB& dst) { dst = tRGB( IPC_Traits::convert(src) ); } }; // Convert from tRGB of type S to grayvalue of type D template struct pixelTypeConverter_2, D> { static inline void f (const tRGB& src, D& dst) { dst = IPC_Traits::convert(src.grayValue()); } }; // Convert from tRGBA of type S to grayvalue of type D template struct pixelTypeConverter_2, D> { static inline void f (const tRGBA& src, D& dst) { dst = IPC_Traits::convert(src.grayValue()); } }; template struct pixelTypeConverter_2, D> { static inline void f (const tRGB_A& src, D& dst) { dst = IPC_Traits::convert(src.grayValue()); } }; template struct pixelTypeConverter_2, D> { static inline void f (const tGray_A& src, D& dst) { dst = IPC_Traits::convert(src.grayValue()); } }; // tBGR template struct pixelTypeConverter_2, tRGB > { static inline void f (const tBGR& src, tRGB& dst) { dst = tRGB( IPC_Traits::convert(src.b), IPC_Traits::convert(src.g), IPC_Traits::convert(src.r) ); } }; template struct pixelTypeConverter_2, tBGR > { static inline void f (const tRGB& src, tBGR& dst) { dst = tBGR( IPC_Traits::convert(src.b), IPC_Traits::convert(src.g), IPC_Traits::convert(src.r) ); } }; template struct pixelTypeConverter_2 > { static inline void f (const S& src, tBGR& dst) { dst = tBGR( IPC_Traits::convert(src) ); } }; template struct pixelTypeConverter_2, tBGR > { static inline void f (const tRGBA& src, tBGR& dst) { dst = tBGR( IPC_Traits::convert(src.b), IPC_Traits::convert(src.g), IPC_Traits::convert(src.r) ); } }; /* template struct pixelTypeConverter_2, tBGR > { static inline void f (const tRGB_A& src, tBGR& dst) { dst.set( IPC_Traits::convert(src.b), IPC_Traits::convert(src.g), IPC_Traits::convert(src.r) ); } }; */ template struct pixelTypeConverter_2, D > { static inline void f (const tBGR& src, D& dst) { dst = IPC_Traits::convert(src.b); } }; template struct pixelTypeConverter_2, tRGBA > { static inline void f (const tBGR& src, tRGBA& dst) { dst = tRGBA( IPC_Traits::convert(src.b), IPC_Traits::convert(src.g), IPC_Traits::convert(src.r), IPT_Traits::fullValue() ); } }; /* template struct pixelTypeConverter_2, tRGB_A > { static inline void f (const tBGR& src, tRGB_A& dst) { dst.set( IPC_Traits::convert(src.b), IPC_Traits::convert(src.g), IPC_Traits::convert(src.r), IPT_Traits::fullValue() ); } }; */ template inline void pixelTypeConverter (const S& src, D& dst) { pixelTypeConverter_2::f(src, dst); } 
template inline void pixelTypeConverter (const T& src, T& dst) { pixelTypeConverter_1::f(src, dst); } template class tImageConverter { public: DTYPE operator() (const STYPE& src) const { DTYPE dst(src.cols(), src.rows()); typename STYPE::iterator src_it = src.begin(); typename DTYPE::iterator dst_it = dst.begin(); for (; dst_it != dst.end(); ++src_it, ++dst_it) { pixelTypeConverter(*src_it, *dst_it); } return dst; } void convert(const STYPE& src, DTYPE& dst) const { dst.resize(src.cols(), src.rows()); typename STYPE::iterator src_it = src.begin(); typename DTYPE::iterator dst_it = dst.begin(); for (; dst_it!=dst.end(); ++src_it, ++dst_it) { pixelTypeConverter(*src_it, *dst_it); } } }; } /* Close Namespace "priv" */ } /* Close Namespace "gravis" */ relion-3.1.3/src/jaz/gravis/private/tImageConvolution.hxx000066400000000000000000000007511411340063500235250ustar00rootroot00000000000000/****************************************************************************** ** Title: tImageConvolution.hxx ** Description: Image convolution with different border handling methods. ** ** Author: Jean-Sebastien Pierrard, 2005 ** Brian Schroeder, 2006 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ namespace gravis { } /* Close namespace "gravis" */ relion-3.1.3/src/jaz/gravis/private/tImageIO.hxx000066400000000000000000000032031411340063500215100ustar00rootroot00000000000000/****************************************************************************** ** Title: tImageIO.hxx ** Description: Traits for tImage class. ** Required by reader/writer classes. INTERNAL USE ONLY ! ** ** Author: Jean-Sebastien Pierrard, 2005 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ namespace gravis { namespace priv { template struct tImage_Traits { }; template struct tImage_Traits > { typedef float Pixel_t; typedef float CComp_t; static inline int nofComponents () { return 1; } }; template struct tImage_Traits > > { typedef tRGB Pixel_t; typedef T CComp_t; static inline int nofComponents () { return 3; } }; template struct tImage_Traits > > { typedef tRGBA Pixel_t; typedef T CComp_t; static inline int nofComponents () { return 4; } }; template struct tImage_Traits > > { typedef tRGB_A Pixel_t; typedef T CComp_t; static inline int nofComponents () { return 4; } }; /* template struct tImage_Traits > > { typedef tGray_A Pixel_t; typedef T CComp_t; static inline int nofComponents () { return 2; } }; */ } /* Close namespace "priv" */ } /* Close namespace "gravis" */ relion-3.1.3/src/jaz/gravis/private/tImageIO_JPG.hxx000066400000000000000000000355541411340063500222260ustar00rootroot00000000000000/****************************************************************************** ** Title: tImageIO_JPG.hxx ** Description: Implements reader/writer for JPG image format. 
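// Editor's note: an illustrative use of the pixelTypeConverter traits defined in
// tImageConverter.hxx above (not part of the library). Converting an 8-bit RGB pixel
// to a float produces its gray value scaled into [0, 1] via tRGB::grayValue() and
// IPC_Traits<unsigned char, float>.
#if 0
static void convertPixelExample()
{
    gravis::tRGB<unsigned char> white(255, 255, 255);
    float gray = 0.0f;
    gravis::priv::pixelTypeConverter(white, gray);   // gray is approximately 1.0f
}
#endif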
** ** Author: Jean-Sebastien Pierrard, 2005 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #ifndef __GRAVIS__TIMAGE_IO_JPG__ #define __GRAVIS__TIMAGE_IO_JPG__ #include #include #include #include #include namespace gravis { namespace priv { template class JPGImageReader { struct gravis_jpg_error_mgr { struct jpeg_error_mgr pub; /* "public" fields */ jmp_buf setjmp_buffer; /* for return to caller */ }; /* * Here's the routine that will replace the standard error_exit method: */ static void gravis_jpg_error_quiet (j_common_ptr cinfo) { gravis_jpg_error_mgr* myerr = (gravis_jpg_error_mgr*) cinfo->err; /* Return control to the setjmp point */ longjmp(myerr->setjmp_buffer, 1); } /* * Here's the routine that will replace the standard error_exit method: */ static void gravis_jpg_error_exit (j_common_ptr cinfo) { gravis_jpg_error_mgr* myerr = (gravis_jpg_error_mgr*) cinfo->err; /* Always display the message. */ /* We could postpone this until after returning, if we chose. */ (*cinfo->err->output_message) (cinfo); /* Return control to the setjmp point */ longjmp(myerr->setjmp_buffer, 1); } public: JPGImageReader () {}; ~JPGImageReader () {}; void read (tImage&, const std::string&); static bool canHandle (const std::string& filename); }; template class JPGImageWriter { struct gravis_jpg_error_mgr { struct jpeg_error_mgr pub; /* "public" fields */ jmp_buf setjmp_buffer; /* for return to caller */ }; /* * Here's the routine that will replace the standard error_exit method: */ static void gravis_jpg_error_quiet (j_common_ptr cinfo) { gravis_jpg_error_mgr* myerr = (gravis_jpg_error_mgr*) cinfo->err; /* Return control to the setjmp point */ longjmp(myerr->setjmp_buffer, 1); } /* * Here's the routine that will replace the standard error_exit method: */ static void gravis_jpg_error_exit (j_common_ptr cinfo) { gravis_jpg_error_mgr* myerr = (gravis_jpg_error_mgr*) cinfo->err; /* Always display the message. */ /* We could postpone this until after returning, if we chose. */ (*cinfo->err->output_message) (cinfo); /* Return control to the setjmp point */ longjmp(myerr->setjmp_buffer, 1); } public: JPGImageWriter () {}; ~JPGImageWriter () {}; void write (const tImage& image, const std::string& filename, int quality); }; template inline bool JPGImageReader::canHandle (const std::string& filename) { /* This struct contains the JPEG decompression parameters and pointers to * working space (which is allocated as needed by the JPEG library). */ struct jpeg_decompress_struct cinfo; FILE* infile; /* source file */ if ((infile = fopen(filename.c_str(), "rb")) == NULL) { GRAVIS_THROW3(gravis::Exception, "Could not open file", filename); return 0; } /* We set up the normal JPEG error routines, then override error_exit. */ struct gravis_jpg_error_mgr jerr; cinfo.err = jpeg_std_error(&jerr.pub); jerr.pub.error_exit = gravis_jpg_error_quiet; /* Establish the setjmp return context for gravis_jpg_error_exit to use. */ if (setjmp(jerr.setjmp_buffer)) { /* If we get here, the JPEG code has signaled an error. * We need to clean up the JPEG object, close the input file, and return. 
*/ jpeg_destroy_decompress(&cinfo); fclose(infile); return false; } jpeg_create_decompress(&cinfo); jpeg_stdio_src(&cinfo, infile); (void) jpeg_read_header(&cinfo, TRUE); return true; } template inline void JPGImageReader::read (tImage& image, const std::string& filename) { FILE* infile = fopen(filename.c_str(), "rb"); if (infile == NULL) { GRAVIS_THROW3(Exception, "Unable to open file: ", filename); } /* This struct contains the JPEG decompression parameters and pointers to * working space (which is allocated as needed by the JPEG library). */ struct jpeg_decompress_struct cinfo; /* We use our private extension JPEG error handler. * Note that this struct must live as long as the main JPEG parameter * struct, to avoid dangling-pointer problems. */ struct gravis_jpg_error_mgr jerr; /* More stuff */ JSAMPARRAY buffer; /* Output row buffer */ int row_stride; /* physical row width in output buffer */ /* Step 1: allocate and initialize JPEG decompression object */ /* We set up the normal JPEG error routines, then override error_exit. */ cinfo.err = jpeg_std_error(&jerr.pub); jerr.pub.error_exit = gravis_jpg_error_exit; /* Establish the setjmp return context for my_error_exit to use. */ if (setjmp(jerr.setjmp_buffer)) { /* If we get here, the JPEG code has signaled an error. * We need to clean up the JPEG object, close the input file, and return. */ jpeg_destroy_decompress(&cinfo); fclose(infile); GRAVIS_THROW3(gravis::Exception, "Could not read jpeg file", filename); return; } /* Now we can initialize the JPEG decompression object. */ jpeg_create_decompress(&cinfo); /* Step 2: specify data source (eg, a file) */ jpeg_stdio_src(&cinfo, infile); /* Step 3: read file parameters with jpeg_read_header() */ (void) jpeg_read_header(&cinfo, TRUE); /* We can ignore the return value from jpeg_read_header since * (a) suspension is not possible with the stdio data source, and * (b) we passed TRUE to reject a tables-only JPEG file as an error. * See libjpeg.doc for more info. */ /* Step 4: set parameters for decompression */ /* In this example, we don't need to change any of the defaults set by * jpeg_read_header(), so we do nothing here. */ /* Step 5: Start decompressor */ (void) jpeg_start_decompress(&cinfo); /* We can ignore the return value since suspension is not possible * with the stdio data source. */ /* We may need to do some setup of our own at this point before reading * the data. After jpeg_start_decompress() we have the correct scaled * output image dimensions available, as well as the output colormap * if we asked for color quantization. * In this example, we need to make an output work buffer of the right size. */ image.setSize(cinfo.output_width, cinfo.output_height); /* JSAMPLEs per row in output buffer */ row_stride = cinfo.output_width * cinfo.output_components; /* Make a one-row-high sample array that will go away when done with image */ buffer = (*cinfo.mem->alloc_sarray) ((j_common_ptr) &cinfo, JPOOL_IMAGE, row_stride, 1); /* Step 6: while (scan lines remain to be read) */ /* jpeg_read_scanlines(...); */ /* Here we use the library's state variable cinfo.output_scanline as the * loop counter, so that we don't have to keep track ourselves. */ while (cinfo.output_scanline < cinfo.output_height) { /* jpeg_read_scanlines expects an array of pointers to scanlines. * Here the array is only one element long, but you could ask for * more than one scanline at a time if that's more convenient. 
*/ (void) jpeg_read_scanlines(&cinfo, buffer, 1); /* Assume put_scanline_someplace wants a pointer and sample count. */ std::cout << cinfo.output_scanline << std::endl; for (size_t x=0; x sample( buffer[0][3*x], buffer[0][3*x+1], buffer[0][3*x+2]); T& pixel = image(x,cinfo.output_scanline-1); pixelTypeConverter(sample, pixel); } } /* Step 7: Finish decompression */ (void) jpeg_finish_decompress(&cinfo); /* We can ignore the return value since suspension is not possible * with the stdio data source. */ /* Step 8: Release JPEG decompression object */ /* This is an important step since it will release a good deal of memory. */ jpeg_destroy_decompress(&cinfo); /* After finish_decompress, we can close the input file. * Here we postpone it until after no more JPEG errors are possible, * so as to simplify the setjmp error logic above. (Actually, I don't * think that jpeg_destroy can do an error exit, but why assume anything...) */ fclose(infile); /* At this point you may want to check to see whether any corrupt-data * warnings occurred (test whether jerr.pub.num_warnings is nonzero). */ if (jerr.pub.num_warnings != 0) GRAVIS_THROW3(gravis::Exception, "Corrupted data in jpeg file", filename); image.setName(std::string(filename)); } template inline void JPGImageWriter::write (const tImage& image, const std::string& filename, int quality) { tImageConverter, tImage< tRGB > > convert; gravis::tImage< tRGB< unsigned char > > rgbimage = convert(image); FILE* outfile = fopen(filename.c_str(), "wb"); if (outfile == 0) { GRAVIS_THROW3(Exception, "Unable to open/create image file: ", filename); } if (image.cols() == 0 || image.rows()==0) GRAVIS_THROW3(Exception, "Can not write an empty image", filename); /* This struct contains the JPEG compression parameters and pointers to * working space (which is allocated as needed by the JPEG library). * It is possible to have several such structures, representing multiple * compression/decompression processes, in existence at once. We refer * to any one struct (and its associated working data) as a "JPEG object". */ struct jpeg_compress_struct cinfo; /* This struct represents a JPEG error handler. It is declared separately * because applications often want to supply a specialized error handler * (see the second half of this file for an example). But here we just * take the easy way out and use the standard error handler, which will * print a message on stderr and call exit() if compression fails. * Note that this struct must live as long as the main JPEG parameter * struct, to avoid dangling-pointer problems. */ struct gravis_jpg_error_mgr jerr; cinfo.err = jpeg_std_error(&jerr.pub); jerr.pub.error_exit = gravis_jpg_error_exit; /* Establish the setjmp return context for my_error_exit to use. */ if (setjmp(jerr.setjmp_buffer)) { /* If we get here, the JPEG code has signaled an error. * We need to clean up the JPEG object, close the input file, and return. */ jpeg_destroy_compress(&cinfo); fclose(outfile); return; } /* More stuff */ JSAMPROW row_pointer[1]; /* pointer to JSAMPLE row[s] */ int row_stride; /* physical row width in image buffer */ /* Step 1: allocate and initialize JPEG compression object */ /* We have to set up the error handler first, in case the initialization * step fails. (Unlikely, but it could happen if you are out of memory.) * This routine fills in the contents of struct jerr, and returns jerr's * address which we place into the link field in cinfo. */ cinfo.err = jpeg_std_error(&jerr.pub); /* Now we can initialize the JPEG compression object. 
*/ jpeg_create_compress(&cinfo); /* Step 2: specify data destination (eg, a file) */ jpeg_stdio_dest(&cinfo, outfile); /* Step 3: set parameters for compression */ /* First we supply a description of the input image. * Four fields of the cinfo struct must be filled in: */ cinfo.image_width = image.cols(); /* image width and height, in pixels */ cinfo.image_height = image.rows(); cinfo.input_components = 3; /* # of color components per pixel */ cinfo.in_color_space = JCS_RGB; /* colorspace of input image */ /* Now use the library's routine to set default compression parameters. * (You must set at least cinfo.in_color_space before calling this, * since the defaults depend on the source color space.) */ jpeg_set_defaults(&cinfo); /* Now you can set any non-default parameters you wish to. * Here we just illustrate the use of quality (quantization table) scaling: */ jpeg_set_quality(&cinfo, quality, TRUE /* limit to baseline-JPEG values */); /* Step 4: Start compressor */ /* TRUE ensures that we will write a complete interchange-JPEG file. * Pass TRUE unless you are very sure of what you're doing. */ jpeg_start_compress(&cinfo, TRUE); /* Step 5: while (scan lines remain to be written) */ /* jpeg_write_scanlines(...); */ /* Here we use the library's state variable cinfo.next_scanline as the * loop counter, so that we don't have to keep track ourselves. * To keep things simple, we pass one scanline per call; you can pass * more if you wish, though. */ row_stride = image.cols() * 3; /* JSAMPLEs per row in image_buffer */ while (cinfo.next_scanline < cinfo.image_height) { /* jpeg_write_scanlines expects an array of pointers to scanlines. * Here the array is only one element long, but you could pass * more than one scanline at a time if that's more convenient. */ row_pointer[0] = &rgbimage(0,cinfo.next_scanline)[0]; (void) jpeg_write_scanlines(&cinfo, row_pointer, 1); } /* Step 6: Finish compression */ jpeg_finish_compress(&cinfo); /* After finish_compress, we can close the output file. */ fclose(outfile); /* Step 7: release JPEG compression object */ /* This is an important step since it will release a good deal of memory. */ jpeg_destroy_compress(&cinfo); } } /* Close Namespace "priv" */ } /* Close Namespace "gravis" */ #endif relion-3.1.3/src/jaz/gravis/private/tImageIO_PNG.hxx000066400000000000000000000177761411340063500222400ustar00rootroot00000000000000/****************************************************************************** ** Title: tImageIO_PNG.hxx ** Description: Implements reader/writer for PNG image format. 
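// Editor's note: a minimal usage sketch for the JPGImageReader/JPGImageWriter defined
// in tImageIO_JPG.hxx above. Illustrative only; error handling is omitted (both classes
// throw gravis::Exception on failure), and the include path is the one used by the
// other headers in this directory.
#if 0
#include "../tImage.h"

static void jpegRoundTripExample()
{
    typedef gravis::tRGB<unsigned char> Pixel;

    gravis::tImage<Pixel> img;
    gravis::priv::JPGImageReader<Pixel> reader;
    reader.read(img, "input.jpg");            // decodes into img, one scanline at a time

    gravis::priv::JPGImageWriter<Pixel> writer;
    writer.write(img, "output.jpg", 90);      // quality in [0, 100], passed to jpeg_set_quality
}
#endif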
** ** Author: Jean-Sebastien Pierrard, 2005 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #include #include #include namespace gravis { namespace priv { template class PNGImageReader { public: PNGImageReader (); ~PNGImageReader (); void read (tImage&, const char*); static bool canHandle (const char*); }; template class PNGImageWriter { public: PNGImageWriter (); ~PNGImageWriter (); void write (const tImage& image, const char* filename); }; template inline PNGImageReader::PNGImageReader () { } template inline PNGImageReader::~PNGImageReader () { } template inline bool PNGImageReader::canHandle (const char* header) { bool is_png = !png_sig_cmp((png_byte*)header, 0, 4); return is_png; } template inline void PNGImageReader::read (tImage& image, const char* filename) { FILE* pngfilep = fopen(filename, "rb"); if (pngfilep == 0) { GRAVIS_THROW3(Exception, "Unable to open file: ", filename); } png_structp png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, this, 0, 0); if (png_ptr == 0) { fclose(pngfilep); GRAVIS_THROW3(Exception, "PNG Read Error: ", filename); } png_infop info_ptr = png_create_info_struct(png_ptr); if (info_ptr == 0) { fclose(pngfilep); png_destroy_read_struct(&png_ptr, NULL, NULL); GRAVIS_THROW3(Exception, "PNG Read Error: ", filename); } png_init_io(png_ptr, pngfilep); png_read_info(png_ptr, info_ptr); png_uint_32 width, height; int depth, color_type, interlace_type; png_get_IHDR( png_ptr, info_ptr, &width, &height, &depth, &color_type, &interlace_type, NULL, NULL ); // Reduce 16 bit images to 8 bit png_set_strip_16(png_ptr); // Expand paletted images to RGB if (color_type == PNG_COLOR_TYPE_PALETTE) png_set_palette_to_rgb(png_ptr); // Expand grayscale images to the full 8 bits from 1, 2, or 4 bits/pixel if ((color_type == PNG_COLOR_TYPE_GRAY) && (depth < 8)) png_set_expand_gray_1_2_4_to_8(png_ptr); // Expang grayscale image to RGB(A), // This should definitely be optimzed !! if (color_type == PNG_COLOR_TYPE_GRAY) { png_set_gray_to_rgb(png_ptr); } // /* ??? // Add filler (or alpha) byte (before/after each RGB triplet) png_set_filler(png_ptr, 0xff, PNG_FILLER_AFTER); // ??? */ // png_set_interlace_handling(png_ptr); png_read_update_info(png_ptr, info_ptr); // ??? 
// std::cout << "Image: " << width << " x " << height << "(" << depth << "bpp)\n"; // std::cout << "Bytes/Row: " << png_get_rowbytes(png_ptr, info_ptr) << "\n"; png_bytep* row_pointers = new png_bytep[height]; for (unsigned int row=0; row pixel(src_data[0], src_data[1], src_data[2], src_data[3]); pixelTypeConverter(pixel, *tgt_data); tgt_data++; src_data += 4; } } // Free PNG-image for (unsigned int row=0; row inline PNGImageWriter::PNGImageWriter () { } template inline PNGImageWriter::~PNGImageWriter () { } template inline void PNGImageWriter::write (const tImage& image, const char* filename) { FILE* pngfilep = fopen(filename, "wb"); if (pngfilep == 0) { GRAVIS_THROW3(Exception, "Unable to open/create image file: ", filename); } png_structp png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, this, 0, 0); if (png_ptr == 0) { fclose(pngfilep); GRAVIS_THROW3(Exception, "PNG(internal) write error: ", filename); } png_infop info_ptr = png_create_info_struct(png_ptr); if (info_ptr == 0) { fclose(pngfilep); png_destroy_write_struct(&png_ptr, NULL); GRAVIS_THROW3(Exception, "PNG(internal) write error: ", filename); } png_init_io(png_ptr, pngfilep); int color_type; int nof_cc = tImage_Traits >::nofComponents(); switch (nof_cc) { case 1 : color_type = PNG_COLOR_TYPE_GRAY; break; case 3 : color_type = PNG_COLOR_TYPE_RGB; break; case 4 : color_type = PNG_COLOR_TYPE_RGB_ALPHA; break; default : { std::cerr << "Unhandled number of color components \n" ; exit(1); } } if (image.cols() == 0 || image.rows()==0) GRAVIS_THROW3(Exception, "Can not write an empty image", filename); png_set_IHDR( png_ptr, info_ptr, image.cols(), image.rows(), 8/*depth*/, color_type, PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_BASE, PNG_FILTER_TYPE_BASE ); png_write_info(png_ptr, info_ptr); switch (nof_cc) { case 1 : { tImageConverter, tImage > convert; tImage out(image.cols(), image.rows()); out = convert(image); png_bytep* row_pointers = new png_bytep[image.rows()]; for (unsigned int i=0; i, tImage > convert; tImage out(image.cols(), image.rows()); out = convert(image); png_bytep* row_pointers = new png_bytep[image.rows()]; for (unsigned int i=0; i, tImage > convert; tImage out(image.cols(), image.rows()); out = convert(image); png_bytep* row_pointers = new png_bytep[image.rows()]; for (unsigned int i=0; i #include namespace gravis { namespace priv { template class PNMImageReader { public: PNMImageReader (); ~PNMImageReader (); void read (tImage& image, const char* filename); static bool canHandle (const char* header); protected: bool garbage; bool isWS (int) const; int getInt (std::istream&); }; template class PNMImageWriter { public: PNMImageWriter (); ~PNMImageWriter (); void write (const tImage& image, const char* filename); public: bool raw, fullcolor; }; template inline PNMImageReader::PNMImageReader () { } template inline PNMImageReader::~PNMImageReader () { } template inline bool PNMImageReader::isWS (int c) const { return ((c == ' ') || (c == '\r') || (c == '\t') || (c == '\n')); } template int PNMImageReader::getInt (std::istream& is) { int c, i = 0; c = is.get(); while (1) { // Skip comments (lines starting with #) if (c == '#') { while (1) { c = is.get(); if ((c == '\n') || (c == EOF)) break; } } if (c == EOF) return EOF; if ((c >= '0') && (c <= '9')) break; if (!isWS(c)) garbage = true; c = is.get(); } while (1) { i = (i * 10) + (c - '0'); c = is.get(); if (c == EOF) return i; if ((c < '0') || (c > '9')) break; } return i; } // Identify PNM images by file content (header) // Search for "P3\n"(ascii) or 
"P6\n"(binary) RGB // "P2\n"(ascii) or "P5\n"(binary) GRAYSCALE template inline bool PNMImageReader::canHandle (const char* header) { if ( (header[0] == 'P') && ((header[1] == '3') || (header[1] == '6') || (header[1] == '2') || (header[1] == '5')) && (header[2] == (char)0x0A) ) return true; else return false; } template inline void PNMImageReader::read (tImage& image, const char* filename) { int wd, ht, colors, c, counter = 0; bool grayscale = true, ascii = true; std::ifstream is(filename, std::ios::in | std::ios::binary); if (!is.good()) { GRAVIS_THROW3(Exception, "Unable to open file", filename); } garbage = false; c = is.get(); // read 'P' c = is.get(); // read format identifier if (c == '2') { grayscale = true; ascii = true; } if (c == '3') { grayscale = false; ascii = true; } if (c == '5') { grayscale = true; ascii = false; } if (c == '6') { grayscale = false; ascii = false; } wd = getInt(is); ht = getInt(is); colors = getInt(is); if (garbage) { GRAVIS_THROW3(Exception, "Corrupt image header", filename); } if ((wd <= 0) || (ht <= 0)) { GRAVIS_THROW3(Exception, "Illegal image dimensions", filename); } if (colors > 255) { GRAVIS_THROW3(Exception, "Unsupported colorvalues", filename); } image.setSize(wd, ht); image.setName(std::string(filename)); if (grayscale) { T* data = image.begin(); if (ascii) { for (; data!=image.end(); ++data) { c = getInt(is); if (c >= 0) { unsigned char value = (unsigned char)c; pixelTypeConverter(value, *data); ++counter; } else break; } } else // not ascii { unsigned char* bbuf = new unsigned char[image.size()]; unsigned char* bptr = bbuf; is.read((char*)bbuf, image.size()); counter += is.gcount(); for (; data!=image.end(); ++data, ++bptr) { pixelTypeConverter(*bptr, *data); } delete[] bbuf; } } else // not grayscale { T* data = image.begin(); if (ascii) { for (; data!=image.end(); ++data) { bRGB pnm_pixel; c = getInt(is); if (c >= 0) pnm_pixel.r = (unsigned char)c; else break; c = getInt(is); if (c >= 0) pnm_pixel.g = (unsigned char)c; else break; c = getInt(is); if (c >= 0) pnm_pixel.b = (unsigned char)c; else break; pixelTypeConverter(pnm_pixel, *data); ++counter; } } else // not grayscale, not ascii { unsigned char* bbuf = new unsigned char[3*image.cols()]; for (unsigned int y=0; y inline PNMImageWriter::PNMImageWriter () { raw = true; fullcolor = true; } template inline PNMImageWriter::~PNMImageWriter () { } template inline void PNMImageWriter::write (const tImage& image, const char* filename) { std::ofstream os(filename, std::ios::out | std::ios::binary); if (!os.good()) { GRAVIS_THROW3(Exception, "Unable to open/create file", filename); } T* data = image.begin(); if (fullcolor == false) // Write GRAY values { if (raw == false) // Write 'ascii' format { os << "P2\n" << image.cols() << " " << image.rows() << "\n255\n"; int cols_per_line = 0; for (; data!=image.end(); ++data) { if (cols_per_line > 8) { cols_per_line = 0; os << "\n"; } unsigned char value = 0; pixelTypeConverter(*data, value); os << int(value) << " "; ++cols_per_line; } } else // Write 'raw' format { os << "P5\n" << image.cols() << " " << image.rows() << "\n255\n"; unsigned char* bbuf = new unsigned char[image.cols()]; for (int y=0; y 8) { cols_per_line = 0; os << "\n"; } tRGB value; pixelTypeConverter(*data, value); os << int(value.r) << " " << int(value.g) << " " << int(value.b) << " "; ++cols_per_line; } } else // Write 'raw' format { os << "P6\n" << image.cols() << " " << image.rows() << "\n255\n"; tRGB* bbuf = new tRGB[image.cols()]; for (int y=0; y* bptr = bbuf; // Convert one h-line 
of pixels into tmp-buffer for (int i=0; i namespace gravis { namespace priv { template class tRefCPtr { public: enum allocType { ALLOC_OBJECT, ALLOC_ARRAY }; explicit tRefCPtr (T* tptr=0, allocType alloct=ALLOC_OBJECT, unsigned int c=1) : refc_ptr(0) { if (tptr != 0) refc_ptr = new RefCounter(tptr, alloct, c); } tRefCPtr (const tRefCPtr& rcp) { _acquireCounter(rcp.refc_ptr); } ~tRefCPtr () { _releaseCounter(); } tRefCPtr& operator= (const tRefCPtr& rcp) { if (this != &rcp) { _releaseCounter(); _acquireCounter(rcp.refc_ptr); } return *this; } T& operator* () const { return *(refc_ptr->tptr); } T* operator-> () const { return refc_ptr->tptr; } bool isNull() const; protected: struct RefCounter { RefCounter (T* ptr=0, allocType _alloct=ALLOC_OBJECT, unsigned int c=1) : tptr(ptr), alloct(_alloct), counter(c) {} private: // Reference Counters should not be copied RefCounter(const RefCounter& o) : tptr(o.tptr), alloct(o.alloct), counter(o.counter) {} // Reference Counters should not be copied RefCounter& operator=(const RefCounter& o) { tptr=o.tptr; alloct=o.alloct; counter=o.counter; } public: ~RefCounter () { assert(counter == 0); if (counter == 0) { if (alloct == ALLOC_OBJECT) delete tptr; else delete[] tptr; tptr = 0; } } unsigned int addRef () { return ++counter; } unsigned int freeRef () { return --counter; } unsigned int getRefCounts () const { return counter; } T* tptr; allocType alloct; unsigned int counter; }* refc_ptr; void _acquireCounter (RefCounter* rc) { refc_ptr = rc; if (rc != 0) rc->addRef(); } void _releaseCounter () { if (refc_ptr != 0) { if (refc_ptr->freeRef() == 0) { delete refc_ptr; refc_ptr = 0; } } } }; template inline bool tRefCPtr::isNull() const { return refc_ptr == 0; } } /* Close namespace "priv" */ } /* Close namespace "gravis" */ #endif /* _TREFCPTR_H_ */ relion-3.1.3/src/jaz/gravis/program_options.h000066400000000000000000000043471411340063500212540ustar00rootroot00000000000000#ifndef __LIBGRAVIS_PROGRAM_OPTIONS_H__ #define __LIBGRAVIS_PROGRAM_OPTIONS_H__ /** * \file * This headers includes the boost program options header, disabling the warnings such that we can still have our programs compile with -Werror * Also it defines the MACROS PO_SWITCH and PO_VALUE, which make option definitions much more readable. **/ #if defined __GNUC__ #pragma GCC system_header #elif defined __SUNPRO_CC #pragma disable_warn #elif defined _MSC_VER #pragma warning(push, 1) #endif #include #include #include #include namespace boost { namespace program_options { template std::string vec2string(const std::vector &v) { std::stringstream s; for (size_t i=0; idefault_value(opt), desc) /** * Shortcut to define a boost program option with a value (e.g. a string or an integer) * * int number = false; * * po::options_description desc("Example Options"); * desc.add_options() * PO_VALUE("number,n", number, "Set the number of foos to use") * ; **/ #define PO_VALUE( key, opt, desc) (key, boost::program_options::value(&opt)->default_value(opt), desc) /** * Shortcut to define a boost program option with a vector value (e.g. 
a vector of strings) * * std::vector vec; vec.push_back(1); vec.push_back(2); * * po::options_description desc("Example Options"); * desc.add_options() * PO_VECTOR("vec,v", vec, "List of foos") * ; **/ #define PO_VECTOR(key, opt, desc) (key, boost::program_options::value(&opt)->default_value(opt, boost::program_options::vec2string(opt)), desc) #if defined __SUNPRO_CC #pragma enable_warn #elif defined _MSC_VER #pragma warning(pop) #endif #endif relion-3.1.3/src/jaz/gravis/t2Matrix.h000066400000000000000000000141311411340063500175340ustar00rootroot00000000000000#ifndef __LIBGRAVIS_T2MATRIX_H__ #define __LIBGRAVIS_T2MATRIX_H__ /****************************************************************************** ** Title: t2Matrix.h ** Description: Represents a 2x2 matrix with column-major memory layout. ** ** Author: Jean-Sebastien Pierrard, 2005 ** Brian Amberg 2006 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #include #include "t2Vector.h" namespace gravis { /** * A 2x2 matrix with column-major memory layout **/ template struct t2Matrix { T m[4]; t2Matrix () { loadIdentity(); }; t2Matrix (const t2Matrix& mat); explicit t2Matrix (const T& val) { m[0] = val; m[1] = val; m[2] = val; m[3] = val; }; explicit t2Matrix (const T* v_ptr) { m[ 0] = v_ptr[ 0]; m[ 1] = v_ptr[ 1]; m[ 2] = v_ptr[ 2]; m[ 3] = v_ptr[ 3]; }; t2Matrix (T m0, T m2, T m1, T m3); void set (T m0, T m2, T m1, T m3); const T& operator[] (int idx) const; T& operator[] (int idx); const T& operator() (int row, int col) const { return m[(col << 1) + row]; } T& operator() (int row, int col) { return m[(col << 1) + row]; } t2Matrix operator*(T f) const; t2Vector operator* (const t2Vector&) const; t2Matrix operator* (const t2Matrix&) const; t2Matrix& operator*= (const t2Matrix&); /** * Element Wise Addition (Inplace) **/ inline t2Matrix& operator+= (const t2Matrix& rhs) { for (size_t i=0; i<4; ++i) m[i] += rhs.m[i]; return *this; } /** * Element Wise Subtraction (Inplace) **/ inline t2Matrix& operator-= (const t2Matrix& rhs) { for (size_t i=0; i<4; ++i) m[i] -= rhs.m[i]; return *this; } /** * Element Wise Addition **/ inline t2Matrix operator+ (const t2Matrix& rhs) const { t2Matrix result(*this); return(result += rhs); } /** * Element Wise Subtraction **/ inline t2Matrix operator- (const t2Matrix& rhs) const { t2Matrix result(*this); return(result -= rhs); } /** * Matrix Norm (2 Norm) **/ inline T norm2() const { return m[0]*m[0] + m[1]*m[1] + m[2]*m[2] + m[3]*m[3]; } /** * Matrix Trace (sum(diag(M))) **/ T trace() const; void transpose (); void invert(); void loadIdentity (); static t2Matrix identity(); static t2Matrix scale (const t2Vector& s) { return t2Matrix( s[0], T(0), T(0) , s[1]); }; static t2Matrix scale (const T& s) { return scale(t2Vector(s,s)); } /** * Create a 2x2 rotation matrix for a clockwise rotation around a rad. 
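// Editor's note: an illustrative use of the rotation() factory defined just below
// (not part of the library). A quarter-turn rotation maps the x axis onto the y axis,
// up to floating point rounding.
#if 0
static void rotationExample()
{
    const double quarter_turn = 1.5707963267948966;   // pi / 2
    gravis::d2Matrix R = gravis::d2Matrix::rotation(quarter_turn);
    gravis::t2Vector<double> v = R * gravis::t2Vector<double>::unitX();
    // v is (cos(pi/2), sin(pi/2)), i.e. approximately (0, 1)
}
#endif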
**/ static t2Matrix rotation(const T& a) { return t2Matrix( cos(a), -sin(a), sin(a), cos(a)); } }; template inline t2Matrix::t2Matrix (const t2Matrix& mat) { m[0] = mat.m[0]; m[1] = mat.m[1]; m[2] = mat.m[2]; m[3] = mat.m[3]; } template inline t2Matrix::t2Matrix (T m0, T m2, T m1, T m3) { m[ 0] = m0; m[ 1] = m1; m[ 2] = m2; m[ 3] = m3; } template inline void t2Matrix::set (T m0, T m2, T m1, T m3) { m[ 0] = m0; m[ 1] = m1; m[ 2] = m2; m[ 3] = m3; } template inline const T& t2Matrix::operator[] (int idx) const { #ifdef _GRAVIS_DEBUG_RANGECHECKING_ assert((idx >= 0) && (idx < 4)); #endif return m[idx]; } template inline T& t2Matrix::operator[] (int idx) { #ifdef _GRAVIS_DEBUG_RANGECHECKING_ assert((idx >= 0) && (idx < 4)); #endif return m[idx]; } template inline t2Matrix& t2Matrix::operator*= (const t2Matrix& op) { *this = this->operator*(op); return *this; } template inline t2Matrix t2Matrix::operator* (const t2Matrix& op) const { return t2Matrix( m[0]*op.m[ 0] + m[2]*op.m[ 1], m[0]*op.m[ 2] + m[2]*op.m[3], m[1]*op.m[ 0] + m[3]*op.m[ 1], m[1]*op.m[ 2] + m[3]*op.m[3]); } template inline t2Vector t2Matrix::operator* (const t2Vector& op) const { return t2Vector( m[ 0]*op.x + m[ 2]*op.y, m[ 1]*op.x + m[ 3]*op.y); } template inline t2Matrix t2Matrix::operator* (T f) const { return t2Matrix(f * m[0], f * m[2], f * m[1], f * m[3]); } template inline t2Matrix operator* (T f, const t2Matrix& v) { return t2Matrix(f * v[0], f * v[2], f * v[1], f * v[3]); } template inline void t2Matrix::loadIdentity () { m[ 0] = T(1); m[ 2] = T(0); m[ 1] = T(0); m[ 3] = T(1); } template inline void t2Matrix::transpose () { std::swap(m[1], m[2]); } template inline void t2Matrix::invert() { t2Matrix A = *this; T di = 1.0/(A[0]*A[3]-A[1]*A[2]); m[0]= A[3]*di; m[1]=-A[1]*di; m[2]=-A[2]*di; m[3]= A[0]*di; } template inline t2Matrix t2Matrix::identity () { return t2Matrix( T(1), T(0), T(0), T(1)); } template inline T t2Matrix::trace() const { return ( m[0] + m[3]); } template inline std::ostream& operator<< (std::ostream& os, const t2Matrix& arg) { os << "[ " << arg[ 0] << " " << arg[ 2] << " ]\n"; os << "[ " << arg[ 1] << " " << arg[ 3] << " ]\n"; return os; } template inline std::istream& operator>> ( std::istream& is, t2Matrix& arg) { std::string dummy; is >> dummy >> arg[ 0] >> arg[ 2] >> dummy; is >> dummy >> arg[ 1] >> arg[ 3] >> dummy; return is; } typedef t2Matrix f2Matrix; typedef t2Matrix d2Matrix; } /* Close Namespace "gravis" */ #endif relion-3.1.3/src/jaz/gravis/t2Vector.h000066400000000000000000000152231411340063500175350ustar00rootroot00000000000000#ifndef __LIBGRAVIS_T2VECTOR_H__ #define __LIBGRAVIS_T2VECTOR_H__ /****************************************************************************** ** Title: t2Vector.h ** Description: Represents a two dimensional vector. ** ** Author: Pascal Paysan, 2005 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #include #include #include #include #include #include namespace gravis { template class t2Vector { public: T x, y; typedef T scalar_type; t2Vector() : x(T(0)), y(T(0)) { } explicit t2Vector(T _v) : x(_v), y(_v) { } t2Vector(T _x, T _y) : x(_x), y(_y) { } template explicit t2Vector(const t2Vector& vec) : x(vec.x), y(vec.y) {} static t2Vector unitX() { return t2Vector(T(1), T(0)); } static t2Vector unitY() { return t2Vector(T(0), T(1)); } void set (T _v) { x = y= _v; } void set (T _x, T _y) { x = _x; y = _y; } T length () const { return T(::sqrt(x*x + y*y)); } //! 
Beware: This is not the 2 norm but the square of the two norm. T norm2 () const { return (x*x + y*y); } //! \f$l_1\f$ Norm: \f$\sum_i |v_i|\f$ T normL1 () const { return (std::abs(x) + std::abs(y)); } //! \f$l_2\f$ Norm: \f$\sqrt{\sum_i |v_i|^2}\f$ T normL2 () const { return sqrt(x*x + y*y); } //! \f$l_2\f$ Norm: \f$\sqrt{\sum_i |v_i|^2}\f$ T normL2sqr () const { return x*x + y*y; } //! \f$l_\infty\f$ Norm: \f$\max{ |v_i|\,|\, \forall i }\f$ T normLInf() const { return std::max(std::abs(x), std::abs(y)); } t2Vector& normalize (T f=1.0) { T norm = f / ::sqrt(x*x + y*y); x *= norm; y *= norm; return *this; } T dot (const t2Vector& arg) const { return (x*arg.x + y*arg.y); } const T& operator[] (int idx) const { #ifdef _GRAVIS_DEBUG_RANGECHECKING_ assert((idx >= 0) && (idx < 2)); #endif return (&x)[idx]; } T& operator[] (int idx) { #ifdef _GRAVIS_DEBUG_RANGECHECKING_ assert((idx >= 0) && (idx < 2)); #endif return (&x)[idx]; } bool operator == ( const t2Vector& arg ) const { return ( x == arg.x && y == arg.y); } bool operator != ( const t2Vector& arg ) const { return !(*this == arg); } t2Vector& operator += (const t2Vector& arg) { x += arg.x; y += arg.y; return *this; } t2Vector& operator -= (const t2Vector& arg) { x -= arg.x; y -= arg.y; return *this; } t2Vector& operator += (const T& scalar) { x += scalar; y += scalar; return *this; } t2Vector& operator -= (const T& scalar) { x -= scalar; y -= scalar; return *this; } t2Vector& operator *= (const T& arg) { x *= arg; y *= arg; return *this; } t2Vector& operator /= (const T& arg) { x /= arg; y /= arg; return *this; } //! Check if the entries of the other vector differ by less than epsilon. // It is better to use this than to use operator== for comparision, if it is // not the same vertex. bool isClose( const t2Vector& o, const T epsilon) const { return ((std::fabs(x-o.x) < epsilon) and (std::fabs(y-o.y) < epsilon)); } static t2Vector normalize (const t2Vector& v1, T f=1.0f) { T norm = f / T(::sqrt(v1.x*v1.x + v1.y*v1.y)); return t2Vector(v1.x * norm, v1.y * norm); } t2Vector operator / (const T& arg) const { t2Vector r(*this); r /= arg; return r; } }; template inline t2Vector operator + (const t2Vector& v1, const t2Vector& v2) { return t2Vector(v1.x + v2.x, v1.y + v2.y); } template inline t2Vector operator - (const t2Vector& v1) { return t2Vector(-v1.x, -v1.y); } template inline t2Vector operator - (const t2Vector& v1, const t2Vector& v2) { return t2Vector(v1.x - v2.x, v1.y - v2.y); } template inline t2Vector operator + (const T& s, const t2Vector& v2) { return t2Vector(s + v2.x, s + v2.y); } template inline t2Vector operator - (const T& s, const t2Vector& v2) { return t2Vector(s - v2.x, s - v2.y); } template inline t2Vector operator + (const t2Vector& v, const T& s) { return t2Vector(v.x + s, v.y + s); } template inline t2Vector operator - (const t2Vector& v, const T& s) { return t2Vector(v.x - s, v.y - s); } template inline t2Vector operator * (T f, const t2Vector& v) { return t2Vector(f * v.x, f * v.y); } template inline t2Vector operator * (const t2Vector& v, const T& f) { return t2Vector(f * v.x, f * v.y); } template inline t2Vector operator * (const t2Vector& v, const t2Vector& f) { return t2Vector(v.x * f.x, v.y * f.y); } template inline std::ostream& operator<< (std::ostream& os, const t2Vector& arg) { os << "[" << arg.x << ", " << arg.y << "]"; return os; } template inline T dot (const t2Vector& v1, const t2Vector& v2) { return (v1.x*v2.x + v1.y*v2.y); } // Inverse of operator<< template inline std::istream& operator>> (std::istream& 
is, t2Vector& arg) { char c = ' '; is >> c; if (is.eof()) return is; if (c != '[') throw std::runtime_error("Vector should start with an opening ["); std::stringstream values; int v = 0; while ((is >> c) && (c != ']')) { if (c == ',') { v++; if (v >= 2) throw std::runtime_error("Vector contains more than three elements"); values << " "; } else if (c != ' ') values << c; } if (c != ']') { throw std::runtime_error("Vector should end with a ]"); } values >> arg.x >> arg.y; return is; } typedef t2Vector f2Vector; typedef t2Vector d2Vector; typedef t2Vector i2Vector; } /* Close Namespace "gravis" */ #endif relion-3.1.3/src/jaz/gravis/t3Matrix.h000066400000000000000000000350341411340063500175420ustar00rootroot00000000000000#ifndef __LIBGRAVIS_T3MATRIX_H__ #define __LIBGRAVIS_T3MATRIX_H__ /****************************************************************************** ** Title: t3Matrix.h ** Description: Represents a 3x3 matrix with column-major memory layout. ** ** Author: Michael Keller, 2005 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #include #include #include "t3Vector.h" #include "tRGB.h" #include "private/tDeterminants.h" namespace gravis { template class t4Matrix; /*! \brief A 3x3 matrix class. * * There is no operator*=, because some people expect it to be a left-multiplication * and others a right-multiplication. To avoid confusion we only provide the explicit * methods lmul() and rmul(). */ template class t3Matrix { public: T m[9]; t3Matrix(); explicit t3Matrix(T v); t3Matrix(const T* v_ptr); t3Matrix(const t3Matrix& mat); t3Matrix(T m0, T m3, T m6, T m1, T m4, T m7, T m2, T m5, T m8); template explicit t3Matrix (const t3Matrix& mat) { for(int i=0; i<9; ++i) m[i] = static_cast(mat.m[i]); } void set(T m0, T m3, T m6, T m1, T m4, T m7, T m2, T m5, T m8); /*! \brief Return indexed entry (column major). */ const T& operator[](int idx) const { return m[idx]; } /*! \brief Return reference to indexed entry (column major). */ T& operator[](int idx) { return m[idx]; } /*! \brief Return entry in row i and column j. */ const T& operator()(int row, int col) const { return m[col * 3 + row]; } /*! \brief Return reference to entry in row i and column j. */ T& operator()(int row, int col) { return m[col * 3 + row]; } //! Check if the entries of the other vector differ by less than epsilon. // It is better to use this than to use operator== for comparision, if it is // not the same vertex. 
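// Illustrative sketch (not from the original source; it uses the d3Matrix typedef defined at
// the end of this header):
//   gravis::d3Matrix A, B;              // both default-construct to the identity
//   bool same = A.isClose(B, 1e-12);    // true: every entry differs by less than epsilon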
bool isClose( const t3Matrix& o, const T epsilon) const { for (int i=0; i<9; i++) if (std::fabs(m[i]-o.m[i]) >= epsilon) return false; return true; } bool operator==(const t3Matrix& o) const { for (int i=0; i<9; i++) if (m[i] != o.m[i]) return false; return true; } bool operator!=(const t3Matrix& o) const { return !(*this == o); } t3Matrix operator*(T f) const; t3Matrix& operator*=(const T& f); t3Matrix& operator/=(const T& f); t3Vector operator*(const t3Vector&) const; tRGB operator*(const tRGB&) const; t3Matrix operator*(const t3Matrix&) const; t3Matrix& operator+=(const t3Matrix&); t3Matrix& operator-=(const t3Matrix&); t3Matrix operator+(const t3Matrix&) const; t3Matrix operator-(const t3Matrix&) const; t3Matrix operator-() const; t3Matrix& lmul(const t3Matrix& m); t3Matrix& rmul(const t3Matrix& m); T trace() const; T det() const; t3Matrix adjugate() const; t3Matrix& transpose(); t3Matrix& invert(); t3Matrix& loadIdentity(); t3Vector getAxis() const; static t3Matrix extract(const t4Matrix& mat); static t3Matrix scale(const t3Vector&); static t3Matrix scale (const T& s) { return scale(t3Vector(s,s,s)); } static t3Matrix rotation(const t3Vector& u, const t3Vector& v); static t3Matrix rotation(const t3Vector& axis, float angle); static t3Matrix rotationX(T angle); static t3Matrix rotationY(T angle); static t3Matrix rotationZ(T angle); }; /*! \brief Constructs an identity matrix. */ template inline t3Matrix::t3Matrix() { loadIdentity(); } /*! \brief Constructs a matrix with all entries set to val. */ template inline t3Matrix::t3Matrix(T val) { for (int i = 0; i < 9; i++) m[i] = val; } /*! \brief Constructs a matrix with entries taken from an array. * * \param v_ptr array must be of appropriate length and in column-major layout */ template inline t3Matrix::t3Matrix(const T* v_ptr) { for (int i = 0; i < 9; i++) m[i] = v_ptr[i]; } /*! \brief Copy constructor. */ template inline t3Matrix::t3Matrix(const t3Matrix& mat) { for (int i = 0; i < 9; i++) m[i] = mat.m[i]; } /*! \brief Constructs a matrix from the given entries (row major). */ template inline t3Matrix::t3Matrix(T m0, T m3, T m6, T m1, T m4, T m7, T m2, T m5, T m8) { m[0] = m0; m[1] = m1; m[2] = m2; m[3] = m3; m[4] = m4; m[5] = m5; m[6] = m6; m[7] = m7; m[8] = m8; } /*! \brief Overwrites this matrix with the given entries (row major). */ template inline void t3Matrix::set(T m0, T m3, T m6, T m1, T m4, T m7, T m2, T m5, T m8) { m[0] = m0; m[1] = m1; m[2] = m2; m[3] = m3; m[4] = m4; m[5] = m5; m[6] = m6; m[7] = m7; m[8] = m8; } /*! \brief Scalar times matrix. */ template inline t3Matrix operator*(T f, const t3Matrix& mat) { t3Matrix out(mat); out *= f; return out; } /*! \brief Matrix times scalar. */ template inline t3Matrix t3Matrix::operator*(T f) const { t3Matrix out(*this); out *= f; return out; } /*! \brief Multiply this matrix with a scalar. */ template inline t3Matrix& t3Matrix::operator*=(const T& f) { for (int i = 0; i < 9; i++) m[i] *= f; return *this; } /*! \brief Divide this matrix by a scalar. */ template inline t3Matrix& t3Matrix::operator/=(const T& f) { for (int i = 0; i < 9; i++) m[i] /= f; return *this; } /*! \brief Matrix times vector. */ template inline t3Vector t3Matrix::operator* (const t3Vector& op) const { return t3Vector( m[0]*op.x + m[3]*op.y + m[6]*op.z, m[1]*op.x + m[4]*op.y + m[7]*op.z, m[2]*op.x + m[5]*op.y + m[8]*op.z ); } /*! \brief Matrix times vector. 
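*
* A minimal sketch of the matrix-times-vector overloads (not part of the original header; it
* assumes the f3Matrix/f3Vector typedefs defined in these headers). The tRGB overload below
* applies the same linear map to the (r, g, b) channels.
*
*   gravis::f3Matrix S = gravis::f3Matrix::scale(gravis::f3Vector(2.f, 1.f, 1.f));
*   gravis::f3Vector p(1.f, 1.f, 1.f);
*   gravis::f3Vector q = S * p;          // (2, 1, 1)
*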
*/ template inline tRGB t3Matrix::operator* (const tRGB& op) const { return tRGB( m[0]*op.r + m[3]*op.g + m[6]*op.b, m[1]*op.r + m[4]*op.g + m[7]*op.b, m[2]*op.r + m[5]*op.g + m[8]*op.b ); } /*! \brief Matrix times matrix. */ template inline t3Matrix t3Matrix::operator* (const t3Matrix& op) const { return t3Matrix(m[0]*op.m[0] + m[3]*op.m[1] + m[6]*op.m[2], m[0]*op.m[3] + m[3]*op.m[4] + m[6]*op.m[5], m[0]*op.m[6] + m[3]*op.m[7] + m[6]*op.m[8], m[1]*op.m[0] + m[4]*op.m[1] + m[7]*op.m[2], m[1]*op.m[3] + m[4]*op.m[4] + m[7]*op.m[5], m[1]*op.m[6] + m[4]*op.m[7] + m[7]*op.m[8], m[2]*op.m[0] + m[5]*op.m[1] + m[8]*op.m[2], m[2]*op.m[3] + m[5]*op.m[4] + m[8]*op.m[5], m[2]*op.m[6] + m[5]*op.m[7] + m[8]*op.m[8] ); } /*! \brief Adds other matrix to this matrix. */ template inline t3Matrix& t3Matrix::operator+=(const t3Matrix& op) { for (int i = 0; i < 9; i++) m[i] += op.m[i]; return *this; } /*! \brief Subtracts other matrix from this matrix. */ template inline t3Matrix& t3Matrix::operator-=(const t3Matrix& op) { *this += -op; return *this; } /*! \brief Matrix plus matrix. */ template inline t3Matrix t3Matrix::operator+(const t3Matrix& op) const { t3Matrix out(*this); return out += op; } /*! \brief Matrix minus matrix. */ template inline t3Matrix t3Matrix::operator-(const t3Matrix& op) const { t3Matrix out(*this); return out += -op; } /*! \brief Return additive inverse of this matrix. */ template inline t3Matrix t3Matrix::operator-() const { t3Matrix out(*this); for (int i = 0; i < 9; i++) out[i] = -out[i]; return out; } /*! \brief Right-multiply m to this matrix (*this = *this * m). */ template inline t3Matrix& t3Matrix::rmul(const t3Matrix& m) { *this = *this * m; return *this; } /*! \brief Left-multiply m to this matrix (*this = m * *this). */ template inline t3Matrix& t3Matrix::lmul(const t3Matrix& m) { *this = m * *this; return *this; } /*! \brief Return the trace of this matrix (\f$a_{11} + a_{22} + a_{33}\f$). */ template inline T t3Matrix::trace() const { return ( m[0] + m[4] + m[8] ); } /*! \brief Return the determinant of this matrix. */ template inline T t3Matrix::det() const { return det3x3(m[0], m[3], m[6], m[1], m[4], m[7], m[2], m[5], m[8]); } /*! \brief Return the adjugate of this matrix. */ template inline t3Matrix t3Matrix::adjugate() const { // transpose of cofactor matrix return t3Matrix( det2x2(m[4],m[7],m[5],m[8]), det2x2(m[5],m[8],m[3],m[6]), det2x2(m[3],m[6],m[4],m[7]), det2x2(m[7],m[1],m[8],m[2]), det2x2(m[8],m[2],m[6],m[0]), det2x2(m[6],m[0],m[7],m[1]), det2x2(m[1],m[4],m[2],m[5]), det2x2(m[2],m[5],m[0],m[3]), det2x2(m[0],m[3],m[1],m[4])); } /*! \brief Transpose this matrix. * Attention: Although innocent looking this is an inplace operation **/ template inline t3Matrix& t3Matrix::transpose() { std::swap(m[1],m[3]); std::swap(m[2],m[6]); std::swap(m[5],m[7]); return *this; } /*! \brief Invert this matrix. * Attention: Although innocent looking this is an inplace operation **/ template inline t3Matrix& t3Matrix::invert() { *this = (T(1)/det())*adjugate(); return *this; } /*! \brief Overwrite this matrix with an identity matrix. */ template inline t3Matrix& t3Matrix::loadIdentity () { m[0] = T(1); m[1] = T(0); m[2] = T(0); m[3] = T(0); m[4] = T(1); m[5] = T(0); m[6] = T(0); m[7] = T(0); m[8] = T(1); return *this; } /*! \brief Retrieves the (not normalized) axis of rotation, * assuming this matrix describes a rotation. 
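*
* A hedged usage sketch for the rotation factories (not part of the original header; note that
* rotation(axis, angle) expects the angle in degrees, while rotationX/Y/Z expect radians):
*
*   gravis::f3Vector axis(0.f, 1.f, 0.f);
*   gravis::f3Matrix R = gravis::f3Matrix::rotation(axis, 90.f);   // 90 degrees about y
*   gravis::f3Vector v = R * gravis::f3Vector(1.f, 0.f, 0.f);      // v rotated about the y axis
*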
*/ template inline t3Vector t3Matrix::getAxis() const { // gemaess Artin, "Algebra", Kapitel 4, Aufgabe 14 float a0 = m[5] + m[7]; // (2,3) + (3,2) float a1 = m[2] + m[6]; // (1,3) + (3,1) float a2 = m[1] + m[3]; // (1,2) + (2,1) if (a0 == 0) return t3Vector(T(1), T(0), T(0)); else if (a1 == 0) return t3Vector(T(0), T(1), T(0)); else if (a2 == 0) return t3Vector(T(0), T(0), T(1)); else return t3Vector(T(1)/a0, T(1)/a1, T(1)/a2); } /*! \brief Return the upper left 3x3 matrix from mat. */ template inline t3Matrix t3Matrix::extract(const t4Matrix& mat) { return t3Matrix(mat.m[0], mat.m[4], mat.m[8], mat.m[1], mat.m[5], mat.m[9], mat.m[2], mat.m[6], mat.m[10]); } /*! \brief Return a matrix representing a scaling by s. */ template inline t3Matrix t3Matrix::scale(const t3Vector& s) { return t3Matrix( s.x, T(0), T(0), T(0), s.y, T(0), T(0), T(0), s.z ); } /*! Return a matrix that will rotate u into v. */ template inline t3Matrix t3Matrix::rotation(const t3Vector& u, const t3Vector& v) { T phi; T h; T lambda; t3Vector w; w = u.cross(v); phi = u.dot(v); lambda = w.dot(w); if (lambda > 1e-10) h = ((T)1.0 - phi) / lambda; else h = lambda; T hxy = w.x * w.y * h; T hxz = w.x * w.z * h; T hyz = w.y * w.z * h; t3Matrix out(phi + w.x * w.x * h, hxy + w.z, hxz - w.y, hxy - w.z, phi + w.y * w.y * h, hyz + w.x, hxz + w.y, hyz - w.x, phi + w.z * w.z * h); return out; } /*! \brief Return a matrix that rotates by specified angle (in degrees) around specified axis. */ template inline t3Matrix t3Matrix::rotation(const t3Vector& axis, float angle) { // formula copied form GL specification t3Vector n(axis); n.normalize(); // convert to radians angle *= (float)(3.1415927/180.); t3Matrix s(0, -n.z, n.y, n.z, 0, -n.x, -n.y, n.x, 0); t3Matrix nnt(n.x*n.x, n.x*n.y, n.x*n.z, n.y*n.x, n.y*n.y, n.y*n.z, n.z*n.x, n.z*n.y, n.z*n.z); return nnt + T(cos(angle))*(t3Matrix() - nnt) + T(sin(angle))*s; } template inline std::ostream& operator<< (std::ostream& os, const t3Matrix& arg) { os << "[ " << std::setw(10) << arg[0] << " " << std::setw(10) << arg[3] << " " << std::setw(10) << arg[6] << " ]\n"; os << "| " << std::setw(10) << arg[1] << " " << std::setw(10) << arg[4] << " " << std::setw(10) << arg[7] << " |\n"; os << "| " << std::setw(10) << arg[2] << " " << std::setw(10) << arg[5] << " " << std::setw(10) << arg[8] << " |\n"; return os; } template inline std::istream& operator>> ( std::istream& is, t3Matrix& arg) { std::string dummy; is >> dummy >> arg[0] >> arg[3] >> arg[6] >> dummy; is >> dummy >> arg[1] >> arg[4] >> arg[7] >> dummy; is >> dummy >> arg[2] >> arg[5] >> arg[8] >> dummy; return is; } template inline t3Matrix t3Matrix::rotationX (T a) { return t3Matrix( T(1), T(0), T(0), T(0), T(cos(a)), T(-sin(a)), T(0), T(sin(a)), T(cos(a)) ); } template inline t3Matrix t3Matrix::rotationY (T a) { return t3Matrix( T(cos(a)), T(0), T(-sin(a)), T(0), T(1), T(0), T(sin(a)), T(0), T(cos(a)) ); } template inline t3Matrix t3Matrix::rotationZ (T a) { return t3Matrix( T(cos(a)), T(-sin(a)), T(0), T(sin(a)), T(cos(a)), T(0), T(0), T(0), T(1) ); } typedef t3Matrix f3Matrix; typedef t3Matrix d3Matrix; } // namespace gravis #endif relion-3.1.3/src/jaz/gravis/t3Vector.h000066400000000000000000000272561411340063500175470ustar00rootroot00000000000000#ifndef __LIBGRAVIS_T3VECTOR_H__ #define __LIBGRAVIS_T3VECTOR_H__ /****************************************************************************** ** Title: t3Vector.h ** Description: ** ******************************************************************************/ #include #include #include 
#include #include #include namespace gravis { template class t4Vector; template class t2Vector; template class tRGBA; template class t3Vector { public: typedef T scalar_type; t3Vector () : x(T(0)), y(T(0)), z(T(0)) {} explicit t3Vector (T _v) : x(_v), y(_v), z(_v) {} t3Vector (T _x, T _y, T _z) : x(_x), y(_y), z(_z) {} t3Vector (const t3Vector& vec) : x(vec.x), y(vec.y), z(vec.z) {} // Generalized Copy Constructor (allows e.g. conversion from double to float) template explicit t3Vector (const t3Vector& vec) : x(vec.x), y(vec.y), z(vec.z) {} // Initialization from Array t3Vector (const T* vecd) : x(vecd[0]), y(vecd[1]), z(vecd[2]) {} explicit t3Vector (const t4Vector& vec) : x(vec.x/vec.w), y(vec.y/vec.w), z(vec.z/vec.w) {} //rk /* Deprecated. Should use t4Vector::toVector3() which has more appropriate logic - this here makes only sense under the (wrong) assumption that all 4D vectors represent finite points - but homogeneous coordinates can also be directions, displacements or points at infinity, all with w == 0. - in any case, too much logic for a cast - BAD (mk) */ static t3Vector unitX () { return t3Vector(T(1), T(0), T(0)); } static t3Vector unitY () { return t3Vector(T(0), T(1), T(0)); } static t3Vector unitZ () { return t3Vector(T(0), T(0), T(1)); } void set (T _v) { x = y = z = _v; } void set (T _x, T _y, T _z) { x = _x; y = _y; z = _z; } T length () const { return ::sqrt(x*x + y*y + z*z); } //! Beware: This is not the 2 norm but the square of the two norm. T norm2 () const { return (x*x + y*y + z*z); } //! Squared L2 Norm T normL2sqr () const { return (x*x + y*y + z*z); } //! \f$l_1\f$ Norm: \f$\sum_i |v_i|\f$ T normL1 () const { return (std::fabs(x) + std::fabs(y) + std::fabs(z)); } //! \f$l_2\f$ Norm: \f$\sqrt{\sum_i |v_i|^2}\f$ T normL2 () const { return sqrt(x*x + y*y + z*z); } //! \f$l_\infty\f$ Norm: \f$\max{ |v_i|\,|\, \forall i }\f$ T normLInf() const { return std::max(std::max(std::abs(x), std::abs(y)), std::abs(z)); } T sum() const { return x + y + z; } t3Vector findOrthogonal() const; void invert () { x = -x; y = -y; z = -z; } T dot (const t3Vector& arg) const { return (x*arg.x + y*arg.y + z*arg.z); } t3Vector cross (const t3Vector& arg) const { return t3Vector( y*arg.z - z*arg.y, z*arg.x - x*arg.z, x*arg.y - y*arg.x ); } /** * Inplace normalization **/ t3Vector& normalize (T f=T(1)) { if (f == T(0)) set(T(0), T(0), T(0)); T norm = length()/f; if (norm != T(0)) { *this /= norm; } return *this; } /*! \brief Component wise multiplication (matlab ".*"). */ t3Vector cmul(const t3Vector& v) const { return t3Vector(x * v.x, y * v.y, z * v.z); } /*! \brief Component wise division (matlab "./"). */ t3Vector cdiv(const t3Vector& v) const { return t3Vector(x / v.x, y / v.y, z / v.z); } /*! \brief Interpolate three values, using this vector as barycentric coordinates. 
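*
* A small sketch (not part of the original header; it assumes the d3Vector typedef defined at
* the end of this file):
*
*   gravis::d3Vector bary(0.2, 0.3, 0.5);        // barycentric weights, summing to 1
*   double a = 1.0, b = 2.0, c = 4.0;            // per-vertex values
*   double v = bary.interpolate(a, b, c);        // 0.2*1 + 0.3*2 + 0.5*4 = 2.8
*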
*/ template Value interpolate(const Value& a, const Value& b, const Value& c) const { return x * a + y * b + z * c; } const T& operator[] (int idx) const { return (&x)[idx]; } T& operator[] (int idx) { return (&x)[idx]; } bool operator == ( const t3Vector& arg ) const { return ( x == arg.x && y == arg.y && z == arg.z ); } bool operator != ( const t3Vector& arg ) const { return !(*this == arg); } t3Vector& operator += (const t3Vector& arg) { x += arg.x; y += arg.y; z += arg.z; return *this; } t3Vector& operator -= (const t3Vector& arg) { x -= arg.x; y -= arg.y; z -= arg.z; return *this; } t3Vector& operator += (const T& scalar) { x += scalar; y += scalar; z += scalar; return *this; } t3Vector& operator -= (const T& scalar) { x -= scalar; y -= scalar; z -= scalar; return *this; } t3Vector& operator *= (T arg) { x *= arg; y *= arg; z *= arg; return *this; } t3Vector operator * (T arg) const { return t3Vector(x * arg, y * arg, z * arg); } t3Vector& operator /= (T arg) { x /= arg; y /= arg; z /= arg; return *this; } t3Vector operator / (T arg) const { return t3Vector(x / arg, y / arg, z / arg); } T dist2( const t3Vector& v ) const { return ((x-v.x)*(x-v.x)+(y-v.y)*(y-v.y)+(z-v.z)*(z-v.z)); } T dist( const t3Vector& v ) const { return ::sqrt( dist2( v ) ); } //! Check if the entries of the other vector differ by less than epsilon. // It is better to use this than to use operator== for comparision, if it is // not the same vertex. bool isClose( const t3Vector& o, const T epsilon) const { return ((std::fabs(x-o.x) < epsilon) and (std::fabs(y-o.y) < epsilon) and (std::fabs(z-o.z) < epsilon)); } static t3Vector normalize (const t3Vector& v1, T f=T(1)) { return t3Vector(v1).normalize(f); } static T dot (const t3Vector& v1, const t3Vector& v2) { return (v1.x*v2.x + v1.y*v2.y + v1.z*v2.z); } static t3Vector cross (const t3Vector& v1, const t3Vector& v2) { return t3Vector( v1.y*v2.z - v1.z*v2.y, v1.z*v2.x - v1.x*v2.z, v1.x*v2.y - v1.y*v2.x ); } public: T x, y, z; }; /*! \brief Returns minimal components of two vectors. * * This is useful for quick and dirty calculations of boundings boxes * for a set of vectors. * * TODO: This should be a static function of t3Vector */ template inline t3Vector lowerBound(const t3Vector v1, const t3Vector v2) { return t3Vector(min(v1.x, v2.x), min(v1.y, v2.y), min(v1.z, v2. z)); } /*! \brief Returns maximal components of two vectors. * * This is useful for quick and dirty calculations of boundings boxes * for a set of vectors. * * TODO: This should be a static function of t3Vector */ template inline t3Vector upperBound(const t3Vector v1, const t3Vector v2) { return t3Vector(max(v1.x, v2.x), max(v1.y, v2.y), max(v1.z, v2. z)); } /*! \brief Returns a vector orthogonal to this vector. * * E.g. for ||y|| < ||x|| and ||y|| < ||z||, the returned * vector is (z, 0, -x). 
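*
* A hedged sketch (not part of the original header; it uses the d3Vector typedef):
*
*   gravis::d3Vector n(0.0, 0.0, 2.0);
*   gravis::d3Vector t = n.findOrthogonal();     // (2, 0, 0) here; n.dot(t) == 0
*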
*/ template inline t3Vector t3Vector::findOrthogonal() const { if (std::abs(y) < std::abs(z)) { // y < z if (std::abs(x) < std::abs(y)) { // x smallest return t3Vector(0, z, -y); } else { // y smallest return t3Vector(z, 0, -x); } } else { // z < y if (std::abs(x) < std::abs(z)) { // x smallest return t3Vector(0, z, -y); } else { // z smallest return t3Vector(y, -x, 0); } } } template inline t3Vector operator ~ (const t3Vector& v1) { return t3Vector(-v1.x, -v1.y, -v1.z); } template inline t3Vector operator + (const t3Vector& v1, const t3Vector& v2) { return t3Vector( v1.x + v2.x, v1.y + v2.y, v1.z + v2.z ); } template inline t3Vector operator - (const t3Vector& v1) { return t3Vector(-v1.x, -v1.y, -v1.z); } template inline t3Vector operator + (const T& s, const t3Vector& v2) { return t3Vector(s + v2.x, s + v2.y, s + v2.z); } template inline t3Vector operator - (const T& s, const t3Vector& v2) { return t3Vector(s - v2.x, s - v2.y, s - v2.z); } template inline t3Vector operator + (const t3Vector& v, const T& s) { return t3Vector(v.x + s, v.y + s, v.z + s); } template inline t3Vector operator - (const t3Vector& v, const T& s) { return t3Vector(v.x - s, v.y - s, v.z - s); } template inline t3Vector operator - (const t3Vector& v1, const t3Vector& v2) { return t3Vector( v1.x - v2.x, v1.y - v2.y, v1.z - v2.z ); } template inline t3Vector operator * (T f, const t3Vector& v) { return t3Vector(f * v.x, f * v.y, f * v.z); } template inline t3Vector operator * (const t3Vector& v, T f) { return t3Vector(f * v.x, f * v.y, f * v.z); } template inline t3Vector operator / (const t3Vector& v, T f) { return t3Vector( v.x/f, v.y/f, v.z/f ); } template inline bool operator < (const t3Vector& v1, const t3Vector& v2) { return ((v1.x < v2.x) || ((v1.x == v2.x) && (v1.y < v2.y)) || ((v1.x == v2.x) && (v1.y == v2.y) && (v1.z < v2.z))); } template inline bool operator > (const t3Vector& v1, const t3Vector& v2) { return (!((v1==v2) || (v1 inline T dot (const t3Vector& v1, const t3Vector& v2) { return (v1.x*v2.x + v1.y*v2.y + v1.z*v2.z); } template inline t3Vector cross (const t3Vector& v1, const t3Vector& v2) { return t3Vector( v1.y*v2.z - v1.z*v2.y, v1.z*v2.x - v1.x*v2.z, v1.x*v2.y - v1.y*v2.x ); } template inline std::ostream& operator<< (std::ostream& os, const t3Vector& arg) { os << "[" << arg.x << ", " << arg.y << ", " << arg.z << "]"; return os; } // Inverse of operator<< template inline std::istream& operator>> (std::istream& is, t3Vector& arg) { char c = ' '; is >> c; if (is.eof()) return is; if (c != '[') throw std::runtime_error("Vector should start with an opening ["); std::stringstream values; int v = 0; while ((is >> c) && (c != ']')) { if (c == ',') { v++; if (v >= 3) throw std::runtime_error("Vector contains more than three elements"); values << " "; } else if (c != ' ') values << c; } if (c != ']') { throw std::runtime_error("Vector should end with a ]"); } values >> arg.x >> arg.y >> arg.z; return is; } typedef t3Vector f3Vector; typedef t3Vector d3Vector; } // namespace gravis #endif relion-3.1.3/src/jaz/gravis/t4Matrix.h000066400000000000000000000426261411340063500175500ustar00rootroot00000000000000#ifndef __LIBGRAVIS_T4MATRIX_H__ #define __LIBGRAVIS_T4MATRIX_H__ /****************************************************************************** ** Title: t4Matrix.h ** Description: Represents a 4x4 matrix with column-major memory layout. 
** ** Author: Jean-Sebastien Pierrard, 2005 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #include #include "t4Vector.h" #include "private/tDeterminants.h" #include "t3Matrix.h" namespace gravis { /*! \brief A 4x4 matrix class. * * There is no operator*=, because some people expect it to be a left-multiplication * and others a right-multiplication. To avoid confusion we only provide the explicit * methods lmul() and rmul(). */ template class t4Matrix { public: T m[16]; typedef T scalar_type; t4Matrix(); explicit t4Matrix(T val); t4Matrix(const T* v_ptr); t4Matrix(const t4Matrix& mat); t4Matrix(const t3Matrix& mat); t4Matrix(T m0, T m4, T m8, T m12, T m1, T m5, T m9, T m13, T m2, T m6, T m10, T m14, T m3, T m7, T m11, T m15); template explicit t4Matrix (const t4Matrix& mat) { for(int i=0; i<16; ++i) m[i] = static_cast(mat.m[i]); } void set( T m0, T m4, T m8, T m12, T m1, T m5, T m9, T m13, T m2, T m6, T m10, T m14, T m3, T m7, T m11, T m15); //! Check if the entries of the other vector differ by less than epsilon. // It is better to use this than to use operator== for comparision, if it is // not the same vertex. bool isClose( const t4Matrix& o, const T epsilon) const { for (int i=0; i<16; i++) if (std::fabs(m[i]-o.m[i]) >= epsilon) return false; return true; } bool operator==(const t4Matrix& o) const { for (int i=0; i<16; i++) if (m[i] != o.m[i]) return false; return true; } bool operator!=(const t3Matrix &o) const { return !(*this == o); } /*! \brief Return indexed entry (column major). */ const T& operator[] (int idx) const { return m[idx]; } /*! \brief Return reference to indexed entry (column major). */ T& operator[] (int idx) { return m[idx]; } /*! \brief Return entry in row i and column j. */ const T& operator() (int row, int col) const { return m[col * 4 + row]; } /*! \brief Return reference to entry in row i and column j. */ T& operator() (int row, int col) { return m[col * 4 + row]; } t4Matrix operator*(T f) const; t4Matrix& operator*=(T f); t4Matrix operator/(T f) const; t4Matrix& operator/=(T f); t4Vector operator*(const t4Vector&) const; t4Matrix operator*(const t4Matrix&) const; t4Matrix& operator+=(const t4Matrix&); t4Matrix& operator-=(const t4Matrix&); t4Matrix operator+(const t4Matrix&) const; t4Matrix operator-(const t4Matrix&) const; t4Matrix operator-() const; t4Matrix& lmul(const t4Matrix& m); t4Matrix& rmul(const t4Matrix& m); T trace() const; T det() const; t4Matrix& transpose(); t4Matrix& invert(); t4Matrix& loadIdentity(); t4Matrix& copy(const t3Matrix& mat); static t4Matrix translation(const t3Vector&); static t4Matrix scale(const t3Vector&); static t4Matrix scale (const T& s) { return scale(t3Vector(s,s,s)); } static t4Matrix rotation(const t3Vector& u, const t3Vector& v); static t4Matrix rotation(const t3Vector& axis, float angle); static t4Matrix rotationX(T angle); static t4Matrix rotationY(T angle); static t4Matrix rotationZ(T angle); }; /*! \brief Constructs an identity matrix. */ template inline t4Matrix::t4Matrix() { loadIdentity(); } /*! \brief Constructs a matrix with all entries set to val. */ template inline t4Matrix::t4Matrix(T val) { for (int i = 0; i < 16; i++) m[i] = val; } /*! \brief Constructs a matrix with entries taken from an array. * * \param v_ptr array must be of appropriate length and in column-major layout */ template inline t4Matrix::t4Matrix(const T* v_ptr) { for (int i = 0; i < 16; i++) m[i] = v_ptr[i]; } /*! \brief Copy constructor. 
*/ template inline t4Matrix::t4Matrix(const t4Matrix& mat) { for (int i = 0; i < 16; i++) m[i] = mat.m[i]; } /*! \brief Copy constructor. */ template inline t4Matrix::t4Matrix(const t3Matrix& mat) { m[ 0] = mat[0]; m[ 1] = mat[1]; m[ 2] = mat[2]; m[ 3] = 0.f; m[ 4] = mat[3]; m[ 5] = mat[4]; m[ 6] = mat[5]; m[ 7] = 0.f; m[ 8] = mat[6]; m[ 9] = mat[7]; m[10] = mat[8]; m[11] = 0.f; m[12] = 0.f; m[13] = 0.f; m[14] = 0.f; m[15] = 1.f; } /*! \brief Constructs a matrix from the given entries (row major). */ template inline t4Matrix::t4Matrix (T m0, T m4, T m8, T m12, T m1, T m5, T m9, T m13, T m2, T m6, T m10, T m14, T m3, T m7, T m11, T m15) { m[ 0] = m0; m[ 1] = m1; m[ 2] = m2; m[ 3] = m3; m[ 4] = m4; m[ 5] = m5; m[ 6] = m6; m[ 7] = m7; m[ 8] = m8; m[ 9] = m9; m[10] = m10; m[11] = m11; m[12] = m12; m[13] = m13; m[14] = m14; m[15] = m15; } /*! \brief Overwrites this matrix with the given entries (row major). */ template inline void t4Matrix::set (T m0, T m4, T m8, T m12, T m1, T m5, T m9, T m13, T m2, T m6, T m10, T m14, T m3, T m7, T m11, T m15 ) { m[ 0] = m0; m[ 1] = m1; m[ 2] = m2; m[ 3] = m3; m[ 4] = m4; m[ 5] = m5; m[ 6] = m6; m[ 7] = m7; m[ 8] = m8; m[ 9] = m9; m[10] = m10; m[11] = m11; m[12] = m12; m[13] = m13; m[14] = m14; m[15] = m15; } /*! \brief Scalar times matrix. */ template inline t4Matrix operator*(T f, const t4Matrix& mat) { t4Matrix out(mat); out *= f; return out; } /*! \brief Matrix times scalar. */ template inline t4Matrix t4Matrix::operator*(T f) const { t4Matrix out(*this); out *= f; return out; } /*! \brief Multiply this matrix with a scalar. */ template inline t4Matrix& t4Matrix::operator*=(T f) { for (int i = 0; i < 16; i++) m[i] *= f; return *this; } /*! \brief Matrix divided by scalar. */ template inline t4Matrix t4Matrix::operator/(const T f) const { t4Matrix out(*this); out /= f; return out; } /*! \brief Divide this matrix by a scalar. */ template inline t4Matrix& t4Matrix::operator/=(const T f) { for (int i = 0; i < 16; i++) m[i] /= f; return *this; } /*! \brief Matrix times vector. */ template inline t4Vector t4Matrix::operator*(const t4Vector& op) const { return t4Vector( m[ 0]*op.x + m[ 4]*op.y + m[ 8]*op.z + m[12]*op.w, m[ 1]*op.x + m[ 5]*op.y + m[ 9]*op.z + m[13]*op.w, m[ 2]*op.x + m[ 6]*op.y + m[10]*op.z + m[14]*op.w, m[ 3]*op.x + m[ 7]*op.y + m[11]*op.z + m[15]*op.w ); } /*! \brief Matrix times matrix. */ template inline t4Matrix t4Matrix::operator* (const t4Matrix& op) const { return t4Matrix( m[0]*op.m[ 0] + m[4]*op.m[ 1] + m[8]*op.m[ 2] + m[12]*op.m[ 3], // ROW 1 m[0]*op.m[ 4] + m[4]*op.m[ 5] + m[8]*op.m[ 6] + m[12]*op.m[ 7], m[0]*op.m[ 8] + m[4]*op.m[ 9] + m[8]*op.m[10] + m[12]*op.m[11], m[0]*op.m[12] + m[4]*op.m[13] + m[8]*op.m[14] + m[12]*op.m[15], m[1]*op.m[ 0] + m[5]*op.m[ 1] + m[9]*op.m[ 2] + m[13]*op.m[ 3], // ROW 2 m[1]*op.m[ 4] + m[5]*op.m[ 5] + m[9]*op.m[ 6] + m[13]*op.m[ 7], m[1]*op.m[ 8] + m[5]*op.m[ 9] + m[9]*op.m[10] + m[13]*op.m[11], m[1]*op.m[12] + m[5]*op.m[13] + m[9]*op.m[14] + m[13]*op.m[15], m[2]*op.m[ 0] + m[6]*op.m[ 1] + m[10]*op.m[ 2] + m[14]*op.m[ 3], // ROW 3 m[2]*op.m[ 4] + m[6]*op.m[ 5] + m[10]*op.m[ 6] + m[14]*op.m[ 7], m[2]*op.m[ 8] + m[6]*op.m[ 9] + m[10]*op.m[10] + m[14]*op.m[11], m[2]*op.m[12] + m[6]*op.m[13] + m[10]*op.m[14] + m[14]*op.m[15], m[3]*op.m[ 0] + m[7]*op.m[ 1] + m[11]*op.m[ 2] + m[15]*op.m[ 3], // ROW 4 m[3]*op.m[ 4] + m[7]*op.m[ 5] + m[11]*op.m[ 6] + m[15]*op.m[ 7], m[3]*op.m[ 8] + m[7]*op.m[ 9] + m[11]*op.m[10] + m[15]*op.m[11], m[3]*op.m[12] + m[7]*op.m[13] + m[11]*op.m[14] + m[15]*op.m[15] ); } /*! 
\brief Adds other matrix to this matrix. */ template inline t4Matrix& t4Matrix::operator+=(const t4Matrix& op) { for (int i = 0; i < 16; i++) m[i] += op.m[i]; return *this; } /*! \brief Subtracts other matrix from this matrix. */ template inline t4Matrix& t4Matrix::operator-=(const t4Matrix& op) { *this += -op; return *this; } /*! \brief Matrix plus matrix. */ template inline t4Matrix t4Matrix::operator+(const t4Matrix& op) const { t4Matrix out(*this); return out += op; } /*! \brief Matrix minus matrix. */ template inline t4Matrix t4Matrix::operator-(const t4Matrix& op) const { t4Matrix out(*this); return out += -op; } /*! \brief Return additive inverse of this matrix. */ template inline t4Matrix t4Matrix::operator-() const { t4Matrix out(*this); for (int i = 0; i < 16; i++) out[i] = -out[i]; return out; } /*! \brief Right-multiply m to this matrix (*this = *this * m). */ template inline t4Matrix& t4Matrix::rmul(const t4Matrix& m) { *this = *this * m; return *this; } /*! \brief Left-multiply m to this matrix (*this = m * *this). */ template inline t4Matrix& t4Matrix::lmul(const t4Matrix& m) { *this = m * *this; return *this; } /*! \brief Return the trace of this matrix (\f$a_{11} + a_{22} + a_{33} + a_{44}\f$). */ template inline T t4Matrix::trace() const { return ( m[0] + m[5] + m[10] + m[15] ); } /*! \brief Return the determinant of this matrix. */ template inline T t4Matrix::det() const { return det4x4(m[ 0], m[ 4], m[ 8], m[12], m[ 1], m[ 5], m[ 9], m[13], m[ 2], m[ 6], m[10], m[14], m[ 3], m[ 7], m[11], m[15]); } /*! \brief Transpose this matrix. * Attention: Although innocent looking this is an inplace operation **/ template inline t4Matrix& t4Matrix::transpose () { std::swap(m[1], m[4]); std::swap(m[2], m[8]); std::swap(m[3], m[12]); std::swap(m[6], m[9]); std::swap(m[7], m[13]); std::swap(m[11], m[14]); return *this; } /*! \brief Invert this matrix. 
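*
* A hedged usage sketch (not part of the original header; it assumes the d4Matrix and d3Vector
* typedefs defined in these headers):
*
*   gravis::d4Matrix M = gravis::d4Matrix::translation(gravis::d3Vector(1.0, 2.0, 3.0));
*   gravis::d4Matrix Minv(M);
*   Minv.invert();                      // Minv now holds the inverse of M
*   gravis::d4Matrix I = M * Minv;      // identity, up to floating-point rounding
*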
* Attention: Although innocent looking this is an inplace operation **/ template inline t4Matrix& t4Matrix::invert() { T det, oodet; t4Matrix A = *this; (*this)(0,0) = det3x3(A(1,1), A(2,1), A(3,1), A(1,2), A(2,2), A(3,2), A(1,3), A(2,3), A(3,3)); (*this)(1,0) = -det3x3(A(1,0), A(2,0), A(3,0), A(1,2), A(2,2), A(3,2), A(1,3), A(2,3), A(3,3)); (*this)(2,0) = det3x3(A(1,0), A(2,0), A(3,0), A(1,1), A(2,1), A(3,1), A(1,3), A(2,3), A(3,3)); (*this)(3,0) = -det3x3(A(1,0), A(2,0), A(3,0), A(1,1), A(2,1), A(3,1), A(1,2), A(2,2), A(3,2)); (*this)(0,1) = -det3x3(A(0,1), A(2,1), A(3,1), A(0,2), A(2,2), A(3,2), A(0,3), A(2,3), A(3,3)); (*this)(1,1) = det3x3(A(0,0), A(2,0), A(3,0), A(0,2), A(2,2), A(3,2), A(0,3), A(2,3), A(3,3)); (*this)(2,1) = -det3x3(A(0,0), A(2,0), A(3,0), A(0,1), A(2,1), A(3,1), A(0,3), A(2,3), A(3,3)); (*this)(3,1) = det3x3(A(0,0), A(2,0), A(3,0), A(0,1), A(2,1), A(3,1), A(0,2), A(2,2), A(3,2)); (*this)(0,2) = det3x3(A(0,1), A(1,1), A(3,1), A(0,2), A(1,2), A(3,2), A(0,3), A(1,3), A(3,3)); (*this)(1,2) = -det3x3(A(0,0), A(1,0), A(3,0), A(0,2), A(1,2), A(3,2), A(0,3), A(1,3), A(3,3)); (*this)(2,2) = det3x3(A(0,0), A(1,0), A(3,0), A(0,1), A(1,1), A(3,1), A(0,3), A(1,3), A(3,3)); (*this)(3,2) = -det3x3(A(0,0), A(1,0), A(3,0), A(0,1), A(1,1), A(3,1), A(0,2), A(1,2), A(3,2)); (*this)(0,3) = -det3x3(A(0,1), A(1,1), A(2,1), A(0,2), A(1,2), A(2,2), A(0,3), A(1,3), A(2,3)); (*this)(1,3) = det3x3(A(0,0), A(1,0), A(2,0), A(0,2), A(1,2), A(2,2), A(0,3), A(1,3), A(2,3)); (*this)(2,3) = -det3x3(A(0,0), A(1,0), A(2,0), A(0,1), A(1,1), A(2,1), A(0,3), A(1,3), A(2,3)); (*this)(3,3) = det3x3(A(0,0), A(1,0), A(2,0), A(0,1), A(1,1), A(2,1), A(0,2), A(1,2), A(2,2)); det = (A(0,0) * (*this)(0,0)) + (A(0,1) * (*this)(1,0)) + (A(0,2) * (*this)(2,0)) + (A(0,3) * (*this)(3,0)); oodet = T(1) / det; *this *= oodet; return *this; } /*! \brief Overwrite this matrix with an identity matrix. */ template inline t4Matrix& t4Matrix::loadIdentity () { m[ 0] = T(1); m[ 1] = T(0); m[ 2] = T(0); m[ 3] = T(0); m[ 4] = T(0); m[ 5] = T(1); m[ 6] = T(0); m[ 7] = T(0); m[ 8] = T(0); m[ 9] = T(0); m[10] = T(1); m[11] = T(0); m[12] = T(0); m[13] = T(0); m[14] = T(0); m[15] = T(1); return *this; } /*! \brief Copies the 3x3 matrix into the upper left corner of this * instance. */ template inline t4Matrix& t4Matrix::copy(const t3Matrix& mat) { for (int j = 0; j < 3; j++) for (int i = 0; i < 3; i++) m[4 * j + i] = mat.m[3 * j + i]; return *this; } /*! \brief Return a matrix representing a translation by t. */ template inline t4Matrix t4Matrix::translation(const t3Vector& t) { return t4Matrix(T(1), T(0), T(0), t.x, T(0), T(1), T(0), t.y, T(0), T(0), T(1), t.z, T(0), T(0), T(0), T(1) ); } /*! \brief Return a matrix represnting a scaling by s. */ template inline t4Matrix t4Matrix::scale (const t3Vector& s) { return t4Matrix( s.x, T(0), T(0), T(0), T(0), s.y, T(0), T(0), T(0), T(0), s.z, T(0), T(0), T(0), T(0), T(1) ); } /*! Return a matrix that will rotate u into v. */ template inline t4Matrix t4Matrix::rotation(const t3Vector& u, const t3Vector& v) { t4Matrix out; out.copy(t3Matrix::rotation(u, v)); return out; } /*! \brief Return a matrix that rotates by specified angle (in degrees) around specified axis. 
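*
* A minimal sketch (not part of the original header; it assumes the f4Matrix, f4Vector and
* f3Vector typedefs defined in these headers):
*
*   gravis::f4Matrix R = gravis::f4Matrix::rotation(gravis::f3Vector(0.f, 0.f, 1.f), 45.f);
*   gravis::f4Vector p(1.f, 0.f, 0.f, 1.f);      // homogeneous point (w = 1)
*   gravis::f4Vector q = R * p;                  // p rotated by 45 degrees about the z axis
*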
*/ template inline t4Matrix t4Matrix::rotation(const t3Vector& axis, float angle) { t4Matrix out; out.copy(t3Matrix::rotation(axis, angle)); return out; } template inline t4Matrix t4Matrix::rotationX (T a) { return t4Matrix( T(1), T(0), T(0), T(0), T(0), T(cos(a)), T(-sin(a)), T(0), T(0), T(sin(a)), T(cos(a)), T(0), T(0), T(0), T(0), T(1) ); } template inline t4Matrix t4Matrix::rotationY (T a) { // ATTENTION!!! This is actually wrong!, -sin is in the first column // but this could disrupt everything!!! // Sandro Schoenborn, 2013-04-09, sandro.schoenborn@unibas.ch // Clemens Blumer, 2013-04-09, clemens.blumer@unibas.ch return t4Matrix( T(cos(a)), T(0), T(-sin(a)), T(0), T(0), T(1), T(0), T(0), T(sin(a)), T(0), T(cos(a)), T(0), T(0), T(0), T(0), T(1) ); } template inline t4Matrix t4Matrix::rotationZ (T a) { return t4Matrix( T(cos(a)), T(-sin(a)), T(0), T(0), T(sin(a)), T(cos(a)), T(0), T(0), T(0), T(0), T(1), T(0), T(0), T(0), T(0), T(1) ); } // TODO: Set Fixed Precision template inline std::ostream& operator<< (std::ostream& os, const t4Matrix& arg) { os << "[ " << arg[ 0] << " " << arg[ 4] << " " << arg[ 8] << " " << arg[12] << " ]\n"; os << "| " << arg[ 1] << " " << arg[ 5] << " " << arg[ 9] << " " << arg[13] << " |\n"; os << "| " << arg[ 2] << " " << arg[ 6] << " " << arg[10] << " " << arg[14] << " |\n"; os << "[ " << arg[ 3] << " " << arg[ 7] << " " << arg[11] << " " << arg[15] << " ]\n"; return os; } template inline std::istream& operator>> ( std::istream& is, t4Matrix& arg) { std::string dummy; is >> dummy >> arg[ 0] >> arg[ 4] >> arg[ 8] >> arg[12] >> dummy; is >> dummy >> arg[ 1] >> arg[ 5] >> arg[ 9] >> arg[13] >> dummy; is >> dummy >> arg[ 2] >> arg[ 6] >> arg[10] >> arg[14] >> dummy; is >> dummy >> arg[ 3] >> arg[ 7] >> arg[11] >> arg[15] >> dummy; return is; } typedef t4Matrix f4Matrix; typedef t4Matrix d4Matrix; } // namespace gravis #endif relion-3.1.3/src/jaz/gravis/t4Vector.h000066400000000000000000000171721411340063500175440ustar00rootroot00000000000000#ifndef __LIBGRAVIS_T4VECTOR_H__ #define __LIBGRAVIS_T4VECTOR_H__ /****************************************************************************** ** Title: t4Vector.h ** Description: Represents a four dimensional vector (3D+homogeneous comp.). ** ** Author: Jean-Sebastien Pierrard, 2005 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #include #include #include #include namespace gravis { template class t3Vector; template class t4Vector { public: T x, y, z, w; typedef T scalar_type; t4Vector () : x(T(0)), y(T(0)), z(T(0)), w(T(1)) { } explicit t4Vector (T _v) : x(_v), y(_v), z(_v), w(_v) { } t4Vector (T _x, T _y, T _z, T _w=T(1)) : x(_x), y(_y), z(_z), w(_w) { } /*! \brief Construct a 4D vector with w = 1. */ explicit t4Vector (const t3Vector& vec) : x(vec.x), y(vec.y), z(vec.z), w(1.0) { } template explicit t4Vector (const t4Vector& vec) : x(vec.x), y(vec.y), z(vec.z), w(vec.w) {} t4Vector (const t4Vector& vec) : x(vec.x), y(vec.y), z(vec.z), w(vec.w) { } static t4Vector unitX () { return t4Vector(T(1), T(0), T(0), T(1)); } static t4Vector unitY () { return t4Vector(T(0), T(1), T(0), T(1)); } static t4Vector unitZ () { return t4Vector(T(0), T(0), T(1), T(1)); } void set (T _v) { x = y = z = _v; w = T(1); } void set (T _x, T _y, T _z, T _w=T(1)) { x = _x; y = _y; z = _z; w = _w; } //! Beware: This is not the 2 norm but the square of the two norm. T norm2 () const { return (x*x + y*y + z*z + w*w); } //! 
\f$l_1\f$ Norm: \f$\sum_i |v_i|\f$ T normL1 () const { return (std::abs(x) + std::abs(y) + std::abs(z) + std::abs(w)); } //! \f$l_2\f$ Norm: \f$\sqrt{\sum_i |v_i|^2}\f$ T normL2 () const { return sqrt(x*x + y*y + z*z + w*w); } //! \f$l_\infty\f$ Norm: \f$\max{ |v_i|\,|\, \forall i }\f$ T normLInf() const { return std::max(std::max(std::max(std::abs(x), std::abs(y)), std::abs(z)), std::abs(w)); } void invert () { x = -x; y = -y; z = -z; w = -w; } T dot (const t4Vector& arg) const { return (x*arg.x + y*arg.y + z*arg.z + w*arg.w); } void divideW () { x /= w; y /= w; z /= w; w = T(1); } /*! \brief Return a 3D vector corresponding to this 4D vector. * * If the w coordinate is 0, the vector is considered a direction or displacement, * and (x,y,z) is returned. Otherwise, the vector is considered a point, and * (x/w, y/w, z/w) is returned. */ t3Vector toVector3() const { if (w == 0) return t3Vector(x, y, z); else return t3Vector(x/w, y/w, z/w); } /*! \brief Return the euclidian norm of this 4D vector. * * Note, that there is no special treatment of the w-coordinate. * The result is simply \f$\sqrt{x^2+y^2+z^2+w^2}\f$. */ T length () const { return T(::sqrt(x*x + y*y + z*z + w*w)); } t4Vector& normalize (T f=T(1)) { if (f == T(0)) set(T(0), T(0), T(0), T(0)); T norm = length()/f; if (norm != T(0)) { *this /= norm; } return *this; } const T& operator[] (int idx) const { #ifdef _GRAVIS_DEBUG_RANGECHECKING_ assert((idx >= 0) && (idx < 4)); #endif return (&x)[idx]; } T& operator[] (int idx) { #ifdef _GRAVIS_DEBUG_RANGECHECKING_ assert((idx >= 0) && (idx < 4)); #endif return (&x)[idx]; } bool operator == ( const t4Vector& arg ) const { return ( x == arg.x && y == arg.y && z == arg.z && w == arg.w); } bool operator != ( const t4Vector& arg ) const { return !(*this == arg); } t4Vector& operator += (const t4Vector& arg) { x += arg.x; y += arg.y; z += arg.z; w += arg.w; return *this; } t4Vector& operator -= (const t4Vector& arg) { x -= arg.x; y -= arg.y; z -= arg.z; w -= arg.w; return *this; } t4Vector& operator += (const T& scalar) { x += scalar; y += scalar; z += scalar; w += scalar; return *this; } t4Vector& operator -= (const T& scalar) { x -= scalar; y -= scalar; z -= scalar; w -= scalar; return *this; } t4Vector& operator *= (const T& arg) { x *= arg; y *= arg; z *= arg; w *= arg; return *this; } t4Vector& operator /= (const T& arg) { x /= arg; y /= arg; z /= arg; w /= arg; return *this; } //! Check if the entries of the other vector differ by less than epsilon. // It is better to use this than to use operator== for comparision, if it is // not the same vertex. 
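// Illustrative sketch of the homogeneous-divide helpers defined above (not from the original
// source; it assumes the d4Vector/d3Vector typedefs and that t3Vector.h is available):
//   gravis::d4Vector h(2.0, 4.0, 6.0, 2.0);
//   gravis::d3Vector p = h.toVector3();   // (1, 2, 3): divides by w because w != 0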
bool isClose( const t4Vector& o, const T epsilon) const { return ((std::fabs(x-o.x) < epsilon) and (std::fabs(y-o.y) < epsilon) and (std::fabs(z-o.z) < epsilon) and (std::fabs(w-o.w) < epsilon)); } static t4Vector normalize (const t4Vector& v1, T f=T(1)) { return t4Vector(v1).normalize(); } static T dot (const t4Vector& v1, const t4Vector& v2) { return (v1.x*v2.x + v1.y*v2.y + v1.z*v2.z + v1.w*v2.w); } }; template inline t4Vector operator + (const t4Vector& v1, const t4Vector& v2) { return t4Vector( v1.x + v2.x, v1.y + v2.y, v1.z + v2.z, v1.w + v2.w ); } template inline t4Vector operator - (const t4Vector& v1) { return t4Vector(-v1.x, -v1.y, -v1.z, -v1.w); } template inline t4Vector operator - (const t4Vector& v1, const t4Vector& v2) { return t4Vector( v1.x - v2.x, v1.y - v2.y, v1.z - v2.z, v1.w - v2.w ); } template inline t4Vector operator + (const T& s, const t4Vector& v2) { return t4Vector(s + v2.x, s + v2.y, s + v2.z, s + v2.w); } template inline t4Vector operator - (const T& s, const t4Vector& v2) { return t4Vector(s - v2.x, s - v2.y, s - v2.z, s - v2.w); } template inline t4Vector operator + (const t4Vector& v, const T& s) { return t4Vector(v.x + s, v.y + s, v.z + s, v.w + s); } template inline t4Vector operator - (const t4Vector& v, const T& s) { return t4Vector(v.x - s, v.y - s, v.z - s, v.w - s); } template inline t4Vector operator * (T f, const t4Vector& v) { return t4Vector(f * v.x, f * v.y, f * v.z, f * v.w); } template inline t4Vector operator * (const t4Vector& v, T f) { return t4Vector(f * v.x, f * v.y, f * v.z, f * v.w); } template inline std::ostream& operator<< (std::ostream& os, const t4Vector& arg) { os << "[" << arg.x << ", " << arg.y << ", " << arg.z << ", " << arg.w << "]"; return os; } typedef t4Vector f4Vector; typedef t4Vector d4Vector; } /* Close Namespace "gravis" */ #endif relion-3.1.3/src/jaz/gravis/tArray.h000066400000000000000000000245241411340063500172730ustar00rootroot00000000000000#ifndef __LIBGRAVIS_T_ARRAY_H__ #define __LIBGRAVIS_T_ARRAY_H__ /****************************************************************************** ** Title: tArray.h ** Description: Implements a one dimensional array with reference counting. ** ** Author: Jean-Sebastien Pierrard, 2005 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #include #include #include #include #include "private/tRefCPtr.h" /*! ** \file tArray.h */ namespace gravis { /*! ** \class tArray ** \brief Implements a one dimensional array with reference counting. */ template class tArray { public: typedef T value_type; tArray (); tArray (size_t); tArray (T* data, size_t nel, bool deleteData); tArray (const tArray&); tArray& operator=(const tArray&); tArray (const std::vector&); ~tArray (); tArray clone() const; tArray safeClone() const; //! Deprecating this, as it is not standard. Use resize() instead. tArray& setSize (size_t); //! Useful alias to setSize. At some point we should switch completely to the std library. tArray& resize (size_t s) { return setSize(s); }; void fill (T); void fill (T, size_t, size_t); size_t size () const; const T& operator[] (size_t) const; T& operator[] (size_t); const T* data () const; T* data (); bool operator==(const tArray& other) const; bool operator!=(const tArray& other) const; operator std::vector() const { const tArray &self = *this; const size_t l=size(); std::vector result(l); for (size_t i=0; i p_smp; T* p_data; size_t length; }; /*! 
** \class tConstArray ** \brief Read-only wrapper for tArray. * * Since tArray is really a pointer, "const tArray" does protect the data, * but also protects the pointer! Assume I want a class that keeps a * pointer to data (= tArray), and needs only read access. We also want to * change the pointer once in a while. * \code * class X { * // tArray readOnly; // BAD! can manipulate data * // const tArray readOnly; // cannot manipulate data, but cannot change readOnly * tConstArray readOnly; // solution * public: * void setArray(tConstArray a) { * readOnly = a; * } * }; * \endcode */ template class tConstArray { private: tArray ta; public: tConstArray() {} tConstArray(tArray& ta) : ta(ta) {} tArray clone() const { return ta.clone(); } tArray safeClone() const { return ta.safeClone(); } size_t size() const { return ta.size(); } const T& operator[](size_t i) const { return ta[i]; } const T* data() const { return ta.data(); } bool operator==(const tArray& other) const { return ta == other; } bool operator!=(const tArray& other) const { return ta != other; } bool operator==(const tConstArray& other) const { return ta == other.ta; } bool operator!=(const tConstArray& other) const { return ta == other.ta; } const tConstArray& operator=(tArray& ta) { this->ta = ta; return *this; } const tConstArray& operator=(const tConstArray& other) { this->ta = other.ta; return *this; } }; /*! ** \brief Default constructor */ template inline tArray::tArray () : p_smp (), p_data(), length() { this->allocArray(0); } /*! ** \brief Constructor. ** \param nel Number of elements to allocate for this tArray. */ template inline tArray::tArray (size_t nel) : p_smp (), p_data(), length() { this->allocArray(nel); } template inline tArray::tArray (T* data, size_t nel, bool deleteData) : p_smp(), p_data(data), length(nel) { if (deleteData) p_smp = priv::tRefCPtr(p_data, priv::tRefCPtr::ALLOC_ARRAY, 1); else p_smp = priv::tRefCPtr(p_data, priv::tRefCPtr::ALLOC_ARRAY, 2); } /*! ** \brief Copy-constructor ** ** The copy-constructor has reference-semantic, i.e. the managed data is not ** copied. Instead a new handle to the same data is created. ** ** \param rhs The array to be copied */ template inline tArray::tArray (const tArray& rhs) : p_smp (rhs.p_smp), p_data(rhs.p_data), length(rhs.length) { } /*! ** \brief Assignment ** ** The assignment has reference-semantic, i.e. the managed data is not ** copied. Instead a new handle to the same data is created. ** ** \param rhs The array to be assigned */ template inline tArray &tArray::operator=(const tArray& rhs) { p_smp = rhs.p_smp; p_data = rhs.p_data; length = rhs.length; return *this; } /*! ** \brief Construct from std vector ** ** \param rhs The std vector from which the data is copied. This construction does not create a reference, but actually copies the data. */ template inline tArray::tArray (const std::vector& rhs) { this->allocArray(rhs.size()); for (size_t i=0; i inline tArray::~tArray () { } /*! ** \brief Create a deep-copy of managed data. ** \return A new tArray object. ** ** Use this version of clone unless your datatype is simple ** (e.g. tVector, size_t, Tuple2...) */ template inline tArray tArray::safeClone() const { tArray lhs(length); for (size_t i=0; i object. ** ** \warning This method creates a byte-wise copy of the managed data. When ** applied to compound types (e.g. T=std::vector) or reference counted ** types like std::string it will create crashes use save_clone() unless your datatype is simple. 
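*
* A hedged usage sketch (not part of the original header; the angle-bracket template syntax is
* assumed, since tArray is a class template over its element type):
*
*   gravis::tArray<float> a(4);
*   a.fill(1.0f);
*   gravis::tArray<float> b = a;          // reference semantics: b shares a's data
*   gravis::tArray<float> c = a.clone();  // deep, byte-wise copy (fine for plain types)
*   b[0] = 5.0f;                          // also changes a[0]; c[0] is still 1.0f
*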
*/ template inline tArray tArray::clone () const { tArray lhs(length); memcpy(lhs.p_data, p_data, length*sizeof(T)); return lhs; } /*! ** \brief Fill array with constant value. ** ** \param value Value to fill with. */ template inline void tArray::fill (T value) { const T* end_ptr = p_data + length; for (T* t_ptr=p_data; t_ptr inline void tArray::fill (T value, size_t from, size_t to) { if (from >= length) from = length; if (to >= length) to = length; T* end_ptr = p_data + to; for (T* t_ptr=p_data+from; t_ptr inline tArray& tArray::setSize (size_t nel) { this->allocArray(nel); return *this; } /*! ** \brief Get number of elements. ** ** \return Number of T-elements in array. */ template inline size_t tArray::size () const { return length; } /*! ** \brief Access i-th element. ** \param i Index into array. ** \return const-Reference to i-th element. */ template inline const T& tArray::operator[] (size_t i) const { #ifdef _GRAVIS_DEBUG_RANGECHECKING_ assert( i < length ); #endif return p_data[i]; } /*! ** \brief Access i-th element. ** \param i Index into array. ** \return Reference to i-th element. */ template inline T& tArray::operator[] (size_t i) { #ifdef _GRAVIS_DEBUG_RANGECHECKING_ assert( i < length ); #endif return p_data[i]; } /*! ** \brief Perform element-by-element comparison. */ template inline bool tArray::operator==(const tArray& other) const { return !(*this != other); } /*! ** \brief Perform element-by-element comparison. */ template inline bool tArray::operator!=(const tArray& other) const { if (p_data == other.p_data) return false; else if (length != other.length) return true; else { for (size_t i = 0; i < length; i++) { if (p_data[i] != other.p_data[i]) return true; } } return false; } /*! ** \brief Get pointer to managed data. ** \return const-Pointer to first element of managed data. */ template inline const T* tArray::data () const { return p_data; } /*! ** \brief Get pointer to managed data. ** \return Pointer to first element of managed data. */ template inline T* tArray::data () { return p_data; } template inline void tArray::allocArray (size_t nel) { if (nel <= 0) { p_data = 0; length = 0; p_smp = priv::tRefCPtr(p_data, priv::tRefCPtr::ALLOC_ARRAY); } else { // ATTENTION! Bug: this leaks!! ... delete old memory // Sandro Schoenborn, 2013-04-09, sandro.schoenborn@unibas.ch // Tobias Maier, 2013-04-09, tobias.maier@unibas.ch p_data = new T[nel]; length = nel; p_smp = priv::tRefCPtr(p_data, priv::tRefCPtr::ALLOC_ARRAY); } } } /* Close namespace "gravis" */ #endif relion-3.1.3/src/jaz/gravis/tBGR.h000066400000000000000000000126241411340063500166250ustar00rootroot00000000000000#ifndef __LIBGRAVIS_T_BGR_H__ #define __LIBGRAVIS_T_BGR_H__ /****************************************************************************** ** Title: tBGR.h ** Description: Represents an BGR color tupel. 
** ** Author: ** ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #include namespace gravis { template struct tBGR { T b, g, r; typedef T scalar_type; tBGR () : b(T(0)), g(T(0)), r(T(0)) { } tBGR (T _b, T _g, T _r) : b(_b), g(_g), r(_r) { } tBGR (T gray) : b(gray), g(gray), r(gray) { } void set (T _b, T _g, T _r) { r = _r; g = _g; b = _b; } void set (T gray) { r = gray; b = gray; g = gray; } void add (T _r, T _g, T _b) { r += _r; g += _g; b += _b; } void add (T gray) { r += gray; g += gray; b += gray; } T grayValue () const { return (T)(0.30f*r + 0.59f*g + 0.11f*b); } T minValue () const { if (r < g) { if (r < b) return r; else return b; } else { if (g < b) return g; else return b; } } T maxValue () const { if (r > g) { if (r > b) return r; else return b; } else { if (g > b) return g; else return b; } } tBGR& operator += (const tBGR& c) { r += c.r; g += c.g; b += c.b; return *this; } tBGR& operator += (const T gray) { r += gray; g += gray; b += gray; return *this; } tBGR& operator -= (const tBGR& c) { r -= c.r; g -= c.g; b -= c.b; return *this; } tBGR& operator -= (const T gray) { r -= gray; g -= gray; b -= gray; return *this; } tBGR& operator *= (const tBGR& c) { r *= c.r; g *= c.g; b *= c.b; return *this; } tBGR& operator *= (const T factor) { r *= factor; g *= factor; b *= factor; return *this; } tBGR& operator /= (const tBGR& c) { r /= c.r; g /= c.g; b /= c.b; return *this; } tBGR& operator /= (const T factor) { r /= factor; g /= factor; b /= factor; return *this; } //! Unary minus inline tBGR operator - () const { return tBGR(-r, -g, -b); }; //! Addition of a scalar (analog to -=) inline tBGR operator + (const T& c) const { return tBGR(r+c, g+c, b+c); }; //! Subtraction of a scalar (analog to +=) inline tBGR operator - (const T& c) const { return tBGR(r-c, g-c, b-c); }; //! Multiplication of a scalar (analog to *=) inline tBGR operator * (const T& c) const { return tBGR(r*c, g*c, b*c); }; //! Division by a scalar (analog to /=) inline tBGR operator / (const T& c) const { return tBGR(r/c, g/c, b/c); }; bool operator == (const tBGR& arg) { return ((arg.r == r) && (arg.g == g) && (arg.b == b)); } }; template inline tBGR operator + (const tBGR& c1, const tBGR& c2) { tBGR result = c1; return (result += c2); } template inline tBGR operator - (const tBGR& c1, const tBGR& c2) { tBGR result = c1; return (result -= c2); } template inline tBGR operator * (const tBGR& c1, const tBGR& c2) { tBGR result(c1.r * c2.r, c1.g * c2.g, c1.b * c2.b); return result; } template inline tBGR operator * (const tBGR& c, T factor) { tBGR result(c.r * factor, c.g * factor, c.b * factor); return result; } template inline tBGR operator * (T factor, const tBGR& c) { tBGR result(c.r * factor, c.g * factor, c.b * factor); return result; } template inline tBGR operator / (const tBGR& c1, const tBGR& c2) { tBGR result(c1.r / c2.r, c1.g / c2.g, c1.b / c2.b); return result; } template inline tBGR operator / (const tBGR& c, T factor) { tBGR result(c.r / factor, c.g / factor, c.b / factor); return result; } template inline bool operator < (const tBGR& c1, const tBGR& c2) { T gray1 = c1.grayValue(); T gray2 = c2.grayValue(); return (gray1 < gray2); } template inline tBGR operator ! (const tBGR& c) { tBGR result = tBGR::White(); return (result -= c); } // Absolute of every color channel template inline tBGR abs(const tBGR& c) { return tBGR(c.r < T(0) ? -c.r : c.r, c.g < T(0) ? -c.g : c.g, c.b < T(0) ? 
-c.b : c.b); } template inline std::ostream& operator << (std::ostream& os, const tBGR& c) { os << "(" << c.r << " " << c.g << " " << c.b << ")"; return os; } template <> inline std::ostream& operator << (std::ostream& os, const tBGR& c) { os << "(" << (int)c.r << " " << (int)c.g << " " << (int)c.b << ")"; return os; } typedef tBGR cBGR; typedef tBGR bBGR; typedef tBGR fBGR; typedef tBGR dBGR; } /* Close Namespace "gravis" */ #endif relion-3.1.3/src/jaz/gravis/tDefaultVector.h000066400000000000000000000127421411340063500207630ustar00rootroot00000000000000#ifndef __LIBGRAVIS_T_DEFAULT_VECTOR_H__ #define __LIBGRAVIS_T_DEFAULT_VECTOR_H__ #include #include "tArray.h" namespace gravis { /** * Like a std::vector, but with a default value returned when accessing [-1]. * * This situation is checked extremely efficiently by positioning the default * element at position [-1] in memory, so no check has to be done. * * This replacement for tVector does not offer reference counting. It makes * more sense to take a complete array structure and wrap it into a * boost::shared_ptr **/ template class tDefaultVector { private: typedef typename std::vector Vector; Vector data; T* data_ptr; public: typedef typename Vector::iterator iterator; typedef typename Vector::const_iterator const_iterator; typedef typename Vector::reverse_iterator reverse_iterator; typedef typename Vector::const_reverse_iterator const_reverse_iterator; typedef typename Vector::reference reference; typedef typename Vector::const_reference const_reference; /** * Create a new vector, optionally specifying a default value. If no default value is specified T() is used **/ tDefaultVector(const size_t size=0, const T& def=T()) { data.resize(size+1); data_ptr = &data[1]; data[0] = def; } /** * Copy data from the other vector **/ tDefaultVector(const tDefaultVector& other) : data(other.data), data_ptr(&data[1]) {}; /** * Copy data from the other vector **/ tDefaultVector(const tArray &other) : data(other.size()+1), data_ptr(&data[1]) { for (size_t i=0; i &other) : data(other.size()+1), data_ptr(&data[1]) { for (size_t i=0; i inline void swap(gravis::tDefaultVector<_Tp>& __x, gravis::tDefaultVector<_Tp>& __y) { __x.swap(__y); } } #endif relion-3.1.3/src/jaz/gravis/tGray_A.h000066400000000000000000000115411411340063500173520ustar00rootroot00000000000000#ifndef __LIBGRAVIS_T_GRAY_A_H__ #define __LIBGRAVIS_T_GRAY_A_H__ /****************************************************************************** ** Title: tGray_A.h ** Description: Represents an RGB+Alpha color tupel. ** ** Author: Jean-Sebastien Pierrard, 2005 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #include namespace gravis { template class tGray_A { public: T g, a; typedef T scalar_type; tGray_A () : g(T(0)), a(T(1.0)) { } tGray_A (T _g) : g(_g) , a(T(1.0)) { } tGray_A (T _g, T _a) : g(_g) , a(_a) { } void set (T _g) { g = _g; } void set (T _g, T _a) { g = _g; a = _a; } T grayValue () const { return g; } T minValue () const { return g; } T maxValue () const { return g; } /*! \brief All color components, including alpha are clamped to [0,1]. 
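*
* Illustrative example (added, not in the original comment); note that the
* implementation below only clamps the gray channel g:
* \code
* gravis::tGray_A<float> g(1.7f, 0.5f);
* g.clamp();   // g.g becomes 1.0f, g.a stays 0.5f
* \endcode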
* * \return self */ tGray_A& clamp() { g = std::min(std::max(g, T(0)), T(1)); return *this; } bool operator != (const tGray_A& c) const { return g != c.g || a != c.a; } bool operator == (const tGray_A& c) const { return g == c.g && a == c.a; } tGray_A& operator += (const tGray_A& c) { g += c.g; return *this; } tGray_A& operator += (const T gray) { g += gray; return *this; } tGray_A& operator -= (const tGray_A& c) { g -= c.g; return *this; } tGray_A& operator -= (const T gray) { g -= gray; return *this; } tGray_A& operator *= (const tGray_A& c) { g *= c.g; return *this; } tGray_A& operator *= (const float factor) { g *= factor; return *this; } tGray_A& operator /= (const tGray_A& c) { g /= c.g; return *this; } tGray_A& operator /= (const float factor) { g /= factor; return *this; } //! Unary minus inline tGray_A operator - () const { return tGray_A(-g, a); }; //! Addition of a scalar (analog to -=) inline tGray_A operator + (const T& c) const { return tGray_A(g+c, a); }; //! Subtraction of a scalar (analog to +=) inline tGray_A operator - (const T& c) const { return tGray_A(g-c, a); }; //! Multiplication of a scalar (analog to *=) inline tGray_A operator * (const T& c) const { return tGray_A(g*c, a); }; //! Division by a scalar (analog to /=) inline tGray_A operator / (const T& c) const { return tGray_A(g/c, a); }; }; template inline tGray_A operator+ (const tGray_A& c1, const tGray_A& c2) { tGray_A result(c1); return (result += c2); } template inline tGray_A operator- (const tGray_A& c1, const tGray_A& c2) { tGray_A result(c1); return (result -= c2); } template inline tGray_A operator* (const tGray_A& c1, const tGray_A& c2) { tGray_A result(c1); return (result *= c2); } template inline tGray_A operator* (const tGray_A& c, T factor) { tGray_A result(c); return (result *= factor); } template inline tGray_A operator* (T factor, const tGray_A& c) { tGray_A result(c); return (result *= factor); } template inline tGray_A operator / (const tGray_A& c1, const tGray_A& c2) { tGray_A result(c1); return (result /= c2); } template inline tGray_A operator / (const tGray_A& c, T factor) { tGray_A result(c); return (result /= factor); } template inline bool operator < (const tGray_A& c1, const tGray_A& c2) { return (c1.grayValue() < c2.grayValue()); } template inline tGray_A operator ! (const tGray_A& c) { tGray_A result = tGray_A::White; return (result -= c); } template inline std::ostream& operator << (std::ostream& os, const tGray_A& c) { os << "(" << c.g << " " << c.a << ")"; return os; } template <> inline std::ostream& operator << (std::ostream& os, const tGray_A& c) { os << "(" << (int)c.g << " " << (int)c.a << ")"; return os; } typedef tGray_A bGray_A; typedef tGray_A fGray_A; typedef tGray_A dGray_A; } /* Close Namespace "gravis" */ #endif relion-3.1.3/src/jaz/gravis/tImage.h000066400000000000000000000424671411340063500172450ustar00rootroot00000000000000#ifndef __LIBGRAVIS_T_IMAGE_H__ #define __LIBGRAVIS_T_IMAGE_H__ /****************************************************************************** ** Title: tImage.h ** Description: Implements two dimensional array with row-major memory layout. ** ** Author: Jean-Sebastien Pierrard, 2009 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #include #include #include "tRGB.h" #include "tBGR.h" #include "tRGBA.h" #include "tRGB_A.h" #include "tGray_A.h" #include "tArray.h" #include #include "tImage/traits.h" /*! 
** \file tImage.h */ namespace gravis { template class tImage; } #include "tImage/access.hxx" #include "tImage/interpolation.hxx" namespace gravis { /*! ** \class tImage ** \brief Implements two dimensional array with row-major memory layout. ** ** This class represents an image of arbitrary pixel type. TODO ** ** For operations on images look at tImage/operators.h, tImage/???.h. */ template class tImage { inline static bool has_ending(const std::string& filename, const std::string& ending) { if (filename.size() < ending.size()) return false; for (size_t i=0; i::Scalar_t scalar_type; typedef T* iterator; tImage (); tImage (size_t, size_t, std::string=""); tImage (size_t, size_t, T const& value ); tImage (const tImage&); tImage& operator=(const tImage&); ~tImage (); tImage clone () const; tImage& setSize (size_t, size_t); tImage& resize (size_t, size_t); tImage& setName (std::string); tImage& fill (T); std::string name () const; size_t cols () const; size_t rows () const; size_t size () const; /** Returns the number of components per pixel **/ size_t components () const; const T& operator() (size_t, size_t) const; T& operator() (size_t, size_t); /** Returns the component specified by column, row and, component number (channel) **/ const scalar_type& operator() (size_t, size_t, size_t) const; scalar_type& operator() (size_t, size_t, size_t); const T& operator [] (size_t) const; T& operator [] (size_t); /** Returns the component specified by index (as [] operator) and, component number (channel) **/ const scalar_type& comp(size_t, size_t) const; scalar_type& comp(size_t, size_t); iterator begin () const; iterator end () const; const T* data () const; T* data (); const T* data (size_t, size_t) const; T* data (size_t, size_t); void read (const std::string&); /** * Detect the filetype from the ending. **/ void write(const std::string&) const; void writePNM (const std::string&) const; void writePNG (const std::string&) const; //void writeJPG (const std::string&, int quality=100) const; /** * Interpolated access to the image * * Usage * * image.interpolate(x, y) * image.interpolate(x, y) * image.interpolate(x, y) * * See interpolation:: namespace for other methods. * * Beware: * if using this inside of a templated function or class, you have to write * image.template interpolate(x, y), which is quite * awfull. **/ template inline T interpolate(const Float& x, const Float& y) const { return InterpolationMethod::getPixel(*this, x, y); } /** * Default interpolation mode is Cubic **/ template inline T interpolate(const Float& x, const Float& y) const { return interpolation::Cubic::getPixel(*this, x, y); } /** * Checked access to the image, with configurable behaviour. * * Usage * * image.access(x, y) * image.access(x, y) * image.access(x, y) * image.access(x, y) * * Beware: * if using this inside of a templated function or class, you have to write * image.template access(x, y), which is quite * awfull. * **/ template inline T access(const int& x, const int& y) const { return AccessMethod::getPixel(*this, x, y); } /** * Default access mode is access::Repeat **/ inline T access(const int& x, const int& y) const { return access::Repeat::getPixel(*this, x, y); } /** * tImage Convolution using the access specified access method. 
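*
* A hedged sketch (added for illustration; `img` is assumed to be an existing
* tImage<float>) of building a 3x3 box kernel and convolving with the Mirror
* access policy (the policy structs are named Zero, Repeat, Mirror and Wrap
* in tImage/access.hxx):
* \code
* gravis::tImage<float> kernel(3, 3);
* kernel.fill(1.0f / 9.0f);
* gravis::tImage<float> smoothed = img.convolve<gravis::access::Mirror>(kernel);
* \endcode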
* * The access methods include: * AccessZero * AccessRepeat * AccessWrapped * AccessMirrored * * Usage: * * tImage result = image.convolve< access::AccessMirrored >(kernel); * * * Beware: * if using this inside of a templated function or class, you have to write * image.template convolve(kernel), which is quite * awfull. **/ template tImage convolve(const tImage< typename tImageTraits::Float_t >& kernel) const { int klmargin, ktmargin; if ((kernel.cols() % 2) == 0) { klmargin = (kernel.cols() >> 1) - 1; } else { klmargin = (kernel.cols() >> 1); } if ((kernel.rows() % 2) == 0) { ktmargin = (kernel.rows() >> 1) - 1; } else { ktmargin = (kernel.rows() >> 1); } tImage lhs(cols(), rows()); for (int r=0; r<(int)rows(); ++r) { for (int c=0; c<(int)cols(); ++c) { T sum = T(0); for (int ky=0; ky<(int)kernel.rows(); ++ky) { for (int kx=0; kx<(int)kernel.cols(); ++kx) { sum += kernel(kx, ky) * access(kx-klmargin+c, ky-ktmargin+r); } } lhs(c, r) = sum; } } return lhs; } /** * Default access method is Repeat **/ tImage convolve(const tImage< typename tImageTraits::Float_t >& kernel) const { return (*this).template convolve(kernel); } /** Clamp an image by calling the clamp() method on each element **/ void clamp() { for (size_t i=0; i image; tArray accel; iterator p_begin; iterator p_end; }; } /* Close namespace "gravis" */ /****************************************************************************** ** tImage implementation ******************************************************************************/ #include "Exception.h" #include "private/tImageIO.hxx" #include "private/tImageConverter.hxx" #include "private/tImageIO_PNM.hxx" #include "private/tImageIO_PNG.hxx" //#include "private/tImageIO_JPG.hxx" namespace gravis { /*! ** \brief Default constructor. */ template inline tImage::tImage () : p_name(""), wd(0), ht(0), image(), accel(), p_begin(), p_end() { } /*! ** \brief Constructor. ** ** \param width Set number of columns. ** \param height Set number of rows. ** \param name Sets a name for the image (\em optional). */ template inline tImage::tImage (size_t width, size_t height, std::string name) : p_name(name), wd(width), ht(height), // Allocate space for channel data and indexing accelerators image(width* height), accel(height), p_begin(image.data()), p_end(image.data()+image.size()) { // Compute pointers to beginning of each line for (size_t y=0; y inline tImage::tImage (size_t width, size_t height, T const& value) : p_name(""), wd(width), ht(height), // Allocate space for channel data and indexing accelerators image(width* height), accel(height), p_begin(image.data()), p_end(image.data()+image.size()) { // Compute pointers to beginning of each line for (size_t y=0; y inline tImage::tImage (const tImage& rhs) : p_name (rhs.p_name), wd (rhs.wd), ht (rhs.ht), image (rhs.image), accel (rhs.accel), p_begin(rhs.p_begin), p_end (rhs.p_end) { } /*! ** \brief Reference Semantic Assignemnt ** ** The assignmment has reference-semantic, i.e. the image data is not actually ** copied. Instead a new handle to the same data is created. ** ** \param rhs */ template inline tImage &tImage::operator =(const tImage& rhs) { p_name = rhs.p_name; wd = rhs.wd; ht = rhs.ht; image = rhs.image; accel = rhs.accel; p_begin = rhs.p_begin; p_end = rhs.p_end; return *this; } /*! ** \brief Destructor. ** ** Destroy the object(handle). The image data is \em only deleted if no other ** instance of this class holds a reference to it. */ template inline tImage::~tImage () { } /*! ** \brief Create a deep-copy of the image data. 
** ** \return A new tImage object. ** ** \warning This method creates a byte-wise copy of the image data. When ** applied to compound types (e.g. T=std::vector) it is very likely to ** cause serious problems. */ template tImage tImage::clone () const { // Allocate new image with same name and dimensions tImage result(wd, ht, p_name); // Copy the data memcpy(result.data(), data(), wd*ht*sizeof(T)); return result; } /*! ** \brief Resize image. ** ** \param nwd Number of columns in resized image. ** \param nht Number of rows in resized image. ** ** \return ** \warning The original data is not copied TODO?? */ template inline tImage& tImage::resize (size_t nwd, size_t nht) { if ((nwd != wd) || (nht != ht)) *this = tImage(nwd, nht, p_name); return *this; } /*! ** \brief Resize image. ** ** \param nwd Number of columns in resized image. ** \param nht Number of rows in resized image. ** ** \return ** \warning The original data is not copied TODO?? */ template inline tImage& tImage::setSize (size_t nwd, size_t nht) { return resize(nwd, nht); } template inline size_t tImage::rows () const { return ht; } template inline size_t tImage::cols () const { return wd; } template inline size_t tImage::size () const { return image.size(); } template inline std::string tImage::name () const { return p_name; } template inline size_t tImage::components() const { return tImageTraits::components(); } template inline tImage& tImage::setName (std::string name) { p_name = name; return *this; } template inline tImage& tImage::fill (T value) { image.fill(value); return *this; } template inline const T& tImage::operator() (size_t x, size_t y) const { #ifdef _GRAVIS_DEBUG_RANGECHECKING_ assert( x >= 0 && x < cols() ); assert( y >= 0 && y < rows() ); #endif return (accel[y])[x]; } template inline T& tImage::operator() (size_t x, size_t y) { #ifdef _GRAVIS_DEBUG_RANGECHECKING_ assert( x >= 0 && x < cols() ); assert( y >= 0 && y < rows() ); #endif return (accel[y])[x]; } template inline const typename tImage::scalar_type& tImage::operator() (size_t x, size_t y, size_t c) const { #ifdef _GRAVIS_DEBUG_RANGECHECKING_ assert( x >= 0 && x < cols() ); assert( y >= 0 && y < rows() ); assert( c >= 0 && c < tImageTraits::components() ); #endif const scalar_type* p = reinterpret_cast(p_begin); return p[(y*cols() + x) * tImageTraits::components() + c]; } template inline typename tImage::scalar_type& tImage::operator() (size_t x, size_t y, size_t c) { #ifdef _GRAVIS_DEBUG_RANGECHECKING_ assert( x >= 0 && x < cols() ); assert( y >= 0 && y < rows() ); assert( c >= 0 && c < tImageTraits::components() ); #endif scalar_type* p = reinterpret_cast(p_begin); return p[(y*cols() + x) * tImageTraits::components() + c]; } template inline const T& tImage::operator[] (size_t n) const { #ifdef _GRAVIS_DEBUG_RANGECHECKING_ assert((n >= 0) && (n < image.size())); #endif return *(p_begin + n); } template inline T& tImage::operator[] (size_t n) { #ifdef _GRAVIS_DEBUG_RANGECHECKING_ assert((n >= 0) && (n < image.size())); #endif return *(p_begin + n); } template inline const typename tImage::scalar_type& tImage::comp(size_t n, size_t c) const { #ifdef _GRAVIS_DEBUG_RANGECHECKING_ assert((n >= 0) && (n < image.size())); assert( c >= 0 && c < tImageTraits::components() ); #endif const scalar_type* p = reinterpret_cast(p_begin); return *(p + n*tImageTraits::components() + c); } template inline typename tImage::scalar_type& tImage::comp(size_t n, size_t c) { #ifdef _GRAVIS_DEBUG_RANGECHECKING_ assert((n >= 0) && (n < image.size())); assert( c >= 0 && c < 
tImageTraits::components() ); #endif scalar_type* p = reinterpret_cast(p_begin); return *(p + n*tImageTraits::components() + c); } template inline typename tImage::iterator tImage::begin () const { return p_begin; } template inline typename tImage::iterator tImage::end () const { return p_end; } template inline const T* tImage::data () const { return image.data(); } template inline T* tImage::data () { return image.data(); } template inline const T* tImage::data (size_t x, size_t y) const { return accel[y] + x; } template inline T* tImage::data (size_t x, size_t y) { return accel[y] + x; } template inline void tImage::read (const std::string& filename) { /* if (priv::JPGImageReader::canHandle(filename)) { priv::JPGImageReader reader; reader.read(*this, filename); return; } */ char header[512]; std::ifstream is(filename.c_str(), std::ios::in | std::ios::binary); if (!is.good()) { GRAVIS_THROW3(Exception, "Unable to open file", filename); } is.read(&header[0], sizeof(header)); is.close(); if (priv::PNMImageReader::canHandle(header)) { priv::PNMImageReader reader; reader.read(*this, filename.c_str()); return; } if (priv::PNGImageReader::canHandle(header)) { priv::PNGImageReader reader; reader.read(*this, filename.c_str()); return; } GRAVIS_THROW3(gravis::Exception, "Can't handle this file.", filename); } template inline void tImage::write(const std::string& filename) const { if /* (has_ending(filename, "jpg") || has_ending(filename, "jpeg")) writeJPG(filename); else if*/ (has_ending(filename, "png")) writePNG(filename); else if (has_ending(filename, "pnm")) writePNM(filename); else GRAVIS_THROW3(gravis::Exception, "Could not determine filetype from filename: ", filename); } template inline void tImage::writePNM (const std::string& filename) const { priv::PNMImageWriter writer; writer.write(*this, filename.c_str()); } template inline void tImage::writePNG (const std::string& filename) const { priv::PNGImageWriter writer; writer.write(*this, filename.c_str()); } /* template inline void tImage::writeJPG (const std::string& filename, int quality) const { priv::JPGImageWriter writer; writer.write(*this, filename.c_str(), quality); } */ } /* Close namespace "gravis" */ #endif relion-3.1.3/src/jaz/gravis/tImage/000077500000000000000000000000001411340063500170575ustar00rootroot00000000000000relion-3.1.3/src/jaz/gravis/tImage/.svn/000077500000000000000000000000001411340063500177435ustar00rootroot00000000000000relion-3.1.3/src/jaz/gravis/tImage/.svn/all-wcprops000066400000000000000000000022131411340063500221270ustar00rootroot00000000000000K 25 svn:wc:ra_dav:version-url V 64 /repos/gravis/!svn/ver/22953/libs/libGravis/trunk/include/tImage END CMakeLists.txt K 25 svn:wc:ra_dav:version-url V 79 /repos/gravis/!svn/ver/21790/libs/libGravis/trunk/include/tImage/CMakeLists.txt END interpolation.hxx K 25 svn:wc:ra_dav:version-url V 82 /repos/gravis/!svn/ver/22187/libs/libGravis/trunk/include/tImage/interpolation.hxx END draw.h K 25 svn:wc:ra_dav:version-url V 71 /repos/gravis/!svn/ver/22187/libs/libGravis/trunk/include/tImage/draw.h END operators.h K 25 svn:wc:ra_dav:version-url V 76 /repos/gravis/!svn/ver/22187/libs/libGravis/trunk/include/tImage/operators.h END access.hxx K 25 svn:wc:ra_dav:version-url V 75 /repos/gravis/!svn/ver/22187/libs/libGravis/trunk/include/tImage/access.hxx END distance_transform.h K 25 svn:wc:ra_dav:version-url V 85 /repos/gravis/!svn/ver/22187/libs/libGravis/trunk/include/tImage/distance_transform.h END normalization.h K 25 svn:wc:ra_dav:version-url V 80 
Another possibility would be to move the // trait definitions into the include files of the respective datatypes. #include "../t2Vector.h" #include "../t3Vector.h" #include "../t4Vector.h" #include "../t2Matrix.h" #include "../t3Matrix.h" #include "../t4Matrix.h" #include "../tMatrix.h" #include "../tRGB.h" #include "../tBGR.h" #include "../tYCbCr.h" #include "../tRGBA.h" #include "../tRGB_A.h" #include "../tGray_A.h" #include "../tLab.h" namespace gravis { template struct tImageTraits { }; #define DEFINE_TRAIT( aPixel_t, aScalar_t, aFloat_t) \ template <> \ struct tImageTraits< aPixel_t > { \ typedef aScalar_t Scalar_t; \ typedef aFloat_t Float_t; \ typedef aPixel_t Pixel_t; \ static unsigned int components(){ \ return sizeof(aPixel_t)/sizeof(Scalar_t); \ } \ } #define DEFINE_ALL_COMPOUND_TRAITS( aScalar_t, aFloat_t) \ DEFINE_TRAIT( aScalar_t, aScalar_t, aFloat_t); \ DEFINE_TRAIT( tRGB< aScalar_t >, aScalar_t, aFloat_t); \ DEFINE_TRAIT( tBGR< aScalar_t >, aScalar_t, aFloat_t); \ DEFINE_TRAIT( tRGBA< aScalar_t >, aScalar_t, aFloat_t); \ DEFINE_TRAIT( tRGB_A< aScalar_t >, aScalar_t, aFloat_t); \ DEFINE_TRAIT( tGray_A< aScalar_t >, aScalar_t, aFloat_t); \ DEFINE_TRAIT( tYCbCr< aScalar_t >, aScalar_t, aFloat_t); \ DEFINE_TRAIT( tLab< aScalar_t >, aScalar_t, aFloat_t); \ DEFINE_TRAIT( t2Vector< aScalar_t >, aScalar_t, aFloat_t); \ DEFINE_TRAIT( t3Vector< aScalar_t >, aScalar_t, aFloat_t); \ DEFINE_TRAIT( t4Vector< aScalar_t >, aScalar_t, aFloat_t); \ DEFINE_TRAIT( t2Matrix< aScalar_t >, aScalar_t, aFloat_t); \ DEFINE_TRAIT( t3Matrix< aScalar_t >, aScalar_t, aFloat_t); \ DEFINE_TRAIT( t4Matrix< aScalar_t >, aScalar_t, aFloat_t) DEFINE_ALL_COMPOUND_TRAITS( char, float ); DEFINE_ALL_COMPOUND_TRAITS( unsigned char, float ); DEFINE_ALL_COMPOUND_TRAITS( signed char, float ); DEFINE_ALL_COMPOUND_TRAITS( unsigned int, double ); DEFINE_ALL_COMPOUND_TRAITS( signed int, double ); DEFINE_ALL_COMPOUND_TRAITS( unsigned short int, double ); DEFINE_ALL_COMPOUND_TRAITS( signed short int, double ); DEFINE_ALL_COMPOUND_TRAITS( signed long int, double ); DEFINE_ALL_COMPOUND_TRAITS( unsigned long int, double ); DEFINE_ALL_COMPOUND_TRAITS( float, float ); DEFINE_ALL_COMPOUND_TRAITS( double, double ); DEFINE_ALL_COMPOUND_TRAITS( long double, long double ); DEFINE_ALL_COMPOUND_TRAITS( bool, double ); } #endif relion-3.1.3/src/jaz/gravis/tImage/CMakeLists.txt000066400000000000000000000004351411340063500216210ustar00rootroot00000000000000set( install_files access.hxx distance_transform.h draw.h interpolation.hxx normalization.h operators.h traits.h ) INSTALL(FILES ${install_files} DESTINATION ${CMAKE_INSTALL_PREFIX}/include/${LIBTITLE}-${LIBVERSION}/${LIBTITLE}/tImage) relion-3.1.3/src/jaz/gravis/tImage/access.hxx000066400000000000000000000042071411340063500210540ustar00rootroot00000000000000/****************************************************************************** ** Title: Checked access for tImage ** Description: Checked Image access ** ** Author: Jean Sebastian Pierrard, 2005 ** Brian Amberg, 2007 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #ifndef __GRAVIS__IMAGE_ACCESS__ #define __GRAVIS__IMAGE_ACCESS__ namespace gravis { namespace access { //! 
Functor for access behind the image borders struct Zero { template static T getPixel (const tImage& image, int x, int y) { if (x < 0) return T(0); else if (x >= (int)image.cols()) return T(0); if (y < 0) return T(0); else if (y >= (int)image.rows()) return T(0); return image(x, y); } }; //! Functor for access behind the image borders struct Repeat { template static const T& getPixel (const tImage& image, int x, int y) { if (x < 0) x = 0; else if (x >= (int)image.cols()) x = image.cols()-1; if (y < 0) y = 0; else if (y >= (int)image.rows()) y = image.rows()-1; return image(x, y); } }; //! Functor for access behind the image borders struct Mirror { template static const T& getPixel (const tImage& image, int x, int y) { if (x < 0) x = 0-x; else if (x >= (int)image.cols()) x = 2*(image.cols()-1) - x; if (y < 0) y = 0-y; else if (y >= (int)image.rows()) y = 2*(image.rows()-1) - y; return image(x, y); } }; //! Functor for access behind the image borders struct Wrap { template static const T& getPixel (const tImage& image, int x, int y) { if (x < 0) x = image.cols()-1+x; else if (x >= (int)image.cols()) x = x-image.cols(); if (y < 0) y = image.rows()-1+y; else if (y >= (int)image.rows()) y = y-image.rows(); return image(x, y); } }; } } #endif relion-3.1.3/src/jaz/gravis/tImage/distance_transform.h000066400000000000000000000207301411340063500231170ustar00rootroot00000000000000/*************************************************************************//*! * Title: tImage/distance_transform.h * Description: Implements drawing operations on images without the need * for the libRender library. * * Author: Brian Amberg, 2006-2007 * Computer Science Department, University Basel (CH) ****************************************************************************/ #ifndef __GRAVIS__TIMAGE__DISTANCE_TRANSFORM__ #define __GRAVIS__TIMAGE__DISTANCE_TRANSFORM__ #include "../tImage/traits.h" namespace gravis { template inline static T sqr(const T& a) { return a*a; } /** * Binarize an image using an intensity threshold **/ template static inline void intensityThreshold(tImage &out, const tImage &in, const F& threshold = 0) { out.resize(in.cols(), in.rows()); for (size_t i=0; i threshold; }; /** * Binarize an image using a threshold on the alpha channel **/ template static inline void alphaThreshold(tImage &out, const tImage &in, const F& threshold = typename tImageTraits::Float_t(0)) { out.resize(in.cols(), in.rows()); for (size_t i=0; i threshold; }; /** * w=1, iterations=2 gives perfect results, while still being fast **/ template inline static void distanceTransform(tImage< F > &out, const tImage< Pixel > &in, const int& w=3, const size_t& iterations=3) { const int W=in.cols(); const int H=in.rows(); out.resize(W,H); // Temporary Memory (parents) tImage< t2Vector< int > > p(W, H); tImage dm(2*w+1, 2*w+1); // distances for (int x=0; x<2*w+1; ++x) for (int y=0; y<2*w+1; ++y) dm(x,y) = sqrt(sqr(y-w)+sqr(x-w)); // initialize out.fill( in.cols()+in.rows() ); p.fill( t2Vector(0,0) ); //initialize immediate interior elements for (int x=0; x-1; y--) for (int x=0; x-1; x--) distanceTransformApplyRegionSave; for (int y=0; y=w; y--) for (int x=W-w-1; x>=w; x--) distanceTransformApplyRegion; //Make the border for (int y=w-1; y>-1; y--) for (int x=0; x-1; x--) distanceTransformApplyRegionSave; for (int y=0; y inline static void distanceTransformSq(tImage< F > &out, const tImage< Pixel > &in, const int& w=3, const size_t& iterations=3) { const int W=in.cols(); const int H=in.rows(); out.resize(W,H); // Temporary Memory 
(parents) tImage< t2Vector< int > > p(W, H); tImage dm(2*w+1, 2*w+1); // distances for (int x=0; x<2*w+1; ++x) for (int y=0; y<2*w+1; ++y) dm(x,y) = sqr(y-w)+sqr(x-w); // initialize out.fill( in.cols()+in.rows() ); p.fill( t2Vector(0,0) ); //initialize immediate interior elements for (int x=0; x-1; y--) for (int x=0; x-1; x--) distanceTransformApplyRegionSave; for (int y=0; y=w; y--) for (int x=W-w-1; x>=w; x--) distanceTransformApplyRegion; //Make the border for (int y=w-1; y>-1; y--) for (int x=0; x-1; x--) distanceTransformApplyRegionSave; for (int y=0; y static inline void draw_line(gravis::tImage &I, const gravis::t2Vector &a, const gravis::t2Vector &b, const Pixel& c1, const Pixel& c2) { typedef typename tImageTraits::Float_t ImageFloat; // Bounding Box const F min_x_f = std::max(F(0), std::min(a[0], b[0])); const F max_x_f = std::min(F(I.cols()-1), std::max(a[0], b[0])); const F min_y_f = std::max(F(0), std::min(a[1], b[1])); const F max_y_f = std::min(F(I.rows()-1), std::max(a[1], b[1])); // Bounding Box const size_t min_x = int( ceil( min_x_f ) ); const size_t max_x = int( floor( max_x_f ) ); const size_t min_y = int( ceil( min_y_f ) ); const size_t max_y = int( floor( max_y_f ) ); F alpha; // Loop over bounding box if (max_x - min_x > max_y - min_y) { if (a.x < b.x) { for (size_t x=min_x; x<=max_x; ++x) { alpha = (F(x) - a.x) / (b.x - a.x); const size_t y = size_t(a.y + alpha * (b.y - a.y) + F(0.5)); I(x, y) = ImageFloat(F(1) - alpha) * c1 + ImageFloat(alpha) * c2; } } else { for (size_t x=min_x; x<=max_x; ++x) { alpha = (F(x) - b.x) / (a.x - b.x); const size_t y = size_t(b.y + alpha * (a.y - b.y) + F(0.5)); I(x, y) = ImageFloat(F(1) - alpha) * c2 + ImageFloat(alpha) * c1; } } } else { if (a.y < b.y) { for (size_t y=min_y; y<=max_y; ++y) { alpha = (F(y) - a.y) / (b.y - a.y); const size_t x = size_t(a.x + alpha * (b.x - a.x) + F(0.5)); I(x, y) = ImageFloat(F(1) - alpha) * c1 + ImageFloat(alpha) * c2; } } else { for (size_t y=min_y; y<=max_y; ++y) { alpha = (F(y) - b.y) / (a.y - b.y); const size_t x = size_t(b.x + alpha * (a.x - b.x) + F(0.5)); I(x, y) = ImageFloat(F(1) - alpha) * c2 + ImageFloat(alpha) * c1; } } } } /** * Fill an image of type T with an interpolated triangle. Uses the float type * F. Beware: using double actually makes better triangles, see the testcase. **/ template static inline void fill_triangle(gravis::tImage &I, const gravis::t2Vector &a, const gravis::t2Vector &b, const gravis::t2Vector &c, const Pixel& c1, const Pixel& c2, const Pixel& c3) { typedef typename tImageTraits::Float_t ImageFloat; // Divisor const F det = a[0] * (b[1] - c[1]) + b[0] * (c[1] - a[1]) + c[0] * (a[1] - b[1]); if (det == F(0)) // The triangle is singular, it has no area return; // Bounding Box const F min_x_f = std::max(F(0), std::min(std::min(a[0], b[0]), c[0])); const F max_x_f = std::min(F(I.cols()-1), std::max(std::max(a[0], b[0]), c[0])); const F min_y_f = std::max(F(0), std::min(std::min(a[1], b[1]), c[1])); const F max_y_f = std::min(F(I.rows()-1), std::max(std::max(a[1], b[1]), c[1])); // Bounding Box const int min_x = int( ceil( min_x_f ) ); const int max_x = int( floor( max_x_f ) ); const int min_y = int( ceil( min_y_f ) ); const int max_y = int( floor( max_y_f ) ); t3Vector lambda; t3Vector lambdaWorld; // Loop over bounding box for (int x = min_x; x <= max_x; ++x) { bool found = false; // Good for larger triangles, but may slow it down for small triangles. This is anyhow a relatively slow method, but obviously correct. 
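// Note on the scan below: `lambda` holds the barycentric coordinates of the
// pixel (x, y) with respect to the triangle (a, b, c), obtained via Cramer's
// rule from the signed-area determinant `det` computed above. The pixel lies
// inside the triangle exactly when all three coordinates are non-negative, in
// which case the vertex values c1, c2, c3 are blended with those weights.
// Since a vertical line crosses a triangle in one contiguous segment, the
// column scan may stop (`break`) as soon as it leaves the triangle after
// having been inside it (`found`).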
for (int y = min_y; y <= max_y; ++y) { // Gets Barycentric Coordinates in Screen space lambda[0] = (F(x) * (b[1] - c[1]) + b[0] * (c[1] - F(y)) + c[0] * (F(y) - b[1])) / det; lambda[1] = (a[0] * (F(y) - c[1]) + F(x) * (c[1] - a[1]) + c[0] * (a[1] - F(y))) / det; lambda[2] = F(1) - lambda[0] - lambda[1]; // Test if inside triangle if ((F(0) <= lambda[0]) && (F(0) <= lambda[1]) && (F(0) <= lambda[2])) { found = true; I(x,y) = ImageFloat(lambda.x) * c1 + ImageFloat(lambda.y) * c2 + ImageFloat(lambda.z) * c3; } else if (found) break; } } } template static inline void draw_circle(gravis::tImage &I, const gravis::t2Vector ¢er, const F& radius, const Pixel& c) { const F pi=3.14159265358979323846264338327950288419716939937510; const int w=I.cols(); const int h=I.rows(); for (F a=0; a<=0.25*pi; a+=1.0/(2.0*pi*radius)) { const int ds=int( radius*sin(a) ); const int dc=int( radius*cos(a) ); const int x1 = int(center.x + ds); const int y1 = int(center.y + dc); const int x2 = int(center.x - ds); const int y2 = int(center.y - dc); const int x3 = int(center.x + dc); const int y3 = int(center.y + ds); const int x4 = int(center.x - dc); const int y4 = int(center.y - ds); if (0<=x1 && x1 tImage inset(const tImage &img1, const tImage &img2, const int col, const int row) { int min_x = -std::min(0, col); int min_y = -std::min(0, row); int max_x = std::min((int)img2.cols(), (int)img1.cols()-col); int max_y = std::min((int)img2.rows(), (int)img1.rows()-row); tImage lhs = img1.clone(); for (int y=min_y; y namespace gravis { namespace interpolation { /** * Nearest Neighbour Image Access * * Usage: * image.interpolate(20.2, 20.4) **/ struct NearestNeighbour { private: template inline static int round(const T& v) { return v < 0 ? int(v-T(0.5)) : int(v+T(0.5)); } public: template static inline T getPixel(const tImage& image, const F& x, const F& y) { const int x_i = round(x); const int y_i = round(y); return image.access(x_i, y_i); } }; /** * Linearly interpolated image access * * Usage: * image.interpolate(20.2, 20.4) **/ struct Linear { private: template inline static int round(const T& v) { return v < 0 ? int(v-T(0.5)) : int(v+T(0.5)); } public: template static inline T getPixel(const tImage& image, const F& x, const F& y) { const int x_i = int(floor(x)); const int y_i = int(floor(y)); const F dx = x-x_i; const F dy = y-y_i; return (image.access(x_i , y_i ) * (F(1.0)-dx) + image.access(x_i+1 , y_i ) * ( dx)) * (F(1.0)-dy) + (image.access(x_i , y_i+1) * (F(1.0)-dx) + image.access(x_i+1 , y_i+1) * ( dx)) * ( dy); } }; /** * Cubic interpolated image access * * Usage: * image.interpolate(20.2, 20.4) **/ struct Cubic { private: template inline static int round(const T& v) { return v < 0 ? 
int(v-T(0.5)) : int(v+T(0.5)); } template static inline T cubicInterpolation(const T& a, const T& b, const T& c, const T& d, const F& x) { const T p = (d - c) - (a - b); const T q = (a - b) - p; const T r = c - a; return p*(x*x*x) + q*(x*x) + r*x + b; } public: template static inline T getPixel(const tImage& image, const F& x, const F& y) { int x_i = int(floor(x)); int y_i = int(floor(y)); const F dx = x-x_i; const F dy = y-y_i; return cubicInterpolation( cubicInterpolation( image.access(x_i-1, y_i-1), image.access(x_i, y_i-1), image.access(x_i+1, y_i-1), image.access(x_i+2, y_i-1), dx), cubicInterpolation( image.access(x_i-1, y_i ), image.access(x_i, y_i ), image.access(x_i+1, y_i ), image.access(x_i+2, y_i ), dx), cubicInterpolation( image.access(x_i-1, y_i+1), image.access(x_i, y_i+1), image.access(x_i+1, y_i+1), image.access(x_i+2, y_i+1), dx), cubicInterpolation( image.access(x_i-1, y_i+2), image.access(x_i, y_i+2), image.access(x_i+1, y_i+2), image.access(x_i+2, y_i+2), dx), dy); } }; /** * Wrapper around another interpolation method, that first scales textures from [0,1]x[0,1] to [0,width-1]x[0,height-1] * * Usage: * image.interpolate(0.2, 0.4) **/ template struct TextureCoordinateAccess { template static inline T getPixel(const tImage& image, const F& x, const F& y) { return AccessMethod::getPixel(image, x*F(image.cols()-1), y*F(image.rows()-1)); } }; /** * Nearest Neighbour interpolated access to the image, using coordinates in [0,1] * * Usage: * image.interpolate(0.2, 0.4) **/ typedef TextureCoordinateAccess NearestNeighbourTextureCoordinate; /** * Linear interpolated access to the image, using coordinates in [0,1] * * Usage: * image.interpolate(0.2, 0.4) **/ typedef TextureCoordinateAccess LinearTextureCoordinate; /** * Cubic interpolated access to the image, using coordinates in [0,1] * * Usage: * image.interpolate(0.2, 0.4) **/ typedef TextureCoordinateAccess CubicTextureCoordinate; } } #endif relion-3.1.3/src/jaz/gravis/tImage/normalization.h000066400000000000000000000265251411340063500221300ustar00rootroot00000000000000/*************************************************************************//*! * Title: tImageNormalization.h * Description: Implements image normalization * * Author: Brian Schroeder, 2006 * Computer Science Department, University Basel (CH) ****************************************************************************/ /*!\file * Implements image normalization on scalar, rgb, rgba and vector 1,2,3 entries. * * Usage: * After doing * \code * include * \endcode * you can use normalize() and normalizeI() on the images. * */ #ifndef __GRAVIS__TIMAGE_NORMALIZATION__ #define __GRAVIS__TIMAGE_NORMALIZATION__ #include #include #include #include #include #include #include #include namespace gravis { //! @cond INTERN namespace priv { //! Scalar minmax template struct FunMinMaxS { void operator()(T& min, T& max, const T& p) const { min = p; max = p; }; }; //! access to .r .g .b template struct FunMinMaxRGB { void operator()(T& min, T& max, const RGB& p) const { min = std::min(std::min(p.r, p.g), p.b); max = std::max(std::max(p.r, p.g), p.b); }; }; //! access to .g template struct FunMinMaxGA { void operator()(T& min, T& max, const GA& p) const { min = p.g; max = p.g; }; }; //! access to .r .g .b template struct FunMinMaxRGBChannel { void operator()(RGB& min, RGB& max, const RGB& p) const { min.r = std::min(min.r,p.r); min.g = std::min(min.g,p.g); min.b = std::min(min.b,p.b); max.r = std::max(max.r,p.r); max.g = std::max(max.g,p.g); max.b = std::max(max.b,p.b); }; }; //! 
Things accessible by operator[] template struct FunMinMaxV { void operator()(T& min, T& max, const V& p) const { min = p[0]; max = p[0]; for (int j=1; j void normalizeI(tImage< T > &in, const FUN& fun) { typedef typename tImageTraits< T >::Scalar_t S; typedef typename tImageTraits< T >::Float_t F; T* data = in.data(); if(!data) return; const T* end = data + in.rows() * in.cols(); S min, max; fun(min, max, *data); for (const T* p = data; p tImage< T > normalize(const tImage< T > &in, const FUN& fun) { tImage< T > lhs = in.clone(); normalizeI(lhs, fun); return lhs; }; //! @endcond //! Scalar Image inplace normalization // Scales and offsets the pixel values, such that the max and min over all pixels is 0 and 1 respectively. template void normalizeI(tImage< T > &in) { priv::FunMinMaxS fun; normalizeI< T, priv::FunMinMaxS >(in, fun); } //! Scalar Image normalization // Scales and offsets the pixel values, such that the max and min over all pixels is 0 and 1 respectively. template tImage< T > normalize(const tImage< T > &in) { priv::FunMinMaxS fun; return normalize< T, priv::FunMinMaxS >(in, fun); } //! Gray_A Image inplace normalization // Scales and offsets the pixel values, such that the max and min over all pixels is 0 and 1 respectively. template void normalizeI(tImage > &in) { priv::FunMinMaxGA > fun; normalizeI< tGray_A, priv::FunMinMaxGA > >(in, fun); } //! Gray_A Image normalization // Scales and offsets the pixel values, such that the max and min over all pixels is 0 and 1 respectively. template tImage< tGray_A > normalize(const tImage< tGray_A > &in) { priv::FunMinMaxGA > fun; return normalize< T, priv::FunMinMaxGA > >(in, fun); } //! RGB Image inplace normalization // Scales and offsets all channels simultaneously, such that the max and min over all pixels and channels is 0 and 1 respectively. template void normalizeI(tImage< tRGB< T > > &in) { priv::FunMinMaxRGB< T, tRGB > fun; normalizeI< tRGB< T >, priv::FunMinMaxRGB< T, tRGB > >(in, fun); } //! RGB Image normalization // Scales and offsets all channels simultaneously, such that the max and min over all pixels and channels is 0 and 1 respectively. template tImage< tRGB< T > > normalize(const tImage< tRGB< T > > &in) { priv::FunMinMaxRGB< T, tRGB > fun; return normalize< tRGB< T >, priv::FunMinMaxRGB< T, tRGB > >(in, fun); } //! RGBA Image inplace normalization // Scales and offsets all channels simultaneously, such that the max and min over all pixels and channels is 0 and 1 respectively. Ignores the alpha channel. template void normalizeI(tImage< tRGBA< T > > &in) { priv::FunMinMaxRGB< T, tRGBA > fun; normalizeI< tRGBA< T >, priv::FunMinMaxRGB< T, tRGBA > >(in, fun); } //! RGBA Image normalization // Scales and offsets all channels simultaneously, such that the max and min over all pixels and channels is 0 and 1 respectively. Ignores the alpha channel. template tImage< tRGBA< T > > normalize(tImage< tRGBA< T > > &in) { priv::FunMinMaxRGB< T, tRGBA > fun; return normalize< tRGBA< T >, priv::FunMinMaxRGB< T, tRGBA > >(in, fun); } //! RGB_A Image inplace normalization // Scales and offsets all channels simultaneously, such that the max and min over all pixels and channels is 0 and 1 respectively. Ignores the alpha channel. template void normalizeI(tImage< tRGB_A< T > > &in) { priv::FunMinMaxRGB< T, tRGB_A > fun; normalizeI< tRGB_A< T >, priv::FunMinMaxRGB< T, tRGB_A > >(in, fun); } //! 
RGB_A Image normalization // Scales and offsets all channels simultaneously, such that the max and min over all pixels and channels is 0 and 1 respectively. Ignores the alpha channel. template tImage< tRGB_A< T > > normalize(tImage< tRGB_A< T > > &in) { priv::FunMinMaxRGB< T, tRGB_A > fun; return normalize< tRGB_A< T >, priv::FunMinMaxRGB< T, tRGB_A > >(in, fun); } //! RGB Image inplace normalization // Scales and offsets all channels seperatly, such that the max and min over all pixels and channels is 0 and 1 respectively. template void normalizeIC(tImage< tRGB< T > > &in) { priv::FunMinMaxRGBChannel > fun; normalizeI< tRGB< T >, priv::FunMinMaxRGBChannel > >(in, fun); } //! RGB Image normalization // Scales and offsets all channels seperatly, such that the max and min over all pixels and channels is 0 and 1 respectively. template tImage< tRGB< T > > normalizeC(const tImage< tRGB< T > > &in) { priv::FunMinMaxRGBChannel > fun; return normalize< tRGB< T >, priv::FunMinMaxRGBChannel > >(in, fun); } //! RGBA Image inplace normalization // Scales and offsets all channels seperatly, such that the max and min over all pixels and channels is 0 and 1 respectively. Ignores the alpha channel. template void normalizeIC(tImage< tRGBA< T > > &in) { priv::FunMinMaxRGBChannel > fun; normalizeI< tRGBA< T >, priv::FunMinMaxRGBChannel > >(in, fun); } //! RGBA Image normalization // Scales and offsets all channels seperatly, such that the max and min over all pixels and channels is 0 and 1 respectively. Ignores the alpha channel. template tImage< tRGBA< T > > normalizeC(tImage< tRGBA< T > > &in) { priv::FunMinMaxRGBChannel > fun; return normalize< tRGBA< T >, priv::FunMinMaxRGBChannel > >(in, fun); } //! RGB_A Image inplace normalization // Scales and offsets all channels seperatly, such that the max and min over all pixels and channels is 0 and 1 respectively. Ignores the alpha channel. template void normalizeIC(tImage< tRGB_A< T > > &in) { priv::FunMinMaxRGBChannel > fun; normalizeI< tRGB_A< T >, priv::FunMinMaxRGBChannel > >(in, fun); } //! RGB_A Image normalization // Scales and offsets all channels seperatly, such that the max and min over all pixels and channels is 0 and 1 respectively. Ignores the alpha channel. template tImage< tRGB_A< T > > normalizeC(tImage< tRGB_A< T > > &in) { priv::FunMinMaxRGBChannel > fun; return normalize< tRGB_A< T >, priv::FunMinMaxRGBChannel > >(in, fun); } //! t2Vector Image inplace normalization // Scales and offsets all dimensions simultaneously, such that the max and min over all pixels and dimensions is 0 and 1 respectively. template void normalizeI(tImage< t2Vector< T > > &in) { priv::FunMinMaxV< T, t2Vector, 2> fun; normalizeI< t2Vector< T >, priv::FunMinMaxV< T, t2Vector, 2> >(in, fun); } //! t 2Vector Image normalization // Scales and offsets all dimensions simultaneously, such that the max and min over all pixels and dimensions is 0 and 1 respectively. template tImage< t2Vector< T > > normalize(const tImage< t2Vector< T > > &in) { priv::FunMinMaxV< T, t2Vector, 2> fun; return normalize< t2Vector< T >, priv::FunMinMaxV< T, t2Vector, 2> >(in, fun); } //! t3Vector Image inplace normalization // Scales and offsets all dimensions simultaneously, such that the max and min over all pixels and dimensions is 0 and 1 respectively. template void normalizeI(tImage< t3Vector< T > > &in) { priv::FunMinMaxV< T, t3Vector, 2> fun; normalizeI< t3Vector< T >, priv::FunMinMaxV< T, t3Vector, 2> >(in, fun); } //! 
t3Vector Image normalization // Scales and offsets all dimensions simultaneously, such that the max and min over all pixels and dimensions is 0 and 1 respectively. template tImage< t3Vector< T > > normalize(const tImage< t3Vector< T > > &in) { priv::FunMinMaxV< T, t3Vector, 3> fun; return normalize< t3Vector< T >, priv::FunMinMaxV< T, t3Vector, 3> >(in, fun); } //! t4Vector Image inplace normalization // Scales and offsets all dimensions simultaneously, such that the max and min over all pixels and dimensions is 0 and 1 respectively. template void normalizeI(tImage< t4Vector< T > > &in) { priv::FunMinMaxV< T, t4Vector, 2> fun; normalizeI< t4Vector< T >, priv::FunMinMaxV< T, t4Vector, 2> >(in, fun); } //! t4Vector Image normalization // Scales and offsets all dimensions simultaneously, such that the max and min over all pixels and dimensions is 0 and 1 respectively. template tImage< t4Vector< T > > normalize(const tImage< t4Vector< T > > &in) { priv::FunMinMaxV< T, t4Vector, 4> fun; return normalize< t4Vector< T >, priv::FunMinMaxV< T, t4Vector, 4> >(in, fun); } }; #endif relion-3.1.3/src/jaz/gravis/tImage/operators.h000066400000000000000000000237161411340063500212570ustar00rootroot00000000000000/*************************************************************************//*! * Title: tImage/operators.h * Description: Implements operators on tImages * * Author: Brian Amberg, 2006-2007 * Computer Science Department, University Basel (CH) ****************************************************************************/ /*!\file * Implements operators on tImages. * * Usage: * After doing * \code * include * \endcode * you can do pixelwise operations in images using overloaded functions. * * For Example to rescale an image into [-1,1]: * \code * tImage< tRGB > image; * image.read('test.png'); * image -= 0.5; * image *= 2.0; * \endcode * */ #ifndef __GRAVIS__IMAGE_OPERATORS__ #define __GRAVIS__IMAGE_OPERATORS__ #include namespace gravis { // \@{ //! Inplace image - image operation template void imageOpI(tImage &lhs, const tImage &rhs, const OP& op) { if ((lhs.cols() != rhs.cols()) || (lhs.rows() != rhs.rows())) throw("Incompatible sizes."); T1* ldata = lhs.data(); const T2* rdata = rhs.data(); const T1* end = ldata + lhs.cols()*lhs.rows(); for (; ldata tImage imageOp(const tImage &rhs1, const tImage &rhs2, const OP& op) { tImage lhs(rhs2.cols(), rhs2.rows()); T1* ldata = lhs.data(); const T1* r1data = rhs1.data(); const T2* r2data = rhs2.data(); const T1* end = ldata + lhs.cols()*lhs.rows(); for (; ldata void imageOpI(tImage &lhs, const F& rhs, const OP& op) { T* ldata = lhs.data(); const T* end = ldata + lhs.cols()*lhs.rows(); for (; ldata tImage imageOp(const tImage &rhs1, const F& rhs2, const OP& op) { tImage lhs(rhs1.cols(), rhs1.rows()); T1* ldata = lhs.data(); const T1* rdata = rhs1.data(); const T1* end = ldata + lhs.cols()*lhs.rows(); for (; ldata tImage imageOp(const F& rhs1, const tImage &rhs2, const OP& op) { tImage lhs(rhs2.cols(), rhs2.rows()); T1* ldata = lhs.data(); const T1* rdata = rhs2.data(); const T1* end = ldata + lhs.cols()*lhs.rows(); for (; ldata void imageOpI(tImage &img, const OP& op) { T* data = img.data(); const T* end = data + img.cols()*img.rows(); for (; data tImage imageOp(const tImage &img, const OP& op) { tImage lhs = img.clone(); imageOpI(lhs, op); return lhs; } // \@} namespace priv { //! Inplace Subtraction Functor template struct FunSubI { inline void operator()(T1& a, const T2& b) const { a -= b; } }; //! 
Inplace Addition Functor template struct FunAddI { inline void operator()(T1& a, const T2& b) const { a += b; } }; //! Inplace Multiplication Functor template struct FunMulI { inline void operator()(T1& a, const T2& b) const { a *= b; } }; //! Inplace Division Functor template struct FunDivI { inline void operator()(T1& a, const T2& b) const { a /= b; } }; //! Out of place Subtraction Functor template struct FunSub { inline R operator()(const T1& a, const T2& b) const { return a - b; } }; //! Out of place Addition Functor template struct FunAdd { inline R operator()(const T1& a, const T2& b) const { return a + b; } }; //! Out of place Multiplication Functor template struct FunMul { inline R operator()(const T1& a, const T2& b) const { return a * b; } }; //! Out of place Division Functor template struct FunDiv { inline R operator()(const T1& a, const T2& b) const { return a / b; } }; //! negation functor template struct FunNeg { inline T operator()(const T& a) const { return -a; } }; //! not functor template struct FunNot { inline T operator()(const T& a) const { return !a; } }; //! abs functor template struct FunAbs { inline T operator()(const T& a) const { return abs(a); } }; } //! Subtract one image from another inplace. template void operator-=(tImage &lhs, const tImage &rhs) { priv::FunSubI fun; imageOpI(lhs, rhs, fun); } //! Add one image to another inplace. template void operator+=(tImage &lhs, const tImage &rhs) { priv::FunAddI fun; imageOpI(lhs, rhs, fun); } //! Multiply an image with another inplace. template void operator*=(tImage &lhs, const tImage &rhs) { priv::FunMulI fun; imageOpI(lhs, rhs, fun); } //! Divide one image with another inplace. template void operator/=(tImage &lhs, const tImage &rhs) { priv::FunDivI fun; imageOpI(lhs, rhs, fun); } //! Subtract one image from another. template tImage operator-(const tImage &rhs1, const tImage &rhs2) { priv::FunSub fun; return imageOp(rhs1, rhs2, fun); } //! Add one image to another template tImage operator+(const tImage &rhs1, const tImage &rhs2) { priv::FunAdd fun; return imageOp(rhs1, rhs2, fun); } //! Multiply one image with another template tImage operator*(const tImage &rhs1, const tImage &rhs2) { priv::FunMul fun; return imageOp(rhs1, rhs2, fun); } //! Divide one image by another template tImage operator/(const tImage &rhs1, const tImage &rhs2) { priv::FunDiv fun; return imageOp(rhs1, rhs2, fun); } //! Subtract a scalar from an image inplace. template void operator-=(tImage &lhs, const F& rhs) { priv::FunSubI fun; imageOpI(lhs, rhs, fun); } //! Add a scalar to an image inplace. template void operator+=(tImage &lhs, const F& rhs) { priv::FunAddI fun; imageOpI(lhs, rhs, fun); } //! Multiply an image with a scalar inplace. template void operator*=(tImage &lhs, const F& rhs) { priv::FunMulI fun; imageOpI(lhs, rhs, fun); } //! Divide an image by a scalar inplace. template void operator/=(tImage &lhs, const F& rhs) { priv::FunDivI fun; imageOpI(lhs, rhs, fun); } //! Subtract an scalar from an image template tImage operator-(const tImage &rhs1, const F& rhs2) { priv::FunSub fun; return imageOp(rhs1, rhs2, fun); } //! Add a scalar to an image template tImage operator+(const tImage &rhs1, const F& rhs2) { priv::FunAdd fun; return imageOp(rhs1, rhs2, fun); } //! Multiply an image with a scalar template tImage operator*(const tImage &rhs1, const F& rhs2) { priv::FunMul fun; return imageOp(rhs1, rhs2, fun); } //! 
Divide an image by a scalar template tImage operator/(const tImage &rhs1, const F& rhs2) { priv::FunDiv fun; return imageOp(rhs1, rhs2, fun); } //! Left subtraction of scalar with image template tImage operator-(const F& rhs1, const tImage &rhs2) { priv::FunSub fun; return imageOp(rhs1, rhs2, fun); } //! Left addition of scalar to image template tImage operator+(const F& rhs1, const tImage &rhs2) { priv::FunAdd fun; return imageOp(rhs1, rhs2, fun); } //! Left multiply an image with a scalar template tImage operator*(const F& rhs1, const tImage &rhs2) { priv::FunMul fun; return imageOp(rhs1, rhs2, fun); } //! Left divide an image with a scalar template tImage operator/(const F& rhs1, const tImage &rhs2) { priv::FunDiv fun; return imageOp(rhs1, rhs2, fun); } //! Negate an image template tImage operator-(const tImage &img) { priv::FunNeg fun; return imageOp(img, fun); } //! Calculate the absolute of an image template tImage abs(const tImage &img) { priv::FunAbs fun; return imageOp(img, fun); } //! Calculate the absolute of an image inplace template void absI(tImage &img) { priv::FunAbs fun; return imageOpI(img, fun); } //! Negate an image template tImage operator!(const tImage &img) { priv::FunNot fun; return imageOp(img, fun); } } #endif relion-3.1.3/src/jaz/gravis/tImage/traits.h000066400000000000000000000066611411340063500205470ustar00rootroot00000000000000/****************************************************************************** ** Title: tImage/traits.h ** Description: tImage traits neccessary for nice implementation of stuff ** like convolution ** ** Author: Brian Amberg ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #ifndef _TIMAGE_TRAITS_H_ #define _TIMAGE_TRAITS_H_ // TODO: This centralizes traits definition, but includes everything, which is // bad. It would be better to use predeclarations of these classes here, but // that does not work with templates. Another possibility would be to move the // trait definitions into the include files of the respective datatypes. 
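// Usage sketch (illustrative; the concrete pixel type and variable names are
// only an example): generic image code queries tImageTraits to obtain the
// per-channel scalar type, an associated floating point type and the number
// of channels of a pixel type, e.g.
//
//   typedef gravis::tImageTraits< gravis::tRGB<unsigned char> > Traits;
//   Traits::Scalar_t s = 0;                    // unsigned char
//   Traits::Float_t  f = 0;                    // float
//   unsigned int     n = Traits::components(); // 3 for an RGB pixel
//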
#include "../t2Vector.h" #include "../t3Vector.h" #include "../t4Vector.h" #include "../t2Matrix.h" #include "../t3Matrix.h" #include "../t4Matrix.h" #include "../tMatrix.h" #include "../tRGB.h" #include "../tBGR.h" #include "../tYCbCr.h" #include "../tRGBA.h" #include "../tRGB_A.h" #include "../tGray_A.h" #include "../tLab.h" namespace gravis { template struct tImageTraits { typedef float Scalar_t; typedef float Float_t; typedef float Pixel_t; }; #define DEFINE_TRAIT( aPixel_t, aScalar_t, aFloat_t) \ template <> \ struct tImageTraits< aPixel_t > { \ typedef aScalar_t Scalar_t; \ typedef aFloat_t Float_t; \ typedef aPixel_t Pixel_t; \ static unsigned int components(){ \ return sizeof(aPixel_t)/sizeof(Scalar_t); \ } \ } #define DEFINE_ALL_COMPOUND_TRAITS( aScalar_t, aFloat_t) \ DEFINE_TRAIT( aScalar_t, aScalar_t, aFloat_t); \ DEFINE_TRAIT( tRGB< aScalar_t >, aScalar_t, aFloat_t); \ DEFINE_TRAIT( tBGR< aScalar_t >, aScalar_t, aFloat_t); \ DEFINE_TRAIT( tRGBA< aScalar_t >, aScalar_t, aFloat_t); \ DEFINE_TRAIT( tRGB_A< aScalar_t >, aScalar_t, aFloat_t); \ DEFINE_TRAIT( tGray_A< aScalar_t >, aScalar_t, aFloat_t); \ DEFINE_TRAIT( tYCbCr< aScalar_t >, aScalar_t, aFloat_t); \ DEFINE_TRAIT( tLab< aScalar_t >, aScalar_t, aFloat_t); \ DEFINE_TRAIT( t2Vector< aScalar_t >, aScalar_t, aFloat_t); \ DEFINE_TRAIT( t3Vector< aScalar_t >, aScalar_t, aFloat_t); \ DEFINE_TRAIT( t4Vector< aScalar_t >, aScalar_t, aFloat_t); \ DEFINE_TRAIT( t2Matrix< aScalar_t >, aScalar_t, aFloat_t); \ DEFINE_TRAIT( t3Matrix< aScalar_t >, aScalar_t, aFloat_t); \ DEFINE_TRAIT( t4Matrix< aScalar_t >, aScalar_t, aFloat_t) DEFINE_ALL_COMPOUND_TRAITS( char, float ); DEFINE_ALL_COMPOUND_TRAITS( unsigned char, float ); DEFINE_ALL_COMPOUND_TRAITS( signed char, float ); DEFINE_ALL_COMPOUND_TRAITS( unsigned int, double ); DEFINE_ALL_COMPOUND_TRAITS( signed int, double ); DEFINE_ALL_COMPOUND_TRAITS( unsigned short int, double ); DEFINE_ALL_COMPOUND_TRAITS( signed short int, double ); DEFINE_ALL_COMPOUND_TRAITS( signed long int, double ); DEFINE_ALL_COMPOUND_TRAITS( unsigned long int, double ); DEFINE_ALL_COMPOUND_TRAITS( float, float ); DEFINE_ALL_COMPOUND_TRAITS( double, double ); DEFINE_ALL_COMPOUND_TRAITS( long double, long double ); DEFINE_ALL_COMPOUND_TRAITS( bool, double ); } #endif relion-3.1.3/src/jaz/gravis/tImageAlgorithm.h000066400000000000000000000011331411340063500210750ustar00rootroot00000000000000#ifndef __LIBGRAVIS_T_IMAGE_ALGORITHM_H__ #define __LIBGRAVIS_T_IMAGE_ALGORITHM_H__ /****************************************************************************** ** Title: tImageAlgorithm.h ** Description: Collection of standalone image algorithms. ** ** Author: Jean-Sebastien Pierrard, 2005 ** Brian Schroeder 2006 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #include "tImage/convolution.h" #include "tImage/normalization.h" #include "tImage/operators.h" #endif relion-3.1.3/src/jaz/gravis/tLab.h000066400000000000000000000142171411340063500167110ustar00rootroot00000000000000#ifndef __LIBGRAVIS_T_LAB_H__ #define __LIBGRAVIS_T_LAB_H__ /****************************************************************************** ** Title: tLab.h ** Description: Represents an L*a*b* color tupel. ** ******************************************************************************/ #include namespace gravis { template class tLab { /*! 
* Private helper functions, wrapped into an additional struct in case that we want to use the names **/ struct priv { static inline const T& min(const T& a, const T& b) { return ab ? a : b; } }; public: typedef T scalar_type; T L, a, b; tLab () : L(T(0)), a(T(0)), b(T(0)) { } tLab (T L, T a, T b) : L(L), a(a), b(b) { } // tLab (T gray) : (gray), g(gray), b(gray) { } void set (T _y, T _cb, T _cr) { L = _y; a = _cb; b = _cr; } // void add (T _r, T _g, T _b) { // r += _r; g += _g; b += _b; // } T intensity () const { return L; } /* bool operator != (const tLab& c) const { return r != c.r || g != c.g || b != c.b; } bool operator == (const tLab& c) const { return r == c.r && g == c.g && b == c.b; } */ tLab& operator += (const tLab& c) { L += c.L; a += c.a; b += c.b; return *this; } /* tLab& operator += (const T gray) { r += gray; g += gray; b += gray; return *this; } */ tLab& operator -= (const tLab& c) { L -= c.L; a -= c.a; b -= c.b; return *this; } // tLab& operator -= (const T gray) { // r -= gray; g -= gray; b -= gray; // return *this; // } tLab& operator *= (const tLab& c) { L *= c.L; a *= c.a; b *= c.b; return *this; } tLab& operator *= (const T factor) { L *= factor; a *= factor; b *= factor; return *this; } /* tLab& operator /= (const tLab& c) { r /= c.r; g /= c.g; b /= c.b; return *this; } tLab& operator /= (const T factor) { r /= factor; g /= factor; b /= factor; return *this; } * \brief All color components are clamped to [0,1]. This function works inplace. * * \return self tLab& clamp() { r = priv::min(priv::max(r, 0), 1); g = priv::min(priv::max(g, 0), 1); b = priv::min(priv::max(b, 0), 1); return *this; } //! Unary minus inline tLab operator - () const { return tLab(-r, -g, -b); }; //! Addition of a scalar (analog to -=) inline tLab operator + (const T& c) const { return tLab(r+c, g+c, b+c); }; //! Subtraction of a scalar (analog to +=) inline tLab operator - (const T& c) const { return tLab(r-c, g-c, b-c); }; */ //! Multiplication of a scalar (analog to *=) inline tLab operator * (const T& c) const { return tLab(L*c, a*c, b*c); }; /* //! Division by a scalar (analog to /=) inline tLab operator / (const T& c) const { return tLab(r/c, g/c, b/c); }; bool operator == (const tLab& arg) { return ((arg.r == r) && (arg.g == g) && (arg.b == b)); } const T &operator [](const size_t &i) const { return (&r)[i]; } T &operator [](const size_t &i) { return (&r)[i]; } */ }; template inline tLab operator + (const tLab& c1, const tLab& c2) { tLab result = c1; return (result += c2); } template inline tLab operator - (const tLab& c1, const tLab& c2) { tLab result = c1; return (result -= c2); } /* template inline tLab operator * (const tLab& c1, const tLab& c2) { tLab result(c1.r * c2.r, c1.g * c2.g, c1.b * c2.b); return result; } */ template inline tLab operator * (const tLab& c, T factor) { tLab result(c.L * factor, c.a * factor, c.b * factor); return result; } template inline tLab operator * (T factor, const tLab& c) { tLab result(c.L * factor, c.a * factor, c.b * factor); return result; } /* template inline tLab operator / (const tLab& c1, const tLab& c2) { tLab result(c1.r / c2.r, c1.g / c2.g, c1.b / c2.b); return result; } template inline tLab operator / (const tLab& c, T factor) { tLab result(c.r / factor, c.g / factor, c.b / factor); return result; } template inline bool operator < (const tLab& c1, const tLab& c2) { T gray1 = c1.grayValue(); T gray2 = c2.grayValue(); return (gray1 < gray2); } template inline tLab operator ! 
(const tLab& c) { tLab result = tLab::White(); return (result -= c); } // Absolute of every color channel template inline tLab abs(const tLab& c) { return tLab(c.r < T(0) ? -c.r : c.r, c.g < T(0) ? -c.g : c.g, c.b < T(0) ? -c.b : c.b); } template inline std::ostream& operator << (std::ostream& os, const tLab& c) { os << "(" << c.r << " " << c.g << " " << c.b << ")"; return os; } template <> inline std::ostream& operator << (std::ostream& os, const tLab& c) { os << "(" << (int)c.r << " " << (int)c.g << " " << (int)c.b << ")"; return os; } template inline T dot (const tLab& v1, const tLab& v2) { return (v1.r*v2.r + v1.g*v2.g + v1.b*v2.b); } */ //typedef tLab bRGB; typedef tLab fLab; typedef tLab dLab; } #endif relion-3.1.3/src/jaz/gravis/tMM.h000066400000000000000000000403331411340063500165220ustar00rootroot00000000000000#ifndef __LIBGRAVIS_T_MM_H__ #define __LIBGRAVIS_T_MM_H__ #include "tVarMatrix.h" #include #include namespace gravis { template class tConstMM; template class tMM { public: typedef tMatrix Vector; std::string title; tVarVector< Vector > nu; tVarMatrix< Vector > D; const size_t& m() const { return D.h; }; // Number of vertices const size_t& k() const { return D.w; }; // Number of paramters tMM(const std::string& title="Morphable Model") : title(title), nu(title+"::nu"), D(title+"::D") { }; inline void evaluate(tVectorView< Vector > &v, const tConstVectorView< T > &a) const { tConstMM MM(*this); MM.evaluate(v, a); } inline void evaluate(tVarVector< Vector > &v, const tConstVectorView< T > &a) const { tConstMM MM(*this); MM.evaluate(v, a); } inline void resize(size_t h, size_t w) { D.resize(h,w); nu.resize(h); }; inline void clear() { matrix::clear(D); matrix::clear(nu); }; // Check if the file to load has the right datatype bool load_is_compatible(const std::string& fn) { char mmid0[33] = "GRAVIS_MORPHABLE_MODEL "; char mmid1[33] = " "; std::ifstream stream(fn.c_str(), std::ifstream::binary); uint8_t uint32_size; uint8_t T_size; uint32_t m,k; uint16_t endianness; stream.read(mmid1, 32); stream.read((char*)&endianness, 2); stream.read((char*)&uint32_size, 1); stream.read((char*)&T_size, 1); stream.read((char*)&m, sizeof(m)); stream.read((char*)&k, sizeof(k)); GRAVIS_CHECK( 0 == strncmp( mmid0, mmid1, 31 ), "Not a gravis morphable model file" ); GRAVIS_CHECK( endianness == 0x0001, "Wrong endianness"); if (uint32_size != 4) { std::cerr << "Uint 32 size is " << uint32_size << std::endl; } GRAVIS_CHECK( uint32_size == 4, "Wrong uint32_size size"); return( T_size == sizeof(T) ); } void load(const std::string& fn) { char mmid0[33] = "GRAVIS_MORPHABLE_MODEL "; char mmid1[33] = " "; std::ifstream stream(fn.c_str(), std::ifstream::binary); uint8_t uint32_size; uint8_t T_size; uint32_t m,k; uint16_t endianness; stream.read(mmid1, 32); stream.read((char*)&endianness, 2); stream.read((char*)&uint32_size, 1); stream.read((char*)&T_size, 1); stream.read((char*)&m, sizeof(m)); stream.read((char*)&k, sizeof(k)); GRAVIS_CHECK( 0 == strncmp( mmid0, mmid1, 31 ), "Not a gravis morphable model file" ); GRAVIS_CHECK( endianness == 0x0001, "Wrong endianness"); if (uint32_size != 4) { std::cerr << "Uint 32 size is " << uint32_size << std::endl; } GRAVIS_CHECK( uint32_size == 4, "Wrong uint32_size size"); GRAVIS_CHECK( T_size == sizeof(T), "Wrong type in model file"); resize(m, k); clear(); stream.read((char*)D.data, sizeof(Vector)*D.size()); stream.read((char*)nu.data, sizeof(Vector)*nu.size()); char mmid2[33] = " "; stream.read(mmid2, 32); GRAVIS_CHECK( 0 == strncmp( mmid0, mmid2, 31 ), "File did not 
end with the end marker" ); } void save(const std::string& fn) { tConstMM cm(*this); cm.save(fn); } // Create a new interpolated model from barycentric coordinates into the old model inline void interpolate(tMM &out, const tConstMatrixView &idx, const tConstMatrixView &weight) const { tConstMM cm(*this); cm.interpolate(out, idx, weight); } // Create a new model from chosen lines of the old model inline void submodel(tMM &out, const tConstVectorView &chosen) const { tConstMM cm(*this); cm.submodel(out, chosen); } // Create a new model from chosen lines of the old model inline void submodel(tMM &out, const tConstVectorView &chosen) const { tConstMM cm(*this); cm.submodel(out, chosen); } // Create a new model from chosen lines of the old model inline void submodel(tMM &out, const std::vector &chosen) const { tConstMM cm(*this); cm.submodel(out, chosen); } // Create a new model from chosen lines of the old model inline void submodel(tMM &out, const std::vector &chosen) const { tConstMM cm(*this); cm.submodel(out, chosen); } // Create a new interpolated model from barycentric coordinates into the old model inline void interpolate(tMM &out, const tConstMatrixView &idx, const tConstMatrixView &weight, const size_t& n_coeff) const { tConstMM cm(*this); cm.interpolate(out, idx, weight, n_coeff); } // Create a new interpolated model from barycentric coordinates into the old model inline void interpolate(tMM &out, const tConstMatrixView &idx, const tConstMatrixView &weight, const int& n_coeff) const { tConstMM cm(*this); cm.interpolate(out, idx, weight, n_coeff); } // Create a new model from chosen lines of the old model inline void submodel(tMM &out, const tConstVectorView &chosen, const size_t& n_coeff) const { tConstMM cm(*this); cm.submodel(out, chosen, n_coeff); } // Create a new model from chosen lines of the old model inline void submodel(tMM &out, const tConstVectorView &chosen, const size_t& n_coeff) const { tConstMM cm(*this); cm.submodel(out, chosen, n_coeff); } // Create a new model from chosen lines of the old model inline void submodel(tMM &out, const std::vector &chosen, const size_t& n_coeff) const { tConstMM cm(*this); cm.submodel(out, chosen, n_coeff); } // Create a new model from chosen lines of the old model inline void submodel(tMM &out, const std::vector &chosen, const int& n_coeff) const { tConstMM cm(*this); cm.submodel(out, chosen, n_coeff); } }; template class tConstMM { public: typedef tMatrix Vector; const tConstVectorView< Vector > nu; const tConstMatrixView< Vector > D; const size_t& m() const { return D.h; }; // Number of vertices const size_t& k() const { return D.w; }; // Number of paramters tConstMM(const tConstVectorView &nu, const tConstMatrixView &D) : nu(nu), D(D) { GRAVIS_CHECK( nu.h == D.h, "Morphable model is inconsistent" ); }; tConstMM(const tMM &o) : nu(o.nu), D(o.D) {}; tConstMM(const tConstMM &o) : nu(o.nu), D(o.D) {}; #ifdef MATLAB tConstMM(const tmxConstMatrixView &_nu, const tmxConstMatrixView &_D) : nu((Vector*)_nu.data, _nu.dims[0]/3) , D((Vector*)_D.data, _D.dims[0]/3, _D.dims[1]) { GRAVIS_CHECK( nu.h == D.h, "Morphable model is inconsistent" ); }; #endif // If a is too short assumes zeros for the unset coefficents // If a is too long assumes zeros for the missing principal components inline void evaluate(tVectorView< Vector > &v, const tConstVectorView< T > &a) const { size_t K=std::min(k(), a.size()); // GRAVIS_CHECK( a.size() == k(), "a and D are incompatible"); GRAVIS_CHECK( v.size() == m(), "v and nu are incompatible"); GRAVIS_CHECK( nu.size() 
== m(), "k and nu are incompatible"); // Apply the morphable model #if 0 v = nu; for (size_t j=0; j vv((T*)v.data, 3*v.size()); tConstVectorView< T > vnu((T*)nu.data, 3*nu.size()); tConstMatrixView< T > vD((T*)D.data, 3*D.h, K); tConstVectorView< T > va(a.data, K); matrix::addmult(vv, vnu, vD, va); // USING BLAS } inline void evaluate(tVarVector< Vector > &v, const tConstVectorView< T > &a) const { v.resize(m()); tVectorView< tMatrix > vv(v); evaluate(vv, a); } void save(const std::string& fn) { char mmid[33] = "GRAVIS_MORPHABLE_MODEL "; std::ofstream stream(fn.c_str(), std::ofstream::binary); uint8_t uint32_size = sizeof(uint32_t); if (uint32_size != 4) { std::cerr << "Uint 32 size is " << uint32_size << std::endl; } uint8_t T_size = sizeof(T); uint32_t m_ = m(), k_ = k(); uint16_t endianness = 0x0001; stream.write(mmid, 32); stream.write((char*)&endianness, 2); stream.write((char*)&uint32_size, 1); stream.write((char*)&T_size, 1); stream.write((char*)&(m_), sizeof(m_)); stream.write((char*)&(k_), sizeof(k_)); stream.write((char*)D.data, sizeof(Vector)*D.size()); stream.write((char*)nu.data, sizeof(Vector)*nu.size()); stream.write(mmid, 32); } // Create a new interpolated model from barycentric coordinates into the old model inline void interpolate(tMM &out, const tConstMatrixView &idx, const tConstMatrixView &weight) const { const tConstMM &model = *this; GRAVIS_CHECK( idx.w == weight.w && idx.h == weight.h, "idx and weight should be kxn and kxn"); out.resize(idx.w, model.k()); const size_t& n = idx.w; const size_t& t = idx.h; const size_t& K = model.k(); // Initialize to zero out.clear(); // Write out.nu for (size_t i=0; i &out, const tConstVectorView &chosen) const { const tConstMM &model = *this; GRAVIS_CHECK( chosen.h == model.m(), "Chosen and model are incompatible"); size_t n = 0; for (size_t i=0; i &out, const tConstVectorView &chosen) const { const tConstMM &model = *this; size_t n = chosen.h; out.resize(n, model.k()); for (size_t I=0; I &out, const tConstVectorView &chosen) const { const tConstMM &model = *this; int n = chosen.h; out.resize(n, model.k()); for (int I=0; I &out, const std::vector &chosen) const { tConstVectorView vchosen(&chosen[0], chosen.size()); submodel(out, vchosen); } // Create a new model from chosen lines of the old model inline void submodel(tMM &out, const std::vector &chosen) const { tConstVectorView vchosen(&chosen[0], chosen.size()); submodel(out, vchosen); } // Create a new interpolated model from barycentric coordinates into the old model template inline void interpolate(tMM &out, const tConstMatrixView &idx, const tConstMatrixView &weight, const Int& n_coeff) const { const tConstMM &model = *this; GRAVIS_CHECK( idx.w == weight.w && idx.h == weight.h, "idx and weight should be kxn and kxn"); out.resize(idx.w, n_coeff); const Int& n = idx.w; const Int& t = idx.h; const Int K = std::min(n_coeff, model.k()); // Initialize to zero out.clear(); // Write out.nu for (Int i=0; i &out, const tConstVectorView &chosen, const size_t& n_coeff) const { const tConstMM &model = *this; GRAVIS_CHECK( chosen.h == model.m(), "Chosen and model are incompatible"); size_t n = 0; for (size_t i=0; i inline void submodel(tMM &out, const tConstVectorView &chosen, const Int& n_coeff) const { const tConstMM &model = *this; Int n = chosen.h; out.resize(n, n_coeff); const Int copy_coeff = std::min(n_coeff, model.k()); for (Int I=0; I inline void submodel(tMM &out, const std::vector &chosen, const Int& n_coeff) const { tConstVectorView vchosen(&chosen[0], chosen.size()); 
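// Delegate to the view-based overload above, which copies at most n_coeff
// model coefficients into the submodel.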
submodel(out, vchosen, n_coeff); } }; } #endif relion-3.1.3/src/jaz/gravis/tMatrix.h000066400000000000000000000503271411340063500174610ustar00rootroot00000000000000#ifndef __LIBGRAVIS_T_MATRIX_H__ #define __LIBGRAVIS_T_MATRIX_H__ /****************************************************************************** ** Title: matrix.h ** Description: Templated fixed size dense matrices, which are a ** complement to the fixed size t{2,3,4}{Vector,Matrix} classes. ** ** Author: Brian Amberg, 2007 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #include #include #include #include #include #include "StringFormat.h" #include "Exception.h" #ifdef DEBUG #define checkAccess1( i, h ) { if(!( (i)<(h) )) GRAVIS_THROW2(gravis::Exception, "Access out of bounds " #i "<" #h); } #define checkAccess2( i,j, h,w ) { if(!( (i)<(h) && (j)<(w) )) GRAVIS_THROW2(gravis::Exception, "Access out of bounds " #i "<" #h " && "#j"<" #w); } #define checkAccess3( i,j,k, h,w,d ) { if(!( (i)<(h) && (j)<(w) && (k)<(d))) GRAVIS_THROW2(gravis::Exception, "Access out of bounds " #i "<" #h " && "#j"<" #w " && " #k "<" #d); } #else #define checkAccess1( i, h ) { } #define checkAccess2( i,j, h,w ) { } #define checkAccess3( i,j,k, h,w,d ) { } #endif namespace gravis { template class tMatrix; namespace tMatrixPrivateConstructorTrick { struct CheckIfRightSize { template static void has_2_elements( const tMatrix &m ) {} template static void has_2_elements( const tMatrix &m ) {} template static void has_3_elements( const tMatrix &m ) {} template static void has_3_elements( const tMatrix &m ) {} template static void has_4_elements( const tMatrix &m ) {} template static void has_4_elements( const tMatrix &m ) {} template static void has_4_elements( const tMatrix &m ) {} template static void has_9_elements( const tMatrix &m ) {} template static void has_16_elements( const tMatrix &m ) {} }; } /** * Small Matrix Of Arbitrary size held completely in memory in consecutive positions. * The data is in row major order. 
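 *
 * Usage sketch (illustrative only; the dimensions and values are arbitrary):
 *   tMatrix<float, 3, 3> A(0.0f);    // 3x3 matrix with every element set to 0
 *   A(0, 0) = 1.0f;                  // element access is (row, column)
 *   tMatrix<float, 3, 3> B = A * A;  // matrix-matrix product
 *   A.negate();                      // in-place negation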
**/ template class tMatrix { public: typedef T scalar; T data[h* w]; /** * The data is not initialized **/ tMatrix() {}; /** * Copy constructor **/ tMatrix(const tMatrix &o) { memcpy( data, o.data, h*w*sizeof(T) ); } /** * Fill with copies of v values **/ explicit tMatrix(const T& v) { fill(v); } /** * Copy data from another matrix **/ tMatrix& operator=(const tMatrix& o) { memcpy( data, o.data, h*w*sizeof(T) ); return *this; } /** * Inplace negation **/ inline void negate() { tMatrix &m = *this; for (size_t i=0; i tMatrix operator*(const tMatrix &right) const { tMatrix out(0); const tMatrix& self(*this); for (size_t j=0; j0) memset( &data[0], 0, sizeof(data[0])*size()); } /** * Convenience function to clamp all elements of **/ inline void clamp(const T& min, const T& max) { for (size_t i=0; i static inline void cross(tMatrix &result, const tMatrix& a, const tMatrix& b) { result[0] = a[1]*b[2] - a[2]*b[1]; result[1] = a[2]*b[0] - a[0]*b[2]; result[2] = a[0]*b[1] - a[1]*b[0]; } } /** * Matrix Scalar Addition **/ template inline tMatrix operator-(const T& o, const tMatrix &self) { tMatrix r; for (size_t i=0; i inline tMatrix operator+(const T& o, const tMatrix &self) { tMatrix r; for (size_t i=0; i inline tMatrix operator*(const T& o, const tMatrix &self) { tMatrix r; for (size_t i=0; i inline static void addmult(tMatrix &out, const tMatrix &left, const tMatrix &right) { for (size_t j=0; j inline static void submult(tMatrix &out, const tMatrix &left, const tMatrix &right) { for (size_t j=0; j inline static void negate(tMatrix &m) { for (size_t i=0; i inline static void mult(const T& scalar, tMatrix &m) { for (size_t i=0; i inline static void mult(tMatrix &m, const T& scalar) { for (size_t i=0; i inline static void mult(tMatrix &out, const tMatrix &m, const T& scalar) { for (size_t i=0; i inline static void mult(tMatrix &out, const T& scalar, const tMatrix &m) { for (size_t i=0; i inline void add(tMatrix &self, const tMatrix &right) { for (size_t i=0; i inline void add(tMatrix &out, const tMatrix &self, const tMatrix &right) { for (size_t i=0; i inline void sub(tMatrix &self, const tMatrix &right) { for (size_t i=0; i inline void sub(tMatrix &out, const tMatrix &self, const tMatrix &right) { for (size_t i=0; i inline static void mult(tMatrix &out, const tMatrix &self, const tMatrix &right) { out.zeros(); for (size_t i=0; i inline static void mult(T& out, const tMatrix &self, const tMatrix &right) { out = self[0] * right[0]; for (size_t i=1; i inline static void mult(tMatrix &self, const tMatrix &right) { tMatrix tmp; mult(tmp, self, right); self = tmp; } /** Convenience Constructors **/ template inline static tMatrix tVector1(const T& a) { tMatrix v; v[0]=a; return v; } /** Convenience Constructors **/ template inline static tMatrix tVector2(const T& a, const T& b) { tMatrix v; v[0]=a; v[1]=b; return v; } /** Convenience Constructors **/ template inline static tMatrix tVector3(const T& a, const T& b, const T& c) { tMatrix v; v[0]=a; v[1]=b; v[2]=c; return v; } /** Convenience Constructors **/ template inline static tMatrix tVector4(const T& a, const T& b, const T& c, const T& d) { tMatrix v; v[0]=a; v[1]=b; v[2]=c; v[3]=d; return v; } /** Convenience Constructors **/ template inline static tMatrix tMatrix3( const T& a, const T& b, const T& c, const T& d, const T& e, const T& f, const T& g, const T& i, const T& h) { tMatrix m; m(0,0)=a; m(0,1)=b; m(0,2)=c; m(1,0)=d; m(1,1)=e; m(1,2)=f; m(2,0)=g; m(2,1)=i; m(2,2)=h; return m; } /** * Write fixed size matrices to a stream **/ template inline 
std::ostream& operator<< (std::ostream& os, const tMatrix& arg) { if ((h>1) && (w>1)) { os << "Matrix: " << h << "x" << w << std::endl; for (size_t i=0; i1) { os << "[ "; for (size_t j=0; j inline std::istream& operator>> (std::istream& is, tMatrix& arg) { std::string t; if ((h>1) && (w>1)) { is >> t >> t; for (size_t i=0; i> t; for (size_t j=0; j> arg(i,j); is >> t; } } else if (w==1 && h>1) { is >> t; if (t != "[") GRAVIS_THROW3(gravis::Exception, "Unexpected token. A vector should start with [", t); for (size_t j=0; j> arg[j]; is >> t; if (t != "]^T") GRAVIS_THROW3(gravis::Exception, "Unexpected token. A column vector should end with ]^T", t); } else { is >> t; if (t != "[") GRAVIS_THROW3(gravis::Exception, "Unexpected token. A vector should start with [", t); for (size_t j=0; j> arg[j]; is >> t; if (t != "]") GRAVIS_THROW3(gravis::Exception, "Unexpected token. A row vector should end with ]", t); } return is; } } #endif relion-3.1.3/src/jaz/gravis/tMesh.h000066400000000000000000000370731411340063500171140ustar00rootroot00000000000000#ifndef __LIBGRAVIS_T_MESH_H__ #define __LIBGRAVIS_T_MESH_H__ /****************************************************************************** ** Title: tMesh.h ** Description: Templated mesh representation using std:;vector. ** ** Author: Jean-Sebastien Pierrard, 2005 ** Brian Amberg, 2006 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #define ATT_PURE __attribute__ ((pure)) #include "tRGBA.h" #include "t2Vector.h" #include "t3Vector.h" #include "tImage.h" #include "Tuple.h" #include #include #include "Mesh.h" #include namespace gravis { template class tMaterial { public: /** * Helper class with lazy loading images with an associated filename. * It may also contain no image, for a textureless mesh. **/ class ImageFile { private: mutable bool loaded; mutable tImage< tRGBA > image; boost::filesystem::path filename; public: ImageFile() : loaded(false), image(), filename() {}; ImageFile(const std::string& fn) : loaded(false), image(), filename(fn) {}; inline const bool& isLoaded() const { return loaded; } /** * Load the image into memory, if it is not yet loaded **/ void load() const { if (!loaded) { reload(); } }; /** * Load the image into memory, even if it is already loaded **/ void reload() const { if (isSet()) { loaded = true; image.read( getFilenameNative() ); } }; /** * Load the given image **/ void load(const std::string& filename) { setFilename(filename); reload(); }; /** * Change the name of the image file * * The empty filename resets the image. * * Convenience function, assuming the filename is in native format. **/ void setFilename(const char* filename) { setFilename(boost::filesystem::path(filename)); }; /** * Change the name of the image file * * The empty filename resets the image. * * Convenience function, assuming the filename is in native format. **/ void setFilename(const std::string& filename) { setFilename(boost::filesystem::path(filename)); }; /** * Change the name of the image file * * The empty filename resets the image. **/ void setFilename(const boost::filesystem::path& filename) { this->filename = filename; }; /** * Return the image filename in native format. 
**/ const std::string getFilenameNative() const { return filename.string(); } /** * Return the image filename **/ const boost::filesystem::path& getFilename() const { return filename; } /** * Delete the texture **/ void reset() { filename = ""; image.resize(0,0); } /** * Do we represent the NULL image **/ ATT_PURE bool isSet() const { return filename != ""; } /** * Associate a texture from a tImage **/ void set(const std::string& filename, const tImage > &image) { loaded = true; this->filename = filename; this->image = image; } /** * Access the image. * There seems to be a problem with changing the image. **/ tImage< tRGBA > &getImage() { if (!loaded) load(); return image; } /** * Access the image **/ const tImage< tRGBA > &getImage() const { if (!loaded) load(); return image; } /** * Set the image **/ void setImage(const tImage > &img) { image=img; }; }; public: tMaterial(std::string n="") : name(n), ambient(T(0.1),T(1.0)), diffuse(T(0.9),T(1.0)), specular(T(0.6),T(1.0)), shininess(T(25.0)), texture(), envMap(), normalMap() {} tMaterial(const Material& o) : name(o.name), ambient(o.ambient), diffuse(o.diffuse), specular(o.specular), shininess(o.shininess), texture(o.textureName), envMap(o.envMapName), normalMap(o.normalMapName) { } tMaterial(const tMaterial& o) : name(o.name), ambient(o.ambient), diffuse(o.diffuse), specular(o.specular), shininess(o.shininess), texture(o.texture), envMap(o.envMap), normalMap(o.normalMap) { } tMaterial& operator=(const tMaterial& o) { name= o.name; ambient=o.ambient; diffuse=o.diffuse; specular=o.specular; shininess=o.shininess; texture=o.texture; envMap=o.envMap; normalMap=o.normalMap; return *this; } std::string name; tRGBA ambient; tRGBA diffuse; tRGBA specular; T shininess; /*!< \brief Phong exponent. */ ImageFile texture; ImageFile envMap; ImageFile normalMap; /// convert this brian-material to a gravis material Material getGravisMaterial() const { Material gravisMaterial; gravisMaterial.name = name; gravisMaterial.ambient = ambient; gravisMaterial.diffuse = diffuse; gravisMaterial.specular = specular; gravisMaterial.shininess = shininess; gravisMaterial.hasTexture = texture.isSet(); gravisMaterial.textureName = texture.getFilenameNative(); gravisMaterial.hasEnvMap = envMap.isSet(); gravisMaterial.envMapName = envMap.getFilenameNative(); gravisMaterial.hasNormalMap = normalMap.isSet(); gravisMaterial.normalMapName = normalMap.getFilenameNative(); return gravisMaterial; } }; /*! \brief Mesh data structure. * * A Mesh contains vertex, normal, texture coordinate (uvw) and material information. * For the three types of primitives (triangle, line, point) there are index arrays * referencing above information. For example for lines, lvi indexes into * vertex, and lti into texture coordinates. The vertices and colors * for the 4th lines in the mesh are then vertex[lvi[3][0]], vertex[lvi[3][1]], * color[lci[3][0]] and color[lci[3][1]]. * * tvi.size(), lvi.size() and pvi.size() implicitly specify how many triangles, lines * and points there are in the mesh. All other index arrays must either be of the * same length as the corresponding vertex index array, or of length 0. * * How is missing information handled? If for example no normals are assigned to * any triangles, tni.size() would be zero. If normals are assigned for some triangles, * but not for others, the tni-tuples for the respective triangles must have entries * of -1 (which is the 'invalid index' pointing to the default entry in the * corresponding defaultVectors). 
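 *
 * For example, a mesh consisting of a single triangle with per-vertex colors
 * but without normals, texture coordinates or materials would have
 * vertex.size() == 3, color.size() == 3, tvi == { Tuple3(0,1,2) } and
 * tci == { Tuple3(0,1,2) }, while tni, tti and tmi stay empty.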
*/ template class tMesh { public: std::vector< t3Vector > vertex; /*!< \brief Vertex array. */ std::vector< t3Vector > normal; /*!< \brief Normal array. */ std::vector< t3Vector > texcrd; /*!< \brief Texture coordinate array. */ std::vector< tRGBA > color; /*!< \brief Color array. */ std::vector< tMaterial > material; /*!< \brief Material array. */ std::vector tvi; /*!< \brief Triangle vertex indices. */ std::vector tni; /*!< \brief Triangle normal indices. */ std::vector tti; /*!< \brief Triangle texcrd indices. */ std::vector tci; /*!< \brief Triangle color indices. */ std::vector tmi; /*!< \brief Triangle material indices. */ std::vector lvi; /*!< \brief Line vertex indices. */ std::vector lti; /*!< \brief Line texcrd indices. */ std::vector lci; /*!< \brief Line colour indices. */ std::vector pvi; /*!< \brief Point vertex indices. */ std::vector pci; /*!< \brief Point color indices. */ std::vector adjacent; /*!< \brief Adjacency list. See generateAdjacencyList(). */ tMesh() : vertex(0,t3Vector(std::numeric_limits::quiet_NaN())), normal(), texcrd(), color(0, tRGBA(1)), material(), tvi(0, Tuple3(-1,-1,-1)), tni(0, Tuple3(-1,-1,-1)), tti(0, Tuple3(-1,-1,-1)), tci(0, Tuple3(-1,-1,-1)), tmi(0, -1), lvi(0, Tuple2(-1,-1)), lti(0, Tuple2(-1,-1)), lci(0, Tuple2(-1,-1)), pvi(0, -1), pci(0, -1), adjacent(0,Tuple3(-1,-1,-1)) { } tMesh(const tMesh& o) : vertex(o.vertex), normal(o.normal), texcrd(o.texcrd), color(o.color), material(o.material), tvi(o.tvi), tni(o.tni), tti(o.tti), tci(o.tci), tmi(o.tmi), lvi(o.lvi), lti(o.lti), lci(o.lci), pvi(o.pvi), pci(o.pci), adjacent(o.adjacent) { } tMesh(const Mesh& o) : vertex(o.vertex), normal(o.normal), texcrd(o.texcrd), color(o.color), material(o.material.size()), tvi(o.tvi), tni(o.tni), tti(o.tti), tci(o.tci), tmi(o.tmi), lvi(o.lvi), lti(o.lti), lci(o.lci), pvi(o.pvi), pci(o.pci), adjacent(o.adjacent) { for (size_t i=0; i(o.material[i]); } /** * Exception safe swap operator **/ void swap(tMesh& o) { vertex.swap(o.vertex); normal.swap(o.normal); texcrd.swap(o.texcrd); color.swap(o.color); material.swap(o.material); tvi.swap(o.tvi); tni.swap(o.tni); tti.swap(o.tti); tci.swap(o.tci); tmi.swap(o.tmi); lvi.swap(o.lvi); lti.swap(o.lti); lci.swap(o.lci); pvi.swap(o.pvi); pci.swap(o.pci); adjacent.swap(o.adjacent); } /** * Exception safe assignment operator **/ tMesh& operator=(const tMesh& o) { tMesh tmp(o); tmp.swap(*this); return *this; } /// Generate a gravis mesh from this brian mesh Mesh getGravisMesh() const { Mesh gravisMesh; gravisMesh.vertex = vertex; gravisMesh.normal = normal; gravisMesh.texcrd = texcrd; gravisMesh.color = color; gravisMesh.tvi = tvi; gravisMesh.tni = tni; gravisMesh.tti = tti; gravisMesh.tci = tci; gravisMesh.tmi = tmi; gravisMesh.lvi = lvi; gravisMesh.lti = lti; gravisMesh.lci = lci; gravisMesh.pvi = pvi; gravisMesh.pci = pci; gravisMesh.adjacent = adjacent; gravisMesh.material.resize( material.size() ); for (size_t i=0; i a = (vertex[tvi[i][1]] - vertex[tvi[i][0]]); t3Vector b = (vertex[tvi[i][2]] - vertex[tvi[i][0]]); normal[i] = cross(a, b).normalize(); tni[i] = Tuple3(i, i, i); } } void generatePerVertexNormals() { std::vector ncount; t3Vector norm; const int numFaces = int(tvi.size()); tni.resize(numFaces); normal.resize(vertex.size()); ncount.resize(vertex.size()); for (unsigned int i = 0; i < ncount.size(); i++) { ncount[i] = 0; normal[i] = t3Vector(T(0)); } for (int i = 0; i < numFaces; i++) { t3Vector a = (vertex[tvi[i][1]] - vertex[tvi[i][0]]); t3Vector b = (vertex[tvi[i][2]] - vertex[tvi[i][0]]); norm = cross(a, 
b).normalize(); tni[i] = tvi[i]; normal[tvi[i][0]] += norm; normal[tvi[i][1]] += norm; normal[tvi[i][2]] += norm; ncount[tvi[i][0]]++; ncount[tvi[i][1]]++; ncount[tvi[i][2]]++; } for (unsigned int i = 0; i < normal.size(); i++) { if(ncount[i] != 0) normal[i] /= T(ncount[i]); normal[i] = normal[i].normalize(); } } class Node { public: int count; Tuple2 faces[20]; Node() : count(0) {} void addFace(const Tuple2& t) { if (count == 20) GRAVIS_THROW2(Exception, "Node in mesh has cardinality greater than 20!"); faces[count++] = t; } }; /*! \brief Generate the adjacency list. * * The adjacency list (adjacent) contains entries for each triangle. * Each entry specifies the adjacent triangle for each edge. * * The complexity of the algorithm is linear in the number of faces. * * \throw gravis::Exception if any vertex has a cardinality greater 20 */ void generateAdjacencyList() { const int numFaces = tvi.size(); const int numVert = vertex.size(); adjacent.resize(numFaces); std::vector nodeFaces(numVert); for (int i = 0; i < numFaces; i++) { for (int j = 0; j < 3; j++) { nodeFaces[tvi[i][j]].addFace(Tuple2(i, j)); } } // foreach face for (int f = 0; f < numFaces; f++) { Tuple3& ft = tvi[f]; Tuple3& at = adjacent[f]; // foreach edge for (int e = 0; e < 3; e++) { // already found adjacent face for this edge? if (at[e] >= 0) continue; // vertices for this edge int v1 = ft[e]; int v2 = ft[(e+1)%3]; // faces using these vertices Node& node1 = nodeFaces[v1]; Node& node2 = nodeFaces[v2]; for (int i = 0; i < node1.count; i++) { int f1 = node1.faces[i][0]; if (f1 == f) continue; // self for (int j = 0; j < node2.count; j++) { if (f1 == node2.faces[j][0]) { adjacent[f][e] = f1; adjacent[f1][node2.faces[j][1]] = f; } } } } } } }; /// See tMesh::swap(). template inline void swap(tMesh& __x, tMesh& __y) { __x.swap(__y); } typedef tMaterial fMaterial; typedef tMaterial dMaterial; typedef tMesh fMesh; typedef tMesh dMesh; } // namespace gravis #endif relion-3.1.3/src/jaz/gravis/tQuaternion.h000066400000000000000000000070461411340063500203420ustar00rootroot00000000000000#ifndef __LIBGRAVIS_T_QUATERNION_H__ #define __LIBGRAVIS_T_QUATERNION_H__ /****************************************************************************** ** Title: tQuaternion.h ** Description: Represents a quaternion useful for rotation and scaling ** ** Author: Reinhard Knothe ** Brian Amberg ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #include "t4Matrix.h" namespace gravis { /** * A tQuaternion class useful for rotation+scaling operations **/ template class tQuaternion { public: T s,v1,v2,v3; tQuaternion(const T& s=0, const T& v1=1, const T& v2=0, const T& v3=0) : s(s), v1(v1), v2(v2), v3(v3) {}; tQuaternion(const tQuaternion& q) : s(q.s), v1(q.v1), v2(q.v2), v3(q.v3) {}; tQuaternion(const T* q) : s(q[0]), v1(q[1]), v2(q[2]), v3(q[3]) {}; tQuaternion(const T& phi, const gravis::t3Vector &axis) : s(cos(phi/T(2))), v1(axis.x* sin(phi/T(2))), v2(axis.y* sin(phi/T(2))), v3(axis.z* sin(phi/T(2))) {}; bool operator==(const tQuaternion& q) const { return (s==q.s) && (v1==q.v1) && (v2==q.v2) && (v3==q.v3); }; bool operator!=(const tQuaternion& q) const { return !(*this == q); } tQuaternion operator + (const tQuaternion& q) const { return tQuaternion(q.s+s, q.v1+v1, q.v2+v2, q.v3+v3 ); } tQuaternion operator - (const tQuaternion& q) const { return tQuaternion(s-q.s, v1-q.v1, v2-q.v2, v3-q.v3 ); }; tQuaternion operator * (const tQuaternion& q) const { t3Vector v00 
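// (Illustrative note, not in the original source.)  Hamilton product:
// for q1 = (s1, w1) and q2 = (s2, w2),
//   q1 * q2 = (s1*s2 - w1.w2,  w1 x w2 + s1*w2 + s2*w1),
// which composes the rotations represented by the two unit quaternions.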
(v1,v2,v3); t3Vector v10 (q.v1,q.v2,q.v3); T s2 = s * q.s - dot(v00,v10); t3Vector v2 = cross(v00,v10); t3Vector v3 = v10; v3 *= s; t3Vector v4 = v00; v4 *= q.s; t3Vector v5 = v2+v3+v4; return tQuaternion(s2, v5.x, v5.y, v5.z); } /** * Norm **/ T length() const { return sqrt(s*s + v1*v1 + v2*v2 + v3*v3); } /** * Inplace normalization **/ void normalize() { T l = length(); s /= l; v1 /= l; v2 /= l; v3 /= l; } t3Matrix getMatrix3() const { return t3Matrix( T(1)-T(2)*(v2*v2 + v3*v3), T(2)*(v1*v2 - v3*s), T(2)*(v3*v1 + v2*s), T(2) * (v1*v2 + v3*s), T(1) - T(2) * (v3*v3 + v1*v1), T(2) * (v2*v3 - v1*s), T(2) * (v3*v1 - v2*s), T(2) * (v2*v3 + v1*s), T(1) - T(2) * (v2*v2 + v1*v1)); } t4Matrix getMatrix4() const { return t4Matrix( T(1)-T(2)*(v2*v2 + v3*v3), T(2)*(v1*v2 - v3*s), T(2)*(v3*v1 + v2*s), T(0), T(2) * (v1*v2 + v3*s), T(1) - T(2) * (v3*v3 + v1*v1), T(2) * (v2*v3 - v1*s), T(0), T(2) * (v3*v1 - v2*s), T(2) * (v2*v3 + v1*s), T(1) - T(2) * (v2*v2 + v1*v1), T(0), T(0), T(0), T(0), T(1) ); } }; template inline std::ostream& operator<< (std::ostream& os, const tQuaternion& arg) { os << "[" << arg.s << "; " << arg.v1 << ", " << arg.v2 << ", " << arg.v3 << "]"; return os; } typedef gravis::tQuaternion fQuaternion; typedef gravis::tQuaternion dQuaternion; } #endif relion-3.1.3/src/jaz/gravis/tRGB.h000066400000000000000000000201721411340063500166220ustar00rootroot00000000000000#ifndef __LIBGRAVIS_T_RGB_H__ #define __LIBGRAVIS_T_RGB_H__ /****************************************************************************** ** Title: tRGB.h ** Description: Represents an RGB color tupel. ** ** Author: Jean-Sebastien Pierrard, 2005 ** Brian Amberg, 2005-2006 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #include #include #include namespace gravis { template class tRGBA; template class tRGB { /*! * Private helper functions, wrapped into an additional struct in case that we want to use the names **/ struct priv { static inline const T& min(const T& a, const T& b) { return ab ? a : b; } }; public: typedef T scalar_type; T r, g, b; tRGB () : r(T(0)), g(T(0)), b(T(0)) { } tRGB (T _r, T _g, T _b) : r(_r), g(_g), b(_b) { } tRGB (T gray) : r(gray), g(gray), b(gray) { } explicit tRGB (const tRGBA& c) : r(c.r), g(c.g), b(c.b) {} void set (T _r, T _g, T _b) { r = _r; g = _g; b = _b; } void set (T gray) { r = gray; b = gray; g = gray; } void add (T _r, T _g, T _b) { r += _r; g += _g; b += _b; } void add (T gray) { r += gray; g += gray; b += gray; } /** * Deprecated, use intensity() instead **/ T grayValue () const { return T(0.30*r + 0.59*g + 0.11*b); } T intensity () const { return grayValue(); } /*! * Return minimum of the tupel. * * TODO: Is this really necessary in here. It could be a utility function. **/ T const& minValue () const { return std::min(std::min(r, g), b); } /*! * Return maximum of the tupel. * * TODO: Is this really necessary in here. It could be a utility function. 
**/ T const& maxValue () const { return std::max(std::max(r, g), b); } bool operator != (const tRGB& c) const { return r != c.r || g != c.g || b != c.b; } bool operator == (const tRGB& c) const { return r == c.r && g == c.g && b == c.b; } tRGB& operator += (const tRGB& c) { r += c.r; g += c.g; b += c.b; return *this; } tRGB& operator += (const T gray) { r += gray; g += gray; b += gray; return *this; } tRGB& operator -= (const tRGB& c) { r -= c.r; g -= c.g; b -= c.b; return *this; } tRGB& operator -= (const T gray) { r -= gray; g -= gray; b -= gray; return *this; } tRGB& operator *= (const tRGB& c) { r *= c.r; g *= c.g; b *= c.b; return *this; } tRGB& operator *= (const T factor) { r *= factor; g *= factor; b *= factor; return *this; } tRGB& operator /= (const tRGB& c) { r /= c.r; g /= c.g; b /= c.b; return *this; } tRGB& operator /= (const T factor) { r /= factor; g /= factor; b /= factor; return *this; } /*! * \brief All color components are clamped to [0,1]. This function works inplace. * * \return self */ tRGB& clamp() { r = std::min(std::max(r, T(0)), T(1)); g = std::min(std::max(g, T(0)), T(1)); b = std::min(std::max(b, T(0)), T(1)); return *this; } //! Unary minus inline tRGB operator - () const { return tRGB(-r, -g, -b); }; //! Addition of a scalar (analog to -=) inline tRGB operator + (const T& c) const { return tRGB(r+c, g+c, b+c); }; //! Subtraction of a scalar (analog to +=) inline tRGB operator - (const T& c) const { return tRGB(r-c, g-c, b-c); }; //! Multiplication of a scalar (analog to *=) inline tRGB operator * (const T& c) const { return tRGB(r*c, g*c, b*c); }; //! Division by a scalar (analog to /=) inline tRGB operator / (const T& c) const { return tRGB(r/c, g/c, b/c); }; bool operator == (const tRGB& arg) { return ((arg.r == r) && (arg.g == g) && (arg.b == b)); } const T& operator [](const size_t& i) const { return (&r)[i]; } T& operator [](const size_t& i) { return (&r)[i]; } }; template inline tRGB operator + (const tRGB& c1, const tRGB& c2) { tRGB result = c1; return (result += c2); } template inline tRGB operator - (const tRGB& c1, const tRGB& c2) { tRGB result = c1; return (result -= c2); } template inline tRGB operator * (const tRGB& c1, const tRGB& c2) { tRGB result(c1.r * c2.r, c1.g * c2.g, c1.b * c2.b); return result; } template inline tRGB operator * (const tRGB& c, T factor) { tRGB result(c.r * factor, c.g * factor, c.b * factor); return result; } template inline tRGB operator * (T factor, const tRGB& c) { tRGB result(c.r * factor, c.g * factor, c.b * factor); return result; } template inline tRGB operator / (const tRGB& c1, const tRGB& c2) { tRGB result(c1.r / c2.r, c1.g / c2.g, c1.b / c2.b); return result; } template inline tRGB operator / (const tRGB& c, T factor) { tRGB result(c.r / factor, c.g / factor, c.b / factor); return result; } template inline bool operator < (const tRGB& c1, const tRGB& c2) { T gray1 = c1.grayValue(); T gray2 = c2.grayValue(); return (gray1 < gray2); } template inline tRGB operator ! (const tRGB& c) { tRGB result = tRGB::White(); return (result -= c); } // Absolute of every color channel template inline tRGB abs(const tRGB& c) { return tRGB(c.r < T(0) ? -c.r : c.r, c.g < T(0) ? -c.g : c.g, c.b < T(0) ? 
-c.b : c.b); } template inline std::ostream& operator << (std::ostream& os, const tRGB& c) { os << "(" << c.r << ", " << c.g << ", " << c.b << ")"; return os; } template <> inline std::ostream& operator << (std::ostream& os, const tRGB& c) { os << "(" << (int)c.r << ", " << (int)c.g << ", " << (int)c.b << ")"; return os; } // Inverse of operator<< template inline std::istream& operator>> (std::istream& is, tRGB& arg) { char c = ' '; is >> c; if (is.eof()) return is; if (c != '(') throw std::runtime_error("tRGB should start with an opening ("); std::string values; int v = 0; while ((is.get(c)) && (c != ')')) { if (c == ',') { v++; if (v >= 3) throw std::runtime_error("tRGB contains more than three elements"); values.push_back(' '); } else values.push_back(c); } if (c != ')') { throw std::runtime_error("tRGB should end with a )"); } if ( v < 2 ) { throw std::runtime_error("tRGB has not enough color values"); } std::stringstream valueReader(values); valueReader >> arg.r >> arg.g >> arg.b; return is; } template inline T dot (const tRGB& v1, const tRGB& v2) { return (v1.r*v2.r + v1.g*v2.g + v1.b*v2.b); } typedef tRGB bRGB; typedef tRGB fRGB; typedef tRGB dRGB; } /* Close Namespace "gravis" */ #endif relion-3.1.3/src/jaz/gravis/tRGBA.h000066400000000000000000000173251411340063500167310ustar00rootroot00000000000000#ifndef __LIBGRAVIS_T_RGBA_H__ #define __LIBGRAVIS_T_RGBA_H__ /****************************************************************************** ** Title: tRGBA.h ** Description: Represents an RGB+Alpha color tupel. ** ** Author: Jean-Sebastien Pierrard, 2005 ** Brian Amberg, 2005-2006 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #include "tRGB.h" #include #include #include namespace gravis { template class tRGBA { public: T r, g, b, a; typedef T scalar_type; //! Default constructs a black, translucent pixel tRGBA () : r(T(0)), g(T(0)), b(T(0)), a(T(0)) { } tRGBA (const T& r, const T& g, const T& b, const T& a=T(1)) : r(r), g(g), b(b), a(a) { } tRGBA (const T& gray, const T& alpha=T(1)) : r(gray), g(gray), b(gray), a(alpha) { } explicit tRGBA (const tRGB& c, const T& a=T(1)) : r(c.r), g(c.g), b(c.b), a(a) { } void set (T _r, T _g, T _b, T _a) { r = _r; g = _g; b = _b; a = _a; } void set (T gray) { r = gray; b = gray; g = gray; a = T(1); } /*! * Conversion to a gray pixel * * TODO: This should be put in an external conversion file, together with cie, hsv, etc... **/ T grayValue () const { return T(0.30*r + 0.59*g + 0.11*b); } T intensity () const { return grayValue(); } /*! * Return minimum of the tupel, ignoring the alpha channel. * * TODO: Is this really necessary in here. It could be a utility function. **/ T const& minValue () const { return std::min(std::min(r, g), b); } /*! * Return maximum of the tupel, ignoring the alpha channel. * * TODO: Is this really necessary in here. It could be a utility function. **/ T const& maxValue () const { return std::max(std::max(r, g), b); } const T& operator [] (const size_t& i) const { return (&r)[i]; } T& operator [] (const size_t& i) { return (&r)[i]; } /*! * \brief All color components, including alpha are clamped to [0,1]. This function works inplace. 
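*
* (Illustrative example, not from the original source.)
* \code
* fRGBA c(1.2f, -0.1f, 0.5f, 2.0f);
* c.clamp();   // c is now (1, 0, 0.5, 1)
* \endcode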
* * \return self */ tRGBA& clamp() { r = std::min(std::max(r, T(0)), T(1)); g = std::min(std::max(g, T(0)), T(1)); b = std::min(std::max(b, T(0)), T(1)); a = std::min(std::max(a, T(0)), T(1)); return *this; } bool operator != (const tRGBA& c) const { return r != c.r || g != c.g || b != c.b || a != c.a; } bool operator == (const tRGBA& c) const { return r == c.r && g == c.g && b == c.b && a == c.a; } tRGBA& operator += (const tRGBA& c) { r += c.r; g += c.g; b += c.b; a += c.a; return *this; } // tRGBA& operator += (const T gray) // { // r += gray; // g += gray; // b += gray; // return *this; // } tRGBA& operator -= (const tRGBA& c) { r -= c.r; g -= c.g; b -= c.b; a -= c.a; return *this; } // tRGBA& operator -= (const T gray) // { // r -= gray; // g -= gray; // b -= gray; // return *this; // } tRGBA& operator *= (const tRGBA& c) { r *= c.r; g *= c.g; b *= c.b; a *= c.a; return *this; } tRGBA& operator *= (const float factor) { r *= factor; g *= factor; b *= factor; a *= factor; return *this; } tRGBA& operator /= (const tRGBA& c) { r /= c.r; g /= c.g; b /= c.b; a /= c.a; return *this; } tRGBA& operator /= (const float factor) { r /= factor; g /= factor; b /= factor; a /= factor; return *this; } //! Unary minus inline tRGBA operator - () const { return tRGBA(-r, -g, -b, -a); }; //! Addition of a scalar (analog to -=) // inline // tRGBA operator + (const T& c) const // { // return tRGBA(r+c, g+c, b+c, a); // }; //! Subtraction of a scalar (analog to +=) // inline // tRGBA operator - (const T& c) const // { // return tRGBA(r-c, g-c, b-c, a); // }; //! Multiplication of a scalar (analog to *=) inline tRGBA operator * (const T& c) const { return tRGBA(r*c, g*c, b*c, a*c); }; //! Division by a scalar (analog to /=) inline tRGBA operator / (const T& c) const { return tRGBA(r/c, g/c, b/c, a/c); }; }; template inline tRGBA operator+ (const tRGBA& c1, const tRGBA& c2) { tRGBA result(c1); return (result += c2); } template inline tRGBA operator- (const tRGBA& c1, const tRGBA& c2) { tRGBA result(c1); return (result -= c2); } template inline tRGBA operator* (const tRGBA& c1, const tRGBA& c2) { tRGBA result(c1); result *= c2; return result; } template inline tRGBA operator* (const tRGBA& c, T factor) { tRGBA result(c); return (result *= factor); } template inline tRGBA operator* (T factor, const tRGBA& c) { tRGBA result(c); return (result *= factor); } template inline tRGBA operator / (const tRGBA& c1, const tRGBA& c2) { tRGBA result(c1.r / c2.r, c1.g / c2.g, c1.b / c2.b); return result; } template inline tRGBA operator / (const tRGBA& c, T factor) { tRGBA result(c.r / factor, c.g / factor, c.b / factor); return result; } template inline bool operator < (const tRGBA& c1, const tRGBA& c2) { T gray1 = c1.grayValue(); T gray2 = c2.grayValue(); return (gray1 < gray2); } template inline tRGBA operator ! 
(const tRGBA& c) { tRGBA result = tRGBA::White; return (result -= c); } template inline std::ostream& operator << (std::ostream& os, const tRGBA& c) { os << "(" << c.r << " " << c.g << " " << c.b << " " << c.a << ")"; return os; } template <> inline std::ostream& operator << (std::ostream& os, const tRGBA& c) { os << "(" << (int)c.r << " " << (int)c.g << " " << (int)c.b << " " << (int)c.a << ")"; return os; } // Inverse of operator<< template inline std::istream& operator>> (std::istream& is, tRGBA& arg) { char c = ' '; is >> c; if (is.eof()) return is; if (c != '(') throw std::runtime_error("tRGBA should start with an opening ("); std::stringstream values; int v = 0; while ((is >> c) && (c != ')')) { if (c == ',') { v++; if (v >= 4) throw std::runtime_error("tRGBA contains more than four elements"); values << " "; } else if (c != ' ') values << c; } if (c != ')') { throw std::runtime_error("tRGBA should end with a )"); } if ( v < 3 ) { throw std::runtime_error("tRGBA has not enough color values"); } values >> arg.r >> arg.g >> arg.b >> arg.a; return is; } typedef tRGBA bRGBA; typedef tRGBA fRGBA; typedef tRGBA dRGBA; } /* Close Namespace "gravis" */ #endif relion-3.1.3/src/jaz/gravis/tRGB_A.h000066400000000000000000000167221411340063500170700ustar00rootroot00000000000000#ifndef __LIBGRAVIS_T_RGB_A_H__ #define __LIBGRAVIS_T_RGB_A_H__ /****************************************************************************** ** Title: tRGB_A.h ** Description: Represents an RGB+Alpha color tupel. ** ** Author: Jean-Sebastien Pierrard, 2005 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #include "tRGB.h" #include #include #include namespace gravis { template class tRGB_A { public: T r, g, b, a; tRGB_A () : r(T(0)), g(T(0)), b(T(0)), a(T(1.0)) { } tRGB_A (T _r, T _g, T _b, T _a=T(1.0)) : r(_r), g(_g), b(_b), a(_a) { } tRGB_A (T gray) : r(gray), g(gray), b(gray), a(T(1.0)) { } tRGB_A (T gray, T alpha) : r(gray), g(gray), b(gray), a(alpha) { } explicit tRGB_A (const tRGBA& c) : r(c.r), g(c.g), b(c.b), a(c.a) { } explicit tRGB_A (const tRGB& c, T _a=T(1.0)) : r(c.r), g(c.g), b(c.b), a(_a) { } void set (T _r, T _g, T _b, T _a) { r = _r; g = _g; b = _b; a = _a; } void set (T _r, T _g, T _b) { r = _r; g = _g; b = _b; } void set (T gray) { r = gray; b = gray; g = gray; } void set (T gray, T alpha) { r = gray; b = gray; g = gray; a = alpha; } T grayValue () const { return (T)(0.30*r + 0.59*g + 0.11*b); } T minValue () const { if (r < g) { if (r < b) return r; else return b; } else { if (g < b) return g; else return b; } } T maxValue () const { if (r > g) { if (r > b) return r; else return b; } else { if (g > b) return g; else return b; } } /*! \brief All color components, including alpha are clamped to [0,1]. 
* * \return self */ tRGB_A& clamp() { r = std::min(std::max(r, T(0)), T(1)); g = std::min(std::max(g, T(0)), T(1)); b = std::min(std::max(b, T(0)), T(1)); return *this; } bool operator != (const tRGB_A& c) const { return r != c.r || g != c.g || b != c.b || a != c.a; } bool operator == (const tRGB_A& c) const { return r == c.r && g == c.g && b == c.b && a == c.a; } tRGB_A& operator += (const tRGB_A& c) { r += c.r; g += c.g; b += c.b; return *this; } tRGB_A& operator += (const T gray) { r += gray; g += gray; b += gray; return *this; } tRGB_A& operator -= (const tRGB_A& c) { r -= c.r; g -= c.g; b -= c.b; return *this; } tRGB_A& operator -= (const T gray) { r -= gray; g -= gray; b -= gray; return *this; } tRGB_A& operator *= (const tRGB_A& c) { r *= c.r; g *= c.g; b *= c.b; return *this; } tRGB_A& operator *= (const float factor) { r *= factor; g *= factor; b *= factor; return *this; } tRGB_A& operator /= (const tRGB_A& c) { r /= c.r; g /= c.g; b /= c.b; return *this; } tRGB_A& operator /= (const float factor) { r /= factor; g /= factor; b /= factor; return *this; } //! Unary minus inline tRGB_A operator - () const { return tRGB_A(-r, -g, -b, a); }; //! Addition of a scalar (analog to -=) inline tRGB_A operator + (const T& c) const { return tRGB_A(r+c, g+c, b+c, a); }; //! Subtraction of a scalar (analog to +=) inline tRGB_A operator - (const T& c) const { return tRGB_A(r-c, g-c, b-c, a); }; //! Multiplication of a scalar (analog to *=) inline tRGB_A operator * (const T& c) const { return tRGB_A(r*c, g*c, b*c, a); }; //! Division by a scalar (analog to /=) inline tRGB_A operator / (const T& c) const { return tRGB_A(r/c, g/c, b/c, a); }; }; template inline tRGB_A operator+ (const tRGB_A& c1, const tRGB_A& c2) { tRGB_A result(c1); return (result += c2); } template inline tRGB_A operator- (const tRGB_A& c1, const tRGB_A& c2) { tRGB_A result(c1); return (result -= c2); } template inline tRGB_A operator* (const tRGB_A& c1, const tRGB_A& c2) { // tRGB_A result(c1.r * c2.r, c1.g * c2.g, c1.b * c2.b, c1.a * c2.a); tRGB_A result(c1); result *= c2; return result; } template inline tRGB_A operator* (const tRGB_A& c, T factor) { // tRGB_A result(c.r * factor, c.g * factor, c.b * factor, c.a); tRGB_A result(c); return (result *= factor); } template inline tRGB_A operator* (T factor, const tRGB_A& c) { // tRGB_A result(c.r * factor, c.g * factor, c.b * factor, c.a); tRGB_A result(c); return (result *= factor); } template inline tRGB_A operator / (const tRGB_A& c1, const tRGB_A& c2) { tRGB_A result(c1.r / c2.r, c1.g / c2.g, c1.b / c2.b); return result; } template inline tRGB_A operator / (const tRGB_A& c, T factor) { tRGB_A result(c.r / factor, c.g / factor, c.b / factor); return result; } template inline bool operator < (const tRGB_A& c1, const tRGB_A& c2) { T gray1 = c1.grayValue(); T gray2 = c2.grayValue(); return (gray1 < gray2); } template inline tRGB_A operator ! 
(const tRGB_A& c) { tRGB_A result = tRGB_A::White; return (result -= c); } template inline std::ostream& operator << (std::ostream& os, const tRGB_A& c) { os << "(" << c.r << " " << c.g << " " << c.b << " " << c.a << ")"; return os; } template <> inline std::ostream& operator << (std::ostream& os, const tRGB_A& c) { os << "(" << (int)c.r << " " << (int)c.g << " " << (int)c.b << " " << (int)c.a << ")"; return os; } // Inverse of operator<< template inline std::istream& operator>> (std::istream& is, tRGB_A& arg) { char c = ' '; is >> c; if (is.eof()) return is; if (c != '(') throw std::runtime_error("tRGB_A should start with an opening ("); std::stringstream values; int v = 0; while ((is >> c) && (c != ')')) { if (c == ' ') { v++; if (v >= 4) throw std::runtime_error("tRGB_A contains more than four elements"); values << " "; } else if (c != ' ') values << c; } if (c != ')') { throw std::runtime_error("tRGB_A should end with a )"); } if ( v < 3 ) { throw std::runtime_error("tRGB_A has not enough color values"); } values >> arg.r >> arg.g >> arg.b >> arg.a; return is; } typedef tRGB_A bRGB_A; typedef tRGB_A fRGB_A; typedef tRGB_A dRGB_A; } /* Close Namespace "gravis" */ #endif relion-3.1.3/src/jaz/gravis/tVarMatrix.h000066400000000000000000001362301411340063500201300ustar00rootroot00000000000000#ifndef __LIBGRAVIS_T_VAR_MATRIX_H__ #define __LIBGRAVIS_T_VAR_MATRIX_H__ /****************************************************************************** ** Title: matrix.h ** Description: Templated variable size dense matrices, with a blas/lapack ** connector. ** ** Author: Brian Amberg, 2007 ** Computer Science Department, University Basel (CH) ** ******************************************************************************/ #include #include #include #include #include #include #include #include #include #include "tMatrix.h" #include "Exception.h" #include "t2Matrix.h" #include "t3Matrix.h" #include "t4Matrix.h" namespace gravis { //////////////////////////////////////////////////////////////////// // The Matrix classes. We distinguish between vectors and matrices // and allow for views of full matrices. No spacing magic is used, these are // simple dense matrices in column-first order template class tVarVector; template class tVectorView; template class tConstVectorView; template class tVarMatrix; template class tMatrixView; template class tConstMatrixView; // // IMPLEMENTATION // namespace matrix { /** * Set all to zero **/ template inline static void clear(VectorOrMatrix& v) { if (v.size()>0) memset( &v[0], 0, sizeof(v[0])*v.size()); } /** * Fill with equal elements **/ template inline static void fill(VectorOrMatrix& v, const typename VectorOrMatrix::scalar& value) { for(size_t i=0; i static inline void clamp(VectorOrMatrix& v, const typename VectorOrMatrix::scalar& min, const typename VectorOrMatrix::scalar& max) { for (size_t i=0; i inline static T* alloc_arr(const std::string& title, const size_t h, const size_t w=1) { T* r = new T[h*w]; return r; } template inline static void free_arr(const T* p) { delete [] p; } template inline static void copy_arr(T* t, const T* s, const size_t sz) { if (sz>0) memcpy(t, s, sz*sizeof(T)); } /////////////////////// HELPER ////////////////////// /** * Clamp a value **/ template static inline void clamp(T& v, const T& min, const T& max) { if (v inline T sqr(const T& v) { return v*v; } } } /** * A thin c++ matrix wrapper around a slice of memory * * These matrix classes allow easy access of the blas/lapack functions. 
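*
* Illustrative sketch (not from the original source) of wrapping an existing
* float array without copying it:
*
* \code
* float raw[6] = {1, 2, 3, 4, 5, 6};
* gravis::tVectorView<float>      v(raw, 6);    // mutable view
* gravis::tConstVectorView<float> cv(raw, 6);   // read-only view
* v[0] = 10.0f;                                 // writes through to raw[0]
* \endcode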
* * They are relatively rough, as they try to be as simple as possible. In my * view it is not good to make c++ behave like matlab, as the only advantage * of c++ over matlab is more control. These classes give maximum control. **/ template class tVectorView { public: typedef T scalar; size_t h; T* const data; tVectorView(T* data, size_t h) : h(h), data(data) { } /** * Create a view of the other matrix **/ tVectorView(tVectorView &o) : h(o.h), data(o.data) {} tVectorView(tVarVector &o) : h(o.h), data(o.data) {} template tVectorView(tMatrix &m) : h(mh), data(&m[0]) {} /** * Copy another vector into this vector. **/ tVectorView& operator=(const tConstVectorView &o) { GRAVIS_CHECK(o.h==h, "Incompatible size"); //for (size_t i=0; i &o) { GRAVIS_CHECK(o.h==h, "Incompatible size"); //for (size_t i=0; i &o) { GRAVIS_CHECK(o.h==h, "Incompatible size"); //for (size_t i=0; i class tConstVectorView { public: typedef T scalar; size_t h; const T* const data; tConstVectorView(const T* data, size_t h) : h(h), data(data) {} tConstVectorView(const tConstVectorView& o) : h(o.h), data(o.data) {} tConstVectorView(const tVectorView &o) : h(o.h), data(o.data) {} tConstVectorView(const tVarVector &o) : h(o.h), data(o.data) {} template tConstVectorView(const tMatrix &m) : h(mh), data(&m[0]) {} inline const T& operator[](size_t i) const { checkAccess1( i, h ); return data[i]; } inline const T& operator()(size_t i) const { checkAccess1( i, h ); return data[i]; } inline const T& clampedAccess(int i) const { matrix::priv::clamp(i, 0, int(h)-1); return operator()(i); } inline size_t size() const { return h; } }; /** * A matrix with memory allocated on the heap. * * The semantic of operations on this vector is different from the vector * views. Assigning something to this vector is a copy operation, while for the * views it is just a pointer assignment. * * They are relatively rough, as they try to be as simple as possible. In my * view it is not good to make c++ behave like matlab, as the only advantage * of c++ over matlab is more control. These classes give maximum control. 
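*
* Illustrative sketch (not from the original source), assuming float elements:
*
* \code
* gravis::tVarVector<float> v(5, "example_vector");   // heap allocated, named for debugging output
* v.fill(1.0f);
* v[2] = 3.0f;
* gravis::matrix::mult(v, 2.0f);                       // in-place scaling
* \endcode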
**/ template class tVarVector { public: typedef T scalar; size_t h; T* data; std::string title; tVarVector(size_t h, const std::string& title="UNNAMED:VECTOR") : h(h), data(matrix::priv::alloc_arr(title, h)), title(title) {} tVarVector(const std::string& title="UNNAMED:VECTOR") : h(0), data(matrix::priv::alloc_arr(title, h)), title(title) {} ~tVarVector() { matrix::priv::free_arr(data); }; /** * Copy another vector into this vector **/ tVarVector(const tConstVectorView &o, const std::string& title="UNNAMED:VECTOR") : h(o.h), data(matrix::priv::alloc_arr(title, h)), title(title) { //for (size_t i=0; i &o, const std::string& title="UNNAMED:VECTOR") : h(o.h), data(matrix::priv::alloc_arr(title, h)), title(title) { //for (size_t i=0; i(title, h)), title(title) { //for (size_t i=0; i tVarVector(const tMatrix &o, const std::string& title="UNNAMED:VECTOR") : h(mh), data(matrix::priv::alloc_arr(title, h)), title(title) { //for (size_t i=0; i &o) { resize(o.h); //for (size_t i=0; i &o) { resize(o.h); //for (size_t i=0; i &o) { resize(o.h); //for (size_t i=0; ih) return; if (h > this->h) { T* new_data = matrix::priv::alloc_arr(title, h); std::swap(data, new_data); matrix::priv::free_arr(new_data); } this->h = h; } inline T& operator[](size_t i) { checkAccess1( i, h ); return data[i]; }; inline const T& operator[](size_t i) const { checkAccess1( i, h ); return data[i]; }; inline T& operator()(size_t i) { checkAccess1( i, h ); return data[i]; }; inline const T& operator()(size_t i) const { checkAccess1( i, h ); return data[i]; }; inline T& clampedAccess(int i) { matrix::priv::clamp(i, 0, int(h)-1); return operator()(i); } inline const T& clampedAccess(int i) const { matrix::priv::clamp(i, 0, int(h)-1); return operator()(i); } inline size_t size() const { return h; } /** * Convenience function to clear a matrix **/ inline void clear() { gravis::matrix::clear(*this); } /** * Convenience functions to fill a matrix **/ inline void fill(const T& e) { gravis::matrix::fill(*this, e); } /** * Convenience functions to clamp all elements of a matrix **/ inline void clamp(const T& min, const T& max) { gravis::matrix::clamp(*this, min, max); } }; /** * A thin c++ matrix wrapper around a slice of memory **/ template class tMatrixView { public: typedef T scalar; size_t h, w; T* const data; tMatrixView(T* data, size_t h, size_t w) : h(h), w(w), data(data) {} tMatrixView(tMatrixView &o) : h(o.h), w(o.w), data(o.data) {} tMatrixView(tVarMatrix &o) : h(o.h), w(o.w), data(o.data) {} template tMatrixView(tMatrix &m) : h(mh), w(mw), data(&m[0]) {} /** * Copy another vector into this vector. 
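*
* (Clarifying note, not in the original source.)  Unlike the view
* constructors above, assignment copies the elements into the memory this
* view already points to; both views must have identical dimensions.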
**/ tMatrixView& operator=(const tConstMatrixView &o) { GRAVIS_CHECK(o.h==h && o.w==w, "Incompatible size"); //for (size_t i=0; i &o) { GRAVIS_CHECK(o.h==h && o.w==w, "Incompatible size"); //for (size_t i=0; i &o) { GRAVIS_CHECK(o.h==h && o.w==w, "Incompatible size"); //for (size_t i=0; i class tConstMatrixView { public: typedef T scalar; size_t h, w; const T* const data; tConstMatrixView(const T* data, size_t h, size_t w) : h(h), w(w), data(data) {} tConstMatrixView(const tConstMatrixView& o) : h(o.h), w(o.w), data(o.data) {} tConstMatrixView(const tMatrixView &o) : h(o.h), w(o.w), data(o.data) {} tConstMatrixView(const tVarMatrix &o) : h(o.h), w(o.w), data(o.data) {} template tConstMatrixView(const tMatrix &m) : h(mh), w(mw), data(&m[0]) {} inline const T& operator[](size_t i ) const { checkAccess1( i, h*w ); return data[i]; } inline const T& operator()(size_t i, size_t j) const { checkAccess2( i,j, h, w ); return data[i + j*h]; } inline const T& clampedAccess(int i, int j) const { matrix::priv::clamp(i, 0, int(h)-1); matrix::priv::clamp(j, 0, int(w)-1); return operator()(i,j); } inline size_t size() const { return h*w; } }; /** * A matrix with memory allocated on the heap **/ template class tVarMatrix { public: typedef T scalar; size_t h,w; T* data; std::string title; tVarMatrix(size_t h, size_t w, const std::string& title="UNNAMED:MATRIX") : h(h), w(w), data(matrix::priv::alloc_arr(title, h, w)), title(title) {} tVarMatrix(const std::string& title="UNNAMED:MATRIX") : h(0), w(0), data(matrix::priv::alloc_arr(title, h, w)), title(title) {} ~tVarMatrix() { matrix::priv::free_arr(data); }; /** * Copy another matrix into this matrix **/ tVarMatrix(const tConstMatrixView &o, const std::string& title="UNNAMED:MATRIX") : h(o.h), w(o.w), data(matrix::priv::alloc_arr(title, o.h, o.w)), title(title) { matrix::priv::copy_arr(data, o.data, size()); } /** * Copy another matrix into this matrix **/ tVarMatrix(const tMatrixView &o, const std::string& title="UNNAMED:MATRIX") : h(o.h), w(o.w), data(matrix::priv::alloc_arr(title, o.h, o.w)), title(title) { matrix::priv::copy_arr(data, o.data, size()); } /** * Copy another matrix into this matrix **/ tVarMatrix(const tVarMatrix &o, const std::string& title="UNNAMED:MATRIX") : h(o.h), w(o.w), data(matrix::priv::alloc_arr(title, o.h, o.w)), title(title) { matrix::priv::copy_arr(data, o.data, size()); } /** * Copy another matrix into this matrix **/ template tVarMatrix(const tMatrix &o, const std::string& title="UNNAMED:MATRIX") : h(mh), w(mw), data(matrix::priv::alloc_arr(title, o.h, o.w)), title(title) { matrix::priv::copy_arr(data, o.data, size()); } /** * Copy another matrix into this matrix **/ tVarMatrix(const t2Matrix &o, const std::string& title="UNNAMED:MATRIX") : h(2), w(2), data(matrix::priv::alloc_arr(title, h, w)), title(title) { matrix::priv::copy_arr(data, o.m, size()); } /** * Copy another matrix into this matrix **/ tVarMatrix(const t3Matrix &o, const std::string& title="UNNAMED:MATRIX") : h(3), w(3), data(matrix::priv::alloc_arr(title, h, w)), title(title) { matrix::priv::copy_arr(data, o.m, size()); } /** * Copy another matrix into this matrix **/ tVarMatrix(const t4Matrix &o, const std::string& title="UNNAMED:MATRIX") : h(4), w(4), data(matrix::priv::alloc_arr(title, h, w)), title(title) { matrix::priv::copy_arr(data, o.m, size()); } /** * Copy another matrix into this matrix. * Will loose old data reference, beware. 
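*
* (Clarifying note, not in the original source.)  The destination is resized
* to the source's dimensions and its previous contents are discarded before
* the source elements are copied in.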
**/ tVarMatrix& operator=(const tConstMatrixView &o) { resize(o.h,o.w); //for (size_t i=0; i &o) { resize(o.h,o.w); //for (size_t i=0; i &o) { resize(o.h,o.w); //for (size_t i=0; ih && w == this->w) return; if (h*w>size()) { T* new_data = matrix::priv::alloc_arr(title, h, w); std::swap(data, new_data); matrix::priv::free_arr(new_data); } this->h = h; this->w = w; } inline T& operator[](size_t i) { checkAccess1( i, h*w ); return data[i]; }; inline const T& operator[](size_t i) const { checkAccess1( i, h*w ); return data[i]; }; inline T& operator()(size_t i, size_t j) { checkAccess2( i, j, h, w ); return data[i+j*h]; }; inline const T& operator()(size_t i, size_t j) const { checkAccess2( i, j, h, w ); return data[i+j*h]; }; inline T& clampedAccess(int i, int j) { matrix::priv::clamp(i, 0, int(h)-1); matrix::priv::clamp(j, 0, int(w)-1); return operator()(i,j); } inline const T& clampedAccess(int i, int j) const { matrix::priv::clamp(i, 0, int(h)-1); matrix::priv::clamp(j, 0, int(w)-1); return operator()(i,j); } inline size_t size() const { return h*w; } /** * Convenience function to clear a matrix **/ inline void clear() { gravis::matrix::clear(*this); } /** * Convenience functions to fill a matrix **/ inline void fill(const T& e) { gravis::matrix::fill(*this, e); } /** * Convenience functions to clamp all elements of a matrix **/ inline void clamp(const T& min, const T& max) { gravis::matrix::clamp(*this, min, max); } }; /** * Matrix and vector operations **/ namespace matrix { template inline static void display( const tConstVectorView &v) { std::cout << "Vector: " << v.h << std::endl; for (size_t i=0; i inline static void display( const tVarVector &v) { display( tConstVectorView(v) ); } template inline static void display( const tVectorView &v) { display( tConstVectorView(v) ); } template inline static void display( const tConstMatrixView &v) { std::cout << "Matrix: " << v.h << "x" << v.w << std::endl; for (size_t i=0; i inline static void display( const tVarMatrix &v) { display( tConstMatrixView(v) ); } template inline static void display( const tMatrixView &v) { display( tConstMatrixView(v) ); } /** * Find the largest element **/ template inline static typename VectorOrMatrix::scalar max(const VectorOrMatrix& v) { size_t mi = 0; for (size_t i=1; i v[mi]) mi = i; return v[mi]; } /** * Find the smallest element **/ template inline static typename VectorOrMatrix::scalar min(const VectorOrMatrix& v) { size_t mi = 0; for (size_t i=1; i inline static void sub(tVectorView &v, const T& s) { for(size_t i=0; i inline static void sub(tVarVector &v, const T& s) { for(size_t i=0; i inline static void sub(tMatrixView &v, const T& s) { for(size_t i=0; i inline static void sub(tVarMatrix &v, const T& s) { for(size_t i=0; i inline static void sub(tMatrix &v, const T& s) { for(size_t i=0; i inline static void add(tVectorView &v, const T& s) { for(size_t i=0; i inline static void add(tVarVector &v, const T& s) { for(size_t i=0; i inline static void add(tMatrixView &v, const T& s) { for(size_t i=0; i inline static void add(tVarMatrix &v, const T& s) { for(size_t i=0; i inline static void add(tMatrix &v, const T& s) { for(size_t i=0; i inline static void mult(tVectorView &v, const T& s) { for(size_t i=0; i inline static void mult(tVarVector &v, const T& s) { for(size_t i=0; i inline static void mult(tMatrixView &v, const T& s) { for(size_t i=0; i inline static void mult(tVarMatrix &v, const T& s) { for(size_t i=0; i inline static void mult(tMatrix &v, const T& s) { for(size_t i=0; i inline static void 
div(tVectorView &v, const T& s) { for(size_t i=0; i inline static void div(tVarVector &v, const T& s) { for(size_t i=0; i inline static void div(tMatrixView &v, const T& s) { for(size_t i=0; i inline static void div(tVarMatrix &v, const T& s) { for(size_t i=0; i inline static void div(tMatrix &v, const T& s) { for(size_t i=0; i inline static void negate(tVectorView &v) { for(size_t i=0; i inline static void negate(tVarVector &v) { for(size_t i=0; i inline static void negate(tMatrixView &v) { for(size_t i=0; i inline static void negate(tVarMatrix &v) { for(size_t i=0; i inline static void add(tVectorView &v, const tConstVectorView& v2) { GRAVIS_CHECK(v.size() != v2.size, "Size must be equal for addition"); for(size_t i=0; i inline static void add(tVarVector &v, const tConstVectorView& v2) { GRAVIS_CHECK(v.size() != v2.size, "Size must be equal for addition"); for(size_t i=0; i inline static void add(tMatrixView &v, const tConstMatrixView& v2) { GRAVIS_CHECK(v.size() != v2.size, "Size must be equal for addition"); for(size_t i=0; i inline static void add(tVarMatrix &v, const tConstMatrixView& v2) { GRAVIS_CHECK(v.size() != v2.size, "Size must be equal for addition"); for(size_t i=0; i inline static void add(tMatrix &v, const tMatrix& v2) { for(size_t i=0; i inline static void sub(tVectorView &v, const tConstVectorView& v2) { GRAVIS_CHECK(v.size() != v2.size, "Size must be equal for addition"); for(size_t i=0; i inline static void sub(tVarVector &v, const tConstVectorView& v2) { GRAVIS_CHECK(v.size() != v2.size, "Size must be equal for addition"); for(size_t i=0; i inline static void sub(tMatrixView &v, const tConstMatrixView& v2) { GRAVIS_CHECK(v.size() != v2.size, "Size must be equal for addition"); for(size_t i=0; i inline static void sub(tVarMatrix &v, const tConstMatrixView& v2) { GRAVIS_CHECK(v.size() != v2.size(), "Size must be equal for addition"); for(size_t i=0; i inline static void sub(tMatrix &v, const tMatrix& v2) { for(size_t i=0; i inline static void elmul(tVectorView &v, const tConstVectorView& v2) { GRAVIS_CHECK(v.size() != v2.size(), "Size must be equal for addition"); for(size_t i=0; i inline static void elmul(tVarVector &v, const tConstVectorView& v2) { GRAVIS_CHECK(v.size() != v2.size(), "Size must be equal for addition"); for(size_t i=0; i inline static void elmul(tMatrixView &v, const tConstMatrixView& v2) { GRAVIS_CHECK(v.size() != v2.size(), "Size must be equal for addition"); for(size_t i=0; i inline static void elmul(tVarMatrix &v, const tConstMatrixView& v2) { GRAVIS_CHECK(v.size() != v2.size(), "Size must be equal for addition"); for(size_t i=0; i inline static void elmul(tMatrix &v, const tMatrix& v2) { for(size_t i=0; i inline static void eldiv(tVectorView &v, const tConstVectorView& v2) { GRAVIS_CHECK(v.size() == v2.size(), "Size must be equal for addition"); for(size_t i=0; i inline static void eldiv(tVarVector &v, const tConstVectorView& v2) { GRAVIS_CHECK(v.size() == v2.size(), "Size must be equal for addition"); for(size_t i=0; i inline static void eldiv(tMatrixView &v, const tConstMatrixView& v2) { GRAVIS_CHECK(v.size() == v2.size(), "Size must be equal for addition"); for(size_t i=0; i inline static void eldiv(tVarMatrix &v, const tConstMatrixView& v2) { GRAVIS_CHECK(v.size() == v2.size(), "Size must be equal for addition"); for(size_t i=0; i inline static void eldiv(tVarMatrix &v, const tVarMatrix& v2) { GRAVIS_CHECK(v.size() == v2.size(), "Size must be equal for addition"); for(size_t i=0; i inline static void eldiv(tMatrix &v, const tMatrix& v2) { 
for(size_t i=0; i inline static void cmpLarger( OutMatrix& Mout, const InMatrix& M, const typename InMatrix::scalar& t) { GRAVIS_CHECK(Mout.h == M.h && Mout.w == M.w, "Incompatible sizes"); const int S=Mout.size(); for (int i=0; it ? '\xFF' : '\x00'; } /** * Per element smaller than test for scalars **/ template inline static void cmpSmaller( OutMatrix& Mout, const InMatrix& M, const typename InMatrix::scalar& t) { GRAVIS_CHECK(Mout.h == M.h && Mout.w == M.w, "Incompatible sizes"); int i; const int S=Mout.size(); for (int i=0; i inline static void cmpEqual( OutMatrix& Mout, const InMatrix& M, const typename InMatrix::scalar& t) { GRAVIS_CHECK(Mout.h == M.h && Mout.w == M.w, "Incompatible sizes"); int i; const int S=Mout.size(); for (int i=0; i inline static void inset( OutMatrix& Out, const InMatrix& In, const size_t row, const size_t col=0) { if ((In.h == 0) || (In.w == 0) || (col>=Out.w) || (row>=Out.h)) return; size_t h=std::min(In.h, Out.h-row); size_t w=std::min(In.w, Out.w-col); for (size_t j=0; j inline static void inset( tVarVector &Out, const tConstVectorView &In, const size_t row) { if ((In.h == 0) || (row>=Out.h)) return; size_t h=std::min(In.h, Out.h-row); memcpy( &Out(row, 0), &In(0, 0), sizeof(In[0])*h ); } /** * Inset one matrix into another **/ template inline static void inset( tVarVector &Out, const tVarVector &In, const size_t row) { if ((In.h == 0) || (row>=Out.h)) return; size_t h=std::min(In.h, Out.h-row); memcpy( &Out[row], &In[0], sizeof(In[0])*h ); } /** * Matrix Convolution * * TODO: Do not use checked access in the main region of the image, use it * only on the borders **/ template inline static void conv2( OutMatrix& Iout, const InMatrixImg& I, const InMatrixMask& F ) { GRAVIS_CHECK( Iout.w == I.w && Iout.h == I.h, "Matrix sizes are not compatible" ); Iout.clear(); const int ox(F.w/2); const int oy(F.h/2); const int W=I.w; int j; #ifdef _OPENMP #pragma omp parallel for default(none) private(j) shared(I,Iout,F) #endif for (j=0; j inline static void erode( Matrix& m) { const tVarMatrix M(m); // Make a copy int j; const int W=M.w; #ifdef _OPENMP #pragma omp parallel for default(none) private(j) shared(m) #endif for (j=1; j inline static void mult_elementwise(OutMatrix& m, const InMatrix1& m1, const InMatrix2& m2) { GRAVIS_CHECK( m.size() == m1.size(), "Matrix sizes incompatible"); GRAVIS_CHECK( m.size() == m2.size(), "Matrix sizes incompatible"); const size_t s = m.size(); for (size_t i=0; i inline static T sum(tConstMatrixView &v) { if (v.size()==0) return T(0); T r=v[0]; for(size_t i=1; i inline static T sum(tConstVectorView &v) { if (v.size()==0) return T(0); T r=v[0]; for(size_t i=1; i inline static T sum(tMatrixView &v) { if (v.size()==0) return T(0); T r=v[0]; for(size_t i=1; i inline static T sum(tVectorView &v) { if (v.size()==0) return T(0); T r=v[0]; for(size_t i=1; i inline static T sum(tVarMatrix &v) { if (v.size()==0) return T(0); T r=v[0]; for(size_t i=1; i inline static T sum(tVarVector &v) { if (v.size()==0) return T(0); T r=v[0]; for(size_t i=1; i static void load(tVarMatrix &M, const std::string& fn) { char mmid0[33] = "GRAVIS_VAR_MATRIX "; char mmid1[33] = "GRAVIS_VAR_MATRIX "; std::ifstream stream(fn.c_str(), std::ifstream::binary); uint8_t uint32_size; uint8_t T_size; uint32_t h,w; uint16_t endianness; stream.read(mmid1, 32); stream.read((char*)&endianness, 2); stream.read((char*)&uint32_size, 1); stream.read((char*)&T_size, 1); stream.read((char*)&h, sizeof(h)); stream.read((char*)&w, sizeof(w)); GRAVIS_CHECK( 0==strncmp( mmid0, mmid1, 31 
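// (Illustrative note, not in the original source.)  On-disk layout, as written
// by the matching save() below: a 32-byte magic string "GRAVIS_VAR_MATRIX",
// a 2-byte endianness tag (0x0001), one byte sizeof(uint32_t), one byte
// sizeof(T), then h and w as uint32_t, followed by the h*w elements of T in
// column-major order.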
),"Not a gravis var matrix file" ); GRAVIS_CHECK( endianness == 0x0001, "Wrong endianness"); GRAVIS_CHECK( uint32_size == 4, "Wrong size_t size"); GRAVIS_CHECK( T_size == sizeof(T), "Wrong type in matrix file"); M.resize(h,w); stream.read((char*)M.data, sizeof(T)*M.size()); } template static void save(const std::string& fn, const tConstMatrixView &v) { char mmid[33] = "GRAVIS_VAR_MATRIX "; std::ofstream stream(fn.c_str(), std::ofstream::binary); uint8_t uint32_size = sizeof(uint32_t); uint8_t T_size = sizeof(T); uint32_t h = v.h, w = v.w; uint16_t endianness = 0x0001; stream.write(mmid, 32); stream.write((char*)&endianness, 2); stream.write((char*)&uint32_size, 1); stream.write((char*)&T_size, 1); stream.write((char*)&h, sizeof(h)); stream.write((char*)&w, sizeof(w)); stream.write((char*)v.data, sizeof(T)*v.size()); } template static void load(tVarVector &v, const std::string& fn) { char mmid0[33] = "GRAVIS_VAR_VECTOR "; char mmid1[33] = "GRAVIS_VAR_VECTOR "; std::ifstream stream(fn.c_str(), std::ifstream::binary); uint8_t uint32_size; uint8_t T_size; uint32_t k; uint16_t endianness; stream.read(mmid1, 32); stream.read((char*)&endianness, 2); stream.read((char*)&uint32_size, 1); stream.read((char*)&T_size, 1); stream.read((char*)&k, sizeof(k)); GRAVIS_CHECK( 0 == strncmp( mmid0, mmid1, 31 ), "Not a gravis var vector file" ); GRAVIS_CHECK( endianness == 0x0001, "Wrong endianness"); GRAVIS_CHECK( uint32_size == 4, "Wrong uint32 size"); GRAVIS_CHECK( T_size == sizeof(T), "Wrong type in model file"); v.resize(k); stream.read((char*)v.data, sizeof(T)*v.size()); } template static void save(const std::string& fn, const tConstVectorView &v) { char mmid[33] = "GRAVIS_VAR_VECTOR "; std::ofstream stream(fn.c_str(), std::ofstream::binary); uint8_t uint32_size = sizeof(uint32_t); uint8_t T_size = sizeof(T); uint16_t endianness = 0x0001; uint32_t k = v.size(); stream.write(mmid, 32); stream.write((char*)&endianness, 2); stream.write((char*)&uint32_size, 1); stream.write((char*)&T_size, 1); stream.write((char*)&k, sizeof(k)); stream.write((char*)v.data, sizeof(T)*v.size()); } template static inline void clamp(tMatrixView &v, const T& min, const T& max) { for (size_t i=0; i static inline void clamp(tVarMatrix &v, const T& min, const T& max) { for (size_t i=0; i static inline void clamp(tVectorView &v, const T& min, const T& max) { for (size_t i=0; i static inline void clamp(tVarVector &v, const T& min, const T& max) { for (size_t i=0; i inline std::istream& operator>> (std::istream& is, tVectorView& arg) { size_t h = arg.h; std::string t; is >> t; if (t != "[") GRAVIS_THROW3(gravis::Exception, "Unexpected token. A vector should start with [", t); for (size_t j=0; j> arg[j]; is >> t; if (t != "]") GRAVIS_THROW3(gravis::Exception, "Unexpected token. A vector should end with ]", t); return is; } /** * Read Variable size matrices from a stream **/ template inline std::istream& operator>> (std::istream& is, tVarVector& arg) { std::string t; std::vector v; is >> t; if (t != "[") GRAVIS_THROW3(gravis::Exception, "Unexpected token. 
A vector should start with [", t); while (is) { is >> t; if (t == "]") break; std::stringstream st(t); T tt; st >> tt; v.push_back(tt); } arg.resize(v.size()); size_t h = arg.h; for (size_t j=0; j inline std::ostream& operator<< (std::ostream& os, const tConstVectorView& arg) { size_t h = arg.h; os << "["; for (size_t j=0; j inline std::ostream& operator<< (std::ostream& os, const tConstMatrixView& arg) { size_t h = arg.h; size_t w = arg.w; if ((h>1) && (w>1)) { os << "Matrix: " << h << "x" << w << std::endl; for (size_t i=0; i1) { os << "["; for (size_t j=0; j inline std::ostream& operator<< (std::ostream& os, const tVarVector& arg) { tConstVectorView mv(arg); os << mv; return os; } /** * Write Variable size matrices to a stream **/ template inline std::ostream& operator<< (std::ostream& os, const tVarMatrix& arg) { tConstMatrixView mv(arg); os << mv; return os; } } #include "tVarMatrix_blas.h" #endif relion-3.1.3/src/jaz/gravis/tVarMatrix_blas.h000066400000000000000000000007261411340063500211310ustar00rootroot00000000000000#ifndef __LIBGRAVIS_T_VAR_MATRIX_BLAS_H__ #define __LIBGRAVIS_T_VAR_MATRIX_BLAS_H__ #include "tVarMatrix.h" namespace gravis { namespace matrix { // Single and double implementations #define __GRAVIS__MATRIX__BLAS__DATATYPE__SINGLE__ #include "tVarMatrix_blas.hxx" #undef __GRAVIS__MATRIX__BLAS__DATATYPE__SINGLE__ #define __GRAVIS__MATRIX__BLAS__DATATYPE__DOUBLE__ #include "tVarMatrix_blas.hxx" #undef __GRAVIS__MATRIX__BLAS__DATATYPE__DOUBLE__ } } #endif relion-3.1.3/src/jaz/gravis/tVarMatrix_blas.hxx000066400000000000000000000534501411340063500215130ustar00rootroot00000000000000/** * Included multiple times from matrix_blas.hpp for different combinations of float, double varmatrix and matrixview * * Never include directly **/ #ifdef __GRAVIS__MATRIX__BLAS__DATATYPE__SINGLE__ #define __GMBD_REAL float #define __GMBD_xGEMV sgemv_ #define __GMBD_xNRM2 snrm2_ #define __GMBD_xSCAL sscal_ #define __GMBD_xAXPY saxpy_ #define __GMBD_xGESVD sgesvd_ #define __GMBD_xGESDD sgesdd_ #define __GMBD_xDOT sdot_ #else #ifdef __GRAVIS__MATRIX__BLAS__DATATYPE__DOUBLE__ #define __GMBD_REAL double #define __GMBD_xGEMV dgemv_ #define __GMBD_xNRM2 dnrm2_ #define __GMBD_xSCAL dscal_ #define __GMBD_xAXPY daxpy_ #define __GMBD_xGESVD dgesvd_ #define __GMBD_xGESDD dgesdd_ #define __GMBD_xDOT ddot_ #else #error( "Never include directly, this is included only from within matrix_blas.hpp" ) #endif #endif namespace reference { //#include "tVarMatrix_blas_reference.h" } // Blas Header extern "C" { void __GMBD_xGEMV(const char* const trans, const size_t& m, const size_t& n, const __GMBD_REAL& alpha, const __GMBD_REAL* const M, const size_t& m1, const __GMBD_REAL* const x, const size_t& xs, const __GMBD_REAL& beta, __GMBD_REAL* const v, const size_t& vs); __GMBD_REAL __GMBD_xNRM2(const size_t& n, const __GMBD_REAL* const x, const size_t& inc); void __GMBD_xSCAL(const size_t& n, const __GMBD_REAL& alpha, __GMBD_REAL* const x, const size_t& inc); void __GMBD_xAXPY(const size_t& n, const __GMBD_REAL& alpha, const __GMBD_REAL* const x, const size_t& incx, __GMBD_REAL* const y, const size_t& incy); __GMBD_REAL __GMBD_xDOT(const size_t& n, const __GMBD_REAL* dx, const size_t& incx, const __GMBD_REAL* dy, const size_t& incy); } /// Lapack Header extern "C" { void __GMBD_xGESVD(const char& jobu, const char& jobvt, const int& m, const int& n, __GMBD_REAL* a, const int& lda, __GMBD_REAL* s, __GMBD_REAL* u, const int& ldu, __GMBD_REAL* vt, const int& ldvt, __GMBD_REAL* work, const int& lwork, int& info ); void 
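// (Illustrative note, not in the original source.)  xGESVD computes the SVD
// with LAPACK's standard algorithm; xGESDD, declared below, is the
// divide-and-conquer variant used by the *_dc wrappers for large matrices.
// A sketch of the higher-level wrapper defined further down, assuming an
// m x n single-precision matrix:
//
//   gravis::tVarMatrix<float> A(m, n), U(m, m), VT(n, n);
//   gravis::tVarVector<float> S(std::min(m, n));
//   // ... fill A ...
//   gravis::matrix::svd(U, S, VT, A);   // A = U * diag(S) * VT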
__GMBD_xGESDD(const char& jobz, const int& m, const int& n, __GMBD_REAL* a, const int& lda, __GMBD_REAL* s, __GMBD_REAL* u, const int& ldu, __GMBD_REAL* vt, const int& ldvt, __GMBD_REAL* work, const int& lwork, int* iwork, int& info ); } /** * Inplace SVD for small matrices. * Replaces the input matrix A with its left eigenvectors U **/ inline static void svd_inplace_u(tMatrixView<__GMBD_REAL> &IN_A_OUT_U, tVectorView<__GMBD_REAL> &S, tMatrixView<__GMBD_REAL> &VT) { int info; tVarVector<__GMBD_REAL> work(1); __GMBD_xGESVD('O', 'A', IN_A_OUT_U.h, IN_A_OUT_U.w, IN_A_OUT_U.data, IN_A_OUT_U.h, S.data, NULL, IN_A_OUT_U.h, VT.data, VT.h, work.data, -1, info); if (info < 0) GRAVIS_THROW3(gravis::Exception, "The i'th argument had an invalid value.", StringFormat(info)); if (info > 0) GRAVIS_THROW3(gravis::Exception, "SBDSQR did not converge to zero.", StringFormat(info)); work.resize(int(work[0])); __GMBD_xGESVD('O', 'A', IN_A_OUT_U.h, IN_A_OUT_U.w, IN_A_OUT_U.data, IN_A_OUT_U.h, S.data, NULL, IN_A_OUT_U.h, VT.data, VT.h, work.data, work.h, info); } /** * Inplace SVD for small matrices * Replaces the input matrix A with its left eigenvectors U **/ inline static void svd_inplace_u(tVarMatrix<__GMBD_REAL> &IN_A_OUT_U, tVarVector<__GMBD_REAL> &S, tVarMatrix<__GMBD_REAL> &VT) { int info; tVarVector<__GMBD_REAL> work(1); __GMBD_xGESVD('O', 'A', IN_A_OUT_U.h, IN_A_OUT_U.w, IN_A_OUT_U.data, IN_A_OUT_U.h, S.data, NULL, IN_A_OUT_U.h, VT.data, VT.h, work.data, -1, info); if (info < 0) GRAVIS_THROW3(gravis::Exception, "The i'th argument had an invalid value.", StringFormat(info)); if (info > 0) GRAVIS_THROW3(gravis::Exception, "SBDSQR did not converge to zero.", StringFormat(info)); work.resize(int(work[0])); __GMBD_xGESVD('O', 'A', IN_A_OUT_U.h, IN_A_OUT_U.w, IN_A_OUT_U.data, IN_A_OUT_U.h, S.data, NULL, IN_A_OUT_U.h, VT.data, VT.h, work.data, work.h, info); } /** * Inplace SVD for large matrices using a divide and conquer algorithm * Replaces the input matrix A with its left eigenvectors U **/ inline static void svd_inplace_u_dc(tMatrixView<__GMBD_REAL> &IN_A_OUT_U, tVectorView<__GMBD_REAL> &S, tMatrixView<__GMBD_REAL> &VT) { int info; tVarVector<__GMBD_REAL> work(1); tVarVector iwork(8*std::min(IN_A_OUT_U.h, IN_A_OUT_U.w)); __GMBD_xGESDD('O', IN_A_OUT_U.h, IN_A_OUT_U.w, IN_A_OUT_U.data, IN_A_OUT_U.h, S.data, NULL, IN_A_OUT_U.h, VT.data, VT.h, work.data, -1, iwork.data, info); if (info < 0) GRAVIS_THROW3(gravis::Exception, "The i'th argument had an invalid value.", StringFormat(info)); if (info > 0) GRAVIS_THROW3(gravis::Exception, "SBDSQR did not converge to zero.", StringFormat(info)); work.resize(int(work[0])); __GMBD_xGESDD('O', IN_A_OUT_U.h, IN_A_OUT_U.w, IN_A_OUT_U.data, IN_A_OUT_U.h, S.data, NULL, IN_A_OUT_U.h, VT.data, VT.h, work.data, work.h, iwork.data, info); } /** * Inplace SVD for large matrices using a divide and conquer algorithm * Replaces the input matrix A with its left eigenvectors U **/ inline static void svd_inplace_u_dc(tVarMatrix<__GMBD_REAL> &IN_A_OUT_U, tVarVector<__GMBD_REAL> &S, tVarMatrix<__GMBD_REAL> &VT) { int info; tVarVector<__GMBD_REAL> work(1); tVarVector iwork(8*std::min(IN_A_OUT_U.h, IN_A_OUT_U.w)); __GMBD_xGESDD('O', IN_A_OUT_U.h, IN_A_OUT_U.w, IN_A_OUT_U.data, IN_A_OUT_U.h, S.data, NULL, IN_A_OUT_U.h, VT.data, VT.h, work.data, -1, iwork.data, info); if (info < 0) GRAVIS_THROW3(gravis::Exception, "The i'th argument had an invalid value.", StringFormat(info)); if (info > 0) GRAVIS_THROW3(gravis::Exception, "SBDSQR did not converge to zero.", StringFormat(info)); 
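// (Descriptive note, not in the original source.)  The first xGESDD call above
// used lwork = -1, the LAPACK workspace-query convention: the optimal
// workspace size is returned in work[0], the buffer is resized accordingly,
// and the factorization is then computed by the second call.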
work.resize(int(work[0])); __GMBD_xGESDD('O', IN_A_OUT_U.h, IN_A_OUT_U.w, IN_A_OUT_U.data, IN_A_OUT_U.h, S.data, NULL, IN_A_OUT_U.h, VT.data, VT.h, work.data, work.h, iwork.data, info); } /** * SVD for small matrices **/ inline static void svd(tMatrixView<__GMBD_REAL> &U, tVectorView<__GMBD_REAL> &S, tMatrixView<__GMBD_REAL> &VT, const tConstMatrixView<__GMBD_REAL> &A) { int info; tVarMatrix<__GMBD_REAL> _A(A); tVarVector<__GMBD_REAL> work(1); __GMBD_xGESVD('A', 'A', A.h, A.w, _A.data, A.h, S.data, U.data, U.h, VT.data, VT.h, work.data, -1, info); if (info < 0) GRAVIS_THROW3(gravis::Exception, "The i'th argument had an invalid value.", StringFormat(info)); if (info > 0) GRAVIS_THROW3(gravis::Exception, "SBDSQR did not converge to zero.", StringFormat(info)); work.resize(int(work[0])); __GMBD_xGESVD('A', 'A', A.h, A.w, _A.data, A.h, S.data, U.data, U.h, VT.data, VT.h, work.data, work.h, info); } /** * SVD for small matrices **/ inline static void svd(tVarMatrix<__GMBD_REAL> &U, tVarVector<__GMBD_REAL> &S, tVarMatrix<__GMBD_REAL> &VT, const tConstMatrixView<__GMBD_REAL> &A) { int info; tVarMatrix<__GMBD_REAL> _A(A); tVarVector<__GMBD_REAL> work(1); __GMBD_xGESVD('A', 'A', A.h, A.w, _A.data, A.h, S.data, U.data, U.h, VT.data, VT.h, work.data, -1, info); if (info < 0) GRAVIS_THROW3(gravis::Exception, "The i'th argument had an invalid value.", StringFormat(info)); if (info > 0) GRAVIS_THROW3(gravis::Exception, "SBDSQR did not converge to zero.", StringFormat(info)); work.resize(int(work[0])); __GMBD_xGESVD('A', 'A', A.h, A.w, _A.data, A.h, S.data, U.data, U.h, VT.data, VT.h, work.data, work.h, info); } /** * SVD for large matrices using a divide and conquer algorithm **/ inline static void svd_dc(tMatrixView<__GMBD_REAL> &U, tVectorView<__GMBD_REAL> &S, tMatrixView<__GMBD_REAL> &VT, const tConstMatrixView<__GMBD_REAL> &A) { int info; tVarMatrix<__GMBD_REAL> _A(A); tVarVector<__GMBD_REAL> work(1); tVarVector iwork(8*std::min(A.h, A.w)); __GMBD_xGESDD('A', A.h, A.w, _A.data, A.h, S.data, U.data, U.h, VT.data, VT.h, work.data, -1, iwork.data, info); if (info < 0) GRAVIS_THROW3(gravis::Exception, "The i'th argument had an invalid value.", StringFormat(info)); if (info > 0) GRAVIS_THROW3(gravis::Exception, "SBDSQR did not converge to zero.", StringFormat(info)); work.resize(int(work[0])); __GMBD_xGESDD('A', A.h, A.w, _A.data, A.h, S.data, U.data, U.h, VT.data, VT.h, work.data, work.h, iwork.data, info); } /** * SVD for large matrices using a divide and conquer algorithm **/ inline static void svd_dc(tVarMatrix<__GMBD_REAL> &U, tVarVector<__GMBD_REAL> &S, tVarMatrix<__GMBD_REAL> &VT, const tConstMatrixView<__GMBD_REAL> &A) { int info; tVarMatrix<__GMBD_REAL> _A(A); tVarVector<__GMBD_REAL> work(1); tVarVector iwork(8*std::min(A.h, A.w)); __GMBD_xGESDD('A', A.h, A.w, _A.data, A.h, S.data, U.data, U.h, VT.data, VT.h, work.data, -1, iwork.data, info); if (info < 0) GRAVIS_THROW3(gravis::Exception, "The i'th argument had an invalid value.", StringFormat(info)); if (info > 0) GRAVIS_THROW3(gravis::Exception, "SBDSQR did not converge to zero.", StringFormat(info)); work.resize(int(work[0])); __GMBD_xGESDD('A', A.h, A.w, _A.data, A.h, S.data, U.data, U.h, VT.data, VT.h, work.data, work.h, iwork.data, info); } //// Multiplications // Not Transposed /** * v = alpha*M*x + beta*v **/ inline static void addmult(tVectorView<__GMBD_REAL> &v, const __GMBD_REAL& alpha, const tConstMatrixView<__GMBD_REAL> &M, const tConstVectorView<__GMBD_REAL> &x, const __GMBD_REAL& beta) { GRAVIS_CHECK( v.size() == M.h, "v and M are 
incompatible"); GRAVIS_CHECK( x.size() == M.w, "M and x are incompatible"); if (M.h > 0) __GMBD_xGEMV("N", M.h, M.w, alpha, M.data, M.h, x.data, 1, beta, v.data, 1); } /** * v = alpha*M*x + beta*v * Will not resize v, as this would not make sense **/ inline static void addmult(tVarVector<__GMBD_REAL> &v, const __GMBD_REAL& alpha, const tConstMatrixView<__GMBD_REAL> &M, const tConstVectorView<__GMBD_REAL> &x, const __GMBD_REAL& beta) { tVectorView<__GMBD_REAL> vv(v); addmult(vv, alpha, M, x, beta); } /** * v = v+M*x **/ inline static void addmult(tVectorView<__GMBD_REAL> &v, const tConstMatrixView<__GMBD_REAL> &M, const tConstVectorView<__GMBD_REAL> &x) { addmult(v, __GMBD_REAL(1), M, x, __GMBD_REAL(1)); } /** * v = v+M*x * Will not resize v, as this would not make sense **/ inline static void addmult(tVarVector<__GMBD_REAL> &v, const tConstMatrixView<__GMBD_REAL> &M, const tConstVectorView<__GMBD_REAL> &x) { addmult(v, __GMBD_REAL(1), M, x, __GMBD_REAL(1)); } /** * v = a+M*x **/ inline static void addmult(tVectorView<__GMBD_REAL> &v, const tConstVectorView<__GMBD_REAL> &a, const tConstMatrixView<__GMBD_REAL> &M, const tConstVectorView<__GMBD_REAL> &x) { v = a; addmult(v, M, x); } /** * v = a+M*x **/ inline static void addmult(tVarVector<__GMBD_REAL> &v, const tConstVectorView<__GMBD_REAL> &a, const tConstMatrixView<__GMBD_REAL> &M, const tConstVectorView<__GMBD_REAL> &x) { v = a; addmult(v, M, x); } /** * v = alpha*M*x **/ inline static void mult(tVectorView<__GMBD_REAL> &v, const __GMBD_REAL& alpha, const tConstMatrixView<__GMBD_REAL> &M, const tConstVectorView<__GMBD_REAL> &x) { ::gravis::matrix::clear(v); addmult(v, alpha, M, x, 1); } /** * v = alpha*M*x * Will not resize v, as this would not make sense **/ inline static void mult(tVarVector<__GMBD_REAL> &v, const __GMBD_REAL& alpha, const tConstMatrixView<__GMBD_REAL> &M, const tConstVectorView<__GMBD_REAL> &x) { ::gravis::matrix::clear(v); addmult(v, alpha, M, x, 1); } /** * v = M*x **/ inline static void mult(tVectorView<__GMBD_REAL> &v, const tConstMatrixView<__GMBD_REAL> &M, const tConstVectorView<__GMBD_REAL> &x) { mult(v, 1, M, x); } /** * v = M*x **/ inline static void mult(tVarVector<__GMBD_REAL> &v, const tConstMatrixView<__GMBD_REAL> &M, const tConstVectorView<__GMBD_REAL> &x) { mult(v, 1, M, x); } // TRANSPOSED VERSIONS /** * v = (alpha*x^T M)^T + beta*v **/ inline static void addmult(tVectorView<__GMBD_REAL> &v, const __GMBD_REAL& alpha, const tConstVectorView<__GMBD_REAL> &x, const tConstMatrixView<__GMBD_REAL> &M, const __GMBD_REAL& beta) { GRAVIS_CHECK( v.size() == M.w, "v and M are incompatible"); GRAVIS_CHECK( x.size() == M.h, "M and x are incompatible"); if (M.h > 0) __GMBD_xGEMV("T", M.h, M.w, alpha, M.data, M.h, x.data, 1, beta, v.data, 1); } /** * v = (alpha*x^T M)^T + beta*v **/ inline static void addmult(tVarVector<__GMBD_REAL> &v, const __GMBD_REAL& alpha, const tConstVectorView<__GMBD_REAL> &x, const tConstMatrixView<__GMBD_REAL> &M, const __GMBD_REAL& beta) { tVectorView<__GMBD_REAL> vv(v); addmult(vv, alpha, x, M, beta); } /** * v = v+M*x **/ inline static void addmult(tVectorView<__GMBD_REAL> &v, const tConstVectorView<__GMBD_REAL> &x, const tConstMatrixView<__GMBD_REAL> &M) { addmult(v, __GMBD_REAL(1), x, M, __GMBD_REAL(1)); } /** * v = v+M*x * Will not resize v, as this would not make sense **/ inline static void addmult(tVarVector<__GMBD_REAL> &v, const tConstVectorView<__GMBD_REAL> &x, const tConstMatrixView<__GMBD_REAL> &M) { addmult(v, __GMBD_REAL(1), x, M, __GMBD_REAL(1)); } /** * v = a+(x^T*M)^T **/ 
inline static void addmult(tVectorView<__GMBD_REAL> &v, const tConstVectorView<__GMBD_REAL> &a, const tConstVectorView<__GMBD_REAL> &x, const tConstMatrixView<__GMBD_REAL> &M) { v = a; addmult(v, x, M); } /** * v = a+(x^T*M)^T **/ inline static void addmult(tVarVector<__GMBD_REAL> &v, const tConstVectorView<__GMBD_REAL> &a, const tConstVectorView<__GMBD_REAL> &x, const tConstMatrixView<__GMBD_REAL> &M) { v = a; addmult(v, x, M); } /** * v = alpha*x^T*M **/ inline static void mult(tVectorView<__GMBD_REAL> &v, const __GMBD_REAL& alpha, const tConstVectorView<__GMBD_REAL> &x, const tConstMatrixView<__GMBD_REAL> &M) { ::gravis::matrix::clear(v); addmult(v, alpha, x, M, 1); } /** * v = alpha*x^T*M * Will not resize v, as this would not make sense **/ inline static void mult(tVarVector<__GMBD_REAL> &v, const __GMBD_REAL& alpha, const tConstVectorView<__GMBD_REAL> &x, const tConstMatrixView<__GMBD_REAL> &M) { ::gravis::matrix::clear(v); addmult(v, alpha, x, M, 1); } /** * v = x^T*M **/ inline static void mult(tVectorView<__GMBD_REAL> &v, const tConstVectorView<__GMBD_REAL> &x, const tConstMatrixView<__GMBD_REAL> &M) { mult(v, 1, x, M); } /** * v = x^T*M **/ inline static void mult(tVarVector<__GMBD_REAL> &v, const tConstVectorView<__GMBD_REAL> &x, const tConstMatrixView<__GMBD_REAL> &M) { mult(v, 1, x, M); } static inline __GMBD_REAL abs(const __GMBD_REAL& a) { return a< __GMBD_REAL(0) ? -a : a; } //// Norms /** l1 norm **/ inline static __GMBD_REAL normL1(const tConstVectorView<__GMBD_REAL> &v) { if (v.size()==0) return 0; __GMBD_REAL result = abs(v[0]); for (size_t i=1; i &v) { if (v.size()==0) return 0; __GMBD_REAL result = abs(v[0]); for (size_t i=1; i &v) { return v.size()==0 ? 0 : __GMBD_xNRM2(v.size(), v.data, 1); } /** Frobenius norm **/ inline static __GMBD_REAL normL2(const tConstMatrixView<__GMBD_REAL> &v) { return v.size()==0 ? 
0 : __GMBD_xNRM2(v.size(), v.data, 1); } /** Squared l2 norm **/ inline static __GMBD_REAL normL2sqr(const tConstVectorView<__GMBD_REAL> &v) { return ::gravis::matrix::priv::sqr(normL2(v)); } /** Squared Frobenius norm **/ inline static __GMBD_REAL normL2sqr(const tConstMatrixView<__GMBD_REAL> &v) { return ::gravis::matrix::priv::sqr(normL2(v)); } /** linf norm **/ inline static __GMBD_REAL normLinf(const tConstVectorView<__GMBD_REAL> &v) { if (v.size()==0) return 0; __GMBD_REAL result = abs(v[0]); for (size_t i=1; i &v) { if (v.size()==0) return 0; __GMBD_REAL result = abs(v[0]); for (size_t i=1; i &v, const __GMBD_REAL& s, const tConstVectorView<__GMBD_REAL> &u) { __GMBD_xAXPY(v.size(), s, u.data, 1, v.data, 1); } /** v += u **/ inline static void add(tVarVector<__GMBD_REAL> &v, const __GMBD_REAL& s, const tConstVectorView<__GMBD_REAL> &u) { __GMBD_xAXPY(v.size(), s, u.data, 1, v.data, 1); } /** V += U **/ inline static void add(tMatrixView<__GMBD_REAL> &V, const __GMBD_REAL& s, const tConstMatrixView<__GMBD_REAL> &U) { __GMBD_xAXPY(V.size(), s, U.data, 1, V.data, 1); } /** V += U **/ inline static void add(tVarMatrix<__GMBD_REAL> &V, const __GMBD_REAL& s, const tConstMatrixView<__GMBD_REAL> &U) { __GMBD_xAXPY(V.size(), s, U.data, 1, V.data, 1); } /** v += u **/ inline static void add(tVectorView<__GMBD_REAL> &v, const tConstVectorView<__GMBD_REAL> &u) { add(v, 1, u); } /** v += u **/ inline static void add(tVarVector<__GMBD_REAL> &v, const tConstVectorView<__GMBD_REAL> &u) { add(v, 1, u); } /** V += U **/ inline static void add(tMatrixView<__GMBD_REAL> &V, const tConstMatrixView<__GMBD_REAL> &U) { add(V, 1, U); } /** V += U **/ inline static void add(tVarMatrix<__GMBD_REAL> &V, const tConstMatrixView<__GMBD_REAL> &U) { add(V, 1, U); } /** r = v + u **/ inline static void add(tVectorView<__GMBD_REAL> &r, const tConstVectorView<__GMBD_REAL> &v, const tConstVectorView<__GMBD_REAL> &u) { r = v; add(r,u); } /** r = v + u **/ inline static void add(tVarVector<__GMBD_REAL> &r, const tConstVectorView<__GMBD_REAL> &v, const tConstVectorView<__GMBD_REAL> &u) { r = v; add(r,u); } /** R = V + U **/ inline static void add(tMatrixView<__GMBD_REAL> &R, const tConstMatrixView<__GMBD_REAL> &V, const tConstMatrixView<__GMBD_REAL> &U) { R = V; add(R,U); } /** R = V + U **/ inline static void add(tVarMatrix<__GMBD_REAL> &R, const tConstMatrixView<__GMBD_REAL> &V, const tConstMatrixView<__GMBD_REAL> &U) { R = V; add(R,U); } /** v -= u **/ inline static void sub(tVectorView<__GMBD_REAL> &v, const tConstVectorView<__GMBD_REAL> &u) { add(v, -1, u); } /** v -= u **/ inline static void sub(tVarVector<__GMBD_REAL> &v, const tConstVectorView<__GMBD_REAL> &u) { add(v, -1, u); } /** V -= U **/ inline static void sub(tMatrixView<__GMBD_REAL> &V, const tConstMatrixView<__GMBD_REAL> &U) { add(V, -1, U); } /** V -= U **/ inline static void sub(tVarMatrix<__GMBD_REAL> &V, const tConstMatrixView<__GMBD_REAL> &U) { add(V, -1, U); } /** v -= su **/ inline static void sub(tVectorView<__GMBD_REAL> &v, const __GMBD_REAL& s, const tConstVectorView<__GMBD_REAL> &u) { add(v, -s, u); } /** v -= su **/ inline static void sub(tVarVector<__GMBD_REAL> &v, const __GMBD_REAL& s, const tConstVectorView<__GMBD_REAL> &u) { add(v, -s, u); } /** V -= sU **/ inline static void sub(tMatrixView<__GMBD_REAL> &V, const __GMBD_REAL& s, const tConstMatrixView<__GMBD_REAL> &U) { add(V, -s, U); } /** V -= sU **/ inline static void sub(tVarMatrix<__GMBD_REAL> &V, const __GMBD_REAL& s, const tConstMatrixView<__GMBD_REAL> &U) { add(V, -s, U); } //// 
Matrix Scalar Operations /** Arithmethic operations with scalars *= **/ inline static void mult(tVectorView<__GMBD_REAL> &o, const tConstVectorView<__GMBD_REAL> &v, const __GMBD_REAL& scalar) { o=v; __GMBD_xSCAL(v.size(), scalar, o.data, 1); } /** Arithmethic operations with scalars *= **/ inline static void mult(tVarVector<__GMBD_REAL> &o, const tConstVectorView<__GMBD_REAL> &v, const __GMBD_REAL& scalar) { o=v; __GMBD_xSCAL(v.size(), scalar, o.data, 1); } /** Arithmethic operations with scalars *= **/ inline static void mult(tMatrixView<__GMBD_REAL> &o, const tConstMatrixView<__GMBD_REAL> &v, const __GMBD_REAL& scalar) { o=v; __GMBD_xSCAL(v.size(), scalar, o.data, 1); } /** Arithmethic operations with scalars *= **/ inline static void mult(tVarMatrix<__GMBD_REAL> &o, const tConstMatrixView<__GMBD_REAL> &v, const __GMBD_REAL& scalar) { o=v; __GMBD_xSCAL(v.size(), scalar, o.data, 1); } /** Arithmethic operations with scalars = * **/ inline static void mult(tVectorView<__GMBD_REAL> &v, const __GMBD_REAL& scalar) { __GMBD_xSCAL(v.size(), scalar, v.data, 1); } /** Arithmethic operations with scalars = * **/ inline static void mult(tVarVector<__GMBD_REAL> &v, const __GMBD_REAL& scalar) { __GMBD_xSCAL(v.size(), scalar, v.data, 1); } /** Arithmethic operations with scalars = * **/ inline static void mult(tMatrixView<__GMBD_REAL> &v, const __GMBD_REAL& scalar) { __GMBD_xSCAL(v.size(), scalar, v.data, 1); } /** Arithmethic operations with scalars = * **/ inline static void mult(tVarMatrix<__GMBD_REAL> &v, const __GMBD_REAL& scalar) { __GMBD_xSCAL(v.size(), scalar, v.data, 1); } /** Dotproduct * **/ inline static __GMBD_REAL dot(const tVarVector<__GMBD_REAL> &u, const tVarVector<__GMBD_REAL> &v) { return __GMBD_xDOT(u.size(),u.data,1,v.data,1); } inline static __GMBD_REAL dot(const tVectorView<__GMBD_REAL> &u, const tVectorView<__GMBD_REAL> &v) { return __GMBD_xDOT(u.size(),u.data,1,v.data,1); } inline static void pinv(tVarMatrix<__GMBD_REAL> &A) { tVarMatrix<__GMBD_REAL> U(A.h,A.h); tVarVector<__GMBD_REAL> s(std::min(A.w,A.h)); tVarMatrix<__GMBD_REAL> VT(A.w,A.w); svd_dc(U, s, VT, A); for (unsigned int i = 0; i < s.h; ++i) { if(s[i] != 0) s[i] = 1 / s[i]; } //GEMM(VT,A) // UT = diag(s) * UT for (unsigned int i = 0; i < U.w; ++i) { for (unsigned int j = 0; j < U.h; ++j) { U(j,i) = (i < s.h) ? s(i) * U(j,i) : 0; } } // A = V * UT //tVarMatrix<__GMBD_REAL> X(VT.w, U.h); A.resize(A.w,A.h); for (unsigned int i = 0; i < A.h; ++i) { for (unsigned int j = 0; j < A.w; ++j) { A(i,j) = 0; for (unsigned int k = 0; k < std::min(VT.h,U.h); ++k) { A(i,j) += VT(k,i) * U(j,k); } } } //A = X; } #undef __GMBD_REAL #undef __GMBD_xGEMV #undef __GMBD_xNRM2 #undef __GMBD_xSCAL #undef __GMBD_xAXPY #undef __GMBD_xDOT #undef __GMBD_xGESVD #undef __GMBD_xGESDD relion-3.1.3/src/jaz/gravis/tYCbCr.h000066400000000000000000000145121411340063500171530ustar00rootroot00000000000000#ifndef __LIBGRAVIS_T_YCB_CR_H__ #define __LIBGRAVIS_T_YCB_CR_H__ /****************************************************************************** ** Title: tYCbCr.h ** Description: Represents an CIE Y/Cb/Cr color tupel. ** ******************************************************************************/ #include namespace gravis { template class tYCbCr { /*! * Private helper functions, wrapped into an additional struct in case that we want to use the names **/ struct priv { static inline const T& min(const T& a, const T& b) { return ab ? 
a : b; } }; public: typedef T scalar_type; T y, cb, cr; tYCbCr () : y(T(0)), cb(T(0)), cr(T(0)) { } tYCbCr (T y, T cb, T cr) : y(y), cb(cb), cr(cr) { } // tYCbCr (T gray) : (gray), g(gray), b(gray) { } void set (T _y, T _cb, T _cr) { y = _y; cb = _cb; cr = _cr; } // void add (T _r, T _g, T _b) { // r += _r; g += _g; b += _b; // } T intensity () const { return y(); } /* bool operator != (const tYCbCr& c) const { return r != c.r || g != c.g || b != c.b; } bool operator == (const tYCbCr& c) const { return r == c.r && g == c.g && b == c.b; } */ tYCbCr& operator += (const tYCbCr& c) { y += c.y; cb += c.cb; cr += c.cr; return *this; } /* tYCbCr& operator += (const T gray) { r += gray; g += gray; b += gray; return *this; } */ tYCbCr& operator -= (const tYCbCr& c) { y -= c.y; cb -= c.cb; cr -= c.cr; return *this; } // tYCbCr& operator -= (const T gray) { // r -= gray; g -= gray; b -= gray; // return *this; // } tYCbCr& operator *= (const tYCbCr& c) { y *= c.y; cb *= c.cb; cr *= c.cr; return *this; } tYCbCr& operator *= (const T factor) { y *= factor; cb *= factor; cr *= factor; return *this; } /* tYCbCr& operator /= (const tYCbCr& c) { r /= c.r; g /= c.g; b /= c.b; return *this; } tYCbCr& operator /= (const T factor) { r /= factor; g /= factor; b /= factor; return *this; } * \brief All color components are clamped to [0,1]. This function works inplace. * * \return self tYCbCr& clamp() { r = std::min(std::max(r, 0), 1); g = std::min(std::max(g, 0), 1); b = std::min(std::max(b, 0), 1); return *this; } //! Unary minus inline tYCbCr operator - () const { return tYCbCr(-r, -g, -b); }; //! Addition of a scalar (analog to -=) inline tYCbCr operator + (const T& c) const { return tYCbCr(r+c, g+c, b+c); }; //! Subtraction of a scalar (analog to +=) inline tYCbCr operator - (const T& c) const { return tYCbCr(r-c, g-c, b-c); }; */ //! Multiplication of a scalar (analog to *=) inline tYCbCr operator * (const T& c) const { return tYCbCr(y*c, cb*c, cr*c); }; /* //! Division by a scalar (analog to /=) inline tYCbCr operator / (const T& c) const { return tYCbCr(r/c, g/c, b/c); }; bool operator == (const tYCbCr& arg) { return ((arg.r == r) && (arg.g == g) && (arg.b == b)); } const T &operator [](const size_t &i) const { return (&r)[i]; } T &operator [](const size_t &i) { return (&r)[i]; } */ }; template inline tYCbCr operator + (const tYCbCr& c1, const tYCbCr& c2) { tYCbCr result = c1; return (result += c2); } template inline tYCbCr operator - (const tYCbCr& c1, const tYCbCr& c2) { tYCbCr result = c1; return (result -= c2); } /* template inline tYCbCr operator * (const tYCbCr& c1, const tYCbCr& c2) { tYCbCr result(c1.r * c2.r, c1.g * c2.g, c1.b * c2.b); return result; } */ template inline tYCbCr operator * (const tYCbCr& c, T factor) { tYCbCr result(c.y * factor, c.cb * factor, c.cr * factor); return result; } template inline tYCbCr operator * (T factor, const tYCbCr& c) { tYCbCr result(c.y * factor, c.cb * factor, c.cr * factor); return result; } /* template inline tYCbCr operator / (const tYCbCr& c1, const tYCbCr& c2) { tYCbCr result(c1.r / c2.r, c1.g / c2.g, c1.b / c2.b); return result; } template inline tYCbCr operator / (const tYCbCr& c, T factor) { tYCbCr result(c.r / factor, c.g / factor, c.b / factor); return result; } template inline bool operator < (const tYCbCr& c1, const tYCbCr& c2) { T gray1 = c1.grayValue(); T gray2 = c2.grayValue(); return (gray1 < gray2); } template inline tYCbCr operator ! 
(const tYCbCr& c) { tYCbCr result = tYCbCr::White(); return (result -= c); } // Absolute of every color channel template inline tYCbCr abs(const tYCbCr& c) { return tYCbCr(c.r < T(0) ? -c.r : c.r, c.g < T(0) ? -c.g : c.g, c.b < T(0) ? -c.b : c.b); } template inline std::ostream& operator << (std::ostream& os, const tYCbCr& c) { os << "(" << c.r << " " << c.g << " " << c.b << ")"; return os; } template <> inline std::ostream& operator << (std::ostream& os, const tYCbCr& c) { os << "(" << (int)c.r << " " << (int)c.g << " " << (int)c.b << ")"; return os; } template inline T dot (const tYCbCr& v1, const tYCbCr& v2) { return (v1.r*v2.r + v1.g*v2.g + v1.b*v2.b); } */ //typedef tYCbCr bRGB; typedef tYCbCr fYCbCr; typedef tYCbCr dYCbCr; } #endif relion-3.1.3/src/jaz/image_log.cpp000066400000000000000000000036341411340063500170130ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include #include #include void ImageLog::write( Image &img, std::string fn, bool polar, Centering center, double originX, double originY, double originZ, double spacingX, double spacingY, double spacingZ) { if (polar) { Image argImg, absImg; FilterHelper::getPhase(img, argImg); FilterHelper::getAbs(img, absImg); write(argImg, fn+"_arg", center, originX, originY, originZ, spacingX, spacingY, spacingZ); write(absImg, fn+"_abs", center, originX, originY, originZ, spacingX, spacingY, spacingZ); } else { Image realImg, imagImg; FilterHelper::getReal(img, realImg); FilterHelper::getImag(img, imagImg); write(realImg, fn+"_re", center, originX, originY, originZ, spacingX, spacingY, spacingZ); write(imagImg, fn+"_im", center, originX, originY, originZ, spacingX, spacingY, spacingZ); } } relion-3.1.3/src/jaz/image_log.h000066400000000000000000000131241411340063500164530ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #ifndef IMAGE_LOG_H #define IMAGE_LOG_H #include #include #include enum Centering {NoCenter, CenterXY, CenterXYZ}; class ImageLog { public: template static void write( Image& img, std::string fn, Centering center = NoCenter, double originX = 0.0, double originY = 0.0, double originZ = 0.0, double spacingX = 1.0, double spacingY = 1.0, double spacingZ = 1.0); static void write( Image& img, std::string fn, bool polar, Centering center = NoCenter, double originX = 0.0, double originY = 0.0, double originZ = 0.0, double spacingX = 1.0, double spacingY = 1.0, double spacingZ = 1.0); template static void write( MultidimArray& mda, std::string fn, Centering center = NoCenter, double originX = 0.0, double originY = 0.0, double originZ = 0.0, double spacingX = 1.0, double spacingY = 1.0, double spacingZ = 1.0); template static void write( std::vector>& vec, std::string fn, Centering center = NoCenter, double originX = 0.0, double originY = 0.0, double originZ = 0.0, double spacingX = 1.0, double spacingY = 1.0, double spacingZ = 1.0); }; template void ImageLog::write( Image &img, std::string fn, Centering center, double originX, double originY, double originZ, double spacingX, double spacingY, double spacingZ) { const int w = img.data.xdim; const int h = img.data.ydim; const int d = img.data.zdim; switch (center) { case CenterXY: { Image img2(w,h,d); for (int z = 0; z < d; z++) for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { const int xx = (x+w/2)%w; const int yy = (y+h/2)%h; img2(z,y,x) = img(z,yy,xx); } write(img2, fn, NoCenter, originX, originY, originZ, spacingX, spacingY, spacingZ); return; } case CenterXYZ: { Image img2(w,h,d); for (int z = 0; z < d; z++) for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { const int xx = (x+w/2)%w; const int yy = (y+h/2)%h; const int zz = (z+d/2)%d; img2(z,y,x) = img(zz,yy,xx); } write(img2, fn, NoCenter, originX, originY, originZ, spacingX, spacingY, spacingZ); return; } } if (JazConfig::writeMrc) { img.write(fn+".mrc"); } if (JazConfig::writeVtk) { VtkHelper::writeVTK(img, fn+".vtk", originX, originY, originZ, spacingX, spacingY, spacingZ); } } template void ImageLog::write( MultidimArray& mda, std::string fn, Centering center, double originX, double originY, double originZ, double spacingX, double spacingY, double spacingZ) { Image img; img.data = mda; write(img, fn, center, originX, originY, originZ, spacingX, spacingY, spacingZ); } template void ImageLog::write( std::vector>& vec, std::string fn, Centering center, double originX, double originY, double originZ, double spacingX, double spacingY, double spacingZ) { if (vec.size() == 0) { std::cerr << "WARNING: nothing to write to " << fn << " - vector size is zero.\n"; return; } if (vec[0].data.zdim > 1 || vec[0].data.ndim > 1) { REPORT_ERROR("ImageLog::write: unable to write a vector of 3D images\n"); } if (center == CenterXYZ) { REPORT_ERROR("ImageLog::write: unable to XYZ-center a vector of 2D images\n"); } const int w = vec[0].data.xdim; const int h = vec[0].data.ydim; const int ic = vec.size(); Image img(w,h,ic); for (int i = 0; i < ic; i++) { if (vec[i].data.xdim != w || vec[i].data.ydim != h) { REPORT_ERROR("ImageLog::write: images in vector are of unequal size\n"); } for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { if (center == CenterXY) { const int xx = (x + w/2) % w; const int yy = (y + h/2) % h; img(i,y,x) = vec[i](yy,xx); } else // center == NoCenter { img(i,y,x) = vec[i](y,x); } } } 
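// The 2D slices have been packed (and, if CenterXY was requested, already centred)
// into a single multi-slice stack above, so the stack is written with NoCenter to
// avoid centering it a second time.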
write(img, fn, NoCenter, originX, originY, originZ, spacingX, spacingY, spacingZ); } #endif relion-3.1.3/src/jaz/img_proc/000077500000000000000000000000001411340063500161555ustar00rootroot00000000000000relion-3.1.3/src/jaz/img_proc/color_helper.cpp000066400000000000000000000104601411340063500213370ustar00rootroot00000000000000#include "color_helper.h" #include #include #ifdef HAVE_PNG #include #endif using namespace gravis; dRGB ColorHelper::signedToRedBlue(double d, double scale, double rbFract) { const double d_rb = d / (scale * rbFract); const double d_g = (std::abs(d)/scale - rbFract) / (1.0 - rbFract); return dRGB(std::min(1.0, std::max(0.0, d_rb)), std::min(1.0, std::max(0.0, d_g)), std::min(1.0, std::max(0.0, -d_rb))); } void ColorHelper::writeAngleToPNG(const Image &img, std::string filename) { writeSignedToPNG(img, filename+"_-pi+pi", PI); writeSignedToPNG(img, filename+"_-1+1", 1.0); } void ColorHelper::writeSignedToPNG(const Image &img, std::string filename, double scale) { #ifdef HAVE_PNG { tImage pngOut(img.data.xdim, img.data.ydim); pngOut.fill(dRGB(0.f)); for (int y = 0; y < img.data.ydim; y++) for (int x = 0; x < img.data.xdim; x++) { double c = img(y,x); pngOut(x,y) = signedToRedBlue(c, scale); } pngOut.writePNG(filename+".png"); } #endif } void ColorHelper::writeSignedToEPS(std::string filename, int col, const std::vector > &imgs, const std::vector &scales, const std::vector &labels) { // Check all images have the same size int xdim = imgs[0].data.xdim; int ydim = imgs[0].data.ydim; int nimgs= imgs.size(); for (int i = 1; i < imgs.size(); i++) { if (imgs[i].data.xdim != xdim || imgs[i].data.ydim != ydim) REPORT_ERROR(" ERROR: combining images with different sizes into one EPS..."); } std::ofstream outputFile; FileName fn_out = filename + ".eps"; outputFile.open(fn_out.c_str()); int delta = 15; int row = CEIL(nimgs/(RFLOAT)col); int width = col * xdim + (col-1)*delta; int height = row * (ydim + delta); // Rescale to maximum one A4: 595 x 842, or one letter: 612 x 792 RFLOAT width_ratio = width / 595.; RFLOAT height_ratio = height / 792.; RFLOAT max_ratio = XMIPP_MAX(width_ratio, height_ratio); RFLOAT rescale = 1.; if (max_ratio > 1.) { rescale = max_ratio; } // header outputFile << "%!PS-Adobe-2.0 EPSF-1.2" << "\n"; outputFile << "%%BoundingBox: 0 0 " << ROUND(width/rescale)<< " " << ROUND(height/rescale) << "\n"; outputFile << "%%Pages: 1" << "\n"; outputFile << "%%EndComments" << "\n"; outputFile << "/Times-Roman findfont\n"; outputFile << ROUND(10/rescale) << " scalefont\n"; outputFile << "setfont\n"; // First put all the labels (without scale argument!) int xcoord, ycoord, xpos, ypos; for (int i = 0; i < imgs.size(); i++) { xpos = i%col; ypos = (row - 1) - i/col; xcoord = xpos * ROUND((xdim+delta)/rescale); ycoord = ypos * ROUND((ydim+delta)/rescale) + ROUND(ydim/rescale); // Print the label outputFile << "newpath\n"; outputFile << (int)(xcoord) << " " << (int)(ycoord + ROUND(5/rescale)) << " moveto\n"; outputFile << "(" << labels[i] << ") show\n"; } // one scale statement only! outputFile << ROUND(xdim/rescale) << " " << ROUND(ydim/rescale) << " scale\n"; for (int i = 0; i < imgs.size(); i++) { xpos = i%col; ypos = (row - 1) - i/col; xcoord = xpos * (xdim + delta); ycoord = ypos * (ydim+delta) + ydim; // The actual image // Note that the number of elements in a string or array literal should be less than 64 K. // Otherwise, earlier versions of Ghostscript and Preview in MacOS fails. 
// Ref: https://stackoverflow.com/questions/7595532/postcript-maximum-array-size // http://paulbourke.net/dataformats/postscript/ outputFile << xdim << " " << ydim <<" 8 [" << xdim << " 0 0 -" << ydim << " -"< 6) { ii=0; outputFile << "\n"; } } if (ii!=0) outputFile << "\n"; outputFile << std::dec; outputFile << "\n"; } outputFile << "%%EOF\n"; outputFile.close(); } relion-3.1.3/src/jaz/img_proc/color_helper.h000066400000000000000000000011301411340063500207760ustar00rootroot00000000000000#ifndef COLOR_HELPER_H #define COLOR_HELPER_H #include #include class ColorHelper { public: static gravis::dRGB signedToRedBlue(double d, double scale = 1.0, double rbFract = 0.333); static void writeAngleToPNG(const Image& img, std::string filename); static void writeSignedToPNG(const Image& img, std::string filename, double scale = 1.0); static void writeSignedToEPS(std::string filename, int col, const std::vector > &imgs, const std::vector &scales, const std::vector &labels); }; #endif relion-3.1.3/src/jaz/img_proc/filter_helper.cpp000066400000000000000000002616171411340063500215220ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include #include #include #include #include #include #include extern "C" { #include } using namespace gravis; void FilterHelper::separableGaussianFreq( const MultidimArray &src, MultidimArray &dest, double sigma, int k) { if (k < 0) { k = (int)(2*sigma + 0.5); } dest.reshape(src); std::vector kernel(2*k+1); const double s2 = sigma*sigma; for (int i = -k; i <= k; i++) { kernel[i+k] = exp(-0.5*i*i/s2); } MultidimArray temp(src.zdim, src.ydim, src.xdim); for (size_t z = 0; z < src.zdim; z++) for (size_t y = 0; y < src.ydim; y++) for (size_t x = 0; x < src.xdim; x++) { Complex v = 0; double m = 0; for (long int i = -k; i <= k; i++) { long xx = x + i; bool conj = false; if (xx < 0) { xx = -xx; conj = true; } else if (xx >= src.xdim) { xx = 2*src.xdim - 1 - xx; conj = true; } long yy = conj? (src.ydim - y) % src.ydim : y; long zz = conj? 
(src.zdim - z) % src.zdim : z; v += kernel[i+k] * DIRECT_NZYX_ELEM(src, 0, zz, yy, xx); m += kernel[i+k]; } DIRECT_NZYX_ELEM(dest, 0, z, y, x) = v/m; } for (size_t z = 0; z < src.zdim; z++) for (size_t y = 0; y < src.ydim; y++) for (size_t x = 0; x < src.xdim; x++) { Complex v = 0; double m = 0; for (long int i = -k; i <= k; i++) { const long int yy = (src.ydim + y + i) % src.ydim; v += kernel[i+k] * DIRECT_NZYX_ELEM(dest, 0, z, yy, x); m += kernel[i+k]; } DIRECT_NZYX_ELEM(temp, 0, z, y, x) = v/m; } for (size_t z = 0; z < src.zdim; z++) for (size_t y = 0; y < src.ydim; y++) for (size_t x = 0; x < src.xdim; x++) { Complex v = 0; double m = 0; for (long int i = -k; i <= k; i++) { const long int zz = (src.zdim + z + i) % src.zdim; v += kernel[i+k] * DIRECT_NZYX_ELEM(temp, 0, zz, y, x); m += kernel[i+k]; } DIRECT_NZYX_ELEM(dest, 0, z, y, x) = v/m; } } void FilterHelper::separableGaussianFreqXY( const MultidimArray &src, MultidimArray &dest, double sigma, int k) { if (k < 0) { k = (int)(2*sigma + 0.5); } dest.reshape(src); std::vector kernel(2*k+1); const double s2 = sigma*sigma; for (int i = -k; i <= k; i++) { kernel[i+k] = exp(-0.5*i*i/s2); } MultidimArray temp(src.zdim, src.ydim, src.xdim); for (size_t z = 0; z < src.zdim; z++) for (size_t y = 0; y < src.ydim; y++) for (size_t x = 0; x < src.xdim; x++) { Complex v = 0; double m = 0; for (long int i = -k; i <= k; i++) { long xx = x + i; bool conj = false; if (xx < 0) { xx = -xx; conj = true; } else if (xx >= src.xdim) { xx = 2*src.xdim - 2 - xx; conj = true; } long yy = conj? (src.ydim - y) % src.ydim : y; long zz = conj? (src.zdim - z) % src.zdim : z; Complex vv = DIRECT_NZYX_ELEM(src, 0, zz, yy, xx); if (conj) vv = vv.conj(); v += kernel[i+k] * vv; m += kernel[i+k]; } DIRECT_NZYX_ELEM(temp, 0, z, y, x) = v/m; } for (size_t z = 0; z < src.zdim; z++) for (size_t y = 0; y < src.ydim; y++) for (size_t x = 0; x < src.xdim; x++) { Complex v = 0; double m = 0; for (long int i = -k; i <= k; i++) { const long int yy = (src.ydim + y + i) % src.ydim; v += kernel[i+k] * DIRECT_NZYX_ELEM(temp, 0, z, yy, x); m += kernel[i+k]; } DIRECT_NZYX_ELEM(dest, 0, z, y, x) = v/m; } } void FilterHelper::drawTestPattern(Image& img, int squareSize) { for (long int z = 0; z < img.data.zdim; z++) for (long int y = 0; y < img.data.ydim; y++) for (long int x = 0; x < img.data.xdim; x++) { int xi = (int)(x/squareSize) % 2; int yi = (int)(y/squareSize) % 2; int zi = (int)(z/squareSize) % 2; int v = (xi + yi + zi) % 2; DIRECT_A3D_ELEM(img.data, z, y, x) = (RFLOAT) v; } } void FilterHelper::drawTestPattern(Volume& volume, int squareSize) { for (size_t z = 0; z < volume.dimz; z++) for (size_t y = 0; y < volume.dimy; y++) for (size_t x = 0; x < volume.dimx; x++) { int xi = (int)(x/squareSize) % 2; int yi = (int)(y/squareSize) % 2; int zi = (int)(z/squareSize) % 2; int v = (xi + yi + zi) % 2; volume(x,y,z) = (RFLOAT) v; } } Image FilterHelper::expImg(Image &img, double scale) { Image out = img; FOR_ALL_DIRECT_NZYX_ELEMENTS_IN_MULTIDIMARRAY(img.data) { DIRECT_NZYX_ELEM(out.data, l, k, i, j) = exp(scale*DIRECT_NZYX_ELEM(img.data, l, k, i, j)); } return out; } Image FilterHelper::logImg(Image &img, double thresh, double scale) { Image out = img; FOR_ALL_DIRECT_NZYX_ELEMENTS_IN_MULTIDIMARRAY(img.data) { double v = DIRECT_NZYX_ELEM(img.data, l, k, i, j); if (v < thresh) v = thresh; DIRECT_NZYX_ELEM(out.data, l, k, i, j) = log(scale*v); } return out; } Image FilterHelper::padCorner2D(Image& img, double factor) { const int w0 = img.data.xdim; const int h0 = img.data.ydim; const int w1 
= factor * w0; const int h1 = factor * h0; Image out(w1,h1); for (int y = 0; y < h1; y++) for (int x = 0; x < w1; x++) { int x1 = x < w1/2? x : x - w1; int y1 = y < h1/2? y : y - h1; if (x1 < w0/2 && y1 < h0/2 && x1 >= -w0/2 && y1 >= -h0/2) { int x0 = x1 < 0? x1 + w0 : x1; int y0 = y1 < 0? y1 + h0 : y1; DIRECT_A2D_ELEM(out.data, y, x) = DIRECT_A2D_ELEM(img.data, y0, x0); } else { DIRECT_A2D_ELEM(out.data, y, x) = 0.0; } } return out; } Image FilterHelper::padCorner2D(Image& img, double factor) { const int w0 = img.data.xdim; const int h0 = img.data.ydim; const int w1 = factor * w0; const int h1 = factor * h0; Image out(w1,h1); for (int y = 0; y < h1; y++) for (int x = 0; x < w1; x++) { int x1 = x; int y1 = y < h1/2? y : y - h1; if (x1 < w0/2 && y1 < h0/2 && x1 >= -w0/2 && y1 >= -h0/2) { int x0 = x1; int y0 = y1 < 0? y1 + h0 : y1; DIRECT_A2D_ELEM(out.data, y, x) = DIRECT_A2D_ELEM(img.data, y0, x0); } else { DIRECT_A2D_ELEM(out.data, y, x) = 0.0; } } return out; } Image FilterHelper::padCorner2D(const Image& img, int w, int h) { const int w0 = img.data.xdim; const int h0 = img.data.ydim; Image out(w,h); for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { int x1 = x < w/2? x : x - w; int y1 = y < h/2? y : y - h; if (x1 < w0/2 && y1 < h0/2 && x1 >= -w0/2 && y1 >= -h0/2) { int x0 = x1 < 0? x1 + w0 : x1; int y0 = y1 < 0? y1 + h0 : y1; DIRECT_A2D_ELEM(out.data, y, x) = DIRECT_A2D_ELEM(img.data, y0, x0); } else { DIRECT_A2D_ELEM(out.data, y, x) = 0.0; } } return out; } Image FilterHelper::cropCorner2D(const Image& img, int w, int h) { const int w1 = img.data.xdim; const int h1 = img.data.ydim; if (w > w1 || h > h1) return img; Image out(w,h); for (int y = 0; y < h1; y++) for (int x = 0; x < w1; x++) { int x1 = x < w1/2? x : x - w1; int y1 = y < h1/2? y : y - h1; if (x1 < w/2 && y1 < h/2 && x1 >= -w/2 && y1 >= -h/2) { int x0 = x1 < 0? x1 + w : x1; int y0 = y1 < 0? y1 + h : y1; DIRECT_A2D_ELEM(out.data, y0, x0) = DIRECT_A2D_ELEM(img.data, y, x); } } return out; } Image FilterHelper::cropCorner2D(const Image& img, int w, int h) { const int w1 = img.data.xdim; const int h1 = img.data.ydim; Image out(w,h); for (int y = 0; y < h1; y++) for (int x = 0; x < w1; x++) { int x1 = x; int y1 = y < h1/2? y : y - h1; if (x1 < w && y1 < h/2 && y1 >= -h/2) { int x0 = x1; int y0 = y1 < 0? y1 + h : y1; DIRECT_A2D_ELEM(out.data, y0, x0) = DIRECT_A2D_ELEM(img.data, y, x); } } return out; } Image FilterHelper::zeroOutsideCorner2D(Image &img, double radius) { const int w = img.data.xdim; const int h = img.data.ydim; const double rad2 = radius*radius; Image out(w,h); for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { int xx = x < w/2? x : x - w; int yy = y < h/2? y : y - h; int r2 = xx*xx + yy*yy; if (r2 <= rad2) { DIRECT_A2D_ELEM(out.data, y, x) = DIRECT_A2D_ELEM(img.data, y, x); } else { DIRECT_A2D_ELEM(out.data, y, x) = 0.0; } } return out; } void FilterHelper::GaussianEnvelopeCorner2D(Image &img, double sigma) { const int w = img.data.xdim; const int h = img.data.ydim; const double s2 = sigma * sigma; for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { double xx = x < w/2? x : x - w; double yy = y < h/2? y : y - h; double r2 = xx*xx + yy*yy; DIRECT_A2D_ELEM(img.data, y, x) *= exp(-0.5*r2/s2); } } Image FilterHelper::raisedCosEnvCorner2D(Image &img, double radIn, double radOut) { const int w = img.data.xdim; const int h = img.data.ydim; Image out(w,h); for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { double xx = x < w/2? x : x - w; double yy = y < h/2? 
y : y - h; double r = sqrt(xx*xx + yy*yy); if (r < radIn) { DIRECT_A2D_ELEM(out.data, y, x) = DIRECT_A2D_ELEM(img.data, y, x); } else if (r < radOut) { double t = (r - radIn)/(radOut - radIn); double a = 0.5 * (1.0 + cos(PI * t)); DIRECT_A2D_ELEM(out.data, y, x) = a * DIRECT_A2D_ELEM(img.data, y, x); } else { DIRECT_A2D_ELEM(out.data, y, x) = 0.0; } } return out; } Image FilterHelper::raisedCosEnvCorner2DFull(Image &img, double radIn, double radOut) { const int w = img.data.xdim; const int h = img.data.ydim; Image out(w,h); for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { double xx = x < w/2? x : x - w; double yy = y < h/2? y : y - h; double r = sqrt(xx*xx + yy*yy); if (r < radIn) { DIRECT_A2D_ELEM(out.data, y, x) = DIRECT_A2D_ELEM(img.data, y, x); } else if (r < radOut) { double t = (r - radIn)/(radOut - radIn); double a = 0.5 * (1.0 + cos(PI * t)); DIRECT_A2D_ELEM(out.data, y, x) = a * DIRECT_A2D_ELEM(img.data, y, x); } else { DIRECT_A2D_ELEM(out.data, y, x) = 0.0; } } return out; } Image FilterHelper::raisedCosEnvCorner3D(Image &img, double radIn, double radOut) { const int w = img.data.xdim; const int h = img.data.ydim; const int d = img.data.zdim; Image out(w,h,d); for (int z = 0; z < d; z++) for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { double xx = x < w/2? x : x - w; double yy = y < h/2? y : y - h; double zz = z < d/2? z : z - d; double r = sqrt(xx*xx + yy*yy + zz*zz); if (r < radIn) { DIRECT_A3D_ELEM(out.data, z, y, x) = DIRECT_A3D_ELEM(img.data, z, y, x); } else if (r < radOut) { double t = (r - radIn)/(radOut - radIn); double a = 0.5 * (1.0 + cos(PI * t)); DIRECT_A3D_ELEM(out.data, z, y, x) = a * DIRECT_A3D_ELEM(img.data, z, y, x); } else { DIRECT_A3D_ELEM(out.data, z, y, x) = 0.0; } } return out; } Image FilterHelper::raisedCosEnvFreq2D(const Image& img, double radIn, double radOut) { const int w = img.data.xdim; const int h = img.data.ydim; Image out(w,h); for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { double xx = x; double yy = y <= h/2? y : y - h; double r = sqrt(xx*xx + yy*yy); if (r < radIn) { DIRECT_A2D_ELEM(out.data, y, x) = DIRECT_A2D_ELEM(img.data, y, x); } else if (r < radOut) { double t = (r - radIn)/(radOut - radIn); double a = 0.5 * (1.0 + cos(PI * t)); DIRECT_A2D_ELEM(out.data, y, x) = a * DIRECT_A2D_ELEM(img.data, y, x); } else { DIRECT_A2D_ELEM(out.data, y, x) = 0.0; } } return out; } Image FilterHelper::raisedCosEnvRingFreq2D( const Image &img, double rad0, double rad1, double stepWidth) { const int w = img.data.xdim; const int h = img.data.ydim; Image out(w,h); for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { double xx = x; double yy = y <= h/2? y : y - h; double r = sqrt(xx*xx + yy*yy); double r0 = rad0 > 0.0? 
r - rad0 : stepWidth/2; double r1 = rad1 - r; double re = 2.0 * XMIPP_MIN(r0, r1) / stepWidth; if (re > 1.0) { DIRECT_A2D_ELEM(out.data, y, x) = DIRECT_A2D_ELEM(img.data, y, x); } else if (re > -1.0) { double t = (re + 1.0)/2.0; double a = 0.5 * (1.0 - cos(PI * t)); DIRECT_A2D_ELEM(out.data, y, x) = a * DIRECT_A2D_ELEM(img.data, y, x); } else { DIRECT_A2D_ELEM(out.data, y, x) = 0.0; } } return out; } void FilterHelper::lowPassFilter(Image& img, double maxFreq0, double maxFreq1, Image& dest) { MultidimArray imgFreq; FourierTransformer ft; ft.FourierTransform(img(), imgFreq, false); lowPassFilterSpectrum(imgFreq, maxFreq0, maxFreq1); FourierTransformer ft2; ft2.inverseFourierTransform(imgFreq, dest()); } void FilterHelper::lowPassFilterSpectrum(MultidimArray& spectrum, double maxFreq0, double maxFreq1) { FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY3D(spectrum) { double xi = j/(double)spectrum.xdim; double yi = 2.0*i/(double)spectrum.ydim; double zi = 2.0*k/(double)spectrum.zdim; if (yi > 1.0) yi = 2.0 - yi; if (zi > 1.0) zi = 2.0 - zi; double r = sqrt(xi*xi + yi*yi + zi*zi); if (r > maxFreq1) { DIRECT_A3D_ELEM(spectrum, k, i, j) = Complex(0.0, 0.0); } else if (r > maxFreq0) { const double t = (r - maxFreq0)/(maxFreq1 - maxFreq0); const double q = 0.5 * (cos(PI*t) + 1.0); DIRECT_A3D_ELEM(spectrum, k, i, j) *= q; } } } RFLOAT FilterHelper::averageValue(Image& img) { RFLOAT sum; FOR_ALL_DIRECT_NZYX_ELEMENTS_IN_MULTIDIMARRAY(img.data) { sum += DIRECT_NZYX_ELEM(img.data, l, k, i, j); } return sum / (double)(img.data.xdim * img.data.ydim * img.data.zdim * img.data.ndim); } RFLOAT FilterHelper::maxValue(Image &img) { RFLOAT vMax = -std::numeric_limits::max(); FOR_ALL_DIRECT_NZYX_ELEMENTS_IN_MULTIDIMARRAY(img.data) { RFLOAT v = DIRECT_NZYX_ELEM(img.data, l, k, i, j); if (v > vMax) vMax = v; } return vMax; } void FilterHelper::phaseFlip(Image& img, CTF& ctf, RFLOAT angpix, Image& dest) { MultidimArray imgFreq; FourierTransformer ft; ft.FourierTransform(img(), imgFreq, false); RFLOAT xs = (RFLOAT)img.data.xdim * angpix; RFLOAT ys = (RFLOAT)img.data.ydim * angpix; FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY2D(imgFreq) { const int x = j; const int y = i < imgFreq.ydim/2? 
i : i - imgFreq.ydim; RFLOAT c = ctf.getCTF(x/xs, y/ys); if (c < 0) { DIRECT_A2D_ELEM(imgFreq, i, j) *= -1; } } if (dest.data.xdim != img.data.xdim || dest.data.ydim != img.data.ydim) { dest.data.resize(img.data); } FourierTransformer ft2; ft2.inverseFourierTransform(imgFreq, dest()); } void FilterHelper::applyBeamTilt(Image &img, RFLOAT beamtilt_x, RFLOAT beamtilt_y, RFLOAT lambda, RFLOAT Cs, RFLOAT angpix, int s, Image& dest) { MultidimArray imgFreq; FourierTransformer ft; ft.FourierTransform(img(), imgFreq, false); selfApplyBeamTilt(imgFreq, beamtilt_x, beamtilt_y, lambda, Cs, angpix, s); FourierTransformer ft2; ft2.inverseFourierTransform(imgFreq, dest()); } void FilterHelper::modulate(Image& img, CTF& ctf, RFLOAT angpix, Image& dest) { Image imgFreq; FourierTransformer ft; ft.FourierTransform(img(), imgFreq(), false); modulate(imgFreq, ctf, angpix, dest); } void FilterHelper::modulate(Image& imgFreq, CTF& ctf, RFLOAT angpix, Image& dest) { const int w = imgFreq.data.xdim; const int h = imgFreq.data.ydim; Image ctfImg(w,h); ctf.getFftwImage(ctfImg(), h, h, angpix); FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY2D(imgFreq()) { DIRECT_A2D_ELEM(imgFreq(), i, j) *= DIRECT_A2D_ELEM(ctfImg(), i, j); } if (dest.data.xdim != 2*(imgFreq.data.xdim-1) || dest.data.ydim != imgFreq.data.ydim) { dest.data.resize(imgFreq.data.ydim, 2*(imgFreq.data.xdim-1)); } FourierTransformer ft2; ft2.inverseFourierTransform(imgFreq(), dest()); } void FilterHelper::modulate(MultidimArray& imgFreq, CTF& ctf, RFLOAT angpix) { const int w = imgFreq.xdim; const int h = imgFreq.ydim; Image ctfImg(w,h); ctf.getFftwImage(ctfImg(), h, h, angpix); FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY2D(imgFreq) { DIRECT_A2D_ELEM(imgFreq, i, j) *= DIRECT_A2D_ELEM(ctfImg(), i, j); } } void FilterHelper::drawCtf(CTF &ctf, RFLOAT angpix, Image &dest) { const int w = dest.data.xdim; const int h = dest.data.ydim; Image ctfImg(w,h); ctf.getFftwImage(ctfImg(), h, h, angpix); FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY2D(dest()) { DIRECT_A2D_ELEM(dest(), i, j) = DIRECT_A2D_ELEM(ctfImg(), i, j); } } void FilterHelper::wienerFilter(Image& img, CTF& ctf, RFLOAT angpix, RFLOAT eps, RFLOAT Bfac, Image& dest) { MultidimArray imgFreq; FourierTransformer ft; ft.FourierTransform(img(), imgFreq, false); RFLOAT xs = (RFLOAT)img.data.xdim * angpix; RFLOAT ys = (RFLOAT)img.data.ydim * angpix; FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY2D(imgFreq) { const int x = j; const int y = i < imgFreq.ydim/2? 
i : i - imgFreq.ydim; RFLOAT c; if (Bfac > 0.0) c = ctf.getCTF(x/xs, y/ys) * exp(-Bfac*(x*x + y*y)/4.0); else c = ctf.getCTF(x/xs, y/ys); DIRECT_A2D_ELEM(imgFreq, i, j) = (c * DIRECT_A2D_ELEM(imgFreq, i, j))/(c*c + eps); } if (dest.data.xdim != img.data.xdim || dest.data.ydim != img.data.ydim) { dest.data.resize(img.data); } FourierTransformer ft2; ft2.inverseFourierTransform(imgFreq, dest()); } void FilterHelper::richardsonLucy(Image& img, CTF& ctf, RFLOAT angpix, RFLOAT eps, int iterations, Image& dest) { const int w = img.data.xdim; const int h = img.data.ydim; Image img0(w,h,1,1), img1(w,h,1,1), img1M(w,h,1,1), imgR(w,h,1,1), imgRM(w,h,1,1); double vmin = 0; double Bfac = (double)w/4.0; FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY2D(img.data) { double v0 = DIRECT_A2D_ELEM(img.data, i, j); if (v0 < vmin) vmin = v0; } vmin -= 10; FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY2D(img.data) { DIRECT_A2D_ELEM(img0.data, i, j) = DIRECT_A2D_ELEM(img.data, i, j) + vmin; } wienerFilter(img0, ctf, angpix, eps, Bfac, img1); VtkHelper::writeVTK(img1, "rl_it0.vtk"); for (int it = 0; it < iterations; it++) { // img1 = img1 * conv(psf, img / conv(psf, img1) ) // = img1 * IFT( ctf * FT(img / IFT( ctf * FT(img1) ) ) ) // = img1 * ctf_mod( img / ctf_mod(img1) ) modulate(img1, ctf, angpix, img1M); wienerDivide(img0, img1M, eps, imgR); modulate(imgR, ctf, angpix, imgRM); multiply(imgRM, img1, img1); std::stringstream sts; sts << (it+1); std::string fn; sts >> fn; VtkHelper::writeVTK(img1, "rl_it"+fn+".vtk"); } } void FilterHelper::rampFilter(Image& img, RFLOAT s0, RFLOAT t1, double ux, double uy, Image& dest) { MultidimArray imgFreq; FourierTransformer ft; ft.FourierTransform(img(), imgFreq, false); FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY2D(imgFreq) { const int x = j; const int y = i < imgFreq.ydim/2? i : i - imgFreq.ydim; RFLOAT t = std::abs(x*ux + y*uy); RFLOAT s = t < t1? (s0 + (1-s0)*t/t1) : 1.0; DIRECT_A2D_ELEM(imgFreq, i, j) = s * DIRECT_A2D_ELEM(imgFreq, i, j); } if (dest.data.xdim != img.data.xdim || dest.data.ydim != img.data.ydim) { dest.data.resize(img.data); } FourierTransformer ft2; ft2.inverseFourierTransform(imgFreq, dest()); } void FilterHelper::rampFilter3D(Image& img, RFLOAT s0, RFLOAT t1, double tx, double ty, double tz) { d3Vector ta(tx,ty,tz); FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY3D(img.data) { const int x = j; const int y = i < img.data.ydim/2? i : i - img.data.ydim; const int z = k < img.data.zdim/2? k : k - img.data.zdim; d3Vector p(x,y,z); d3Vector q = p - p.dot(ta)*ta; double t = q.length(); RFLOAT s = t < t1? (s0 + (1-s0)*t/t1) : 1.0; DIRECT_A3D_ELEM(img.data, k, i, j) = s * DIRECT_A3D_ELEM(img.data, k, i, j); } } void FilterHelper::doubleRampFilter3D(Image& img, RFLOAT s0, RFLOAT t1, double tx, double ty, double tz) { d3Vector ta(tx,ty,tz); FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY3D(img.data) { const int x = j; const int y = i < img.data.ydim/2? i : i - img.data.ydim; const int z = k < img.data.zdim/2? k : k - img.data.zdim; d3Vector p(x,y,z); d3Vector q = p - p.dot(ta)*ta; double t = q.length(); RFLOAT s = t < t1? 
(s0 + (1-s0)*t/t1) : 1.0 + t1 - t; if (s < 0) s = 0; DIRECT_A3D_ELEM(img.data, k, i, j) = s * DIRECT_A3D_ELEM(img.data, k, i, j); } } void FilterHelper::getPhase(const Image &img, Image &dest) { const long w = img.data.xdim; const long h = img.data.ydim; const long d = img.data.zdim; dest = Image(w,h,d); for (long int z = 0; z < d; z++) for (long int y = 0; y < h; y++) for (long int x = 0; x < w; x++) { if (DIRECT_NZYX_ELEM(img.data, 0, z, y, x).norm() > 0) { DIRECT_NZYX_ELEM(dest.data, 0, z, y, x) = DIRECT_NZYX_ELEM(img.data, 0, z, y, x).arg(); } else { DIRECT_NZYX_ELEM(dest.data, 0, z, y, x) = 0; } } } void FilterHelper::getAbs(const Image &img, Image &dest) { const long w = img.data.xdim; const long h = img.data.ydim; const long d = img.data.zdim; dest = Image(w,h,d); for (long int z = 0; z < d; z++) for (long int y = 0; y < h; y++) for (long int x = 0; x < w; x++) { DIRECT_NZYX_ELEM(dest.data, 0, z, y, x) = DIRECT_NZYX_ELEM(img.data, 0, z, y, x).abs(); } } void FilterHelper::getReal(const Image &img, Image &dest) { const long w = img.data.xdim; const long h = img.data.ydim; const long d = img.data.zdim; dest = Image(w,h,d); for (long int z = 0; z < d; z++) for (long int y = 0; y < h; y++) for (long int x = 0; x < w; x++) { DIRECT_NZYX_ELEM(dest.data, 0, z, y, x) = DIRECT_NZYX_ELEM(img.data, 0, z, y, x).real; } } void FilterHelper::getImag(const Image &img, Image &dest) { const long w = img.data.xdim; const long h = img.data.ydim; const long d = img.data.zdim; dest = Image(w,h,d); for (long int z = 0; z < d; z++) for (long int y = 0; y < h; y++) for (long int x = 0; x < w; x++) { DIRECT_NZYX_ELEM(dest.data, 0, z, y, x) = DIRECT_NZYX_ELEM(img.data, 0, z, y, x).imag; } } void FilterHelper::powerSpectrum2D(Image& img, Volume& spectrum) { MultidimArray imgFreq; FourierTransformer ft; ft.FourierTransform(img(), imgFreq, false); spectrum.resize(imgFreq.xdim, imgFreq.ydim, 1); FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY2D(imgFreq) { Complex z = DIRECT_A2D_ELEM(imgFreq, i, j); spectrum(j,i,0) = z.abs(); } } void FilterHelper::equiphaseAverage2D(const Volume& src, Volume& dest) { int n = src.dimx; std::vector val(n), wgh(n); for (long int i = 0; i < n; i++) { val[i] = 0.0; wgh[i] = 0.0; } for (long int y = 0; y < src.dimy; y++) for (long int x = 0; x < src.dimx; x++) { double id; if (y < src.dimy/2) { id = sqrt(x*x + y*y); } else { id = sqrt(x*x + (src.dimy - y)*(src.dimy - y)); } int i = (int)id; double f = id - i; if (i >= 0 && i < n) { val[i] += (1.0 - f) * src(x,y,0); wgh[i] += (1.0 - f); } if (i >= -1 && i < n-1) { val[i+1] += f * src(x,y,0); wgh[i+1] += f; } } for (long int i = 0; i < n; i++) { if (wgh[i] > 0.0) { val[i] /= wgh[i]; } } dest.resize(src); for (long int y = 0; y < src.dimy; y++) for (long int x = 0; x < src.dimx; x++) { double id; if (y < src.dimy/2) { id = sqrt(x*x + y*y); } else { id = sqrt(x*x + (src.dimy - y)*(src.dimy - y)); } int i = (int)id; double f = id - i; if (i >= 0 && i < n-1) { dest(x,y,0) = (1.0 - f) * val[i] + f * val[i+1]; } } } void FilterHelper::threshold(Image& src, RFLOAT t, Image& dest) { for (long int n = 0; n < src.data.ndim; n++) for (long int z = 0; z < src.data.zdim; z++) for (long int y = 0; y < src.data.ydim; y++) for (long int x = 0; x < src.data.xdim; x++) { if (DIRECT_NZYX_ELEM(src.data, n, z, y, x) > t) { DIRECT_NZYX_ELEM(dest.data, n, z, y, x) = 1.0; } else { DIRECT_NZYX_ELEM(dest.data, n, z, y, x) = 0.0; } } } void FilterHelper::fill(Image& dest, RFLOAT v) { for (long int n = 0; n < dest.data.ndim; n++) for (long int z = 0; z < dest.data.zdim; z++) for 
(long int y = 0; y < dest.data.ydim; y++) for (long int x = 0; x < dest.data.xdim; x++) { DIRECT_NZYX_ELEM(dest.data, n, z, y, x) = v; } } void FilterHelper::linearTransform(Image& src, RFLOAT m, RFLOAT q, Image& dest) { for (long int n = 0; n < src.data.ndim; n++) for (long int z = 0; z < src.data.zdim; z++) for (long int y = 0; y < src.data.ydim; y++) for (long int x = 0; x < src.data.xdim; x++) { DIRECT_NZYX_ELEM(dest.data, n, z, y, x) = m * DIRECT_NZYX_ELEM(src.data, n, z, y, x) + q; } } void FilterHelper::linearCombination(Image& src0, Image& src1, RFLOAT a0, RFLOAT a1, Image& dest) { for (long int n = 0; n < src0.data.ndim; n++) for (long int z = 0; z < src0.data.zdim; z++) for (long int y = 0; y < src0.data.ydim; y++) for (long int x = 0; x < src0.data.xdim; x++) { DIRECT_NZYX_ELEM(dest.data, n, z, y, x) = a0 * DIRECT_NZYX_ELEM(src0.data, n, z, y, x) + a1 * DIRECT_NZYX_ELEM(src1.data, n, z, y, x); } } void FilterHelper::linearCombination(const Volume& src0, const Volume& src1, RFLOAT a0, RFLOAT a1, Volume& dest) { for (long int z = 0; z < src0.dimz; z++) for (long int y = 0; y < src0.dimy; y++) for (long int x = 0; x < src0.dimx; x++) { dest(x,y,z) = a0 * src0(x,y,z) + a1 * src1(x,y,z); } } void FilterHelper::sumUp(const std::vector > & src, Image &dest) { const int w = src[0].data.xdim; const int h = src[0].data.ydim; const int d = src[0].data.zdim; const int m = src[0].data.ndim; const int ic = src.size(); dest = Image(w,h,d,m); dest.data.initZeros(); for (long int i = 0; i < ic; i++) { if ( src[i].data.xdim != w || src[i].data.ydim != h || src[i].data.zdim != d || src[i].data.ndim != m) { REPORT_ERROR("FilterHelper::sumUp(): image dimension mismatch.\n"); } for (long int n = 0; n < m; n++) for (long int z = 0; z < d; z++) for (long int y = 0; y < h; y++) for (long int x = 0; x < w; x++) { DIRECT_NZYX_ELEM(dest.data, n, z, y, x) += DIRECT_NZYX_ELEM(src[i].data, n, z, y, x); } } } double FilterHelper::L1distance(const Image& i0, const Image& i1, int x0, int y0, int w, int h) { double d = 0.0; if (w < 0) w = i0.data.xdim; if (h < 0) h = i0.data.ydim; for (long int n = 0; n < i0.data.ndim; n++) for (long int z = 0; z < i0.data.zdim; z++) for (long int y = y0; y < y0 + h; y++) for (long int x = x0; x < x0 + w; x++) { RFLOAT v0 = DIRECT_NZYX_ELEM(i0.data, n, z, y, x); RFLOAT v1 = DIRECT_NZYX_ELEM(i1.data, n, z, y, x); double di = v1 - v0; d += std::abs(di); } return d; } double FilterHelper::L2distance(const Image& i0, const Image& i1, int x0, int y0, int w, int h) { double d = 0.0; if (w < 0) w = i0.data.xdim; if (h < 0) h = i0.data.ydim; for (long int n = 0; n < i0.data.ndim; n++) for (long int z = 0; z < i0.data.zdim; z++) for (long int y = y0; y < y0 + h; y++) for (long int x = x0; x < x0 + w; x++) { RFLOAT v0 = DIRECT_NZYX_ELEM(i0.data, n, z, y, x); RFLOAT v1 = DIRECT_NZYX_ELEM(i1.data, n, z, y, x); double di = v1 - v0; d += di*di; } return d; } double FilterHelper::NCC(const Image& i0, const Image& i1, int x0, int y0, int w, int h) { double d = 0.0; if (w < 0) w = i0.data.xdim; if (h < 0) h = i0.data.ydim; double mu0 = 0.0, mu1 = 0.0, cnt = 0.0; for (long int n = 0; n < i0.data.ndim; n++) for (long int z = 0; z < i0.data.zdim; z++) for (long int y = y0; y < y0 + h; y++) for (long int x = x0; x < x0 + w; x++) { RFLOAT v0 = DIRECT_NZYX_ELEM(i0.data, n, z, y, x); RFLOAT v1 = DIRECT_NZYX_ELEM(i1.data, n, z, y, x); mu0 += v0; mu1 += v1; cnt += 1.0; } mu0 /= cnt; mu1 /= cnt; double sig0 = 0.0, sig1 = 0.0; for (long int n = 0; n < i0.data.ndim; n++) for (long int z = 0; z < 
i0.data.zdim; z++) for (long int y = y0; y < y0 + h; y++) for (long int x = x0; x < x0 + w; x++) { RFLOAT v0 = DIRECT_NZYX_ELEM(i0.data, n, z, y, x) - mu0; RFLOAT v1 = DIRECT_NZYX_ELEM(i1.data, n, z, y, x) - mu1; sig0 += v0*v0; sig1 += v1*v1; } sig0 = sqrt(sig0/(cnt - 1.0)); sig1 = sqrt(sig1/(cnt - 1.0)); double ncc = 0.0; for (long int n = 0; n < i0.data.ndim; n++) for (long int z = 0; z < i0.data.zdim; z++) for (long int y = y0; y < y0 + h; y++) for (long int x = x0; x < x0 + w; x++) { RFLOAT v0 = (DIRECT_NZYX_ELEM(i0.data, n, z, y, x) - mu0); RFLOAT v1 = (DIRECT_NZYX_ELEM(i1.data, n, z, y, x) - mu1); ncc += v0*v1; } ncc /= sig0*sig1*cnt; return ncc; } void FilterHelper::multiply(Image& i0, Image& i1, Image& dest) { dest = Image(i0.data.xdim, i0.data.ydim, i0.data.zdim, i0.data.ndim); for (long int n = 0; n < i0.data.ndim; n++) for (long int z = 0; z < i0.data.zdim; z++) for (long int y = 0; y < i0.data.ydim; y++) for (long int x = 0; x < i0.data.xdim; x++) { DIRECT_NZYX_ELEM(dest.data, n, z, y, x) = DIRECT_NZYX_ELEM(i0.data, n, z, y, x) * DIRECT_NZYX_ELEM(i1.data, n, z, y, x); } } void FilterHelper::multiply(Image& i0, Image& i1, Image& dest) { dest = Image(i0.data.xdim, i0.data.ydim, i0.data.zdim, i0.data.ndim); for (long int n = 0; n < i0.data.ndim; n++) for (long int z = 0; z < i0.data.zdim; z++) for (long int y = 0; y < i0.data.ydim; y++) for (long int x = 0; x < i0.data.xdim; x++) { DIRECT_NZYX_ELEM(dest.data, n, z, y, x) = DIRECT_NZYX_ELEM(i0.data, n, z, y, x) * DIRECT_NZYX_ELEM(i1.data, n, z, y, x); } } void FilterHelper::wienerDivide(Image& num, Image& denom, RFLOAT eps, Image& dest) { for (long int n = 0; n < num.data.ndim; n++) for (long int z = 0; z < num.data.zdim; z++) for (long int y = 0; y < num.data.ydim; y++) for (long int x = 0; x < num.data.xdim; x++) { RFLOAT d = DIRECT_NZYX_ELEM(denom.data, n, z, y, x); DIRECT_NZYX_ELEM(dest.data, n, z, y, x) = d * DIRECT_NZYX_ELEM(num.data, n, z, y, x) / (d*d + eps); } } void FilterHelper::divide(Image& num, Volume& denom, RFLOAT eps, Image& dest) { for (long int n = 0; n < num.data.ndim; n++) for (long int z = 0; z < num.data.zdim; z++) for (long int y = 0; y < num.data.ydim; y++) for (long int x = 0; x < num.data.xdim; x++) { DIRECT_NZYX_ELEM(dest.data, n, z, y, x) = DIRECT_NZYX_ELEM(num.data, n, z, y, x) / (denom(x,y,z) + eps); } } void FilterHelper::divide(Image& num, Image& denom, RFLOAT eps, Image& dest) { FOR_ALL_DIRECT_NZYX_ELEMENTS_IN_MULTIDIMARRAY(num.data) { DIRECT_NZYX_ELEM(dest.data, l, k, i, j) = DIRECT_NZYX_ELEM(num.data, l, k, i, j) / (DIRECT_NZYX_ELEM(denom.data, l, k, i, j) + eps); } } void FilterHelper::divideExcessive(Image& num, Volume& denom, RFLOAT theta, Image& dest) { for (long int n = 0; n < num.data.ndim; n++) for (long int z = 0; z < num.data.zdim; z++) for (long int y = 0; y < num.data.ydim; y++) for (long int x = 0; x < num.data.xdim; x++) { RFLOAT t = denom(x,y,z)/theta; if (t > 1) { DIRECT_NZYX_ELEM(dest.data, n, z, y, x) = DIRECT_NZYX_ELEM(num.data, n, z, y, x) / t; } else { DIRECT_NZYX_ELEM(dest.data, n, z, y, x) = DIRECT_NZYX_ELEM(num.data, n, z, y, x); } } } void FilterHelper::wienerDeconvolve(Image& num, Image& denom, RFLOAT theta, Image& dest) { for (long int z = 0; z < num.data.zdim; z++) for (long int y = 0; y < num.data.ydim; y++) for (long int x = 0; x < num.data.xdim; x++) { Complex zz = DIRECT_NZYX_ELEM(denom.data, 0, z, y, x); Complex z0 = DIRECT_NZYX_ELEM(num.data, 0, z, y, x); /*std::cout << "z0 = " << z0.real << " + " << z0.imag << " * i\n"; std::cout << "zz = " << zz.real << " + " << 
zz.imag << " * i\n"; std::cout << "zzB * z0 = " << (zz.conj() * z0).real << " + " << (zz.conj() * z0).imag << " * i\n"; std::cout << "((zz.conj() * zz).real + theta) = " << ((zz.conj() * zz).real + theta) << " * i\n";*/ //DIRECT_NZYX_ELEM(dest.data, 0, z, y, x) = (zz.conj() * z0) / ((zz.conj() * zz).real + theta); DIRECT_NZYX_ELEM(dest.data, 0, z, y, x) = (zz.real * z0) / (zz.real * zz.real + theta); /*RFLOAT t = zz.abs()/theta; if (t > 1) { DIRECT_NZYX_ELEM(dest.data, 0, z, y, x) = DIRECT_NZYX_ELEM(num.data, 0, z, y, x) / t; } else { DIRECT_NZYX_ELEM(dest.data, 0, z, y, x) = DIRECT_NZYX_ELEM(num.data, 0, z, y, x); }*/ } } void FilterHelper::extract2D(const Image& src, Image& dest, long int x0, long int y0, long int w, long int h) { dest = Image(w,h); for (long int y = 0; y < h; y++) for (long int x = 0; x < w; x++) { long int xx = x0 + x; long int yy = y0 + y; if ( xx >= 0 && xx < src.data.xdim && yy >= 0 && yy < src.data.ydim) { DIRECT_NZYX_ELEM(dest.data, 0, 0, y, x) = DIRECT_NZYX_ELEM(src.data, 0, 0, yy, xx); } else { DIRECT_NZYX_ELEM(dest.data, 0, 0, y, x) = 0; } } } void FilterHelper::extract( const Volume& src, Volume& dest, long int x0, long int y0, long int z0, long int w, long int h, long int d) { dest.resize(w,h,d); for (long int z = 0; z < d; z++) for (long int y = 0; y < h; y++) for (long int x = 0; x < w; x++) { long int xx = x0 + x; long int yy = y0 + y; long int zz = z0 + z; if ( xx >= 0 && xx < src.dimx && yy >= 0 && yy < src.dimy && zz >= 0 && zz < src.dimz) { dest(x, y, z) = src(xx, yy, zz); } } } void FilterHelper::signedDist(const Image& src, Image& dest) { dest = Image(src.data.xdim, src.data.ydim, src.data.zdim); Image ggp(src.data.xdim, src.data.ydim, src.data.zdim), ggn(src.data.xdim, src.data.ydim, src.data.zdim), gp(src.data.xdim, src.data.ydim, src.data.zdim), gn(src.data.xdim, src.data.ydim, src.data.zdim), hp(src.data.xdim, src.data.ydim, src.data.zdim), hn(src.data.xdim, src.data.ydim, src.data.zdim), s(src.data.xdim, src.data.ydim, src.data.zdim); double rmax2 = 4.0 * (src.data.xdim*src.data.xdim + src.data.ydim*src.data.ydim + src.data.zdim*src.data.zdim); for (long int z = 0; z < dest.data.zdim; z++) for (long int y = 0; y < dest.data.ydim; y++) { DIRECT_A3D_ELEM(ggp.data, z, y, 0) = rmax2; DIRECT_A3D_ELEM(ggn.data, z, y, 0) = rmax2; for (long int x = 1; x < dest.data.xdim; x++) { if (DIRECT_A3D_ELEM(src.data, z, y, x) < 0.0) { DIRECT_A3D_ELEM(ggp.data, z, y, x) = 0; double d = sqrt(DIRECT_A3D_ELEM(ggn.data, z, y, x-1)) + 1.0; DIRECT_A3D_ELEM(ggn.data, z, y, x) = d*d; } else { DIRECT_A3D_ELEM(ggn.data, z, y, x) = 0; double d = sqrt(DIRECT_A3D_ELEM(ggp.data, z, y, x-1)) + 1.0; DIRECT_A3D_ELEM(ggp.data, z, y, x) = d*d; } } DIRECT_A3D_ELEM(gp.data, z, y, dest.data.xdim-1) = DIRECT_A3D_ELEM(ggp.data, z, y, dest.data.xdim-1); DIRECT_A3D_ELEM(gn.data, z, y, dest.data.xdim-1) = DIRECT_A3D_ELEM(ggn.data, z, y, dest.data.xdim-1); for (long int x = dest.data.xdim-2; x >= 0; x--) { double dp = sqrt(DIRECT_A3D_ELEM(gp.data, z, y, x+1)) + 1.0; double ddp = dp*dp; double dn = sqrt(DIRECT_A3D_ELEM(gn.data, z, y, x+1)) + 1.0; double ddn = dn*dn; if (ddp < DIRECT_A3D_ELEM(ggp.data, z, y, x)) { DIRECT_A3D_ELEM(gp.data, z, y, x) = ddp; } else { DIRECT_A3D_ELEM(gp.data, z, y, x) = DIRECT_A3D_ELEM(ggp.data, z, y, x); } if (ddn < DIRECT_A3D_ELEM(ggn.data, z, y, x)) { DIRECT_A3D_ELEM(gn.data, z, y, x) = ddn; } else { DIRECT_A3D_ELEM(gn.data, z, y, x) = DIRECT_A3D_ELEM(ggn.data, z, y, x); } } } for (long int z = 0; z < dest.data.zdim; z++) for (long int y = 0; y < dest.data.ydim; 
y++) for (long int x = 0; x < dest.data.xdim; x++) { long int rp = (long int) sqrt(DIRECT_A3D_ELEM(gp.data, z, y, x)); long int rn = (long int) sqrt(DIRECT_A3D_ELEM(gn.data, z, y, x)); double minValP = rmax2; double minValN = rmax2; for (long int yy = y-rp; yy <= y+rp; yy++) { if (yy < 0 || yy >= dest.data.ydim) continue; double dy = yy - y; double vgp = DIRECT_A3D_ELEM(gp.data, z, yy, x) + dy*dy; if (vgp < minValP) minValP = vgp; } for (long int yy = y-rn; yy <= y+rn; yy++) { if (yy < 0 || yy >= dest.data.ydim) continue; double dy = yy - y; double vgn = DIRECT_A3D_ELEM(gn.data, z, yy, x) + dy*dy; if (vgn < minValN) minValN = vgn; } DIRECT_A3D_ELEM(hp.data, z, y, x) = minValP; DIRECT_A3D_ELEM(hn.data, z, y, x) = minValN; } for (long int z = 0; z < dest.data.zdim; z++) for (long int y = 0; y < dest.data.ydim; y++) for (long int x = 0; x < dest.data.xdim; x++) { if (DIRECT_A3D_ELEM(src.data, z, y, x) < 0.0) { DIRECT_A3D_ELEM(dest.data, z, y, x) = -sqrt(DIRECT_A3D_ELEM(hn.data, z, y, x)); } else { DIRECT_A3D_ELEM(dest.data, z, y, x) = sqrt(DIRECT_A3D_ELEM(hp.data, z, y, x)); } } } void FilterHelper::erode3x3(Image& src, Image& dest) { for (long int z = 0; z < src.data.zdim; z++) for (long int y = 0; y < src.data.ydim; y++) for (long int x = 0; x < src.data.xdim; x++) { double v = std::numeric_limits::max(); for (long int zz = z-1; zz <= z+1; zz++) for (long int yy = y-1; yy <= y+1; yy++) for (long int xx = x-1; xx <= x+1; xx++) { if ( xx >= 0 && xx < src.data.xdim && yy >= 0 && yy < src.data.ydim && zz >= 0 && zz < src.data.zdim && DIRECT_A3D_ELEM(src.data, zz, yy, xx) < v) { v = DIRECT_A3D_ELEM(src.data, zz, yy, xx); } } DIRECT_A3D_ELEM(dest.data, z, y, x) = v; } } void FilterHelper::localMinima(Image& src, Image& dest, RFLOAT thresh) { dest = Image(src.data.xdim, src.data.ydim, src.data.zdim); for (long int z = 0; z < src.data.zdim; z++) for (long int y = 0; y < src.data.ydim; y++) for (long int x = 0; x < src.data.xdim; x++) { if (DIRECT_A3D_ELEM(src.data, z, y, x) > thresh) { DIRECT_A3D_ELEM(dest.data, z, y, x) = 0.f; continue; } double v = std::numeric_limits::max(); for (long int zz = z-1; zz <= z+1; zz++) for (long int yy = y-1; yy <= y+1; yy++) for (long int xx = x-1; xx <= x+1; xx++) { if ( xx >= 0 && xx < src.data.xdim && yy >= 0 && yy < src.data.ydim && zz >= 0 && zz < src.data.zdim && DIRECT_A3D_ELEM(src.data, zz, yy, xx) < v) { v = DIRECT_A3D_ELEM(src.data, zz, yy, xx); } } if (v == DIRECT_A3D_ELEM(src.data, z, y, x)) { DIRECT_A3D_ELEM(dest.data, z, y, x) = 1.f; } else { DIRECT_A3D_ELEM(dest.data, z, y, x) = 0.f; } } } std::vector FilterHelper::localMinima(Image& src, RFLOAT thresh) { std::vector out(0); for (long int z = 0; z < src.data.zdim; z++) for (long int y = 0; y < src.data.ydim; y++) for (long int x = 0; x < src.data.xdim; x++) { if (DIRECT_A3D_ELEM(src.data, z, y, x) > thresh) { continue; } double v = std::numeric_limits::max(); for (long int zz = z-1; zz <= z+1; zz++) for (long int yy = y-1; yy <= y+1; yy++) for (long int xx = x-1; xx <= x+1; xx++) { if ( xx >= 0 && xx < src.data.xdim && yy >= 0 && yy < src.data.ydim && zz >= 0 && zz < src.data.zdim && DIRECT_A3D_ELEM(src.data, zz, yy, xx) < v) { v = DIRECT_A3D_ELEM(src.data, zz, yy, xx); } } if (v == DIRECT_A3D_ELEM(src.data, z, y, x)) { out.push_back(d3Vector(x,y,z)); } } return out; } void FilterHelper::centralGradient(const Volume& src, Volume >& dest) { const size_t dimx = src.dimx; const size_t dimy = src.dimy; const size_t dimz = src.dimz; dest.resize(dimx, dimy, dimz); FOR_ALL_VOXELS(src) { if (dimx == 0) { 
dest(x,y,z).x = 0; } else if (x == 0) { dest(x,y,z).x = src(x+1,y,z) - src(x,y,z); } else if (x < dimx - 1) { dest(x,y,z).x = 0.5 * (src(x+1,y,z) - src(x-1,y,z)); } else { dest(x,y,z).x = src(x,y,z) - src(x-1,y,z); } if (dimy == 0) { dest(x,y,z).y = 0; } else if (y == 0) { dest(x,y,z).y = src(x,y+1,z) - src(x,y,z); } else if (y < dimy - 1) { dest(x,y,z).y = 0.5 * (src(x,y+1,z) - src(x,y-1,z)); } else { dest(x,y,z).y = src(x,y,z) - src(x,y-1,z); } if (dimz == 0) { dest(x,y,z).z = 0; } else if (z == 0) { dest(x,y,z).z = src(x,y,z+1) - src(x,y,z); } else if (z < dimz - 1) { dest(x,y,z).z = 0.5 * (src(x,y,z+1) - src(x,y,z-1)); } else { dest(x,y,z).z = src(x,y,z) - src(x,y,z-1); } } } t3Vector FilterHelper::centralGradient(const Volume& src, size_t x, size_t y, size_t z) { t3Vector out; if (src.dimx == 0) { out.x = 0; } else if (x == 0) { out.x = src(x+1,y,z) - src(x,y,z); } else if (x < src.dimx - 1) { out.x = 0.5 * (src(x+1,y,z) - src(x-1,y,z)); } else { out.x = src(x,y,z) - src(x-1,y,z); } if (src.dimy == 0) { out.y = 0; } else if (y == 0) { out.y = src(x,y+1,z) - src(x,y,z); } else if (y < src.dimy - 1) { out.y = 0.5 * (src(x,y+1,z) - src(x,y-1,z)); } else { out.y = src(x,y,z) - src(x,y-1,z); } if (src.dimz == 0) { out.z = 0; } else if (z == 0) { out.z = src(x,y,z+1) - src(x,y,z); } else if (z < src.dimz - 1) { out.z = 0.5 * (src(x,y,z+1) - src(x,y,z-1)); } else { out.z = src(x,y,z) - src(x,y,z-1); } return out; } MultidimArray FilterHelper::FriedelExpand(const MultidimArray &half) { const int wh = half.xdim; const int h = half.ydim; const int d = half.zdim; const int c = half.ndim; const int w = 2*(wh-1); MultidimArray out(d,h,w); for (int n = 0; n < c; n++) for (int z = 0; z < d; z++) for (int y = 0; y < h; y++) { const int zz = (d - z) % d; const int yy = (h - y) % h; for (int x = 0; x < wh; x++) { DIRECT_NZYX_ELEM(out, n, z, y, x) = DIRECT_NZYX_ELEM(half, n, z, y, x); } for (int x = wh; x < w; x++) { DIRECT_NZYX_ELEM(out, n, z, y, x) = DIRECT_NZYX_ELEM(half, n, zz, yy, w-x).conj(); } } return out; } Image FilterHelper::normaliseToUnitInterval(const Image &img) { const int w = img.data.xdim; const int h = img.data.ydim; const int d = img.data.zdim; const int c = img.data.ndim; RFLOAT minVal = std::numeric_limits::max(); RFLOAT maxVal = -std::numeric_limits::max(); for (int n = 0; n < c; n++) for (int z = 0; z < d; z++) for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { RFLOAT v = DIRECT_NZYX_ELEM(img.data, n, z, y, x); if (v > maxVal) maxVal = v; if (v < minVal) minVal = v; } Image out(w,h,d,c); for (int n = 0; n < c; n++) for (int z = 0; z < d; z++) for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { RFLOAT v = DIRECT_NZYX_ELEM(img.data, n, z, y, x); DIRECT_NZYX_ELEM(out.data, n, z, y, x) = (v - minVal)/(maxVal - minVal); } return out; } Image FilterHelper::normaliseToUnitIntervalSigned(const Image &img) { const int w = img.data.xdim; const int h = img.data.ydim; const int d = img.data.zdim; const int c = img.data.ndim; RFLOAT maxAbs = 0; for (int n = 0; n < c; n++) for (int z = 0; z < d; z++) for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { RFLOAT v = std::abs(DIRECT_NZYX_ELEM(img.data, n, z, y, x)); if (v > maxAbs) maxAbs = v; } Image out(w,h,d,c); for (int n = 0; n < c; n++) for (int z = 0; z < d; z++) for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { RFLOAT v = DIRECT_NZYX_ELEM(img.data, n, z, y, x); DIRECT_NZYX_ELEM(out.data, n, z, y, x) = v / maxAbs; } return out; } void FilterHelper::uniqueInfluenceMask(std::vector pts, Image& dest, Image& indexDest, 
RFLOAT thresh) { const long int w = dest.data.xdim; const long int h = dest.data.ydim; const long int pc = pts.size(); indexDest = Image(w,h); const double t2 = thresh * thresh; for (long int y = 0; y < h; y++) for (long int x = 0; x < w; x++) { int closer = 0; int lastIndex = -1; for (long int p = 0; p < pc; p++) { d2Vector d(x - pts[p].x, y - pts[p].y); if (d.norm2() < t2) { closer++; lastIndex = p; } } if (closer == 1) { DIRECT_NZYX_ELEM(dest.data, 0, 0, y, x) = 1.0; DIRECT_NZYX_ELEM(indexDest.data, 0, 0, y, x) = lastIndex; } else { DIRECT_NZYX_ELEM(dest.data, 0, 0, y, x) = 0.0; DIRECT_NZYX_ELEM(indexDest.data, 0, 0, y, x) = -1.0; } } } void FilterHelper::polarRemap(d2Vector pos, const Image& src, Image& dest, const Image& mask, Image& maskDest, int phiRes, int rRes, double rMax) { const long int w = src.data.xdim; const long int h = src.data.ydim; dest = Image(phiRes, rRes, 1); maskDest = Image(phiRes, rRes, 1); for (long int ri = 0; ri < rRes; ri++) for (long int p = 0; p < phiRes; p++) { const double r = rMax * ri / (double)rRes; const double phi = 2.0 * PI * p / (double)phiRes; d2Vector pp = pos + r * d2Vector(cos(phi),sin(phi)); int ppnnx = (int)(pp.x + 0.5); int ppnny = (int)(pp.y + 0.5); if ( ppnnx > 0 && ppnnx < w - 1 && ppnny > 0 && ppnny < h - 1 && DIRECT_NZYX_ELEM(mask.data, 0, 0, ppnny, ppnnx) > 0.5) { DIRECT_NZYX_ELEM(dest.data, 0, 0, ri, p) = Interpolation::linearXY(src, pp.x, pp.y, 0); DIRECT_NZYX_ELEM(maskDest.data, 0, 0, ri, p) = 1.0; } else { DIRECT_NZYX_ELEM(dest.data, 0, 0, ri, p) = 0.0; DIRECT_NZYX_ELEM(maskDest.data, 0, 0, ri, p) = 0.0; } } } void FilterHelper::polarRemap(d2Vector pos, const Image& distTransf, const Image& src, Image& dest, const Image& mask, Image& maskDest, int phiRes, int rRes, double rMax) { const long int w = src.data.xdim; const long int h = src.data.ydim; dest = Image(phiRes, rRes, 1); maskDest = Image(phiRes, rRes, 1); for (long int r = 0; r < rRes; r++) for (long int p = 0; p < phiRes; p++) { DIRECT_NZYX_ELEM(dest.data, 0, 0, r, p) = 0.0; DIRECT_NZYX_ELEM(maskDest.data, 0, 0, r, p) = 0.0; } const int x0 = (int)(pos.x - rMax + 0.5); const int x1 = (int)(pos.x + rMax + 0.5); const int y0 = (int)(pos.y - rMax + 0.5); const int y1 = (int)(pos.y + rMax + 0.5); for (int y = y0; y <= y1; y++) for (int x = x0; x <= x1; x++) { const double dx = x - pos.x; const double dy = y - pos.y; if (x < 1 || x >= w-1 || y < 1 || y >= h-1 || (dx == 0.0 && dy == 0.0)) { continue; } double phiR = std::atan2(dy,dx); if (phiR < 0.0) phiR += 2.0*PI; const double phiD = phiRes * phiR / (2.0*PI); const int phi0 = ((int)(phiD)) % phiRes; const int phi1 = ((int)(phiD)+1) % phiRes; const double phiF = phiD - (double)phi0; const double rD = rRes * DIRECT_NZYX_ELEM(distTransf.data, 0, 0, y, x) / rMax; const int r0 = (int)rD; const int r1 = (int)rD + 1; const double rF = rD - r0; const double v = DIRECT_NZYX_ELEM(src.data, 0, 0, y, x); if (r0 >= 0 && r0 < rRes) { DIRECT_NZYX_ELEM(dest.data, 0, 0, r0, phi0) += (1.0 - rF) * (1.0 - phiF) * v; DIRECT_NZYX_ELEM(maskDest.data, 0, 0, r0, phi0) += (1.0 - rF) * (1.0 - phiF); DIRECT_NZYX_ELEM(dest.data, 0, 0, r0, phi1) += (1.0 - rF) * phiF * v; DIRECT_NZYX_ELEM(maskDest.data, 0, 0, r0, phi1) += (1.0 - rF) * phiF; } if (r1 >= 0 && r1 < rRes) { DIRECT_NZYX_ELEM(dest.data, 0, 0, r1, phi0) += rF * (1.0 - phiF) * v; DIRECT_NZYX_ELEM(maskDest.data, 0, 0, r1, phi0) += rF * (1.0 - phiF); DIRECT_NZYX_ELEM(dest.data, 0, 0, r1, phi1) += rF * phiF * v; DIRECT_NZYX_ELEM(maskDest.data, 0, 0, r1, phi1) += rF * phiF; } } for (long int r = 0; r < 
rRes; r++) for (long int p = 0; p < phiRes; p++) { if (DIRECT_NZYX_ELEM(maskDest.data, 0, 0, r, p) > 0.0) { DIRECT_NZYX_ELEM(dest.data, 0, 0, r, p) /= DIRECT_NZYX_ELEM(maskDest.data, 0, 0, r, p); } } } Image FilterHelper::cartToPolar(const Image &img) { const int w0 = img.data.xdim; const int h0 = img.data.ydim; const double w0h = w0/2.0; const double h0h = h0/2.0; const double cx = w0h + 1; const double cy = h0h + 0.5; const int w = (int)(2.0*PI*w0h + 1); const int h = w0h; Image out(w,h); for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { const double phi = 2.0 * PI * x / (double)w; const double r = w0h * y / (double)h; double xx = cx + r * cos(phi); double yy = cy + r * sin(phi); out(y,x) = Interpolation::cubicXY(img, xx, yy, 0, 0); } return out; } Image FilterHelper::polarToCart(const Image &img) { const int wp = img.data.xdim; const int hp = img.data.ydim; const double w0h = hp; const double cx = w0h + 1; const double cy = w0h + 0.5; const int w = 2.0*w0h; const int h = 2.0*w0h; Image out(w,h); for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { const double xd = x - cx; const double yd = y - cy; const double r = sqrt(xd*xd + yd*yd); double phi = (xd == 0 && yd == 0)? 0.0 : atan2(yd,xd); if (phi < 0.0) phi += 2.0*PI; out(y,x) = Interpolation::cubicXY(img, wp*phi/(2.0*PI), r, 0, 0); } return out; } Image FilterHelper::polarBlur(const Image &img, double sigma) { Image img1 = FilterHelper::cartToPolar(img); Image img2 = img1; separableGaussianX_wrap(img1, img2, sigma); return FilterHelper::polarToCart(img2); } Image FilterHelper::sectorBlend(const Image& img0, const Image& img1, int sectors) { const int w = img0.data.xdim; const int h = img0.data.ydim; if (img1.data.xdim != w || img1.data.ydim != h) { std::cerr << "FilterHelper::sectorBlend: unequal image size: " << w << "x" << h << " vs. " << img1.data.xdim << "x" << img1.data.ydim << "\n"; REPORT_ERROR("FilterHelper::sectorBlend: unequal image size."); } Image out(w,h); const double cx = w/2.0; const double cy = h/2.0; for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { const double xd = x - cx; const double yd = y - cy; double phi = (xd == 0 && yd == 0)? 0.0 : atan2(yd,xd) + PI; double a = sectors*phi/(2.0*PI); out(y,x) = a - (int)a < 0.5? 
img0(y,x) : img1(y,x); } return out; } void FilterHelper::diffuseAlongIsocontours2D(const Image& src, const Image& guide, Image& dest, int iters, RFLOAT sigma, RFLOAT lambda, RFLOAT delta) { const long int w = src.data.xdim; const long int h = src.data.ydim; const bool sobel = true; dest = Image(w, h, 1); for (long int y = 0; y < h; y++) for (long int x = 0; x < w; x++) { DIRECT_NZYX_ELEM(dest.data, 0, 0, y, x) = DIRECT_NZYX_ELEM(src.data, 0, 0, y, x); } Volume flux(w,h,1); flux.fill(d2Vector(0,0)); Volume > D0(w,h,1), D(w,h,1), J(w,h,1); D0.fill(Tensor2x2(0.0)); D.fill(Tensor2x2(0.0)); J.fill(Tensor2x2(0.0)); for (long int y = 1; y < h-1; y++) for (long int x = 1; x < w-1; x++) { d2Vector g; if (sobel) { double gxp = 0.25 * DIRECT_NZYX_ELEM(guide.data, 0, 0, y-1, x+1) + 0.5 * DIRECT_NZYX_ELEM(guide.data, 0, 0, y, x+1) + 0.25 * DIRECT_NZYX_ELEM(guide.data, 0, 0, y+1, x+1); double gxn = 0.25 * DIRECT_NZYX_ELEM(guide.data, 0, 0, y-1, x-1) + 0.5 * DIRECT_NZYX_ELEM(guide.data, 0, 0, y, x-1) + 0.25 * DIRECT_NZYX_ELEM(guide.data, 0, 0, y+1, x-1); double gyp = 0.25 * DIRECT_NZYX_ELEM(guide.data, 0, 0, y+1, x-1) + 0.5 * DIRECT_NZYX_ELEM(guide.data, 0, 0, y+1, x) + 0.25 * DIRECT_NZYX_ELEM(guide.data, 0, 0, y+1, x+1); double gyn = 0.25 * DIRECT_NZYX_ELEM(guide.data, 0, 0, y-1, x-1) + 0.5 * DIRECT_NZYX_ELEM(guide.data, 0, 0, y-1, x) + 0.25 * DIRECT_NZYX_ELEM(guide.data, 0, 0, y-1, x+1); g.x = 0.5 * (gxp - gxn); g.y = 0.5 * (gyp - gyn); } else { g.x = 0.5 * (DIRECT_NZYX_ELEM(guide.data, 0, 0, y, x+1) - DIRECT_NZYX_ELEM(guide.data, 0, 0, y, x-1)); g.y = 0.5 * (DIRECT_NZYX_ELEM(guide.data, 0, 0, y+1, x) - DIRECT_NZYX_ELEM(guide.data, 0, 0, y-1, x)); } D0(x,y,0) = Tensor2x2::autoDyadicProduct(t2Vector(g.x, g.y)); } separableGaussian(D0, D, sigma); //Volume dbg0(w,h,1), dbg1(w,h,1); for (long int y = 0; y < h; y++) for (long int x = 0; x < w; x++) { t2Matrix DxyR = D(x,y,0).toMatrix(); d2Matrix Dxy(DxyR(0,0), DxyR(0,1), DxyR(1,0), DxyR(1,1)); double qx, qy, l0, l1; dsyev2(Dxy(0,0), Dxy(0,1), Dxy(1,1), &l0, &l1, &qx, &qy); double dl = l0 - l1; RFLOAT ani = 1.0 - exp(-0.5*dl*dl/(lambda*lambda)); d2Vector f(-qy, qx); //dbg0(x,y,0) = f.length(); J(x,y,0) = ani * Tensor2x2::autoDyadicProduct(t2Vector(f.x, f.y)); } //VtkHelper::writeVTK(dbg0, "f_len.vtk"); for (int it = 0; it < iters; it++) { #if JAZ_USE_OPENMP #pragma omp parallel for #endif for (long int y = 1; y < h-1; y++) for (long int x = 1; x < w-1; x++) { d2Vector g; if (sobel) { double gxp = 0.25 * DIRECT_NZYX_ELEM(dest.data, 0, 0, y-1, x+1) + 0.5 * DIRECT_NZYX_ELEM(dest.data, 0, 0, y, x+1) + 0.25 * DIRECT_NZYX_ELEM(dest.data, 0, 0, y+1, x+1); double gxn = 0.25 * DIRECT_NZYX_ELEM(dest.data, 0, 0, y-1, x-1) + 0.5 * DIRECT_NZYX_ELEM(dest.data, 0, 0, y, x-1) + 0.25 * DIRECT_NZYX_ELEM(dest.data, 0, 0, y+1, x-1); double gyp = 0.25 * DIRECT_NZYX_ELEM(dest.data, 0, 0, y+1, x-1) + 0.5 * DIRECT_NZYX_ELEM(dest.data, 0, 0, y+1, x) + 0.25 * DIRECT_NZYX_ELEM(dest.data, 0, 0, y+1, x+1); double gyn = 0.25 * DIRECT_NZYX_ELEM(dest.data, 0, 0, y-1, x-1) + 0.5 * DIRECT_NZYX_ELEM(dest.data, 0, 0, y-1, x) + 0.25 * DIRECT_NZYX_ELEM(dest.data, 0, 0, y-1, x+1); g.x = 0.5 * (gxp - gxn); g.y = 0.5 * (gyp - gyn); } else { g.x = 0.5 * (DIRECT_NZYX_ELEM(guide.data, 0, 0, y, x+1) - DIRECT_NZYX_ELEM(guide.data, 0, 0, y, x-1)); g.y = 0.5 * (DIRECT_NZYX_ELEM(guide.data, 0, 0, y+1, x) - DIRECT_NZYX_ELEM(guide.data, 0, 0, y-1, x)); } t2Vector fR = J(x,y,0).toMatrix() * t2Vector(g.x, g.y); flux(x,y,0) = d2Vector(fR.x, fR.y); } #if JAZ_USE_OPENMP #pragma omp parallel for #endif for (long 
int y = 1; y < h-1; y++) for (long int x = 1; x < w-1; x++) { double div = 0.0; div += flux(x+1,y,0).x - flux(x-1,y,0).x; div += flux(x,y+1,0).y - flux(x,y-1,0).y; DIRECT_NZYX_ELEM(dest.data, 0, 0, y, x) += delta * div; } } } void FilterHelper::EED_2D(const Image& src, Image& dest, int iters, double sigma, double delta, double tau) { const long int w = src.data.xdim; const long int h = src.data.ydim; for (long int y = 0; y < h; y++) for (long int x = 0; x < w; x++) { DIRECT_NZYX_ELEM(dest.data, 0, 0, y, x) = DIRECT_NZYX_ELEM(src.data, 0, 0, y, x); } Image smooth; separableGaussianXY(dest, smooth, sigma); Volume flux(w,h,1); flux.fill(d2Vector(0,0)); double tt = tau*tau; for (int it = 0; it < iters; it++) { #if JAZ_USE_OPENMP #pragma omp parallel for #endif for (long int y = 1; y < h-1; y++) for (long int x = 1; x < w-1; x++) { d2Vector g, gs; g.x = DIRECT_NZYX_ELEM(dest.data, 0, 0, y, x+1) - DIRECT_NZYX_ELEM(dest.data, 0, 0, y, x); g.y = DIRECT_NZYX_ELEM(dest.data, 0, 0, y+1, x) - DIRECT_NZYX_ELEM(dest.data, 0, 0, y, x); gs.x = DIRECT_NZYX_ELEM(smooth.data, 0, 0, y, x+1) - DIRECT_NZYX_ELEM(smooth.data, 0, 0, y, x); gs.y = DIRECT_NZYX_ELEM(smooth.data, 0, 0, y+1, x) - DIRECT_NZYX_ELEM(smooth.data, 0, 0, y, x); double iso = exp(-0.5*gs.norm2()/tt); double gsl = gs.length(); if (gsl > 0.0) gs /= gsl; d2Vector gn = g.dot(gs) * gs; d2Vector gp = g - gn; flux(x,y,0) = iso * g + (1.0 - iso) * gp; } #if JAZ_USE_OPENMP #pragma omp parallel for #endif for (long int y = 1; y < h-1; y++) for (long int x = 1; x < w-1; x++) { double div = 0.0; div += flux(x,y,0).x - flux(x-1,y,0).x; div += flux(x,y,0).y - flux(x,y-1,0).y; DIRECT_NZYX_ELEM(dest.data, 0, 0, y, x) += delta * div; } } } void FilterHelper::descendTV(const Image& src, Image& dest, double delta) { const long int w = src.data.xdim; const long int h = src.data.ydim; const long int d = src.data.zdim; std::vector vals; for (long int z = 0; z < d; z++) for (long int y = 0; y < h; y++) for (long int x = 0; x < w; x++) { const double v0 = DIRECT_NZYX_ELEM(src.data, 0, z, y, x); vals.clear(); vals.reserve(6); if (x > 0) vals.push_back(DIRECT_NZYX_ELEM(src.data, 0, z, y, x-1)); if (x < w-1) vals.push_back(DIRECT_NZYX_ELEM(src.data, 0, z, y, x+1)); if (y > 0) vals.push_back(DIRECT_NZYX_ELEM(src.data, 0, z, y-1, x)); if (y < h-1) vals.push_back(DIRECT_NZYX_ELEM(src.data, 0, z, y+1, x)); if (z > 0) vals.push_back(DIRECT_NZYX_ELEM(src.data, 0, z-1, y, x)); if (z < d-1) vals.push_back(DIRECT_NZYX_ELEM(src.data, 0, z+1, y, x)); std::vector order = IndexSort::sortIndices(vals); const int c = vals.size(); double vm; if (vals.size() % 2 == 0) { vm = 0.5 * (vals[order[c/2]] + vals[order[c/2 - 1]]); } else { vm = vals[order[c/2]]; } if (std::abs(v0 - vm) < delta) { DIRECT_NZYX_ELEM(dest.data, 0, z, y, x) = vm; } else if (v0 < vm) { DIRECT_NZYX_ELEM(dest.data, 0, z, y, x) = v0 + delta; } else { DIRECT_NZYX_ELEM(dest.data, 0, z, y, x) = v0 - delta; } } } void FilterHelper::descendTV2(const Image& src, Image& dest, Volume& xi, Volume& uBar, int iters, double sigma, double tau) { const long int w = src.data.xdim; const long int h = src.data.ydim; const long int d = src.data.zdim; fwdGrad(src,xi); for (long int z = 0; z < d; z++) for (long int y = 0; y < h; y++) for (long int x = 0; x < w; x++) { uBar(x,y,z) = DIRECT_NZYX_ELEM(src.data, 0, z, y, x); DIRECT_NZYX_ELEM(dest.data, 0, z, y, x) = DIRECT_NZYX_ELEM(src.data, 0, z, y, x); } for (int it = 0; it < iters; it++) { for (long int z = 0; z < d; z++) for (long int y = 0; y < h; y++) for (long int x = 0; x < w; x++) { 
d3Vector gradUBar; if (w == 1) { gradUBar.x = 0; } else if (x < w - 1) { gradUBar.x = uBar(x+1,y,z) - uBar(x,y,z); } else { gradUBar.x = uBar(x,y,z) - uBar(x-1,y,z); } if (h == 1) { gradUBar.y = 0; } else if (y < h - 1) { gradUBar.y = uBar(x,y+1,z) - uBar(x,y,z); } else { gradUBar.y = uBar(x,y,z) - uBar(x,y-1,z); } if (d == 1) { gradUBar.z = 0; } else if (z < d - 1) { gradUBar.z = uBar(x,y,z+1) - uBar(x,y,z); } else { gradUBar.z = uBar(x,y,z) - uBar(x,y,z-1); } d3Vector nextXi = xi(x,y,z) + sigma * gradUBar; double nxl = nextXi.length(); xi(x,y,z) = nxl > 0.0? nextXi/nxl : d3Vector(0,0,0); } for (long int z = 0; z < d; z++) for (long int y = 0; y < h; y++) for (long int x = 0; x < w; x++) { double divXi = 0.0; if (x > 0) { divXi += xi(x,y,z).x - xi(x-1,y,z).x; } if (y > 0) { divXi += xi(x,y,z).y - xi(x,y-1,z).y; } if (z > 0) { divXi += xi(x,y,z).z - xi(x,y,z-1).z; } double du = tau * divXi; DIRECT_NZYX_ELEM(dest.data, 0, z, y, x) += du; uBar(x,y,z) = DIRECT_NZYX_ELEM(dest.data, 0, z, y, x) + 0.5*du; } } } void FilterHelper::segmentTV(const Image& src, Image& dest, Volume& xi, Volume& uBar, int iters, double sigma, double tau, double nu) { const long int w = src.data.xdim; const long int h = src.data.ydim; const long int d = src.data.zdim; xi.fill(d3Vector(0.0,0.0,0.0)); uBar.fill(0.0); for (long int z = 0; z < d; z++) for (long int y = 0; y < h; y++) for (long int x = 0; x < w; x++) { uBar(x,y,z) = DIRECT_NZYX_ELEM(src.data, 0, z, y, x); DIRECT_NZYX_ELEM(dest.data, 0, z, y, x) = 0.0; } for (int it = 0; it < iters; it++) { #if JAZ_USE_OPENMP #pragma omp parallel for #endif for (long int z = 0; z < d; z++) for (long int y = 0; y < h; y++) for (long int x = 0; x < w; x++) { d3Vector gradUBar; if (w == 1) { gradUBar.x = 0; } else if (x < w - 1) { gradUBar.x = uBar(x+1,y,z) - uBar(x,y,z); } else { gradUBar.x = uBar(x,y,z) - uBar(x-1,y,z); } if (h == 1) { gradUBar.y = 0; } else if (y < h - 1) { gradUBar.y = uBar(x,y+1,z) - uBar(x,y,z); } else { gradUBar.y = uBar(x,y,z) - uBar(x,y-1,z); } if (d == 1) { gradUBar.z = 0; } else if (z < d - 1) { gradUBar.z = uBar(x,y,z+1) - uBar(x,y,z); } else { gradUBar.z = uBar(x,y,z) - uBar(x,y,z-1); } d3Vector nextXi = xi(x,y,z) + sigma * gradUBar; double nxl = nextXi.length(); xi(x,y,z) = nxl > 0.0? 
nextXi/nxl : d3Vector(0,0,0); } #if JAZ_USE_OPENMP #pragma omp parallel for #endif for (long int z = 0; z < d; z++) for (long int y = 0; y < h; y++) for (long int x = 0; x < w; x++) { double divXi = 0.0; if (x > 0) { divXi += xi(x,y,z).x - xi(x-1,y,z).x; } if (y > 0) { divXi += xi(x,y,z).y - xi(x,y-1,z).y; } if (z > 0) { divXi += xi(x,y,z).z - xi(x,y,z-1).z; } double u = DIRECT_NZYX_ELEM(dest.data, 0, z, y, x); double du = tau * (nu * divXi + DIRECT_NZYX_ELEM(src.data, 0, z, y, x)); double nextU = u + du; if (nextU > 1.0) nextU = 1.0; else if (nextU < 0) nextU = 0; DIRECT_NZYX_ELEM(dest.data, 0, z, y, x) = nextU; uBar(x,y,z) = 2.0 * nextU - u; } } } void FilterHelper::segmentTVAniso2D(const Image& src, Image& dest, Volume& xi, Volume& uBar, int iters, double sigma, double tau, double nu, double rho, double theta, double alpha) { const long int w = src.data.xdim; const long int h = src.data.ydim; Image smooth; separableGaussianXY(src, smooth, rho); Volume smoothGrad(w,h,1); fwdGrad2D(smooth, smoothGrad); xi.fill(d2Vector(0.0,0.0)); uBar.fill(0.0); Volume D(w,h,1); const double tt = theta * theta; for (long int y = 0; y < h; y++) for (long int x = 0; x < w; x++) { uBar(x,y,0) = DIRECT_NZYX_ELEM(src.data, 0, 0, y, x); DIRECT_NZYX_ELEM(dest.data, 0, 0, y, x) = 0.0; d2Vector gs = smoothGrad(x,y,0); double iso = exp(-0.5*gs.norm2()/tt); double gsl = gs.length(); if (gsl > 0.0) gs /= gsl; d2Matrix I; //d2Matrix G = E - d2Matrix(gs.x*gs.x, gs.y*gs.x, gs.x*gs.y, gs.y*gs.y); // G x = x - (x dot gs) gs d2Matrix F = d2Matrix(gs.x*gs.x, gs.y*gs.x, gs.x*gs.y, gs.y*gs.y); d2Matrix G = sqrt(alpha) * F + sqrt((3.0 - alpha)/2.0)*(I - F); D(x,y,0) = sqrt(nu) * (iso * I + (1.0 - iso) * G); } for (int it = 0; it < iters; it++) { #if JAZ_USE_OPENMP #pragma omp parallel for #endif for (long int y = 0; y < h; y++) for (long int x = 0; x < w; x++) { d2Vector gradUBar; if (w == 1) { gradUBar.x = 0; } else if (x < w - 1) { gradUBar.x = uBar(x+1,y,0) - uBar(x,y,0); } else { gradUBar.x = uBar(x,y,0) - uBar(x-1,y,0); } if (h == 1) { gradUBar.y = 0; } else if (y < h - 1) { gradUBar.y = uBar(x,y+1,0) - uBar(x,y,0); } else { gradUBar.y = uBar(x,y,0) - uBar(x,y-1,0); } d2Vector nextXi = D(x,y,0) * (xi(x,y,0) + sigma * D(x,y,0) * gradUBar); double nxl = nextXi.length(); xi(x,y,0) = nxl > 0.0? 
(nextXi/nxl) : d2Vector(0,0); } #if JAZ_USE_OPENMP #pragma omp parallel for #endif for (long int y = 0; y < h; y++) for (long int x = 0; x < w; x++) { double divXi = 0.0; if (x > 0) { divXi += xi(x,y,0).x - xi(x-1,y,0).x; } if (y > 0) { divXi += xi(x,y,0).y - xi(x,y-1,0).y; } double u = DIRECT_NZYX_ELEM(dest.data, 0, 0, y, x); double du = tau * (divXi + DIRECT_NZYX_ELEM(src.data, 0, 0, y, x)); double nextU = u + du; if (nextU > 1.0) nextU = 1.0; else if (nextU < 0) nextU = 0; DIRECT_NZYX_ELEM(dest.data, 0, 0, y, x) = nextU; uBar(x,y,0) = 2.0 * nextU - u; } } } void FilterHelper::fwdGrad(const Image& u, Volume& dest) { const long int w = u.data.xdim; const long int h = u.data.ydim; const long int d = u.data.zdim; for (long int z = 0; z < d; z++) for (long int y = 0; y < h; y++) for (long int x = 0; x < w; x++) { if (w == 1) { dest(x,y,z).x = 0; } else if (x < w - 1) { dest(x,y,z).x = DIRECT_NZYX_ELEM(u.data, 0, z, y, x+1) - DIRECT_NZYX_ELEM(u.data, 0, z, y, x); } else { dest(x,y,z).x = DIRECT_NZYX_ELEM(u.data, 0, z, y, x) - DIRECT_NZYX_ELEM(u.data, 0, z, y, x-1); } if (h == 1) { dest(x,y,z).y = 0; } else if (y < h - 1) { dest(x,y,z).y = DIRECT_NZYX_ELEM(u.data, 0, z, y+1, x) - DIRECT_NZYX_ELEM(u.data, 0, z, y, x); } else { dest(x,y,z).y = DIRECT_NZYX_ELEM(u.data, 0, z, y, x) - DIRECT_NZYX_ELEM(u.data, 0, z, y-1, x); } if (d == 1) { dest(x,y,z).z = 0; } else if (z < d - 1) { dest(x,y,z).z = DIRECT_NZYX_ELEM(u.data, 0, z+1, y, x) - DIRECT_NZYX_ELEM(u.data, 0, z, y, x); } else { dest(x,y,z).z = DIRECT_NZYX_ELEM(u.data, 0, z, y, x) - DIRECT_NZYX_ELEM(u.data, 0, z-1, y, x); } } } void FilterHelper::fwdGrad2D(const Image& u, Volume& dest) { const long int w = u.data.xdim; const long int h = u.data.ydim; const long int d = 1; for (long int z = 0; z < d; z++) for (long int y = 0; y < h; y++) for (long int x = 0; x < w; x++) { if (w == 1) { dest(x,y,z).x = 0; } else if (x < w - 1) { dest(x,y,z).x = DIRECT_NZYX_ELEM(u.data, 0, z, y, x+1) - DIRECT_NZYX_ELEM(u.data, 0, z, y, x); } else { dest(x,y,z).x = DIRECT_NZYX_ELEM(u.data, 0, z, y, x) - DIRECT_NZYX_ELEM(u.data, 0, z, y, x-1); } if (h == 1) { dest(x,y,z).y = 0; } else if (y < h - 1) { dest(x,y,z).y = DIRECT_NZYX_ELEM(u.data, 0, z, y+1, x) - DIRECT_NZYX_ELEM(u.data, 0, z, y, x); } else { dest(x,y,z).y = DIRECT_NZYX_ELEM(u.data, 0, z, y, x) - DIRECT_NZYX_ELEM(u.data, 0, z, y-1, x); } } } void FilterHelper::centralGrad2D(const Image &u, Volume &dest) { const long int w = u.data.xdim; const long int h = u.data.ydim; const long int d = 1; for (long int z = 0; z < d; z++) for (long int y = 0; y < h; y++) for (long int x = 0; x < w; x++) { if (w == 1) { dest(x,y,z).x = 0; } else if (x < w-1 && x > 0) { dest(x,y,z).x = (DIRECT_NZYX_ELEM(u.data, 0, z, y, x+1) - DIRECT_NZYX_ELEM(u.data, 0, z, y, x-1))/2.0; } else if (x == 0) { dest(x,y,z).x = DIRECT_NZYX_ELEM(u.data, 0, z, y, x+1) - DIRECT_NZYX_ELEM(u.data, 0, z, y, x); } else if (x == w-1) { dest(x,y,z).x = DIRECT_NZYX_ELEM(u.data, 0, z, y, x) - DIRECT_NZYX_ELEM(u.data, 0, z, y, x-1); } if (h == 1) { dest(x,y,z).y = 0; } else if (y < h-1 && y > 0) { dest(x,y,z).y = (DIRECT_NZYX_ELEM(u.data, 0, z, y+1, x) - DIRECT_NZYX_ELEM(u.data, 0, z, y-1, x))/2; } else if (y == 0) { dest(x,y,z).y = DIRECT_NZYX_ELEM(u.data, 0, z, y+1, x) - DIRECT_NZYX_ELEM(u.data, 0, z, y, x); } else if (y == h-1) { dest(x,y,z).y = DIRECT_NZYX_ELEM(u.data, 0, z, y, x) - DIRECT_NZYX_ELEM(u.data, 0, z, y-1, x); } } } void FilterHelper::centralGrad2D(const Image &u, Volume &destRe, Volume &destIm) { const long int w = u.data.xdim; const long 
int h = u.data.ydim; const long int d = 1; for (long int z = 0; z < d; z++) for (long int y = 0; y < h; y++) for (long int x = 0; x < w; x++) { if (w == 1) { destRe(x,y,z).x = 0; destIm(x,y,z).x = 0; } else if (x < w-1 && x > 0) { destRe(x,y,z).x = (DIRECT_NZYX_ELEM(u.data, 0, z, y, x+1).real - DIRECT_NZYX_ELEM(u.data, 0, z, y, x-1).real)/2.0; destIm(x,y,z).x = (DIRECT_NZYX_ELEM(u.data, 0, z, y, x+1).imag - DIRECT_NZYX_ELEM(u.data, 0, z, y, x-1).imag)/2.0; } else if (x == 0) { destRe(x,y,z).x = DIRECT_NZYX_ELEM(u.data, 0, z, y, x+1).real - DIRECT_NZYX_ELEM(u.data, 0, z, y, x).real; destIm(x,y,z).x = DIRECT_NZYX_ELEM(u.data, 0, z, y, x+1).imag - DIRECT_NZYX_ELEM(u.data, 0, z, y, x).imag; } else if (x == w-1) { destRe(x,y,z).x = DIRECT_NZYX_ELEM(u.data, 0, z, y, x).real - DIRECT_NZYX_ELEM(u.data, 0, z, y, x-1).real; destIm(x,y,z).x = DIRECT_NZYX_ELEM(u.data, 0, z, y, x).imag - DIRECT_NZYX_ELEM(u.data, 0, z, y, x-1).imag; } if (h == 1) { destRe(x,y,z).y = 0; destIm(x,y,z).y = 0; } else if (y < h-1 && y > 0) { destRe(x,y,z).y = (DIRECT_NZYX_ELEM(u.data, 0, z, y+1, x).real - DIRECT_NZYX_ELEM(u.data, 0, z, y-1, x).real)/2; destIm(x,y,z).y = (DIRECT_NZYX_ELEM(u.data, 0, z, y+1, x).imag - DIRECT_NZYX_ELEM(u.data, 0, z, y-1, x).imag)/2; } else if (y == 0) { destRe(x,y,z).y = DIRECT_NZYX_ELEM(u.data, 0, z, y+1, x).real - DIRECT_NZYX_ELEM(u.data, 0, z, y, x).real; destIm(x,y,z).y = DIRECT_NZYX_ELEM(u.data, 0, z, y+1, x).imag - DIRECT_NZYX_ELEM(u.data, 0, z, y, x).imag; } else if (y == h-1) { destRe(x,y,z).y = DIRECT_NZYX_ELEM(u.data, 0, z, y, x).real - DIRECT_NZYX_ELEM(u.data, 0, z, y-1, x).real; destIm(x,y,z).y = DIRECT_NZYX_ELEM(u.data, 0, z, y, x).imag - DIRECT_NZYX_ELEM(u.data, 0, z, y-1, x).imag; } } } void FilterHelper::blendSoft(const Image& src0, const Image& src1, const Volume& mask, Image& dest, RFLOAT bias1) { const long int w = src0.data.xdim; const long int h = src0.data.ydim; const long int d = src0.data.zdim; for (long int z = 0; z < d; z++) for (long int y = 0; y < h; y++) for (long int x = 0; x < w; x++) { const Complex v0 = DIRECT_NZYX_ELEM(src0.data, 0, z, y, x); const Complex v1 = DIRECT_NZYX_ELEM(src1.data, 0, z, y, x); const RFLOAT m = mask(x,y,z); DIRECT_NZYX_ELEM(dest.data, 0, z, y, x) = (v0 + bias1*m*v1)/(1.0 + bias1*m); } } double FilterHelper::totalVariation(const Image& src) { double sum = 0.0; FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY2D(src.data) { if (i == src.data.ydim - 1 || j == src.data.xdim - 1) continue; double v0 = DIRECT_A2D_ELEM(src.data, i, j); double vx = DIRECT_A2D_ELEM(src.data, i, j+1); double vy = DIRECT_A2D_ELEM(src.data, i+1, j); double dx = vx - v0; double dy = vy - v0; double dtv = sqrt(dx*dx + dy*dy); sum += dtv; } return sum; } double FilterHelper::totalLogVariation(const Image& src, double delta) { double sum = 0.0; FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY2D(src.data) { if (i == src.data.ydim - 1 || j == src.data.xdim - 1) continue; double v0 = DIRECT_A2D_ELEM(src.data, i, j); double vx = DIRECT_A2D_ELEM(src.data, i, j+1); double vy = DIRECT_A2D_ELEM(src.data, i+1, j); double dx = vx - v0; double dy = vy - v0; double dtv = log(delta + sqrt(dx*dx + dy*dy)); sum += dtv; } return sum; } void FilterHelper::separableGaussianXYZ(const Image& src, Image& dest, RFLOAT sigma, int k) { if (k < 0) { k = (int)(2*sigma + 0.5); } dest.data.resize(src.data); std::vector kernel(2*k+1); const RFLOAT s2 = sigma*sigma; for (int i = -k; i <= k; i++) { kernel[i+k] = exp(-0.5*i*i/s2); } Image temp(src.data.xdim, src.data.ydim, src.data.zdim); for (size_t z = 0; z < src.data.zdim; z++) 
for (size_t y = 0; y < src.data.ydim; y++) for (size_t x = 0; x < src.data.xdim; x++) { RFLOAT v = 0; RFLOAT m = 0; for (long int i = -k; i <= k; i++) { const long int xx = x + i; if (xx < 0 || xx >= src.data.xdim) continue; v += kernel[i+k] * DIRECT_A3D_ELEM(src.data, z, y, xx); m += kernel[i+k]; } DIRECT_A3D_ELEM(dest.data, z, y, x) = v/m; } for (size_t z = 0; z < src.data.zdim; z++) for (size_t y = 0; y < src.data.ydim; y++) for (size_t x = 0; x < src.data.xdim; x++) { RFLOAT v = 0; RFLOAT m = 0; for (long int i = -k; i <= k; i++) { const long int yy = y + i; if (yy < 0 || yy >= src.data.ydim) continue; v += kernel[i+k] * DIRECT_A3D_ELEM(dest.data, z, yy, x); m += kernel[i+k]; } DIRECT_A3D_ELEM(temp.data, z, y, x) = v/m; } for (size_t z = 0; z < src.data.zdim; z++) for (size_t y = 0; y < src.data.ydim; y++) for (size_t x = 0; x < src.data.xdim; x++) { RFLOAT v = 0; RFLOAT m = 0; for (long int i = -k; i <= k; i++) { const long int zz = z + i; if (zz < 0 || zz >= src.data.zdim) continue; v += kernel[i+k] * DIRECT_A3D_ELEM(temp.data, zz, y, x); m += kernel[i+k]; } DIRECT_A3D_ELEM(dest.data, z, y, x) = v/m; } } void FilterHelper::separableGaussianXY(const Image& src, Image& dest, RFLOAT sigma, int k, bool wrap) { if (!dest.data.sameShape(src.data)) { dest.data.resize(src.data); } if (sigma <= 0.0) { for (size_t z = 0; z < src.data.zdim; z++) for (size_t y = 0; y < src.data.ydim; y++) for (size_t x = 0; x < src.data.xdim; x++) { DIRECT_A3D_ELEM(dest.data, z, y, x) = DIRECT_A3D_ELEM(src.data, z, y, x); } return; } if (k < 0) { k = (int)(2*sigma + 0.5); } std::vector kernel(2*k+1); const RFLOAT s2 = sigma*sigma; for (int i = -k; i <= k; i++) { kernel[i+k] = exp(-0.5*i*i/s2); } Image temp(src.data.xdim, src.data.ydim, src.data.zdim); for (size_t z = 0; z < src.data.zdim; z++) for (size_t y = 0; y < src.data.ydim; y++) for (size_t x = 0; x < src.data.xdim; x++) { RFLOAT v = 0; RFLOAT m = 0; for (long int i = -k; i <= k; i++) { long int xx = x + i; if (wrap) xx = (xx + src.data.xdim) % src.data.xdim; else if (xx < 0 || xx >= src.data.xdim) continue; v += kernel[i+k] * DIRECT_A3D_ELEM(src.data, z, y, xx); m += kernel[i+k]; } DIRECT_A3D_ELEM(temp.data, z, y, x) = v/m; } for (size_t z = 0; z < src.data.zdim; z++) for (size_t y = 0; y < src.data.ydim; y++) for (size_t x = 0; x < src.data.xdim; x++) { RFLOAT v = 0; RFLOAT m = 0; for (long int i = -k; i <= k; i++) { long int yy = y + i; if (wrap) yy = (yy + src.data.ydim) % src.data.ydim; else if (yy < 0 || yy >= temp.data.ydim) continue; v += kernel[i+k] * DIRECT_A3D_ELEM(temp.data, z, yy, x); m += kernel[i+k]; } DIRECT_A3D_ELEM(dest.data, z, y, x) = v/m; } } void FilterHelper::separableGaussianX_wrap(const Image& src, const Image& mask, Image& dest, RFLOAT sigma, int k) { if (k < 0) { k = (int)(2*sigma + 0.5); } dest.data.resize(src.data); std::vector kernel(2*k+1); const RFLOAT s2 = sigma*sigma; for (int i = -k; i <= k; i++) { kernel[i+k] = exp(-0.5*i*i/s2); } for (size_t z = 0; z < src.data.zdim; z++) for (size_t y = 0; y < src.data.ydim; y++) for (size_t x = 0; x < src.data.xdim; x++) { RFLOAT v = 0; RFLOAT m = 0; for (long int i = -k; i <= k; i++) { long int xx = (x + i + src.data.xdim) % src.data.xdim; if (xx < 0 || xx >= src.data.xdim) continue; v += kernel[i+k] * DIRECT_A3D_ELEM(mask.data, z, y, xx) * DIRECT_A3D_ELEM(src.data, z, y, xx); m += kernel[i+k] * DIRECT_A3D_ELEM(mask.data, z, y, xx); } if (m > 0.0) { DIRECT_A3D_ELEM(dest.data, z, y, x) = v/m; } } } void FilterHelper::separableGaussianX_wrap(const Image& src, Image& dest, RFLOAT 
sigma, int k) { if (k < 0) { k = (int)(2*sigma + 0.5); } dest.data.resize(src.data); std::vector kernel(2*k+1); const RFLOAT s2 = sigma*sigma; for (int i = -k; i <= k; i++) { kernel[i+k] = exp(-0.5*i*i/s2); } for (size_t z = 0; z < src.data.zdim; z++) for (size_t y = 0; y < src.data.ydim; y++) for (size_t x = 0; x < src.data.xdim; x++) { RFLOAT v = 0; RFLOAT m = 0; for (long int i = -k; i <= k; i++) { long int xx = (x + i + src.data.xdim) % src.data.xdim; if (xx < 0 || xx >= src.data.xdim) continue; v += kernel[i+k] * DIRECT_A3D_ELEM(src.data, z, y, xx); m += kernel[i+k]; } if (m > 0.0) { DIRECT_A3D_ELEM(dest.data, z, y, x) = v/m; } } } void FilterHelper::averageX(const Image& src, const Image& mask, Image& dest) { dest.data.resize(src.data); for (size_t z = 0; z < src.data.zdim; z++) for (size_t y = 0; y < src.data.ydim; y++) { RFLOAT v = 0; RFLOAT m = 0; for (size_t x = 0; x < src.data.xdim; x++) { v += DIRECT_A3D_ELEM(mask.data, z, y, x) * DIRECT_A3D_ELEM(src.data, z, y, x); m += DIRECT_A3D_ELEM(mask.data, z, y, x); } if (m > 0.0) { v /= m; } for (size_t x = 0; x < src.data.xdim; x++) { DIRECT_A3D_ELEM(dest.data, z, y, x) = v; } } } relion-3.1.3/src/jaz/img_proc/filter_helper.h000066400000000000000000000446241411340063500211640ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #ifndef FILTER_HELPER_H #define FILTER_HELPER_H #include #include #include #include #include #include #include #include #include class FilterHelper { public: template static void binomial3x3_2D(const Image& src, Image& dest, bool wrap = false); template static void separableGaussian(const Volume& src, Volume& dest, double sigma, int k = -1); template static void separableGaussian(const MultidimArray& src, MultidimArray& dest, double sigma, int k = -1); template static void separableGaussianWrap(const MultidimArray& src, MultidimArray& dest, double sigma, int k = -1); static void separableGaussianFreq( const MultidimArray& src, MultidimArray& dest, double sigma, int k = -1); static void separableGaussianFreqXY( const MultidimArray& src, MultidimArray& dest, double sigma, int k = -1); static void drawTestPattern(Image& img, int squareSize); static void drawTestPattern(Volume& volume, int squareSize); static Image expImg(Image& img, double scale = 1.0); static Image logImg(Image& img, double thresh = 1e-20, double scale = 1.0); static Image padCorner2D(Image& img, double factor); static Image padCorner2D(Image& img, double factor); static Image padCorner2D(const Image &img, int w, int h); static Image cropCorner2D(const Image &img, int w, int h); static Image cropCorner2D(const Image &img, int w, int h); static Image zeroOutsideCorner2D(Image& img, double radius); static void GaussianEnvelopeCorner2D(Image& img, double sigma); static Image raisedCosEnvCorner2D(Image& img, double radIn, double radOut); static Image raisedCosEnvCorner2DFull(Image& img, double radIn, double radOut); static Image raisedCosEnvCorner3D(Image& img, double radIn, double radOut); static Image raisedCosEnvFreq2D(const Image& img, double radIn, double radOut); static Image raisedCosEnvRingFreq2D(const Image& img, double rad0, double rad1, double stepWidth); static void lowPassFilter(Image& img, double maxFreq0, double maxFreq1, Image& dest); static void lowPassFilterSpectrum(MultidimArray& spectrum, double maxFreq0, double maxFreq1); static RFLOAT averageValue(Image& img); static RFLOAT maxValue(Image& img); static void phaseFlip(Image& img, CTF& ctf, RFLOAT angpix, Image& dest); static void applyBeamTilt(Image& img, RFLOAT beamtilt_x, RFLOAT beamtilt_y, RFLOAT lambda, RFLOAT Cs, RFLOAT angpix, int s, Image& dest); static void modulate(Image& img, CTF& ctf, RFLOAT angpix, Image& dest); static void modulate(Image& imgFreq, CTF& ctf, RFLOAT angpix, Image& dest); static void modulate(MultidimArray& imgFreq, CTF& ctf, RFLOAT angpix); static void drawCtf(CTF& ctf, RFLOAT angpix, Image& dest); static void wienerFilter(Image& img, CTF& ctf, RFLOAT angpix, RFLOAT eps, RFLOAT Bfac, Image& dest); static void richardsonLucy(Image& img, CTF& ctf, RFLOAT angpix, RFLOAT eps, int iterations, Image& dest); static void rampFilter(Image& img, RFLOAT s0, RFLOAT t1, double ux, double uy, Image& dest); static void rampFilter3D(Image& img, RFLOAT s0, RFLOAT t1, double tx, double ty, double tz); static void doubleRampFilter3D(Image& img, RFLOAT s0, RFLOAT t1, double tx, double ty, double tz); static void getPhase(const Image& img, Image& dest); static void getAbs(const Image& img, Image& dest); static void getReal(const Image& img, Image& dest); static void getImag(const Image& img, Image& dest); static void powerSpectrum2D(Image& img, Volume& spectrum); static void equiphaseAverage2D(const Volume& src, Volume& dest); static void threshold(Image& src, RFLOAT t, 
Image& dest); static void fill(Image& dest, RFLOAT v); static void linearTransform(Image& src, RFLOAT m, RFLOAT q, Image& dest); static void linearCombination(Image& src0, Image& src1, RFLOAT a0, RFLOAT a1, Image& dest); static void linearCombination(const Volume& src0, const Volume& src1, RFLOAT a0, RFLOAT a1, Volume& dest); static void sumUp(const std::vector >& src, Image& dest); static double L1distance(const Image& i0, const Image& i1, int x0 = 0, int y0 = 0, int w = -1, int h = -1); static double L2distance(const Image& i0, const Image& i1, int x0 = 0, int y0 = 0, int w = -1, int h = -1); static double NCC(const Image& i0, const Image& i1, int x0 = 0, int y0 = 0, int w = -1, int h = -1); static void multiply(Image& i0, Image& i1, Image& dest); static void multiply(Image& i0, Image& i1, Image& dest); static void wienerDivide(Image& num, Image& denom, RFLOAT eps, Image& dest); static void divide(Image& num, Volume& denom, RFLOAT eps, Image& dest); static void divide(Image& num, Image& denom, RFLOAT eps, Image& dest); static void divideExcessive(Image& num, Volume& denom, RFLOAT theta, Image& dest); static void wienerDeconvolve(Image& num, Image& denom, RFLOAT theta, Image& dest); static void extract2D(const Image& src, Image& dest, long int x0, long int y0, long int w, long int h); static void extract(const Volume& src, Volume& dest, long int x0, long int y0, long int z0, long int w, long int h, long int d); static void signedDist(const Image& src, Image& dest); static void erode3x3(Image& src, Image& dest); static void localMinima(Image& src, Image& dest, RFLOAT thresh); static std::vector localMinima(Image& src, RFLOAT thresh); static void uniqueInfluenceMask(std::vector pts, Image& dest, Image& indexDest, RFLOAT thresh); static void polarRemap(gravis::d2Vector pos, const Image& src, Image& dest, const Image& mask, Image& maskDest, int phiRes, int rRes, double rMax); static void polarRemap(gravis::d2Vector pos, const Image& distTransf, const Image& src, Image& dest, const Image& mask, Image& maskDest, int phiRes, int rRes, double rMax); static Image cartToPolar(const Image& img); static Image polarToCart(const Image& img); static Image polarBlur(const Image& img, double sigma); static Image sectorBlend(const Image& img0, const Image& img1, int sectors); static void diffuseAlongIsocontours2D(const Image& src, const Image& guide, Image& dest, int iters, RFLOAT sigma, RFLOAT lambda, RFLOAT delta); static void EED_2D(const Image& src, Image& dest, int iters, double sigma, double delta, double tau); static void descendTV(const Image& src, Image& dest, double delta); static void descendTV2(const Image& src, Image& dest, Volume& xi, Volume& uBar, int iters, double sigma, double tau); static void segmentTV(const Image& src, Image& dest, Volume& xi, Volume& uBar, int iters, double sigma, double tau, double nu); static void segmentTVAniso2D(const Image& src, Image& dest, Volume& xi, Volume& uBar, int iters, double sigma, double tau, double nu, double rho, double theta, double alpha); static void fwdGrad(const Image& u, Volume& dest); static void fwdGrad2D(const Image& u, Volume& dest); static void centralGrad2D(const Image& u, Volume& dest); static void centralGrad2D(const Image& u, Volume& destRe, Volume& destIm); static void blendSoft(const Image& src0, const Image& src1, const Volume& mask, Image& dest, RFLOAT bias1 = 1.0); static double totalVariation(const Image& src); static double totalLogVariation(const Image& src, double delta = 1.0); static void separableGaussianXYZ(const Image& src, 
Image& dest, RFLOAT sigma, int k = -1); static void separableGaussianXY(const Image& src, Image& dest, RFLOAT sigma, int k = -1, bool wrap = false); static void separableGaussianX_wrap(const Image& src, const Image& mask, Image& dest, RFLOAT sigma, int k = -1); static void separableGaussianX_wrap(const Image& src, Image& dest, RFLOAT sigma, int k = -1); static void averageX(const Image& src, const Image& mask, Image& dest); static void centralGradient(const Volume& src, Volume >& dest); static gravis::t3Vector centralGradient(const Volume& src, size_t x, size_t y, size_t z); static MultidimArray FriedelExpand(const MultidimArray& half); static Image normaliseToUnitInterval(const Image& img); static Image normaliseToUnitIntervalSigned(const Image& img); }; template void FilterHelper::binomial3x3_2D(const Image& src, Image& dest, bool wrap) { const size_t w = src.data.xdim; const size_t h = src.data.ydim; const size_t d = src.data.zdim; dest.data.reshape(d,h,w); Image temp(w,h); std::vector kernel = {0.25, 0.5, 0.25}; for (size_t z = 0; z < d; z++) for (size_t y = 0; y < h; y++) for (size_t x = 0; x < w; x++) { T v = 0; double m = 0; for (int i = -1; i <= 1; i++) { int xx = x + i; if (wrap) xx = (xx + w) % w; else if (xx < 0 || xx >= w) continue; v += kernel[i+1] * DIRECT_NZYX_ELEM(src(), 0, z, y, xx); m += kernel[i+1]; } DIRECT_NZYX_ELEM(temp(), 0, z, y, x) = v/m; } for (size_t z = 0; z < d; z++) for (size_t y = 0; y < h; y++) for (size_t x = 0; x < w; x++) { T v = 0; double m = 0; for (int i = -1; i <= 1; i++) { int yy = y + i; if (wrap) yy = (yy + h) % h; else if (yy < 0 || yy >= h) continue; v += kernel[i+1] * DIRECT_NZYX_ELEM(temp(), 0, z, yy, x); m += kernel[i+1]; } DIRECT_NZYX_ELEM(dest(), 0, z, y, x) = v/m; } } template void FilterHelper::separableGaussian(const Volume& src, Volume& dest, double sigma, int k) { if (k < 0) { k = (int)(2*sigma + 0.5); } dest.resize(src); std::vector kernel(2*k+1); const double s2 = sigma*sigma; for (int i = -k; i <= k; i++) { kernel[i+k] = exp(-i*i/s2); } Volume temp(src.dimx, src.dimy, src.dimz); for (size_t z = 0; z < src.dimz; z++) for (size_t y = 0; y < src.dimy; y++) for (size_t x = 0; x < src.dimx; x++) { T v = 0; double m = 0; for (long int i = -k; i <= k; i++) { const long int xx = x + i; if (xx < 0 || xx >= src.dimx) continue; v += kernel[i+k] * src(xx,y,z); m += kernel[i+k]; } dest(x,y,z) = v/m; } for (size_t z = 0; z < src.dimz; z++) for (size_t y = 0; y < src.dimy; y++) for (size_t x = 0; x < src.dimx; x++) { T v = 0; double m = 0; for (long int i = -k; i <= k; i++) { const long int yy = y + i; if (yy < 0 || yy >= src.dimy) continue; v += kernel[i+k] * dest(x,yy,z); m += kernel[i+k]; } temp(x,y,z) = v/m; } for (size_t z = 0; z < src.dimz; z++) for (size_t y = 0; y < src.dimy; y++) for (size_t x = 0; x < src.dimx; x++) { T v = 0; double m = 0; for (long int i = -k; i <= k; i++) { const long int zz = z + i; if (zz < 0 || zz >= src.dimz) continue; v += kernel[i+k] * temp(x,y,zz); m += kernel[i+k]; } dest(x,y,z) = v/m; } } template void FilterHelper::separableGaussian(const MultidimArray& src, MultidimArray& dest, double sigma, int k) { if (k < 0) { k = (int)(2*sigma + 0.5); } dest.reshape(src); std::vector kernel(2*k+1); const double s2 = sigma*sigma; for (int i = -k; i <= k; i++) { kernel[i+k] = exp(-0.5*i*i/s2); } MultidimArray temp(src.zdim, src.ydim, src.xdim); for (size_t z = 0; z < src.zdim; z++) for (size_t y = 0; y < src.ydim; y++) for (size_t x = 0; x < src.xdim; x++) { T v = 0; double m = 0; for (long int i = -k; i <= k; i++) { const 
long int xx = x + i; if (xx < 0 || xx >= src.xdim) continue; v += kernel[i+k] * DIRECT_NZYX_ELEM(src, 0, z, y, xx); m += kernel[i+k]; } DIRECT_NZYX_ELEM(dest, 0, z, y, x) = v/m; } for (size_t z = 0; z < src.zdim; z++) for (size_t y = 0; y < src.ydim; y++) for (size_t x = 0; x < src.xdim; x++) { T v = 0; double m = 0; for (long int i = -k; i <= k; i++) { const long int yy = y + i; if (yy < 0 || yy >= src.ydim) continue; v += kernel[i+k] * DIRECT_NZYX_ELEM(dest, 0, z, yy, x); m += kernel[i+k]; } DIRECT_NZYX_ELEM(temp, 0, z, y, x) = v/m; } for (size_t z = 0; z < src.zdim; z++) for (size_t y = 0; y < src.ydim; y++) for (size_t x = 0; x < src.xdim; x++) { T v = 0; double m = 0; for (long int i = -k; i <= k; i++) { const long int zz = z + i; if (zz < 0 || zz >= src.zdim) continue; v += kernel[i+k] * DIRECT_NZYX_ELEM(temp, 0, zz, y, x); m += kernel[i+k]; } DIRECT_NZYX_ELEM(dest, 0, z, y, x) = v/m; } } template void FilterHelper::separableGaussianWrap(const MultidimArray& src, MultidimArray& dest, double sigma, int k) { if (k < 0) { k = (int)(2*sigma + 0.5); } dest.reshape(src); std::vector kernel(2*k+1); const double s2 = sigma*sigma; for (int i = -k; i <= k; i++) { kernel[i+k] = exp(-0.5*i*i/s2); } MultidimArray temp(src.zdim, src.ydim, src.xdim); for (size_t z = 0; z < src.zdim; z++) for (size_t y = 0; y < src.ydim; y++) for (size_t x = 0; x < src.xdim; x++) { T v = 0; double m = 0; for (long int i = -k; i <= k; i++) { const long int xx = (src.xdim + x + i) % src.xdim; v += kernel[i+k] * DIRECT_NZYX_ELEM(src, 0, z, y, xx); m += kernel[i+k]; } DIRECT_NZYX_ELEM(dest, 0, z, y, x) = v/m; } for (size_t z = 0; z < src.zdim; z++) for (size_t y = 0; y < src.ydim; y++) for (size_t x = 0; x < src.xdim; x++) { T v = 0; double m = 0; for (long int i = -k; i <= k; i++) { const long int yy = (src.ydim + y + i) % src.ydim; v += kernel[i+k] * DIRECT_NZYX_ELEM(dest, 0, z, yy, x); m += kernel[i+k]; } DIRECT_NZYX_ELEM(temp, 0, z, y, x) = v/m; } for (size_t z = 0; z < src.zdim; z++) for (size_t y = 0; y < src.ydim; y++) for (size_t x = 0; x < src.xdim; x++) { T v = 0; double m = 0; for (long int i = -k; i <= k; i++) { const long int zz = (src.zdim + z + i) % src.zdim; v += kernel[i+k] * DIRECT_NZYX_ELEM(temp, 0, zz, y, x); m += kernel[i+k]; } DIRECT_NZYX_ELEM(dest, 0, z, y, x) = v/m; } } #endif relion-3.1.3/src/jaz/img_proc/image_op.cpp000066400000000000000000000017071411340063500204460ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ // This file intentionally left blank relion-3.1.3/src/jaz/img_proc/image_op.h000066400000000000000000000203561411340063500201140ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef IMAGE_OPS_H #define IMAGE_OPS_H #include class ImageOp { public: template static void linearCombination(const Image& src0, const Image& src1, T2 a0, T2 a1, Image& dest); template static void multiply(const Image& i0, const Image& i1, Image& dest); template static void multiplyBy(Image& dest, const Image& i1); template static void linearCombination(const Image& src0, T1 src1, T2 a0, T2 a1, Image& dest); template static void linearCombination(const MultidimArray& src0, const MultidimArray& src1, T2 a0, T2 a1, MultidimArray& dest); template static void multiply(const MultidimArray& i0, const MultidimArray& i1, MultidimArray& dest); template static void linearCombination(const MultidimArray& src0, T1 src1, T2 a0, T2 a1, MultidimArray& dest); template static void flipX(const MultidimArray& src0, MultidimArray& dest); template static void flipY(const MultidimArray& src0, MultidimArray& dest); template static void rotate90(const MultidimArray& src0, MultidimArray& dest); template static void rotate180(const MultidimArray& src0, MultidimArray& dest); template static void rotate270(const MultidimArray& src0, MultidimArray& dest); }; template void ImageOp :: linearCombination(const Image& src0, const Image& src1, T2 a0, T2 a1, Image& dest) { for (long int n = 0; n < src0.data.ndim; n++) for (long int z = 0; z < src0.data.zdim; z++) for (long int y = 0; y < src0.data.ydim; y++) for (long int x = 0; x < src0.data.xdim; x++) { DIRECT_NZYX_ELEM(dest.data, n, z, y, x) = a0 * DIRECT_NZYX_ELEM(src0.data, n, z, y, x) + a1 * DIRECT_NZYX_ELEM(src1.data, n, z, y, x); } } template void ImageOp :: multiply(const Image& i0, const Image& i1, Image& dest) { dest = Image(i0.data.xdim, i0.data.ydim, i0.data.zdim, i0.data.ndim); for (long int n = 0; n < i0.data.ndim; n++) for (long int z = 0; z < i0.data.zdim; z++) for (long int y = 0; y < i0.data.ydim; y++) for (long int x = 0; x < i0.data.xdim; x++) { DIRECT_NZYX_ELEM(dest.data, n, z, y, x) = DIRECT_NZYX_ELEM(i0.data, n, z, y, x) * DIRECT_NZYX_ELEM(i1.data, n, z, y, x); } } template void ImageOp :: multiplyBy(Image& dest, const Image& i1) { for (long int n = 0; n < dest.data.ndim; n++) for (long int z = 0; z < dest.data.zdim; z++) for (long int y = 0; y < dest.data.ydim; y++) for (long int x = 0; x < dest.data.xdim; x++) { DIRECT_NZYX_ELEM(dest.data, n, z, y, x) *= DIRECT_NZYX_ELEM(i1.data, n, z, y, x); } } template void ImageOp :: 
linearCombination(const Image& src0, T1 src1, T2 a0, T2 a1, Image& dest) { for (long int n = 0; n < src0.data.ndim; n++) for (long int z = 0; z < src0.data.zdim; z++) for (long int y = 0; y < src0.data.ydim; y++) for (long int x = 0; x < src0.data.xdim; x++) { DIRECT_NZYX_ELEM(dest.data, n, z, y, x) = a0 * DIRECT_NZYX_ELEM(src0.data, n, z, y, x) + a1 * src1; } } template void ImageOp :: linearCombination(const MultidimArray& src0, const MultidimArray& src1, T2 a0, T2 a1, MultidimArray& dest) { for (long int n = 0; n < src0.ndim; n++) for (long int z = 0; z < src0.zdim; z++) for (long int y = 0; y < src0.ydim; y++) for (long int x = 0; x < src0.xdim; x++) { DIRECT_NZYX_ELEM(dest, n, z, y, x) = a0 * DIRECT_NZYX_ELEM(src0, n, z, y, x) + a1 * DIRECT_NZYX_ELEM(src1, n, z, y, x); } } template void ImageOp :: multiply(const MultidimArray& i0, const MultidimArray& i1, MultidimArray& dest) { dest = MultidimArray(i0.xdim, i0.ydim, i0.zdim, i0.ndim); for (long int n = 0; n < i0.ndim; n++) for (long int z = 0; z < i0.zdim; z++) for (long int y = 0; y < i0.ydim; y++) for (long int x = 0; x < i0.xdim; x++) { DIRECT_NZYX_ELEM(dest, n, z, y, x) = DIRECT_NZYX_ELEM(i0, n, z, y, x) * DIRECT_NZYX_ELEM(i1, n, z, y, x); } } template void ImageOp :: linearCombination(const MultidimArray& src0, T1 src1, T2 a0, T2 a1, MultidimArray& dest) { for (long int n = 0; n < src0.ndim; n++) for (long int z = 0; z < src0.zdim; z++) for (long int y = 0; y < src0.ydim; y++) for (long int x = 0; x < src0.xdim; x++) { DIRECT_NZYX_ELEM(dest, n, z, y, x) = a0 * DIRECT_NZYX_ELEM(src0, n, z, y, x) + a1 * src1; } } // flip 'left-to-right' // MotionCor2's FlipGain 2 template void ImageOp :: flipX(const MultidimArray& src0, MultidimArray& dest) { dest.reshape(src0); for (long int n = 0; n < src0.ndim; n++) for (long int z = 0; z < src0.zdim; z++) for (long int y = 0; y < src0.ydim; y++) for (long int x = 0; x < src0.xdim; x++) { DIRECT_NZYX_ELEM(dest, n, z, y, x) = DIRECT_NZYX_ELEM(src0, n, z, y, src0.xdim - 1 - x); } } // flip 'upside down' // MotionCor2's FlipGain 1 template void ImageOp :: flipY(const MultidimArray& src0, MultidimArray& dest) { dest.reshape(src0); for (long int n = 0; n < src0.ndim; n++) for (long int z = 0; z < src0.zdim; z++) for (long int y = 0; y < src0.ydim; y++) for (long int x = 0; x < src0.xdim; x++) { DIRECT_NZYX_ELEM(dest, n, z, y, x) = DIRECT_NZYX_ELEM(src0, n, z, src0.ydim - 1 - y, x); } } // This is equivalent to MotionCor2's -RotGain 1. // In relion_display, this looks clock-wise. 
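// Added note (not part of the original RELION source): the mapping below writes
// dest(x, ydim-1-y) = src(y, x) into an array whose x/y dimensions are swapped,
// which matches the clock-wise appearance mentioned above. Worked example: for a
// 2x3 source (ydim=2, xdim=3), the element at (y=0, x=2) lands at (y=2, x=1) of
// the 3x2 result. A minimal, hypothetical usage sketch:
//
//   MultidimArray<RFLOAT> gain, rotated;   // e.g. a gain reference
//   ImageOp::rotate90(gain, rotated);      // rotated.xdim == gain.ydim, rotated.ydim == gain.xdim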
template void ImageOp :: rotate90(const MultidimArray& src0, MultidimArray& dest) { dest.reshape(src0.ndim, src0.zdim, src0.xdim, src0.ydim); for (long int n = 0; n < src0.ndim; n++) for (long int z = 0; z < src0.zdim; z++) for (long int y = 0; y < src0.ydim; y++) for (long int x = 0; x < src0.xdim; x++) { DIRECT_NZYX_ELEM(dest, n, z, x, src0.ydim - 1 - y) = DIRECT_NZYX_ELEM(src0, n, z, y, x); } } // MotionCor2's RotGain 2 template void ImageOp :: rotate180(const MultidimArray& src0, MultidimArray& dest) { dest.reshape(src0); for (long int n = 0; n < src0.ndim; n++) for (long int z = 0; z < src0.zdim; z++) for (long int y = 0; y < src0.ydim; y++) for (long int x = 0; x < src0.xdim; x++) { DIRECT_NZYX_ELEM(dest, n, z, src0.ydim - 1 - y, src0.xdim - 1 - x) = DIRECT_NZYX_ELEM(src0, n, z, y, x); } } // MotionCor2's RotGain 3 template void ImageOp :: rotate270(const MultidimArray& src0, MultidimArray& dest) { dest.reshape(src0.ndim, src0.zdim, src0.xdim, src0.ydim); for (long int n = 0; n < src0.ndim; n++) for (long int z = 0; z < src0.zdim; z++) for (long int y = 0; y < src0.ydim; y++) for (long int x = 0; x < src0.xdim; x++) { DIRECT_NZYX_ELEM(dest, n, z, src0.xdim - 1 - x, y) = DIRECT_NZYX_ELEM(src0, n, z, y, x); } } #endif relion-3.1.3/src/jaz/img_proc/radial_avg.h000066400000000000000000000050601411340063500204200ustar00rootroot00000000000000#ifndef RADIAL_AVG_H #define RADIAL_AVG_H #include #include class RadialAvg { public: template static std::vector> radialAverageAndStdDevFFTW_2D(const Image& map) { const int w = map.data.xdim; const int h = map.data.ydim; const int b = w; std::vector avg(b, 0.0); std::vector wgh(b, 0.0); std::vector var(b, 0.0); for (int yy = 0; yy < h; yy++) for (int xx = 0; xx < w; xx++) { double x = xx; double y = yy < h/2.0? yy : yy - h; double rd = sqrt(x*x + y*y); int r = (int)(rd+0.5); if (r < b) { avg[r] += DIRECT_A2D_ELEM(map.data, yy, xx); wgh[r] += 1.0; } } for (int i = 0; i < b; i++) { if (wgh[i] > 0.0) { avg[i] /= wgh[i]; } } for (int yy = 0; yy < h; yy++) for (int xx = 0; xx < w; xx++) { double x = xx; double y = yy < h/2.0? yy : yy - h; double rd = sqrt(x*x + y*y); int r = (int)(rd+0.5); T mu = avg[r]; T v = DIRECT_A2D_ELEM(map.data, yy, xx) - mu; if (r < b) { var[r] += v*v; } } for (int i = 0; i < b; i++) { if (wgh[i] > 1.0) { var[i] /= (wgh[i]-1); } } std::vector> out(b); for (int i = 0; i < b; i++) { out[i] = std::make_pair(avg[i], sqrt(var[i])); } return out; } template static std::vector radialAverageFFTW_2D(const Image& map) { const int w = map.data.xdim; const int h = map.data.ydim; const int b = w; std::vector avg(b, 0.0); std::vector wgh(b, 0.0); for (int yy = 0; yy < h; yy++) for (int xx = 0; xx < w; xx++) { double x = xx; double y = yy < h/2.0? yy : yy - h; double rd = sqrt(x*x + y*y); int r = (int)(rd+0.5); if (r < b) { avg[r] += DIRECT_A2D_ELEM(map.data, yy, xx); wgh[r] += 1.0; } } for (int i = 0; i < b; i++) { if (wgh[i] > 0.0) { avg[i] /= wgh[i]; } } return avg; } template static std::vector radialSumFFTW_2D(const Image& map) { const int w = map.data.xdim; const int h = map.data.ydim; const int b = w; std::vector sum(b, 0.0); for (int yy = 0; yy < h; yy++) for (int xx = 0; xx < w; xx++) { double x = xx; double y = yy < h/2.0? 
yy : yy - h; double rd = sqrt(x*x + y*y); int r = (int)(rd+0.5); if (r < b) { sum[r] += DIRECT_A2D_ELEM(map.data, yy, xx); } } return sum; } }; #endif relion-3.1.3/src/jaz/index_sort.h000066400000000000000000000033241411340063500167070ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef INDEX_SORT_H #define INDEX_SORT_H #include #include template class IndexSort { public: static std::vector sortIndices(const std::vector& data) { const int s = (int) data.size(); std::vector indices(s); for (int i = 0; i < s; i++) { indices[i] = i; } sort(indices.begin(), indices.end(), IndexComparator(data)); return indices; } struct IndexComparator { IndexComparator(const std::vector& data) : data(data) {} bool operator()(const int a, const int b) const { return data[a] < data[b]; } const std::vector& data; }; }; #endif relion-3.1.3/src/jaz/interpolation.cpp000066400000000000000000000222051411340063500177520ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #include #include using namespace gravis; bool Interpolation::isInSlice(const Image& img, double x, double y) { return x >= 0.0 && x < img.data.xdim-1 && y >= 0.0 && y < img.data.ydim-1; } double Interpolation::getTaperWeight(const Image& img, double x, double y, double rx, double ry) { double wx(1.0), wy(1.0); if (x < rx) wx *= (1.0 - cos(PI * (x+1) / rx))/2.0; if (x >= img.data.xdim - rx) wx *= (1.0 - cos(PI * (img.data.xdim - x) / rx))/2.0; if (y < ry) wy *= (1.0 - cos(PI * (y+1) / ry))/2.0; if (y >= img.data.ydim - ry) wy *= (1.0 - cos(PI * (img.data.ydim - y) / ry))/2.0; return wx * wy; } double Interpolation::linearXY(const Image& img, double x, double y, int n) { if (!(x >= 0.0 && x < img.data.xdim-1 && y >= 0.0 && y < img.data.ydim-1)) { return 0.0; } const int xi = (int)x; const int yi = (int)y; const double xf = x - xi; const double yf = y - yi; const double f00 = DIRECT_NZYX_ELEM(img.data, n, 0, yi, xi); const double f01 = DIRECT_NZYX_ELEM(img.data, n, 0, yi, xi+1); const double f10 = DIRECT_NZYX_ELEM(img.data, n, 0, yi+1, xi); const double f11 = DIRECT_NZYX_ELEM(img.data, n, 0, yi+1, xi+1); const double f0 = xf * f01 + (1.0 - xf) * f00; const double f1 = xf * f11 + (1.0 - xf) * f10; const double f = yf * f1 + (1.0 - yf) * f0; return f; } double Interpolation::cubic1D(double y0, double y1, double y2, double y3, double t) { const d4Matrix A( -1.0/2.0, 3.0/2.0, -3.0/2.0, 1.0/2.0, 1.0, -5.0/2.0, 2.0, -1.0/2.0, -1.0/2.0, 0.0, 1.0/2.0, 0.0, 0.0, 1.0, 0.0, 0.0); d4Vector y(y0, y1, y2, y3); d4Vector c = A*y; d4Vector x(t*t*t, t*t, t, 1.0); return x.dot(c); } Complex Interpolation::linear3D(const Image& img, double x, double y, double z) { if (!(x >= 0.0 && x < img.data.xdim-1 && y >= 0.0 && y < img.data.ydim-1 && z >= 0.0 && z < img.data.zdim-1)) { return 0.0; } const int xi = (int)x; const int yi = (int)y; const int zi = (int)z; const double xf = x - xi; const double yf = y - yi; const double zf = z - zi; const Complex f000 = DIRECT_NZYX_ELEM(img.data, 0, zi, yi, xi); const Complex f001 = DIRECT_NZYX_ELEM(img.data, 0, zi, yi, xi+1); const Complex f010 = DIRECT_NZYX_ELEM(img.data, 0, zi, yi+1, xi); const Complex f011 = DIRECT_NZYX_ELEM(img.data, 0, zi, yi+1, xi+1); const Complex f100 = DIRECT_NZYX_ELEM(img.data, 0, zi+1, yi, xi); const Complex f101 = DIRECT_NZYX_ELEM(img.data, 0, zi+1, yi, xi+1); const Complex f110 = DIRECT_NZYX_ELEM(img.data, 0, zi+1, yi+1, xi); const Complex f111 = DIRECT_NZYX_ELEM(img.data, 0, zi+1, yi+1, xi+1); const Complex f00 = xf * f001 + (1.0 - xf) * f000; const Complex f01 = xf * f011 + (1.0 - xf) * f010; const Complex f10 = xf * f101 + (1.0 - xf) * f100; const Complex f11 = xf * f111 + (1.0 - xf) * f110; const Complex f0 = yf * f01 + (1.0 - yf) * f00; const Complex f1 = yf * f11 + (1.0 - yf) * f10; const Complex f = zf * f1 + (1.0 - zf) * f0; return f; } Complex Interpolation::linearFFTW3D(const Image& img, double x, double y, double z) { if (x > img.data.xdim-1) { return 0.0; } const int xi = (int)x; const int yi = (int)y; const int zi = (int)z; const int xp = xi + 1; const int yp = (yi+1)%((int)img.data.ydim); const int zp = (zi+1)%((int)img.data.zdim); const double xf = x - xi; const double yf = y - yi; const double zf = z - zi; const Complex f000 = DIRECT_NZYX_ELEM(img.data, 0, zi, yi, xi); const Complex f001 = DIRECT_NZYX_ELEM(img.data, 0, zi, yi, xp); const Complex f010 = DIRECT_NZYX_ELEM(img.data, 0, zi, yp, xi); const Complex f011 = DIRECT_NZYX_ELEM(img.data, 0, zi, yp, xp); 
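// Added note (not in the original source): in this FFTW half-transform layout only
// the y and z indices wrap around (yp and zp above are taken modulo ydim/zdim); x is
// stored only for 0..xdim-1, hence the unwrapped xp and the early return for x > xdim-1.
// The four samples below are the matching corners on the (possibly wrapped) z+1 plane.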
const Complex f100 = DIRECT_NZYX_ELEM(img.data, 0, zp, yi, xi); const Complex f101 = DIRECT_NZYX_ELEM(img.data, 0, zp, yi, xp); const Complex f110 = DIRECT_NZYX_ELEM(img.data, 0, zp, yp, xi); const Complex f111 = DIRECT_NZYX_ELEM(img.data, 0, zp, yp, xp); const Complex f00 = xf * f001 + (1.0 - xf) * f000; const Complex f01 = xf * f011 + (1.0 - xf) * f010; const Complex f10 = xf * f101 + (1.0 - xf) * f100; const Complex f11 = xf * f111 + (1.0 - xf) * f110; const Complex f0 = yf * f01 + (1.0 - yf) * f00; const Complex f1 = yf * f11 + (1.0 - yf) * f10; const Complex f = zf * f1 + (1.0 - zf) * f0; return f; } Complex Interpolation::linearFFTW2D(const Image& img, double x, double y) { if (x > img.data.xdim-1) { return 0.0; } const int xi = (int)x; const int yi = (int)y; const int xp = xi + 1; const int yp = (yi+1)%((int)img.data.ydim); const double xf = x - xi; const double yf = y - yi; const Complex f00 = DIRECT_NZYX_ELEM(img.data, 0, 0, yi, xi); const Complex f01 = DIRECT_NZYX_ELEM(img.data, 0, 0, yi, xp); const Complex f10 = DIRECT_NZYX_ELEM(img.data, 0, 0, yp, xi); const Complex f11 = DIRECT_NZYX_ELEM(img.data, 0, 0, yp, xp); const Complex f0 = xf * f01 + (1.0 - xf) * f00; const Complex f1 = xf * f11 + (1.0 - xf) * f10; const Complex f = yf * f1 + (1.0 - yf) * f0; return f; } void Interpolation::test2D() { int w0 = 5, w1 = 1000; Image img0(w0,w0), img(w1,w1), img1(w1,w1), img2a(w1,w1), img2b(w1,w1), img3a(w1,w1), img3b(w1,w1), img3c(w1,w1); Image gradx(w1,w1), grady(w1,w1); Image gradnx(w1,w1), gradny(w1,w1); for (int y = 0; y < w0; y++) for (int x = 0; x < w0; x++) { DIRECT_NZYX_ELEM(img0.data, 0, 0, y, x) = (x + w0*y)*(1 - 2*((x%2)^(y%2))); } double eps = 0.001; for (int y = 0; y < w1; y++) for (int x = 0; x < w1; x++) { DIRECT_NZYX_ELEM(img.data, 0, 0, y, x) = DIRECT_NZYX_ELEM(img0.data, 0, 0, w0*y/w1, w0*x/w1); DIRECT_NZYX_ELEM(img1.data, 0, 0, y, x) = cubicXY( img0, w0*x/(double)w1 - 0.5, w0*y/(double)w1 - 0.5, 0); DIRECT_NZYX_ELEM(img2a.data, 0, 0, y, x) = cubicXY( img0, w0*x/(double)w1 - 0.5, w0*y/(double)w1 - 0.5, 0, 0, true); DIRECT_NZYX_ELEM(img2b.data, 0, 0, y, x) = cubicXY( img0, w0*x/(double)w1 - 0.5 - w0/2, w0*y/(double)w1 - 0.5 - w0/2, 0, 0, true); DIRECT_NZYX_ELEM(img3a.data, 0, 0, y, x) = cubicXY( img0, 1e-2*(x-w1/2), 1e-2*(y-w1/2), 0, 0, true); DIRECT_NZYX_ELEM(img3b.data, 0, 0, y, x) = cubicXY( img0, 1e-7*(x-w1/2), 1e-7*(y-w1/2), 0, 0, true); DIRECT_NZYX_ELEM(img3c.data, 0, 0, y, x) = cubicXY( img0, 1e-16*(x-w1/2), 1e-16*(y-w1/2), 0, 0, true); t2Vector g = cubicXYgrad(img0, w0*x/(double)w1 - 0.5, w0*y/(double)w1 - 0.5, 0); DIRECT_NZYX_ELEM(gradx.data, 0, 0, y, x) = g.x; DIRECT_NZYX_ELEM(grady.data, 0, 0, y, x) = g.y; DIRECT_NZYX_ELEM(gradnx.data, 0, 0, y, x) = ( cubicXY(img0, w0*x/(double)w1 - 0.5 + eps, w0*y/(double)w1 - 0.5, 0) - cubicXY(img0, w0*x/(double)w1 - 0.5 - eps, w0*y/(double)w1 - 0.5, 0))/(2.0*eps); DIRECT_NZYX_ELEM(gradny.data, 0, 0, y, x) = ( cubicXY(img0, w0*x/(double)w1 - 0.5, w0*y/(double)w1 - 0.5 + eps, 0) - cubicXY(img0, w0*x/(double)w1 - 0.5, w0*y/(double)w1 - 0.5 - eps, 0))/(2.0*eps); } VtkHelper::writeVTK(img, "debug/interpolationX_0.vtk", 0, 0, 0, 1, 1, 1); VtkHelper::writeVTK(img1, "debug/interpolationX_1.vtk", 0, 0, 0, 1, 1, 1); VtkHelper::writeVTK(img2a, "debug/interpolationX_1w.vtk", 0, 0, 0, 1, 1, 1); VtkHelper::writeVTK(img2b, "debug/interpolationX_2w.vtk", 0, 0, 0, 1, 1, 1); VtkHelper::writeVTK(img3a, "debug/interpolationX_3w_0.vtk", 0, 0, 0, 1, 1, 1); VtkHelper::writeVTK(img3b, "debug/interpolationX_3w_4.vtk", 0, 0, 0, 1, 1, 1); 
VtkHelper::writeVTK(img3c, "debug/interpolationX_3w_16.vtk", 0, 0, 0, 1, 1, 1); VtkHelper::writeVTK(gradx, "debug/interpolationX_gx.vtk", 0, 0, 0, 1, 1, 1); VtkHelper::writeVTK(grady, "debug/interpolationX_gy.vtk", 0, 0, 0, 1, 1, 1); VtkHelper::writeVTK(gradnx, "debug/interpolationX_gnx.vtk", 0, 0, 0, 1, 1, 1); VtkHelper::writeVTK(gradny, "debug/interpolationX_gny.vtk", 0, 0, 0, 1, 1, 1); } relion-3.1.3/src/jaz/interpolation.h000066400000000000000000000301351411340063500174200ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef JAZ_INTERPOLATION_H #define JAZ_INTERPOLATION_H #include #include #include #include #define INTERPOL_WRAP(i,n) ((i) >= 0 ? (i) % (n) : -(i) % (n) ? (n) - (-(i) % (n)) : 0) class Interpolation { public: static bool isInSlice(const Image& img, double x, double y); static double getTaperWeight(const Image& img, double x, double y, double rx, double ry); static double linearXY(const Image& img, double x, double y, int n); static Complex linear3D(const Image& img, double x, double y, double z); static Complex linearFFTW3D(const Image& img, double x, double y, double z); static Complex linearFFTW2D(const Image& img, double x, double y); template static gravis::d2Vector quadraticMaxXY(const Image& img, double eps = 1e-25) { const int w = img.data.xdim; const int h = img.data.ydim; int xmax = -1, ymax = -1; double vmax = 0.0; for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { T v = DIRECT_A2D_ELEM(img.data, y, x); if (xmax < 0 || v > vmax) { vmax = v; xmax = x; ymax = y; } } gravis::d2Vector p(xmax, ymax); if (xmax > 0 && xmax < w-1) { const T vp = DIRECT_A2D_ELEM(img.data, ymax, xmax+1); const T vn = DIRECT_A2D_ELEM(img.data, ymax, xmax-1); if (std::abs(vp + vn - 2.0*vmax) > eps) { p.x -= 0.5 * (vp - vn) / (vp + vn - 2.0*vmax); } } if (xmax > 0 && xmax < w-1) { const T vp = DIRECT_A2D_ELEM(img.data, ymax+1, xmax); const T vn = DIRECT_A2D_ELEM(img.data, ymax-1, xmax); if (std::abs(vp + vn - 2.0*vmax) > eps) { p.y -= 0.5 * (vp - vn) / (vp + vn - 2.0*vmax); } } return p; } template static gravis::d2Vector quadraticMaxWrapXY( const Image& img, double eps = 1e-25, int wMax = -1, int hMax = -1) { const int w = img.data.xdim; const int h = img.data.ydim; if (wMax < 0) wMax = w; if (hMax < 0) hMax = h; int xmax = -1, ymax = -1; double vmax = 0.0; for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { if ((x > wMax && w - x > wMax) || y > hMax && h - x > hMax) { continue; } T v = DIRECT_A2D_ELEM(img.data, y, x); if (xmax < 0 || v > vmax) { vmax = v; xmax = x; ymax = y; } } gravis::d2Vector p(xmax, ymax); { const T vp = DIRECT_A2D_ELEM(img.data, ymax, (xmax+1)%w); const T vn = 
DIRECT_A2D_ELEM(img.data, ymax, (xmax-1+w)%w); if (std::abs(vp + vn - 2.0*vmax) > eps) { p.x -= 0.5 * (vp - vn) / (vp + vn - 2.0*vmax); } } { const T vp = DIRECT_A2D_ELEM(img.data, (ymax+1)%w, xmax); const T vn = DIRECT_A2D_ELEM(img.data, (ymax-1+w)%w, xmax); if (std::abs(vp + vn - 2.0*vmax) > eps) { p.y -= 0.5 * (vp - vn) / (vp + vn - 2.0*vmax); } } return p; } static double cubic1D(double y0, double y1, double y2, double y3, double t); template static T cubicXY(const Image& img, double x, double y, int z = 0, int n = 0, bool wrap = false) { int xi = (int)std::floor(x); int yi = (int)std::floor(y); int xi_n1 = xi-1; int yi_n1 = yi-1; int xi_p1 = xi+1; int yi_p1 = yi+1; int xi_p2 = xi+2; int yi_p2 = yi+2; const double xf = x - xi; const double yf = y - yi; if (wrap) { xi_n1 = INTERPOL_WRAP(xi_n1, img.data.xdim); yi_n1 = INTERPOL_WRAP(yi_n1, img.data.ydim); xi = INTERPOL_WRAP(xi, img.data.xdim); yi = INTERPOL_WRAP(yi, img.data.ydim); xi_p1 = INTERPOL_WRAP(xi_p1, img.data.xdim); yi_p1 = INTERPOL_WRAP(yi_p1, img.data.ydim); xi_p2 = INTERPOL_WRAP(xi_p2, img.data.xdim); yi_p2 = INTERPOL_WRAP(yi_p2, img.data.ydim); } else { xi = XMIPP_MAX(0, XMIPP_MIN(img.data.xdim - 1, xi)); yi = XMIPP_MAX(0, XMIPP_MIN(img.data.ydim - 1, yi)); xi_n1 = XMIPP_MAX(0, XMIPP_MIN(img.data.xdim - 1, xi_n1)); yi_n1 = XMIPP_MAX(0, XMIPP_MIN(img.data.ydim - 1, yi_n1)); xi_p1 = XMIPP_MAX(0, XMIPP_MIN(img.data.xdim - 1, xi_p1)); yi_p1 = XMIPP_MAX(0, XMIPP_MIN(img.data.ydim - 1, yi_p1)); xi_p2 = XMIPP_MAX(0, XMIPP_MIN(img.data.xdim - 1, xi_p2)); yi_p2 = XMIPP_MAX(0, XMIPP_MIN(img.data.ydim - 1, yi_p2)); } const T f00 = DIRECT_NZYX_ELEM(img.data, n, z, yi_n1, xi_n1); const T f01 = DIRECT_NZYX_ELEM(img.data, n, z, yi_n1, xi); const T f02 = DIRECT_NZYX_ELEM(img.data, n, z, yi_n1, xi_p1); const T f03 = DIRECT_NZYX_ELEM(img.data, n, z, yi_n1, xi_p2); const T f10 = DIRECT_NZYX_ELEM(img.data, n, z, yi, xi_n1); const T f11 = DIRECT_NZYX_ELEM(img.data, n, z, yi, xi); const T f12 = DIRECT_NZYX_ELEM(img.data, n, z, yi, xi_p1); const T f13 = DIRECT_NZYX_ELEM(img.data, n, z, yi, xi_p2); const T f20 = DIRECT_NZYX_ELEM(img.data, n, z, yi_p1, xi_n1); const T f21 = DIRECT_NZYX_ELEM(img.data, n, z, yi_p1, xi); const T f22 = DIRECT_NZYX_ELEM(img.data, n, z, yi_p1, xi_p1); const T f23 = DIRECT_NZYX_ELEM(img.data, n, z, yi_p1, xi_p2); const T f30 = DIRECT_NZYX_ELEM(img.data, n, z, yi_p2, xi_n1); const T f31 = DIRECT_NZYX_ELEM(img.data, n, z, yi_p2, xi); const T f32 = DIRECT_NZYX_ELEM(img.data, n, z, yi_p2, xi_p1); const T f33 = DIRECT_NZYX_ELEM(img.data, n, z, yi_p2, xi_p2); const gravis::d4Matrix A( -1.0/2.0, 3.0/2.0, -3.0/2.0, 1.0/2.0, 1.0, -5.0/2.0, 2.0, -1.0/2.0, -1.0/2.0, 0.0, 1.0/2.0, 0.0, 0.0, 1.0, 0.0, 0.0); const gravis::d4Matrix V( f00, f10, f20, f30, f01, f11, f21, f31, f02, f12, f22, f32, f03, f13, f23, f33); gravis::d4Matrix At = A; At.transpose(); gravis::d4Matrix AVA = A * V * At; const gravis::d4Vector xx(xf*xf*xf, xf*xf, xf, 1.0); const gravis::d4Vector yy(yf*yf*yf, yf*yf, yf, 1.0); return (T)(xx.dot(AVA * yy)); } template static gravis::t2Vector cubicXYgrad(const Image& img, double x, double y, int z = 0, int n = 0, bool wrap = false) { int xi = (int)std::floor(x); int yi = (int)std::floor(y); int xi_n1 = xi-1; int yi_n1 = yi-1; int xi_p1 = xi+1; int yi_p1 = yi+1; int xi_p2 = xi+2; int yi_p2 = yi+2; const double xf = x - xi; const double yf = y - yi; if (wrap) { xi_n1 = INTERPOL_WRAP(xi_n1, img.data.xdim); yi_n1 = INTERPOL_WRAP(yi_n1, img.data.ydim); xi = INTERPOL_WRAP(xi, img.data.xdim); yi = INTERPOL_WRAP(yi, img.data.ydim); 
xi_p1 = INTERPOL_WRAP(xi_p1, img.data.xdim); yi_p1 = INTERPOL_WRAP(yi_p1, img.data.ydim); xi_p2 = INTERPOL_WRAP(xi_p2, img.data.xdim); yi_p2 = INTERPOL_WRAP(yi_p2, img.data.ydim); } else { xi = XMIPP_MAX(0, XMIPP_MIN(img.data.xdim - 1, xi)); yi = XMIPP_MAX(0, XMIPP_MIN(img.data.ydim - 1, yi)); xi_n1 = XMIPP_MAX(0, XMIPP_MIN(img.data.xdim - 1, xi_n1)); yi_n1 = XMIPP_MAX(0, XMIPP_MIN(img.data.ydim - 1, yi_n1)); xi_p1 = XMIPP_MAX(0, XMIPP_MIN(img.data.xdim - 1, xi_p1)); yi_p1 = XMIPP_MAX(0, XMIPP_MIN(img.data.ydim - 1, yi_p1)); xi_p2 = XMIPP_MAX(0, XMIPP_MIN(img.data.xdim - 1, xi_p2)); yi_p2 = XMIPP_MAX(0, XMIPP_MIN(img.data.ydim - 1, yi_p2)); } const T f00 = DIRECT_NZYX_ELEM(img.data, n, 0, yi_n1, xi_n1); const T f01 = DIRECT_NZYX_ELEM(img.data, n, 0, yi_n1, xi); const T f02 = DIRECT_NZYX_ELEM(img.data, n, 0, yi_n1, xi_p1); const T f03 = DIRECT_NZYX_ELEM(img.data, n, 0, yi_n1, xi_p2); const T f10 = DIRECT_NZYX_ELEM(img.data, n, 0, yi, xi_n1); const T f11 = DIRECT_NZYX_ELEM(img.data, n, 0, yi, xi); const T f12 = DIRECT_NZYX_ELEM(img.data, n, 0, yi, xi_p1); const T f13 = DIRECT_NZYX_ELEM(img.data, n, 0, yi, xi_p2); const T f20 = DIRECT_NZYX_ELEM(img.data, n, 0, yi_p1, xi_n1); const T f21 = DIRECT_NZYX_ELEM(img.data, n, 0, yi_p1, xi); const T f22 = DIRECT_NZYX_ELEM(img.data, n, 0, yi_p1, xi_p1); const T f23 = DIRECT_NZYX_ELEM(img.data, n, 0, yi_p1, xi_p2); const T f30 = DIRECT_NZYX_ELEM(img.data, n, 0, yi_p2, xi_n1); const T f31 = DIRECT_NZYX_ELEM(img.data, n, 0, yi_p2, xi); const T f32 = DIRECT_NZYX_ELEM(img.data, n, 0, yi_p2, xi_p1); const T f33 = DIRECT_NZYX_ELEM(img.data, n, 0, yi_p2, xi_p2); const gravis::d4Matrix A( -1.0/2.0, 3.0/2.0, -3.0/2.0, 1.0/2.0, 1.0, -5.0/2.0, 2.0, -1.0/2.0, -1.0/2.0, 0.0, 1.0/2.0, 0.0, 0.0, 1.0, 0.0, 0.0); const gravis::d4Matrix V( f00, f10, f20, f30, f01, f11, f21, f31, f02, f12, f22, f32, f03, f13, f23, f33); gravis::d4Matrix At = A; At.transpose(); gravis::d4Matrix AVA = A * V * At; const gravis::d4Vector xx(xf*xf*xf, xf*xf, xf, 1.0); const gravis::d4Vector yy(yf*yf*yf, yf*yf, yf, 1.0); const gravis::d4Vector xxd(3.0*xf*xf, 2.0*xf, 1.0, 0.0); const gravis::d4Vector yyd(3.0*yf*yf, 2.0*yf, 1.0, 0.0); return gravis::t2Vector(xxd.dot(AVA * yy), xx.dot(AVA * yyd)); } static void test2D(); }; #endif relion-3.1.3/src/jaz/io/000077500000000000000000000000001411340063500147655ustar00rootroot00000000000000relion-3.1.3/src/jaz/io/star_converter.cpp000066400000000000000000000226011411340063500205320ustar00rootroot00000000000000#include "star_converter.h" void StarConverter::convert_3p0_particlesTo_3p1(const MetaDataTable &in, MetaDataTable &outParticles, MetaDataTable &outOptics, std::string tablename, bool do_die_upon_error) { int ver = in.getVersion(); int curVer = MetaDataTable::getCurrentVersion(); if (ver == curVer) { if (do_die_upon_error) { REPORT_ERROR_STR("StarConverter::convert_3p0_particlesTo_3p1: Star file is already at version " << curVer/10000.0); } else { return; } } else if (ver > curVer) { if (do_die_upon_error) { REPORT_ERROR_STR("StarConverter::convert_3p0_particlesTo_3p1: Star file is at version " << ver/10000.0 << " - this is beyond the current version of Relion (" << curVer/10000.0 << ")\n" << "You are either using an outdated copy of Relion, or the file is from the future.\n"); } else { return; } } const int particleCount = in.numberOfObjects(); std::vector allOpticsLabels_double(0); allOpticsLabels_double.push_back(EMDL_CTF_Q0); allOpticsLabels_double.push_back(EMDL_IMAGE_BEAMTILT_X); allOpticsLabels_double.push_back(EMDL_IMAGE_BEAMTILT_Y); 
allOpticsLabels_double.push_back(EMDL_CTF_CS); allOpticsLabels_double.push_back(EMDL_CTF_VOLTAGE); allOpticsLabels_double.push_back(EMDL_CTF_DETECTOR_PIXEL_SIZE); allOpticsLabels_double.push_back(EMDL_CTF_MAGNIFICATION); std::vector opticsLabels_double(0); for (int l = 0; l < allOpticsLabels_double.size(); l++) { if (in.labelExists(allOpticsLabels_double[l])) { opticsLabels_double.push_back(allOpticsLabels_double[l]); } } const int opticsLabelCount_double = opticsLabels_double.size(); std::vector> groupValues_double(0); std::vector opticsClasses(particleCount, -1); for (long int p = 0; p < particleCount; p++) { int foundGroup = -1; std::vector curVals_double(opticsLabelCount_double); for (int l = 0; l < opticsLabelCount_double; l++) { in.getValue(opticsLabels_double[l], curVals_double[l], p); } for (int g = 0; g < groupValues_double.size(); g++) { bool groupGood = true; for (int l = 0; l < opticsLabelCount_double; l++) { if (curVals_double[l] != groupValues_double[g][l]) { groupGood = false; break; } } if (groupGood) { foundGroup = g; break; } } if (foundGroup >= 0) { opticsClasses[p] = foundGroup; } else { groupValues_double.push_back(curVals_double); opticsClasses[p] = groupValues_double.size() - 1; } } outParticles = in; for (int l = 0; l < opticsLabelCount_double; l++) { outParticles.deactivateLabel(opticsLabels_double[l]); } outParticles.addLabel(EMDL_IMAGE_OPTICS_GROUP); for (long int p = 0; p < particleCount; p++) { outParticles.setValue(EMDL_IMAGE_OPTICS_GROUP, opticsClasses[p] + 1, p); } // Determine the data type if (tablename == "") { if (in.containsLabel(EMDL_IMAGE_NAME)) tablename = "particles"; else if (in.containsLabel(EMDL_MICROGRAPH_METADATA_NAME)) tablename = "movies"; else tablename = "micrographs"; } outParticles.setName(tablename); outParticles.setVersion(curVer); outOptics.setName("optics"); outOptics.setVersion(curVer); outOptics.addLabel(EMDL_IMAGE_OPTICS_GROUP); outOptics.addLabel(EMDL_IMAGE_OPTICS_GROUP_NAME); for (int l = 0; l < opticsLabelCount_double; l++) { outOptics.addLabel(opticsLabels_double[l]); } for (int g = 0; g < groupValues_double.size(); g++) { outOptics.addObject(); outOptics.setValue(EMDL_IMAGE_OPTICS_GROUP, g + 1, g); std::string mygroupname = "opticsGroup" + integerToString(g + 1); outOptics.setValue(EMDL_IMAGE_OPTICS_GROUP_NAME, mygroupname, g); for (int l = 0; l < opticsLabelCount_double; l++) { outOptics.setValue(opticsLabels_double[l], groupValues_double[g][l], g); } } // set IMAGE_PIXEL_SIZE/MICROGRAPH_PIXEL_SIZE instead of DETECTOR_PIXEL_SIZE and MAGNIFICATION // This does not do anything if DETECTOR_PIXEL_SIZE or MAGNIFICATION are not in the input STAR file unifyPixelSize(outOptics, tablename); if (tablename == "particles" || tablename == "") { // Make translations in Angstroms instead of in pixels translateOffsets(outParticles, outOptics); // Also read in one image for each optics group to set the image sizes in the outOptics table // Also set the image_size for each optics_group int nr_optics_groups_found = 0; int nr_optics_groups = groupValues_double.size(); std::vector found_this_group; found_this_group.resize(nr_optics_groups, false); for (long int p = 0; p < particleCount; p++) { int g = opticsClasses[p]; if (!found_this_group[g]) { FileName fn_img; if (!outParticles.getValue(EMDL_IMAGE_NAME, fn_img, p)) { if (do_die_upon_error) { REPORT_ERROR("BUG: cannot find name for particle..."); } else { return; } } try { Image img; img.read(fn_img, false); // false means read only header, skip real data int image_size = img().xdim; if 
(image_size % 2 != 0) { REPORT_ERROR("ERROR: this program only works with even values for the image dimensions!"); } if (image_size != img().ydim) { REPORT_ERROR("ERROR: xsize != ysize: only squared images are allowed"); } outOptics.setValue(EMDL_IMAGE_SIZE, image_size, g); found_this_group[g] = true; nr_optics_groups_found++; if (img().zdim > 1) { if (image_size != img().zdim) { REPORT_ERROR("ERROR: xsize != zsize: only cube 3D images allowed"); } outOptics.setValue(EMDL_IMAGE_DIMENSIONALITY, 3, g); } else { outOptics.setValue(EMDL_IMAGE_DIMENSIONALITY, 2, g); } } catch (RelionError e) { std::cerr << "Warning: " << fn_img << " not found.\n"; break; } } if (nr_optics_groups_found == nr_optics_groups) { break; } } if (nr_optics_groups_found != nr_optics_groups) { std::cerr << "Warning: Not all image files could be found.\n"; std::cerr << " Image sizes and dimensionalities will be missing from the star file.\n"; std::cerr << " Later steps (e.g. re-extraction, CtfRefine) can fail!!\n"; std::cerr << " Repeat this job after fixing the image paths.\n"; //REPORT_ERROR("BUG: something went wrong with finding the optics groups..."); } } return; } void StarConverter::unifyPixelSize(MetaDataTable& outOptics, std::string tablename) { if (outOptics.containsLabel(EMDL_CTF_DETECTOR_PIXEL_SIZE) && outOptics.containsLabel(EMDL_CTF_MAGNIFICATION)) { for (int i = 0; i < outOptics.numberOfObjects(); i++) { double dstep, mag; outOptics.getValue(EMDL_CTF_DETECTOR_PIXEL_SIZE, dstep, i); outOptics.getValue(EMDL_CTF_MAGNIFICATION, mag, i); double angpix = 10000 * dstep / mag; if (tablename == "particles" || tablename == "") { outOptics.setValue(EMDL_IMAGE_PIXEL_SIZE, angpix, i); // Do not set EMDL_MICROGRAPH_ORIGINAL_PIXEL_SIZE, because particles might have been down-sampled. 
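// Worked example (added for clarity, not in the original source): with the conversion
// above, angpix = 10000 * dstep / mag, so a 14 um detector pixel at 105,000x
// magnification gives 10000 * 14 / 105000, i.e. about 1.33 Å per pixel.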
} else if (tablename == "micrographs") { outOptics.setValue(EMDL_MICROGRAPH_PIXEL_SIZE, angpix, i); outOptics.setValue(EMDL_MICROGRAPH_ORIGINAL_PIXEL_SIZE, angpix, i); } } outOptics.deactivateLabel(EMDL_CTF_DETECTOR_PIXEL_SIZE); outOptics.deactivateLabel(EMDL_CTF_MAGNIFICATION); } } void StarConverter::translateOffsets(MetaDataTable &outParticles, const MetaDataTable &optics) { for (int i = 0; i < outParticles.numberOfObjects(); i++) { int og; outParticles.getValue(EMDL_IMAGE_OPTICS_GROUP, og, i); og--; double angpix; optics.getValue(EMDL_IMAGE_PIXEL_SIZE, angpix, og); double x, y, z, d; if (outParticles.containsLabel(EMDL_ORIENT_ORIGIN_X)) { outParticles.getValue(EMDL_ORIENT_ORIGIN_X, x, i); outParticles.setValue(EMDL_ORIENT_ORIGIN_X_ANGSTROM, x*angpix, i); } if (outParticles.containsLabel(EMDL_ORIENT_ORIGIN_Y)) { outParticles.getValue(EMDL_ORIENT_ORIGIN_Y, y, i); outParticles.setValue(EMDL_ORIENT_ORIGIN_Y_ANGSTROM, y*angpix, i); } if (outParticles.containsLabel(EMDL_ORIENT_ORIGIN_Z)) { outParticles.getValue(EMDL_ORIENT_ORIGIN_Z, z, i); outParticles.setValue(EMDL_ORIENT_ORIGIN_Z_ANGSTROM, z*angpix, i); } if (outParticles.containsLabel(EMDL_ORIENT_ORIGIN_X_PRIOR)) { outParticles.getValue(EMDL_ORIENT_ORIGIN_X_PRIOR, x, i); outParticles.setValue(EMDL_ORIENT_ORIGIN_X_PRIOR_ANGSTROM, x*angpix, i); } if (outParticles.containsLabel(EMDL_ORIENT_ORIGIN_Y_PRIOR)) { outParticles.getValue(EMDL_ORIENT_ORIGIN_Y_PRIOR, y, i); outParticles.setValue(EMDL_ORIENT_ORIGIN_Y_PRIOR_ANGSTROM, y*angpix, i); } if (outParticles.containsLabel(EMDL_ORIENT_ORIGIN_Z_PRIOR)) { outParticles.getValue(EMDL_ORIENT_ORIGIN_Z_PRIOR, z, i); outParticles.setValue(EMDL_ORIENT_ORIGIN_Z_PRIOR_ANGSTROM, z*angpix, i); } if (outParticles.containsLabel(EMDL_PARTICLE_HELICAL_TRACK_LENGTH)) { outParticles.getValue(EMDL_PARTICLE_HELICAL_TRACK_LENGTH, d, i); outParticles.setValue(EMDL_PARTICLE_HELICAL_TRACK_LENGTH_ANGSTROM, d*angpix, i); } } outParticles.deactivateLabel(EMDL_ORIENT_ORIGIN_X); outParticles.deactivateLabel(EMDL_ORIENT_ORIGIN_Y); outParticles.deactivateLabel(EMDL_ORIENT_ORIGIN_Z); outParticles.deactivateLabel(EMDL_ORIENT_ORIGIN_X_PRIOR); outParticles.deactivateLabel(EMDL_ORIENT_ORIGIN_Y_PRIOR); outParticles.deactivateLabel(EMDL_ORIENT_ORIGIN_Z_PRIOR); outParticles.deactivateLabel(EMDL_PARTICLE_HELICAL_TRACK_LENGTH); } relion-3.1.3/src/jaz/io/star_converter.h000066400000000000000000000010711411340063500201750ustar00rootroot00000000000000#ifndef STAR_CONVERTER_H #define STAR_CONVERTER_H #include #include class StarConverter { public: static void convert_3p0_particlesTo_3p1( const MetaDataTable& in, MetaDataTable& outParticles, MetaDataTable& outOptics, std::string tablename = "particles", bool do_die_upon_error = true); protected: static void unifyPixelSize(MetaDataTable& outOptics, std::string tablename = "particles"); static void translateOffsets(MetaDataTable& outParticles, const MetaDataTable& optics); }; #endif relion-3.1.3/src/jaz/jaz_config.cpp000066400000000000000000000020041411340063500171670ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include bool JazConfig::writeVtk = true; bool JazConfig::writeMrc = true; relion-3.1.3/src/jaz/jaz_config.h000066400000000000000000000020621411340063500166400ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef JAZ_CONFIG_H #define JAZ_CONFIG_H class JazConfig { public: static bool writeVtk; static bool writeMrc; }; #endif relion-3.1.3/src/jaz/lbfgs/000077500000000000000000000000001411340063500154535ustar00rootroot00000000000000relion-3.1.3/src/jaz/lbfgs/arithmetic_ansi.h000066400000000000000000000064641411340063500210010ustar00rootroot00000000000000/* * ANSI C implementation of vector operations. * * Copyright (c) 2007-2010 Naoaki Okazaki * All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ /* $Id$ */ #include #include #if LBFGS_FLOAT == 32 && LBFGS_IEEE_FLOAT #define fsigndiff(x, y) (((*(uint32_t*)(x)) ^ (*(uint32_t*)(y))) & 0x80000000U) #else #define fsigndiff(x, y) (*(x) * (*(y) / fabs(*(y))) < 0.) 
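/* Added note (not part of the original libLBFGS source): both fsigndiff variants
   report whether *x and *y have opposite signs -- the branch above compares the
   product of *x with the unit sign of *y against zero, while the IEEE branch XORs
   the raw sign bits. The More-Thuente line search uses this to compare the signs
   of directional derivatives when updating the trial interval. */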
#endif/*LBFGS_IEEE_FLOAT*/ inline static void* vecalloc(size_t size) { void *memblock = malloc(size); if (memblock) { memset(memblock, 0, size); } return memblock; } inline static void vecfree(void *memblock) { free(memblock); } inline static void vecset(lbfgsfloatval_t *x, const lbfgsfloatval_t c, const int n) { int i; for (i = 0;i < n;++i) { x[i] = c; } } inline static void veccpy(lbfgsfloatval_t *y, const lbfgsfloatval_t *x, const int n) { int i; for (i = 0;i < n;++i) { y[i] = x[i]; } } inline static void vecncpy(lbfgsfloatval_t *y, const lbfgsfloatval_t *x, const int n) { int i; for (i = 0;i < n;++i) { y[i] = -x[i]; } } inline static void vecadd(lbfgsfloatval_t *y, const lbfgsfloatval_t *x, const lbfgsfloatval_t c, const int n) { int i; for (i = 0;i < n;++i) { y[i] += c * x[i]; } } inline static void vecdiff(lbfgsfloatval_t *z, const lbfgsfloatval_t *x, const lbfgsfloatval_t *y, const int n) { int i; for (i = 0;i < n;++i) { z[i] = x[i] - y[i]; } } inline static void vecscale(lbfgsfloatval_t *y, const lbfgsfloatval_t c, const int n) { int i; for (i = 0;i < n;++i) { y[i] *= c; } } inline static void vecmul(lbfgsfloatval_t *y, const lbfgsfloatval_t *x, const int n) { int i; for (i = 0;i < n;++i) { y[i] *= x[i]; } } inline static void vecdot(lbfgsfloatval_t* s, const lbfgsfloatval_t *x, const lbfgsfloatval_t *y, const int n) { int i; *s = 0.; for (i = 0;i < n;++i) { *s += x[i] * y[i]; } } inline static void vec2norm(lbfgsfloatval_t* s, const lbfgsfloatval_t *x, const int n) { vecdot(s, x, x, n); *s = (lbfgsfloatval_t)sqrt(*s); } inline static void vec2norminv(lbfgsfloatval_t* s, const lbfgsfloatval_t *x, const int n) { vec2norm(s, x, n); *s = (lbfgsfloatval_t)(1.0 / *s); } relion-3.1.3/src/jaz/lbfgs/arithmetic_sse_double.h000066400000000000000000000211761411340063500221700ustar00rootroot00000000000000/* * SSE2 implementation of vector oprations (64bit double). * * Copyright (c) 2007-2010 Naoaki Okazaki * All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. 
*/ /* $Id$ */ #include #ifndef __APPLE__ #include #endif #include #if 1400 <= _MSC_VER #include #endif/*1400 <= _MSC_VER*/ #if HAVE_EMMINTRIN_H #include #endif/*HAVE_EMMINTRIN_H*/ inline static void* vecalloc(size_t size) { #if defined(_MSC_VER) void *memblock = _aligned_malloc(size, 16); #elif defined(__APPLE__) /* OS X always aligns on 16-byte boundaries */ void *memblock = malloc(size); #else void *memblock = NULL, *p = NULL; if (posix_memalign(&p, 16, size) == 0) { memblock = p; } #endif if (memblock != NULL) { memset(memblock, 0, size); } return memblock; } inline static void vecfree(void *memblock) { #ifdef _MSC_VER _aligned_free(memblock); #else free(memblock); #endif } #define fsigndiff(x, y) \ ((_mm_movemask_pd(_mm_set_pd(*(x), *(y))) + 1) & 0x002) #define vecset(x, c, n) \ { \ int i; \ __m128d XMM0 = _mm_set1_pd(c); \ for (i = 0;i < (n);i += 8) { \ _mm_store_pd((x)+i , XMM0); \ _mm_store_pd((x)+i+2, XMM0); \ _mm_store_pd((x)+i+4, XMM0); \ _mm_store_pd((x)+i+6, XMM0); \ } \ } #define veccpy(y, x, n) \ { \ int i; \ for (i = 0;i < (n);i += 8) { \ __m128d XMM0 = _mm_load_pd((x)+i ); \ __m128d XMM1 = _mm_load_pd((x)+i+2); \ __m128d XMM2 = _mm_load_pd((x)+i+4); \ __m128d XMM3 = _mm_load_pd((x)+i+6); \ _mm_store_pd((y)+i , XMM0); \ _mm_store_pd((y)+i+2, XMM1); \ _mm_store_pd((y)+i+4, XMM2); \ _mm_store_pd((y)+i+6, XMM3); \ } \ } #define vecncpy(y, x, n) \ { \ int i; \ for (i = 0;i < (n);i += 8) { \ __m128d XMM0 = _mm_setzero_pd(); \ __m128d XMM1 = _mm_setzero_pd(); \ __m128d XMM2 = _mm_setzero_pd(); \ __m128d XMM3 = _mm_setzero_pd(); \ __m128d XMM4 = _mm_load_pd((x)+i ); \ __m128d XMM5 = _mm_load_pd((x)+i+2); \ __m128d XMM6 = _mm_load_pd((x)+i+4); \ __m128d XMM7 = _mm_load_pd((x)+i+6); \ XMM0 = _mm_sub_pd(XMM0, XMM4); \ XMM1 = _mm_sub_pd(XMM1, XMM5); \ XMM2 = _mm_sub_pd(XMM2, XMM6); \ XMM3 = _mm_sub_pd(XMM3, XMM7); \ _mm_store_pd((y)+i , XMM0); \ _mm_store_pd((y)+i+2, XMM1); \ _mm_store_pd((y)+i+4, XMM2); \ _mm_store_pd((y)+i+6, XMM3); \ } \ } #define vecadd(y, x, c, n) \ { \ int i; \ __m128d XMM7 = _mm_set1_pd(c); \ for (i = 0;i < (n);i += 4) { \ __m128d XMM0 = _mm_load_pd((x)+i ); \ __m128d XMM1 = _mm_load_pd((x)+i+2); \ __m128d XMM2 = _mm_load_pd((y)+i ); \ __m128d XMM3 = _mm_load_pd((y)+i+2); \ XMM0 = _mm_mul_pd(XMM0, XMM7); \ XMM1 = _mm_mul_pd(XMM1, XMM7); \ XMM2 = _mm_add_pd(XMM2, XMM0); \ XMM3 = _mm_add_pd(XMM3, XMM1); \ _mm_store_pd((y)+i , XMM2); \ _mm_store_pd((y)+i+2, XMM3); \ } \ } #define vecdiff(z, x, y, n) \ { \ int i; \ for (i = 0;i < (n);i += 8) { \ __m128d XMM0 = _mm_load_pd((x)+i ); \ __m128d XMM1 = _mm_load_pd((x)+i+2); \ __m128d XMM2 = _mm_load_pd((x)+i+4); \ __m128d XMM3 = _mm_load_pd((x)+i+6); \ __m128d XMM4 = _mm_load_pd((y)+i ); \ __m128d XMM5 = _mm_load_pd((y)+i+2); \ __m128d XMM6 = _mm_load_pd((y)+i+4); \ __m128d XMM7 = _mm_load_pd((y)+i+6); \ XMM0 = _mm_sub_pd(XMM0, XMM4); \ XMM1 = _mm_sub_pd(XMM1, XMM5); \ XMM2 = _mm_sub_pd(XMM2, XMM6); \ XMM3 = _mm_sub_pd(XMM3, XMM7); \ _mm_store_pd((z)+i , XMM0); \ _mm_store_pd((z)+i+2, XMM1); \ _mm_store_pd((z)+i+4, XMM2); \ _mm_store_pd((z)+i+6, XMM3); \ } \ } #define vecscale(y, c, n) \ { \ int i; \ __m128d XMM7 = _mm_set1_pd(c); \ for (i = 0;i < (n);i += 4) { \ __m128d XMM0 = _mm_load_pd((y)+i ); \ __m128d XMM1 = _mm_load_pd((y)+i+2); \ XMM0 = _mm_mul_pd(XMM0, XMM7); \ XMM1 = _mm_mul_pd(XMM1, XMM7); \ _mm_store_pd((y)+i , XMM0); \ _mm_store_pd((y)+i+2, XMM1); \ } \ } #define vecmul(y, x, n) \ { \ int i; \ for (i = 0;i < (n);i += 8) { \ __m128d XMM0 = _mm_load_pd((x)+i ); \ __m128d XMM1 = _mm_load_pd((x)+i+2); \ __m128d 
XMM2 = _mm_load_pd((x)+i+4); \ __m128d XMM3 = _mm_load_pd((x)+i+6); \ __m128d XMM4 = _mm_load_pd((y)+i ); \ __m128d XMM5 = _mm_load_pd((y)+i+2); \ __m128d XMM6 = _mm_load_pd((y)+i+4); \ __m128d XMM7 = _mm_load_pd((y)+i+6); \ XMM4 = _mm_mul_pd(XMM4, XMM0); \ XMM5 = _mm_mul_pd(XMM5, XMM1); \ XMM6 = _mm_mul_pd(XMM6, XMM2); \ XMM7 = _mm_mul_pd(XMM7, XMM3); \ _mm_store_pd((y)+i , XMM4); \ _mm_store_pd((y)+i+2, XMM5); \ _mm_store_pd((y)+i+4, XMM6); \ _mm_store_pd((y)+i+6, XMM7); \ } \ } #if 3 <= __SSE__ || defined(__SSE3__) /* Horizontal add with haddps SSE3 instruction. The work register (rw) is unused. */ #define __horizontal_sum(r, rw) \ r = _mm_hadd_ps(r, r); \ r = _mm_hadd_ps(r, r); #else /* Horizontal add with SSE instruction. The work register (rw) is used. */ #define __horizontal_sum(r, rw) \ rw = r; \ r = _mm_shuffle_ps(r, rw, _MM_SHUFFLE(1, 0, 3, 2)); \ r = _mm_add_ps(r, rw); \ rw = r; \ r = _mm_shuffle_ps(r, rw, _MM_SHUFFLE(2, 3, 0, 1)); \ r = _mm_add_ps(r, rw); #endif #define vecdot(s, x, y, n) \ { \ int i; \ __m128d XMM0 = _mm_setzero_pd(); \ __m128d XMM1 = _mm_setzero_pd(); \ __m128d XMM2, XMM3, XMM4, XMM5; \ for (i = 0;i < (n);i += 4) { \ XMM2 = _mm_load_pd((x)+i ); \ XMM3 = _mm_load_pd((x)+i+2); \ XMM4 = _mm_load_pd((y)+i ); \ XMM5 = _mm_load_pd((y)+i+2); \ XMM2 = _mm_mul_pd(XMM2, XMM4); \ XMM3 = _mm_mul_pd(XMM3, XMM5); \ XMM0 = _mm_add_pd(XMM0, XMM2); \ XMM1 = _mm_add_pd(XMM1, XMM3); \ } \ XMM0 = _mm_add_pd(XMM0, XMM1); \ XMM1 = _mm_shuffle_pd(XMM0, XMM0, _MM_SHUFFLE2(1, 1)); \ XMM0 = _mm_add_pd(XMM0, XMM1); \ _mm_store_sd((s), XMM0); \ } #define vec2norm(s, x, n) \ { \ int i; \ __m128d XMM0 = _mm_setzero_pd(); \ __m128d XMM1 = _mm_setzero_pd(); \ __m128d XMM2, XMM3, XMM4, XMM5; \ for (i = 0;i < (n);i += 4) { \ XMM2 = _mm_load_pd((x)+i ); \ XMM3 = _mm_load_pd((x)+i+2); \ XMM4 = XMM2; \ XMM5 = XMM3; \ XMM2 = _mm_mul_pd(XMM2, XMM4); \ XMM3 = _mm_mul_pd(XMM3, XMM5); \ XMM0 = _mm_add_pd(XMM0, XMM2); \ XMM1 = _mm_add_pd(XMM1, XMM3); \ } \ XMM0 = _mm_add_pd(XMM0, XMM1); \ XMM1 = _mm_shuffle_pd(XMM0, XMM0, _MM_SHUFFLE2(1, 1)); \ XMM0 = _mm_add_pd(XMM0, XMM1); \ XMM0 = _mm_sqrt_pd(XMM0); \ _mm_store_sd((s), XMM0); \ } #define vec2norminv(s, x, n) \ { \ int i; \ __m128d XMM0 = _mm_setzero_pd(); \ __m128d XMM1 = _mm_setzero_pd(); \ __m128d XMM2, XMM3, XMM4, XMM5; \ for (i = 0;i < (n);i += 4) { \ XMM2 = _mm_load_pd((x)+i ); \ XMM3 = _mm_load_pd((x)+i+2); \ XMM4 = XMM2; \ XMM5 = XMM3; \ XMM2 = _mm_mul_pd(XMM2, XMM4); \ XMM3 = _mm_mul_pd(XMM3, XMM5); \ XMM0 = _mm_add_pd(XMM0, XMM2); \ XMM1 = _mm_add_pd(XMM1, XMM3); \ } \ XMM2 = _mm_set1_pd(1.0); \ XMM0 = _mm_add_pd(XMM0, XMM1); \ XMM1 = _mm_shuffle_pd(XMM0, XMM0, _MM_SHUFFLE2(1, 1)); \ XMM0 = _mm_add_pd(XMM0, XMM1); \ XMM0 = _mm_sqrt_pd(XMM0); \ XMM2 = _mm_div_pd(XMM2, XMM0); \ _mm_store_sd((s), XMM2); \ } relion-3.1.3/src/jaz/lbfgs/arithmetic_sse_float.h000066400000000000000000000215561411340063500220250ustar00rootroot00000000000000/* * SSE/SSE3 implementation of vector oprations (32bit float). * * Copyright (c) 2007-2010 Naoaki Okazaki * All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ /* $Id$ */ #include #ifndef __APPLE__ #include #endif #include #if 1400 <= _MSC_VER #include #endif/*_MSC_VER*/ #if HAVE_XMMINTRIN_H #include #endif/*HAVE_XMMINTRIN_H*/ #if LBFGS_FLOAT == 32 && LBFGS_IEEE_FLOAT #define fsigndiff(x, y) (((*(uint32_t*)(x)) ^ (*(uint32_t*)(y))) & 0x80000000U) #else #define fsigndiff(x, y) (*(x) * (*(y) / fabs(*(y))) < 0.) #endif/*LBFGS_IEEE_FLOAT*/ inline static void* vecalloc(size_t size) { #if defined(_MSC_VER) void *memblock = _aligned_malloc(size, 16); #elif defined(__APPLE__) /* OS X always aligns on 16-byte boundaries */ void *memblock = malloc(size); #else void *memblock = NULL, *p = NULL; if (posix_memalign(&p, 16, size) == 0) { memblock = p; } #endif if (memblock != NULL) { memset(memblock, 0, size); } return memblock; } inline static void vecfree(void *memblock) { _aligned_free(memblock); } #define vecset(x, c, n) \ { \ int i; \ __m128 XMM0 = _mm_set_ps1(c); \ for (i = 0;i < (n);i += 16) { \ _mm_store_ps((x)+i , XMM0); \ _mm_store_ps((x)+i+ 4, XMM0); \ _mm_store_ps((x)+i+ 8, XMM0); \ _mm_store_ps((x)+i+12, XMM0); \ } \ } #define veccpy(y, x, n) \ { \ int i; \ for (i = 0;i < (n);i += 16) { \ __m128 XMM0 = _mm_load_ps((x)+i ); \ __m128 XMM1 = _mm_load_ps((x)+i+ 4); \ __m128 XMM2 = _mm_load_ps((x)+i+ 8); \ __m128 XMM3 = _mm_load_ps((x)+i+12); \ _mm_store_ps((y)+i , XMM0); \ _mm_store_ps((y)+i+ 4, XMM1); \ _mm_store_ps((y)+i+ 8, XMM2); \ _mm_store_ps((y)+i+12, XMM3); \ } \ } #define vecncpy(y, x, n) \ { \ int i; \ const uint32_t mask = 0x80000000; \ __m128 XMM4 = _mm_load_ps1((float*)&mask); \ for (i = 0;i < (n);i += 16) { \ __m128 XMM0 = _mm_load_ps((x)+i ); \ __m128 XMM1 = _mm_load_ps((x)+i+ 4); \ __m128 XMM2 = _mm_load_ps((x)+i+ 8); \ __m128 XMM3 = _mm_load_ps((x)+i+12); \ XMM0 = _mm_xor_ps(XMM0, XMM4); \ XMM1 = _mm_xor_ps(XMM1, XMM4); \ XMM2 = _mm_xor_ps(XMM2, XMM4); \ XMM3 = _mm_xor_ps(XMM3, XMM4); \ _mm_store_ps((y)+i , XMM0); \ _mm_store_ps((y)+i+ 4, XMM1); \ _mm_store_ps((y)+i+ 8, XMM2); \ _mm_store_ps((y)+i+12, XMM3); \ } \ } #define vecadd(y, x, c, n) \ { \ int i; \ __m128 XMM7 = _mm_set_ps1(c); \ for (i = 0;i < (n);i += 8) { \ __m128 XMM0 = _mm_load_ps((x)+i ); \ __m128 XMM1 = _mm_load_ps((x)+i+4); \ __m128 XMM2 = _mm_load_ps((y)+i ); \ __m128 XMM3 = _mm_load_ps((y)+i+4); \ XMM0 = _mm_mul_ps(XMM0, XMM7); \ XMM1 = _mm_mul_ps(XMM1, XMM7); \ XMM2 = _mm_add_ps(XMM2, XMM0); \ XMM3 = _mm_add_ps(XMM3, XMM1); \ _mm_store_ps((y)+i , XMM2); \ _mm_store_ps((y)+i+4, XMM3); \ } \ } #define vecdiff(z, x, y, n) \ { \ 
int i; \ for (i = 0;i < (n);i += 16) { \ __m128 XMM0 = _mm_load_ps((x)+i ); \ __m128 XMM1 = _mm_load_ps((x)+i+ 4); \ __m128 XMM2 = _mm_load_ps((x)+i+ 8); \ __m128 XMM3 = _mm_load_ps((x)+i+12); \ __m128 XMM4 = _mm_load_ps((y)+i ); \ __m128 XMM5 = _mm_load_ps((y)+i+ 4); \ __m128 XMM6 = _mm_load_ps((y)+i+ 8); \ __m128 XMM7 = _mm_load_ps((y)+i+12); \ XMM0 = _mm_sub_ps(XMM0, XMM4); \ XMM1 = _mm_sub_ps(XMM1, XMM5); \ XMM2 = _mm_sub_ps(XMM2, XMM6); \ XMM3 = _mm_sub_ps(XMM3, XMM7); \ _mm_store_ps((z)+i , XMM0); \ _mm_store_ps((z)+i+ 4, XMM1); \ _mm_store_ps((z)+i+ 8, XMM2); \ _mm_store_ps((z)+i+12, XMM3); \ } \ } #define vecscale(y, c, n) \ { \ int i; \ __m128 XMM7 = _mm_set_ps1(c); \ for (i = 0;i < (n);i += 8) { \ __m128 XMM0 = _mm_load_ps((y)+i ); \ __m128 XMM1 = _mm_load_ps((y)+i+4); \ XMM0 = _mm_mul_ps(XMM0, XMM7); \ XMM1 = _mm_mul_ps(XMM1, XMM7); \ _mm_store_ps((y)+i , XMM0); \ _mm_store_ps((y)+i+4, XMM1); \ } \ } #define vecmul(y, x, n) \ { \ int i; \ for (i = 0;i < (n);i += 16) { \ __m128 XMM0 = _mm_load_ps((x)+i ); \ __m128 XMM1 = _mm_load_ps((x)+i+ 4); \ __m128 XMM2 = _mm_load_ps((x)+i+ 8); \ __m128 XMM3 = _mm_load_ps((x)+i+12); \ __m128 XMM4 = _mm_load_ps((y)+i ); \ __m128 XMM5 = _mm_load_ps((y)+i+ 4); \ __m128 XMM6 = _mm_load_ps((y)+i+ 8); \ __m128 XMM7 = _mm_load_ps((y)+i+12); \ XMM4 = _mm_mul_ps(XMM4, XMM0); \ XMM5 = _mm_mul_ps(XMM5, XMM1); \ XMM6 = _mm_mul_ps(XMM6, XMM2); \ XMM7 = _mm_mul_ps(XMM7, XMM3); \ _mm_store_ps((y)+i , XMM4); \ _mm_store_ps((y)+i+ 4, XMM5); \ _mm_store_ps((y)+i+ 8, XMM6); \ _mm_store_ps((y)+i+12, XMM7); \ } \ } #if 3 <= __SSE__ || defined(__SSE3__) /* Horizontal add with haddps SSE3 instruction. The work register (rw) is unused. */ #define __horizontal_sum(r, rw) \ r = _mm_hadd_ps(r, r); \ r = _mm_hadd_ps(r, r); #else /* Horizontal add with SSE instruction. The work register (rw) is used. 
*/ #define __horizontal_sum(r, rw) \ rw = r; \ r = _mm_shuffle_ps(r, rw, _MM_SHUFFLE(1, 0, 3, 2)); \ r = _mm_add_ps(r, rw); \ rw = r; \ r = _mm_shuffle_ps(r, rw, _MM_SHUFFLE(2, 3, 0, 1)); \ r = _mm_add_ps(r, rw); #endif #define vecdot(s, x, y, n) \ { \ int i; \ __m128 XMM0 = _mm_setzero_ps(); \ __m128 XMM1 = _mm_setzero_ps(); \ __m128 XMM2, XMM3, XMM4, XMM5; \ for (i = 0;i < (n);i += 8) { \ XMM2 = _mm_load_ps((x)+i ); \ XMM3 = _mm_load_ps((x)+i+4); \ XMM4 = _mm_load_ps((y)+i ); \ XMM5 = _mm_load_ps((y)+i+4); \ XMM2 = _mm_mul_ps(XMM2, XMM4); \ XMM3 = _mm_mul_ps(XMM3, XMM5); \ XMM0 = _mm_add_ps(XMM0, XMM2); \ XMM1 = _mm_add_ps(XMM1, XMM3); \ } \ XMM0 = _mm_add_ps(XMM0, XMM1); \ __horizontal_sum(XMM0, XMM1); \ _mm_store_ss((s), XMM0); \ } #define vec2norm(s, x, n) \ { \ int i; \ __m128 XMM0 = _mm_setzero_ps(); \ __m128 XMM1 = _mm_setzero_ps(); \ __m128 XMM2, XMM3; \ for (i = 0;i < (n);i += 8) { \ XMM2 = _mm_load_ps((x)+i ); \ XMM3 = _mm_load_ps((x)+i+4); \ XMM2 = _mm_mul_ps(XMM2, XMM2); \ XMM3 = _mm_mul_ps(XMM3, XMM3); \ XMM0 = _mm_add_ps(XMM0, XMM2); \ XMM1 = _mm_add_ps(XMM1, XMM3); \ } \ XMM0 = _mm_add_ps(XMM0, XMM1); \ __horizontal_sum(XMM0, XMM1); \ XMM2 = XMM0; \ XMM1 = _mm_rsqrt_ss(XMM0); \ XMM3 = XMM1; \ XMM1 = _mm_mul_ss(XMM1, XMM1); \ XMM1 = _mm_mul_ss(XMM1, XMM3); \ XMM1 = _mm_mul_ss(XMM1, XMM0); \ XMM1 = _mm_mul_ss(XMM1, _mm_set_ss(-0.5f)); \ XMM3 = _mm_mul_ss(XMM3, _mm_set_ss(1.5f)); \ XMM3 = _mm_add_ss(XMM3, XMM1); \ XMM3 = _mm_mul_ss(XMM3, XMM2); \ _mm_store_ss((s), XMM3); \ } #define vec2norminv(s, x, n) \ { \ int i; \ __m128 XMM0 = _mm_setzero_ps(); \ __m128 XMM1 = _mm_setzero_ps(); \ __m128 XMM2, XMM3; \ for (i = 0;i < (n);i += 16) { \ XMM2 = _mm_load_ps((x)+i ); \ XMM3 = _mm_load_ps((x)+i+4); \ XMM2 = _mm_mul_ps(XMM2, XMM2); \ XMM3 = _mm_mul_ps(XMM3, XMM3); \ XMM0 = _mm_add_ps(XMM0, XMM2); \ XMM1 = _mm_add_ps(XMM1, XMM3); \ } \ XMM0 = _mm_add_ps(XMM0, XMM1); \ __horizontal_sum(XMM0, XMM1); \ XMM2 = XMM0; \ XMM1 = _mm_rsqrt_ss(XMM0); \ XMM3 = XMM1; \ XMM1 = _mm_mul_ss(XMM1, XMM1); \ XMM1 = _mm_mul_ss(XMM1, XMM3); \ XMM1 = _mm_mul_ss(XMM1, XMM0); \ XMM1 = _mm_mul_ss(XMM1, _mm_set_ss(-0.5f)); \ XMM3 = _mm_mul_ss(XMM3, _mm_set_ss(1.5f)); \ XMM3 = _mm_add_ss(XMM3, XMM1); \ _mm_store_ss((s), XMM3); \ } relion-3.1.3/src/jaz/lbfgs/copyright.txt000066400000000000000000000023301411340063500202220ustar00rootroot00000000000000/* * C library of Limited memory BFGS (L-BFGS). * * Copyright (c) 1990, Jorge Nocedal * Copyright (c) 2007-2010 Naoaki Okazaki * All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ relion-3.1.3/src/jaz/lbfgs/lbfgs.c000066400000000000000000001177551411340063500167340ustar00rootroot00000000000000/* * Limited memory BFGS (L-BFGS). * * Copyright (c) 1990, Jorge Nocedal * Copyright (c) 2007-2010 Naoaki Okazaki * All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ /* $Id$ */ /* This library is a C port of the FORTRAN implementation of Limited-memory Broyden-Fletcher-Goldfarb-Shanno (L-BFGS) method written by Jorge Nocedal. The original FORTRAN source code is available at: http://www.ece.northwestern.edu/~nocedal/lbfgs.html The L-BFGS algorithm is described in: - Jorge Nocedal. Updating Quasi-Newton Matrices with Limited Storage. Mathematics of Computation, Vol. 35, No. 151, pp. 773--782, 1980. - Dong C. Liu and Jorge Nocedal. On the limited memory BFGS method for large scale optimization. Mathematical Programming B, Vol. 45, No. 3, pp. 503-528, 1989. The line search algorithms used in this implementation are described in: - John E. Dennis and Robert B. Schnabel. Numerical Methods for Unconstrained Optimization and Nonlinear Equations, Englewood Cliffs, 1983. - Jorge J. More and David J. Thuente. Line search algorithm with guaranteed sufficient decrease. ACM Transactions on Mathematical Software (TOMS), Vol. 20, No. 3, pp. 286-307, 1994. This library also implements Orthant-Wise Limited-memory Quasi-Newton (OWL-QN) method presented in: - Galen Andrew and Jianfeng Gao. Scalable training of L1-regularized log-linear models. In Proceedings of the 24th International Conference on Machine Learning (ICML 2007), pp. 33-40, 2007. I would like to thank the original author, Jorge Nocedal, who has been distributing the effieicnt and explanatory implementation in an open source licence. */ #ifdef HAVE_CONFIG_H #include #endif/*HAVE_CONFIG_H*/ #include #include #include #include #include "lbfgs.h" #ifdef _MSC_VER #define inline __inline #endif/*_MSC_VER*/ #if defined(USE_SSE) && defined(__SSE2__) && LBFGS_FLOAT == 64 /* Use SSE2 optimization for 64bit double precision. */ #include "arithmetic_sse_double.h" #elif defined(USE_SSE) && defined(__SSE__) && LBFGS_FLOAT == 32 /* Use SSE optimization for 32bit float precision. */ #include "arithmetic_sse_float.h" #else /* No CPU specific optimization. 
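   The fallback header provides the same vector kernels (veccpy, vecadd,
   vecdot, vec2norm, ...) as plain C loops, so the optimizer builds on any
   compiler. For illustration only (a sketch, not the actual contents of
   arithmetic_ansi.h, which may differ in detail), a scalar vecdot amounts to:

       #define vecdot(s, x, y, n) \
       { \
           int i_; \
           *(s) = 0.; \
           for (i_ = 0; i_ < (n); ++i_) *(s) += (x)[i_] * (y)[i_]; \
       }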
*/ #include "arithmetic_ansi.h" #endif #define min2(a, b) ((a) <= (b) ? (a) : (b)) #define max2(a, b) ((a) >= (b) ? (a) : (b)) #define max3(a, b, c) max2(max2((a), (b)), (c)); struct tag_callback_data { int n; void *instance; lbfgs_evaluate_t proc_evaluate; lbfgs_progress_t proc_progress; }; typedef struct tag_callback_data callback_data_t; struct tag_iteration_data { lbfgsfloatval_t alpha; lbfgsfloatval_t *s; /* [n] */ lbfgsfloatval_t *y; /* [n] */ lbfgsfloatval_t ys; /* vecdot(y, s) */ }; typedef struct tag_iteration_data iteration_data_t; static const lbfgs_parameter_t _defparam = { 6, 1e-5, 0, 1e-5, 0, LBFGS_LINESEARCH_DEFAULT, 40, 1e-20, 1e20, 1e-4, 0.9, 0.9, 1.0e-16, 0.0, 0, -1, }; /* Forward function declarations. */ typedef int (*line_search_proc)( int n, lbfgsfloatval_t *x, lbfgsfloatval_t *f, lbfgsfloatval_t *g, lbfgsfloatval_t *s, lbfgsfloatval_t *stp, const lbfgsfloatval_t* xp, const lbfgsfloatval_t* gp, lbfgsfloatval_t *wa, callback_data_t *cd, const lbfgs_parameter_t *param ); static int line_search_backtracking( int n, lbfgsfloatval_t *x, lbfgsfloatval_t *f, lbfgsfloatval_t *g, lbfgsfloatval_t *s, lbfgsfloatval_t *stp, const lbfgsfloatval_t* xp, const lbfgsfloatval_t* gp, lbfgsfloatval_t *wa, callback_data_t *cd, const lbfgs_parameter_t *param ); static int line_search_backtracking_owlqn( int n, lbfgsfloatval_t *x, lbfgsfloatval_t *f, lbfgsfloatval_t *g, lbfgsfloatval_t *s, lbfgsfloatval_t *stp, const lbfgsfloatval_t* xp, const lbfgsfloatval_t* gp, lbfgsfloatval_t *wp, callback_data_t *cd, const lbfgs_parameter_t *param ); static int line_search_morethuente( int n, lbfgsfloatval_t *x, lbfgsfloatval_t *f, lbfgsfloatval_t *g, lbfgsfloatval_t *s, lbfgsfloatval_t *stp, const lbfgsfloatval_t* xp, const lbfgsfloatval_t* gp, lbfgsfloatval_t *wa, callback_data_t *cd, const lbfgs_parameter_t *param ); static int update_trial_interval( lbfgsfloatval_t *x, lbfgsfloatval_t *fx, lbfgsfloatval_t *dx, lbfgsfloatval_t *y, lbfgsfloatval_t *fy, lbfgsfloatval_t *dy, lbfgsfloatval_t *t, lbfgsfloatval_t *ft, lbfgsfloatval_t *dt, const lbfgsfloatval_t tmin, const lbfgsfloatval_t tmax, int *brackt ); static lbfgsfloatval_t owlqn_x1norm( const lbfgsfloatval_t* x, const int start, const int n ); static void owlqn_pseudo_gradient( lbfgsfloatval_t* pg, const lbfgsfloatval_t* x, const lbfgsfloatval_t* g, const int n, const lbfgsfloatval_t c, const int start, const int end ); static void owlqn_project( lbfgsfloatval_t* d, const lbfgsfloatval_t* sign, const int start, const int end ); #if defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__)) static int round_out_variables(int n) { n += 7; n /= 8; n *= 8; return n; } #endif/*defined(USE_SSE)*/ lbfgsfloatval_t* lbfgs_malloc(int n) { #if defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__)) n = round_out_variables(n); #endif/*defined(USE_SSE)*/ return (lbfgsfloatval_t*)vecalloc(sizeof(lbfgsfloatval_t) * n); } void lbfgs_free(lbfgsfloatval_t *x) { vecfree(x); } void lbfgs_parameter_init(lbfgs_parameter_t *param) { memcpy(param, &_defparam, sizeof(*param)); } int lbfgs( int n, lbfgsfloatval_t *x, lbfgsfloatval_t *ptr_fx, lbfgs_evaluate_t proc_evaluate, lbfgs_progress_t proc_progress, void *instance, lbfgs_parameter_t *_param ) { int ret; int i, j, k, ls, end, bound; lbfgsfloatval_t step; /* Constant parameters and their default values. */ lbfgs_parameter_t param = (_param != NULL) ? 
(*_param) : _defparam; const int m = param.m; lbfgsfloatval_t *xp = NULL; lbfgsfloatval_t *g = NULL, *gp = NULL, *pg = NULL; lbfgsfloatval_t *d = NULL, *w = NULL, *pf = NULL; iteration_data_t *lm = NULL, *it = NULL; lbfgsfloatval_t ys, yy; lbfgsfloatval_t xnorm, gnorm, beta; lbfgsfloatval_t fx = 0.; lbfgsfloatval_t rate = 0.; line_search_proc linesearch = line_search_morethuente; /* Construct a callback data. */ callback_data_t cd; cd.n = n; cd.instance = instance; cd.proc_evaluate = proc_evaluate; cd.proc_progress = proc_progress; #if defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__)) /* Round out the number of variables. */ n = round_out_variables(n); #endif/*defined(USE_SSE)*/ /* Check the input parameters for errors. */ if (n <= 0) { return LBFGSERR_INVALID_N; } #if defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__)) if (n % 8 != 0) { return LBFGSERR_INVALID_N_SSE; } if ((uintptr_t)(const void*)x % 16 != 0) { return LBFGSERR_INVALID_X_SSE; } #endif/*defined(USE_SSE)*/ if (param.epsilon < 0.) { return LBFGSERR_INVALID_EPSILON; } if (param.past < 0) { return LBFGSERR_INVALID_TESTPERIOD; } if (param.delta < 0.) { return LBFGSERR_INVALID_DELTA; } if (param.min_step < 0.) { return LBFGSERR_INVALID_MINSTEP; } if (param.max_step < param.min_step) { return LBFGSERR_INVALID_MAXSTEP; } if (param.ftol < 0.) { return LBFGSERR_INVALID_FTOL; } if (param.linesearch == LBFGS_LINESEARCH_BACKTRACKING_WOLFE || param.linesearch == LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE) { if (param.wolfe <= param.ftol || 1. <= param.wolfe) { return LBFGSERR_INVALID_WOLFE; } } if (param.gtol < 0.) { return LBFGSERR_INVALID_GTOL; } if (param.xtol < 0.) { return LBFGSERR_INVALID_XTOL; } if (param.max_linesearch <= 0) { return LBFGSERR_INVALID_MAXLINESEARCH; } if (param.orthantwise_c < 0.) { return LBFGSERR_INVALID_ORTHANTWISE; } if (param.orthantwise_start < 0 || n < param.orthantwise_start) { return LBFGSERR_INVALID_ORTHANTWISE_START; } if (param.orthantwise_end < 0) { param.orthantwise_end = n; } if (n < param.orthantwise_end) { return LBFGSERR_INVALID_ORTHANTWISE_END; } if (param.orthantwise_c != 0.) { switch (param.linesearch) { case LBFGS_LINESEARCH_BACKTRACKING: linesearch = line_search_backtracking_owlqn; break; default: /* Only the backtracking method is available. */ return LBFGSERR_INVALID_LINESEARCH; } } else { switch (param.linesearch) { case LBFGS_LINESEARCH_MORETHUENTE: linesearch = line_search_morethuente; break; case LBFGS_LINESEARCH_BACKTRACKING_ARMIJO: case LBFGS_LINESEARCH_BACKTRACKING_WOLFE: case LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE: linesearch = line_search_backtracking; break; default: return LBFGSERR_INVALID_LINESEARCH; } } /* Allocate working space. */ xp = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t)); g = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t)); gp = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t)); d = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t)); w = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t)); if (xp == NULL || g == NULL || gp == NULL || d == NULL || w == NULL) { ret = LBFGSERR_OUTOFMEMORY; goto lbfgs_exit; } if (param.orthantwise_c != 0.) { /* Allocate working space for OW-LQN. */ pg = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t)); if (pg == NULL) { ret = LBFGSERR_OUTOFMEMORY; goto lbfgs_exit; } } /* Allocate limited memory storage. */ lm = (iteration_data_t*)vecalloc(m * sizeof(iteration_data_t)); if (lm == NULL) { ret = LBFGSERR_OUTOFMEMORY; goto lbfgs_exit; } /* Initialize the limited memory. 
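       Each of the m slots holds one correction pair for the two-loop
       recursion: s = x_{k+1} - x_{k}, y = g_{k+1} - g_{k}, the scalar
       ys = y^T s (the reciprocal of rho), and the alpha last computed for
       that pair. The slots are reused cyclically through the index `end`
       further below.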
*/ for (i = 0;i < m;++i) { it = &lm[i]; it->alpha = 0; it->ys = 0; it->s = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t)); it->y = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t)); if (it->s == NULL || it->y == NULL) { ret = LBFGSERR_OUTOFMEMORY; goto lbfgs_exit; } } /* Allocate an array for storing previous values of the objective function. */ if (0 < param.past) { pf = (lbfgsfloatval_t*)vecalloc(param.past * sizeof(lbfgsfloatval_t)); } /* Evaluate the function value and its gradient. */ fx = cd.proc_evaluate(cd.instance, x, g, cd.n, 0); if (0. != param.orthantwise_c) { /* Compute the L1 norm of the variable and add it to the object value. */ xnorm = owlqn_x1norm(x, param.orthantwise_start, param.orthantwise_end); fx += xnorm * param.orthantwise_c; owlqn_pseudo_gradient( pg, x, g, n, param.orthantwise_c, param.orthantwise_start, param.orthantwise_end ); } /* Store the initial value of the objective function. */ if (pf != NULL) { pf[0] = fx; } /* Compute the direction; we assume the initial hessian matrix H_0 as the identity matrix. */ if (param.orthantwise_c == 0.) { vecncpy(d, g, n); } else { vecncpy(d, pg, n); } /* Make sure that the initial variables are not a minimizer. */ vec2norm(&xnorm, x, n); if (param.orthantwise_c == 0.) { vec2norm(&gnorm, g, n); } else { vec2norm(&gnorm, pg, n); } if (xnorm < 1.0) xnorm = 1.0; if (gnorm / xnorm <= param.epsilon) { ret = LBFGS_ALREADY_MINIMIZED; goto lbfgs_exit; } /* Compute the initial step: step = 1.0 / sqrt(vecdot(d, d, n)) */ vec2norminv(&step, d, n); k = 1; end = 0; for (;;) { /* Store the current position and gradient vectors. */ veccpy(xp, x, n); veccpy(gp, g, n); /* Search for an optimal step. */ if (param.orthantwise_c == 0.) { ls = linesearch(n, x, &fx, g, d, &step, xp, gp, w, &cd, ¶m); } else { ls = linesearch(n, x, &fx, g, d, &step, xp, pg, w, &cd, ¶m); owlqn_pseudo_gradient( pg, x, g, n, param.orthantwise_c, param.orthantwise_start, param.orthantwise_end ); } if (ls < 0) { /* Revert to the previous point. */ veccpy(x, xp, n); veccpy(g, gp, n); ret = ls; goto lbfgs_exit; } /* Compute x and g norms. */ vec2norm(&xnorm, x, n); if (param.orthantwise_c == 0.) { vec2norm(&gnorm, g, n); } else { vec2norm(&gnorm, pg, n); } /* Report the progress. */ if (cd.proc_progress) { if ((ret = cd.proc_progress(cd.instance, x, g, fx, xnorm, gnorm, step, cd.n, k, ls))) { goto lbfgs_exit; } } /* Convergence test. The criterion is given by the following formula: |g(x)| / \max(1, |x|) < \epsilon */ if (xnorm < 1.0) xnorm = 1.0; if (gnorm / xnorm <= param.epsilon) { /* Convergence. */ ret = LBFGS_SUCCESS; break; } /* Test for stopping criterion. The criterion is given by the following formula: (f(past_x) - f(x)) / f(x) < \delta */ if (pf != NULL) { /* We don't test the stopping criterion while k < past. */ if (param.past <= k) { /* Compute the relative improvement from the past. */ rate = (pf[k % param.past] - fx) / fx; /* The stopping criterion. */ if (rate < param.delta) { ret = LBFGS_STOP; break; } } /* Store the current value of the objective function. */ pf[k % param.past] = fx; } if (param.max_iterations != 0 && param.max_iterations < k+1) { /* Maximum number of iterations. */ ret = LBFGSERR_MAXIMUMITERATION; break; } /* Update vectors s and y: s_{k+1} = x_{k+1} - x_{k} = \step * d_{k}. y_{k+1} = g_{k+1} - g_{k}. */ it = &lm[end]; vecdiff(it->s, x, xp, n); vecdiff(it->y, g, gp, n); /* Compute scalars ys and yy: ys = y^t \cdot s = 1 / \rho. yy = y^t \cdot y. Notice that yy is used for scaling the hessian matrix H_0 (Cholesky factor). 
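       Concretely, the initial inverse hessian approximation of this iteration
       is taken to be the scaled identity H_0 = (ys / yy) I = (s^T y / y^T y) I;
       this is what the vecscale(d, ys / yy, n) call between the two loops of
       the recursion below applies.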
*/ vecdot(&ys, it->y, it->s, n); vecdot(&yy, it->y, it->y, n); it->ys = ys; /* Recursive formula to compute dir = -(H \cdot g). This is described in page 779 of: Jorge Nocedal. Updating Quasi-Newton Matrices with Limited Storage. Mathematics of Computation, Vol. 35, No. 151, pp. 773--782, 1980. */ bound = (m <= k) ? m : k; ++k; end = (end + 1) % m; /* Compute the steepest direction. */ if (param.orthantwise_c == 0.) { /* Compute the negative of gradients. */ vecncpy(d, g, n); } else { vecncpy(d, pg, n); } j = end; for (i = 0;i < bound;++i) { j = (j + m - 1) % m; /* if (--j == -1) j = m-1; */ it = &lm[j]; /* \alpha_{j} = \rho_{j} s^{t}_{j} \cdot q_{k+1}. */ vecdot(&it->alpha, it->s, d, n); it->alpha /= it->ys; /* q_{i} = q_{i+1} - \alpha_{i} y_{i}. */ vecadd(d, it->y, -it->alpha, n); } vecscale(d, ys / yy, n); for (i = 0;i < bound;++i) { it = &lm[j]; /* \beta_{j} = \rho_{j} y^t_{j} \cdot \gamma_{i}. */ vecdot(&beta, it->y, d, n); beta /= it->ys; /* \gamma_{i+1} = \gamma_{i} + (\alpha_{j} - \beta_{j}) s_{j}. */ vecadd(d, it->s, it->alpha - beta, n); j = (j + 1) % m; /* if (++j == m) j = 0; */ } /* Constrain the search direction for orthant-wise updates. */ if (param.orthantwise_c != 0.) { for (i = param.orthantwise_start;i < param.orthantwise_end;++i) { if (d[i] * pg[i] >= 0) { d[i] = 0; } } } /* Now the search direction d is ready. We try step = 1 first. */ step = 1.0; } lbfgs_exit: /* Return the final value of the objective function. */ if (ptr_fx != NULL) { *ptr_fx = fx; } vecfree(pf); /* Free memory blocks used by this function. */ if (lm != NULL) { for (i = 0;i < m;++i) { vecfree(lm[i].s); vecfree(lm[i].y); } vecfree(lm); } vecfree(pg); vecfree(w); vecfree(d); vecfree(gp); vecfree(g); vecfree(xp); return ret; } static int line_search_backtracking( int n, lbfgsfloatval_t *x, lbfgsfloatval_t *f, lbfgsfloatval_t *g, lbfgsfloatval_t *s, lbfgsfloatval_t *stp, const lbfgsfloatval_t* xp, const lbfgsfloatval_t* gp, lbfgsfloatval_t *wp, callback_data_t *cd, const lbfgs_parameter_t *param ) { int count = 0; lbfgsfloatval_t width, dg; lbfgsfloatval_t finit, dginit = 0., dgtest; const lbfgsfloatval_t dec = 0.5, inc = 2.1; /* Check the input parameters for errors. */ if (*stp <= 0.) { return LBFGSERR_INVALIDPARAMETERS; } /* Compute the initial gradient in the search direction. */ vecdot(&dginit, g, s, n); /* Make sure that s points to a descent direction. */ if (0 < dginit) { return LBFGSERR_INCREASEGRADIENT; } /* The initial value of the objective function. */ finit = *f; dgtest = param->ftol * dginit; for (;;) { veccpy(x, xp, n); vecadd(x, s, *stp, n); /* Evaluate the function and gradient values. */ *f = cd->proc_evaluate(cd->instance, x, g, cd->n, *stp); ++count; if (*f > finit + *stp * dgtest) { width = dec; } else { /* The sufficient decrease condition (Armijo condition). */ if (param->linesearch == LBFGS_LINESEARCH_BACKTRACKING_ARMIJO) { /* Exit with the Armijo condition. */ return count; } /* Check the Wolfe condition. */ vecdot(&dg, g, s, n); if (dg < param->wolfe * dginit) { width = inc; } else { if(param->linesearch == LBFGS_LINESEARCH_BACKTRACKING_WOLFE) { /* Exit with the regular Wolfe condition. */ return count; } /* Check the strong Wolfe condition. */ if(dg > -param->wolfe * dginit) { width = dec; } else { /* Exit with the strong Wolfe condition. */ return count; } } } if (*stp < param->min_step) { /* The step is the minimum value. */ return LBFGSERR_MINIMUMSTEP; } if (*stp > param->max_step) { /* The step is the maximum value. 
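        Note that the enclosing loop rescales *stp by `width` on every trial:
        width = dec (0.5) shrinks the step when the sufficient decrease test or
        the strong Wolfe upper test fails, width = inc (2.1) grows it when the
        curvature test fails, and the search is abandoned once the step leaves
        [min_step, max_step] or max_linesearch trials have been used.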
*/ return LBFGSERR_MAXIMUMSTEP; } if (param->max_linesearch <= count) { /* Maximum number of iteration. */ return LBFGSERR_MAXIMUMLINESEARCH; } (*stp) *= width; } } static int line_search_backtracking_owlqn( int n, lbfgsfloatval_t *x, lbfgsfloatval_t *f, lbfgsfloatval_t *g, lbfgsfloatval_t *s, lbfgsfloatval_t *stp, const lbfgsfloatval_t* xp, const lbfgsfloatval_t* gp, lbfgsfloatval_t *wp, callback_data_t *cd, const lbfgs_parameter_t *param ) { int i, count = 0; lbfgsfloatval_t width = 0.5, norm = 0.; lbfgsfloatval_t finit = *f, dgtest; /* Check the input parameters for errors. */ if (*stp <= 0.) { return LBFGSERR_INVALIDPARAMETERS; } /* Choose the orthant for the new point. */ for (i = 0;i < n;++i) { wp[i] = (xp[i] == 0.) ? -gp[i] : xp[i]; } for (;;) { /* Update the current point. */ veccpy(x, xp, n); vecadd(x, s, *stp, n); /* The current point is projected onto the orthant. */ owlqn_project(x, wp, param->orthantwise_start, param->orthantwise_end); /* Evaluate the function and gradient values. */ *f = cd->proc_evaluate(cd->instance, x, g, cd->n, *stp); /* Compute the L1 norm of the variables and add it to the object value. */ norm = owlqn_x1norm(x, param->orthantwise_start, param->orthantwise_end); *f += norm * param->orthantwise_c; ++count; dgtest = 0.; for (i = 0;i < n;++i) { dgtest += (x[i] - xp[i]) * gp[i]; } if (*f <= finit + param->ftol * dgtest) { /* The sufficient decrease condition. */ return count; } if (*stp < param->min_step) { /* The step is the minimum value. */ return LBFGSERR_MINIMUMSTEP; } if (*stp > param->max_step) { /* The step is the maximum value. */ return LBFGSERR_MAXIMUMSTEP; } if (param->max_linesearch <= count) { /* Maximum number of iteration. */ return LBFGSERR_MAXIMUMLINESEARCH; } (*stp) *= width; } } static int line_search_morethuente( int n, lbfgsfloatval_t *x, lbfgsfloatval_t *f, lbfgsfloatval_t *g, lbfgsfloatval_t *s, lbfgsfloatval_t *stp, const lbfgsfloatval_t* xp, const lbfgsfloatval_t* gp, lbfgsfloatval_t *wa, callback_data_t *cd, const lbfgs_parameter_t *param ) { int count = 0; int brackt, stage1, uinfo = 0; lbfgsfloatval_t dg; lbfgsfloatval_t stx, fx, dgx; lbfgsfloatval_t sty, fy, dgy; lbfgsfloatval_t fxm, dgxm, fym, dgym, fm, dgm; lbfgsfloatval_t finit, ftest1, dginit, dgtest; lbfgsfloatval_t width, prev_width; lbfgsfloatval_t stmin, stmax; /* Check the input parameters for errors. */ if (*stp <= 0.) { return LBFGSERR_INVALIDPARAMETERS; } /* Compute the initial gradient in the search direction. */ vecdot(&dginit, g, s, n); /* Make sure that s points to a descent direction. */ if (0 < dginit) { return LBFGSERR_INCREASEGRADIENT; } /* Initialize local variables. */ brackt = 0; stage1 = 1; finit = *f; dgtest = param->ftol * dginit; width = param->max_step - param->min_step; prev_width = 2.0 * width; /* The variables stx, fx, dgx contain the values of the step, function, and directional derivative at the best step. The variables sty, fy, dgy contain the value of the step, function, and derivative at the other endpoint of the interval of uncertainty. The variables stp, f, dg contain the values of the step, function, and derivative at the current step. */ stx = sty = 0.; fx = fy = finit; dgx = dgy = dginit; for (;;) { /* Set the minimum and maximum steps to correspond to the present interval of uncertainty. */ if (brackt) { stmin = min2(stx, sty); stmax = max2(stx, sty); } else { stmin = stx; stmax = *stp + 4.0 * (*stp - stx); } /* Clip the step in the range of [stpmin, stpmax]. 
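        Overview of the More-Thuente iteration around this point: the interval
        [stx, sty] brackets the minimizer once `brackt` is set, the trial step
        *stp is chosen by update_trial_interval(), and the search succeeds when
        both the sufficient decrease condition
            f(x + stp*s) <= finit + stp * ftol * dginit
        and the directional derivative condition
            |g(x + stp*s)^T s| <= gtol * (-dginit)
        hold.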
*/ if (*stp < param->min_step) *stp = param->min_step; if (param->max_step < *stp) *stp = param->max_step; /* If an unusual termination is to occur then let stp be the lowest point obtained so far. */ if ((brackt && ((*stp <= stmin || stmax <= *stp) || param->max_linesearch <= count + 1 || uinfo != 0)) || (brackt && (stmax - stmin <= param->xtol * stmax))) { *stp = stx; } /* Compute the current value of x: x <- x + (*stp) * s. */ veccpy(x, xp, n); vecadd(x, s, *stp, n); /* Evaluate the function and gradient values. */ *f = cd->proc_evaluate(cd->instance, x, g, cd->n, *stp); vecdot(&dg, g, s, n); ftest1 = finit + *stp * dgtest; ++count; /* Test for errors and convergence. */ if (brackt && ((*stp <= stmin || stmax <= *stp) || uinfo != 0)) { /* Rounding errors prevent further progress. */ return LBFGSERR_ROUNDING_ERROR; } if (*stp == param->max_step && *f <= ftest1 && dg <= dgtest) { /* The step is the maximum value. */ return LBFGSERR_MAXIMUMSTEP; } if (*stp == param->min_step && (ftest1 < *f || dgtest <= dg)) { /* The step is the minimum value. */ return LBFGSERR_MINIMUMSTEP; } if (brackt && (stmax - stmin) <= param->xtol * stmax) { /* Relative width of the interval of uncertainty is at most xtol. */ return LBFGSERR_WIDTHTOOSMALL; } if (param->max_linesearch <= count) { /* Maximum number of iteration. */ return LBFGSERR_MAXIMUMLINESEARCH; } if (*f <= ftest1 && fabs(dg) <= param->gtol * (-dginit)) { /* The sufficient decrease condition and the directional derivative condition hold. */ return count; } /* In the first stage we seek a step for which the modified function has a nonpositive value and nonnegative derivative. */ if (stage1 && *f <= ftest1 && min2(param->ftol, param->gtol) * dginit <= dg) { stage1 = 0; } /* A modified function is used to predict the step only if we have not obtained a step for which the modified function has a nonpositive function value and nonnegative derivative, and if a lower function value has been obtained but the decrease is not sufficient. */ if (stage1 && ftest1 < *f && *f <= fx) { /* Define the modified function and derivative values. */ fm = *f - *stp * dgtest; fxm = fx - stx * dgtest; fym = fy - sty * dgtest; dgm = dg - dgtest; dgxm = dgx - dgtest; dgym = dgy - dgtest; /* Call update_trial_interval() to update the interval of uncertainty and to compute the new step. */ uinfo = update_trial_interval( &stx, &fxm, &dgxm, &sty, &fym, &dgym, stp, &fm, &dgm, stmin, stmax, &brackt ); /* Reset the function and gradient values for f. */ fx = fxm + stx * dgtest; fy = fym + sty * dgtest; dgx = dgxm + dgtest; dgy = dgym + dgtest; } else { /* Call update_trial_interval() to update the interval of uncertainty and to compute the new step. */ uinfo = update_trial_interval( &stx, &fx, &dgx, &sty, &fy, &dgy, stp, f, &dg, stmin, stmax, &brackt ); } /* Force a sufficient decrease in the interval of uncertainty. */ if (brackt) { if (0.66 * prev_width <= fabs(sty - stx)) { *stp = stx + 0.5 * (sty - stx); } prev_width = width; width = fabs(sty - stx); } } return LBFGSERR_LOGICERROR; } /** * Define the local variables for computing minimizers. */ #define USES_MINIMIZER \ lbfgsfloatval_t a, d, gamma, theta, p, q, r, s; /** * Find a minimizer of an interpolated cubic function. * @param cm The minimizer of the interpolated cubic. * @param u The value of one point, u. * @param fu The value of f(u). * @param du The value of f'(u). * @param v The value of another point, v. * @param fv The value of f(v). * @param du The value of f'(v). 
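 * (For reference, the macro evaluates the minimizer of the cubic interpolant
 * through (u, fu) and (v, fv) with slopes du and dv:
 *     theta = 3*(fu - fv)/(v - u) + du + dv,
 *     gamma = sqrt(theta^2 - du*dv),
 * computed below as s*sqrt((theta/s)^2 - (du/s)*(dv/s)) with
 * s = max(|theta|, |du|, |dv|) for numerical stability, and with the sign of
 * gamma flipped when v < u; the minimizer is then
 *     cm = u + (gamma - du + theta) / (2*gamma - du + dv) * (v - u).)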
*/ #define CUBIC_MINIMIZER(cm, u, fu, du, v, fv, dv) \ d = (v) - (u); \ theta = ((fu) - (fv)) * 3 / d + (du) + (dv); \ p = fabs(theta); \ q = fabs(du); \ r = fabs(dv); \ s = max3(p, q, r); \ /* gamma = s*sqrt((theta/s)**2 - (du/s) * (dv/s)) */ \ a = theta / s; \ gamma = s * sqrt(a * a - ((du) / s) * ((dv) / s)); \ if ((v) < (u)) gamma = -gamma; \ p = gamma - (du) + theta; \ q = gamma - (du) + gamma + (dv); \ r = p / q; \ (cm) = (u) + r * d; /** * Find a minimizer of an interpolated cubic function. * @param cm The minimizer of the interpolated cubic. * @param u The value of one point, u. * @param fu The value of f(u). * @param du The value of f'(u). * @param v The value of another point, v. * @param fv The value of f(v). * @param du The value of f'(v). * @param xmin The maximum value. * @param xmin The minimum value. */ #define CUBIC_MINIMIZER2(cm, u, fu, du, v, fv, dv, xmin, xmax) \ d = (v) - (u); \ theta = ((fu) - (fv)) * 3 / d + (du) + (dv); \ p = fabs(theta); \ q = fabs(du); \ r = fabs(dv); \ s = max3(p, q, r); \ /* gamma = s*sqrt((theta/s)**2 - (du/s) * (dv/s)) */ \ a = theta / s; \ gamma = s * sqrt(max2(0, a * a - ((du) / s) * ((dv) / s))); \ if ((u) < (v)) gamma = -gamma; \ p = gamma - (dv) + theta; \ q = gamma - (dv) + gamma + (du); \ r = p / q; \ if (r < 0. && gamma != 0.) { \ (cm) = (v) - r * d; \ } else if (a < 0) { \ (cm) = (xmax); \ } else { \ (cm) = (xmin); \ } /** * Find a minimizer of an interpolated quadratic function. * @param qm The minimizer of the interpolated quadratic. * @param u The value of one point, u. * @param fu The value of f(u). * @param du The value of f'(u). * @param v The value of another point, v. * @param fv The value of f(v). */ #define QUARD_MINIMIZER(qm, u, fu, du, v, fv) \ a = (v) - (u); \ (qm) = (u) + (du) / (((fu) - (fv)) / a + (du)) / 2 * a; /** * Find a minimizer of an interpolated quadratic function. * @param qm The minimizer of the interpolated quadratic. * @param u The value of one point, u. * @param du The value of f'(u). * @param v The value of another point, v. * @param dv The value of f'(v). */ #define QUARD_MINIMIZER2(qm, u, du, v, dv) \ a = (u) - (v); \ (qm) = (v) + (dv) / ((dv) - (du)) * a; /** * Update a safeguarded trial value and interval for line search. * * The parameter x represents the step with the least function value. * The parameter t represents the current step. This function assumes * that the derivative at the point of x in the direction of the step. * If the bracket is set to true, the minimizer has been bracketed in * an interval of uncertainty with endpoints between x and y. * * @param x The pointer to the value of one endpoint. * @param fx The pointer to the value of f(x). * @param dx The pointer to the value of f'(x). * @param y The pointer to the value of another endpoint. * @param fy The pointer to the value of f(y). * @param dy The pointer to the value of f'(y). * @param t The pointer to the value of the trial value, t. * @param ft The pointer to the value of f(t). * @param dt The pointer to the value of f'(t). * @param tmin The minimum value for the trial value, t. * @param tmax The maximum value for the trial value, t. * @param brackt The pointer to the predicate if the trial value is * bracketed. * @retval int Status value. Zero indicates a normal termination. * * @see * Jorge J. More and David J. Thuente. Line search algorithm with * guaranteed sufficient decrease. ACM Transactions on Mathematical * Software (TOMS), Vol 20, No 3, pp. 286-307, 1994. 
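 * In outline, the new trial value is taken from the cubic and the quadratic
 * (secant) interpolants of (x, fx, dx) and (t, ft, dt). Which interpolant is
 * used, and whether the minimizer becomes bracketed, depends on four cases:
 * a higher function value at t; a lower value with a derivative of opposite
 * sign; a lower value with a smaller derivative of the same sign; and a lower
 * value with a derivative of the same sign that does not decrease in
 * magnitude. The chosen value is finally clipped to [tmin, tmax].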
*/ static int update_trial_interval( lbfgsfloatval_t *x, lbfgsfloatval_t *fx, lbfgsfloatval_t *dx, lbfgsfloatval_t *y, lbfgsfloatval_t *fy, lbfgsfloatval_t *dy, lbfgsfloatval_t *t, lbfgsfloatval_t *ft, lbfgsfloatval_t *dt, const lbfgsfloatval_t tmin, const lbfgsfloatval_t tmax, int *brackt ) { int bound; int dsign = fsigndiff(dt, dx); lbfgsfloatval_t mc; /* minimizer of an interpolated cubic. */ lbfgsfloatval_t mq; /* minimizer of an interpolated quadratic. */ lbfgsfloatval_t newt; /* new trial value. */ USES_MINIMIZER; /* for CUBIC_MINIMIZER and QUARD_MINIMIZER. */ /* Check the input parameters for errors. */ if (*brackt) { if (*t <= min2(*x, *y) || max2(*x, *y) <= *t) { /* The trival value t is out of the interval. */ return LBFGSERR_OUTOFINTERVAL; } if (0. <= *dx * (*t - *x)) { /* The function must decrease from x. */ return LBFGSERR_INCREASEGRADIENT; } if (tmax < tmin) { /* Incorrect tmin and tmax specified. */ return LBFGSERR_INCORRECT_TMINMAX; } } /* Trial value selection. */ if (*fx < *ft) { /* Case 1: a higher function value. The minimum is brackt. If the cubic minimizer is closer to x than the quadratic one, the cubic one is taken, else the average of the minimizers is taken. */ *brackt = 1; bound = 1; CUBIC_MINIMIZER(mc, *x, *fx, *dx, *t, *ft, *dt); QUARD_MINIMIZER(mq, *x, *fx, *dx, *t, *ft); if (fabs(mc - *x) < fabs(mq - *x)) { newt = mc; } else { newt = mc + 0.5 * (mq - mc); } } else if (dsign) { /* Case 2: a lower function value and derivatives of opposite sign. The minimum is brackt. If the cubic minimizer is closer to x than the quadratic (secant) one, the cubic one is taken, else the quadratic one is taken. */ *brackt = 1; bound = 0; CUBIC_MINIMIZER(mc, *x, *fx, *dx, *t, *ft, *dt); QUARD_MINIMIZER2(mq, *x, *dx, *t, *dt); if (fabs(mc - *t) > fabs(mq - *t)) { newt = mc; } else { newt = mq; } } else if (fabs(*dt) < fabs(*dx)) { /* Case 3: a lower function value, derivatives of the same sign, and the magnitude of the derivative decreases. The cubic minimizer is only used if the cubic tends to infinity in the direction of the minimizer or if the minimum of the cubic is beyond t. Otherwise the cubic minimizer is defined to be either tmin or tmax. The quadratic (secant) minimizer is also computed and if the minimum is brackt then the the minimizer closest to x is taken, else the one farthest away is taken. */ bound = 1; CUBIC_MINIMIZER2(mc, *x, *fx, *dx, *t, *ft, *dt, tmin, tmax); QUARD_MINIMIZER2(mq, *x, *dx, *t, *dt); if (*brackt) { if (fabs(*t - mc) < fabs(*t - mq)) { newt = mc; } else { newt = mq; } } else { if (fabs(*t - mc) > fabs(*t - mq)) { newt = mc; } else { newt = mq; } } } else { /* Case 4: a lower function value, derivatives of the same sign, and the magnitude of the derivative does not decrease. If the minimum is not brackt, the step is either tmin or tmax, else the cubic minimizer is taken. */ bound = 0; if (*brackt) { CUBIC_MINIMIZER(newt, *t, *ft, *dt, *y, *fy, *dy); } else if (*x < *t) { newt = tmax; } else { newt = tmin; } } /* Update the interval of uncertainty. This update does not depend on the new step or the case analysis above. - Case a: if f(x) < f(t), x <- x, y <- t. - Case b: if f(t) <= f(x) && f'(t)*f'(x) > 0, x <- t, y <- y. - Case c: if f(t) <= f(x) && f'(t)*f'(x) < 0, x <- t, y <- x. */ if (*fx < *ft) { /* Case a */ *y = *t; *fy = *ft; *dy = *dt; } else { /* Case c */ if (dsign) { *y = *x; *fy = *fx; *dy = *dx; } /* Cases b and c */ *x = *t; *fx = *ft; *dx = *dt; } /* Clip the new trial value in [tmin, tmax]. 
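       A further safeguard follows the clipping: when the minimizer is
       bracketed and `bound` is set, the trial value is not allowed to move
       more than about two thirds of the way from x towards y
       (mq = x + 0.66 * (y - x) below), which keeps the interval of
       uncertainty shrinking.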
*/ if (tmax < newt) newt = tmax; if (newt < tmin) newt = tmin; /* Redefine the new trial value if it is close to the upper bound of the interval. */ if (*brackt && bound) { mq = *x + 0.66 * (*y - *x); if (*x < *y) { if (mq < newt) newt = mq; } else { if (newt < mq) newt = mq; } } /* Return the new trial value. */ *t = newt; return 0; } static lbfgsfloatval_t owlqn_x1norm( const lbfgsfloatval_t* x, const int start, const int n ) { int i; lbfgsfloatval_t norm = 0.; for (i = start;i < n;++i) { norm += fabs(x[i]); } return norm; } static void owlqn_pseudo_gradient( lbfgsfloatval_t* pg, const lbfgsfloatval_t* x, const lbfgsfloatval_t* g, const int n, const lbfgsfloatval_t c, const int start, const int end ) { int i; /* Compute the negative of gradients. */ for (i = 0;i < start;++i) { pg[i] = g[i]; } /* Compute the psuedo-gradients. */ for (i = start;i < end;++i) { if (x[i] < 0.) { /* Differentiable. */ pg[i] = g[i] - c; } else if (0. < x[i]) { /* Differentiable. */ pg[i] = g[i] + c; } else { if (g[i] < -c) { /* Take the right partial derivative. */ pg[i] = g[i] + c; } else if (c < g[i]) { /* Take the left partial derivative. */ pg[i] = g[i] - c; } else { pg[i] = 0.; } } } for (i = end;i < n;++i) { pg[i] = g[i]; } } static void owlqn_project( lbfgsfloatval_t* d, const lbfgsfloatval_t* sign, const int start, const int end ) { int i; for (i = start;i < end;++i) { if (d[i] * sign[i] <= 0) { d[i] = 0; } } } relion-3.1.3/src/jaz/lbfgs/lbfgs.h000066400000000000000000000772201411340063500167310ustar00rootroot00000000000000/* * C library of Limited memory BFGS (L-BFGS). * * Copyright (c) 1990, Jorge Nocedal * Copyright (c) 2007-2010 Naoaki Okazaki * All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ /* $Id$ */ #ifndef __LBFGS_H__ #define __LBFGS_H__ #ifdef __cplusplus extern "C" { #endif/*__cplusplus*/ /* * The default precision of floating point values is 64bit (double). */ #ifndef LBFGS_FLOAT #define LBFGS_FLOAT 64 #endif/*LBFGS_FLOAT*/ /* * Activate optimization routines for IEEE754 floating point values. */ #ifndef LBFGS_IEEE_FLOAT #define LBFGS_IEEE_FLOAT 1 #endif/*LBFGS_IEEE_FLOAT*/ #if LBFGS_FLOAT == 32 typedef float lbfgsfloatval_t; #elif LBFGS_FLOAT == 64 typedef double lbfgsfloatval_t; #else #error "libLBFGS supports single (float; LBFGS_FLOAT = 32) or double (double; LBFGS_FLOAT=64) precision only." #endif /** * \addtogroup liblbfgs_api libLBFGS API * @{ * * The libLBFGS API. */ /** * Return values of lbfgs(). * * Roughly speaking, a negative value indicates an error. 
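 *
 * LBFGS_SUCCESS and LBFGS_CONVERGENCE are both zero, the remaining non-error
 * outcomes (LBFGS_STOP, LBFGS_ALREADY_MINIMIZED) are small positive values,
 * and every LBFGSERR_* code is negative. A minimal check in a client program,
 * shown here as a sketch only (`evaluate` stands for the client's own
 * ::lbfgs_evaluate_t callback, and x must be filled with a starting point
 * before the call), therefore looks like:
 *
 *     lbfgsfloatval_t fx;
 *     lbfgsfloatval_t *x = lbfgs_malloc(n);
 *     int ret = lbfgs(n, x, &fx, evaluate, NULL, NULL, NULL);
 *     if (ret < 0) {
 *         fprintf(stderr, "lbfgs() failed with code %d\n", ret);
 *     }
 *     lbfgs_free(x);
 *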
*/ enum { /** L-BFGS reaches convergence. */ LBFGS_SUCCESS = 0, LBFGS_CONVERGENCE = 0, LBFGS_STOP, /** The initial variables already minimize the objective function. */ LBFGS_ALREADY_MINIMIZED, /** Unknown error. */ LBFGSERR_UNKNOWNERROR = -1024, /** Logic error. */ LBFGSERR_LOGICERROR, /** Insufficient memory. */ LBFGSERR_OUTOFMEMORY, /** The minimization process has been canceled. */ LBFGSERR_CANCELED, /** Invalid number of variables specified. */ LBFGSERR_INVALID_N, /** Invalid number of variables (for SSE) specified. */ LBFGSERR_INVALID_N_SSE, /** The array x must be aligned to 16 (for SSE). */ LBFGSERR_INVALID_X_SSE, /** Invalid parameter lbfgs_parameter_t::epsilon specified. */ LBFGSERR_INVALID_EPSILON, /** Invalid parameter lbfgs_parameter_t::past specified. */ LBFGSERR_INVALID_TESTPERIOD, /** Invalid parameter lbfgs_parameter_t::delta specified. */ LBFGSERR_INVALID_DELTA, /** Invalid parameter lbfgs_parameter_t::linesearch specified. */ LBFGSERR_INVALID_LINESEARCH, /** Invalid parameter lbfgs_parameter_t::max_step specified. */ LBFGSERR_INVALID_MINSTEP, /** Invalid parameter lbfgs_parameter_t::max_step specified. */ LBFGSERR_INVALID_MAXSTEP, /** Invalid parameter lbfgs_parameter_t::ftol specified. */ LBFGSERR_INVALID_FTOL, /** Invalid parameter lbfgs_parameter_t::wolfe specified. */ LBFGSERR_INVALID_WOLFE, /** Invalid parameter lbfgs_parameter_t::gtol specified. */ LBFGSERR_INVALID_GTOL, /** Invalid parameter lbfgs_parameter_t::xtol specified. */ LBFGSERR_INVALID_XTOL, /** Invalid parameter lbfgs_parameter_t::max_linesearch specified. */ LBFGSERR_INVALID_MAXLINESEARCH, /** Invalid parameter lbfgs_parameter_t::orthantwise_c specified. */ LBFGSERR_INVALID_ORTHANTWISE, /** Invalid parameter lbfgs_parameter_t::orthantwise_start specified. */ LBFGSERR_INVALID_ORTHANTWISE_START, /** Invalid parameter lbfgs_parameter_t::orthantwise_end specified. */ LBFGSERR_INVALID_ORTHANTWISE_END, /** The line-search step went out of the interval of uncertainty. */ LBFGSERR_OUTOFINTERVAL, /** A logic error occurred; alternatively, the interval of uncertainty became too small. */ LBFGSERR_INCORRECT_TMINMAX, /** A rounding error occurred; alternatively, no line-search step satisfies the sufficient decrease and curvature conditions. */ LBFGSERR_ROUNDING_ERROR, /** The line-search step became smaller than lbfgs_parameter_t::min_step. */ LBFGSERR_MINIMUMSTEP, /** The line-search step became larger than lbfgs_parameter_t::max_step. */ LBFGSERR_MAXIMUMSTEP, /** The line-search routine reaches the maximum number of evaluations. */ LBFGSERR_MAXIMUMLINESEARCH, /** The algorithm routine reaches the maximum number of iterations. */ LBFGSERR_MAXIMUMITERATION, /** Relative width of the interval of uncertainty is at most lbfgs_parameter_t::xtol. */ LBFGSERR_WIDTHTOOSMALL, /** A logic error (negative line-search step) occurred. */ LBFGSERR_INVALIDPARAMETERS, /** The current search direction increases the objective function value. */ LBFGSERR_INCREASEGRADIENT, }; /** * Line search algorithms. */ enum { /** The default algorithm (MoreThuente method). */ LBFGS_LINESEARCH_DEFAULT = 0, /** MoreThuente method proposd by More and Thuente. */ LBFGS_LINESEARCH_MORETHUENTE = 0, /** * Backtracking method with the Armijo condition. * The backtracking method finds the step length such that it satisfies * the sufficient decrease (Armijo) condition, * - f(x + a * d) <= f(x) + lbfgs_parameter_t::ftol * a * g(x)^T d, * * where x is the current point, d is the current search direction, and * a is the step length. 
*/ LBFGS_LINESEARCH_BACKTRACKING_ARMIJO = 1, /** The backtracking method with the defualt (regular Wolfe) condition. */ LBFGS_LINESEARCH_BACKTRACKING = 2, /** * Backtracking method with regular Wolfe condition. * The backtracking method finds the step length such that it satisfies * both the Armijo condition (LBFGS_LINESEARCH_BACKTRACKING_ARMIJO) * and the curvature condition, * - g(x + a * d)^T d >= lbfgs_parameter_t::wolfe * g(x)^T d, * * where x is the current point, d is the current search direction, and * a is the step length. */ LBFGS_LINESEARCH_BACKTRACKING_WOLFE = 2, /** * Backtracking method with strong Wolfe condition. * The backtracking method finds the step length such that it satisfies * both the Armijo condition (LBFGS_LINESEARCH_BACKTRACKING_ARMIJO) * and the following condition, * - |g(x + a * d)^T d| <= lbfgs_parameter_t::wolfe * |g(x)^T d|, * * where x is the current point, d is the current search direction, and * a is the step length. */ LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE = 3, }; /** * L-BFGS optimization parameters. * Call lbfgs_parameter_init() function to initialize parameters to the * default values. */ typedef struct { /** * The number of corrections to approximate the inverse hessian matrix. * The L-BFGS routine stores the computation results of previous \ref m * iterations to approximate the inverse hessian matrix of the current * iteration. This parameter controls the size of the limited memories * (corrections). The default value is \c 6. Values less than \c 3 are * not recommended. Large values will result in excessive computing time. */ int m; /** * Epsilon for convergence test. * This parameter determines the accuracy with which the solution is to * be found. A minimization terminates when * ||g|| < \ref epsilon * max(1, ||x||), * where ||.|| denotes the Euclidean (L2) norm. The default value is * \c 1e-5. */ lbfgsfloatval_t epsilon; /** * Distance for delta-based convergence test. * This parameter determines the distance, in iterations, to compute * the rate of decrease of the objective function. If the value of this * parameter is zero, the library does not perform the delta-based * convergence test. The default value is \c 0. */ int past; /** * Delta for convergence test. * This parameter determines the minimum rate of decrease of the * objective function. The library stops iterations when the * following condition is met: * (f' - f) / f < \ref delta, * where f' is the objective value of \ref past iterations ago, and f is * the objective value of the current iteration. * The default value is \c 0. */ lbfgsfloatval_t delta; /** * The maximum number of iterations. * The lbfgs() function terminates an optimization process with * ::LBFGSERR_MAXIMUMITERATION status code when the iteration count * exceedes this parameter. Setting this parameter to zero continues an * optimization process until a convergence or error. The default value * is \c 0. */ int max_iterations; /** * The line search algorithm. * This parameter specifies a line search algorithm to be used by the * L-BFGS routine. */ int linesearch; /** * The maximum number of trials for the line search. * This parameter controls the number of function and gradients evaluations * per iteration for the line search routine. The default value is \c 20. */ int max_linesearch; /** * The minimum step of the line search routine. * The default value is \c 1e-20. 
This value need not be modified unless * the exponents are too large for the machine being used, or unless the * problem is extremely badly scaled (in which case the exponents should * be increased). */ lbfgsfloatval_t min_step; /** * The maximum step of the line search. * The default value is \c 1e+20. This value need not be modified unless * the exponents are too large for the machine being used, or unless the * problem is extremely badly scaled (in which case the exponents should * be increased). */ lbfgsfloatval_t max_step; /** * A parameter to control the accuracy of the line search routine. * The default value is \c 1e-4. This parameter should be greater * than zero and smaller than \c 0.5. */ lbfgsfloatval_t ftol; /** * A coefficient for the Wolfe condition. * This parameter is valid only when the backtracking line-search * algorithm is used with the Wolfe condition, * ::LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE or * ::LBFGS_LINESEARCH_BACKTRACKING_WOLFE . * The default value is \c 0.9. This parameter should be greater * the \ref ftol parameter and smaller than \c 1.0. */ lbfgsfloatval_t wolfe; /** * A parameter to control the accuracy of the line search routine. * The default value is \c 0.9. If the function and gradient * evaluations are inexpensive with respect to the cost of the * iteration (which is sometimes the case when solving very large * problems) it may be advantageous to set this parameter to a small * value. A typical small value is \c 0.1. This parameter shuold be * greater than the \ref ftol parameter (\c 1e-4) and smaller than * \c 1.0. */ lbfgsfloatval_t gtol; /** * The machine precision for floating-point values. * This parameter must be a positive value set by a client program to * estimate the machine precision. The line search routine will terminate * with the status code (::LBFGSERR_ROUNDING_ERROR) if the relative width * of the interval of uncertainty is less than this parameter. */ lbfgsfloatval_t xtol; /** * Coeefficient for the L1 norm of variables. * This parameter should be set to zero for standard minimization * problems. Setting this parameter to a positive value activates * Orthant-Wise Limited-memory Quasi-Newton (OWL-QN) method, which * minimizes the objective function F(x) combined with the L1 norm |x| * of the variables, {F(x) + C |x|}. This parameter is the coeefficient * for the |x|, i.e., C. As the L1 norm |x| is not differentiable at * zero, the library modifies function and gradient evaluations from * a client program suitably; a client program thus have only to return * the function value F(x) and gradients G(x) as usual. The default value * is zero. */ lbfgsfloatval_t orthantwise_c; /** * Start index for computing L1 norm of the variables. * This parameter is valid only for OWL-QN method * (i.e., \ref orthantwise_c != 0). This parameter b (0 <= b < N) * specifies the index number from which the library computes the * L1 norm of the variables x, * |x| := |x_{b}| + |x_{b+1}| + ... + |x_{N}| . * In other words, variables x_1, ..., x_{b-1} are not used for * computing the L1 norm. Setting b (0 < b < N), one can protect * variables, x_1, ..., x_{b-1} (e.g., a bias term of logistic * regression) from being regularized. The default value is zero. */ int orthantwise_start; /** * End index for computing L1 norm of the variables. * This parameter is valid only for OWL-QN method * (i.e., \ref orthantwise_c != 0). 
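 * (To activate OWL-QN, set \ref orthantwise_c to a positive value and select
 * the backtracking line search; lbfgs() rejects any other line search with
 * ::LBFGSERR_INVALID_LINESEARCH when orthantwise_c != 0. A minimal set-up,
 * as a sketch only, is:
 *     lbfgs_parameter_t prm;
 *     lbfgs_parameter_init(&prm);
 *     prm.orthantwise_c = 1.0;
 *     prm.linesearch = LBFGS_LINESEARCH_BACKTRACKING;
 * )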
This parameter e (0 < e <= N) * specifies the index number at which the library stops computing the * L1 norm of the variables x, */ int orthantwise_end; } lbfgs_parameter_t; /** * Callback interface to provide objective function and gradient evaluations. * * The lbfgs() function call this function to obtain the values of objective * function and its gradients when needed. A client program must implement * this function to evaluate the values of the objective function and its * gradients, given current values of variables. * * @param instance The user data sent for lbfgs() function by the client. * @param x The current values of variables. * @param g The gradient vector. The callback function must compute * the gradient values for the current variables. * @param n The number of variables. * @param step The current step of the line search routine. * @retval lbfgsfloatval_t The value of the objective function for the current * variables. */ typedef lbfgsfloatval_t (*lbfgs_evaluate_t)( void *instance, const lbfgsfloatval_t *x, lbfgsfloatval_t *g, const int n, const lbfgsfloatval_t step ); /** * Callback interface to receive the progress of the optimization process. * * The lbfgs() function call this function for each iteration. Implementing * this function, a client program can store or display the current progress * of the optimization process. * * @param instance The user data sent for lbfgs() function by the client. * @param x The current values of variables. * @param g The current gradient values of variables. * @param fx The current value of the objective function. * @param xnorm The Euclidean norm of the variables. * @param gnorm The Euclidean norm of the gradients. * @param step The line-search step used for this iteration. * @param n The number of variables. * @param k The iteration count. * @param ls The number of evaluations called for this iteration. * @retval int Zero to continue the optimization process. Returning a * non-zero value will cancel the optimization process. */ typedef int (*lbfgs_progress_t)( void *instance, const lbfgsfloatval_t *x, const lbfgsfloatval_t *g, const lbfgsfloatval_t fx, const lbfgsfloatval_t xnorm, const lbfgsfloatval_t gnorm, const lbfgsfloatval_t step, int n, int k, int ls ); /* A user must implement a function compatible with ::lbfgs_evaluate_t (evaluation callback) and pass the pointer to the callback function to lbfgs() arguments. Similarly, a user can implement a function compatible with ::lbfgs_progress_t (progress callback) to obtain the current progress (e.g., variables, function value, ||G||, etc) and to cancel the iteration process if necessary. Implementation of a progress callback is optional: a user can pass \c NULL if progress notification is not necessary. In addition, a user must preserve two requirements: - The number of variables must be multiples of 16 (this is not 4). - The memory block of variable array ::x must be aligned to 16. This algorithm terminates an optimization when: ||G|| < \epsilon \cdot \max(1, ||x||) . In this formula, ||.|| denotes the Euclidean norm. */ /** * Start a L-BFGS optimization. * * @param n The number of variables. * @param x The array of variables. A client program can set * default values for the optimization and receive the * optimization result through this array. This array * must be allocated by ::lbfgs_malloc function * for libLBFGS built with SSE/SSE2 optimization routine * enabled. The library built without SSE/SSE2 * optimization does not have such a requirement. 
* @param ptr_fx The pointer to the variable that receives the final * value of the objective function for the variables. * This argument can be set to \c NULL if the final * value of the objective function is unnecessary. * @param proc_evaluate The callback function to provide function and * gradient evaluations given a current values of * variables. A client program must implement a * callback function compatible with \ref * lbfgs_evaluate_t and pass the pointer to the * callback function. * @param proc_progress The callback function to receive the progress * (the number of iterations, the current value of * the objective function) of the minimization * process. This argument can be set to \c NULL if * a progress report is unnecessary. * @param instance A user data for the client program. The callback * functions will receive the value of this argument. * @param param The pointer to a structure representing parameters for * L-BFGS optimization. A client program can set this * parameter to \c NULL to use the default parameters. * Call lbfgs_parameter_init() function to fill a * structure with the default values. * @retval int The status code. This function returns zero if the * minimization process terminates without an error. A * non-zero value indicates an error. */ int lbfgs( int n, lbfgsfloatval_t *x, lbfgsfloatval_t *ptr_fx, lbfgs_evaluate_t proc_evaluate, lbfgs_progress_t proc_progress, void *instance, lbfgs_parameter_t *param ); /** * Initialize L-BFGS parameters to the default values. * * Call this function to fill a parameter structure with the default values * and overwrite parameter values if necessary. * * @param param The pointer to the parameter structure. */ void lbfgs_parameter_init(lbfgs_parameter_t *param); /** * Allocate an array for variables. * * This function allocates an array of variables for the convenience of * ::lbfgs function; the function has a requreiemt for a variable array * when libLBFGS is built with SSE/SSE2 optimization routines. A user does * not have to use this function for libLBFGS built without SSE/SSE2 * optimization. * * @param n The number of variables. */ lbfgsfloatval_t* lbfgs_malloc(int n); /** * Free an array of variables. * * @param x The array of variables allocated by ::lbfgs_malloc * function. */ void lbfgs_free(lbfgsfloatval_t *x); /** @} */ #ifdef __cplusplus } #endif/*__cplusplus*/ /** @mainpage libLBFGS: a library of Limited-memory Broyden-Fletcher-Goldfarb-Shanno (L-BFGS) @section intro Introduction This library is a C port of the implementation of Limited-memory Broyden-Fletcher-Goldfarb-Shanno (L-BFGS) method written by Jorge Nocedal. The original FORTRAN source code is available at: http://www.ece.northwestern.edu/~nocedal/lbfgs.html The L-BFGS method solves the unconstrainted minimization problem,
    minimize F(x), x = (x1, x2, ..., xN),
only if the objective function F(x) and its gradient G(x) are computable. The well-known Newton's method requires computation of the inverse of the hessian matrix of the objective function. However, the computational cost for the inverse hessian matrix is expensive especially when the objective function takes a large number of variables. The L-BFGS method iteratively finds a minimizer by approximating the inverse hessian matrix by information from last m iterations. This innovation saves the memory storage and computational time drastically for large-scaled problems. Among the various ports of L-BFGS, this library provides several features: - Optimization with L1-norm (Orthant-Wise Limited-memory Quasi-Newton (OWL-QN) method): In addition to standard minimization problems, the library can minimize a function F(x) combined with L1-norm |x| of the variables, {F(x) + C |x|}, where C is a constant scalar parameter. This feature is useful for estimating parameters of sparse log-linear models (e.g., logistic regression and maximum entropy) with L1-regularization (or Laplacian prior). - Clean C code: Unlike C codes generated automatically by f2c (Fortran 77 into C converter), this port includes changes based on my interpretations, improvements, optimizations, and clean-ups so that the ported code would be well-suited for a C code. In addition to comments inherited from the original code, a number of comments were added through my interpretations. - Callback interface: The library receives function and gradient values via a callback interface. The library also notifies the progress of the optimization by invoking a callback function. In the original implementation, a user had to set function and gradient values every time the function returns for obtaining updated values. - Thread safe: The library is thread-safe, which is the secondary gain from the callback interface. - Cross platform. The source code can be compiled on Microsoft Visual Studio 2010, GNU C Compiler (gcc), etc. - Configurable precision: A user can choose single-precision (float) or double-precision (double) accuracy by changing ::LBFGS_FLOAT macro. - SSE/SSE2 optimization: This library includes SSE/SSE2 optimization (written in compiler intrinsics) for vector arithmetic operations on Intel/AMD processors. The library uses SSE for float values and SSE2 for double values. The SSE/SSE2 optimization routine is disabled by default. This library is used by: - CRFsuite: A fast implementation of Conditional Random Fields (CRFs) - Classias: A collection of machine-learning algorithms for classification - mlegp: an R package for maximum likelihood estimates for Gaussian processes - imaging2: the imaging2 class library - Algorithm::LBFGS - Perl extension for L-BFGS - YAP-LBFGS (an interface to call libLBFGS from YAP Prolog) @section download Download - Source code - GitHub repository libLBFGS is distributed under the term of the MIT license. @section changelog History - Version 1.10 (2010-12-22): - Fixed compiling errors on Mac OS X; this patch was kindly submitted by Nic Schraudolph. - Reduced compiling warnings on Mac OS X; this patch was kindly submitted by Tamas Nepusz. - Replaced memalign() with posix_memalign(). - Updated solution and project files for Microsoft Visual Studio 2010. - Version 1.9 (2010-01-29): - Fixed a mistake in checking the validity of the parameters "ftol" and "wolfe"; this was discovered by Kevin S. Van Horn. 
- Version 1.8 (2009-07-13): - Accepted the patch submitted by Takashi Imamichi; the backtracking method now has three criteria for choosing the step length: - ::LBFGS_LINESEARCH_BACKTRACKING_ARMIJO: sufficient decrease (Armijo) condition only - ::LBFGS_LINESEARCH_BACKTRACKING_WOLFE: regular Wolfe condition (sufficient decrease condition + curvature condition) - ::LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE: strong Wolfe condition - Updated the documentation to explain the above three criteria. - Version 1.7 (2009-02-28): - Improved OWL-QN routines for stability. - Removed the support of OWL-QN method in MoreThuente algorithm because it accidentally fails in early stages of iterations for some objectives. Because of this change, the OW-LQN method must be used with the backtracking algorithm (::LBFGS_LINESEARCH_BACKTRACKING), or the library returns ::LBFGSERR_INVALID_LINESEARCH. - Renamed line search algorithms as follows: - ::LBFGS_LINESEARCH_BACKTRACKING: regular Wolfe condition. - ::LBFGS_LINESEARCH_BACKTRACKING_LOOSE: regular Wolfe condition. - ::LBFGS_LINESEARCH_BACKTRACKING_STRONG: strong Wolfe condition. - Source code clean-up. - Version 1.6 (2008-11-02): - Improved line-search algorithm with strong Wolfe condition, which was contributed by Takashi Imamichi. This routine is now default for ::LBFGS_LINESEARCH_BACKTRACKING. The previous line search algorithm with regular Wolfe condition is still available as ::LBFGS_LINESEARCH_BACKTRACKING_LOOSE. - Configurable stop index for L1-norm computation. A member variable ::lbfgs_parameter_t::orthantwise_end was added to specify the index number at which the library stops computing the L1 norm of the variables. This is useful to prevent some variables from being regularized by the OW-LQN method. - A sample program written in C++ (sample/sample.cpp). - Version 1.5 (2008-07-10): - Configurable starting index for L1-norm computation. A member variable ::lbfgs_parameter_t::orthantwise_start was added to specify the index number from which the library computes the L1 norm of the variables. This is useful to prevent some variables from being regularized by the OWL-QN method. - Fixed a zero-division error when the initial variables have already been a minimizer (reported by Takashi Imamichi). In this case, the library returns ::LBFGS_ALREADY_MINIMIZED status code. - Defined ::LBFGS_SUCCESS status code as zero; removed unused constants, LBFGSFALSE and LBFGSTRUE. - Fixed a compile error in an implicit down-cast. - Version 1.4 (2008-04-25): - Configurable line search algorithms. A member variable ::lbfgs_parameter_t::linesearch was added to choose either MoreThuente method (::LBFGS_LINESEARCH_MORETHUENTE) or backtracking algorithm (::LBFGS_LINESEARCH_BACKTRACKING). - Fixed a bug: the previous version did not compute psuedo-gradients properly in the line search routines for OWL-QN. This bug might quit an iteration process too early when the OWL-QN routine was activated (0 < ::lbfgs_parameter_t::orthantwise_c). - Configure script for POSIX environments. - SSE/SSE2 optimizations with GCC. - New functions ::lbfgs_malloc and ::lbfgs_free to use SSE/SSE2 routines transparently. It is uncessary to use these functions for libLBFGS built without SSE/SSE2 routines; you can still use any memory allocators if SSE/SSE2 routines are disabled in libLBFGS. - Version 1.3 (2007-12-16): - An API change. An argument was added to lbfgs() function to receive the final value of the objective function. This argument can be set to \c NULL if the final value is unnecessary. 
- Fixed a null-pointer bug in the sample code (reported by Takashi Imamichi). - Added build scripts for Microsoft Visual Studio 2005 and GCC. - Added README file. - Version 1.2 (2007-12-13): - Fixed a serious bug in orthant-wise L-BFGS. An important variable was used without initialization. - Version 1.1 (2007-12-01): - Implemented orthant-wise L-BFGS. - Implemented lbfgs_parameter_init() function. - Fixed several bugs. - API documentation. - Version 1.0 (2007-09-20): - Initial release. @section api Documentation - @ref liblbfgs_api "libLBFGS API" @section sample Sample code @include sample.c @section ack Acknowledgements The L-BFGS algorithm is described in: - Jorge Nocedal. Updating Quasi-Newton Matrices with Limited Storage. Mathematics of Computation, Vol. 35, No. 151, pp. 773--782, 1980. - Dong C. Liu and Jorge Nocedal. On the limited memory BFGS method for large scale optimization. Mathematical Programming B, Vol. 45, No. 3, pp. 503--528, 1989. The line search algorithms used in this implementation are described in: - John E. Dennis and Robert B. Schnabel. Numerical Methods for Unconstrained Optimization and Nonlinear Equations, Englewood Cliffs, 1983. - Jorge J. More and David J. Thuente. Line search algorithm with guaranteed sufficient decrease. ACM Transactions on Mathematical Software (TOMS), Vol. 20, No. 3, pp. 286--307, 1994. This library also implements the Orthant-Wise Limited-memory Quasi-Newton (OWL-QN) method presented in: - Galen Andrew and Jianfeng Gao. Scalable training of L1-regularized log-linear models. In Proceedings of the 24th International Conference on Machine Learning (ICML 2007), pp. 33--40, 2007. Special thanks go to: - Yoshimasa Tsuruoka and Daisuke Okanohara for technical information about OWL-QN - Takashi Imamichi for the useful enhancements of the backtracking method - Kevin S. Van Horn, Nic Schraudolph, and Tamas Nepusz for bug fixes Finally, I would like to thank the original author, Jorge Nocedal, who has been distributing his efficient and well-documented implementation under an open-source license. @section reference Reference - L-BFGS by Jorge Nocedal. - Orthant-Wise Limited-memory Quasi-Newton Optimizer for L1-regularized Objectives by Galen Andrew. - C port (via f2c) by Taku Kudo. - C#/C++/Delphi/VisualBasic6 port in ALGLIB. - Computational Crystallography Toolbox includes scitbx::lbfgs.
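@section usage_sketch Callback interface sketch

The callback interface described in the feature list above can be summarized by the following minimal, self-contained sketch (an illustrative fragment added for this documentation, not part of libLBFGS itself; it assumes only the declarations in this header). It minimizes the convex quadratic f(x) = sum_i (x_i - 1)^2, whose unique minimizer is x_i = 1, and prints the progress of each iteration:

@code
#include <stdio.h>
#include <lbfgs.h>

// Objective callback: fill in the gradient g and return the function value at x.
static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x,
                                lbfgsfloatval_t *g, const int n,
                                const lbfgsfloatval_t step)
{
    lbfgsfloatval_t fx = 0.0;
    for (int i = 0; i < n; ++i) {
        const lbfgsfloatval_t d = x[i] - 1.0;
        fx += d * d;
        g[i] = 2.0 * d;
    }
    return fx;
}

// Progress callback: called once per iteration; return non-zero to cancel.
static int progress(void *instance, const lbfgsfloatval_t *x, const lbfgsfloatval_t *g,
                    const lbfgsfloatval_t fx, const lbfgsfloatval_t xnorm,
                    const lbfgsfloatval_t gnorm, const lbfgsfloatval_t step,
                    int n, int k, int ls)
{
    printf("iteration %d: fx = %f, |x| = %f, |g| = %f\n", k, fx, xnorm, gnorm);
    return 0;
}

int main(void)
{
    const int n = 16;
    lbfgsfloatval_t fx;
    lbfgsfloatval_t *x = lbfgs_malloc(n);   // aligned allocation (required for SSE/SSE2 builds)
    lbfgs_parameter_t param;

    for (int i = 0; i < n; ++i) x[i] = 0.0; // starting point
    lbfgs_parameter_init(&param);           // default parameters
    // For OWL-QN, i.e. minimizing F(x) + C|x|, one would additionally set
    // param.orthantwise_c = C and param.linesearch = LBFGS_LINESEARCH_BACKTRACKING.

    int ret = lbfgs(n, x, &fx, evaluate, progress, NULL, &param);
    printf("lbfgs() returned %d, fx = %f, x[0] = %f\n", ret, fx, x[0]);

    lbfgs_free(x);
    return 0;
}
@endcode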
*/ #endif/*__LBFGS_H__*/ relion-3.1.3/src/jaz/legacy_obs_model.cpp000066400000000000000000000123771411340063500203630ustar00rootroot00000000000000#include "src/jaz/legacy_obs_model.h" #include "src/jaz/stack_helper.h" #include "src/jaz/img_proc/filter_helper.h" #include "src/jaz/Fourier_helper.h" #include LegacyObservationModel::LegacyObservationModel() : angpix(-1), anisoTilt(false) { } LegacyObservationModel::LegacyObservationModel(double angpix, double Cs, double voltage) : angpix(angpix), lambda(12.2643247 / sqrt(voltage * (1.0 + voltage * 0.978466e-6))), Cs(Cs), anisoTilt(false) { } void LegacyObservationModel::predictObservation( Projector& proj, const MetaDataTable& mdt, int particle, MultidimArray& dest, bool applyCtf, bool applyTilt, bool applyShift) const { const int s = proj.ori_size; const int sh = s/2 + 1; double xoff, yoff; mdt.getValue(EMDL_ORIENT_ORIGIN_X, xoff, particle); mdt.getValue(EMDL_ORIENT_ORIGIN_Y, yoff, particle); double rot, tilt, psi; Matrix2D A3D; mdt.getValue(EMDL_ORIENT_ROT, rot, particle); mdt.getValue(EMDL_ORIENT_TILT, tilt, particle); mdt.getValue(EMDL_ORIENT_PSI, psi, particle); Euler_angles2matrix(rot, tilt, psi, A3D); if (dest.xdim != sh || dest.ydim != s) { dest.resize(s,sh); } dest.initZeros(); proj.get2DFourierTransform(dest, A3D); if (applyShift) { shiftImageInFourierTransform(dest, dest, s, s/2 - xoff, s/2 - yoff); } if (applyCtf) { CTF ctf; ctf.read(mdt, mdt, particle); FilterHelper::modulate(dest, ctf, angpix); } if (applyTilt) { double tx = 0.0, ty = 0.0; mdt.getValue(EMDL_IMAGE_BEAMTILT_X, tx, particle); mdt.getValue(EMDL_IMAGE_BEAMTILT_Y, ty, particle); if (tx != 0.0 && ty != 0.0) { if (anisoTilt) { selfApplyBeamTilt( dest, -tx, -ty, beamtilt_xx, beamtilt_xy, beamtilt_yy, lambda, Cs, angpix, s); } else { selfApplyBeamTilt(dest, -tx, -ty, lambda, Cs, angpix, s); } } } } Image LegacyObservationModel::predictObservation( Projector& proj, const MetaDataTable& mdt, int particle, bool applyCtf, bool applyTilt, bool applyShift) const { Image pred; predictObservation(proj, mdt, particle, pred.data, applyCtf, applyTilt, applyShift); return pred; } std::vector> LegacyObservationModel::predictObservations( Projector &proj, const MetaDataTable &mdt, int threads, bool applyCtf, bool applyTilt, bool applyShift) const { const int pc = mdt.numberOfObjects(); std::vector> out(pc); #pragma omp parallel for num_threads(threads) for (int p = 0; p < pc; p++) { out[p] = predictObservation(proj, mdt, p, applyCtf, applyTilt, applyShift); } return out; } void LegacyObservationModel::insertObservation(const Image& img, BackProjector &bproj, const MetaDataTable& mdt, int particle, bool applyCtf, bool applyTilt, double shift_x, double shift_y) { const int s = img.data.ydim; const int sh = img.data.xdim; RFLOAT rot, tilt, psi; Matrix2D A3D; double tx = 0.0, ty = 0.0; mdt.getValue(EMDL_ORIENT_ROT, rot, particle); mdt.getValue(EMDL_ORIENT_TILT, tilt, particle); mdt.getValue(EMDL_ORIENT_PSI, psi, particle); Euler_angles2matrix(rot, tilt, psi, A3D); mdt.getValue(EMDL_ORIENT_ORIGIN_X, tx, particle); mdt.getValue(EMDL_ORIENT_ORIGIN_Y, ty, particle); tx += shift_x; ty += shift_y; MultidimArray F2D = img.data; shiftImageInFourierTransform(F2D, F2D, s, tx, ty); MultidimArray Fctf; Fctf.resize(F2D); Fctf.initConstant(1.); if (applyCtf) { CTF ctf; ctf.read(mdt, mdt, particle); ctf.getFftwImage(Fctf, s, s, angpix); FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(F2D) { DIRECT_MULTIDIM_ELEM(F2D, n) *= DIRECT_MULTIDIM_ELEM(Fctf, n); DIRECT_MULTIDIM_ELEM(Fctf, n) *= DIRECT_MULTIDIM_ELEM(Fctf, 
n); } } if (applyTilt) { double my_tilt_x = 0.0; double my_tilt_y = 0.0; if (mdt.containsLabel(EMDL_IMAGE_BEAMTILT_X)) { mdt.getValue(EMDL_IMAGE_BEAMTILT_X, my_tilt_x, particle); } if (mdt.containsLabel(EMDL_IMAGE_BEAMTILT_Y)) { mdt.getValue(EMDL_IMAGE_BEAMTILT_Y, my_tilt_y, particle); } selfApplyBeamTilt(F2D, my_tilt_x, my_tilt_y, lambda, Cs, angpix, sh); } bproj.set2DFourierTransform(F2D, A3D, &Fctf); } void LegacyObservationModel::setAnisoTilt(double xx, double xy, double yy) { beamtilt_xx = xx; beamtilt_xy = xy; beamtilt_yy = yy; anisoTilt = true; } double LegacyObservationModel::angToPix(double a, int s) { return s * angpix / a; } double LegacyObservationModel::pixToAng(double p, int s) { return s * angpix / p; } bool LegacyObservationModel::containsAllNeededColumns(const MetaDataTable& mdt) { return (mdt.containsLabel(EMDL_ORIENT_ORIGIN_X) && mdt.containsLabel(EMDL_ORIENT_ORIGIN_Y) && mdt.containsLabel(EMDL_ORIENT_ROT) && mdt.containsLabel(EMDL_ORIENT_TILT) && mdt.containsLabel(EMDL_ORIENT_PSI) && mdt.containsLabel(EMDL_PARTICLE_RANDOM_SUBSET)); } relion-3.1.3/src/jaz/legacy_obs_model.h000066400000000000000000000030511411340063500200150ustar00rootroot00000000000000#ifndef LEGACY_OBS_MODEL_H #define LEGACY_OBS_MODEL_H #include #include #include #include #include class BackProjector; class LegacyObservationModel { public: LegacyObservationModel(); LegacyObservationModel(double angpix, double Cs, double voltage); double angpix, lambda, Cs; double beamtilt_xx, beamtilt_xy, beamtilt_yy; bool anisoTilt; void predictObservation( Projector &proj, const MetaDataTable &mdt, int particle, MultidimArray& dest, bool applyCtf = true, bool applyTilt = true, bool applyShift = true) const; Image predictObservation( Projector &proj, const MetaDataTable &mdt, int particle, bool applyCtf = true, bool applyTilt = true, bool applyShift = true) const; std::vector> predictObservations( Projector &proj, const MetaDataTable &mdt, int threads, bool applyCtf = true, bool applyTilt = true, bool applyShift = true) const; void insertObservation( const Image& img, BackProjector &bproj, const MetaDataTable& mdt, int particle, bool applyCtf, bool applyTilt, double shift_x = 0.0, double shift_y = 0.0); void setAnisoTilt(double xx, double xy, double yy); double angToPix(double a, int s); double pixToAng(double p, int s); static bool containsAllNeededColumns(const MetaDataTable& mdt); }; #endif relion-3.1.3/src/jaz/local_motion_fit.cpp000066400000000000000000000171271411340063500204130ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #include #include #include using namespace gravis; LocalMotionFit::LocalMotionFit(const std::vector>>& correlation, const std::vector& velWgh, const std::vector& accWgh, const std::vector>> &divWgh, const std::vector& offsets, int threads) : pc(correlation.size()), fc(correlation[0].size()), threads(threads), correlation(correlation), velWgh(velWgh), accWgh(accWgh), divWgh(divWgh), offsets(offsets) { } double LocalMotionFit::f(const std::vector &x, void* tempStorage) const { double e_tot = 0.0; #pragma omp parallel for num_threads(threads) for (int p = 0; p < pc; p++) { double e = 0.0; for (int f = 0; f < fc; f++) { const double xpf = x[2*(p*fc + f) + 0]; const double ypf = x[2*(p*fc + f) + 1]; e -= Interpolation::cubicXY(correlation[p][f], xpf+offsets[f].x, ypf+offsets[f].y, 0, 0, true); if (f > 0 && f < fc-1) { const double xpfn = x[2*(p*fc + f - 1) + 0]; const double ypfn = x[2*(p*fc + f - 1) + 1]; const double xpfp = x[2*(p*fc + f + 1) + 0]; const double ypfp = x[2*(p*fc + f + 1) + 1]; const double ax = xpfn + xpfp - 2.0 * xpf; const double ay = ypfn + ypfp - 2.0 * ypf; e += accWgh[f] * (ax * ax + ay * ay); } if (f > 0) { const double xpfn = x[2*(p*fc + f - 1) + 0]; const double ypfn = x[2*(p*fc + f - 1) + 1]; const double vx = xpf - xpfn; const double vy = ypf - ypfn; e += velWgh[f-1] * (vx * vx + vy * vy); for (int q = p+1; q < pc; q++) { if (divWgh[f-1][p][q] <= 0.0) continue; const double xqf = x[2*(q*fc + f) + 0]; const double yqf = x[2*(q*fc + f) + 1]; const double xqfn = x[2*(q*fc + f - 1) + 0]; const double yqfn = x[2*(q*fc + f - 1) + 1]; const double cx = (xpf - xpfn) - (xqf - xqfn); const double cy = (ypf - ypfn) - (yqf - yqfn); e += divWgh[f-1][p][q] * (cx * cx + cy * cy); } } } #pragma omp atomic e_tot += e; } return e_tot; } void LocalMotionFit::grad(const std::vector &x, std::vector &gradDest, void* tempStorage) const { for (int p = 0; p < pc; p++) for (int f = 0; f < fc; f++) { gradDest[2*(p*fc + f) + 0] = 0.0; gradDest[2*(p*fc + f) + 1] = 0.0; } std::vector> tempGrad(threads); for (int i = 0; i < threads; i++) { tempGrad[i] = std::vector(2*pc*fc, 0.0); } #pragma omp parallel for num_threads(threads) for (int p = 0; p < pc; p++) { int th = omp_get_thread_num(); for (int f = 0; f < fc; f++) { const double xpf = x[2*(p*fc + f) + 0]; const double ypf = x[2*(p*fc + f) + 1]; gravis::t2Vector g = Interpolation::cubicXYgrad( correlation[p][f], xpf+offsets[f].x, ypf+offsets[f].y, 0, 0, true); tempGrad[th][2*(p*fc + f) + 0] -= g.x; tempGrad[th][2*(p*fc + f) + 1] -= g.y; if (f > 0 && f < fc-1) { const double xpfn = x[2*(p*fc + f - 1) + 0]; const double ypfn = x[2*(p*fc + f - 1) + 1]; const double xpfp = x[2*(p*fc + f + 1) + 0]; const double ypfp = x[2*(p*fc + f + 1) + 1]; const double ax = xpfn + xpfp - 2.0 * xpf; const double ay = ypfn + ypfp - 2.0 * ypf; tempGrad[th][2*(p*fc + f - 1) + 0] += 2.0 * accWgh[f] * ax; tempGrad[th][2*(p*fc + f - 1) + 1] += 2.0 * accWgh[f] * ay; tempGrad[th][2*(p*fc + f) + 0] -= 4.0 * accWgh[f] * ax; tempGrad[th][2*(p*fc + f) + 1] -= 4.0 * accWgh[f] * ay; tempGrad[th][2*(p*fc + f + 1) + 0] += 2.0 * accWgh[f] * ax; tempGrad[th][2*(p*fc + f + 1) + 1] += 2.0 * accWgh[f] * ay; } if (f > 0) { const double xpfn = x[2*(p*fc + f - 1) + 0]; const double ypfn = x[2*(p*fc + f - 1) + 1]; const double vx = xpf - xpfn; const double vy = ypf - ypfn; tempGrad[th][2*(p*fc + f) + 0] += 2.0 * velWgh[f-1] * vx; tempGrad[th][2*(p*fc + f) + 1] += 2.0 * velWgh[f-1] * vy; tempGrad[th][2*(p*fc + f - 
1) + 0] -= 2.0 * velWgh[f-1] * vx; tempGrad[th][2*(p*fc + f - 1) + 1] -= 2.0 * velWgh[f-1] * vy; for (int q = p+1; q < pc; q++) { if (divWgh[f-1][p][q] <= 0.0) continue; const double xqf = x[2*(q*fc + f) + 0]; const double yqf = x[2*(q*fc + f) + 1]; const double xqfn = x[2*(q*fc + f - 1) + 0]; const double yqfn = x[2*(q*fc + f - 1) + 1]; const double cx = (xpf - xpfn) - (xqf - xqfn); const double cy = (ypf - ypfn) - (yqf - yqfn); const double wgh = divWgh[f-1][p][q]; tempGrad[th][2*(p*fc + f - 1) + 0] -= 2.0 * wgh * cx; tempGrad[th][2*(p*fc + f - 1) + 1] -= 2.0 * wgh * cy; tempGrad[th][2*(p*fc + f) + 0] += 2.0 * wgh * cx; tempGrad[th][2*(p*fc + f) + 1] += 2.0 * wgh * cy; tempGrad[th][2*(q*fc + f - 1) + 0] += 2.0 * wgh * cx; tempGrad[th][2*(q*fc + f - 1) + 1] += 2.0 * wgh * cy; tempGrad[th][2*(q*fc + f) + 0] -= 2.0 * wgh * cx; tempGrad[th][2*(q*fc + f) + 1] -= 2.0 * wgh * cy; } } } } for (int i = 0; i < threads; i++) { for (int j = 0; j < 2*pc*fc; j++) { gradDest[j] += tempGrad[i][j]; } } /*std::cout << "x:\n"; for (int p = 0; p < pc; p++) for (int f = 0; f < fc; f++) { std::cout << x[2*(p*fc + f) + 0] << ", " << x[2*(p*fc + f) + 1] << "\n"; } std::cout << "\n"; std::cout << "grad:\n"; for (int p = 0; p < pc; p++) for (int f = 0; f < fc; f++) { std::cout << gradDest[2*(p*fc + f) + 0] << ", " << gradDest[2*(p*fc + f) + 1] << "\n"; } std::cout << "\n";*/ } relion-3.1.3/src/jaz/local_motion_fit.h000066400000000000000000000040141411340063500200470ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #ifndef LOCAL_MOTION_FIT #define LOCAL_MOTION_FIT #include #include #include #include class LocalMotionFit : public DifferentiableOptimization { public: LocalMotionFit( const std::vector>>& correlation, const std::vector& velWgh, const std::vector& accWgh, const std::vector>>& divWgh, const std::vector& offsets, int threads); double f(const std::vector& x, void* tempStorage) const; void grad(const std::vector& x, std::vector& gradDest, void* tempStorage) const; private: int pc, fc, threads; const std::vector>>& correlation; const std::vector& velWgh; const std::vector& accWgh; const std::vector>>& divWgh; const std::vector& offsets; }; #endif relion-3.1.3/src/jaz/math/000077500000000000000000000000001411340063500153075ustar00rootroot00000000000000relion-3.1.3/src/jaz/math/Zernike.cpp000066400000000000000000000043721411340063500174300ustar00rootroot00000000000000#include "Zernike.h" #include #include #include std::vector>> Zernike::R_coeffs = std::vector>>(0); double Zernike::Z(int m, int n, double rho, double phi) { if (m >= 0) { return R(m, n, rho) * cos(m * phi); } else { return R(-m, n, rho) * sin(-m * phi); } } double Zernike::Z_cart(int m, int n, double x, double y) { if (x == 0 && y == 0) { return Z(m, n, sqrt(x*x + y*y), 0.0); } else { return Z(m, n, sqrt(x*x + y*y), atan2(y,x)); } } double Zernike::R(int m, int n, double rho) { if (m > n) { REPORT_ERROR_STR("Zernike::R: illegal argument: m = " << m << ", n = " << n << ".\n"); } if ((n - m) % 2 == 1) return 0.0; if (R_coeffs.size() <= n) { prepCoeffs(n); } double out = 0.0; for (int k = 0; k <= (n-m)/2; k++) { out += R_coeffs[n][m][k] * pow(rho, n - 2*k); } return out; } void Zernike::evenIndexToMN(int i, int &m, int &n) { const int k = (int)sqrt((double)i); m = 2*(i - k*k - k); n = 2*k; } int Zernike::numberOfEvenCoeffs(int n_max) { const int l = n_max / 2; return l*l + 2*l + 1; } void Zernike::oddIndexToMN(int i, int& m, int& n) { const int k = (int)((sqrt(1 + 4 * i) - 1.0) / 2.0); const int i0 = k*k + k; n = 2 * k + 1; m = 2 * (i - i0) - n; } int Zernike::numberOfOddCoeffs(int n_max) { const int l = (n_max - 1) / 2 + 1; return l * l + l; } double Zernike::factorial(int k) { // @TODO: replace by tgamma(k+1) once C++11 becomes available double out = 1.0; for (int i = 2; i <= k; i++) { out *= (double)i; } return out; } void Zernike::prepCoeffs(int n) { std::vector>> newCoeffs(n+1); for (int nn = 0; nn < R_coeffs.size(); nn++) { newCoeffs[nn] = R_coeffs[nn]; } for (int nn = R_coeffs.size(); nn <= n; nn++) { newCoeffs[nn] = std::vector>(nn+1); for (int m = 0; m <= nn; m++) { if ((nn - m) % 2 == 1) continue; newCoeffs[nn][m] = std::vector((nn-m)/2 + 1); for (int k = 0; k <= (nn-m)/2; k++) { newCoeffs[nn][m][k] = (1 - 2*(k%2)) * factorial(nn-k) / (factorial(k) * factorial((nn+m)/2 - k) * factorial((nn-m)/2 - k)); } } } R_coeffs = newCoeffs; } relion-3.1.3/src/jaz/math/Zernike.h000066400000000000000000000011321411340063500170640ustar00rootroot00000000000000#ifndef ZERNIKE_H #define ZERNIKE_H #include class Zernike { public: static double Z(int m, int n, double rho, double phi); static double Z_cart(int m, int n, double x, double y); static double R(int m, int n, double rho); static void evenIndexToMN(int i, int& m, int& n); static int numberOfEvenCoeffs(int n_max); static void oddIndexToMN(int i, int& m, int& n); static int numberOfOddCoeffs(int n_max); private: static std::vector>> R_coeffs; static double factorial(int k); static void prepCoeffs(int n); }; 
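// ---------------------------------------------------------------------------
// Descriptive notes (added for clarity; they only summarize the definitions
// implemented in Zernike.cpp above and do not change any behaviour):
//
//   Z(m, n, rho, phi) evaluates the real Zernike polynomial of azimuthal
//   order m and radial order n:
//       Z = R(|m|, n, rho) * cos(m * phi)       for m >= 0
//       Z = R(|m|, n, rho) * sin(|m| * phi)     for m <  0
//   with the standard radial expansion
//       R(m, n, rho) = sum_{k=0..(n-m)/2} (-1)^k (n-k)!
//                      / ( k! ((n+m)/2 - k)! ((n-m)/2 - k)! ) * rho^(n-2k),
//   which is identically zero when (n - m) is odd. Z_cart() is the same
//   polynomial evaluated at Cartesian (x, y), using phi = atan2(y, x).
//
//   evenIndexToMN() and oddIndexToMN() map a single linear coefficient index
//   onto (m, n) pairs of even or odd radial order, so that a whole set of
//   Zernike coefficients can be stored in a flat array; numberOfEvenCoeffs()
//   and numberOfOddCoeffs() give the corresponding array lengths for a
//   maximum order n_max.
// ---------------------------------------------------------------------------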
#endif relion-3.1.3/src/jaz/micrograph_handler.cpp000066400000000000000000000531211411340063500207140ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include "micrograph_handler.h" #include #include using namespace gravis; MicrographHandler::MicrographHandler() : hasCorrMic(false), nr_omp_threads(1), firstFrame(0), lastFrame(-1), hotCutoff(-1), debug(false), saveMem(false), ready(false), last_gainFn(""), corrMicFn(""), eer_upsampling(-1), eer_grouping(-1) {} void MicrographHandler::init( // in: const std::vector& mdts, bool verb, int nr_omp_threads, // out: int& fc, double& dosePerFrame, std::string& metaFn) { this->nr_omp_threads = nr_omp_threads; this->firstFrame = firstFrame; this->lastFrame = lastFrame; if (corrMicFn != "") { MetaDataTable corrMic; ObservationModel obsModel; // Don't die even if conversion failed. Polishing does not use obsModel from a motion correction STAR file ObservationModel::loadSafely(corrMicFn, obsModel, corrMic, "micrographs", verb, false); mic2meta.clear(); std::string micName, metaName; if (!corrMic.containsLabel(EMDL_MICROGRAPH_NAME)) { REPORT_ERROR(" The corrected_micrographs STAR file does not contain rlnMicrographName label."); } if (!corrMic.containsLabel(EMDL_MICROGRAPH_METADATA_NAME)) { REPORT_ERROR(" The corrected_micrographs STAR file does not contain rlnMicrographMetadata label. 
Did you not run motion correction from the RELION-3.0 GUI?"); } for (int i = 0; i < corrMic.numberOfObjects(); i++) { corrMic.getValueToString(EMDL_MICROGRAPH_NAME, micName, i); corrMic.getValueToString(EMDL_MICROGRAPH_METADATA_NAME, metaName, i); // remove the pipeline job prefix FileName fn_pre, fn_jobnr, fn_post; decomposePipelineFileName(micName, fn_pre, fn_jobnr, fn_post); // std::cout << fn_post << " => " << metaName << std::endl; mic2meta[fn_post] = metaName; } hasCorrMic = true; } else { hasCorrMic = false; } loadInitial(mdts, verb, fc, dosePerFrame, metaFn); ready = true; } std::vector MicrographHandler::cullMissingMovies( const std::vector &mdts, int verb) { if (!ready) { // REPORT_ERROR("ERROR: MicrographHandler::cullMissingMovies - MicrographHandler not initialized."); } std::vector good(0); std::vector bad(0); const int mc = mdts.size(); for (int m = 0; m < mc; m++) { if (isMoviePresent(mdts[m], false)) { good.push_back(mdts[m]); } else { FileName fn_movie; mdts[m].getValueToString(EMDL_MICROGRAPH_NAME, fn_movie, 0); bad.push_back(fn_movie); } } if (verb && bad.size() > 0) { if (bad.size() == 1) { std::cerr << " - The movie for the following micrograph is missing:\n"; } else { std::cerr << " - Movies for the following micrographs are missing:\n"; } for (int i = 0; i < bad.size(); i++) { std::cerr << " " << bad[i] << "\n"; } } return good; } void MicrographHandler::findLowestFrameCount( const std::vector &mdts, int verb) { if (!ready) { REPORT_ERROR("ERROR: MicrographHandler::findLowestFrameCount - MicrographHandler not initialized."); } int fcmin = std::numeric_limits::max(); const int mc = mdts.size(); for (int m = 0; m < mc; m++) { int fcm = determineFrameCount(mdts[m]); if (fcm < fcmin) { fcmin = fcm; } } if (lastFrame >= fcmin) { std::cout << " - Warning: some movies contain only " << fcmin << " frames. Unable to load frames " << (fcmin+1) << ".." << (lastFrame+1) << " ( = --last_frame).\n"; } else if (verb > 0) { std::cout << " + Max. frame number available in all movies: " << fcmin << "\n"; } if (lastFrame < 0 || lastFrame > fcmin-1) { lastFrame = fcmin - 1; } } std::vector MicrographHandler::findLongEnoughMovies( const std::vector &mdts, int fc, int verb) { if (!ready) { REPORT_ERROR("ERROR: MicrographHandler::findLongEnoughMovies - MicrographHandler not initialized."); } std::vector good(0); std::vector bad(0); const int mc = mdts.size(); for (int m = 0; m < mc; m++) { int fcm = determineFrameCount(mdts[m]); if (fcm < fc) { bad.push_back(getMovieFilename(mdts[m])); } else { good.push_back(mdts[m]); } } if (good.size() == 0) { REPORT_ERROR_STR("ERROR: Not a single movie contains the requested number of frames (" << fc << ")"); } if (verb && bad.size() > 0) { if (bad.size() == 1) { std::cerr << " - The following micrograph does not contain " << fc << " frames. Particles in it will be ignored:\n"; } else { std::cerr << " - The following micrographs do not contain " << fc << " frames. Particles in them will be ignored:\n"; } for (int i = 0; i < bad.size(); i++) { std::cerr << " " << bad[i] << "\n"; } } return good; } // This reads pixel sizes from a single metadata star file. // For multi optics group scenarios, we should process only micrographs // in the given MotionCorr STAR file. Then we can safely assume all pixel sizes are the same. 
// TODO: TAKANORI: make sure in MotionCorr runner and Polish void MicrographHandler::loadInitial( const std::vector& mdts, bool verb, int& fc, double& dosePerFrame, std::string& metaFn) { if (hasCorrMic) { std::string mgFn; FileName fn_pre, fn_jobnr, fn_post; for (int i = 0, ilim = mdts.size(); i < ilim; i++) { mdts[i].getValueToString(EMDL_MICROGRAPH_NAME, mgFn, 0); // remove the pipeline job prefix decomposePipelineFileName(mgFn, fn_pre, fn_jobnr, fn_post); metaFn = getMetaName(fn_post, false); if (metaFn != "") break; } if (metaFn == "") REPORT_ERROR("There is no movie metadata STAR file for any micrographs!"); if (debug) { std::cout << "first movie: " << fn_post << "\n"; std::cout << "maps to: " << metaFn << "\n"; } micrograph = Micrograph(metaFn); if (movie_angpix <= 0) { movie_angpix = micrograph.angpix; if (verb > 0) { std::cout << " + Using movie pixel size from " << metaFn << ": " << movie_angpix << " A\n"; } } else { if (verb > 0) { std::cout << " + Using movie pixel size from command line: " << movie_angpix << " A\n"; } } if (coords_angpix <= 0) { coords_angpix = micrograph.angpix * micrograph.getBinningFactor(); if (verb > 0) { std::cout << " + Using coord. pixel size from " << metaFn << ": " << coords_angpix << " A\n"; } } else { if (verb > 0) { std::cout << " + Using coord. pixel size from command line: " << coords_angpix << " A\n"; } } dosePerFrame = micrograph.dose_per_frame; micrograph_size.x = micrograph.getWidth(); micrograph_size.y = micrograph.getHeight(); if (lastFrame >= micrograph.getNframes()) { REPORT_ERROR_STR("ERROR: There are only " << micrograph.getNframes() << " frames in " << metaFn << " - " << lastFrame << " have been requested using the --lastFrame option."); } if (lastFrame < 0) { fc = micrograph.getNframes() - firstFrame; } else { fc = lastFrame - firstFrame + 1; } } else { std::string mgFn0; mdts[0].getValueToString(EMDL_MICROGRAPH_NAME, mgFn0, 0); FileName fn_pre, fn_jobnr, fn_post; decomposePipelineFileName(mgFn0, fn_pre, fn_jobnr, fn_post); Image dum; dum.read(fn_post, false); micrograph_size.x = XSIZE(dum()); micrograph_size.y = YSIZE(dum()); const int fc0 = dum().zdim > 1? dum().zdim : dum().ndim; if (lastFrame < 0) { fc = fc0 - firstFrame; } else { if (lastFrame >= fc0) { REPORT_ERROR_STR("ERROR: There are only " << micrograph.getNframes() << " frames in " << metaFn << " - " << (lastFrame+1) << " have been requested using the --lastFrame option."); } else { fc = lastFrame - firstFrame + 1; } } } } void MicrographHandler::validatePixelSize(RFLOAT angpix) const { // std::cout << "angpix = " << angpix << " coords_angpix = " << coords_angpix << " movie_angpix = " << movie_angpix << std::endl; if (angpix < coords_angpix - 1e-9) { std::cerr << "WARNING: pixel size (--angpix) is smaller than the AutoPick pixel size (--coords_angpix)\n"; if (coords_angpix < angpix + 0.01) { std::cerr << " This is probably a rounding error. It is recommended to set --angpix (" << angpix << ") to at least " << coords_angpix << "\n"; } } if (angpix < movie_angpix - 1e-9) { std::cerr << "WARNING: pixel size (--angpix) is smaller than the movie pixel size (--movie_angpix)\n"; if (movie_angpix < angpix + 0.01) { std::cerr << " This is probably a rounding error. 
It is recommended to set --angpix (" << angpix << ") to at least " << movie_angpix << "\n"; } } } std::vector>> MicrographHandler::loadMovie( const MetaDataTable &mdt, int s, double angpix, std::vector& fts, const std::vector>* offsets_in, std::vector>* offsets_out, double data_angpix) { if (!ready) { REPORT_ERROR("ERROR: MicrographHandler::loadMovie - MicrographHandler not initialized."); } std::vector>> movie; const int nr_omp_threads = fts.size(); std::string mgFn0; mdt.getValueToString(EMDL_MICROGRAPH_NAME, mgFn0, 0); FileName fn_pre, fn_jobnr, fn_post; decomposePipelineFileName(mgFn0, fn_pre, fn_jobnr, fn_post); if (hasCorrMic) { std::string metaFn = getMetaName(fn_post); micrograph = Micrograph(metaFn); FileName mgFn = micrograph.getMovieFilename(); std::string gainFn = micrograph.getGainFilename(); MultidimArray defectMask; bool hasDefect = (micrograph.fnDefect != "" || micrograph.hotpixelX.size() != 0); if (hasDefect) micrograph.fillDefectAndHotpixels(defectMask); if (debug) { std::cout << "loading: " << fn_post << "\n"; std::cout << "-> meta: " << metaFn << "\n"; std::cout << "-> data: " << mgFn << "\n"; std::cout << "-> gain: " << gainFn << "\n"; std::cout << "-> mask: " << micrograph.fnDefect << "\n"; std::cout << "-> nhot: " << micrograph.hotpixelX.size() << "\n"; std::cout << "-> hasdefect: " << (hasDefect ? 1 : 0) << std::endl; } const bool isEER = EERRenderer::isEER(mgFn); bool mgHasGain = false; if (gainFn != "") { if (gainFn != last_gainFn) { last_gainFn = gainFn; if (isEER) // TODO: Takanori: Remove this once we updated RelionCor { if (eer_upsampling < 0) eer_upsampling = micrograph.getEERUpsampling(); EERRenderer::loadEERGain(gainFn, lastGainRef(), eer_upsampling); } else lastGainRef.read(gainFn); } mgHasGain = true; } if (!isEER) { #define OLD_CODE #ifdef OLD_CODE movie = StackHelper::extractMovieStackFS(&mdt, mgHasGain? &lastGainRef : 0, hasDefect ? &defectMask : 0, mgFn, angpix, coords_angpix, movie_angpix, data_angpix, s, nr_omp_threads, true, firstFrame, lastFrame, hotCutoff, debug, saveMem, offsets_in, offsets_out); #else // TODO: Implement gain and defect correction, and remove the old code path std::cout << "New code path" << std::endl; Image mgStack; mgStack.read(mgFn, false); // lastFrame and firstFrame is 0 indexed const int my_lastFrame = ((mgStack.data.zdim > 1)? mgStack.data.zdim : mgStack.data.ndim) - 1; const int n_frames = my_lastFrame - firstFrame + 1; std::cout << "first = " << firstFrame << " last = " << my_lastFrame << " n_frames = " << n_frames << std::endl; std::vector > Iframes(n_frames); #pragma omp parallel for num_threads(nr_omp_threads) for (int iframe = 0; iframe < n_frames; iframe++) { Image img; img.read(mgFn, true, iframe, false, true); Iframes[iframe] = img(); } movie = StackHelper::extractMovieStackFS(&mdt, Iframes, angpix, coords_angpix, movie_angpix, data_angpix, s, nr_omp_threads, true, debug, offsets_in, offsets_out); #endif } else { if (eer_upsampling < 0) eer_upsampling = micrograph.getEERUpsampling(); if (eer_grouping < 0) eer_grouping = micrograph.getEERGrouping(); EERRenderer renderer; renderer.read(mgFn, eer_upsampling); // lastFrame and firstFrame is 0 indexed int my_lastFrame = (lastFrame < 0) ? 
(renderer.getNFrames() / eer_grouping - 1) : lastFrame; int n_frames = my_lastFrame - firstFrame + 1; std::vector > Iframes(n_frames); #pragma omp parallel for num_threads(nr_omp_threads) for (int iframe = 0; iframe < n_frames; iframe++) { // this takes 1-indexed frame numbers // std::cout << "EER: iframe = " << iframe << " start = " << ((firstFrame + iframe) * eer_grouping + 1) << " end = " << ((firstFrame + iframe + 1) * eer_grouping) << std::endl; renderer.renderFrames((firstFrame + iframe) * eer_grouping + 1, (firstFrame + iframe + 1) * eer_grouping, Iframes[iframe]); if (mgHasGain) { FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(lastGainRef()) { DIRECT_MULTIDIM_ELEM(Iframes[iframe], n) *= DIRECT_MULTIDIM_ELEM(lastGainRef(), n); } } } if (hasDefect) // TODO: TAKANORI: Refactor!! Code duplication from RelionCor { if (XSIZE(defectMask) != XSIZE(Iframes[0]) || YSIZE(defectMask) != YSIZE(Iframes[0])) { std::cerr << "X/YSIZE of defectMask = " << XSIZE(defectMask) << " x " << YSIZE(defectMask) << std::endl; std::cerr << "X/YSIZE of Iframe[0] = " << XSIZE(Iframes[0]) << " x " << YSIZE(Iframes[0]) << std::endl; REPORT_ERROR("Invalid dfefect mask size for " + mgFn0); } MultidimArray Isum; Isum.initZeros(Iframes[0]); for (int iframe = 0; iframe < n_frames; iframe++) { #pragma omp parallel for num_threads(nr_omp_threads) FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Isum) { DIRECT_MULTIDIM_ELEM(Isum, n) += DIRECT_MULTIDIM_ELEM(Iframes[iframe], n); } } #ifdef DEBUG Image tmp; tmp() = Isum; tmp.write("Isum.mrc"); #endif RFLOAT mean = 0, std = 0; #pragma omp parallel for reduction(+:mean) num_threads(nr_omp_threads) FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Isum) { mean += DIRECT_MULTIDIM_ELEM(Isum, n); } mean /= YXSIZE(Isum); #pragma omp parallel for reduction(+:std) num_threads(nr_omp_threads) FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Isum) { RFLOAT d = (DIRECT_MULTIDIM_ELEM(Isum, n) - mean); std += d * d; } std = std::sqrt(std / YXSIZE(Isum)); mean /= n_frames; std /= n_frames; Isum.clear(); // std::cout << "DEBUG: defect correction: mean = " << mean << " std = " << std << std::endl; const int NUM_MIN_OK = 6; const int D_MAX = isEER ? 
4: 2; const int PBUF_SIZE = 100; FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY2D(defectMask) { if (!DIRECT_A2D_ELEM(defectMask, i, j) && (!mgHasGain || DIRECT_A2D_ELEM(lastGainRef(), i, j) != 0)) continue; #pragma omp parallel for num_threads(nr_omp_threads) for (int iframe = 0; iframe < n_frames; iframe++) { int n_ok = 0; RFLOAT pbuf[PBUF_SIZE]; for (int dy= -D_MAX; dy <= D_MAX; dy++) { int y = i + dy; if (y < 0 || y >= YSIZE(defectMask)) continue; for (int dx = -D_MAX; dx <= D_MAX; dx++) { int x = j + dx; if (x < 0 || x >= XSIZE(defectMask)) continue; if (DIRECT_A2D_ELEM(defectMask, y, x)) continue; if (mgHasGain && DIRECT_A2D_ELEM(lastGainRef(), y, x) == 0) continue; pbuf[n_ok] = DIRECT_A2D_ELEM(Iframes[iframe], y, x); n_ok++; } } // std::cout << "n_ok = " << n_ok << std::endl; if (n_ok > NUM_MIN_OK) DIRECT_A2D_ELEM(Iframes[iframe], i, j) = pbuf[rand() % n_ok]; else DIRECT_A2D_ELEM(Iframes[iframe], i, j) = rnd_gaus(mean, std); } } #ifdef DEBUG Isum.initZeros(Iframes[0]); for (int iframe = 0; iframe < n_frames; iframe++) { #pragma omp parallel for num_threads(nr_omp_threads) FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(Isum) { DIRECT_MULTIDIM_ELEM(Isum, n) += DIRECT_MULTIDIM_ELEM(Iframes[iframe], n); } } tmp() = Isum; tmp.write("Isum-fix-defect.mrc"); exit(0); #endif } movie = StackHelper::extractMovieStackFS(&mdt, Iframes, angpix, coords_angpix, movie_angpix, data_angpix, s, nr_omp_threads, true, debug, offsets_in, offsets_out); } } else { REPORT_ERROR("You can no longer use this program without micrograph metadata STAR files."); } const int pc = movie.size(); #pragma omp parallel for num_threads(nr_omp_threads) for (int p = 0; p < pc; p++) { StackHelper::varianceNormalize(movie[p], false); } return movie; } std::vector>> MicrographHandler::loadMovie( const MetaDataTable &mdt, int s, double angpix, std::vector& fts, const std::vector& pos, std::vector>& tracks, bool unregGlob, std::vector& globComp, const std::vector>* offsets_in, std::vector>* offsets_out, double data_angpix) { std::vector>> out = loadMovie( mdt, s, angpix, fts, offsets_in, offsets_out, data_angpix); if (!hasCorrMic) { tracks.resize(0); } else { const int fc0 = micrograph.getNframes(); int fc; if (lastFrame >= 0) { fc = lastFrame - firstFrame + 1; } else { fc = fc0 - firstFrame; } const int pc = pos.size(); const d2Vector inputScale( coords_angpix / (movie_angpix * micrograph.getWidth()), coords_angpix / (movie_angpix * micrograph.getHeight())); const double outputScale = movie_angpix / angpix; globComp = std::vector(fc, d2Vector(0,0)); if (unregGlob) { for (int f = 0; f < fc; f++) { RFLOAT sx, sy; micrograph.getShiftAt(firstFrame + f + 1, 0, 0, sx, sy, false); globComp[f] = -outputScale * d2Vector(sx, sy); } } tracks.resize(pc); for (int p = 0; p < pc; p++) { tracks[p] = std::vector(fc); for (int f = 0; f < fc; f++) { d2Vector in(inputScale.x * pos[p].x - 0.5, inputScale.y * pos[p].y - 0.5); RFLOAT sx, sy; micrograph.getShiftAt(firstFrame + f + 1, in.x, in.y, sx, sy, true); tracks[p][f] = -outputScale * d2Vector(sx,sy) - globComp[f]; } } } return out; } std::string MicrographHandler::getMetaName(std::string micName, bool die_on_error) { // std::cout << "MicrographHandler::getMetaName " << micName << std::endl; std::map::iterator it = mic2meta.find(micName); if (it == mic2meta.end()) { if (die_on_error) REPORT_ERROR("ERROR: MicrographHandler::getMetaName: no metadata star-file for " +micName+" found in "+corrMicFn+"."); else return ""; } else { return it->second; } } // TODO: TAKANORI: This needs to handle changes in EER grouping int 
MicrographHandler::determineFrameCount(const MetaDataTable &mdt) { int fc = 0; std::string mgFn; mdt.getValueToString(EMDL_MICROGRAPH_NAME, mgFn, 0); FileName fn_pre, fn_jobnr, fn_post; decomposePipelineFileName(mgFn, fn_pre, fn_jobnr, fn_post); if (hasCorrMic) { std::string metaFn = getMetaName(fn_post); micrograph = Micrograph(metaFn); // TODO: TAKANORI: shouldn't read hot pixels if (!exists(micrograph.getMovieFilename())) { return -1; } fc = micrograph.getNframes(); } else { if (!exists(fn_post)) { return -1; } Image dum; dum.read(fn_post, false); fc = dum().zdim > 1? dum().zdim : dum().ndim; } return fc; } bool MicrographHandler::isMoviePresent(const MetaDataTable &mdt, bool die_on_error) { std::string mgFn; FileName fn_pre, fn_jobnr, fn_post; mdt.getValueToString(EMDL_MICROGRAPH_NAME, mgFn, 0); decomposePipelineFileName(mgFn, fn_pre, fn_jobnr, fn_post); if (hasCorrMic) { std::string metaFn = getMetaName(fn_post, die_on_error); if (exists(metaFn)) { micrograph = Micrograph(metaFn); return exists(micrograph.getMovieFilename()); } else { return false; } } else { return exists(fn_post); } } std::string MicrographHandler::getMovieFilename(const MetaDataTable& mdt, bool die_on_error) { std::string mgFn; mdt.getValueToString(EMDL_MICROGRAPH_NAME, mgFn, 0); FileName fn_pre, fn_jobnr, fn_post; decomposePipelineFileName(mgFn, fn_pre, fn_jobnr, fn_post); if (hasCorrMic) { std::string metaFn = getMetaName(fn_post, die_on_error); if (exists(metaFn)) { micrograph = Micrograph(metaFn); return micrograph.getMovieFilename(); } else { return metaFn; } } else { return fn_post; } } relion-3.1.3/src/jaz/micrograph_handler.h000066400000000000000000000076561411340063500203750ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #ifndef MICROGRAPH_HANDLER_H #define MICROGRAPH_HANDLER_H #include #include #include #include #include #include class MicrographHandler { public: MicrographHandler(); int nr_omp_threads, firstFrame, lastFrame; double movie_angpix, coords_angpix, data_angpix, hotCutoff; int eer_upsampling, eer_grouping; bool debug, saveMem, ready; std::string corrMicFn; std::string last_gainFn; // make protected gravis::t2Vector micrograph_size; // initialise corrected/uncorrected micrograph dictionary, then // load first movie (or read corrected_micrographs.star) to obtain: // fc, micrograph_xsize, micrograph_ysize, motionEstimator.dosePerFrame void init( // in: const std::vector& mdts, bool verb, int nr_omp_threads, // out: int& fc, double& dosePerFrame, std::string& metaFn); void validatePixelSize(RFLOAT angpix) const; // remove movies from the list for which either the meta-star or the movie itself is missing std::vector cullMissingMovies(const std::vector& mdts, int verb); // find the greatest number of frames available in all micrographs void findLowestFrameCount(const std::vector& mdts, int verb); // find all movies of sufficient length std::vector findLongEnoughMovies( const std::vector& mdts, int fc, int verb); // load a movie and extract all particles // returns a per-particle vector of per-frame images of size (s/2+1) x s std::vector>> loadMovie(const MetaDataTable& mdt, int s, double angpix, std::vector& fts, const std::vector>* offsets_in = 0, std::vector>* offsets_out = 0, double data_angpix = -1); /* Load a movie as above and also write tracks of particles at 'pos' into 'tracks'. If 'unregGlob' is set, also write the global component of motion into 'globComp'.*/ std::vector>> loadMovie( const MetaDataTable& mdt, int s, double angpix, std::vector& fts, const std::vector& pos, std::vector>& tracks, bool unregGlob, std::vector& globComp, const std::vector>* offsets_in = 0, std::vector>* offsets_out = 0, double data_angpix = -1); protected: Micrograph micrograph; Image lastGainRef; bool hasCorrMic; std::map mic2meta; void loadInitial( const std::vector& mdts, bool verb, int& fc, double& dosePerFrame, std::string& metaFn); std::string getMetaName(std::string micName, bool die_on_error=true); int determineFrameCount(const MetaDataTable& mdt); bool isMoviePresent(const MetaDataTable& mdt, bool die_on_error=true); std::string getMovieFilename(const MetaDataTable& mdt, bool die_on_error=true); }; #endif relion-3.1.3/src/jaz/motion/000077500000000000000000000000001411340063500156635ustar00rootroot00000000000000relion-3.1.3/src/jaz/motion/alignment_set.cpp000066400000000000000000000017071411340063500212250ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. 
Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ // This file intentionally left blank relion-3.1.3/src/jaz/motion/alignment_set.h000066400000000000000000000156301411340063500206720ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef ALIGNMENT_SET_H #define ALIGNMENT_SET_H #include #include #include #include #include #include template class AlignmentSet { public: AlignmentSet(); AlignmentSet(const std::vector& mdts, int fc, int s, int k0, int k1); int mc, fc, s, sh, k0, k1, accPix; // micrograph < particle < frame > > std::vector >>> CCs; // micrograph < particle < frame > > std::vector> >>> obs; // micrograph < particle > std::vector> >> pred; // frame std::vector< std::vector > damage; std::vector> positions; std::vector>> initialTracks; std::vector> globComp; std::vector> accCoords; template void copyCC(int m, int p, int f, const Image& src); void accelerate(const Image& img, std::vector>& dest); void accelerate(const Image& img, std::vector& dest); gravis::d3Vector updateTsc( const std::vector>& tracks, int mg, int threads); }; template AlignmentSet::AlignmentSet() : mc(0), fc(0), s(0), sh(0), k0(0), k1(0) { } template AlignmentSet::AlignmentSet( const std::vector &mdts, int fc, int s, int k0, int k1) : mc(mdts.size()), fc(fc), s(s), sh(s/2+1), k0(k0), k1(k1) { accCoords.reserve(sh*s); int num = 0; for (int y = 0; y < s; y++) for (int x = 0; x < sh; x++) { const double xx = x; const double yy = y < sh? y : y - s; int r = ROUND(sqrt(xx*xx + yy*yy)); if (r >= k0 && r < k1) { accCoords.push_back(gravis::t2Vector(x,y)); num++; } } accPix = num; CCs.resize(mc); obs.resize(mc); pred.resize(mc); positions.resize(mc); initialTracks.resize(mc); globComp.resize(mc); for (int m = 0; m < mc; m++) { const int pc = mdts[m].numberOfObjects(); positions[m].resize(pc); globComp[m].resize(fc); initialTracks[m].resize(pc); CCs[m].resize(pc); obs[m].resize(pc); pred[m].resize(pc); for (int p = 0; p < pc; p++) { initialTracks[m][p].resize(fc); pred[m][p].resize(accPix); CCs[m][p].resize(fc); obs[m][p].resize(fc); for (int f = 0; f < fc; f++) { obs[m][p][f].resize(accPix); } } } damage.resize(fc); for (int f = 0; f < fc; f++) { damage[f].resize(accPix); } } template template void AlignmentSet::copyCC(int m, int p, int f, const Image &src) { if (m < 0 || m >= mc || p < 0 || p >= CCs[m].size() || f < 0 || f >= fc) { REPORT_ERROR_STR("AlignmentSet::copyCC: bad CC-index: " << m << ", " << p << ", " << f << " for " << mc << ", " << ((m >= 0 && m < mc)? 
CCs[m].size() : 0) << ", " << fc << "."); } CCs[m][p][f] = Image(src.data.xdim, src.data.ydim); for (int y = 0; y < src.data.ydim; y++) for (int x = 0; x < src.data.xdim; x++) { CCs[m][p][f](y,x) = (T)src(y,x); } } template void AlignmentSet::accelerate( const Image &img, std::vector>& dest) { for (int i = 0; i < accPix; i++) { gravis::t2Vector c = accCoords[i]; Complex z = img(c.y, c.x); dest[i] = gravis::t2Vector(z.real, z.imag); } } template void AlignmentSet::accelerate(const Image &img, std::vector& dest) { for (int i = 0; i < accPix; i++) { gravis::t2Vector c = accCoords[i]; dest[i] = img(c.y, c.x); } } template gravis::d3Vector AlignmentSet::updateTsc( const std::vector>& tracks, int mg, int threads) { const int pad = 512; std::vector outT(pad*threads, gravis::d3Vector(0.0, 0.0, 0.0)); const int pc = tracks.size(); #pragma omp parallel for num_threads(threads) for (int p = 0; p < pc; p++) for (int f = 0; f < fc; f++) { int t = omp_get_thread_num(); const gravis::d2Vector shift = tracks[p][f] / s; for (int i = 0; i < accPix; i++) { gravis::t2Vector acc = accCoords[i]; double x = acc.x; double y = acc.y < sh? acc.y : acc.y - s; const double dotp = 2 * PI * (x * shift.x + y * shift.y); double a, b; SINCOS(dotp, &b, &a); const gravis::t2Vector z_obs_t2 = obs[mg][p][f][i]; const double c = (double) z_obs_t2.x; const double d = (double) z_obs_t2.y; const double ac = a * c; const double bd = b * d; const double ab_cd = (a + b) * (c + d); const dComplex z_obs(ac - bd, ab_cd - ac - bd); const gravis::t2Vector z_pred_t2 = pred[mg][p][i]; const dComplex z_pred((double) z_pred_t2.x, (double) z_pred_t2.y); const double dmg = damage[f][i]; outT[pad*t][0] += dmg * (z_pred.real * z_obs.real + z_pred.imag * z_obs.imag); outT[pad*t][1] += dmg * z_obs.norm(); outT[pad*t][2] += dmg * z_pred.norm(); } } gravis::d3Vector out(0.0, 0.0, 0.0); for (int t = 0; t < threads; t++) { out += outT[pad*t]; } return out; } #endif relion-3.1.3/src/jaz/motion/frame_recombiner.cpp000066400000000000000000000520421411340063500216710ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #include "frame_recombiner.h" #include "motion_refiner.h" #include "motion_helper.h" #include #include #include #include #include #include #include #include using namespace gravis; FrameRecombiner::FrameRecombiner() {} void FrameRecombiner::read(IOParser& parser, int argc, char* argv[]) { parser.addSection("Combine frames options"); doCombineFrames = parser.checkOption("--combine_frames", "Combine movie frames into polished particles."); scale_arg = textToInteger(parser.getOption("--scale", "Re-scale the particles to this size (by default read from particles star file)", "-1")); box_arg = textToInteger(parser.getOption("--window", "Re-window the particles to this size (in movie-pixels; by default read from particles star file)", "-1")); crop_arg = textToInteger(parser.getOption("--crop", "Crop the scaled particles to this size after CTF pre-multiplication", "-1")); do_ctf_multiply = parser.checkOption("--ctf_multiply", "Premultiply by CTF."); k0a = textToDouble(parser.getOption("--bfac_minfreq", "Min. frequency used in B-factor fit [Angst]", "20")); k1a = textToDouble(parser.getOption("--bfac_maxfreq", "Max. frequency used in B-factor fit [Angst]", "-1")); bfacFn = parser.getOption("--bfactors", "A .star file with external B/k-factors", ""); bfac_diag = parser.checkOption("--diag_bfactor", "Write out B/k-factor diagnostic data"); suffix = parser.getOption("--suffix", "Add this suffix to shiny MRCS and STAR files", ""); do_recenter = parser.checkOption("--recenter", "Re-center particle according to rlnOriginX/Y in --reextract_data_star STAR file"); recenter_x = textToFloat(parser.getOption("--recenter_x", "X-coordinate (in pixel inside the reference) to recenter re-extracted data on", "0.")); recenter_y = textToFloat(parser.getOption("--recenter_y", "Y-coordinate (in pixel inside the reference) to recenter re-extracted data on", "0.")); recenter_z = textToFloat(parser.getOption("--recenter_z", "Z-coordinate (in pixel inside the reference) to recenter re-extracted data on", "0.")); if (box_arg > 0 || scale_arg > 0) { std::cerr << "WARNING: Changing the box size (--window and/or --scale) might " << "invalidate the current particle offsets.\nPlease remember to " << "run relion_refine again." 
<< std::endl; } if (box_arg > 0 && box_arg % 2 != 0) { REPORT_ERROR_STR("The window size (--window) has to be an even number.\n"); } if (scale_arg > 0 && scale_arg % 2 != 0) { REPORT_ERROR_STR("The rescaled window size (--scale) has to be an even number.\n"); } if (crop_arg > 0 && !do_ctf_multiply) { REPORT_ERROR("--crop is meaningless without --ctf_multiply"); } if (box_arg > 0 && box_arg % 2 != 0) { REPORT_ERROR("--window must be an even number"); } if (scale_arg > 0 && scale_arg % 2 != 0) { REPORT_ERROR("--scale must be an even number"); } if (crop_arg > 0 && crop_arg % 2 != 0) { REPORT_ERROR("--crop must be an even number"); } } void FrameRecombiner::init( const std::vector& allMdts, int verb, int s_ref, int fc, double maxFreq, double angpix_ref, int nr_omp_threads, std::string outPath, bool debug, ReferenceMap* reference, ObservationModel* obsModel, MicrographHandler* micrographHandler) { this->verb = verb; this->s_ref = s_ref; this->sh_ref = s_ref/2 + 1; this->fc = fc; this->nr_omp_threads = nr_omp_threads; this->outPath = outPath; this->debug = debug; this->reference = reference; this->obsModel = obsModel; this->micrographHandler = micrographHandler; this->angpix_ref = angpix_ref; this->maxFreq = maxFreq; /* OLD: neither window nor scale provided: angpix_out = angpix_ref s_out = s_ref only window provided: angpix_out = angpix_ref s_out = window * angpix_mov / angpix_ref only scale provided: window = s_ref * angpix_mov / angpix_ref angpix_out = angpix_mov * window / scale s_out = scale both provided: angpix_out = angpix_mov * window / scale s_out = scale NEW: neither window nor scale provided: angpix_out = angpix(particle) s_out = box_size(particle) only window provided: angpix_out = angpix(particle) s_out = window * angpix_mov / angpix(particle) only scale provided: window_mov = box_size(particle) * angpix(particle) / angpix_mov angpix_out = angpix_mov * window_mov / scale s_out = scale both provided: angpix_out = angpix_mov * window / scale s_out = scale */ const int nog = obsModel->numberOfOpticsGroups(); s_mov.resize(nog); s_out.resize(nog); sh_out.resize(nog); angpix_out.resize(nog); data_angpix.resize(nog); freqWeights.resize(nog); const double angpix_mov = micrographHandler->movie_angpix; // TODO: TAKANORI: make sure the movie_angpix is the same for all micrographs // The optics group is used only to account for different pixel sizes. // Dose weighting uses information from all optics groups. 
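// Illustrative worked example of the box/pixel-size rules described above
// (hypothetical numbers added for clarity; they do not refer to any particular
// dataset). Suppose the movies were recorded at angpix_mov = 0.885 A/px and the
// input particles/reference use s_ref = 128 px at angpix_ref = 1.77 A/px:
//
//   neither --window nor --scale given:
//     s_mov      = 2 * (int)(0.5 * 128 * 1.77 / 0.885 + 0.5) = 256 px
//     s_out      = 2 * (int)(0.5 * 256 * 0.885 / 1.77 + 0.5) = 128 px
//     angpix_out = angpix_ref = 1.77 A/px   (output matches the input particles)
//
//   only --scale 200 given:
//     s_mov      = 256 px (as above)
//     s_out      = 200 px
//     angpix_out = 0.885 * 256 / 200 = 1.1328 A/px
//
// The same relations are evaluated per optics group in the loop below.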
for (int og = 0; og < nog; og++) { if (box_arg > 0) s_mov[og] = box_arg; else s_mov[og] = 2 * (int)(0.5 * s_ref * angpix_ref / angpix_mov + 0.5); if (scale_arg > 0) { s_out[og] = scale_arg; angpix_out[og] = angpix_mov * s_mov[og] / (double) scale_arg; } else { s_out[og] = 2 * (int)(0.5 * s_mov[og] * angpix_mov / angpix_ref + 0.5); angpix_out[og] = angpix_ref; } sh_out[og] = s_out[og]/2 + 1; data_angpix[og] = obsModel->getPixelSize(og); if (debug) { std::cout << "optics group " << og << "\n"; std::cout << "s_out: " << s_out[og] << "\n"; std::cout << "s_mov: " << s_mov[og] << "\n"; std::cout << "angpix_out: " << angpix_out[og] << "\n"; std::cout << "data_angpix: " << data_angpix[og] << "\n"; } if (s_out[og] > s_mov[og]) { REPORT_ERROR_STR("Images can only be scaled down, not up!\n" << "You are trying to extract squares of size " << s_mov[og] << " px from the movies and " << "scale them up to " << s_out[og] << " px\n"); } // Either calculate weights from FCC or from user-provided B-factors const bool hasBfacs = bfacFn != ""; std::stringstream sts; sts << "optics-group-" << (og + 1); if (!hasBfacs) { freqWeights[og] = weightsFromFCC(allMdts, s_out[og], angpix_out[og], sts.str()); } else { freqWeights[og] = weightsFromBfacs(allMdts, s_out[og], angpix_out[og]); } } } void FrameRecombiner::process(const std::vector& mdts, long g_start, long g_end) { int barstep; int my_nr_micrographs = g_end - g_start + 1; const RFLOAT ref_angpix = reference->angpix; const RFLOAT coords_angpix = micrographHandler->coords_angpix; // std::cout << "ref_angpix = " << ref_angpix << " coords_angpix = " << coords_angpix << std::endl; if (verb > 0) { std::cout << " + Combining frames for all micrographs ... " << std::endl; init_progress_bar(my_nr_micrographs); barstep = XMIPP_MAX(1, my_nr_micrographs/ 60); } std::vector fts(nr_omp_threads); int pctot = 0; long nr_done = 0; for (long g = g_start; g <= g_end; g++) { // Abort through the pipeline_control system, TODO: check how this goes with MPI.... if (pipeline_control_check_abort_job()) exit(RELION_EXIT_ABORTED); const int pc = mdts[g].numberOfObjects(); if (pc == 0) continue; pctot += pc; MetaDataTable mdtOut = mdts[g]; // optics group representative of this micrograph // (only the pixel and box sizes have to be identical) int ogmg = obsModel->getOpticsGroup(mdtOut, 0); if (!obsModel->allPixelAndBoxSizesIdentical(mdtOut)) { std::cerr << "WARNING: varying pixel or box sizes detected in " << MotionRefiner::getOutputFileNameRoot(outPath, mdtOut) << " - skipping micrograph." << std::endl; continue; } FileName fn_root = MotionRefiner::getOutputFileNameRoot(outPath, mdtOut); std::vector> shift0; shift0 = MotionHelper::readTracksInPix(fn_root + "_tracks.star", angpix_out[ogmg]); std::vector> shift = shift0; std::vector>> movie; for (int p = 0; do_recenter && p < pc; p++) { // FIXME: code duplication from preprocess.cpp RFLOAT xoff, yoff, xcoord, ycoord; Matrix1D my_projected_center(3); my_projected_center.initZeros(); mdtOut.getValue(EMDL_ORIENT_ORIGIN_X_ANGSTROM, xoff, p); // in A mdtOut.getValue(EMDL_ORIENT_ORIGIN_Y_ANGSTROM, yoff, p); // std::cout << "IN xoff = " << xoff << " yoff = " << yoff; xoff /= ref_angpix; // Now in reference pixels yoff /= ref_angpix; if (fabs(recenter_x) > 0. || fabs(recenter_y) > 0. || fabs(recenter_z) > 0.) 
{ RFLOAT rot, tilt, psi; mdtOut.getValue(EMDL_ORIENT_ROT, rot, p); mdtOut.getValue(EMDL_ORIENT_TILT, tilt, p); mdtOut.getValue(EMDL_ORIENT_PSI, psi, p); // Project the center-coordinates Matrix1D my_center(3); Matrix2D A3D; XX(my_center) = recenter_x; // in reference pixels YY(my_center) = recenter_y; ZZ(my_center) = recenter_z; Euler_angles2matrix(rot, tilt, psi, A3D, false); my_projected_center = A3D * my_center; } xoff -= XX(my_projected_center); yoff -= YY(my_projected_center); xoff = xoff * ref_angpix / coords_angpix; // Now in (possibly binned) micrograph's pixel yoff = yoff * ref_angpix / coords_angpix; mdtOut.getValue(EMDL_IMAGE_COORD_X, xcoord, p); mdtOut.getValue(EMDL_IMAGE_COORD_Y, ycoord, p); // std::cout << " xcoord = " << xcoord << " ycoord = " << ycoord << std::endl;; xcoord -= ROUND(xoff); ycoord -= ROUND(yoff); xoff -= ROUND(xoff); yoff -= ROUND(yoff); mdtOut.setValue(EMDL_IMAGE_COORD_X, xcoord, p); mdtOut.setValue(EMDL_IMAGE_COORD_Y, ycoord, p); mdtOut.setValue(EMDL_ORIENT_ORIGIN_X_ANGSTROM, coords_angpix * xoff, p); mdtOut.setValue(EMDL_ORIENT_ORIGIN_Y_ANGSTROM, coords_angpix * yoff, p); // std::cout << "OUT xoff = " << xoff << " yoff = " << yoff << " xcoord = " << xcoord << " ycoord = " << ycoord << std::endl;; } // loadMovie() will extract squares around the value of shift0 rounded in movie coords, // and return the remainder in shift (in output coordinates) movie = micrographHandler->loadMovie(mdtOut, s_out[ogmg], angpix_out[ogmg], fts, &shift0, &shift, data_angpix[ogmg]); const int out_size = crop_arg > 0 ? crop_arg : s_out[ogmg]; Image stack(out_size, out_size, 1, pc); #pragma omp parallel for num_threads(nr_omp_threads) for (int p = 0; p < pc; p++) { int threadnum = omp_get_thread_num(); Image sum(sh_out[ogmg], s_out[ogmg]); sum.data.initZeros(); Image obs(sh_out[ogmg], s_out[ogmg]); for (int f = 0; f < fc; f++) { shiftImageInFourierTransform(movie[p][f](), obs(), s_out[ogmg], -shift[p][f].x, -shift[p][f].y); for (int y = 0; y < s_out[ogmg]; y++) for (int x = 0; x < sh_out[ogmg]; x++) { sum(y,x) += freqWeights[ogmg][f](y,x) * obs(y,x); } } Image real(s_out[ogmg], s_out[ogmg]); // Premultiply by CTF if (do_ctf_multiply) { CTF ctf; ctf.readByGroup(mdtOut, obsModel, p); int og = obsModel->getOpticsGroup(mdtOut, p); #pragma omp critical(FrameRecombiner_process) { if (obsModel->getBoxSize(og) != s_out[og]) obsModel->setBoxSize(og, s_out[og]); if (obsModel->getPixelSize(og) != angpix_out[og]) obsModel->setPixelSize(og, angpix_out[og]); } MultidimArray Fctf; Fctf.resize(YSIZE(sum()), XSIZE(sum())); ctf.getFftwImage(Fctf, s_out[og], s_out[og], angpix_out[og], false, false, false, true, false); FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(sum()) { DIRECT_MULTIDIM_ELEM(sum(), n) *= DIRECT_MULTIDIM_ELEM(Fctf, n); } } fts[threadnum].inverseFourierTransform(sum(), real()); real().setXmippOrigin(); const int half_out = out_size / 2; for (int y = 0; y < out_size; y++) for (int x = 0; x < out_size; x++) { DIRECT_NZYX_ELEM(stack(), p, 0, y, x) = real(y - half_out, x - half_out); // Image() is logical access } } stack.setSamplingRateInHeader(angpix_out[ogmg]); stack.write(fn_root+"_shiny" + suffix + ".mrcs"); if (debug) { VtkHelper::writeTomoVTK( stack, fn_root+"_shiny" + suffix + ".vtk", false, angpix_out[ogmg], -angpix_out[ogmg] * s_out[ogmg] * 0.5 * d3Vector(1,1,0)); } for (int p = 0; p < pc; p++) { std::stringstream sts; sts << (p+1); mdtOut.setValue(EMDL_IMAGE_NAME, sts.str() + "@" + fn_root+"_shiny" + suffix + ".mrcs", p); } mdtOut.write(fn_root+"_shiny" + suffix + ".star"); 
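/* Per-micrograph summary of the recombination above: for each particle p the
   "shiny" image written to <fn_root>_shiny<suffix>.mrcs is, schematically,

       shiny_p = IFFT( sum over frames f of  w_og[f](k) * shift(F_p_f, -track_p[f]) )

   where F_p_f is the Fourier transform of movie frame f of particle p, the
   shift is applied in Fourier space using the refined track, and w_og[f] are
   the per-frequency dose/B-factor weights computed in init() (weightsFromFCC
   or weightsFromBfacs). The sum is optionally multiplied by the CTF
   (--ctf_multiply), back-transformed and cropped to --crop; the matching
   metadata goes to <fn_root>_shiny<suffix>.star. */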
nr_done++; if (verb > 0 && nr_done % barstep == 0) { progress_bar(nr_done); } } if (verb > 0) { progress_bar(my_nr_micrographs); } } std::vector> FrameRecombiner::weightsFromFCC( const std::vector& allMdts, int s, double angpix, std::string og_name) { if (debug && verb > 0) { std::cout << " + Summing up FCCs..." << std::endl; } Image fccData, fccWgh0, fccWgh1; Image fccDataMg, fccWgh0Mg, fccWgh1Mg; bool first = true; // Compute B/k-factors from all available FCCs (allMdts), // even if only a subset of micrographs (chosenMdts) is being recombined. // TODO: BUG: og_name is not used. for (long g = 0; g < allMdts.size(); g++) { FileName fn_root = MotionRefiner::getOutputFileNameRoot(outPath, allMdts[g]); if (!( exists(fn_root + "_FCC_cc.mrc") && exists(fn_root + "_FCC_w0.mrc") && exists(fn_root + "_FCC_w1.mrc"))) { continue; } fccDataMg.read(fn_root + "_FCC_cc.mrc"); fccWgh0Mg.read(fn_root + "_FCC_w0.mrc"); fccWgh1Mg.read(fn_root + "_FCC_w1.mrc"); if (first) { sh_ref = fccDataMg.data.xdim; s_ref = 2 * (sh_ref-1); fc = fccDataMg.data.ydim; fccData = Image(sh_ref,fc); fccWgh0 = Image(sh_ref,fc); fccWgh1 = Image(sh_ref,fc); fccData.data.initZeros(); fccWgh0.data.initZeros(); fccWgh1.data.initZeros(); first = false; } for (int y = 0; y < fc; y++) for (int x = 0; x < sh_ref; x++) { if (fccDataMg(y,x) == fccDataMg(y,x)) fccData(y,x) += fccDataMg(y,x); if (fccWgh0Mg(y,x) == fccWgh0Mg(y,x)) fccWgh0(y,x) += fccWgh0Mg(y,x); if (fccWgh1Mg(y,x) == fccWgh1Mg(y,x)) fccWgh1(y,x) += fccWgh1Mg(y,x); } } Image fcc(sh_ref,fc); for (int y = 0; y < fc; y++) for (int x = 0; x < sh_ref; x++) { const double wgh = sqrt(fccWgh0(y,x) * fccWgh1(y,x)); if (wgh > 0.0) { fcc(y,x) = fccData(y,x) / wgh; } else { fcc(y,x) = 0.0; } } if (debug) std::cout << "done\n"; k0 = (int) reference->angToPix(k0a); if (!outerFreqKnown()) { k1a = maxFreq; } k1 = (int) reference->angToPix(k1a); if (verb > 0) { std::cout << " + Fitting B/k-factors for " << og_name << " using FCCs from all particles between " << k0 << " and " << k1 << " pixels, or " << k0a << " and " << k1a << " Angstrom ..." 
<< std::endl; } std::pair,std::vector> bkFacs = DamageHelper::fitBkFactors(fcc, k0, k1); // sigmas (bkFacs[f].x) are given in pixels; // rescale if a different box size is to be extracted std::vector bkFacsRescaled(fc); if (s == s_ref) { bkFacsRescaled = bkFacs.first; } else { for (int f = 0; f < fc; f++) { bkFacsRescaled[f].x = (s * angpix) * bkFacs.first[f].x / (double) (s_ref * angpix_ref); bkFacsRescaled[f].y = bkFacs.first[f].y; } } const int sh = s/2 + 1; std::vector> freqWeights; freqWeights = DamageHelper::computeWeights(bkFacsRescaled, sh); const double cf = 8.0 * angpix_ref * angpix_ref * sh_ref * sh_ref; if (bfac_diag) { mktree(outPath + "/bfacs"); Image bfacFit = DamageHelper::renderBkFit(bkFacs, sh_ref, fc); Image bfacFitNoScale = DamageHelper::renderBkFit(bkFacs, sh_ref, fc, true); ImageLog::write(bfacFit, outPath + "/bfacs/glob_Bk-fit"); ImageLog::write(bfacFitNoScale, outPath + "/bfacs/glob_Bk-fit_noScale"); ImageLog::write(fcc, outPath + "/bfacs/glob_Bk-data"); ImageLog::write(freqWeights, outPath + "/bfacs/freqWeights"); std::ofstream bfacsDat(outPath + "/bfacs/Bfac.dat"); std::ofstream kfacsDat(outPath + "/bfacs/kfac.dat"); for (int i = 0; i < fc; i++) { double sig = bkFacs.first[i].x; double b = -cf/(sig*sig); bfacsDat << i << " " << b << std::endl; kfacsDat << i << " " << log(bkFacs.first[i].y) << std::endl; } bfacsDat.close(); kfacsDat.close(); } MetaDataTable mdt; mdt.setName("perframe_bfactors"); for (int f = 0; f < fc; f++ ) { double sig = bkFacs.first[f].x; double b = -cf/(sig*sig); double k = log(bkFacs.first[f].y); mdt.addObject(); mdt.setValue(EMDL_IMAGE_FRAME_NR, f); mdt.setValue(EMDL_POSTPROCESS_BFACTOR, b); mdt.setValue(EMDL_POSTPROCESS_GUINIER_FIT_INTERCEPT, k); } mdt.write(outPath + "/bfactors.star"); // Also write out EPS plots of the B-factors and scale factors CPlot2D plot2D("Polishing B-factors"); plot2D.SetXAxisSize(600); plot2D.SetYAxisSize(400); plot2D.SetDrawLegend(false); plot2D.SetXAxisTitle("movie frame"); plot2D.SetYAxisTitle("B-factor"); mdt.addToCPlot2D(&plot2D, EMDL_IMAGE_FRAME_NR, EMDL_POSTPROCESS_BFACTOR); plot2D.OutputPostScriptPlot(outPath + "bfactors.eps"); CPlot2D plot2Db("Polishing scale-factors"); plot2Db.SetXAxisSize(600); plot2Db.SetYAxisSize(400); plot2Db.SetDrawLegend(false); plot2Db.SetXAxisTitle("movie frame"); plot2Db.SetYAxisTitle("Scale-factor"); mdt.addToCPlot2D(&plot2Db, EMDL_IMAGE_FRAME_NR, EMDL_POSTPROCESS_GUINIER_FIT_INTERCEPT); plot2Db.OutputPostScriptPlot(outPath + "scalefactors.eps"); return freqWeights; } std::vector> FrameRecombiner::weightsFromBfacs( const std::vector& allMdts, int s, double angpix) { const int sh = s/2 + 1; // initialization on the first line to avoid copying of return value std::vector> freqWeights; MetaDataTable mdt; mdt.read(bfacFn); fc = mdt.numberOfObjects(); std::vector bkFacs(fc); double bfacOff = 0.0; for (int f = 0; f < fc; f++) { double b; mdt.getValue(EMDL_POSTPROCESS_BFACTOR, b, f); if (b > bfacOff) bfacOff = b; } const double cf = 8.0 * angpix_ref * angpix_ref * sh * sh; for (int f = 0; f < fc; f++) { double b, k; mdt.getValue(EMDL_POSTPROCESS_BFACTOR, b, f); mdt.getValue(EMDL_POSTPROCESS_GUINIER_FIT_INTERCEPT, k, f); bkFacs[f] = d2Vector(sqrt(-cf/(b-bfacOff-1)), exp(k)); } freqWeights = DamageHelper::computeWeights(bkFacs, sh); if (bfac_diag) { mktree(outPath + "bfacs"); std::pair,std::vector> bkFacs2; bkFacs2.first = bkFacs; bkFacs2.second = std::vector(fc, 1.0); Image bfacFitNoScale = DamageHelper::renderBkFit(bkFacs2, sh, fc, true); ImageLog::write(bfacFitNoScale, outPath + 
"/bfacs/glob_Bk-fit_noScale"); ImageLog::write(freqWeights, outPath + "/bfacs/freqWeights"); std::ofstream bfacsDat(outPath + "/bfacs/Bfac.dat"); std::ofstream kfacsDat(outPath + "/bfacs/kfac.dat"); for (int i = 0; i < fc; i++) { double sig = bkFacs[i].x; double b = -cf/(sig*sig); bfacsDat << i << " " << b << std::endl; kfacsDat << i << " " << log(bkFacs[i].y) << std::endl; } bfacsDat.close(); kfacsDat.close(); } return freqWeights; } bool FrameRecombiner::doingRecombination() { return doCombineFrames; } bool FrameRecombiner::outerFreqKnown() { return k1a > 0.0; } std::vector FrameRecombiner::findUnfinishedJobs( const std::vector &mdts, std::string path) { std::vector out(0); const int gc = mdts.size(); for (int g = 0; g < gc; g++) { std::string fn_root = MotionRefiner::getOutputFileNameRoot(path, mdts[g]); if (!isJobFinished(fn_root)) { out.push_back(mdts[g]); } } return out; } double FrameRecombiner::getOutputPixelSize(int opticsGroup) { return angpix_out[opticsGroup]; } int FrameRecombiner::getOutputBoxSize(int opticsGroup) { if (crop_arg > 0) return crop_arg; else return s_out[opticsGroup]; } bool FrameRecombiner::isCtfMultiplied(int opticsGroup) { return do_ctf_multiply; } std::string FrameRecombiner::getOutputSuffix() { return suffix; } bool FrameRecombiner::isJobFinished(std::string filenameRoot) { return exists(filenameRoot+"_shiny" + suffix + ".mrcs") && exists(filenameRoot+"_shiny" + suffix + ".star"); } relion-3.1.3/src/jaz/motion/frame_recombiner.h000066400000000000000000000061621411340063500213400ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef FRAME_RECOMBINER_H #define FRAME_RECOMBINER_H #include #include #include class IOParser; class ObservationModel; class MicrographHandler; class ReferenceMap; class FrameRecombiner { public: FrameRecombiner(); void read(IOParser& parser, int argc, char *argv[]); void init(const std::vector& allMdts, int verb, int s_ref, int fc, double maxFreq, double angpix_ref, int nr_omp_threads, std::string outPath, bool debug, ReferenceMap* reference, ObservationModel* obsModel, MicrographHandler* micrographHandler); void process(const std::vector& mdts, long g_start, long g_end); bool doingRecombination(); // has a max. freq. parameter been supplied? bool outerFreqKnown(); std::vector findUnfinishedJobs(const std::vector& mdts, std::string path); double getOutputPixelSize(int opticsGroup); int getOutputBoxSize(int opticsGroup); std::string getOutputSuffix(); bool isCtfMultiplied(int opticsGroup); protected: // read from cmd. 
line: bool doCombineFrames, bfac_diag, do_ctf_multiply, do_recenter; int k0, k1, box_arg, scale_arg, crop_arg; double k0a, k1a, recenter_x, recenter_y, recenter_z; std::string bfacFn, suffix; // set at init: int s_ref, sh_ref, fc; std::vector s_mov, s_out, sh_out; std::vector data_angpix; int verb, nr_omp_threads; std::string outPath; bool debug; double angpix_ref, maxFreq; std::vector angpix_out; ReferenceMap* reference; ObservationModel* obsModel; MicrographHandler* micrographHandler; // computed by weightsFromFCC or weightsFromBfacs: std::vector>> freqWeights; std::vector> weightsFromFCC(const std::vector& allMdts, int s, double angpix, std::string og_name); std::vector> weightsFromBfacs(const std::vector& allMdts, int s, double angpix); bool isJobFinished(std::string filenameRoot); }; #endif relion-3.1.3/src/jaz/motion/gp_motion_fit.cpp000066400000000000000000000353461411340063500212370ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include "gp_motion_fit.h" #include #include #include using namespace gravis; GpMotionFit::GpMotionFit( const std::vector>>& correlation, double cc_pad, double sig_vel_px, double sig_div_px, double sig_acc_px, int maxDims, const std::vector& positions, const std::vector& perFrameOffsets, int threads, bool expKer) : expKer(expKer), pc(correlation.size()), fc(correlation[0].size()), threads(threads), cc_pad(cc_pad), sig_vel_px(sig_vel_px), sig_div_px(sig_div_px), sig_acc_px(sig_acc_px), correlation(correlation), positions(positions), perFrameOffsets(perFrameOffsets) { Matrix2D A(pc,pc); const double sv2 = sig_vel_px * sig_vel_px; const double sd2 = sig_div_px * sig_div_px; const double sd1 = sig_div_px; for (int i = 0; i < pc; i++) for (int j = i; j < pc; j++) { const double dd = (positions[i] - positions[j]).norm2(); const double k = sv2 * (expKer? exp(-sqrt(dd/sd2)) : exp(-0.5*dd/sd1)); A(i,j) = k; A(j,i) = k; } Matrix2D U, Vt; Matrix1D S; SvdHelper::decompose(A, U, S, Vt); dc = (maxDims < 0 || maxDims > pc)? 
pc : maxDims; // remove eigendeformations with too small eigenvalues const double eps = 1e-10; for (int d = 0; d < dc; d++) { if (S(d) < eps) { dc = d; break; } } basis = Matrix2D(pc,dc); for (int d = 0; d < dc; d++) { const double l = sqrt(S(d)); for (int p = 0; p < pc; p++) { basis(p,d) = l * Vt(p,d); } } eigenVals = std::vector(dc); for (int d = 0; d < dc; d++) { eigenVals[d] = S(d); } } double GpMotionFit::f(const std::vector &x) const { std::vector> pos(pc, std::vector(fc)); paramsToPos(x, pos); const int pad = 512; std::vector e_t(pad*threads, 0.0); #pragma omp parallel for num_threads(threads) for (int p = 0; p < pc; p++) { int t = omp_get_thread_num(); for (int f = 0; f < fc; f++) { e_t[pad*t] -= Interpolation::cubicXY( correlation[p][f], cc_pad * (pos[p][f].x + perFrameOffsets[f].x), cc_pad * (pos[p][f].y + perFrameOffsets[f].y), 0, 0, true); } } #pragma omp parallel for num_threads(threads) for (int f = 0; f < fc-1; f++) { int t = omp_get_thread_num(); for (int d = 0; d < dc; d++) { const double cx = x[2*(pc + dc*f + d) ]; const double cy = x[2*(pc + dc*f + d) + 1]; e_t[pad*t] += cx*cx + cy*cy; } } if (sig_acc_px > 0.0) { #pragma omp parallel for num_threads(threads) for (int f = 0; f < fc-2; f++) { int t = omp_get_thread_num(); for (int d = 0; d < dc; d++) { const double cx0 = x[2*(pc + dc*f + d) ]; const double cy0 = x[2*(pc + dc*f + d) + 1]; const double cx1 = x[2*(pc + dc*(f+1) + d) ]; const double cy1 = x[2*(pc + dc*(f+1) + d) + 1]; const double dcx = cx1 - cx0; const double dcy = cy1 - cy0; e_t[pad*t] += eigenVals[d]*(dcx*dcx + dcy*dcy) / (sig_acc_px*sig_acc_px); } } } double e_tot = 0.0; for (int t = 0; t < threads; t++) { e_tot += e_t[pad*t]; } return e_tot; } double GpMotionFit::f(const std::vector &x, void* tempStorage) const { if (tempStorage == 0) return f(x); TempStorage* ts = (TempStorage*) tempStorage; paramsToPos(x, ts->pos); for (int t = 0; t < threads; t++) { ts->e_t[ts->pad*t] = 0.0; } #pragma omp parallel for num_threads(threads) for (int p = 0; p < pc; p++) { int t = omp_get_thread_num(); for (int f = 0; f < fc; f++) { const double epf = Interpolation::cubicXY(correlation[p][f], cc_pad * (ts->pos[p][f].x + perFrameOffsets[f].x), cc_pad * (ts->pos[p][f].y + perFrameOffsets[f].y), 0, 0, true); ts->e_t[ts->pad*t] -= epf; } } #pragma omp parallel for num_threads(threads) for (int f = 0; f < fc-1; f++) { int t = omp_get_thread_num(); for (int d = 0; d < dc; d++) { const double cx = x[2*(pc + dc*f + d) ]; const double cy = x[2*(pc + dc*f + d) + 1]; ts->e_t[ts->pad*t] += cx*cx + cy*cy; } } if (sig_acc_px > 0.0) { #pragma omp parallel for num_threads(threads) for (int f = 0; f < fc-2; f++) { int t = omp_get_thread_num(); for (int d = 0; d < dc; d++) { const double cx0 = x[2*(pc + dc*f + d) ]; const double cy0 = x[2*(pc + dc*f + d) + 1]; const double cx1 = x[2*(pc + dc*(f+1) + d) ]; const double cy1 = x[2*(pc + dc*(f+1) + d) + 1]; const double dcx = cx1 - cx0; const double dcy = cy1 - cy0; ts->e_t[ts->pad*t] += eigenVals[d]*(dcx*dcx + dcy*dcy) / (sig_acc_px*sig_acc_px); } } } double e_tot = 0.0; for (int t = 0; t < threads; t++) { e_tot += ts->e_t[ts->pad*t]; } return e_tot; } void GpMotionFit::grad(const std::vector &x, std::vector &gradDest) const { std::vector> pos(pc, std::vector(fc)); paramsToPos(x, pos); int pad = 512; std::vector> ccg_pf(pc, std::vector(fc + pad)); #pragma omp parallel for num_threads(threads) for (int p = 0; p < pc; p++) { for (int f = 0; f < fc; f++) { d2Vector vr = Interpolation::cubicXYgrad( correlation[p][f], cc_pad * (pos[p][f].x + 
perFrameOffsets[f].x), cc_pad * (pos[p][f].y + perFrameOffsets[f].y), 0, 0, true); ccg_pf[p][f] = d2Vector(vr.x, vr.y); } } std::vector> gradDestT(threads, std::vector(gradDest.size()+pad, 0.0)); #pragma omp parallel for num_threads(threads) for (int p = 0; p < pc; p++) for (int f = 0; f < fc; f++) { int t = omp_get_thread_num(); gradDestT[t][2*p ] -= ccg_pf[p][f].x; gradDestT[t][2*p+1] -= ccg_pf[p][f].y; } #pragma omp parallel for num_threads(threads) for (int d = 0; d < dc; d++) for (int p = 0; p < pc; p++) { int t = omp_get_thread_num(); d2Vector g(0.0, 0.0); const double bpd = basis(p,d); for (int f = fc-2; f >= 0; f--) { g.x += bpd * ccg_pf[p][f+1].x; g.y += bpd * ccg_pf[p][f+1].y; gradDestT[t][2*(pc + dc*f + d) ] -= g.x; gradDestT[t][2*(pc + dc*f + d)+1] -= g.y; } } #pragma omp parallel for num_threads(threads) for (int f = 0; f < fc-1; f++) for (int d = 0; d < dc; d++) { int t = omp_get_thread_num(); gradDestT[t][2*(pc + dc*f + d) ] += 2.0 * x[2*(pc + dc*f + d) ]; gradDestT[t][2*(pc + dc*f + d)+1] += 2.0 * x[2*(pc + dc*f + d)+1]; } if (sig_acc_px > 0.0) { const double sa2 = sig_acc_px*sig_acc_px; #pragma omp parallel for num_threads(threads) for (int f = 0; f < fc-2; f++) for (int d = 0; d < dc; d++) { int t = omp_get_thread_num(); const double cx0 = x[2*(pc + dc*f + d) ]; const double cy0 = x[2*(pc + dc*f + d) + 1]; const double cx1 = x[2*(pc + dc*(f+1) + d) ]; const double cy1 = x[2*(pc + dc*(f+1) + d) + 1]; const double dcx = cx1 - cx0; const double dcy = cy1 - cy0; //e_tot += eigenVals[d]*(dcx*dcx + dcy*dcy) / (sig_acc_px*sig_acc_px); gradDestT[t][2*(pc + dc*f + d) ] -= 2.0 * eigenVals[d] * dcx / sa2; gradDestT[t][2*(pc + dc*f + d)+1] -= 2.0 * eigenVals[d] * dcy / sa2; gradDestT[t][2*(pc + dc*(f+1) + d) ] += 2.0 * eigenVals[d] * dcx / sa2; gradDestT[t][2*(pc + dc*(f+1) + d)+1] += 2.0 * eigenVals[d] * dcy / sa2; } } for (int i = 0; i < gradDest.size(); i++) { gradDest[i] = 0.0; } for (int t = 0; t < threads; t++) for (int i = 0; i < gradDest.size(); i++) { gradDest[i] += gradDestT[t][i]; } } void GpMotionFit::grad(const std::vector &x, std::vector &gradDest, void* tempStorage) const { if (tempStorage == 0) return grad(x, gradDest); TempStorage* ts = (TempStorage*) tempStorage; paramsToPos(x, ts->pos); #pragma omp parallel for num_threads(threads) for (int p = 0; p < pc; p++) { for (int f = 0; f < fc; f++) { d2Vector vr = Interpolation::cubicXYgrad( correlation[p][f], cc_pad * (ts->pos[p][f].x + perFrameOffsets[f].x), cc_pad * (ts->pos[p][f].y + perFrameOffsets[f].y), 0, 0, true); ts->ccg_pf[p][f] = d2Vector(vr.x, vr.y); } } #pragma omp parallel for num_threads(threads) for (int t = 0; t < threads; t++) for (int i = 0; i < gradDest.size(); i++) { ts->gradDestT[t][i] = 0.0; } #pragma omp parallel for num_threads(threads) for (int p = 0; p < pc; p++) for (int f = 0; f < fc; f++) { int t = omp_get_thread_num(); ts->gradDestT[t][2*p ] -= ts->ccg_pf[p][f].x; ts->gradDestT[t][2*p+1] -= ts->ccg_pf[p][f].y; } #pragma omp parallel for num_threads(threads) for (int d = 0; d < dc; d++) for (int p = 0; p < pc; p++) { int t = omp_get_thread_num(); d2Vector g(0.0, 0.0); const double bpd = basis(p,d); for (int f = fc-2; f >= 0; f--) { g.x += bpd * ts->ccg_pf[p][f+1].x; g.y += bpd * ts->ccg_pf[p][f+1].y; ts->gradDestT[t][2*(pc + dc*f + d) ] -= g.x; ts->gradDestT[t][2*(pc + dc*f + d)+1] -= g.y; } } #pragma omp parallel for num_threads(threads) for (int f = 0; f < fc-1; f++) for (int d = 0; d < dc; d++) { int t = omp_get_thread_num(); ts->gradDestT[t][2*(pc + dc*f + d) ] += 2.0 * x[2*(pc + dc*f + 
d) ]; ts->gradDestT[t][2*(pc + dc*f + d)+1] += 2.0 * x[2*(pc + dc*f + d)+1]; } if (sig_acc_px > 0.0) { const double sa2 = sig_acc_px*sig_acc_px; #pragma omp parallel for num_threads(threads) for (int f = 0; f < fc-2; f++) for (int d = 0; d < dc; d++) { int t = omp_get_thread_num(); const double cx0 = x[2*(pc + dc*f + d) ]; const double cy0 = x[2*(pc + dc*f + d) + 1]; const double cx1 = x[2*(pc + dc*(f+1) + d) ]; const double cy1 = x[2*(pc + dc*(f+1) + d) + 1]; const double dcx = cx1 - cx0; const double dcy = cy1 - cy0; //e_tot += eigenVals[d]*(dcx*dcx + dcy*dcy) / (sig_acc_px*sig_acc_px); ts->gradDestT[t][2*(pc + dc*f + d) ] -= 2.0 * eigenVals[d] * dcx / sa2; ts->gradDestT[t][2*(pc + dc*f + d)+1] -= 2.0 * eigenVals[d] * dcy / sa2; ts->gradDestT[t][2*(pc + dc*(f+1) + d) ] += 2.0 * eigenVals[d] * dcx / sa2; ts->gradDestT[t][2*(pc + dc*(f+1) + d)+1] += 2.0 * eigenVals[d] * dcy / sa2; } } for (int i = 0; i < gradDest.size(); i++) { gradDest[i] = 0.0; } for (int t = 0; t < threads; t++) for (int i = 0; i < gradDest.size(); i++) { gradDest[i] += ts->gradDestT[t][i]; } } void *GpMotionFit::allocateTempStorage() const { TempStorage* ts = new TempStorage; const int pad = 512; const int parCt = 2*(pc + dc*(fc-1)); ts->pad = pad; ts->pos = std::vector>(pc, std::vector(fc + pad)); ts->ccg_pf = std::vector>(pc, std::vector(fc + pad)); ts->gradDestT = std::vector>(threads, std::vector(parCt + pad, 0.0)); ts->e_t = std::vector(pad*threads, 0.0); return ts; } void GpMotionFit::deallocateTempStorage(void* ts) const { if (ts) delete (TempStorage*) ts; } void GpMotionFit::paramsToPos( const std::vector& x, std::vector>& pos) const { #pragma omp parallel for num_threads(threads) for (int p = 0; p < pc; p++) { d2Vector pp(x[2*p], x[2*p+1]); for (int f = 0; f < fc; f++) { pos[p][f] = pp; if (f < fc-1) { d2Vector vel(0.0, 0.0); for (int d = 0; d < dc; d++) { const double cx = x[2*(pc + dc*f + d) ]; const double cy = x[2*(pc + dc*f + d) + 1]; vel.x += cx * basis(p,d); vel.y += cy * basis(p,d); } pp += vel; } } } } void GpMotionFit::posToParams( const std::vector>& pos, std::vector& x) const { x.resize(2*(pc + dc*(fc-1))); for (int p = 0; p < pc; p++) { x[2*p] = pos[p][0].x; x[2*p+1] = pos[p][0].y; } for (int f = 0; f < fc-1; f++) for (int d = 0; d < dc; d++) { d2Vector c(0.0, 0.0); for (int p = 0; p < pc; p++) { d2Vector v = pos[p][f+1] - pos[p][f]; c.x += v.x * basis(p,d); c.y += v.y * basis(p,d); } x[2*(pc + dc*f + d) ] = c.x/eigenVals[d]; x[2*(pc + dc*f + d)+1] = c.y/eigenVals[d]; } } relion-3.1.3/src/jaz/motion/gp_motion_fit.h000066400000000000000000000055501411340063500206760ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #ifndef GP_MOTION_FIT #define GP_MOTION_FIT #include #include #include #include class GpMotionFit : public DifferentiableOptimization { public: GpMotionFit( const std::vector>>& correlation, double cc_pad, double sig_vel_px, double sig_div_px, double sig_acc_px, int maxDims, const std::vector& positions, const std::vector& perFrameOffsets, int threads, bool expKer); double f(const std::vector& x) const; double f(const std::vector& x, void* tempStorage) const; void grad(const std::vector& x, std::vector& gradDest) const; void grad(const std::vector& x, std::vector& gradDest, void* tempStorage) const; void* allocateTempStorage() const; void deallocateTempStorage(void* ts) const; void paramsToPos(const std::vector& x, std::vector>& pos) const; void posToParams(const std::vector>& pos, std::vector& x) const; class TempStorage { public: int pad; std::vector> pos, ccg_pf; std::vector> gradDestT; std::vector e_t; }; private: bool expKer; int pc, fc, dc, threads; double cc_pad, sig_vel_px, sig_div_px, sig_acc_px; Matrix2D basis; std::vector eigenVals; const std::vector>>& correlation; const std::vector& positions; const std::vector& perFrameOffsets; }; #endif relion-3.1.3/src/jaz/motion/motion_estimator.cpp000066400000000000000000000601501411340063500217650ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #include "motion_estimator.h" #include "motion_helper.h" #include "gp_motion_fit.h" #include "motion_refiner.h" #include #include #include #include #include #include #include #include #include #include using namespace gravis; MotionEstimator::MotionEstimator() : paramsRead(false), ready(false) { } void MotionEstimator::read(IOParser& parser, int argc, char *argv[]) { parser.addSection("Motion fit options (basic)"); dosePerFrame = textToDouble(parser.getOption("--fdose", "Electron dose per frame (in e^-/A^2)", "-1")); sig_vel = textToDouble(parser.getOption("--s_vel", "Velocity sigma [Angst/dose]", "0.5")); sig_div = textToDouble(parser.getOption("--s_div", "Divergence sigma [Angst]", "5000.0")); sig_acc = textToDouble(parser.getOption("--s_acc", "Acceleration sigma [Angst/dose]", "2.0")); paramsFn = parser.getOption("--params_file", "File containing s_vel, s_div and s_acc (overrides command line parameters)", ""); group = textToInteger(parser.getOption("--only_group", "Only align micrographs containing particles from this optics group (negative means off)", "-1")) - 1; all_groups = group < 0; diag = parser.checkOption("--diag", "Write out diagnostic data"); parser.addSection("Motion fit options (advanced)"); cc_pad = textToDouble(parser.getOption("--cc_pad", "Cross-correlation Fourier-padding", "1.0")); dmga = textToDouble(parser.getOption("--dmg_a", "Damage model, parameter a", " 3.40")); dmgb = textToDouble(parser.getOption("--dmg_b", " b", "-1.06")); dmgc = textToDouble(parser.getOption("--dmg_c", " c", "-0.54")); maxIters = textToInteger(parser.getOption("--max_iters", "Maximum number of iterations", "10000")); optEps = textToDouble(parser.getOption("--eps", "Terminate optimization after gradient length falls below this value", "1e-5")); no_whitening = parser.checkOption("--no_whiten", "Do not whiten the noise spectrum"); unregGlob = parser.checkOption("--unreg_glob", "Do not regularize global component of motion"); globOff = parser.checkOption("--glob_off", "Compute initial per-particle offsets"); globOffMax = textToInteger(parser.getOption("--glob_off_max", "Maximum per-particle offset range [Pixels]", "10")); params_scaled_by_dose = !parser.checkOption("--absolute_params", "Do not scale input motion parameters by dose"); debugOpt = parser.checkOption("--debug_opt", "Write optimization debugging info"); global_init = parser.checkOption("--gi", "Initialize with global trajectories instead of loading them from metadata file"); expKer = !parser.checkOption("--sq_exp_ker", "Use a square-exponential kernel instead of an exponential one"); maxEDs = textToInteger(parser.getOption("--max_ed", "Maximum number of eigendeformations", "-1")); cutoffOut = parser.checkOption("--out_cut", "Do not consider frequencies beyond the 0.143-FSC threshold for alignment"); paramsRead = true; } void MotionEstimator::init( int verb, int fc, int nr_omp_threads, bool debug, std::string outPath, ReferenceMap* reference, ObservationModel* obsModel, MicrographHandler* micrographHandler) { if (!paramsRead) { REPORT_ERROR("ERROR: MotionEstimator::init: MotionEstimator has not read its cmd-line parameters."); } this->verb = verb; obsModel->getBoxSizes(s, sh); this->fc = fc; this->nr_omp_threads = nr_omp_threads; this->debug = debug; this->outPath = outPath; this->reference = reference; this->obsModel = obsModel; this->micrographHandler = micrographHandler; angpix = obsModel->getPixelSizes(); s_ref = reference->s; sh_ref = s_ref/2 + 1; 
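/* Convention used throughout: s is a real-space box size and sh = s/2 + 1 is
   the width of the corresponding half Fourier transform (FFTW real-to-complex
   layout, only non-negative x-frequencies stored). A hypothetical 256-px
   reference box therefore has sh_ref = 129 Fourier columns; the per-group
   values s[og] / sh[og] filled in by obsModel->getBoxSizes() above follow the
   same rule. */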
angpix_ref = reference->angpix; if (!global_init && micrographHandler->corrMicFn == "") { if (verb > 0) { std::cerr << " - Warning: in the absence of a corrected_micrographs.star file" << " (--corr_mic), global paths are used for initialization." << std::endl; } global_init = true; } if (verb > 0 && cutoffOut) { std::cout << " + maximum frequency to consider: " << (s_ref * angpix_ref)/(RFLOAT)reference->k_out << " A (" << reference->k_out << " ref. px)" << std::endl; } if (paramsFn != "") { if (verb > 0) { std::cout << " + using parameters from: " << paramsFn << std::endl; } std::ifstream ifs(paramsFn); if (ifs.fail()) { REPORT_ERROR("Unable to read " + paramsFn); } ifs >> sig_vel; ifs >> sig_div; ifs >> sig_acc; if (verb > 0) { std::cout << " s_vel: " << sig_vel << ", s_div: " << sig_div << ", s_acc: " << sig_acc << std::endl; } } if (debug) std::cout << "computing damage weights..." << std::endl; damageWeights.resize(obsModel->numberOfOpticsGroups()); for (int g = 0; g < obsModel->numberOfOpticsGroups(); g++) { damageWeights[g] = computeDamageWeights(g); // @TODO: make filter flank width (around FSC=.143) a parameter (and greater than 3 pixels) for (int f = 0; f < fc; f++) { damageWeights[g][f].data.xinit = 0; damageWeights[g][f].data.yinit = 0; if (cutoffOut) { double conv_fact = (s[g] * angpix[g]) / (s_ref * angpix_ref); damageWeights[g][f] = FilterHelper::raisedCosEnvFreq2D( damageWeights[g][f], conv_fact * (reference->k_out - 1), conv_fact * (reference->k_out + 1)); } } } ready = true; } void MotionEstimator::process(const std::vector& mdts, long g_start, long g_end) { if (!ready) { REPORT_ERROR("ERROR: MotionEstimator::process: MotionEstimator not initialized."); } int barstep = 1; int my_nr_micrographs = g_end - g_start + 1; if (verb > 0) { std::cout << " + Performing loop over micrographs ... " << std::endl; if (!debug) init_progress_bar(my_nr_micrographs); } std::vector fts(nr_omp_threads); std::vector> tables(nr_omp_threads), weights0(nr_omp_threads), weights1(nr_omp_threads); for (int i = 0; i < nr_omp_threads; i++) { FscHelper::initFscTable(sh_ref, fc, tables[i], weights0[i], weights1[i]); } int pctot = 0; long nr_done = 0; FileName prevdir = ""; for (long g = g_start; g <= g_end; g++) { // Abort through the pipeline_control system, TODO: check how this goes with MPI.... if (pipeline_control_check_abort_job()) exit(RELION_EXIT_ABORTED); const int pc = mdts[g].numberOfObjects(); if (pc == 0) continue; if (debug) { std::cout << g << "/" << g_end << " (" << pc << " particles)" << std::endl; } // optics group representative of this micrograph // (only the pixel and box sizes have to be identical) int ogmg = 0; if (!obsModel->allPixelAndBoxSizesIdentical(mdts[g])) { std::cerr << "WARNING: varying pixel or box sizes detected in " << MotionRefiner::getOutputFileNameRoot(outPath, mdts[g]) << " - skipping micrograph." << std::endl; continue; } if (!all_groups && !obsModel->containsGroup(mdts[g], group)) continue; // Make sure output directory exists FileName newdir = MotionRefiner::getOutputFileNameRoot(outPath, mdts[g]); newdir = newdir.beforeLastOf("/"); if (debug) { std::string mgName; mdts[g].getValue(EMDL_MICROGRAPH_NAME, mgName, 0); std::cout << " movie = " << mgName << std::endl; } if (newdir != prevdir) { std::string command = " mkdir -p " + newdir; int ret = system(command.c_str()); } std::vector>> movie; std::vector>> movieCC; std::vector positions(pc); std::vector> initialTracks(pc, std::vector(fc)); std::vector globComp(fc); /* The following try/catch block is important! 
- Do not remove! Even though we have either: - removed all movies with an insufficient number of frames or - determined the max. number available in all movies, this does not guarantee that the movies are actually: - available (we have only read the meta-stars) and - uncorrupted (the files could be damaged) Due to MPI, finding the bad micrograph after a job has crashed can be very time-consuming, since there is no obvious last file on which the estimation has succeeded. -- JZ, April 4th 2018 AD */ try { prepMicrograph( mdts[g], fts, damageWeights[ogmg], ogmg, movie, movieCC, positions, initialTracks, globComp); } catch (RelionError e) { std::string mgName; mdts[g].getValue(EMDL_MICROGRAPH_NAME, mgName, 0); std::cerr << " - Warning: unable to load raw movie frames for " << mgName << ". " << " Possible reasons include lack of the metadata STAR file, " << "the gain reference and/or the movie." << std::endl; continue; } pctot += pc; const double sig_vel_px = normalizeSigVel(sig_vel, angpix[ogmg]); const double sig_acc_px = normalizeSigAcc(sig_acc, angpix[ogmg]); const double sig_div_px = normalizeSigDiv(sig_div, angpix[ogmg]); std::vector> tracks; if (pc > 1) { tracks = optimize( movieCC, initialTracks, sig_vel_px, sig_acc_px, sig_div_px, positions, globComp); } else { tracks = initialTracks; } std::string fn_root = MotionRefiner::getOutputFileNameRoot(outPath, mdts[g]); bool hasNaNs = false; // find NaNs: for (int p = 0; p < pc; p++) for (int f = 0; f < fc; f++) { if (!(tracks[p][f].x == tracks[p][f].x) || !(tracks[p][f].y == tracks[p][f].y)) { tracks[p][f] = d2Vector(0.0, 0.0); hasNaNs = true; } } if (hasNaNs) { std::cerr << "NaNs detected in " << fn_root << "! Please inspect this movie." << std::endl; } updateFCC(movie, tracks, mdts[g], tables, weights0, weights1); writeOutput(tracks, angpix[ogmg], tables, weights0, weights1, positions, fn_root, 30.0); for (int i = 0; i < nr_omp_threads; i++) { tables[i].data.initZeros(); weights0[i].data.initZeros(); weights1[i].data.initZeros(); } nr_done++; if (!debug && verb > 0 && nr_done % barstep == 0) { progress_bar(nr_done); } } if (!debug && verb > 0) { progress_bar(my_nr_micrographs); } } void MotionEstimator::prepMicrograph( const MetaDataTable &mdt, std::vector& fts, const std::vector>& dmgWeight, int ogmg, std::vector>>& movie, std::vector>>& movieCC, std::vector& positions, std::vector>& initialTracks, std::vector& globComp) { const int pc = mdt.numberOfObjects(); std::vector> myInitialTracks; std::vector myGlobComp; for (int p = 0; p < pc; p++) { mdt.getValue(EMDL_IMAGE_COORD_X, positions[p].x, p); mdt.getValue(EMDL_IMAGE_COORD_Y, positions[p].y, p); } movie = micrographHandler->loadMovie( mdt, s[ogmg], angpix[ogmg], fts, positions, myInitialTracks, unregGlob, myGlobComp); // throws exceptions std::vector> preds = reference->predictAll( mdt, *obsModel, ReferenceMap::Own, nr_omp_threads); // std::cout << "motion estimator preds size = " << XSIZE(preds[0]()) << "x" << YSIZE(preds[0]()) << std::endl; if (!no_whitening) { std::vector sigma2 = StackHelper::powerSpectrum(movie); #pragma omp parallel for num_threads(nr_omp_threads) for (int p = 0; p < pc; p++) { MotionHelper::noiseNormalize(preds[p], sigma2, preds[p]); for (int f = 0; f < fc; f++) { MotionHelper::noiseNormalize(movie[p][f], sigma2, movie[p][f]); } } } movieCC = MotionHelper::movieCC(movie, preds, dmgWeight, cc_pad, nr_omp_threads); if (global_init || myInitialTracks.size() == 0) { std::vector> ccSum = MotionHelper::addCCs(movieCC); std::vector globTrack = 
MotionHelper::getGlobalTrack(ccSum, cc_pad); std::vector globOffsets; if (!globOff) { globOffsets = std::vector(pc, d2Vector(0,0)); } else { std::vector> initialTracks(pc, globTrack); globOffsets = MotionHelper::getGlobalOffsets(movieCC, initialTracks, cc_pad, 0.25 * s[ogmg], globOffMax, globOffMax, nr_omp_threads); } if (diag) { ImageLog::write(ccSum, MotionRefiner::getOutputFileNameRoot(outPath, mdt) + "_CCsum", CenterXY); } myInitialTracks.resize(pc); for (int p = 0; p < pc; p++) { myInitialTracks[p] = std::vector(fc); for (int f = 0; f < fc; f++) { if (unregGlob) { myInitialTracks[p][f] = globOffsets[p]; } else { myInitialTracks[p][f] = globTrack[f] + globOffsets[p]; } } } myGlobComp = unregGlob? globTrack : std::vector(fc, d2Vector(0,0)); } else if (globOff) { std::vector globOffsets; globOffsets = MotionHelper::getGlobalOffsets( movieCC, myInitialTracks, cc_pad, 0.25*s[ogmg], globOffMax, globOffMax, nr_omp_threads); for (int p = 0; p < pc; p++) { for (int f = 0; f < fc; f++) { myInitialTracks[p][f] += globOffsets[p]; } } } for (int p = 0; p < pc; p++) { for (int f = 0; f < fc; f++) { initialTracks[p][f] = myInitialTracks[p][f]; } } for (int f = 0; f < fc; f++) { globComp[f] = myGlobComp[f]; } } std::vector> MotionEstimator::optimize( const std::vector>>& movieCC, const std::vector>& inTracks, double sig_vel_px, double sig_acc_px, double sig_div_px, const std::vector& positions, const std::vector& globComp) const { if (maxIters == 0) return inTracks; const double eps = 1e-20; if (sig_vel_px < eps) { sig_vel_px = eps; } if (sig_div_px < eps) { sig_div_px = eps; } const int pc = inTracks.size(); if (pc == 0) return std::vector>(0); const int fc = inTracks[0].size(); GpMotionFit gpmf(movieCC, cc_pad, sig_vel_px, sig_div_px, sig_acc_px, maxEDs, positions, globComp, nr_omp_threads, expKer); std::vector initialCoeffs; gpmf.posToParams(inTracks, initialCoeffs); std::vector optCoeffs = LBFGS::optimize( initialCoeffs, gpmf, debugOpt, maxIters, optEps); std::vector> out(pc, std::vector(fc)); gpmf.paramsToPos(optCoeffs, out); for (int p = 0; p < pc; p++) for (int f = 0; f < fc; f++) { out[p][f] += globComp[f]; } return out; } std::vector> MotionEstimator::optimize( const std::vector>>& movieCC, const std::vector>& inTracks, double sig_vel_px, double sig_acc_px, double sig_div_px, const std::vector& positions, const std::vector& globComp) const { const int pc = movieCC.size(); const int fc = movieCC[0].size(); const int w = movieCC[0][0].data.xdim; const int h = movieCC[0][0].data.ydim; std::vector>> CCd(pc); #pragma omp parallel for num_threads(nr_omp_threads) for (int p = 0; p < pc; p++) { CCd[p].resize(fc); for (int f = 0; f < fc; f++) { CCd[p][f] = Image(w,h); for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { CCd[p][f](y,x) = movieCC[p][f](y,x); } } } return optimize(CCd, inTracks, sig_vel_px, sig_acc_px, sig_div_px, positions, globComp); } std::vector> MotionEstimator::computeDamageWeights(int opticsGroup) { return DamageHelper::damageWeights( s[opticsGroup], angpix[opticsGroup], micrographHandler->firstFrame, fc, dosePerFrame, dmga, dmgb, dmgc); } void MotionEstimator::updateFCC( const std::vector>>& movie, const std::vector>& tracks, const MetaDataTable& mdt, std::vector>& tables, std::vector>& weights0, std::vector>& weights1) { const int pc = mdt.numberOfObjects(); #pragma omp parallel for num_threads(nr_omp_threads) for (int p = 0; p < pc; p++) { int threadnum = omp_get_thread_num(); const int og = obsModel->getOpticsGroup(mdt, p); std::vector> obs = movie[p]; for (int f = 0; f 
< fc; f++) { shiftImageInFourierTransform(obs[f](), obs[f](), s[og], -tracks[p][f].x, -tracks[p][f].y); } Image pred = reference->predict( mdt, p, *obsModel, ReferenceMap::Opposite); const double scale = (s_ref * angpix_ref)/(s[og] * angpix[og]); FscHelper::updateFscTable( obs, pred, scale, tables[threadnum], weights0[threadnum], weights1[threadnum]); } } void MotionEstimator::writeOutput( const std::vector>& tracks, double angpix_mg, const std::vector>& fccData, const std::vector>& fccWeight0, const std::vector>& fccWeight1, const std::vector& positions, std::string fn_root, double visScale) { const int pc = tracks.size(); if (pc == 0) return; const int fc = tracks[0].size(); MotionHelper::writeTracks(tracks, fn_root + "_tracks.star", angpix_mg); Image fccDataSum(sh_ref,fc), fccWeight0Sum(sh_ref,fc), fccWeight1Sum(sh_ref,fc); fccDataSum.data.initZeros(); fccWeight0Sum.data.initZeros(); fccWeight1Sum.data.initZeros(); for (int i = 0; i < fccData.size(); i++) { for (int y = 0; y < fc; y++) for (int x = 0; x < sh_ref; x++) { fccDataSum(y,x) += fccData[i](y,x); fccWeight0Sum(y,x) += fccWeight0[i](y,x); fccWeight1Sum(y,x) += fccWeight1[i](y,x); } } fccDataSum.write(fn_root + "_FCC_cc.mrc"); fccWeight0Sum.write(fn_root + "_FCC_w0.mrc"); fccWeight1Sum.write(fn_root + "_FCC_w1.mrc"); // plot EPS graph with all observed and fitted tracks std::vector> visTracks(pc); for (int p = 0; p < pc; p++) { visTracks[p] = std::vector(fc); } std::vector globalTrack(fc); for (int f = 0; f < fc; f++) { globalTrack[f] = d2Vector(0,0); for (int p = 0; p < pc; p++) { globalTrack[f] += tracks[p][f]; } globalTrack[f] /= pc; for (int p = 0; p < pc; p++) { visTracks[p][f] = positions[p] + visScale * tracks[p][f]; } } // Make a postscript with the tracks FileName fn_eps = fn_root + "_tracks.eps"; CPlot2D *plot2D=new CPlot2D(fn_eps); plot2D->SetXAxisSize(600); plot2D->SetYAxisSize(600); plot2D->SetDrawLegend(false); plot2D->SetFlipY(true); // Global track in the middle CDataSet dataSet; dataSet.SetDrawMarker(false); dataSet.SetDatasetColor(0.0,0.0,1.0); dataSet.SetLineWidth(1.); const RFLOAT xcenterMg = micrographHandler->micrograph_size.x / 2.0; const RFLOAT ycenterMg = micrographHandler->micrograph_size.y / 2.0; const RFLOAT xcenterCoord = micrographHandler->movie_angpix * xcenterMg / micrographHandler->coords_angpix; const RFLOAT ycenterCoord = micrographHandler->movie_angpix * ycenterMg / micrographHandler->coords_angpix; for (int f = 0; f < fc; f++) { CDataPoint point(xcenterCoord + visScale * globalTrack[f].x, ycenterCoord + visScale * globalTrack[f].y); dataSet.AddDataPoint(point); } plot2D->AddDataSet(dataSet); // Mark starting point global track CDataSet dataSetStart; dataSetStart.SetDrawMarker(true); dataSetStart.SetMarkerSize(2); dataSetStart.SetDatasetColor(1.0,0.0,0.0); CDataPoint point2( xcenterCoord + visScale * globalTrack[0].x, ycenterCoord + visScale * globalTrack[0].y); dataSetStart.AddDataPoint(point2); plot2D->AddDataSet(dataSetStart); // Now loop over all particles for local tracks for (int p = 0; p < pc; p++) { // Mark start of each track CDataSet patch_start; patch_start.SetDrawMarker(true); patch_start.SetMarkerSize(8); patch_start.SetDatasetColor(0.2,0.5,1.0); CDataPoint point3(visTracks[p][0].x, visTracks[p][0].y); patch_start.AddDataPoint(point3); plot2D->AddDataSet(patch_start); } // Now loop over all particles for local tracks for (int p = 0; p < pc; p++) { CDataSet fit; fit.SetDrawMarker(false); fit.SetDatasetColor(0.0,0.0,0.0); fit.SetLineWidth(0.5); for (int f = 0; f < fc; f++) { 
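// Plotting note: globalTrack (the per-frame mean of all particle tracks) is
// drawn at the centre of the micrograph, and every trajectory is exaggerated
// by visScale (30x as passed by the caller) so that sub-pixel beam-induced
// motion becomes visible; xcenterCoord / ycenterCoord convert that centre
// from movie pixels to picking-coordinate pixels via movie_angpix and
// coords_angpix.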
CDataPoint point(visTracks[p][f].x, visTracks[p][f].y); fit.AddDataPoint(point); } plot2D->AddDataSet(fit); } char title[256]; snprintf(title, 255, "X (in pixels; trajectory scaled by %.0f)", visScale); plot2D->SetXAxisTitle(title); title[0] = 'Y'; plot2D->SetYAxisTitle(title); plot2D->OutputPostScriptPlot(fn_eps); delete plot2D; // Compatibility with Jasenko's diagnostic .dat files // TONOTDO: remove this // Don't! It's the only way to plot tracks on top of each other. // We'll probably need this in the future. // (e.g. each time there is something wrong with the polynomial tracks) if (!diag) return; std::ofstream rawOut(fn_root + "_tracks.dat"); std::ofstream visOut(fn_root + "_visTracks.dat"); std::ofstream visOut15(fn_root + "_visTracks_first15.dat"); for (int p = 0; p < pc; p++) { rawOut << "#particle " << p << std::endl; visOut << "#particle " << p << std::endl; visOut15 << "#particle " << p << std::endl; for (int f = 0; f < fc; f++) { rawOut << tracks[p][f].x << " " << tracks[p][f].y << std::endl; visOut << visTracks[p][f].x << " " << visTracks[p][f].y << std::endl; if (f < 15) visOut15 << visTracks[p][f].x << " " << visTracks[p][f].y << std::endl; } rawOut << std::endl; visOut << std::endl; visOut15 << std::endl; } std::ofstream glbOut(fn_root + "_globTrack.dat"); for (int f = 0; f < fc; f++) { glbOut << globalTrack[f].x << " " << globalTrack[f].y << std::endl; } } bool MotionEstimator::isReady() { return ready; } double MotionEstimator::getDosePerFrame() { return dosePerFrame; } void MotionEstimator::proposeDosePerFrame(double dpf, std::string metaFn, int verb) { if (dosePerFrame < 0) { if (metaFn == "") { REPORT_ERROR_STR("ERROR: No electron dose available. Please provide one " << "through the command line (--fdose)."); } else { dosePerFrame = dpf; if (verb > 0) { std::cout << " + Using dose per frame from " << metaFn << ": " << dosePerFrame << " e/A^2" << std::endl; } } } else { if (verb > 0) { std::cout << " + Using dose per frame from cmd. line: " << dosePerFrame << " e/A^2" << std::endl; } } } double MotionEstimator::getCCPad() { return cc_pad; } std::vector MotionEstimator::findUnfinishedJobs( const std::vector &mdts, std::string path) { std::vector out(0); const int gc = mdts.size(); for (int g = 0; g < gc; g++) { std::string fn_root = MotionRefiner::getOutputFileNameRoot(path, mdts[g]); if (!isJobFinished(fn_root)) { out.push_back(mdts[g]); } } return out; } double MotionEstimator::normalizeSigVel(double sig_vel, double angpix) { return params_scaled_by_dose? dosePerFrame * sig_vel / angpix : sig_vel / angpix; } double MotionEstimator::normalizeSigDiv(double sig_div, double angpix) { return sig_div / micrographHandler->coords_angpix; } double MotionEstimator::normalizeSigAcc(double sig_acc, double angpix) { return params_scaled_by_dose? 
dosePerFrame * sig_acc / angpix : sig_acc / angpix; } bool MotionEstimator::isJobFinished(std::string filenameRoot) { return exists(filenameRoot+"_tracks.star") && exists(filenameRoot+"_FCC_cc.mrc") && exists(filenameRoot+"_FCC_w0.mrc") && exists(filenameRoot+"_FCC_w1.mrc"); } relion-3.1.3/src/jaz/motion/motion_estimator.h000066400000000000000000000131531411340063500214330ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef MOTION_ESTIMATOR_H #define MOTION_ESTIMATOR_H #include #include #include class IOParser; class ParFourierTransformer; class ReferenceMap; class ObservationModel; class MicrographHandler; class MotionEstimator { public: MotionEstimator(); void read(IOParser& parser, int argc, char *argv[]); void init(int verb, int fc, int nr_omp_threads, bool debug, std::string outPath, ReferenceMap* reference, ObservationModel* obsModel, MicrographHandler* micrographHandler); void process(const std::vector &mdts, long g_start, long g_end); // load micrograph from mdt and compute all data required for the optimization; // positions, initialTracks and globComp need to have the right sizes already (pc, pc*fc, fc) // (also used by MotionParamEstimator) void prepMicrograph( // in: const MetaDataTable& mdt, std::vector& fts, const std::vector>& dmgWeight, int ogmg, // out: std::vector>>& movie, std::vector>>& movieCC, std::vector& positions, std::vector>& initialTracks, std::vector& globComp); // perform the actual optimization (also used by MotionParamEstimator) std::vector> optimize( const std::vector>>& movieCC, const std::vector>& inTracks, double sig_vel_px, double sig_acc_px, double sig_div_px, const std::vector& positions, const std::vector& globComp) const; // syntactic sugar for float-valued CCs std::vector> optimize( const std::vector>>& movieCC, const std::vector>& inTracks, double sig_vel_px, double sig_acc_px, double sig_div_px, const std::vector& positions, const std::vector& globComp) const; std::vector> computeDamageWeights(int opticsGroup); bool isReady(); double getDosePerFrame(); void proposeDosePerFrame(double dpf, std::string metaFn, int verb); double getCCPad(); static std::vector findUnfinishedJobs( const std::vector& mdts, std::string path); // translates the given parameters (in A or A/dose) into pixels // done in one place to ensure consistency double normalizeSigVel(double sig_vel, double angpix); double normalizeSigDiv(double sig_div, double angpix); double normalizeSigAcc(double sig_acc, double angpix); protected: bool paramsRead, ready; // read from cmd line int maxEDs, maxIters, globOffMax, group; bool unregGlob, globOff, cutoffOut, diag, expKer, global_init, debugOpt, 
params_scaled_by_dose; double dmga, dmgb, dmgc, dosePerFrame, sig_vel, sig_div, sig_acc, optEps, cc_pad; std::string paramsFn; // @TODO: allow for varying fc (frame count) // set at init int fc, verb, nr_omp_threads, s_ref, sh_ref; std::vector s, sh; double angpix_ref; std::vector angpix; bool debug, no_whitening, all_groups; std::vector>> damageWeights; std::string outPath; ReferenceMap* reference; ObservationModel* obsModel; MicrographHandler* micrographHandler; void updateFCC( const std::vector>>& movie, const std::vector>& tracks, const MetaDataTable& mdt, std::vector>& tables, std::vector>& weights0, std::vector>& weights1); void writeOutput( const std::vector>& tracks, double angpix_mg, const std::vector>& fccData, const std::vector>& fccWeight0, const std::vector>& fccWeight1, const std::vector& positions, std::string fn_root, double visScale); static bool isJobFinished(std::string filenameRoot); }; #endif relion-3.1.3/src/jaz/motion/motion_helper.cpp000066400000000000000000000272761411340063500212510ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include "motion_helper.h" #include #include #include #include #include #include #include #include #include #include using namespace gravis; std::vector>> MotionHelper::movieCC( const std::vector>>& movie, const std::vector>& preds, const std::vector > &damageWeights, double pad, int threads) { const int pc = movie.size(); const int fc = movie[0].size(); const int s = movie[0][0]().ydim; const int sh = s/2 + 1; std::vector>> out(pc); std::vector> ccsFs(threads); std::vector> ccsRs(threads); const int s2 = (int)(pad * s); const int sh2 = s2/2 + 1; for (int t = 0; t < threads; t++) { ccsFs[t] = Image(sh2,s2); ccsFs[t].data.xinit = 0; ccsFs[t].data.yinit = 0; ccsRs[t] = Image(s2,s2); ccsRs[t].data.xinit = 0; ccsRs[t].data.yinit = 0; } NewFFT::DoublePlan plan(s2,s2,1); for (int p = 0; p < pc; p++) { out[p] = std::vector>(fc, Image(s2,s2)); #pragma omp parallel for num_threads(threads) for (int f = 0; f < fc; f++) { int t = omp_get_thread_num(); for (int y = 0; y < s; y++) for (int x = 0; x < sh; x++) { Complex z = movie[p][f](y,x) * damageWeights[f](y,x) * preds[p](y,x).conj(); const int yy = y < sh? 
y : s2 - (s - y); ccsFs[t](yy,x) = dComplex(z.real, z.imag); } NewFFT::inverseFourierTransform(ccsFs[t](), ccsRs[t](), plan); for (int y = 0; y < s2; y++) for (int x = 0; x < s2; x++) { out[p][f](y,x) = s * s * ccsRs[t](y,x); } } } return out; } /*std::vector>> MotionHelper::movieCC( Projector& projector0, Projector& projector1, const ObservationModel &obsModel, const MetaDataTable &viewParams, const std::vector > > &movie, const std::vector &sigma2, const std::vector > &damageWeights, std::vector& fts, int threads) { const int pc = movie.size(); const int fc = movie[0].size(); const int s = movie[0][0]().ydim; const int sh = s/2 + 1; std::vector>> out(pc); std::vector> ccsFs(threads); std::vector> ccsRs(threads); for (int t = 0; t < threads; t++) { ccsFs[t] = Image(sh,s); ccsFs[t].data.xinit = 0; ccsFs[t].data.yinit = 0; ccsRs[t] = Image(s,s); ccsRs[t].data.xinit = 0; ccsRs[t].data.yinit = 0; } Image pred; for (int p = 0; p < pc; p++) { out[p] = std::vector>(fc); for (int f = 0; f < fc; f++) { out[p][f] = Image(s,s); } int randSubset; viewParams.getValue(EMDL_PARTICLE_RANDOM_SUBSET, randSubset, p); randSubset -= 1; if (randSubset == 0) { pred = obsModel.predictObservation(projector0, viewParams, p, true, true); } else { pred = obsModel.predictObservation(projector1, viewParams, p, true, true); } noiseNormalize(pred, sigma2, pred); #pragma omp parallel for num_threads(threads) for (int f = 0; f < fc; f++) { int t = omp_get_thread_num(); for (int y = 0; y < s; y++) for (int x = 0; x < sh; x++) { ccsFs[t](y,x) = movie[p][f](y,x) * damageWeights[f](y,x) * pred(y,x).conj(); } fts[t].inverseFourierTransform(ccsFs[t](), ccsRs[t]()); for (int y = 0; y < s; y++) for (int x = 0; x < s; x++) { out[p][f](y,x) = s * s * ccsRs[t](y,x); } } } return out; }*/ /*std::vector MotionHelper::getGlobalTrack( const std::vector>>& movieCC) { const int pc = movieCC.size(); const int fc = movieCC[0].size(); const int s = movieCC[0][0]().xdim; const int sh = s/2 + 1; std::vector out(fc); const double eps = 1e-30; std::vector> e_sum(fc); for (int f = 0; f < fc; f++) { e_sum[f] = Image(s, s); e_sum[f].data.initZeros(); for (int p = 0; p < pc; p++) { for (int y = 0; y < s; y++) for (int x = 0; x < s; x++) { e_sum[f](y,x) += movieCC[p][f](y,x); } } d2Vector pos = Interpolation::quadraticMaxWrapXY(e_sum[f], eps); if (pos.x >= sh) pos.x -= s; if (pos.y >= sh) pos.y -= s; out[f] = pos; } return out; }*/ std::vector > MotionHelper::addCCs( const std::vector>> &movieCC) { const int pc = movieCC.size(); const int fc = movieCC[0].size(); const int s = movieCC[0][0]().xdim; std::vector> e_sum(fc); for (int f = 0; f < fc; f++) { e_sum[f] = Image(s, s); e_sum[f].data.initZeros(); for (int p = 0; p < pc; p++) { for (int y = 0; y < s; y++) for (int x = 0; x < s; x++) { e_sum[f](y,x) += movieCC[p][f](y,x); } } } return e_sum; } std::vector MotionHelper::getGlobalTrack( const std::vector> &movieCcSum, double cc_pad) { const int fc = movieCcSum.size(); const int s = movieCcSum[0]().xdim; const int sh = s/2 + 1; std::vector out(fc); const double eps = 1e-30; for (int f = 0; f < fc; f++) { d2Vector pos = Interpolation::quadraticMaxWrapXY(movieCcSum[f], eps); if (pos.x >= sh) pos.x -= s; if (pos.y >= sh) pos.y -= s; out[f] = pos/cc_pad; } return out; } std::vector MotionHelper::getGlobalOffsets( const std::vector>>& movieCC, const std::vector>& initialTracks, double cc_pad, double sigma, int wMax, int hMax, int threads) { const int pc = movieCC.size(); const int fc = movieCC[0].size(); const int s = movieCC[0][0]().xdim; const int sh = 
s/2 + 1; const double eps = 1e-30; std::vector out(pc); Image weight(s,s); for (int y = 0; y < s; y++) for (int x = 0; x < s; x++) { double xx = x >= sh? x - s: x; double yy = y >= sh? y - s: y; weight(y,x) = exp(-0.5*(xx*xx + yy*yy)/(sigma*sigma)); } #pragma omp parallel for num_threads(threads) for (int p = 0; p < pc; p++) { Image pSum(s,s); pSum.data.initZeros(); for (int f = 0; f < fc; f++) { const d2Vector g = initialTracks[p][f]; for (int y = 0; y < s; y++) for (int x = 0; x < s; x++) { pSum(y,x) += Interpolation::cubicXY(movieCC[p][f], x + g.x, y + g.y, 0, 0, true); } } for (int y = 0; y < s; y++) for (int x = 0; x < s; x++) { pSum(y,x) *= weight(y,x); } d2Vector out_p = Interpolation::quadraticMaxWrapXY(pSum, eps, wMax, hMax); if (out_p.x >= sh) out_p.x -= s; if (out_p.y >= sh) out_p.y -= s; out[p] = out_p/cc_pad; } return out; } void MotionHelper::noiseNormalize( const Image &img, const std::vector &sigma2, Image& dest) { int wf = img().xdim; int w = 2*wf - 1; int h = img().ydim; const double area = 0.25*PI*w*h; if (dest.data.xdim != img.data.xdim || dest.data.ydim != img.data.ydim) { dest.data.reshape(img.data); } dest.data.xinit = 0; dest.data.yinit = 0; for (int y = 0; y < h; y++) for (int x = 0; x < wf; x++) { if (x == 0 && y == 0) { dest(y,x) = Complex(0.0); continue; } const double yy = y < wf? y : y - h; const double xx = x; const int r = (int) sqrt(xx*xx + yy*yy); if (r >= wf || sigma2[r] == 0.0) { dest(y,x) = Complex(0.0); } else { dest(y,x) = DIRECT_A2D_ELEM(img.data, y, x) / sqrt(sigma2[r]*area); } } } void MotionHelper::writeTracks( const std::vector>& tracksInPix, std::string fn, double angpix) { const int pc = tracksInPix.size(); const int fc = tracksInPix[0].size(); std::string path = fn.substr(0, fn.find_last_of('/')); mktree(path); std::ofstream ofs(fn); MetaDataTable mdt; mdt.setName("general"); mdt.setIsList(true); mdt.addObject(); mdt.setValue(EMDL_PARTICLE_NUMBER, pc); mdt.write(ofs); mdt.clear(); for (int p = 0; p < pc; p++) { std::stringstream sts; sts << p; mdt.setName(sts.str()); for (int f = 0; f < fc; f++) { mdt.addObject(); mdt.setValue(EMDL_ORIENT_ORIGIN_X_ANGSTROM, angpix * tracksInPix[p][f].x); mdt.setValue(EMDL_ORIENT_ORIGIN_Y_ANGSTROM, angpix * tracksInPix[p][f].y); } mdt.write(ofs); mdt.clear(); } } std::vector> MotionHelper::readTracksInPix(std::string fn, double angpix) { std::ifstream ifs(fn); if (ifs.fail()) { REPORT_ERROR("MotionHelper::readTracks: unable to read " + fn + "."); } MetaDataTable mdt; mdt.readStar(ifs, "general"); int pc; if (!mdt.getValue(EMDL_PARTICLE_NUMBER, pc)) { REPORT_ERROR("MotionHelper::readTracks: missing particle number in "+fn+"."); } std::vector> out(pc); int fc = 0, lastFc = 0; for (int p = 0; p < pc; p++) { std::stringstream sts; sts << p; mdt.readStar(ifs, sts.str()); fc = mdt.numberOfObjects(); if (p > 0 && fc != lastFc) { REPORT_ERROR("MotionHelper::readTracks: broken file: "+fn+"."); } lastFc = fc; out[p] = std::vector(fc); for (int f = 0; f < fc; f++) { mdt.getValue(EMDL_ORIENT_ORIGIN_X_ANGSTROM, out[p][f].x, f); mdt.getValue(EMDL_ORIENT_ORIGIN_Y_ANGSTROM, out[p][f].y, f); out[p][f] /= angpix; } } return out; } relion-3.1.3/src/jaz/motion/motion_helper.h000066400000000000000000000066151411340063500207100ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as 
published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef MOTION_HELPER_H #define MOTION_HELPER_H #include #include #include #include #include #include #include #include #include #include #include #include class MotionHelper { public: static std::vector>> movieCC( const std::vector>>& movie, const std::vector>& preds, const std::vector>& damageWeights, double pad, int threads); // deprecated: use the one above! /*static std::vector>> movieCC( Projector& projector0, Projector& projector1, const ObservationModel& obsModel, const MetaDataTable& viewParams, const std::vector>>& movie, const std::vector& sigma2, const std::vector>& damageWeights, std::vector& fts, int threads);*/ /*static std::vector getGlobalTrack( const std::vector>>& movieCC, double cc_pad);*/ static std::vector> addCCs( const std::vector>>& movieCC); static std::vector getGlobalTrack( const std::vector>& movieCcSum, double cc_pad); static std::vector getGlobalOffsets( const std::vector>>& movieCC, const std::vector>& initialTracks, double cc_pad, double sigma, int wMax, int hMax, int threads); static void noiseNormalize( const Image& img, const std::vector &sigma2, Image& dest); static void writeTracks( const std::vector>& tracksInPix, std::string fn, double angpix); // both stack number and frame number are 0-indexed in the array and STAR file static std::vector> readTracksInPix( std::string fn, double angpix); }; #endif relion-3.1.3/src/jaz/motion/motion_param_estimator.cpp000066400000000000000000000377131411340063500231560ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #include "motion_param_estimator.h" #include "motion_refiner.h" #include "two_hyperparameter_fit.h" #include "three_hyperparameter_fit.h" #include #include #include #include using namespace gravis; const double MotionParamEstimator::velScale = 10000.0; const double MotionParamEstimator::divScale = 1.0; const double MotionParamEstimator::accScale = 1000.0; MotionParamEstimator::MotionParamEstimator() : paramsRead(false), ready(false) { } void MotionParamEstimator::read(IOParser& parser, int argc, char *argv[]) { parser.addSection("Parameter estimation"); estim2 = parser.checkOption("--params2", "Estimate 2 parameters instead of motion"); estim3 = parser.checkOption("--params3", "Estimate 3 parameters instead of motion"); align_frac = textToDouble(parser.getOption("--align_frac", "Fraction of pixels to be used for alignment", "0.5")); eval_frac = textToDouble(parser.getOption("--eval_frac", "Fraction of pixels to be used for evaluation", "0.5")); minParticles = textToInteger(parser.getOption("--min_p", "Minimum number of particles on which to estimate the parameters", "1000")); group = textToInteger(parser.getOption("--par_group", "Estimate parameters for this optics group only (negative means all)", "-1")) - 1; sV = textToDouble(parser.getOption("--s_vel_0", "Initial s_vel", "0.6")); sD = textToDouble(parser.getOption("--s_div_0", "Initial s_div", "10000")); sA = textToDouble(parser.getOption("--s_acc_0", "Initial s_acc", "3")); iniStep = textToDouble(parser.getOption("--in_step", "Initial step size in s_div", "3000")); conv = textToDouble(parser.getOption("--conv", "Abort when simplex diameter falls below this", "30")); maxIters = textToInteger(parser.getOption("--par_iters", "Max. number of iterations", "100")); maxRange = textToInteger(parser.getOption("--mot_range", "Limit allowed motion range [Px]", "50")); seed = textToInteger(parser.getOption("--seed", "Random seed for micrograph selection", "23")); paramsRead = true; } void MotionParamEstimator::init( int verb, int nr_omp_threads, bool debug, std::string outPath, int fc, const std::vector& allMdts, MotionEstimator* motionEstimator, ReferenceMap* reference, ObservationModel* obsModel) { if (!paramsRead) { REPORT_ERROR("ERROR: MotionParamEstimator::init: MotionParamEstimator has not read its cmd-line parameters."); } this->verb = verb; this->nr_omp_threads = nr_omp_threads; this->debug = debug; this->outPath = outPath; this->fc = fc; this->motionEstimator = motionEstimator; this->obsModel = obsModel; this->reference = reference; this->s_ref = reference->s; std::vector allS, allSh; obsModel->getBoxSizes(allS, allSh); if (group < 0) { if (!obsModel->allPixelSizesIdentical() || !obsModel->allBoxSizesIdentical()) { REPORT_ERROR_STR("MotionParamEstimator::init: unable to estimate motion parameters for all \n" << "optics groups simultaneously due to varying pixel and box sizes.\n" << "Please estimate them separately for each optics group (--par_group)."); } s = allS[0]; sh = allSh[0]; group = 0; allGroups = true; if (verb > 0) { std::cout << " + estimating motion parameters for all optics groups simultaneously ..." << std::endl; } } else { s = allS[group]; sh = allSh[group]; allGroups = false; if (verb > 0) { std::cout << " + estimating motion parameters for optics group " << obsModel->getGroupName(group) << " ..." 
<< std::endl; } } if (!motionEstimator->isReady()) { REPORT_ERROR("ERROR: MotionParamEstimator initialized before MotionEstimator."); } if (eval_frac + align_frac > 1.000001) { REPORT_ERROR_STR("ERROR: Alignment and evaluation sets are intersecting. " << "Please make sure that --align_frac and --eval_frac do not add up to more than 1."); } if (estim2 && estim3) { REPORT_ERROR("ERROR: Only 2 or 3 parameters can be estimated (--params2 or --params3), not both."); } k_out = reference->k_out; k_cutoff = (int)(k_out * sqrt(align_frac) + 0.5); k_eval = (int)(k_out * sqrt(1.0 - eval_frac) + 0.5); if (verb > 0) { double k_cutoff_Angst = reference->angToPix(k_cutoff); double k_eval_Angst = reference->angToPix(k_eval); std::cout << " + maximum frequency to consider for alignment: " << k_cutoff_Angst << " A (" << k_cutoff << " ref. px)" << std::endl; std::cout << " + frequency range to consider for evaluation: " << k_eval_Angst << " - " << reference->pixToAng(reference->k_out) << " A (" << k_eval << " - " << reference->k_out << " ref. px)" << std::endl; } const long mc = allMdts.size(); srand(seed); std::vector randNums(mc); for (int m = 0; m < mc; m++) { randNums[m] = rand() / (double)RAND_MAX; } std::vector order = IndexSort::sortIndices(randNums); int pc = 0; mdts.clear(); if (verb > 0) { std::cout << " + micrographs randomly selected for parameter optimization:" << std::endl; } for (int i = 0; i < order.size(); i++) { const int m = order[i]; const int pcm = allMdts[m].numberOfObjects(); // motion estimation does not work on one single particle if (pcm < 2) continue; if (allGroups) { mdts.push_back(allMdts[m]); pc += pcm; } else { MetaDataTable rightGroup; for (int p = 0; p < pcm; p++) { if (obsModel->getOpticsGroup(allMdts[m], p) == group) { rightGroup.addObject(allMdts[m].getObject(p)); } } mdts.push_back(rightGroup); pc += rightGroup.numberOfObjects(); } if (verb > 0) { std::string mn; allMdts[m].getValue(EMDL_MICROGRAPH_NAME, mn, 0); std::cout << " " << m << ": " << mn << std::endl; } if (pc >= minParticles) { if (verb > 0) { std::cout << "\n + " << pc << " particles found in " << mdts.size() << " micrographs\n"; } break; } } if (verb > 0 && pc < minParticles) { std::cout << "\n - Warning: this dataset does not contain " << minParticles << " particles (--min_p) in micrographs with at least 2 particles\n"; } ready = true; } void MotionParamEstimator::run() { #ifdef TIMING timeSetup = paramTimer.setNew(" time_Setup "); timeOpt = paramTimer.setNew(" time_Opt "); timeEval = paramTimer.setNew(" time_Eval "); #endif if (!ready) { REPORT_ERROR("ERROR: MotionParamEstimator::run: MotionParamEstimator not initialized."); } if (!estim2 && !estim3) return; RCTIC(paramTimer, timeSetup); prepAlignment(); RCTOC(paramTimer, timeSetup); d4Vector opt; std::cout.setf(std::ios::fixed, std::ios::floatfield); std::cout.precision(5); if (estim2) { opt = estimateTwoParamsNM(sV, sD, sA, iniStep, conv, maxIters); } if (estim3) { opt = estimateThreeParamsNM(sV, sD, sA, iniStep, conv, maxIters); } std::cout.setf(std::ios::floatfield); d3Vector nrm( opt[0] * velScale, opt[1] * divScale, opt[2] * accScale); // round result to conv / 2 (the min. 
radius of the optimization simplex) d3Vector rnd( conv * 0.5 * ((int)(2.0*nrm[0]/conv + 0.5)) / velScale, conv * 0.5 * ((int)(2.0*nrm[1]/conv + 0.5)) / divScale, conv * 0.5 * ((int)(2.0*nrm[2]/conv + 0.5)) / accScale); if (estim2) { rnd[2] = sA; } if (opt[2] <= 0.0) { rnd[2] = -1; } std::cout << "\ngood parameters:" << " --s_vel " << rnd[0] << " --s_div " << rnd[1] << " --s_acc " << rnd[2] << "\n\n"; FileName newdir = FileName(outPath).beforeLastOf("/"); std::string command = " mkdir -p " + newdir; int ret = system(command.c_str()); std::string paramFn; if (allGroups) { paramFn = "opt_params_all_groups.txt"; } else { paramFn = "opt_params_group_" + obsModel->getGroupName(group) + ".txt"; } std::ofstream ofs(outPath+paramFn); ofs << rnd[0] << " "; ofs << rnd[1] << " "; ofs << rnd[2] << std::endl; ofs.close(); std::cout << "written to " << (outPath+paramFn) << std::endl; #ifdef TIMING paramTimer.printTimes(true); #endif } bool MotionParamEstimator::anythingToDo() { return estim2 || estim3; } d4Vector MotionParamEstimator::estimateTwoParamsNM( double sig_v_0, double sig_d_0, double sig_acc, double inStep, double conv, int maxIters) { std::cout << "\nit: \t s_vel: \t s_div: \t s_acc: \t fsc:\n\n"; TwoHyperParameterProblem thpp(*this, sig_acc); std::vector initial = TwoHyperParameterProblem::motionToProblem( d2Vector(sig_v_0, sig_d_0)); double minTsc; std::vector final = NelderMead::optimize( initial, thpp, inStep, conv, maxIters, 1.0, 2.0, 0.5, 0.5, false, &minTsc); d2Vector vd = TwoHyperParameterProblem::problemToMotion(final); return d4Vector(vd[0], vd[1], sig_acc, -minTsc); } d4Vector MotionParamEstimator::estimateThreeParamsNM( double sig_v_0, double sig_d_0, double sig_a_0, double inStep, double conv, int maxIters) { std::cout << "\nit: \t s_vel: \t s_div: \t s_acc: \t fsc:\n\n"; ThreeHyperParameterProblem thpp(*this); std::vector initial = ThreeHyperParameterProblem::motionToProblem( d3Vector(sig_v_0, sig_d_0, sig_a_0)); double minTsc; std::vector final = NelderMead::optimize( initial, thpp, inStep, conv, maxIters, 1.0, 2.0, 0.5, 0.5, false, &minTsc); d3Vector vd = ThreeHyperParameterProblem::problemToMotion(final); return d4Vector(vd[0], vd[1], vd[2], -minTsc); } void MotionParamEstimator::evaluateParams( const std::vector& sig_vals, std::vector& TSCs) { const int paramCount = sig_vals.size(); TSCs.resize(paramCount); std::vector sig_v_vals_px(paramCount); std::vector sig_d_vals_px(paramCount); std::vector sig_a_vals_px(paramCount); for (int i = 0; i < paramCount; i++) { sig_v_vals_px[i] = motionEstimator->normalizeSigVel(sig_vals[i][0], reference->angpix); sig_d_vals_px[i] = motionEstimator->normalizeSigDiv(sig_vals[i][1], reference->angpix); sig_a_vals_px[i] = motionEstimator->normalizeSigAcc(sig_vals[i][2], reference->angpix); } int pctot = 0; std::vector tscsAs(paramCount, d3Vector(0.0, 0.0, 0.0)); const int gc = mdts.size(); for (long g = 0; g < gc; g++) { const int pc = mdts[g].numberOfObjects(); if (pc < 2) continue; // not really needed, mdts are pre-screened pctot += pc; if (debug) { std::cout << " micrograph " << (g+1) << " / " << mdts.size() << ": " << pc << " particles [" << pctot << " total]" << std::endl; } for (int i = 0; i < paramCount; i++) { if (debug) { std::cout << " evaluating: " << sig_vals[i] << std::endl; } RCTIC(paramTimer,timeOpt); std::vector> tracks = motionEstimator->optimize( alignmentSet.CCs[g], alignmentSet.initialTracks[g], sig_v_vals_px[i], sig_a_vals_px[i], sig_d_vals_px[i], alignmentSet.positions[g], alignmentSet.globComp[g]); if (debug) { 
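				// write the trajectories fitted with this parameter set (s_vel, s_div, s_acc)
				// to a debug-track_<v>_<d>_<a>.dat file: one block per particle,
				// one d2Vector (x,y offset) per frame, for offline inspection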
std::stringstream sts; sts << "debug-track_" << sig_vals[i][0] << "_" << sig_vals[i][1] << "_" << sig_vals[i][2] << ".dat"; std::ofstream debugStr(sts.str()); for (int p = 0; p < pc; p++) { for (int f = 0; f < fc; f++) { debugStr << tracks[p][f] << std::endl; } debugStr << std::endl; } debugStr.close(); } RCTOC(paramTimer,timeOpt); RCTIC(paramTimer,timeEval); tscsAs[i] += alignmentSet.updateTsc(tracks, g, nr_omp_threads); RCTOC(paramTimer,timeEval); } } // micrographs if (debug) { std::cout << std::endl; } RCTIC(paramTimer,timeEval); // compute final TSC for (int i = 0; i < paramCount; i++) { double wg = tscsAs[i][1] * tscsAs[i][2]; if (wg > 0.0) { TSCs[i] = tscsAs[i][0] / sqrt(wg); } } RCTOC(paramTimer,timeEval); } void MotionParamEstimator::prepAlignment() { std::cout << " + preparing alignment data... " << std::endl; const std::vector>& dmgWgh = motionEstimator->computeDamageWeights(group); std::vector> alignDmgWgh(fc); for (int f = 0; f < fc; f++) { alignDmgWgh[f] = FilterHelper::raisedCosEnvFreq2D(dmgWgh[f], k_cutoff-1, k_cutoff+1); } alignmentSet = AlignmentSet(mdts, fc, s, k_eval+2, k_out); for (int f = 0; f < fc; f++) { alignmentSet.accelerate(dmgWgh[f], alignmentSet.damage[f]); } std::vector fts(nr_omp_threads); const int gc = mdts.size(); int pctot = 0; for (long g = 0; g < gc; g++) { const int pc = mdts[g].numberOfObjects(); if (pc < 2) continue; pctot += pc; std::cout << " micrograph " << (g+1) << " / " << gc << ": " << pc << " particles [" << pctot << " total]" << std::endl; std::vector>> movie; std::vector>> movieCC; try { motionEstimator->prepMicrograph( mdts[g], fts, alignDmgWgh, 0, movie, movieCC, alignmentSet.positions[g], alignmentSet.initialTracks[g], alignmentSet.globComp[g]); } catch (RelionError XE) { std::cerr << "warning: unable to load micrograph #" << (g+1) << std::endl; continue; } const int maxRangeP = 2 * motionEstimator->getCCPad() * maxRange; #pragma omp parallel for num_threads(nr_omp_threads) for (int p = 0; p < pc; p++) { for (int f = 0; f < fc; f++) { if (maxRange > 0) { movieCC[p][f] = FilterHelper::cropCorner2D(movieCC[p][f], maxRangeP, maxRangeP); } alignmentSet.copyCC(g, p, f, movieCC[p][f]); Image pred = reference->predict( mdts[g], p, *obsModel, ReferenceMap::Opposite); alignmentSet.accelerate(movie[p][f], alignmentSet.obs[g][p][f]); alignmentSet.accelerate(pred, alignmentSet.pred[g][p]); } } } // release all unneeded heap space back to the OS // (this can free tens of Gb) #if !defined(__APPLE__) malloc_trim(0); #endif std::cout << " done\n"; } relion-3.1.3/src/jaz/motion/motion_param_estimator.h000066400000000000000000000064661411340063500226240ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #ifndef MOTION_PARAM_ESTIMATOR_H #define MOTION_PARAM_ESTIMATOR_H #include "alignment_set.h" #include #include #include //#define TIMING 1 #ifdef TIMING #define RCTIC(timer,label) (timer.tic(label)) #define RCTOC(timer,label) (timer.toc(label)) #else #define RCTIC(timer,label) #define RCTOC(timer,label) #endif class MotionEstimator; class ReferenceMap; class ObservationModel; class ParFourierTransformer; class MotionParamEstimator { public: MotionParamEstimator(); void read(IOParser& parser, int argc, char *argv[]); void init(int verb, int nr_omp_threads, bool debug, std::string outPath, int fc, const std::vector& allMdts, MotionEstimator* motionEstimator, ReferenceMap* reference, ObservationModel* obsModel); void run(); bool anythingToDo(); // to be used by instances of OptimizationProblem void evaluateParams(const std::vector& sig_vals, std::vector& TSCs); static const double velScale, divScale, accScale; protected: bool paramsRead, ready; AlignmentSet alignmentSet; // read from cmd. line: bool estim2, estim3; int minParticles, maxRange, maxIters, seed, group; double sV, sD, sA; double iniStep, conv; double align_frac, eval_frac; double k_cutoff, k_eval; // set at init: std::vector mdts; MotionEstimator* motionEstimator; ObservationModel* obsModel; ReferenceMap* reference; int fc, k_out, verb, nr_omp_threads, s_ref, s, sh; bool allGroups; bool debug; std::string outPath; #ifdef TIMING Timer paramTimer; int timeSetup, timeOpt, timeEval; #endif gravis::d4Vector estimateTwoParamsNM( double sig_v_0, double sig_d_0, double sig_acc, double inStep, double conv, int maxIters); gravis::d4Vector estimateThreeParamsNM( double sig_v_0, double sig_d_0, double sig_a_0, double inStep, double conv, int maxIters); void prepAlignment(); }; #endif relion-3.1.3/src/jaz/motion/motion_refiner.cpp000066400000000000000000000414611411340063500214140ustar00rootroot00000000000000/*************************************************************************** * * Authors: "Jasenko Zivanov & Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #include "motion_refiner.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "gp_motion_fit.h" #include "motion_helper.h" using namespace gravis; MotionRefiner::MotionRefiner() : motionParamEstimator(), motionEstimator(), frameRecombiner() { } void MotionRefiner::read(int argc, char **argv) { IOParser parser; parser.setCommandLine(argc, argv); parser.addSection("General options"); // TODO: fn_opt = parser.getOption("--opt", "optimiser STAR file from a previous 3D auto-refinement"); starFn = parser.getOption("--i", "Input STAR file"); outPath = parser.getOption("--o", "Output directory, e.g. MotionFit/job041/"); reference.read(parser, argc, argv); micrographHandler.firstFrame = textToInteger(parser.getOption("--first_frame", "First move frame to process", "1")) - 1; micrographHandler.lastFrame = textToInteger(parser.getOption("--last_frame", "Last movie frame to process (default is all)", "-1")) - 1; only_do_unfinished = parser.checkOption("--only_do_unfinished", "Skip those steps for which output files already exist."); verb = textToInteger(parser.getOption("--verb", "Verbosity", "1")); motionEstimator.read(parser, argc, argv); motionParamEstimator.read(parser, argc, argv); frameRecombiner.read(parser, argc, argv); parser.addSection("Computational options"); nr_omp_threads = textToInteger(parser.getOption("--j", "Number of (OMP) threads", "1")); minMG = textToInteger(parser.getOption("--min_MG", "First micrograph index", "0")); maxMG = textToInteger(parser.getOption("--max_MG", "Last micrograph index (default is to process all)", "-1")); micrographHandler.saveMem = parser.checkOption("--sbs", "Load movies slice-by-slice to save memory (slower)"); parser.addSection("Expert options"); findShortestMovie = parser.checkOption("--find_shortest", "Load only as many frames as are present in all movies."); debug = parser.checkOption("--debug", "Write debugging data"); micrographHandler.corrMicFn = parser.getOption("--corr_mic", "List of uncorrected micrographs (e.g. corrected_micrographs.star)"); micrographHandler.movie_angpix = textToDouble(parser.getOption("--mps", "Pixel size of input movies (Angst/pix)", "-1")); micrographHandler.coords_angpix = textToDouble(parser.getOption("--cps", "Pixel size of particle coordinates in star-file (Angst/pix)", "-1")); micrographHandler.hotCutoff = textToDouble(parser.getOption("--hot", "Clip hot pixels to this max. value (-1 = off, TIFF only)", "-1")); micrographHandler.debug = parser.checkOption("--debug_mov", "Write debugging data for movie loading"); movie_toReplace = parser.getOption("--mov_toReplace", "Replace this string in micrograph names...", ""); movie_replaceBy = parser.getOption("--mov_replaceBy", "..by this one", ""); micrographHandler.eer_upsampling = textToInteger(parser.getOption("--eer_upsampling", "EER upsampling (1 = 4K or 2 = 8K)", "-1")); micrographHandler.eer_grouping = textToInteger(parser.getOption("--eer_grouping", "EER grouping", "-1")); if (micrographHandler.eer_upsampling > 0 || micrographHandler.eer_grouping > 0) { // TODO: TAKANORI: Support changing EER upsampling and EER grouping in Polish. 
		// Change in upsampling needs changes in ImageX/Y, Binning, OriginalPixelSize, ShiftX/Y, HotpixelX/Y (but not coeffs)
		// Change in grouping needs changes in ImageZ, DoseRate and interpolation of ShiftX/Y and scaling of MotionModelCoeffs
		std::cerr << "At the moment, Polishing does not support changing --eer_upsampling and --eer_grouping from the values used in MotionCorr job.\n";
		std::cerr << "You need to manually modify trajectory STAR files for it." << std::endl;
	}

	// Check for errors in the command-line option
	if (parser.checkForErrors())
	{
		REPORT_ERROR("Errors encountered on the command line (see above), exiting...");
	}

	if (argc == 1) parser.writeUsage(std::cerr);
}

void MotionRefiner::init()
{
	if (outPath[outPath.length()-1] != '/')
	{
		outPath += "/";
	}

	if (verb > 0) std::cout << " + Reading " << starFn << "..." << std::endl;

	mdt0.read(starFn);
	ObservationModel::loadSafely(starFn, obsModel, mdt0);

	//@CHECK
	if (!ObservationModel::containsAllColumnsNeededForPrediction(mdt0))
	{
		REPORT_ERROR_STR(starFn << " does not contain all of the required columns ("
			<< "rlnOriginX, rlnOriginY, rlnAngleRot, rlnAngleTilt, rlnAnglePsi and rlnRandomSubset)");
	}

	adaptMovieNames();

	allMdts = StackHelper::splitByMicrographName(mdt0);

	if (minMG >= allMdts.size())
	{
		std::stringstream sts0, sts1;
		sts0 << minMG;
		sts1 << allMdts.size();

		REPORT_ERROR("ERROR: Cannot start with micrograph "+sts0.str()
			+" (--min_MG); only "+sts1.str()+" micrographs defined in "+starFn+".");
	}

	if (minMG < 0)
	{
		minMG = 0;
	}

	// Only work on a user-specified subset of the micrographs
	if (maxMG < 0 || maxMG >= allMdts.size())
	{
		maxMG = allMdts.size()-1;
	}

	if (minMG > 0 || maxMG < allMdts.size()-1)
	{
		if (verb > 0)
		{
			std::cout << " - Will only process micrographs in range: ["
				<< minMG << "-" << maxMG << "]" << std::endl;
		}

		chosenMdts.clear();

		for (long int g = minMG; g <= maxMG; g++)
		{
			chosenMdts.push_back(allMdts[g]);
		}
	}
	else
	{
		chosenMdts = allMdts;
	}

	/* There are two options on how to handle movies of varying length:

	   1) Find the lowest frame-count in the selected dataset,
	      and only extract that number of frames from all movies.
	      (findShortestMovie == true)

	   2) Remove all movies of insufficient size from the dataset.
If no --last_frame value is supplied, the length of the first movie is used as the reference length (findShortestMovie == false) */ { std::string metaFn = ""; // the first meta-star filename double fractDose = 0.0; // the dose in the first meta-star (@TODO: support variable dose) // => we don't need variable dose support as long as different motioncorr jobs are processed separatedly int fc0; // the frame count in the first movie // initialise corrected/uncorrected micrograph dictionary, then load the header // of the first movie (or read corrected_micrographs.star) to obtain: // frame count, micrograph size and the fractional dose micrographHandler.init( chosenMdts, verb, nr_omp_threads, // in fc0, fractDose, metaFn); // out chosenMdts = micrographHandler.cullMissingMovies(chosenMdts, verb); if (!findShortestMovie) { if (micrographHandler.lastFrame < 0) { fc = fc0 - micrographHandler.firstFrame; micrographHandler.lastFrame = fc0 - 1; } else { fc = micrographHandler.lastFrame - micrographHandler.firstFrame + 1; } } // metaFn is only needed for console output // verb is needed since motionEstimator has not been initialised yet motionEstimator.proposeDosePerFrame(fractDose, metaFn, verb); } if (findShortestMovie) { // make sure we don't try to load too many frames micrographHandler.findLowestFrameCount(chosenMdts, verb); fc = micrographHandler.lastFrame - micrographHandler.firstFrame + 1; } else { // remove movies of insufficient size chosenMdts = micrographHandler.findLongEnoughMovies( chosenMdts, micrographHandler.lastFrame+1, verb); } if (only_do_unfinished) { motionMdts.clear(); recombMdts.clear(); motionMdts = MotionEstimator::findUnfinishedJobs(chosenMdts, outPath); recombMdts = frameRecombiner.findUnfinishedJobs(chosenMdts, outPath); if (verb > 0) { if (motionMdts.size() > 0) { if (motionMdts.size() < chosenMdts.size()) { std::cout << " - Will only estimate motion for " << motionMdts.size() << " unfinished micrographs" << std::endl; } else { std::cout << " - Will estimate motion for all " << motionMdts.size() << " micrographs - none are finished" << std::endl; } } else { std::cout << " - Motion has already been estimated for all micrographs" << std::endl; } if (frameRecombiner.doingRecombination()) { if (recombMdts.size() > 0) { if (recombMdts.size() < chosenMdts.size()) { std::cout << " - Will only recombine frames for " << recombMdts.size() << " unfinished micrographs" << std::endl; } else { std::cout << " - Will recombine frames for all " << recombMdts.size() << " micrographs - none are finished" << std::endl; } } else { std::cout << " - Frames have already been recombined for all micrographs; " << "a new STAR file will be generated" << std::endl; } } } } else { motionMdts = chosenMdts; recombMdts = chosenMdts; } estimateParams = motionParamEstimator.anythingToDo(); estimateMotion = motionMdts.size() > 0; recombineFrames = frameRecombiner.doingRecombination() && (recombMdts.size() > 0 || !exists(outPath + "shiny" + frameRecombiner.getOutputSuffix() + ".star")); generateStar = frameRecombiner.doingRecombination(); bool doAnything = estimateParams || estimateMotion || recombineFrames; bool needsReference = doAnything; if (!doAnything) exit(0); //TODO: To be replaced with RELION_EXIT_SUCCESS if (needsReference) { if (verb > 0) std::cout << " + Reading references ..." << std::endl; reference.load(verb, debug); } micrographHandler.validatePixelSize(reference.angpix); if (estimateMotion || estimateParams) { if (verb > 0) std::cout << " + Initializing motion estimator ..." 
<< std::endl; motionEstimator.init( verb, fc, nr_omp_threads, debug, outPath, &reference, &obsModel, µgraphHandler); } if (estimateParams) { //REPORT_ERROR("Parameter estimation currently not supported"); if (verb > 0) std::cout << " + Initializing motion parameter estimator ..." << std::endl; motionParamEstimator.init( verb, nr_omp_threads, debug, outPath, fc, chosenMdts, &motionEstimator, &reference, &obsModel); } } void MotionRefiner::run() { if (estimateParams) { motionParamEstimator.run(); return; // @TODO: apply the optimized parameters, then continue with motion estimation } // The subsets will be used in openMPI parallelisation: instead of over g0->gc, // they will be over smaller subsets // TODO: TAKANORI: first, estimate FCC on only a subset of movies here if (estimateMotion) { motionEstimator.process(motionMdts, 0, motionMdts.size()-1); } // TODO: TAKANORI: then process all movies, simultaneously estimating tracks and recombining. // micrograph handler can cache movie frames to avoid reading movies twice. // (if --sbs, don't cache to save memory) if (recombineFrames) { double k_out_A = reference.pixToAng(reference.k_out); frameRecombiner.init( allMdts, verb, reference.s, fc, k_out_A, reference.angpix, nr_omp_threads, outPath, debug, &reference, &obsModel, µgraphHandler); frameRecombiner.process(recombMdts, 0, recombMdts.size()-1); } if (generateStar) { combineEPSAndSTARfiles(); } } int MotionRefiner::getVerbosityLevel() { return verb; } // combine all EPS files into one logfile.pdf void MotionRefiner::combineEPSAndSTARfiles() { std::vector fn_eps; if (verb > 0) { std::cout << " + Combining all EPS and STAR files ... " << std::endl; } MetaDataTable mdtAll; if (frameRecombiner.doingRecombination()) { if (exists(outPath+"bfactors.eps")) { fn_eps.push_back(outPath+"bfactors.eps"); } if (exists(outPath+"scalefactors.eps")) { fn_eps.push_back(outPath+"scalefactors.eps"); } } std::vector n_OgPresent(obsModel.numberOfOpticsGroups(), 0); std::vector n_OgAbsent(obsModel.numberOfOpticsGroups(), 0); for (long g = 0; g < allMdts.size(); g++) { FileName fn_root = getOutputFileNameRoot(outPath, allMdts[g]); if (exists(fn_root+"_tracks.eps")) { fn_eps.push_back(fn_root+"_tracks.eps"); } if (frameRecombiner.doingRecombination() && exists(fn_root+"_shiny" + frameRecombiner.getOutputSuffix() + ".star")) { MetaDataTable mdt; mdt.read(fn_root+"_shiny" + frameRecombiner.getOutputSuffix() + ".star"); mdtAll.append(mdt); FOR_ALL_OBJECTS_IN_METADATA_TABLE(mdt) { n_OgPresent[obsModel.getOpticsGroup(mdt)]++; } } else { // Non-processed particles belonging to micrographs not present in the MotionCorr STAR file // Remove them from the output FOR_ALL_OBJECTS_IN_METADATA_TABLE(allMdts[g]) { n_OgAbsent[obsModel.getOpticsGroup(allMdts[g])]++; } } } if (fn_eps.size() > 0) { joinMultipleEPSIntoSinglePDF(outPath + "logfile.pdf", fn_eps); } if (frameRecombiner.doingRecombination()) { for (int og = 0; og < obsModel.numberOfOpticsGroups(); og++) { // If this optics group was not processed, don't change anything if (n_OgPresent[og] == 0) { std::cerr << "WARNING: All " << n_OgAbsent[og] << " particles in the optics group " << (og + 1) << " were removed because no particles belong to the movies in the input MotionCorr STAR file." << std::endl; obsModel.opticsMdt.setValue(EMDL_IMAGE_PIXEL_SIZE, -1.0, og); // mark for deletion continue; } if (n_OgAbsent[og] > 0) { std::cerr << "WARNING: " << n_OgAbsent[og] << " particles in the optics group " << (og + 1) << " were removed." 
<< std::endl; } obsModel.opticsMdt.setValue(EMDL_IMAGE_PIXEL_SIZE, frameRecombiner.getOutputPixelSize(og), og); obsModel.opticsMdt.setValue(EMDL_IMAGE_SIZE, frameRecombiner.getOutputBoxSize(og), og); obsModel.opticsMdt.setValue(EMDL_OPTIMISER_DATA_ARE_CTF_PREMULTIPLIED, frameRecombiner.isCtfMultiplied(og), og); std::cout << " + Pixel size for optics group " << (og + 1) << ": " << frameRecombiner.getOutputPixelSize(og) << std::endl; } // Remove absent optics groups; After this, NOTHING should be done except for saving. obsModel's internal data structure is now corrupted! int og = 0; while (og < obsModel.opticsMdt.numberOfObjects()) { RFLOAT og_angpix; obsModel.opticsMdt.getValue(EMDL_IMAGE_PIXEL_SIZE, og_angpix, og); if (og_angpix < 0) { obsModel.opticsMdt.removeObject(og); } else { og++; } } obsModel.save(mdtAll, outPath + "shiny" + frameRecombiner.getOutputSuffix() + ".star"); } if (verb > 0) { std::cout << " + Done! " << std::endl; std::cout << " + Written logfile in " << outPath << "logfile.pdf" << std::endl; if (frameRecombiner.doingRecombination()) { std::cout << " + Written new particle STAR file in " << outPath << "shiny" + frameRecombiner.getOutputSuffix() + ".star" << std::endl; } } } // Get the output filename from the micrograph filename FileName MotionRefiner::getOutputFileNameRoot(std::string outPath, const MetaDataTable& mdt) { FileName fn_mic, fn_pre, fn_jobnr, fn_post; mdt.getValue(EMDL_MICROGRAPH_NAME, fn_mic, 0); decomposePipelineFileName(fn_mic, fn_pre, fn_jobnr, fn_post); return outPath + fn_post.withoutExtension(); } void MotionRefiner::adaptMovieNames() { if (movie_toReplace != "") { std::string name; for (int i = 0; i < mdt0.numberOfObjects(); i++) { mdt0.getValue(EMDL_MICROGRAPH_NAME, name, i); if (i == 0 && verb > 0) { std::cout << " - Replacing: " << name << std::endl; } std::string::size_type pos0 = name.find(movie_toReplace); if (pos0 != std::string::npos) { std::string::size_type pos1 = pos0 + movie_toReplace.length(); std::string before = name.substr(0, pos0); std::string after = pos1 < name.length()? name.substr(pos1) : ""; name = before + movie_replaceBy + after; } if (i == 0 && verb > 0) { std::cout << " -> " << name << std::endl; } mdt0.setValue(EMDL_MICROGRAPH_NAME, name, i); } } } relion-3.1.3/src/jaz/motion/motion_refiner.h000066400000000000000000000061101411340063500210510ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov & Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #ifndef MOTION_REFINER_H_ #define MOTION_REFINER_H_ #include #include #include #include #include #include #include #include #include #include "motion_param_estimator.h" #include "motion_estimator.h" #include "frame_recombiner.h" #include class MotionRefiner { public: MotionRefiner(); // Read command line arguments void read(int argc, char **argv); // Initialise some general stuff after reading void init(); // General Running (Admiral Swimming!) void run(); int getVerbosityLevel(); // For original particle-polishing-like Bfactors (not used) //void calculateSingleFrameReconstruction(int iframe); // Get output STAR file name for this micrograph static FileName getOutputFileNameRoot(std::string outPath, const MetaDataTable& mdt); protected: // components that do the actual work MotionParamEstimator motionParamEstimator; MotionEstimator motionEstimator; FrameRecombiner frameRecombiner; // required components ObservationModel obsModel; ReferenceMap reference; MicrographHandler micrographHandler; // s: full image size, sh: half-size + 1, fc: frame count int s_ref, sh_ref, fc; // Verbosity int verb; bool debug, findShortestMovie; int nr_omp_threads; std::string outPath; std::string starFn, movie_toReplace, movie_replaceBy; // Allow continuation of crashed jobs bool only_do_unfinished; bool estimateParams, estimateMotion, recombineFrames, generateStar; long maxMG, minMG; MetaDataTable mdt0; std::vector allMdts, // all micrographs (used for B-factor computation) chosenMdts, // micrographs between minMG and maxMG motionMdts, recombMdts; // unfinished micrographs // combine all EPS files into one logfile.pdf void combineEPSAndSTARfiles(); // apply changes to micrograph-filenames implied by // movie_path, movie_ending and movie_toReplace/replaceBy void adaptMovieNames(); }; #endif relion-3.1.3/src/jaz/motion/motion_refiner_mpi.cpp000066400000000000000000000062421411340063500222570ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov & Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include "motion_refiner_mpi.h" void MotionRefinerMpi::read(int argc, char **argv) { // Define a new MpiNode node = new MpiNode(argc, argv); // First read in non-parallelisation-dependent variables MotionRefiner::read(argc, argv); // Don't put any output to screen for mpi followers verb = (node->isLeader()) ? 
		verb : 0;

	// Possibly also read parallelisation-dependent variables here

	if (node->size < 2)
	{
		REPORT_ERROR("ERROR: this program needs to be run with at least two MPI processes!");
	}

	if (node->isLeader() && (motionParamEstimator.anythingToDo()))
	{
		REPORT_ERROR("Parameter estimation is not supported in MPI mode.");
		return;
	}

	// Print out MPI info
	printMpiNodesMachineNames(*node);
}

void MotionRefinerMpi::run()
{
	if (estimateParams)
	{
		REPORT_ERROR("Parameter estimation is not supported in MPI mode.");
		return;
	}

	// Parallel loop over micrographs
	if (estimateMotion)
	{
		long int total_nr_micrographs = motionMdts.size();

		// Each node does part of the work
		long int my_first_micrograph, my_last_micrograph, my_nr_micrographs;
		divide_equally(total_nr_micrographs, node->size, node->rank,
		               my_first_micrograph, my_last_micrograph);
		my_nr_micrographs = my_last_micrograph - my_first_micrograph + 1;

		motionEstimator.process(motionMdts, my_first_micrograph, my_last_micrograph);
	}

	MPI_Barrier(MPI_COMM_WORLD);

	if (recombineFrames)
	{
		long int total_nr_micrographs = recombMdts.size();

		// Each node does part of the work
		long int my_first_micrograph, my_last_micrograph, my_nr_micrographs;
		divide_equally(total_nr_micrographs, node->size, node->rank,
		               my_first_micrograph, my_last_micrograph);
		my_nr_micrographs = my_last_micrograph - my_first_micrograph + 1;

		double k_out_A = reference.pixToAng(reference.k_out);

		frameRecombiner.init(
			allMdts, verb, reference.s, fc, k_out_A, reference.angpix,
			nr_omp_threads, outPath, debug,
			&reference, &obsModel, &micrographHandler);

		frameRecombiner.process(recombMdts, my_first_micrograph, my_last_micrograph);
	}

	MPI_Barrier(MPI_COMM_WORLD);

	if (generateStar && node->isLeader())
	{
		combineEPSAndSTARfiles();
	}
}
relion-3.1.3/src/jaz/motion/motion_refiner_mpi.h000066400000000000000000000026611411340063500217250ustar00rootroot00000000000000
/***************************************************************************
 *
 * Author: "Jasenko Zivanov & Sjors H.W. Scheres"
 * MRC Laboratory of Molecular Biology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * This complete copyright notice must be included in any revised version of the
 * source code. Additional authorship citations may be added, but existing
 * author citations must be preserved.
***************************************************************************/ #ifndef MOTION_REFINER_MPI_H_ #define MOTION_REFINER_MPI_H_ #include #include #include "motion_refiner.h" class MotionRefinerMpi : public MotionRefiner { private: MpiNode *node; public: /** Destructor, calls MPI_Finalize */ ~MotionRefinerMpi() { delete node; } /** Read * This could take care of mpi-parallelisation-dependent variables */ void read(int argc, char **argv); // Parallelized run function void run(); }; #endif relion-3.1.3/src/jaz/motion/three_hyperparameter_fit.cpp000066400000000000000000000055671411340063500234650ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include "three_hyperparameter_fit.h" #include "motion_param_estimator.h" using namespace gravis; double ThreeHyperParameterProblem::accThresh = 500.0; double ThreeHyperParameterProblem::accEps = 1e-5; ThreeHyperParameterProblem::ThreeHyperParameterProblem( MotionParamEstimator& motionParamEstimator) : motionParamEstimator(motionParamEstimator) { } double ThreeHyperParameterProblem::f(const std::vector& x, void *tempStorage) const { d3Vector vda = problemToMotion(x); std::vector tsc(1); std::vector vdav{vda}; motionParamEstimator.evaluateParams(vdav, tsc); /*std::cout.precision(16); std::cout << " " << vda << " => " << tsc[0] << std::endl; std::cout.precision(5);*/ return -tsc[0]; } void ThreeHyperParameterProblem::report(int iteration, double cost, const std::vector& x) const { d3Vector vda = problemToMotion(x); std::cout.precision(5); std::cout << iteration << ": \t " << vda[0] << " \t " << vda[1] << " \t " << vda[2] << " \t "; std::cout.precision(12); std::cout << -cost << std::endl; std::cout.precision(5); } d3Vector ThreeHyperParameterProblem::problemToMotion(const std::vector& x) { const double w = x[2] / MotionParamEstimator::accScale; return d3Vector( x[0] / MotionParamEstimator::velScale, x[1] / MotionParamEstimator::divScale, //(w > 1.0/accThresh ? (w < 1.0/accEps? 1.0/w : 1.0/accEps) : -1.0)); (x[2] > accEps)? x[2] / MotionParamEstimator::accScale : accEps / MotionParamEstimator::accScale); } std::vector ThreeHyperParameterProblem::motionToProblem(d3Vector vd) { const double s = vd[2]; const double w = s < accThresh? (s > accEps? 
1.0/s : 1.0/accEps): 0.0; return std::vector{ vd[0] * MotionParamEstimator::velScale, vd[1] * MotionParamEstimator::divScale, //w * MotionParamEstimator::accScale}; vd[2] * MotionParamEstimator::accScale}; } relion-3.1.3/src/jaz/motion/three_hyperparameter_fit.h000066400000000000000000000032671411340063500231250ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef THREE_HYPERPARAMETER_PROBLEM_H #define THREE_HYPERPARAMETER_PROBLEM_H #include #include class MotionParamEstimator; class ThreeHyperParameterProblem : public Optimization { public: ThreeHyperParameterProblem( MotionParamEstimator& motionParamEstimator); double f(const std::vector& x, void* tempStorage) const; void report(int iteration, double cost, const std::vector& x) const; static gravis::d3Vector problemToMotion(const std::vector& x); static std::vector motionToProblem(gravis::d3Vector vd); protected: MotionParamEstimator& motionParamEstimator; static double accThresh, accEps; }; #endif relion-3.1.3/src/jaz/motion/two_hyperparameter_fit.cpp000066400000000000000000000044401411340063500231540ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #include "two_hyperparameter_fit.h" #include "motion_param_estimator.h" using namespace gravis; TwoHyperParameterProblem::TwoHyperParameterProblem( MotionParamEstimator& motionParamEstimator, double s_acc) : motionParamEstimator(motionParamEstimator), s_acc(s_acc) { } double TwoHyperParameterProblem::f(const std::vector& x, void *tempStorage) const { d2Vector vd = problemToMotion(x); std::vector vda(1); vda[0] = d3Vector(vd[0], vd[1], s_acc); std::vector tsc(1); motionParamEstimator.evaluateParams(vda, tsc); return -tsc[0]; } void TwoHyperParameterProblem::report(int iteration, double cost, const std::vector& x) const { d2Vector vd = problemToMotion(x); std::cout.precision(5); std::cout << iteration << ": \t " << vd[0] << " \t " << vd[1] << " \t " << s_acc << " \t "; std::cout.precision(12); std::cout << -cost << std::endl; std::cout.precision(5); } d2Vector TwoHyperParameterProblem::problemToMotion(const std::vector& x) { return d2Vector( x[0] / MotionParamEstimator::velScale, x[1] / MotionParamEstimator::divScale); } std::vector TwoHyperParameterProblem::motionToProblem(d2Vector vd) { return std::vector{ vd[0] * MotionParamEstimator::velScale, vd[1] * MotionParamEstimator::divScale}; } relion-3.1.3/src/jaz/motion/two_hyperparameter_fit.h000066400000000000000000000032651411340063500226250ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef TWO_HYPERPARAMETER_PROBLEM_H #define TWO_HYPERPARAMETER_PROBLEM_H #include #include class MotionParamEstimator; class TwoHyperParameterProblem : public Optimization { public: TwoHyperParameterProblem( MotionParamEstimator& motionParamEstimator, double s_acc); double f(const std::vector& x, void* tempStorage) const; void report(int iteration, double cost, const std::vector& x) const; static gravis::d2Vector problemToMotion(const std::vector& x); static std::vector motionToProblem(gravis::d2Vector vd); protected: MotionParamEstimator& motionParamEstimator; double s_acc; }; #endif relion-3.1.3/src/jaz/new_ft.cpp000066400000000000000000000267371411340063500163630ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include "new_ft.h" #include "src/macros.h" #include "src/fftw.h" #include "src/args.h" #include #include pthread_mutex_t NewFFT::fftw_plan_mutex_new = PTHREAD_MUTEX_INITIALIZER; void NewFFT::FourierTransform( MultidimArray& src, MultidimArray& dest, const NewFFT::DoublePlan& plan, Normalization normalization) { if (!plan.isCompatible(src)) { REPORT_ERROR("ERROR: plan incompatible with input array\n"); } if (!plan.isCompatible(dest)) { if (plan.isReusable()) { dest.resizeNoCp(src.ndim, src.zdim, src.ydim, src.xdim/2 + 1); } else { REPORT_ERROR("NewFFT::FourierTransform: plan incompatible with output array\n"); } } _FourierTransform(src, dest, plan, normalization); } void NewFFT::inverseFourierTransform( MultidimArray& src, MultidimArray& dest, const NewFFT::DoublePlan& plan, Normalization normalization, bool preserveInput) { if (preserveInput && !plan.isReusable()) { REPORT_ERROR("NewFFT::inverseFourierTransform: preserveInput is only supported for reusable plans\n"); } if (!plan.isCompatible(src)) { REPORT_ERROR("NewFFT::inverseFourierTransform: plan incompatible with input array\n"); } if (!plan.isCompatible(dest)) { if (plan.isReusable()) { dest.resizeNoCp(src.ndim, src.zdim, src.ydim, 2*(src.xdim-1)); } else { REPORT_ERROR("NewFFT::inverseFourierTransform: plan incompatible with output array\n"); } } MultidimArray src2; if (preserveInput) { src2 = src; _inverseFourierTransform(src2, dest, plan, normalization); } else { _inverseFourierTransform(src, dest, plan, normalization); } } void NewFFT::FourierTransform( MultidimArray& src, MultidimArray& dest, const NewFFT::FloatPlan& plan, Normalization normalization) { if (!plan.isCompatible(src)) { REPORT_ERROR("NewFFT::FourierTransform: plan incompatible with input array\n"); } if (!plan.isCompatible(dest)) { if (plan.isReusable()) { dest.resizeNoCp(src.ndim, src.zdim, src.ydim, src.xdim/2 + 1); } else { REPORT_ERROR("NewFFT::FourierTransform: plan incompatible with output array\n"); } } _FourierTransform(src, dest, plan, normalization); } void NewFFT::inverseFourierTransform( MultidimArray& src, MultidimArray& dest, const NewFFT::FloatPlan& plan, Normalization normalization, bool preserveInput) { if (preserveInput && !plan.isReusable()) { REPORT_ERROR("NewFFT::inverseFourierTransform: preserveInput is only supported for reusable plans\n"); } if (!plan.isCompatible(src)) { REPORT_ERROR("NewFFT::inverseFourierTransform: plan incompatible with input array\n"); } if (!plan.isCompatible(dest)) { if (plan.isReusable()) { dest.resizeNoCp(src.ndim, src.zdim, src.ydim, 2*(src.xdim-1)); } else { REPORT_ERROR("NewFFT::inverseFourierTransform: plan incompatible with output array\n"); } } MultidimArray src2; if (preserveInput) { src2 = src; _inverseFourierTransform(src2, dest, plan, normalization); } else { _inverseFourierTransform(src, dest, plan, normalization); } } void NewFFT::FourierTransform( MultidimArray& src, MultidimArray& dest, Normalization normalization) { if (!areSizesCompatible(src, dest)) { resizeComplexToMatch(src, dest); } 
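	// no plan was supplied: build a single-use DoublePlan bound to the src/dest arrays,
	// then execute the forward transform through it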
DoublePlan p(src, dest); _FourierTransform(src, dest, p, normalization); } void NewFFT::inverseFourierTransform( MultidimArray& src, MultidimArray& dest, Normalization normalization, bool preserveInput) { if (!areSizesCompatible(dest, src)) { resizeRealToMatch(dest, src); } if (preserveInput) { MultidimArray src2 = src; DoublePlan p(dest, src2); _inverseFourierTransform(src2, dest, p, normalization); } else { DoublePlan p(dest, src); _inverseFourierTransform(src, dest, p, normalization); } } void NewFFT::FourierTransform( MultidimArray& src, MultidimArray& dest, Normalization normalization) { if (!areSizesCompatible(src, dest)) { resizeComplexToMatch(src, dest); } FloatPlan p(src, dest); _FourierTransform(src, dest, p, normalization); } void NewFFT::inverseFourierTransform( MultidimArray& src, MultidimArray& dest, Normalization normalization, bool preserveInput) { if (!areSizesCompatible(dest, src)) { resizeRealToMatch(dest, src); } if (preserveInput) { MultidimArray src2 = src; FloatPlan p(dest, src2); _inverseFourierTransform(src2, dest, p, normalization); } else { FloatPlan p(dest, src); _inverseFourierTransform(src, dest, p, normalization); } } void NewFFT::_FourierTransform( MultidimArray& src, MultidimArray& dest, const NewFFT::DoublePlan& plan, Normalization normalization) { fftw_execute_dft_r2c(plan.getForward(), MULTIDIM_ARRAY(src), (fftw_complex*) MULTIDIM_ARRAY(dest)); if (normalization == FwdOnly) { const double scale = MULTIDIM_SIZE(src); for (long int i = 0; i < NZYXSIZE(dest); i++) { dest.data[i] /= scale; } } else if (normalization == Both) { const double scale = sqrt(MULTIDIM_SIZE(src)); for (long int i = 0; i < NZYXSIZE(dest); i++) { dest.data[i] /= scale; } } } void NewFFT::_inverseFourierTransform( MultidimArray& src, MultidimArray& dest, const NewFFT::DoublePlan& plan, Normalization normalization) { fftw_complex* in = (fftw_complex*) MULTIDIM_ARRAY(src); fftw_execute_dft_c2r(plan.getBackward(), in, MULTIDIM_ARRAY(dest)); if (normalization == Both) { const double scale = sqrt(MULTIDIM_SIZE(dest)); for (long int i = 0; i < NZYXSIZE(dest); i++) { dest.data[i] /= scale; } } } void NewFFT::_FourierTransform( MultidimArray& src, MultidimArray& dest, const NewFFT::FloatPlan& plan, Normalization normalization) { fftwf_execute_dft_r2c(plan.getForward(), MULTIDIM_ARRAY(src), (fftwf_complex*) MULTIDIM_ARRAY(dest)); if (normalization == FwdOnly) { const float scale = MULTIDIM_SIZE(src); for (long int i = 0; i < NZYXSIZE(dest); i++) { dest.data[i] /= scale; } } else if (normalization == Both) { const float scale = sqrt(MULTIDIM_SIZE(src)); for (long int i = 0; i < NZYXSIZE(dest); i++) { dest.data[i] /= scale; } } } void NewFFT::_inverseFourierTransform( MultidimArray& src, MultidimArray& dest, const NewFFT::FloatPlan& plan, Normalization normalization) { fftwf_complex* in = (fftwf_complex*) MULTIDIM_ARRAY(src); fftwf_execute_dft_c2r(plan.getBackward(), in, MULTIDIM_ARRAY(dest)); if (normalization == Both) { const float scale = sqrt(MULTIDIM_SIZE(dest)); for (long int i = 0; i < NZYXSIZE(dest); i++) { dest.data[i] /= scale; } } } NewFFT::DoublePlan::DoublePlan(int w, int h, int d, unsigned int flags) : reusable(true), w(w), h(h), d(d), realPtr(0), complexPtr(0) { MultidimArray realDummy(d,h,w); MultidimArray complexDummy(d,h,w/2+1); std::vector N(0); if (d > 1) N.push_back(d); if (h > 1) N.push_back(h); N.push_back(w); const int ndim = N.size(); pthread_mutex_lock(&fftw_plan_mutex_new); fftw_plan planForward = fftw_plan_dft_r2c( ndim, &N[0], MULTIDIM_ARRAY(realDummy), 
(fftw_complex*) MULTIDIM_ARRAY(complexDummy), FFTW_UNALIGNED | flags); fftw_plan planBackward = fftw_plan_dft_c2r( ndim, &N[0], (fftw_complex*) MULTIDIM_ARRAY(complexDummy), MULTIDIM_ARRAY(realDummy), FFTW_UNALIGNED | flags); pthread_mutex_unlock(&fftw_plan_mutex_new); if (planForward == NULL || planBackward == NULL) { REPORT_ERROR("FFTW plans cannot be created"); } plan = std::shared_ptr(new Plan(planForward, planBackward)); } NewFFT::DoublePlan::DoublePlan( MultidimArray& real, MultidimArray& complex, unsigned int flags) : reusable(flags & FFTW_UNALIGNED), w(real.xdim), h(real.ydim), d(real.zdim), realPtr(MULTIDIM_ARRAY(real)), complexPtr((double*)MULTIDIM_ARRAY(complex)) { std::vector N(0); if (d > 1) N.push_back(d); if (h > 1) N.push_back(h); N.push_back(w); const int ndim = N.size(); pthread_mutex_lock(&fftw_plan_mutex_new); fftw_plan planForward = fftw_plan_dft_r2c( ndim, &N[0], MULTIDIM_ARRAY(real), (fftw_complex*) MULTIDIM_ARRAY(complex), flags); fftw_plan planBackward = fftw_plan_dft_c2r( ndim, &N[0], (fftw_complex*) MULTIDIM_ARRAY(complex), MULTIDIM_ARRAY(real), flags); pthread_mutex_unlock(&fftw_plan_mutex_new); if (planForward == NULL || planBackward == NULL) { REPORT_ERROR("FFTW plans cannot be created"); } plan = std::shared_ptr(new Plan(planForward, planBackward)); } NewFFT::FloatPlan::FloatPlan(int w, int h, int d, unsigned int flags) : reusable(true), w(w), h(h), d(d), realPtr(0), complexPtr(0) { MultidimArray realDummy(d,h,w); MultidimArray complexDummy(d,h,w/2+1); std::vector N(0); if (d > 1) N.push_back(d); if (h > 1) N.push_back(h); N.push_back(w); const int ndim = N.size(); pthread_mutex_lock(&fftw_plan_mutex_new); fftwf_plan planForward = fftwf_plan_dft_r2c( ndim, &N[0], MULTIDIM_ARRAY(realDummy), (fftwf_complex*) MULTIDIM_ARRAY(complexDummy), FFTW_UNALIGNED | flags); fftwf_plan planBackward = fftwf_plan_dft_c2r( ndim, &N[0], (fftwf_complex*) MULTIDIM_ARRAY(complexDummy), MULTIDIM_ARRAY(realDummy), FFTW_UNALIGNED | flags); pthread_mutex_unlock(&fftw_plan_mutex_new); if (planForward == NULL || planBackward == NULL) { REPORT_ERROR("FFTW plans cannot be created"); } plan = std::shared_ptr(new Plan(planForward, planBackward)); } NewFFT::FloatPlan::FloatPlan( MultidimArray& real, MultidimArray& complex, unsigned int flags) : reusable(flags & FFTW_UNALIGNED), w(real.xdim), h(real.ydim), d(real.zdim), realPtr(MULTIDIM_ARRAY(real)), complexPtr((float*)MULTIDIM_ARRAY(complex)) { std::vector N(0); if (d > 1) N.push_back(d); if (h > 1) N.push_back(h); N.push_back(w); const int ndim = N.size(); pthread_mutex_lock(&fftw_plan_mutex_new); fftwf_plan planForward = fftwf_plan_dft_r2c( ndim, &N[0], MULTIDIM_ARRAY(real), (fftwf_complex*) MULTIDIM_ARRAY(complex), flags); fftwf_plan planBackward = fftwf_plan_dft_c2r( ndim, &N[0], (fftwf_complex*) MULTIDIM_ARRAY(complex), MULTIDIM_ARRAY(real), flags); pthread_mutex_unlock(&fftw_plan_mutex_new); if (planForward == NULL || planBackward == NULL) { REPORT_ERROR("FFTW plans cannot be created"); } plan = std::shared_ptr(new Plan(planForward, planBackward)); } relion-3.1.3/src/jaz/new_ft.h000066400000000000000000000275401411340063500160210ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later 
version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef NEW_FFTW_H #define NEW_FFTW_H #include #include #include #include "src/multidim_array.h" #include "src/jaz/t_complex.h" /* Usage patterns: #1: one-time transform - e.g.: call NewFFT::FourierTransform(U,V); for MultidimArray U and MultidimArray V #2: reusable, non-array-specific plans (does not allow for SIMD, since the arrays that are actually transformed might not be memory aligned) - create a plan using only the size of the arrays: e.g.: NewFFT::DoublePlan p(512, 512); - plans can be copied safely: e.g.: NewFFT::DoublePlan p2 = p; - transform arbitrary arrays of that size: e.g.: NewFFT::FourierTransform(U,V,p2); (in parallel, if desired) #3: reusable, array-specific plans (allows for SIMD if the arrays are memory-aligned) - create a plan specific to a pair of arrays: e.g.: NewFFT::DoublePlan p(U,V); - transform those arrays using their plan: e.g.: NewFFT::FourierTransform(U,V,p); */ class NewFFT { public: class DoublePlan; class FloatPlan; typedef enum { None, FwdOnly, // old default: divide by size when forward-transforming Both // divide by sqrt(size) when going both ways (Parseval's theorem holds) } Normalization; /* Four transforms using reusable plans (forward and backward for doubles and floats). NOTE: reusable plans created with the Double/FloatPlan(w,h,d) constructor always assume that the arrays are not memory-aligned - SIMD is thus disabled. The output array will be rescaled if and only if it does not have the correct size already. */ static void FourierTransform( MultidimArray& src, MultidimArray& dest, const DoublePlan& plan, Normalization normalization = FwdOnly); /* FFTW always destroys the input Complex-array in an inverse-FFT. If preserveInput is set, a copy will be made prior to transforming. */ static void inverseFourierTransform( MultidimArray& src, MultidimArray& dest, const DoublePlan& plan, Normalization normalization = FwdOnly, bool preserveInput = true); static void FourierTransform( MultidimArray& src, MultidimArray& dest, const FloatPlan& plan, Normalization normalization = FwdOnly); static void inverseFourierTransform( MultidimArray& src, MultidimArray& dest, const FloatPlan& plan, Normalization normalization = FwdOnly, bool preserveInput = true); // Four transforms using array-specific plans. // The plan will be created ad-hoc. 
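//
// A minimal usage sketch of patterns #2 and #3 described above (the array
// names, sizes and the dComplex element type are assumptions for the example):
//
//   NewFFT::DoublePlan plan(512, 512);        // pattern #2: size-only, reusable
//   MultidimArray<double>   real(512, 512);
//   MultidimArray<dComplex> freq;             // resized by the transform if needed
//   NewFFT::FourierTransform(real, freq, plan, NewFFT::Both);
//   NewFFT::inverseFourierTransform(freq, real, plan, NewFFT::Both);
//
//   NewFFT::DoublePlan bound(real, freq);     // pattern #3: array-specific, SIMD-capable
//   NewFFT::FourierTransform(real, freq, bound);
//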
// If the two arrays are memory-aligned, // SIMD will be used (if available) static void FourierTransform( MultidimArray& src, MultidimArray& dest, Normalization normalization = FwdOnly); static void inverseFourierTransform( MultidimArray& src, MultidimArray& dest, Normalization normalization = FwdOnly, bool preserveInput = true); static void FourierTransform( MultidimArray& src, MultidimArray& dest, Normalization normalization = FwdOnly); static void inverseFourierTransform( MultidimArray& src, MultidimArray& dest, Normalization normalization = FwdOnly, bool preserveInput = true); template static bool areSizesCompatible( const MultidimArray& real, const MultidimArray>& complex) { return real.xdim == 2 * (complex.xdim - 1) && real.ydim == complex.ydim && real.zdim == complex.zdim && real.ndim == complex.ndim; } template static void resizeRealToMatch( MultidimArray& real, const MultidimArray>& complex) { real.resizeNoCp(complex.ndim, complex.zdim, complex.ydim, 2 * (complex.xdim - 1)); } template static void resizeComplexToMatch( const MultidimArray& real, MultidimArray>& complex) { complex.resizeNoCp(real.ndim, real.zdim, real.ydim, real.xdim/2 + 1); } private: /* Private static methods that perform the actual transforms. The arrays are guaranteed to have the correct sizes and a compatible plan when these are called. Also, the complex array is always destroyed in the inverse transform - a copy has been made before.*/ static void _FourierTransform( MultidimArray& src, MultidimArray& dest, const DoublePlan& plan, Normalization normalization); static void _inverseFourierTransform( MultidimArray& src, MultidimArray& dest, const DoublePlan& plan, Normalization normalization); static void _FourierTransform( MultidimArray& src, MultidimArray& dest, const FloatPlan& plan, Normalization normalization); static void _inverseFourierTransform( MultidimArray& src, MultidimArray& dest, const FloatPlan& plan, Normalization normalization); public: /* These plan classes can be copied freely. The corresponding pairs of fftw_plan instances will be automatically deallocated using fftw_destroy_plan() when no instance of Double/FloatPlan points to them. (note the std::shared_ptr 'plan') */ class DoublePlan { public: /* Constructor for reusable plans. 'w', 'h' and 'd' are the dimensions of the *real*-array to be transformed. The corresponding complex array has the dimensions (w/2+1)*h*d 'flags' allows for controlling planning rigor and setting algorithmic restrictions. (cf. 
http://www.fftw.org/fftw3_doc/Planner-Flags.html) */ DoublePlan(int w, int h = 1, int d = 1, unsigned int flags = FFTW_ESTIMATE); // constructor for array-specific plans DoublePlan( MultidimArray& real, MultidimArray& complex, unsigned int flags = FFTW_ESTIMATE); fftw_plan getForward() const { return plan.get()->forward; } fftw_plan getBackward() const { return plan.get()->backward; } bool isCompatible(const MultidimArray& real) const { return real.xdim == w && real.ydim == h && real.zdim == d && (reusable || realPtr == MULTIDIM_ARRAY(real)); } bool isCompatible(const MultidimArray& complex) const { return complex.xdim == w/2+1 && complex.ydim == h && complex.zdim == d && (reusable || complexPtr == (double*)MULTIDIM_ARRAY(complex)); } bool isReusable() const { return reusable; } private: class Plan { public: Plan(fftw_plan forward, fftw_plan backward) : forward(forward), backward(backward) {} ~Plan() { pthread_mutex_lock(&fftw_plan_mutex_new); fftw_destroy_plan(forward); fftw_destroy_plan(backward); pthread_mutex_unlock(&fftw_plan_mutex_new); } fftw_plan forward, backward; }; bool reusable; int w, h, d; double *realPtr, *complexPtr; std::shared_ptr plan; }; class FloatPlan { public: FloatPlan(int w, int h = 1, int d = 1, unsigned int flags = FFTW_ESTIMATE); FloatPlan(MultidimArray& real, MultidimArray& complex, unsigned int flags = FFTW_ESTIMATE); fftwf_plan getForward() const { return plan.get()->forward; } fftwf_plan getBackward() const { return plan.get()->backward; } bool isCompatible(const MultidimArray& real) const { return (real.xdim == w && real.ydim == h && real.zdim == d) && (reusable || realPtr == MULTIDIM_ARRAY(real)); } bool isCompatible(const MultidimArray& complex) const { return (complex.xdim == w/2+1 && complex.ydim == h && complex.zdim == d) && (reusable || complexPtr == (float*)MULTIDIM_ARRAY(complex)); } bool isReusable() const { return reusable; } private: class Plan { public: Plan(fftwf_plan forward, fftwf_plan backward) : forward(forward), backward(backward) {} ~Plan() { pthread_mutex_lock(&fftw_plan_mutex_new); fftwf_destroy_plan(forward); fftwf_destroy_plan(backward); pthread_mutex_unlock(&fftw_plan_mutex_new); } fftwf_plan forward, backward; }; bool reusable; int w, h, d; float *realPtr, *complexPtr; std::shared_ptr plan; }; static pthread_mutex_t fftw_plan_mutex_new; }; // This is to get NewFFTPlan::Plan template struct NewFFTPlan {}; template <> struct NewFFTPlan { typedef NewFFT::DoublePlan type; }; template <> struct NewFFTPlan { typedef NewFFT::FloatPlan type; }; #endif relion-3.1.3/src/jaz/noise_helper.cpp000066400000000000000000000562411411340063500175460ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
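//
// A sketch of how the NewFFTPlan trait at the end of new_ft.h can be used to
// write precision-agnostic code; the helper below is hypothetical and assumes
// the tComplex element type from src/jaz/t_complex.h:
//
//   template <typename T>
//   void forwardTransform(MultidimArray<T>& img, MultidimArray<tComplex<T> >& spec)
//   {
//       typename NewFFTPlan<T>::type plan(img.xdim, img.ydim, img.zdim);
//       NewFFT::FourierTransform(img, spec, plan);
//   }
//
// Copies of a plan share one pair of fftw_plan objects through a shared_ptr,
// which is destroyed (under the plan mutex) only when the last copy goes away.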
***************************************************************************/ #include "src/jaz/noise_helper.h" #include "src/jaz/vtk_helper.h" #include "src/jaz/distribution_helper.h" #include "src/jaz/img_proc/filter_helper.h" #include Image NoiseHelper::predictCCNoise(Projector &prj, double sigma2, double nsamples_ppp, int max_nsamples, int nangles, Image& dmgWeight, CTF ctf0, double defocusMu, double defocusSigma, double angpix, int thread_num) { const int s = prj.ori_size; const int sh = s/2 + 1; const int vbins = 1024; const bool binValues = true; int goodAngles = 0; Image confusion(s,s); confusion.data.initZeros(); while (goodAngles < nangles) { double dx = 2.0*rand()/(double)RAND_MAX - 1.0; double dy = 2.0*rand()/(double)RAND_MAX - 1.0; double dz = 2.0*rand()/(double)RAND_MAX - 1.0; double dd = dx*dx + dy*dy + dz*dz; if (dd > 1.0) continue; goodAngles++; if (goodAngles % 10 == 0) std::cout << goodAngles << "/" << nangles << "\n"; Matrix1D dm(3); VECTOR_R3(dm, dx, dy, dz); RFLOAT rot, tilt; Euler_direction2angles(dm, rot, tilt); Matrix2D A3D; Euler_angles2matrix(rot, tilt, 0.0, A3D); Image spec(sh, s), ccspec(sh, s); spec.data.initZeros(); prj.get2DFourierTransform(spec.data, A3D); double defocus = DistributionHelper::sampleGauss(defocusMu, defocusSigma); ctf0.DeltafU = defocus; ctf0.DeltafV = defocus; ctf0.initialise(); FilterHelper::modulate(spec(), ctf0, angpix); for (long int yy = 0; yy < s; yy++) for (long int xx = 0; xx < sh; xx++) { DIRECT_A2D_ELEM(spec.data, yy, xx) *= sqrt(DIRECT_A2D_ELEM(dmgWeight.data, yy, xx)); DIRECT_A2D_ELEM(ccspec.data, yy, xx) = DIRECT_A2D_ELEM(spec.data, yy, xx).norm(); } Image mu0(s,s), img(s,s); FourierTransformer ft; ft.inverseFourierTransform(ccspec.data, mu0.data); ft.inverseFourierTransform(spec.data, img.data); const Image mu = mu0; double varScale = 0.0; for (long int yy = 0; yy < s; yy++) for (long int xx = 0; xx < s; xx++) { double m = DIRECT_A2D_ELEM(img.data, yy, xx)/(s*s); double mm = m*m; varScale += mm; } const double sigma2CC = sigma2*varScale; const double sigmaCC = sqrt(sigma2CC); RFLOAT vMin = std::numeric_limits::max(); RFLOAT vMax = -std::numeric_limits::max(); for (long int y = 0; y < s; y++) for (long int x = 0; x < s; x++) { double v = DIRECT_A2D_ELEM(mu.data, y, x); if (v > vMax) vMax = v; if (v < vMin) vMin = v; } std::vector plausibleVals(0); std::vector> plausiblePixels(0); plausibleVals.reserve(s*sh); plausiblePixels.reserve(s*sh); for (long int y = 0; y < sh; y++) for (long int x = 0; x < s; x++) { double m = DIRECT_A2D_ELEM(mu.data, y, x); double dm = vMax - m; if (dm <= 6.0*sigmaCC) { plausibleVals.push_back(m); plausiblePixels.push_back(std::make_pair(x,y)); } } const int npp = plausibleVals.size(); if (npp == 1) { std::pair origin = plausiblePixels[0]; DIRECT_A2D_ELEM(confusion.data, origin.second, origin.first) += 1.0; continue; } const int nsamples = std::min((int)(npp*nsamples_ppp), max_nsamples); std::vector hitsPerBin(vbins, 0), ppixelsPerBin(vbins, 0); const double floorBin = vMax - vMin < 6.0*sigmaCC? 
vMin : vMax - 6.0*sigmaCC; const double binRange = vMax - floorBin; if (binValues) { for (long int y = 0; y < sh; y++) for (long int x = 0; x < s; x++) { double m = DIRECT_A2D_ELEM(mu.data, y, x); double dm = vMax - m; if (dm <= 6.0*sigmaCC) { const int b = (int)((vbins - 1)*(m - floorBin)/binRange); ppixelsPerBin[b]++; } } } const int threads = thread_num; const double max_double = std::numeric_limits::max(); if (threads > 1) { const int stride = 2048; std::vector randThreadStates(threads*stride); for (int i = 0; i < threads; i++) { randThreadStates[stride*i] = rand(); } #pragma omp parallel num_threads(threads) { int threadnum = omp_get_thread_num(); for (int i = 0; i < nsamples/threads + 1; i++) { double vmax = -max_double; int jmax = 0; for (long int j = 0; j < npp; j++) { double m = plausibleVals[j]; double u1 = rand_r(&randThreadStates[stride*threadnum])/(double)RAND_MAX; double u2 = rand_r(&randThreadStates[stride*threadnum])/(double)RAND_MAX; double z = sqrt(-2.0*log(u1)) * cos(2.0*PI*u2); double v = m + sigmaCC * z; if (v > vmax) { vmax = v; jmax = j; } } if (binValues) { const int b = (int)((vbins - 1)*(plausibleVals[jmax] - floorBin)/binRange); #pragma omp atomic hitsPerBin[b]++; } else { int x = plausiblePixels[jmax].first; int y = plausiblePixels[jmax].second; #pragma omp atomic DIRECT_A2D_ELEM(confusion.data, y, x) += 1.0; } } } } else { for (int i = 0; i < nsamples; i++) { double vmax = -max_double; int jmax = 0; for (long int j = 0; j < npp; j++) { double m = plausibleVals[j]; double u1 = rand()/(double)RAND_MAX; double u2 = rand()/(double)RAND_MAX; double z = sqrt(-2.0*log(u1)) * cos(2.0*PI*u2); double v = m + sigmaCC * z; if (v > vmax) { vmax = v; jmax = j; } } if (binValues) { const int b = (int)((vbins - 1)*(plausibleVals[jmax] - floorBin)/binRange); hitsPerBin[b]++; } else { int x = plausiblePixels[jmax].first; int y = plausiblePixels[jmax].second; DIRECT_A2D_ELEM(confusion.data, y, x) += 1.0; } } } if (binValues) { for (long int y = 0; y < sh; y++) for (long int x = 0; x < s; x++) { const double m = DIRECT_A2D_ELEM(mu.data, y, x); if (m < floorBin) continue; const int b = (int)((vbins - 1)*(m - floorBin)/binRange); if (b >= 0 && hitsPerBin[b] > 0) { DIRECT_A2D_ELEM(confusion.data, y, x) += hitsPerBin[b] / (double) ppixelsPerBin[b]; } } } } return confusion; } std::vector NoiseHelper::radialAverage(Image &map, bool half) { const int w = map.data.xdim; const int h = map.data.ydim; const int b = (int)((w+h)/4); std::vector out(b, 0.0); std::vector wgh(b, 0.0); const int ha = half? h/2 : h; for (int yy = 0; yy < ha; yy++) for (int xx = 0; xx < w; xx++) { double x = xx < w/2.0? xx : xx - w; double y = yy < h/2.0? yy : yy - h; double rd = sqrt(x*x + y*y); int r0 = (int)(rd); double a = rd - r0; if (r0 < b) { out[r0] += (1.0 - a) * DIRECT_A2D_ELEM(map.data, yy, xx); wgh[r0] += 1.0 - a; } if (r0 < b-1) { out[r0+1] += a * DIRECT_A2D_ELEM(map.data, yy, xx); wgh[r0+1] += a; } } for (int i = 0; i < b; i++) { if (wgh[i] > 0.0) { out[i] /= wgh[i]; } } return out; } Image NoiseHelper::radialMap(std::vector &radAvg, bool centered) { const int b = radAvg.size(); const int s = 2*b; Image out(s,s); for (int yy = 0; yy < s; yy++) for (int xx = 0; xx < s; xx++) { double x, y; if (centered) { x = xx - s/2; y = yy - s/2; } else { x = xx < s/2? xx : xx - s; y = yy < s/2? 
yy : yy - s; } double rd = sqrt(x*x + y*y); int r0 = (int)rd; double a = rd - r0; if (r0 < b-1) { DIRECT_A2D_ELEM(out.data, yy, xx) = a * radAvg[r0+1] + (1-a) * radAvg[r0]; } else { DIRECT_A2D_ELEM(out.data, yy, xx) = radAvg[b-1]; } } return out; } std::vector NoiseHelper::radialAverage(Image &map, bool skipAxes) { const int w = map.data.xdim; const int h = map.data.ydim; const int b = w; std::vector out(b, 0.0); std::vector wgh(b, 0.0); for (int yy = 0; yy < h; yy++) for (int xx = 0; xx < w; xx++) { if (skipAxes && (xx == 0 || yy == 0)) continue; double x = xx; double y = yy < h/2.0? yy : yy - h; double rd = sqrt(x*x + y*y); int r0 = (int)(rd); double a = rd - r0; if (r0 < b) { out[r0] += (1.0 - a) * DIRECT_A2D_ELEM(map.data, yy, xx); wgh[r0] += 1.0 - a; } if (r0 < b-1) { out[r0+1] += a * DIRECT_A2D_ELEM(map.data, yy, xx); wgh[r0+1] += a; } } for (int i = 0; i < b; i++) { if (wgh[i] > 0.0) { out[i] /= wgh[i]; } } return out; } Image NoiseHelper::radialMap(std::vector &radAvg) { const int b = radAvg.size(); const int s = 2*b - 2; Image out(b,s); for (int yy = 0; yy < s; yy++) for (int xx = 0; xx < b; xx++) { double x = xx; double y = yy < s/2? yy : yy - s; double rd = sqrt(x*x + y*y); int r0 = (int)rd; double a = rd - r0; if (r0 < b-1) { DIRECT_A2D_ELEM(out.data, yy, xx) = a * radAvg[r0+1] + (1-a) * radAvg[r0]; } else { DIRECT_A2D_ELEM(out.data, yy, xx) = radAvg[b-1]; } } return out; } std::vector> NoiseHelper::radialAverageAndStdDevFFTW(Image &map) { const int w = map.data.xdim; const int h = map.data.ydim; const int b = w; std::vector avg(b, 0.0); std::vector wgh(b, 0.0); std::vector var(b, 0.0); for (int yy = 0; yy < h; yy++) for (int xx = 0; xx < w; xx++) { double x = xx; double y = yy < h/2.0? yy : yy - h; double rd = sqrt(x*x + y*y); int r = (int)(rd+0.5); if (r < b) { avg[r] += DIRECT_A2D_ELEM(map.data, yy, xx); wgh[r] += 1.0; } } for (int i = 0; i < b; i++) { if (wgh[i] > 0.0) { avg[i] /= wgh[i]; } } for (int yy = 0; yy < h; yy++) for (int xx = 0; xx < w; xx++) { double x = xx; double y = yy < h/2.0? yy : yy - h; double rd = sqrt(x*x + y*y); int r = (int)(rd+0.5); double mu = avg[r]; double v = DIRECT_A2D_ELEM(map.data, yy, xx) - mu; if (r < b) { var[r] += v*v; } } for (int i = 0; i < b; i++) { if (wgh[i] > 1.0) { var[i] /= (wgh[i]-1); } } std::vector> out(b); for (int i = 0; i < b; i++) { out[i] = std::make_pair(avg[i], sqrt(var[i])); } return out; } std::vector NoiseHelper::radialWeight(int w, int h, bool half) { const int b = (int)((w+h)/4); std::vector wgh(b, 0.0); const int ha = half? h/2 : h; for (int yy = 0; yy < ha; yy++) for (int xx = 0; xx < w; xx++) { double x = xx < w/2.0? xx : xx - w; double y = yy < h/2.0? 
yy : yy - h; double rd = sqrt(x*x + y*y); int r = (int)(rd + 0.5); if (r < b) { wgh[r] += 1.0; } } return wgh; } std::vector NoiseHelper::fill(Image& confusion, double lambda, int iterations) { const int s0 = confusion.data.xdim; std::vector ra = radialAverage(confusion, true); std::vector rw = radialWeight(s0, s0, true); const int s = ra.size(); std::vector w(s); for (int x = 0; x < s; x++) { w[x] = rw[x] * rw[x] * ra[x] * ra[x] + 1.0; } std::vector v = ra, vn = ra; for (int it = 0; it < iterations; it++) { for (int x = 1; x < s-1; x++) { vn[x] = (lambda*0.5*(v[x+1] + v[x-1]) + w[x]*ra[x])/(lambda + w[x]); } vn[s-1] = (lambda*0.5*v[s-2] + w[s-1]*ra[s-1])/(lambda*0.5 + w[s-1]); for (int x = 0; x < s; x++) { v[x] = vn[x]; } } return v; } Image NoiseHelper::normalize(const Image& confusion) { const int w = confusion.data.xdim; const int h = confusion.data.ydim; double sum = 0.0; for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { sum += DIRECT_A2D_ELEM(confusion.data, y, x); } Image out(w,h); if (sum > 0.0) { for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { DIRECT_A2D_ELEM(out.data, y, x) = DIRECT_A2D_ELEM(confusion.data, y, x) / sum; } } else { out.data.initZeros(); } return out; } void NoiseHelper::testVariance(Image img) { const int s = img.data.xdim; const int sh = s/2 + 1; Image spec(sh, s), ccspec(sh, s); spec.data.initZeros(); FourierTransformer ft; ft.FourierTransform(img(), spec()); for (long int yy = 0; yy < s; yy++) for (long int xx = 0; xx < sh; xx++) { DIRECT_A2D_ELEM(ccspec.data, yy, xx) = DIRECT_A2D_ELEM(spec.data, yy, xx).norm(); } Image mu(s,s); ft.inverseFourierTransform(ccspec.data, mu.data); double varScale = 0.0; for (long int yy = 0; yy < s; yy++) for (long int xx = 0; xx < s; xx++) { double m = DIRECT_A2D_ELEM(img.data, yy, xx)/(s*s); double mm = m*m; varScale += mm; } Image imgD = img; const double sig2 = 2.0; const double sig = sqrt(sig2); Image varImg = img; varImg.data.initZeros(); Image ccD(s,s); Image imgDs(sh,s), ccDs(sh,s); const int N = 10000; for (int i = 0; i < N; i++) { if (i%10==0) std::cout << i << "\n"; for (long int yy = 0; yy < s; yy++) for (long int xx = 0; xx < s; xx++) { double v = DIRECT_A2D_ELEM(img.data, yy, xx); DIRECT_A2D_ELEM(imgD.data, yy, xx) = DistributionHelper::sampleGauss(v, sig); } ft.FourierTransform(imgD(), imgDs()); for (long int yy = 0; yy < s; yy++) for (long int xx = 0; xx < sh; xx++) { DIRECT_A2D_ELEM(ccDs.data, yy, xx) = DIRECT_A2D_ELEM(spec.data, yy, xx) * DIRECT_A2D_ELEM(imgDs.data, yy, xx).conj(); } ft.inverseFourierTransform(ccDs.data, ccD.data); for (long int yy = 0; yy < s; yy++) for (long int xx = 0; xx < s; xx++) { double m0 = DIRECT_A2D_ELEM(mu.data, yy, xx); double md = DIRECT_A2D_ELEM(ccD.data, yy, xx); double d = md - m0; DIRECT_A2D_ELEM(varImg.data, yy, xx) += d*d/N; } } double varSum = 0.0; for (long int yy = 0; yy < s; yy++) for (long int xx = 0; xx < s; xx++) { varSum += DIRECT_A2D_ELEM(varImg.data, yy, xx); } varSum /= (s*s); std::cout << varSum << " vs. " << sig2*varScale << "\n"; } void NoiseHelper::testColorVariance(Image img, std::vector sig2) { const int s = img.data.xdim; const int sh = s/2 + 1; Image spec(sh, s); FourierTransformer ft; ft.FourierTransform(img(), spec()); VtkHelper::writeVTK(spec, "debug/spec.vtk"); for (long int y = 0; y < s; y++) for (long int x = 0; x < sh; x++) { if (x == 0 && y == 0) continue; const double yy = y < sh? 
y : y - s; const double xx = x; const int r = (int) sqrt(xx*xx + yy*yy); if (r >= sh) DIRECT_A2D_ELEM(spec.data, y, x) = Complex(0.0, 0.0); } double varPred = 0.0; for (long int y = 0; y < s; y++) for (long int x = 0; x < sh; x++) { if (x == 0 && y == 0) continue; const double yy = y < sh? y : y - s; const double xx = x; const int r = (int) sqrt(xx*xx + yy*yy); if (r < sh && r > 0) { double a = DIRECT_A2D_ELEM(spec.data, y, x).norm() / sig2[r]; if (x == 0) { varPred += a; } else { varPred += 2.0 * a; } } } //varPred *= s*s; std::cout << "pred: " << varPred << "\n"; std::vector sig(sh); for (int i = 0; i < sh; i++) { sig[i] = sqrt(sig2[i]); } Image varImg = img; varImg.data.initZeros(); Image ccD(s,s); Image ccDs(sh,s); const int N = 10000; const double sqrtH = sqrt(0.5); double varTest = 0.0; for (int i = 0; i < N; i++) { if (i%100==0) std::cout << i << "\n"; for (long int y = 0; y < s; y++) for (long int x = 0; x < sh; x++) { const double yy = y < sh? y : y - s; const double xx = x; const int r = (int) sqrt(xx*xx + yy*yy); if (r < sh && r > 0) { Complex z0 = DIRECT_A2D_ELEM(spec.data, y, x); double r0 = DistributionHelper::sampleGauss(0,sqrtH); double r1 = DistributionHelper::sampleGauss(0,sqrtH); Complex z1 = Complex(r0,r1) * z0 / sig[r]; DIRECT_A2D_ELEM(ccDs.data, y, x) = z1; if (x == 0 && y >= sh) { DIRECT_A2D_ELEM(ccDs.data, y, x) = DIRECT_A2D_ELEM(ccDs.data, s-y, x).conj(); } if (x == 0) { varTest += DIRECT_A2D_ELEM(ccDs.data, y, x).norm(); } else { varTest += 2.0 * DIRECT_A2D_ELEM(ccDs.data, y, x).norm(); } } else { DIRECT_A2D_ELEM(ccDs.data, y, x) = 0.0; } } ft.inverseFourierTransform(ccDs.data, ccD.data); for (long int y = 0; y < s; y++) for (long int x = 0; x < s; x++) { double d = DIRECT_A2D_ELEM(ccD.data, y, x); DIRECT_A2D_ELEM(varImg.data, y, x) += d*d/(N*varPred); } } varTest /= N; VtkHelper::writeVTK(varImg, "debug/varImg_cn.vtk"); double varSum = 0.0; for (long int yy = 0; yy < s; yy++) for (long int xx = 0; xx < s; xx++) { varSum += DIRECT_A2D_ELEM(varImg.data, yy, xx); } varSum /= (s*s); std::cout << varSum << " @ " << varPred << " vs. " << varTest << "\n"; } void NoiseHelper::testParseval() { const int s = 512; const int sh = s/2+1; Image real(s,s); Image freq(sh,s); for (int i = 0; i < 10; i++) { double varr = 0.0; for (int y = 0; y < s; y++) for (int x = 0; x < s; x++) { DIRECT_A2D_ELEM(real.data, y, x) = DistributionHelper::sampleGauss(0,2); double v = DIRECT_A2D_ELEM(real.data, y, x); varr += v*v; } varr /= s*s; FourierTransformer ft; ft.FourierTransform(real(), freq()); double var = 0.0; for (int y = 0; y < s; y++) for (int x = 0; x < sh; x++) { const Complex z = DIRECT_A2D_ELEM(freq.data, y, x); if (x == 0) { var += z.norm(); } else { var += 2*z.norm(); } } var /= s*s; std::cout << varr << " vs. 
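// Sketch of the bookkeeping verified in these test routines, assuming
// FourierTransformer's forward normalisation by the number of samples:
// Parseval's theorem then reads
//   sum_n |f_n|^2 = (s*s) * sum_k |F_k|^2,
// so varr = (s*s) * varf once both sides are divided by the number of samples;
// the factor 2 applied to x > 0 terms accounts for the Hermitian half that is
// not stored in the FFTW half-transform.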
" << var << " (" << var*((double)(s*s)) << ")\n"; // varr = varf*A } std::cout << "\n"; for (int i = 0; i < 10; i++) { double varf = 0.0; for (int y = 0; y < s; y++) for (int x = 0; x < sh; x++) { DIRECT_A2D_ELEM(freq.data, y, x).real = DistributionHelper::sampleGauss(0,sqrt(2.0)); if (x > 0 && x < sh-1) DIRECT_A2D_ELEM(freq.data, y, x).imag = DistributionHelper::sampleGauss(0,sqrt(2.0)); else DIRECT_A2D_ELEM(freq.data, y, x).imag = 0.0; if (x == 0) { varf += DIRECT_A2D_ELEM(freq.data, y, x).norm(); } else { varf += 2.0 * DIRECT_A2D_ELEM(freq.data, y, x).norm(); } } varf /= s*s; FourierTransformer ft; ft.inverseFourierTransform(freq(), real()); double var = 0.0; for (int y = 0; y < s; y++) for (int x = 0; x < s; x++) { double v = DIRECT_A2D_ELEM(real.data, y, x); var += v*v; } var /= s*s; std::cout << varf << " vs. " << var << " (" << var/((double)(s*s)) << ")\n"; // varr/A = varf } } relion-3.1.3/src/jaz/noise_helper.h000066400000000000000000000043141411340063500172050ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef NOISE_HELPER #define NOISE_HELPER #include #include "src/projector.h" #include "src/ctf.h" class NoiseHelper { public: static Image predictCCNoise(Projector& prj, double sigma2, double nsamples_ppp, int max_nsamples, int nangles, Image &dmgWeight, CTF ctf0, double defocusMu, double defocusSigma, double angpix, int thread_num = 1); static Image visualize(std::vector); static std::vector radialAverage(Image& map, bool half); static Image radialMap(std::vector& radAvg, bool centered); static std::vector radialAverage(Image& map, bool skipAxes); static Image radialMap(std::vector& radAvg); static std::vector> radialAverageAndStdDevFFTW(Image& map); static std::vector radialWeight(int w, int h, bool half); static std::vector fill(Image& confusion, double lambda, int iterations); static Image normalize(const Image &confusion); static void testVariance(Image img); static void testColorVariance(Image img, std::vector sig2); static void testParseval(); }; #endif relion-3.1.3/src/jaz/obs_model.cpp000066400000000000000000000776301411340063500170420ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include "src/jaz/obs_model.h" #include "src/jaz/stack_helper.h" #include "src/jaz/img_proc/filter_helper.h" #include "src/jaz/Fourier_helper.h" #include "src/jaz/ctf/tilt_helper.h" #include "src/jaz/math/Zernike.h" #include "src/jaz/vtk_helper.h" #include "src/jaz/io/star_converter.h" #include #include #include using namespace gravis; void ObservationModel::loadSafely(std::string filename, ObservationModel& obsModel, MetaDataTable& particlesMdt, std::string tablename, int verb, bool do_die_upon_error) { MetaDataTable opticsMdt; std::string mytablename; if (tablename == "discover") { if (particlesMdt.read(filename, "particles")) { mytablename = "particles"; } else if (particlesMdt.read(filename, "micrographs")) { mytablename = "micrographs"; } else if (particlesMdt.read(filename, "movies")) { mytablename = "movies"; } } else { particlesMdt.read(filename, tablename); mytablename = tablename; } opticsMdt.read(filename, "optics"); if (opticsMdt.numberOfObjects() == 0) { if (verb > 0) { std::cerr << "WARNING: " << filename << " seems to be from a previous version of Relion. Attempting conversion...\n"; std::cerr << " You should make sure metadata in the optics group table after conversion is correct.\n"; } MetaDataTable oldMdt; oldMdt.read(filename); StarConverter::convert_3p0_particlesTo_3p1(oldMdt, particlesMdt, opticsMdt, mytablename, do_die_upon_error); if (!do_die_upon_error && opticsMdt.numberOfObjects() == 0) return; // return an empty optics table if error was raised if (mytablename == "" || mytablename == "discover") { if (particlesMdt.containsLabel(EMDL_IMAGE_NAME)) particlesMdt.setName("particles"); else if (particlesMdt.containsLabel(EMDL_MICROGRAPH_MOVIE_NAME)) particlesMdt.setName("movies"); else particlesMdt.setName("micrographs"); } } obsModel = ObservationModel(opticsMdt, do_die_upon_error); if (!do_die_upon_error && obsModel.opticsMdt.numberOfObjects() == 0) return; // return an empty optics table if error was raised // make sure all optics groups are defined std::vector undefinedOptGroups = obsModel.findUndefinedOptGroups(particlesMdt); if (undefinedOptGroups.size() > 0) { std::stringstream sts; for (int i = 0; i < undefinedOptGroups.size(); i++) { sts << undefinedOptGroups[i]; if (i < undefinedOptGroups.size()-1) { sts << ", "; } } REPORT_ERROR("ERROR: The following optics groups were not defined in "+ filename + ": " + sts.str()); } // make sure the optics groups appear in the right order (and rename them if necessary) if (!obsModel.opticsGroupsSorted()) { if (verb > 0) { std::cerr << " - Warning: the optics groups in " << filename << " are not in the right order - renaming them now" << std::endl; } obsModel.sortOpticsGroups(particlesMdt); } if (mytablename != "particles" && obsModel.opticsMdt.containsLabel(EMDL_IMAGE_PIXEL_SIZE)) { std::cerr << "WARNING: This is not a particle STAR file but contains rlnImagePixelSize column." 
<< std::endl; if (!obsModel.opticsMdt.containsLabel(EMDL_MICROGRAPH_PIXEL_SIZE)) { std::cerr << "Pixel size in rlnImagePixelSize will be copied to rlnMicrographPixelSize column. Please make sure this is correct!" << std::endl; FOR_ALL_OBJECTS_IN_METADATA_TABLE(obsModel.opticsMdt) { RFLOAT image_angpix; obsModel.opticsMdt.getValue(EMDL_IMAGE_PIXEL_SIZE, image_angpix); obsModel.opticsMdt.setValue(EMDL_MICROGRAPH_PIXEL_SIZE, image_angpix); } } } } void ObservationModel::saveNew( MetaDataTable &particlesMdt, MetaDataTable &opticsMdt, std::string filename, std::string tablename) { std::string tmpfilename = filename + ".tmp"; std::ofstream of(tmpfilename); opticsMdt.setName("optics"); opticsMdt.write(of); particlesMdt.setName(tablename); particlesMdt.write(of); std::rename(tmpfilename.c_str(), filename.c_str()); } void ObservationModel::save(MetaDataTable &particlesMdt, std::string filename, std::string tablename) { std::string tmpfilename = filename + ".tmp"; std::ofstream of(tmpfilename); opticsMdt.setName("optics"); opticsMdt.write(of); particlesMdt.setName(tablename); particlesMdt.write(of); std::rename(tmpfilename.c_str(), filename.c_str()); } ObservationModel::ObservationModel() { } ObservationModel::ObservationModel(const MetaDataTable &_opticsMdt, bool do_die_upon_error) : opticsMdt(_opticsMdt), angpix(_opticsMdt.numberOfObjects()), lambda(_opticsMdt.numberOfObjects()), Cs(_opticsMdt.numberOfObjects()), boxSizes(_opticsMdt.numberOfObjects(), 0.0), CtfPremultiplied(_opticsMdt.numberOfObjects(), false) { if (!(opticsMdt.containsLabel(EMDL_IMAGE_PIXEL_SIZE) || opticsMdt.containsLabel(EMDL_MICROGRAPH_PIXEL_SIZE) || opticsMdt.containsLabel(EMDL_MICROGRAPH_ORIGINAL_PIXEL_SIZE)) || !opticsMdt.containsLabel(EMDL_CTF_VOLTAGE) || !opticsMdt.containsLabel(EMDL_CTF_CS)) { if (do_die_upon_error) { REPORT_ERROR_STR("ERROR: not all necessary variables defined in _optics.star file: " << "rlnPixelSize, rlnVoltage and rlnSphericalAberration. 
Make sure to convert older STAR files anew in version-3.1, " << "with relion_convert_star."); } else { opticsMdt.clear(); return; } } // symmetrical high-order aberrations: hasEvenZernike = opticsMdt.containsLabel(EMDL_IMAGE_EVEN_ZERNIKE_COEFFS); evenZernikeCoeffs = std::vector >(opticsMdt.numberOfObjects(), std::vector(0)); gammaOffset = std::vector > >(opticsMdt.numberOfObjects()); // antisymmetrical high-order aberrations: hasOddZernike = opticsMdt.containsLabel(EMDL_IMAGE_ODD_ZERNIKE_COEFFS); oddZernikeCoeffs = std::vector >(opticsMdt.numberOfObjects(), std::vector(0)); phaseCorr = std::vector > >(opticsMdt.numberOfObjects()); const bool hasTilt = opticsMdt.containsLabel(EMDL_IMAGE_BEAMTILT_X) || opticsMdt.containsLabel(EMDL_IMAGE_BEAMTILT_Y); // anisotropic magnification: hasMagMatrices = opticsMdt.containsLabel(EMDL_IMAGE_MAG_MATRIX_00) || opticsMdt.containsLabel(EMDL_IMAGE_MAG_MATRIX_01) || opticsMdt.containsLabel(EMDL_IMAGE_MAG_MATRIX_10) || opticsMdt.containsLabel(EMDL_IMAGE_MAG_MATRIX_11); magMatrices.resize(opticsMdt.numberOfObjects()); hasBoxSizes = opticsMdt.containsLabel(EMDL_IMAGE_SIZE); if (opticsMdt.containsLabel(EMDL_IMAGE_OPTICS_GROUP_NAME)) { groupNames.resize(opticsMdt.numberOfObjects()); } if (opticsMdt.containsLabel(EMDL_IMAGE_MTF_FILENAME)) { fnMtfs.resize(opticsMdt.numberOfObjects()); mtfImage = std::vector > >(opticsMdt.numberOfObjects()); } if (opticsMdt.containsLabel(EMDL_MICROGRAPH_ORIGINAL_PIXEL_SIZE)) { originalAngpix.resize(opticsMdt.numberOfObjects()); } for (int i = 0; i < opticsMdt.numberOfObjects(); i++) { if (!opticsMdt.getValue(EMDL_IMAGE_PIXEL_SIZE, angpix[i], i)) if (!opticsMdt.getValue(EMDL_MICROGRAPH_PIXEL_SIZE, angpix[i], i)) opticsMdt.getValue(EMDL_MICROGRAPH_ORIGINAL_PIXEL_SIZE, angpix[i], i); if (opticsMdt.containsLabel(EMDL_IMAGE_OPTICS_GROUP_NAME)) opticsMdt.getValue(EMDL_IMAGE_OPTICS_GROUP_NAME, groupNames[i], i); if (opticsMdt.containsLabel(EMDL_IMAGE_MTF_FILENAME)) opticsMdt.getValue(EMDL_IMAGE_MTF_FILENAME, fnMtfs[i], i); if (opticsMdt.containsLabel(EMDL_MICROGRAPH_ORIGINAL_PIXEL_SIZE)) opticsMdt.getValue(EMDL_MICROGRAPH_ORIGINAL_PIXEL_SIZE, originalAngpix[i], i); if (opticsMdt.containsLabel(EMDL_OPTIMISER_DATA_ARE_CTF_PREMULTIPLIED)) { bool val; opticsMdt.getValue(EMDL_OPTIMISER_DATA_ARE_CTF_PREMULTIPLIED, val, i); CtfPremultiplied[i] = val; } opticsMdt.getValue(EMDL_IMAGE_SIZE, boxSizes[i], i); double kV; opticsMdt.getValue(EMDL_CTF_VOLTAGE, kV, i); double V = kV * 1e3; lambda[i] = 12.2643247 / sqrt(V * (1.0 + V * 0.978466e-6)); opticsMdt.getValue(EMDL_CTF_CS, Cs[i], i); if (hasEvenZernike) { opticsMdt.getValue(EMDL_IMAGE_EVEN_ZERNIKE_COEFFS, evenZernikeCoeffs[i], i); } if (hasOddZernike) { opticsMdt.getValue(EMDL_IMAGE_ODD_ZERNIKE_COEFFS, oddZernikeCoeffs[i], i); } if (hasTilt) { double tx(0), ty(0); opticsMdt.getValue(EMDL_IMAGE_BEAMTILT_X, tx, i); opticsMdt.getValue(EMDL_IMAGE_BEAMTILT_Y, ty, i); if (!hasOddZernike) { oddZernikeCoeffs[i] = std::vector(6, 0.0); } TiltHelper::insertTilt(oddZernikeCoeffs[i], tx, ty, Cs[i], lambda[i]); } // always keep a set of mag matrices // if none are defined, keep a set of identity matrices magMatrices[i] = Matrix2D(2,2); magMatrices[i].initIdentity(); // See if there is more than one MTF, for more rapid divideByMtf hasMultipleMtfs = false; for (int j = 1; j < fnMtfs.size(); j++) { if (fnMtfs[j] != fnMtfs[0]) { hasMultipleMtfs = true; break; } } if (hasMagMatrices) { opticsMdt.getValue(EMDL_IMAGE_MAG_MATRIX_00, magMatrices[i](0,0), i); opticsMdt.getValue(EMDL_IMAGE_MAG_MATRIX_01, 
magMatrices[i](0,1), i); opticsMdt.getValue(EMDL_IMAGE_MAG_MATRIX_10, magMatrices[i](1,0), i); opticsMdt.getValue(EMDL_IMAGE_MAG_MATRIX_11, magMatrices[i](1,1), i); } } if (hasTilt) hasOddZernike = true; } void ObservationModel::predictObservation(Projector& proj, const MetaDataTable& partMdt, long int particle, MultidimArray& dest, double angpix_ref, bool applyCtf, bool shiftPhases, bool applyShift, bool applyMtf, bool applyCtfPadding) { const int s_ref = proj.ori_size; int opticsGroup; partMdt.getValue(EMDL_IMAGE_OPTICS_GROUP, opticsGroup, particle); opticsGroup--; if (!hasBoxSizes) { REPORT_ERROR_STR("ObservationModel::predictObservation: Unable to make a prediction without knowing the box size.\n"); } const int s_out = boxSizes[opticsGroup]; const int sh_out = s_out/2 + 1; double xoff, yoff; partMdt.getValue(EMDL_ORIENT_ORIGIN_X_ANGSTROM, xoff, particle); partMdt.getValue(EMDL_ORIENT_ORIGIN_Y_ANGSTROM, yoff, particle); xoff /= angpix[opticsGroup]; yoff /= angpix[opticsGroup]; double rot, tilt, psi; Matrix2D A3D; partMdt.getValue(EMDL_ORIENT_ROT, rot, particle); partMdt.getValue(EMDL_ORIENT_TILT, tilt, particle); partMdt.getValue(EMDL_ORIENT_PSI, psi, particle); Euler_angles2matrix(rot, tilt, psi, A3D); A3D = applyAnisoMag(A3D, opticsGroup); A3D = applyScaleDifference(A3D, opticsGroup, s_ref, angpix_ref); if (dest.xdim != sh_out || dest.ydim != s_out) { dest.resize(s_out,sh_out); } dest.initZeros(); proj.get2DFourierTransform(dest, A3D); if (applyShift) { shiftImageInFourierTransform(dest, dest, s_out, s_out/2 - xoff, s_out/2 - yoff); } if (applyCtf) { CTF ctf; ctf.readByGroup(partMdt, this, particle); Image ctfImg(sh_out,s_out); ctf.getFftwImage(ctfImg(), s_out, s_out, angpix[opticsGroup], false, false, false, true, applyCtfPadding); if (getCtfPremultiplied(opticsGroup)) { for (int y = 0; y < s_out; y++) for (int x = 0; x < sh_out; x++) { dest(y,x) *= ctfImg(y,x) * ctfImg(y,x); } } else { for (int y = 0; y < s_out; y++) for (int x = 0; x < sh_out; x++) { dest(y,x) *= ctfImg(y,x); } } } if (shiftPhases && oddZernikeCoeffs.size() > opticsGroup && oddZernikeCoeffs[opticsGroup].size() > 0) { const Image& corr = getPhaseCorrection(opticsGroup, s_out); for (int y = 0; y < s_out; y++) for (int x = 0; x < sh_out; x++) { dest(y,x) *= corr(y,x); } } if (applyMtf && fnMtfs.size() > opticsGroup) { const Image& mtf = getMtfImage(opticsGroup, s_out); for (int y = 0; y < s_out; y++) for (int x = 0; x < sh_out; x++) { dest(y,x) *= mtf(y,x); } } } Volume> ObservationModel::predictComplexGradient(Projector &proj, const MetaDataTable &partMdt, long particle, double angpix_ref, bool applyCtf, bool shiftPhases, bool applyShift, bool applyMtf, bool applyCtfPadding) { if (applyCtf || applyShift || applyCtfPadding) { REPORT_ERROR_STR("ObservationModel::predictComplexGradient: " << "applyCtf and applyShift and applyCtfPadding are currently not supported\n"); } const int s_ref = proj.ori_size; int opticsGroup; partMdt.getValue(EMDL_IMAGE_OPTICS_GROUP, opticsGroup, particle); opticsGroup--; const int s_out = boxSizes[opticsGroup]; const int sh_out = s_out/2 + 1; Volume> out(sh_out,s_out,1); double xoff, yoff; partMdt.getValue(EMDL_ORIENT_ORIGIN_X_ANGSTROM, xoff, particle); partMdt.getValue(EMDL_ORIENT_ORIGIN_Y_ANGSTROM, yoff, particle); xoff /= angpix[opticsGroup]; yoff /= angpix[opticsGroup]; double rot, tilt, psi; Matrix2D A3D; partMdt.getValue(EMDL_ORIENT_ROT, rot, particle); partMdt.getValue(EMDL_ORIENT_TILT, tilt, particle); partMdt.getValue(EMDL_ORIENT_PSI, psi, particle); Euler_angles2matrix(rot, tilt, 
psi, A3D); A3D = applyAnisoMag(A3D, opticsGroup); A3D = applyScaleDifference(A3D, opticsGroup, s_ref, angpix_ref); proj.projectGradient(out, A3D); if (shiftPhases && oddZernikeCoeffs.size() > opticsGroup && oddZernikeCoeffs[opticsGroup].size() > 0) { const Image& corr = getPhaseCorrection(opticsGroup, s_out); for (int y = 0; y < s_out; y++) for (int x = 0; x < sh_out; x++) { out(x,y,0).x *= corr(y,x); out(x,y,0).y *= corr(y,x); } } if (applyMtf && fnMtfs.size() > opticsGroup) { const Image& mtf = getMtfImage(opticsGroup, s_out); for (int y = 0; y < s_out; y++) for (int x = 0; x < sh_out; x++) { out(x,y,0).x *= mtf(y,x); out(x,y,0).y *= mtf(y,x); } } return out; } void ObservationModel::divideByMtf(const MetaDataTable& partMdt, long particle, MultidimArray& obsImage, bool do_multiply_instead, bool do_correct_average_mtf) { int opticsGroup; partMdt.getValue(EMDL_IMAGE_OPTICS_GROUP, opticsGroup, particle); opticsGroup--; divideByMtf(opticsGroup, obsImage, do_multiply_instead, do_correct_average_mtf); } void ObservationModel::divideByMtf(int opticsGroup, MultidimArray& obsImage, bool do_multiply_instead, bool do_correct_average_mtf) { const int s = obsImage.ydim; const int sh = obsImage.xdim; // If there is only a single MTF and we are correcting for the average, then do nothing... if (do_correct_average_mtf && !hasMultipleMtfs) return; if (fnMtfs.size() > opticsGroup) { const Image& mtf = getMtfImage(opticsGroup, s); const Image& avgmtf = getAverageMtfImage(s); if (do_multiply_instead) { if (do_correct_average_mtf) { for (int y = 0; y < s; y++) for (int x = 0; x < sh; x++) { obsImage(y,x) *= mtf(y,x); obsImage(y,x) /= avgmtf(y,x); } } else { for (int y = 0; y < s; y++) for (int x = 0; x < sh; x++) { obsImage(y,x) *= mtf(y,x); } } } else { if (do_correct_average_mtf) { for (int y = 0; y < s; y++) for (int x = 0; x < sh; x++) { obsImage(y,x) /= mtf(y,x); obsImage(y,x) *= avgmtf(y,x); } } else { for (int y = 0; y < s; y++) for (int x = 0; x < sh; x++) { obsImage(y,x) /= mtf(y,x); } } } } } void ObservationModel::demodulatePhase(const MetaDataTable& partMdt, long particle, MultidimArray& obsImage, bool do_modulate_instead) { int opticsGroup; partMdt.getValue(EMDL_IMAGE_OPTICS_GROUP, opticsGroup, particle); opticsGroup--; demodulatePhase(opticsGroup, obsImage, do_modulate_instead); } void ObservationModel::demodulatePhase(int opticsGroup, MultidimArray& obsImage, bool do_modulate_instead) { const int s = obsImage.ydim; const int sh = obsImage.xdim; if (oddZernikeCoeffs.size() > opticsGroup && oddZernikeCoeffs[opticsGroup].size() > 0) { const Image& corr = getPhaseCorrection(opticsGroup, s); if (do_modulate_instead) { for (int y = 0; y < s; y++) for (int x = 0; x < sh; x++) { obsImage(y,x) *= corr(y,x); } } else { for (int y = 0; y < s; y++) for (int x = 0; x < sh; x++) { obsImage(y,x) *= corr(y,x).conj(); } } } } bool ObservationModel::allPixelSizesIdentical() const { bool out = true; for (int i = 1; i < angpix.size(); i++) { if (angpix[i] != angpix[0]) { out = false; break; } } return out; } bool ObservationModel::allBoxSizesIdentical() const { bool out = true; for (int i = 1; i < boxSizes.size(); i++) { if (boxSizes[i] != boxSizes[0]) { out = false; break; } } return out; } double ObservationModel::angToPix(double a, int s, int opticsGroup) const { return s * angpix[opticsGroup] / a; } double ObservationModel::pixToAng(double p, int s, int opticsGroup) const { return s * angpix[opticsGroup] / p; } void ObservationModel::setPixelSize(int opticsGroup, RFLOAT newPixelSize) { if (opticsGroup < 0 || 
opticsGroup >= boxSizes.size()) { REPORT_ERROR("ObservationModel::setPixelSize: wrong opticsGroup"); } angpix[opticsGroup] = newPixelSize; phaseCorr[opticsGroup].clear(); gammaOffset[opticsGroup].clear(); // mtfImage can be empty if (mtfImage.size() > 0) mtfImage[opticsGroup].clear(); } double ObservationModel::getPixelSize(int opticsGroup) const { return angpix[opticsGroup]; } std::vector ObservationModel::getPixelSizes() const { return angpix; } double ObservationModel::getWavelength(int opticsGroup) const { return lambda[opticsGroup]; } std::vector ObservationModel::getWavelengths() const { return lambda; } double ObservationModel::getSphericalAberration(int opticsGroup) const { return Cs[opticsGroup]; } std::vector ObservationModel::getSphericalAberrations() const { return Cs; } void ObservationModel::setBoxSize(int opticsGroup, int newBoxSize) { if (opticsGroup < 0 || opticsGroup >= boxSizes.size()) { REPORT_ERROR("ObservationModel::setBoxSize: wrong opticsGroup"); } boxSizes[opticsGroup] = newBoxSize; phaseCorr[opticsGroup].clear(); gammaOffset[opticsGroup].clear(); // mtfImage can be empty if (mtfImage.size() > 0) mtfImage[opticsGroup].clear(); } int ObservationModel::getBoxSize(int opticsGroup) const { if (!hasBoxSizes) { REPORT_ERROR("ObservationModel::getBoxSize: box sizes not available. Make sure particle images are available before converting/importing STAR files from earlier versions of RELION.\n"); } return boxSizes[opticsGroup]; } void ObservationModel::getBoxSizes(std::vector& sDest, std::vector& shDest) const { if (!hasBoxSizes) { REPORT_ERROR("ObservationModel::getBoxSizes: box sizes not available. Make sure particle images are available before converting/importing STAR files from earlier versions of RELION.\n"); } sDest.resize(boxSizes.size()); shDest.resize(boxSizes.size()); for (int i = 0; i < boxSizes.size(); i++) { sDest[i] = boxSizes[i]; shDest[i] = boxSizes[i]/2 + 1; } } Matrix2D ObservationModel::getMagMatrix(int opticsGroup) const { return magMatrices[opticsGroup]; } void ObservationModel::setMagMatrix(int opticsGroup, const Matrix2D &M) { magMatrices[opticsGroup] = M; } std::vector > ObservationModel::getMagMatrices() const { return magMatrices; } int ObservationModel::getOpticsGroup(const MetaDataTable &particlesMdt, long int particle) const { int opticsGroup = -1; if (!particlesMdt.getValue(EMDL_IMAGE_OPTICS_GROUP, opticsGroup, particle)) REPORT_ERROR("ObservationModel::getOpticsGroup: Failed to get optics group for particle #" + particle); opticsGroup--; return opticsGroup; } bool ObservationModel::getCtfPremultiplied(int og) const { if (og < CtfPremultiplied.size()) { return CtfPremultiplied[og]; } else { return false; } } void ObservationModel::setCtfPremultiplied(int og, bool val) { CtfPremultiplied[og] = val; } std::string ObservationModel::getGroupName(int og) { if (og < groupNames.size()) { return groupNames[og]; } else { std::stringstream sts; sts << (og+1); return sts.str(); } } bool ObservationModel::allPixelAndBoxSizesIdentical(const MetaDataTable &mdt) { int og0 = getOpticsGroup(mdt, 0); int boxSize0 = getBoxSize(og0); double angpix0 = getPixelSize(og0); bool allGood = true; const int pc = mdt.numberOfObjects(); for (int p = 1; p < pc; p++) { int og = getOpticsGroup(mdt, p); if (og != og0) { int boxSize = getBoxSize(og); double angpix = getPixelSize(og); if (boxSize != boxSize0 || angpix != angpix0) { allGood = false; break; } } } return allGood; } bool ObservationModel::containsGroup(const MetaDataTable &mdt, int group) { const int pc = 
mdt.numberOfObjects(); for (int p = 0; p < pc; p++) { int og = getOpticsGroup(mdt, p); if (og == group) { return true; } } return false; } int ObservationModel::numberOfOpticsGroups() const { return opticsMdt.numberOfObjects(); } bool ObservationModel::opticsGroupsSorted() const { for (int i = 0; i < opticsMdt.numberOfObjects(); i++) { int og; opticsMdt.getValue(EMDL_IMAGE_OPTICS_GROUP, og, i); if (og != i+1) { return false; } } return true; } std::vector ObservationModel::findUndefinedOptGroups(const MetaDataTable &partMdt) const { std::set definedGroups; for (int i = 0; i < opticsMdt.numberOfObjects(); i++) { int og; opticsMdt.getValue(EMDL_IMAGE_OPTICS_GROUP, og, i); definedGroups.insert(og); } std::vector out; out.reserve(opticsMdt.numberOfObjects()); for (long int i = 0; i < partMdt.numberOfObjects(); i++) { int og; partMdt.getValue(EMDL_IMAGE_OPTICS_GROUP, og, i); if (definedGroups.find(og) == definedGroups.end()) { out.push_back(og); } } return out; } void ObservationModel::sortOpticsGroups(MetaDataTable& partMdt) { std::map old2new; for (int i = 0; i < opticsMdt.numberOfObjects(); i++) { int og; opticsMdt.getValue(EMDL_IMAGE_OPTICS_GROUP, og, i); old2new[og] = i+1; opticsMdt.setValue(EMDL_IMAGE_OPTICS_GROUP, i+1, i); } for (long int i = 0; i < partMdt.numberOfObjects(); i++) { int og; partMdt.getValue(EMDL_IMAGE_OPTICS_GROUP, og, i); partMdt.setValue(EMDL_IMAGE_OPTICS_GROUP, old2new[og], i); } } std::vector ObservationModel::getOptGroupsPresent_oneBased(const MetaDataTable& partMdt) const { const int gc = opticsMdt.numberOfObjects(); const long long int pc = partMdt.numberOfObjects(); std::vector optGroupIsPresent(gc, false); for (long int p = 0; p < pc; p++) { int og; partMdt.getValue(EMDL_IMAGE_OPTICS_GROUP, og, p); optGroupIsPresent[og-1] = true; } std::vector out(0); out.reserve(gc); for (int g = 0; g < gc; g++) { if (optGroupIsPresent[g]) { out.push_back(g+1); } } return out; } std::vector ObservationModel::getOptGroupsPresent_zeroBased(const MetaDataTable& partMdt) const { const int gc = opticsMdt.numberOfObjects(); const long long int pc = partMdt.numberOfObjects(); std::vector optGroupIsPresent(gc, false); for (long int p = 0; p < pc; p++) { int og; partMdt.getValue(EMDL_IMAGE_OPTICS_GROUP, og, p); optGroupIsPresent[og-1] = true; } std::vector out(0); out.reserve(gc); for (int g = 0; g < gc; g++) { if (optGroupIsPresent[g]) { out.push_back(g); } } return out; } std::vector>> ObservationModel::splitParticlesByOpticsGroup(const MetaDataTable &partMdt) const { std::vector presentGroups = ObservationModel::getOptGroupsPresent_zeroBased(partMdt); const int pogc = presentGroups.size(); const int ogc = opticsMdt.numberOfObjects(); std::vector groupToPresentGroup(ogc, -1); for (int pog = 0; pog < pogc; pog++) { const int og = presentGroups[pog]; groupToPresentGroup[og] = pog; } std::vector>> out(pogc); for (int pog = 0; pog < pogc; pog++) { out[pog] = std::make_pair(presentGroups[pog], std::vector(0)); } const long long int pc = partMdt.numberOfObjects(); for (long int p = 0; p < pc; p++) { int og; partMdt.getValue(EMDL_IMAGE_OPTICS_GROUP, og, p); og--; int pog = groupToPresentGroup[og]; out[pog].second.push_back(p); } return out; } const Image& ObservationModel::getMtfImage(int optGroup, int s) { #pragma omp critical(ObservationModel_getMtfImage) { if (mtfImage[optGroup].find(s) == mtfImage[optGroup].end()) { if (mtfImage[optGroup].size() > 100) { std::cerr << "Warning: " << (mtfImage[optGroup].size()+1) << " mtf images in cache for the same ObservationModel." 
<< std::endl; } if (optGroup >= originalAngpix.size()) REPORT_ERROR("For MTF correction, the rlnMicrographOriginalPixelSize column is necessary in the optics table."); MetaDataTable MDmtf; MultidimArray mtf_resol, mtf_value; MDmtf.read(fnMtfs[optGroup]); mtf_resol.resize(MDmtf.numberOfObjects()); mtf_value.resize(mtf_resol); RFLOAT resol_inv_pixel; int i = 0; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDmtf) { MDmtf.getValue(EMDL_RESOLUTION_INVPIXEL, resol_inv_pixel); DIRECT_A1D_ELEM(mtf_resol, i) = resol_inv_pixel/originalAngpix[optGroup]; // resolution needs to be given in 1/Ang MDmtf.getValue(EMDL_POSTPROCESS_MTF_VALUE, DIRECT_A1D_ELEM(mtf_value, i) ); if (DIRECT_A1D_ELEM(mtf_value, i) < 1e-10) { std::cerr << " i= " << i << " mtf_value[i]= " << DIRECT_A1D_ELEM(mtf_value, i) << std::endl; REPORT_ERROR("ERROR: zero or negative values encountered in MTF curve: " + fnMtfs[optGroup]); } i++; } // Calculate slope of resolution (in 1/A) per element in the MTF array, in order to interpolate below RFLOAT res_per_elem = (DIRECT_A1D_ELEM(mtf_resol, i-1) - DIRECT_A1D_ELEM(mtf_resol, 0)) / (RFLOAT)(i); if (res_per_elem < 1e-10) REPORT_ERROR(" ERROR: the resolution in the MTF star file does not go up...."); const int sh = s/2 + 1; mtfImage[optGroup][s] = Image(sh,s); Image& img = mtfImage[optGroup][s]; const double as = angpix[optGroup] * boxSizes[optGroup]; for (int y = 0; y < s; y++) for (int x = 0; x < sh; x++) { const double xx = x/as; // logical X-coordinate in 1/A const double yy = y < sh? y/as : (y-s)/as; // logical Y-coordinate in 1/A RFLOAT res = sqrt(xx*xx + yy*yy); // get resolution in 1/Ang int i_0 = FLOOR(res / res_per_elem); RFLOAT mtf; // check boundaries of the array if (i_0 >= MULTIDIM_SIZE(mtf_value) - 1) { mtf = DIRECT_A1D_ELEM(mtf_value, MULTIDIM_SIZE(mtf_value) - 1); } else if (i_0 <= 0) { mtf = DIRECT_A1D_ELEM(mtf_value, 0); } else { // linear interpolation: RFLOAT x_0 = DIRECT_A1D_ELEM(mtf_resol, i_0); RFLOAT y_0 = DIRECT_A1D_ELEM(mtf_value, i_0); RFLOAT x_1 = DIRECT_A1D_ELEM(mtf_resol, i_0 + 1); RFLOAT y_1 = DIRECT_A1D_ELEM(mtf_value, i_0 + 1); mtf = y_0 + (y_1 - y_0)*(res - x_0)/(x_1 - x_0); } img(y,x) = mtf; } } } return mtfImage[optGroup][s]; } const Image& ObservationModel::getAverageMtfImage(int s) { #pragma omp critical(ObservationModel_getAverageMtfImage) { if (avgMtfImage.find(s) == avgMtfImage.end()) { // get first mtfImage avgMtfImage[s] = getMtfImage(0, s); // Then add rest of optics groups for (int i = 1; i < mtfImage.size(); i++) { avgMtfImage[s].data += getMtfImage(i, s).data; } avgMtfImage[s].data /= (RFLOAT)mtfImage.size(); } } return avgMtfImage[s]; } const Image& ObservationModel::getPhaseCorrection(int optGroup, int s) { #pragma omp critical(ObservationModel_getPhaseCorrection) { if (phaseCorr[optGroup].find(s) == phaseCorr[optGroup].end()) { if (phaseCorr[optGroup].size() > 100) { std::cerr << "Warning: " << (phaseCorr[optGroup].size()+1) << " phase shift images in cache for the same ObservationModel." << std::endl; } const int sh = s/2 + 1; phaseCorr[optGroup][s] = Image(sh,s); Image& img = phaseCorr[optGroup][s]; const double as = angpix[optGroup] * boxSizes[optGroup]; const Matrix2D& M = magMatrices[optGroup]; for (int y = 0; y < s; y++) for (int x = 0; x < sh; x++) { double phase = 0.0; for (int i = 0; i < oddZernikeCoeffs[optGroup].size(); i++) { int m, n; Zernike::oddIndexToMN(i, m, n); const double xx0 = x/as; const double yy0 = y < sh-1? 
y/as : (y-s)/as; const double xx = M(0,0) * xx0 + M(0,1) * yy0; const double yy = M(1,0) * xx0 + M(1,1) * yy0; phase += oddZernikeCoeffs[optGroup][i] * Zernike::Z_cart(m,n,xx,yy); } img(y,x).real = cos(phase); img(y,x).imag = sin(phase); } } } return phaseCorr[optGroup][s]; } const Image& ObservationModel::getGammaOffset(int optGroup, int s) { #pragma omp critical(ObservationModel_getGammaOffset) { if (gammaOffset[optGroup].find(s) == gammaOffset[optGroup].end()) { if (gammaOffset[optGroup].size() > 100) { std::cerr << "Warning: " << (gammaOffset[optGroup].size()+1) << " gamma offset images in cache for the same ObservationModel." << std::endl; } const int sh = s/2 + 1; gammaOffset[optGroup][s] = Image(sh,s); Image& img = gammaOffset[optGroup][s]; const double as = angpix[optGroup] * boxSizes[optGroup]; const Matrix2D& M = magMatrices[optGroup]; for (int y = 0; y < s; y++) for (int x = 0; x < sh; x++) { double phase = 0.0; for (int i = 0; i < evenZernikeCoeffs[optGroup].size(); i++) { int m, n; Zernike::evenIndexToMN(i, m, n); const double xx0 = x/as; const double yy0 = y < sh-1? y/as : (y-s)/as; const double xx = M(0,0) * xx0 + M(0,1) * yy0; const double yy = M(1,0) * xx0 + M(1,1) * yy0; phase += evenZernikeCoeffs[optGroup][i] * Zernike::Z_cart(m,n,xx,yy); } img(y,x) = phase; } } } return gammaOffset[optGroup][s]; } Matrix2D ObservationModel::applyAnisoMag(Matrix2D A3D, int opticsGroup) { Matrix2D out; if (hasMagMatrices) { Matrix2D mag3D(3,3); mag3D.initIdentity(); mag3D(0,0) = magMatrices[opticsGroup](0,0); mag3D(0,1) = magMatrices[opticsGroup](0,1); mag3D(1,0) = magMatrices[opticsGroup](1,0); mag3D(1,1) = magMatrices[opticsGroup](1,1); out = mag3D.inv() * A3D; } else { out = A3D; } return out; } Matrix2D ObservationModel::applyScaleDifference(Matrix2D A3D, int opticsGroup, int s3D, double angpix3D) { Matrix2D out = A3D; out *= (boxSizes[opticsGroup] * angpix[opticsGroup]) / (s3D * angpix3D); return out; } bool ObservationModel::containsAllColumnsNeededForPrediction(const MetaDataTable& partMdt) { return (partMdt.containsLabel(EMDL_ORIENT_ORIGIN_X_ANGSTROM) && partMdt.containsLabel(EMDL_ORIENT_ORIGIN_Y_ANGSTROM) && partMdt.containsLabel(EMDL_ORIENT_ROT) && partMdt.containsLabel(EMDL_ORIENT_TILT) && partMdt.containsLabel(EMDL_ORIENT_PSI) && partMdt.containsLabel(EMDL_PARTICLE_RANDOM_SUBSET)); } relion-3.1.3/src/jaz/obs_model.h000066400000000000000000000166421411340063500165030ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #ifndef OBS_MODEL_H #define OBS_MODEL_H #include #include #include #include #include #include class BackProjector; class ObservationModel { public: // tablename can be "particles", "micrographs" or "movies". // If tablename is "discover", the function will try to read the data table with all three names (in that order). static void loadSafely(std::string filename, ObservationModel& obsModel, MetaDataTable& particlesMdt, std::string tablename = "particles", int verb = 0, bool do_die_upon_error = true); static void saveNew(MetaDataTable& particlesMdt, MetaDataTable& opticsMdt, std::string filename, std::string _tablename = "particles"); void save(MetaDataTable& particlesMdt, std::string filename, std::string _tablename = "particles"); static bool containsAllColumnsNeededForPrediction(const MetaDataTable& partMdt); ObservationModel(); ObservationModel(const MetaDataTable &opticsMdt, bool do_die_upon_error = true); MetaDataTable opticsMdt; bool hasEvenZernike, hasOddZernike, hasMagMatrices, hasBoxSizes, hasMultipleMtfs; protected: // cached values - protected to prevent users from accidentally changing them, // expecting the changes to propagate into the optics star-file std::vector angpix, originalAngpix, lambda, Cs; std::vector boxSizes; std::vector CtfPremultiplied; std::vector > evenZernikeCoeffs, oddZernikeCoeffs; std::vector > magMatrices; std::vector fnMtfs, groupNames; // cached aberration effects for a set of given image sizes // e.g.: phaseCorr[opt. group][img. height](y,x) std::vector > > phaseCorr; std::vector > > gammaOffset, mtfImage; std::map > avgMtfImage; public: // Prediction // void predictObservation(Projector &proj, const MetaDataTable &partMdt, long int particle, MultidimArray& dest, double angpix_ref, bool applyCtf = true, bool shiftPhases = true, bool applyShift = true, bool applyMtf = true, bool applyCtfPadding = false); Volume > predictComplexGradient(Projector &proj, const MetaDataTable &partMdt, long int particle, double angpix_ref, bool applyCtf = true, bool shiftPhases = true, bool applyShift = true, bool applyMtf = true, bool applyCtfPadding = false); // Correction // // divide by MTF of detector (using cache) void divideByMtf(const MetaDataTable& partMdt, long particle, MultidimArray& obsImage, bool do_multiply_instead = false, bool do_correct_average_mtf = true); void divideByMtf(int opticsGroup, MultidimArray& obsImage, bool do_multiply_instead = false, bool do_correct_average_mtf = true); // 2D image with the MTF (cached) const Image& getMtfImage(int optGroup, int s); // 2D image with the average MTF (cached) const Image& getAverageMtfImage(int s); // apply effect of antisymmetric aberration (using cache) void demodulatePhase(int optGroup, MultidimArray& obsImage, bool do_modulate_instead = false); // syntactic sugar void demodulatePhase(const MetaDataTable &partMdt, long int particle, MultidimArray& obsImage, bool do_modulate_instead = false); // effect of antisymmetric aberration (cached) const Image& getPhaseCorrection(int optGroup, int s); // effect of symmetric aberration (cached) const Image& getGammaOffset(int optGroup, int s); Matrix2D applyAnisoMag(Matrix2D A3D, int opticsGroup); Matrix2D applyScaleDifference(Matrix2D A3D, int opticsGroup, int s3D, double angpix3D); // Bureaucracy bool allPixelSizesIdentical() const; bool allBoxSizesIdentical() const; double angToPix(double a, int s, int opticsGroup) const; double pixToAng(double p, int s, int opticsGroup) const; double 
getPixelSize(int opticsGroup) const; std::vector getPixelSizes() const; double getWavelength(int opticsGroup) const; std::vector getWavelengths() const; double getSphericalAberration(int opticsGroup) const; std::vector getSphericalAberrations() const; int getBoxSize(int opticsGroup) const; void getBoxSizes(std::vector& sDest, std::vector& shDest) const; // These do NOT update the metadata table! // These are only to change prediction etc. void setBoxSize(int opticsGroup, int newBoxSize); void setPixelSize(int opticsGroup, RFLOAT newPixelSize); Matrix2D getMagMatrix(int opticsGroup) const; std::vector > getMagMatrices() const; void setMagMatrix(int opticsGroup, const Matrix2D& M); // 0-indexed int getOpticsGroup(const MetaDataTable &particlesMdt, long int particle = -1) const; bool getCtfPremultiplied(int og) const; void setCtfPremultiplied(int og, bool val); std::string getGroupName(int og); bool allPixelAndBoxSizesIdentical(const MetaDataTable& mdt); bool containsGroup(const MetaDataTable& mdt, int group); /* duh */ int numberOfOpticsGroups() const; /* Check whether the optics groups appear in the correct order. This makes it possible to access a group g through: opticsMdt.getValue(label, dest, g-1); */ bool opticsGroupsSorted() const; /* Find all optics groups used in particles table partMdt that are not defined in opticsMdt (should return an empty vector) */ std::vector findUndefinedOptGroups(const MetaDataTable& partMdt) const; /* Rename optics groups to enforce the correct order and translate the indices in particle table partMdt. (Merely changing the order in opticsMdt would fail if groups were missing.) */ void sortOpticsGroups(MetaDataTable& partMdt); /* Return the set of optics groups present in partMdt */ std::vector getOptGroupsPresent_oneBased(const MetaDataTable& partMdt) const; /* Return the set of optics groups present in partMdt */ std::vector getOptGroupsPresent_zeroBased(const MetaDataTable& partMdt) const; std::vector > > splitParticlesByOpticsGroup(const MetaDataTable& partMdt) const; }; #endif relion-3.1.3/src/jaz/optimization/000077500000000000000000000000001411340063500171045ustar00rootroot00000000000000relion-3.1.3/src/jaz/optimization/gradient_descent.cpp000066400000000000000000000057641411340063500231260ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #include "gradient_descent.h" #include std::vector GradientDescent::optimize( const std::vector &initial, const DifferentiableOptimization &opt, double step, double minStep, double minDiff, long maxIters, double inertia, bool verbose) { std::vector x = initial; std::vector last_x = x; const long n = initial.size(); std::vector g(n, 0.0), v(n, 0.0); void* tempStorage = opt.allocateTempStorage(); double last_f = opt.f(initial, tempStorage); double act_step = step; int goodSince = 0, accAfter = 5; if (verbose) { std::cout << "initial: " << last_f << "\n"; } for (int i = 0; i < maxIters; i++) { opt.grad(x, g, tempStorage); for (int j = 0; j < n; j++) { v[j] = inertia * v[j] - (1.0 - inertia) * act_step * g[j]; x[j] += v[j]; } double f = opt.f(x, tempStorage); if (verbose) { std::cout << i << ": " << f << " (" << act_step << ") [" << (f - last_f) << "]"; } if (f > last_f) { if (verbose) { std::cout << " *\n"; } for (int j = 0; j < n; j++) { x[j] = last_x[j]; v[j] = 0.0; } act_step /= 2.0; if (act_step < minStep) break; } else { if (verbose) { std::cout << "\n"; } if (last_f - f < minDiff) break; for (int j = 0; j < n; j++) { last_x[j] = x[j]; } if (act_step < step/2.0) { goodSince++; if (goodSince >= accAfter) { goodSince = 0; act_step *= 2.0; } } else { act_step = step; } last_f = f; } } opt.deallocateTempStorage(tempStorage); return x; } relion-3.1.3/src/jaz/optimization/gradient_descent.h000066400000000000000000000025051411340063500225610ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef GRADIENT_DESCENT_H #define GRADIENT_DESCENT_H #include #include "optimization.h" class GradientDescent { public: static std::vector optimize(const std::vector& initial, const DifferentiableOptimization& opt, double step, double minStep, double minDiff, long maxIters, double inertia = 0.0, bool verbose = false); }; #endif relion-3.1.3/src/jaz/optimization/lbfgs.cpp000066400000000000000000000146151411340063500207140ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include "lbfgs.h" #include static pthread_mutex_t lib_lbfgs_mutex = PTHREAD_MUTEX_INITIALIZER; std::vector LBFGS::optimize( const std::vector &initial, const DifferentiableOptimization &opt, bool verbose, int max_iters, double epsilon) { const int N = initial.size(); lbfgsfloatval_t fx; lbfgsfloatval_t* m_x = lbfgs_malloc(N); if (m_x == NULL) { REPORT_ERROR("LBFGS::optimize: Failed to allocate a memory block for variables.\n"); } for (int i = 0; i < N; i++) { m_x[i] = initial[i]; } void* tempStorage = opt.allocateTempStorage(); LibLbfgsAdapter adapter(opt, tempStorage, N, verbose); int ret; lbfgs_parameter_t param; lbfgs_parameter_init(¶m); param.max_iterations = max_iters; param.epsilon = epsilon; pthread_mutex_lock(&lib_lbfgs_mutex); { ret = lbfgs(N, m_x, &fx, evaluate, progress, &adapter, ¶m); } pthread_mutex_unlock(&lib_lbfgs_mutex); if (verbose) { std::cout << "L-BFGS optimization terminated with status code = " << translateError(ret) << "\n"; std::cout << " fx = " << fx << "\n"; } std::vector out(N); for (int i = 0; i < N; i++) { out[i] = m_x[i]; } lbfgs_free(m_x); opt.deallocateTempStorage(tempStorage); return out; } void LBFGS::test() { RosenbrockBanana rb; std::vector initial(2); initial[0] = 3.0; initial[0] = 1.0; std::vector x0 = optimize(initial, rb, false); std::cout << "should be close to 1, 1: " << x0[0] << ", " << x0[1] << "\n"; } std::string LBFGS::translateError(int ret) { switch (ret) { case LBFGS_SUCCESS: return "LBFGS_SUCCESS"; case LBFGS_STOP: return "LBFGS_STOP"; case LBFGS_ALREADY_MINIMIZED: return "LBFGS_ALREADY_MINIMIZED"; case LBFGSERR_UNKNOWNERROR: return "LBFGSERR_UNKNOWNERROR"; case LBFGSERR_LOGICERROR: return "LBFGSERR_LOGICERROR"; case LBFGSERR_OUTOFMEMORY: return "LBFGSERR_OUTOFMEMORY"; case LBFGSERR_CANCELED: return "LBFGSERR_CANCELED"; case LBFGSERR_INVALID_N: return "LBFGSERR_INVALID_N"; case LBFGSERR_INVALID_N_SSE: return "LBFGSERR_INVALID_N_SSE"; case LBFGSERR_INVALID_X_SSE: return "LBFGSERR_INVALID_X_SSE"; case LBFGSERR_INVALID_EPSILON: return "LBFGSERR_INVALID_EPSILON"; case LBFGSERR_INVALID_TESTPERIOD: return "LBFGSERR_INVALID_TESTPERIOD"; case LBFGSERR_INVALID_DELTA: return "LBFGSERR_INVALID_DELTA"; case LBFGSERR_INVALID_LINESEARCH: return "LBFGSERR_INVALID_LINESEARCH"; case LBFGSERR_INVALID_MINSTEP: return "LBFGSERR_INVALID_MINSTEP"; case LBFGSERR_INVALID_MAXSTEP: return "LBFGSERR_INVALID_MAXSTEP"; case LBFGSERR_INVALID_FTOL: return "LBFGSERR_INVALID_FTOL"; case LBFGSERR_INVALID_WOLFE: return "LBFGSERR_INVALID_WOLFE"; case LBFGSERR_INVALID_GTOL: return "LBFGSERR_INVALID_GTOL"; case LBFGSERR_INVALID_XTOL: return "LBFGSERR_INVALID_XTOL"; case LBFGSERR_INVALID_MAXLINESEARCH: return "LBFGSERR_INVALID_MAXLINESEARCH"; case LBFGSERR_INVALID_ORTHANTWISE: return "LBFGSERR_INVALID_ORTHANTWISE"; case LBFGSERR_INVALID_ORTHANTWISE_START: return "LBFGSERR_INVALID_ORTHANTWISE_START"; case LBFGSERR_INVALID_ORTHANTWISE_END: return "LBFGSERR_INVALID_ORTHANTWISE_END"; case LBFGSERR_OUTOFINTERVAL: return "LBFGSERR_OUTOFINTERVAL"; case LBFGSERR_INCORRECT_TMINMAX: return "LBFGSERR_INCORRECT_TMINMAX"; case LBFGSERR_ROUNDING_ERROR: return "LBFGSERR_ROUNDING_ERROR"; case LBFGSERR_MINIMUMSTEP: return "LBFGSERR_MINIMUMSTEP"; case LBFGSERR_MAXIMUMSTEP: 
return "LBFGSERR_MAXIMUMSTEP"; case LBFGSERR_MAXIMUMLINESEARCH: return "LBFGSERR_MAXIMUMLINESEARCH"; case LBFGSERR_MAXIMUMITERATION: return "LBFGSERR_MAXIMUMITERATION"; case LBFGSERR_WIDTHTOOSMALL: return "LBFGSERR_WIDTHTOOSMALL"; case LBFGSERR_INVALIDPARAMETERS: return "LBFGSERR_INVALIDPARAMETERS"; case LBFGSERR_INCREASEGRADIENT: return "LBFGSERR_INCREASEGRADIENT"; default: return "uninterpretable error"; } } lbfgsfloatval_t LBFGS::evaluate( void *instance, const lbfgsfloatval_t *x, lbfgsfloatval_t *g, const int n, const lbfgsfloatval_t step) { LibLbfgsAdapter* adapter = (LibLbfgsAdapter*) instance; return adapter->evaluate(x, g, n, step); } int LBFGS::progress( void *instance, const lbfgsfloatval_t *x, const lbfgsfloatval_t *g, const lbfgsfloatval_t fx, const lbfgsfloatval_t xnorm, const lbfgsfloatval_t gnorm, const lbfgsfloatval_t step, int n, int k, int ls) { LibLbfgsAdapter* adapter = (LibLbfgsAdapter*) instance; return adapter->progress(x, g, fx, xnorm, gnorm, step, n, k, ls); } LBFGS::LibLbfgsAdapter::LibLbfgsAdapter( const DifferentiableOptimization &opt, void *tempStorage, int n, bool verbose) : opt(opt), n(n), verbose(verbose), x_vec(n), grad_vec(n), tempStorage(tempStorage) { } lbfgsfloatval_t LBFGS::LibLbfgsAdapter::evaluate( const lbfgsfloatval_t *x, lbfgsfloatval_t *g, const int n, const lbfgsfloatval_t step) { for (int i = 0; i < n; i++) { x_vec[i] = x[i]; } double fx = opt.f(x_vec, tempStorage); opt.grad(x_vec, grad_vec, tempStorage); for (int i = 0; i < n; i++) { g[i] = grad_vec[i]; } return fx; } int LBFGS::LibLbfgsAdapter::progress( const lbfgsfloatval_t *x, const lbfgsfloatval_t *g, const lbfgsfloatval_t fx, const lbfgsfloatval_t xnorm, const lbfgsfloatval_t gnorm, const lbfgsfloatval_t step, int n, int k, int ls) { if (verbose) { std::cout << k << ": " << fx << "\n"; } return 0; } relion-3.1.3/src/jaz/optimization/lbfgs.h000066400000000000000000000057331411340063500203620ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #ifndef LBFGS_OPT_H #define LBFGS_OPT_H #include #include #include "optimization.h" #include class LBFGS { public: static std::vector optimize( const std::vector& initial, const DifferentiableOptimization& opt, bool verbose = false, int max_iters = 0, double epsilon = 1e-5); static void test(); protected: static std::string translateError(int ret); static lbfgsfloatval_t evaluate( void *instance, const lbfgsfloatval_t *x, lbfgsfloatval_t *g, const int n, const lbfgsfloatval_t step); static int progress( void *instance, const lbfgsfloatval_t *x, const lbfgsfloatval_t *g, const lbfgsfloatval_t fx, const lbfgsfloatval_t xnorm, const lbfgsfloatval_t gnorm, const lbfgsfloatval_t step, int n, int k, int ls); class LibLbfgsAdapter { public: LibLbfgsAdapter( const DifferentiableOptimization& opt, void* tempStorage, int n, bool verbose); const DifferentiableOptimization& opt; int n; bool verbose; std::vector x_vec, grad_vec; void* tempStorage; lbfgsfloatval_t evaluate( const lbfgsfloatval_t *x, lbfgsfloatval_t *g, const int n, const lbfgsfloatval_t step); int progress( const lbfgsfloatval_t *x, const lbfgsfloatval_t *g, const lbfgsfloatval_t fx, const lbfgsfloatval_t xnorm, const lbfgsfloatval_t gnorm, const lbfgsfloatval_t step, int n, int k, int ls); }; }; #endif relion-3.1.3/src/jaz/optimization/nelder_mead.cpp000066400000000000000000000132661411340063500220570ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/

#include "nelder_mead.h"
#include 
#include 
#include 

std::vector<double> NelderMead::optimize(
    const std::vector<double>& initial,
    const Optimization& opt,
    double initialStep, double tolerance, long maxIters,
    double alpha, double gamma, double rho, double sigma,
    bool verbose, double* minCost)
{
    const double n = initial.size();
    const double m = initial.size() + 1;

    std::vector<std::vector<double> > simplex(m);

    simplex[0] = initial;

    for (int j = 1; j < m; j++)
    {
        simplex[j] = initial;
        simplex[j][j-1] += initialStep;
    }

    // avoid runtime allocations (std::vectors live on the heap)
    std::vector<std::vector<double> > nextSimplex(m);
    std::vector<double> values(m), nextValues(m), centroid(n),
                        reflected(n), expanded(n), contracted(n);

    void* tempStorage = opt.allocateTempStorage();

    // compute values
    for (int j = 0; j < m; j++)
    {
        values[j] = opt.f(simplex[j], tempStorage);
    }

    if (verbose)
    {
        std::cout << "f0 = " << values[0] << std::endl;
    }

    for (long i = 0; i < maxIters; i++)
    {
        // sort x and f(x) by ascending f(x)
        std::vector order = IndexSort::sortIndices(values);

        opt.report(i, values[order[0]], simplex[order[0]]);

        if (verbose)
        {
            std::cout << i << ": " << values[order[0]] << std::endl;
        }

        for (int j = 0; j < m; j++)
        {
            nextSimplex[j] = simplex[order[j]];
            nextValues[j] = values[order[j]];
        }

        simplex = nextSimplex;
        values = nextValues;

        // compute centroid
        for (int k = 0; k < n; k++)
        {
            centroid[k] = 0.0;
        }

        for (int j = 0; j < n; j++) // leave out the worst x
        {
            for (int k = 0; k < n; k++)
            {
                centroid[k] += simplex[j][k];
            }
        }

        for (int k = 0; k < n; k++)
        {
            centroid[k] /= n;
        }

        // check for convergence
        bool allInside = true;

        for (int j = 0; j < m; j++)
        {
            double dx = 0.0;

            for (int k = 0; k < n; k++)
            {
                double ddx = simplex[j][k] - centroid[k];
                dx += ddx * ddx;
            }

            if (sqrt(dx) > tolerance)
            {
                allInside = false;
                break;
            }
        }

        if (allInside)
        {
            if (verbose) std::cout << "Exiting because allInside" << std::endl;

            opt.deallocateTempStorage(tempStorage);
            return simplex[0];
        }

        // reflect
        for (int k = 0; k < n; k++)
        {
            reflected[k] = (1.0 + alpha) * centroid[k] - alpha * simplex[n][k];
        }

        double vRefl = opt.f(reflected, tempStorage);

        if (vRefl < values[n-1] && vRefl > values[0])
        {
            simplex[n] = reflected;
            values[n] = vRefl;
            continue;
        }

        // expand
        if (vRefl < values[0])
        {
            for (int k = 0; k < n; k++)
            {
                expanded[k] = (1.0 - gamma) * centroid[k] + gamma * reflected[k];
            }

            double vExp = opt.f(expanded, tempStorage);

            if (vExp < vRefl)
            {
                simplex[n] = expanded;
                values[n] = vExp;
            }
            else
            {
                simplex[n] = reflected;
                values[n] = vRefl;
            }

            continue;
        }

        // contract
        for (int k = 0; k < n; k++)
        {
            contracted[k] = (1.0 - rho) * centroid[k] + rho * simplex[n][k];
        }

        double vContr = opt.f(contracted, tempStorage);

        if (vContr < values[n])
        {
            simplex[n] = contracted;
            values[n] = vContr;
            continue;
        }

        // shrink
        for (int j = 1; j < m; j++)
        {
            for (int k = 0; k < n; k++)
            {
                simplex[j][k] = (1.0 - sigma) * simplex[0][k] + sigma * simplex[j][k];
            }

            values[j] = opt.f(simplex[j], tempStorage);
        }
    }

    if (verbose) std::cout << "Exiting after reaching maxIter" << std::endl;

    opt.deallocateTempStorage(tempStorage);

    std::vector order = IndexSort::sortIndices(values);

    if (minCost)
    {
        *minCost = values[order[0]];
    }

    return simplex[order[0]];
}

void NelderMead::test()
{
    RosenbrockBanana rb;

    std::vector<double> initial(2);
    initial[0] = 3.0;
    initial[1] = 1.0;

    std::vector<double> x0 = optimize(initial, rb, 0.5, 0.001, 1000);

    std::cout << "should be close to 1, 1: " << x0[0] << ", " << x0[1] << "\n";
}
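/* ---------------------------------------------------------------------------
   Editor's note - illustrative sketch, not part of the RELION 3.1.3 sources:
   a minimal example of how the Optimization / DifferentiableOptimization
   interfaces from optimization.h are typically driven through
   NelderMead::optimize (derivative-free) and LBFGS::optimize (gradient-based).
   The class name QuadraticBowl, the target point (2, -1) and all numeric
   settings below are made up for the example; the include paths assume the
   RELION source root is on the include path, as elsewhere in the code base.

#include <iostream>
#include <vector>

#include "src/jaz/optimization/nelder_mead.h"
#include "src/jaz/optimization/lbfgs.h"

// f(x,y) = (x - 2)^2 + 10 (y + 1)^2, minimum at (2, -1)
class QuadraticBowl : public DifferentiableOptimization
{
    public:

        double f(const std::vector<double>& x, void* tempStorage) const
        {
            const double dx = x[0] - 2.0, dy = x[1] + 1.0;
            return dx*dx + 10.0*dy*dy;
        }

        void grad(const std::vector<double>& x, std::vector<double>& gradDest, void* tempStorage) const
        {
            gradDest[0] =  2.0 * (x[0] - 2.0);
            gradDest[1] = 20.0 * (x[1] + 1.0);
        }
};

int main()
{
    QuadraticBowl qb;
    std::vector<double> initial(2, 0.0);

    // compare the analytical gradient against finite differences
    qb.testGradient(initial, 1e-6);

    // derivative-free simplex search: initial step 0.5, tolerance 1e-6, at most 1000 iterations
    // (minCost is only written when the iteration limit is reached, so initialise it)
    double minCost = 0.0;
    std::vector<double> xNM = NelderMead::optimize(
        initial, qb, 0.5, 1e-6, 1000, 1.0, 2.0, 0.5, 0.5, false, &minCost);

    // gradient-based L-BFGS: max_iters = 0 keeps the liblbfgs default (no iteration limit)
    std::vector<double> xLB = LBFGS::optimize(initial, qb, false, 0, 1e-7);

    std::cout << "Nelder-Mead: " << xNM[0] << ", " << xNM[1] << " (cost " << minCost << ")\n";
    std::cout << "L-BFGS:      " << xLB[0] << ", " << xLB[1] << "\n";

    return 0;
}
   ------------------------------------------------------------------------- */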
relion-3.1.3/src/jaz/optimization/nelder_mead.h000066400000000000000000000026741411340063500215250ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef NELDER_MEAD_H #define NELDER_MEAD_H #include #include "optimization.h" class NelderMead { public: static std::vector optimize( const std::vector& initial, const Optimization& opt, double initialStep, double tolerance, long maxIters, double alpha = 1.0, double gamma = 2.0, double rho = 0.5, double sigma = 0.5, bool verbose = false, double* minCost = 0); static void test(); }; #endif relion-3.1.3/src/jaz/optimization/optimization.cpp000066400000000000000000000037701411340063500223450ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include "optimization.h" #include #include double RosenbrockBanana::f(const std::vector &x, void *tempStorage) const { const double a = 1.0; const double b = 100.0; const double& xx = x[0]; const double& yy = x[1]; return (a - xx) * (a - xx) + b * (yy - xx * xx) * (yy - xx * xx); } void RosenbrockBanana::grad( const std::vector &x, std::vector &gradDest, void *tempStorage) const { const double a = 1.0; const double b = 100.0; const double& xx = x[0]; const double& yy = x[1]; gradDest[0] = -2.0 * (a - xx) - 4.0 * b * (yy - xx * xx) * xx; gradDest[1] = 2.0 * b * (yy - xx * xx); } void DifferentiableOptimization::testGradient(const std::vector &x, double eps) { const int n = x.size(); std::vector x1 = x, grad0(n,0.0); const double f0 = f(x, 0); grad(x, grad0, 0); for (int i = 0; i < n; i++) { x1[i] = x[i] + eps; const double f1 = f(x1, 0); x1[i] = x[i]; std::cout << i << ": " << std::setprecision(16) << (f1 - f0)/eps << " vs. 
" << grad0[i] << "\n"; } } relion-3.1.3/src/jaz/optimization/optimization.h000066400000000000000000000036321411340063500220070ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef JAZ_OPTIMIZATION_H #define JAZ_OPTIMIZATION_H #include // abstract class for optimization problems class Optimization { public: // Don't forget const when overriding these functions! virtual double f(const std::vector& x, void* tempStorage) const = 0; virtual void* allocateTempStorage() const {return 0;} virtual void deallocateTempStorage(void* ts) const {} virtual void report(int iteration, double cost, const std::vector& x) const {} }; class DifferentiableOptimization : public Optimization { public: virtual void grad(const std::vector& x, std::vector& gradDest, void* tempStorage) const = 0; void testGradient(const std::vector& x, double eps = 1e-9); }; class RosenbrockBanana : public DifferentiableOptimization { public: double f(const std::vector& x, void* tempStorage) const; void grad(const std::vector& x, std::vector& gradDest, void* tempStorage) const; }; #endif relion-3.1.3/src/jaz/parallel_ft.cpp000066400000000000000000000261711411340063500173560ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #include "src/macros.h" #include "src/jaz/parallel_ft.h" #include "src/fftw.h" #include "src/args.h" #include #include static pthread_mutex_t fftw_plan_mutex_par = PTHREAD_MUTEX_INITIALIZER; //#define DEBUG_PLANS // Constructors and destructors -------------------------------------------- ParFourierTransformer::ParFourierTransformer(): plans_are_set(false) { init(); #ifdef DEBUG_PLANS std::cerr << "INIT this= "< &inputFourier) { memcpy(MULTIDIM_ARRAY(fFourier),MULTIDIM_ARRAY(inputFourier), MULTIDIM_SIZE(inputFourier)*2*sizeof(RFLOAT)); } // Transform --------------------------------------------------------------- void ParFourierTransformer::Transform(int sign) { if (sign == FFTW_FORWARD) { #ifdef RELION_SINGLE_PRECISION fftwf_execute_dft_r2c(fPlanForward,MULTIDIM_ARRAY(*fReal), (fftwf_complex*) MULTIDIM_ARRAY(fFourier)); #else fftw_execute_dft_r2c(fPlanForward,MULTIDIM_ARRAY(*fReal), (fftw_complex*) MULTIDIM_ARRAY(fFourier)); #endif // Normalisation of the transform unsigned long int size=0; if(fReal!=NULL) size = MULTIDIM_SIZE(*fReal); else if (fComplex!= NULL) size = MULTIDIM_SIZE(*fComplex); else REPORT_ERROR("No complex nor real data defined"); FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(fFourier) DIRECT_MULTIDIM_ELEM(fFourier,n) /= size; } else if (sign == FFTW_BACKWARD) { #ifdef RELION_SINGLE_PRECISION fftwf_execute_dft_c2r(fPlanBackward, (fftwf_complex*) MULTIDIM_ARRAY(fFourier), MULTIDIM_ARRAY(*fReal)); #else fftw_execute_dft_c2r(fPlanBackward, (fftw_complex*) MULTIDIM_ARRAY(fFourier), MULTIDIM_ARRAY(*fReal)); #endif } } void ParFourierTransformer::FourierTransform() { Transform(FFTW_FORWARD); } void ParFourierTransformer::inverseFourierTransform() { Transform(FFTW_BACKWARD); } // Enforce Hermitian symmetry --------------------------------------------- void ParFourierTransformer::enforceHermitianSymmetry() { int ndim=3; if (ZSIZE(*fReal)==1) { ndim=2; if (YSIZE(*fReal)==1) ndim=1; } long int yHalf=YSIZE(*fReal)/2; if (YSIZE(*fReal)%2==0) yHalf--; long int zHalf=ZSIZE(*fReal)/2; if (ZSIZE(*fReal)%2==0) zHalf--; switch (ndim) { case 2: for (long int i=1; i<=yHalf; i++) { long int isym=intWRAP(-i,0,YSIZE(*fReal)-1); Complex mean=0.5*( DIRECT_A2D_ELEM(fFourier,i,0)+ conj(DIRECT_A2D_ELEM(fFourier,isym,0))); DIRECT_A2D_ELEM(fFourier,i,0)=mean; DIRECT_A2D_ELEM(fFourier,isym,0)=conj(mean); } break; case 3: for (long int k=0; k #include "src/multidim_array.h" #include "src/funcs.h" #include "src/tabfuncs.h" #include "src/complex.h" #include "src/CPlot2D.h" /* Parallelizable version of FourierTransformer from src/fftw.h: FFTW plans are managed globally and only one can be computed at a time. The original class recomputes its plan each time a new Image is passed to its FourierTransform() or inverseFourierTransform(). As a consequence, when working on sets of multiple images, only one FT can run at a time. This class only recomputes the plans if the size of the image changes. The same plan is reused for different images of the same size. Otherwise, the two classes are identical. -- J. Zivanov, Feb. 9th 2018 */ class ParFourierTransformer { public: /** Real array, in fact a pointer to the user array is stored. */ MultidimArray *fReal; /** Complex array, in fact a pointer to the user array is stored. 
*/ MultidimArray *fComplex; /** Fourier array */ MultidimArray< Complex > fFourier; #ifdef RELION_SINGLE_PRECISION /* fftw Forward plan */ fftwf_plan fPlanForward; /* fftw Backward plan */ fftwf_plan fPlanBackward; #else /* fftw Forward plan */ fftw_plan fPlanForward; /* fftw Backward plan */ fftw_plan fPlanBackward; #endif bool plans_are_set; // Public methods public: /** Default constructor */ ParFourierTransformer(); /** Destructor */ ~ParFourierTransformer(); /** Copy constructor * * The created ParFourierTransformer is a perfect copy of the input array but with a * different memory assignment. * */ ParFourierTransformer(const ParFourierTransformer& op); /** Compute the Fourier transform of a MultidimArray, 2D and 3D. If getCopy is false, an alias to the transformed data is returned. This is a faster option since a copy of all the data is avoided, but you need to be careful that an inverse Fourier transform may change the data. */ template void FourierTransform(T& v, T1& V, bool getCopy=true) { setReal(v); Transform(FFTW_FORWARD); if (getCopy) getFourierCopy(V); else getFourierAlias(V); } /** Compute the Fourier transform. The data is taken from the matrix with which the object was created. */ void FourierTransform(); /** Enforce Hermitian symmetry. If the Fourier transform risks of losing Hermitian symmetry, use this function to renforce it. */ void enforceHermitianSymmetry(); /** Compute the inverse Fourier transform. The result is stored in the same real data that was passed for the forward transform. The Fourier coefficients are taken from the internal Fourier coefficients */ void inverseFourierTransform(); /** Compute the inverse Fourier transform. New data is provided for the Fourier coefficients and the output can be any matrix1D, 2D or 3D. It is important that the output matrix is already resized to the right size before entering in this function. */ template void inverseFourierTransform(const T& V, T1& v) { setReal(v); setFourier(V); Transform(FFTW_BACKWARD); } /** Get Fourier coefficients. */ template void getFourierAlias(T& V) {V.alias(fFourier); return;} /** Get Fourier coefficients. */ MultidimArray< Complex>& getFourierReference() {return fFourier;} /** Get Fourier coefficients. */ template void getFourierCopy(T& V) { V.reshape(fFourier); memcpy(MULTIDIM_ARRAY(V),MULTIDIM_ARRAY(fFourier), MULTIDIM_SIZE(fFourier)*2*sizeof(RFLOAT)); } /** Return a complete Fourier transform (two halves). */ template void getCompleteFourier(T& V) { V.reshape(*fReal); int ndim=3; if (ZSIZE(*fReal)==1) { ndim=2; if (YSIZE(*fReal)==1) ndim=1; } switch (ndim) { case 1: FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY1D(V) if (i void setFromCompleteFourier(T& V) { int ndim=3; if (ZSIZE(*fReal)==1) { ndim=2; if (YSIZE(*fReal)==1) ndim=1; } switch (ndim) { case 1: FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY1D(fFourier) DIRECT_A1D_ELEM(fFourier,i)=DIRECT_A1D_ELEM(V,i); break; case 2: FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY2D(fFourier) DIRECT_A2D_ELEM(fFourier,i,j) = DIRECT_A2D_ELEM(V,i,j); break; case 3: FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY3D(fFourier) DIRECT_A3D_ELEM(fFourier,k,i,j) = DIRECT_A3D_ELEM(V,k,i,j); break; } } // Internal methods public: /* Pointer to the array of RFLOATs with which the plan was computed */ RFLOAT * dataPtr; /* Pointer to the array of complex with which the plan was computed */ Complex * complexDataPtr; /* Initialise all pointers to NULL */ void init(); /** Clear object */ void clear(); /** This calls fftw_cleanup. 
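        Note that fftw_cleanup() deallocates all of FFTW's internal data, after
        which every plan that still exists in this process (including plans held
        by other transformer instances) becomes invalid and must not be executed
        or destroyed; only call this once no further transforms are pending.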
*/ void cleanup(); /** Destroy both forward and backward fftw planes (mutex locked */ void destroyPlans(); /** Computes the transform, specified in Init() function If normalization=true the forward transform is normalized (no normalization is made in the inverse transform) If normalize=false no normalization is performed and therefore the image is scaled by the number of pixels. */ void Transform(int sign); /** Get the Multidimarray that is being used as input. */ const MultidimArray &getReal() const; const MultidimArray &getComplex() const; /** Set a Multidimarray for input. The data of img will be the one of fReal. In forward transforms it is not modified, but in backward transforms, the result will be stored in img. This means that the size of img cannot change between calls. */ void setReal(MultidimArray &img); /** Set a Multidimarray for input. The data of img will be the one of fComplex. In forward transforms it is not modified, but in backward transforms, the result will be stored in img. This means that the size of img cannot change between calls. */ void setReal(MultidimArray &img); /** Set a Multidimarray for the Fourier transform. The values of the input array are copied in the internal array. It is assumed that the container for the real image as well as the one for the Fourier array are already resized. No plan is updated. */ void setFourier(const MultidimArray &imgFourier); }; #endif relion-3.1.3/src/jaz/reference_map.cpp000066400000000000000000000236461411340063500176700ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include "reference_map.h" #include #include #include #include #include #include #include #include #include #include using namespace gravis; ReferenceMap::ReferenceMap() : reconFn0(""), reconFn1(""), maskFn(""), fscFn(""), paddingFactor(2.0), hasMask(false) { } void ReferenceMap::read(IOParser& parser, int argc, char* argv[]) { reconFn0 = parser.getOption("--m1", "Reference map, half 1", ""); reconFn1 = parser.getOption("--m2", "Reference map, half 2", ""); angpix = textToDouble(parser.getOption("--angpix_ref", "Pixel size of the reference map", "-1")); maskFn = parser.getOption("--mask", "Reference mask", ""); fscFn = parser.getOption("--f", "Input STAR file with the FSC of the reference (usually from PostProcess)"); paddingFactor = textToFloat(parser.getOption("--pad", "Padding factor", "2")); } void ReferenceMap::load(int verb, bool debug) { if (reconFn0 == "" || reconFn1 == "") { // Get half maps and masks from the PostProcess STAR file. 
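// (This branch runs when --m1/--m2 were not given on the command line: the
//  "general" table of the PostProcess STAR file supplied via --f is then
//  expected to name the two unfiltered half maps (EMDL_POSTPROCESS_UNFIL_HALFMAP1/2)
//  and the mask (EMDL_MASK_NAME). If any of the three entries is missing, an
//  error is raised; otherwise they override reconFn0, reconFn1 and maskFn.)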
FileName fn_half1, fn_half2, fn_mask; MetaDataTable MD; MD.read(fscFn, "general"); bool star_is_valid = MD.getValue(EMDL_POSTPROCESS_UNFIL_HALFMAP1, fn_half1) && MD.getValue(EMDL_POSTPROCESS_UNFIL_HALFMAP2, fn_half2) && MD.getValue(EMDL_MASK_NAME, fn_mask); if (star_is_valid) { if (verb > 0) { std::cout << " + The names of the reference half maps and the mask were taken from the PostProcess STAR file.\n"; std::cout << " - Half map 1: " << fn_half1 << "\n"; std::cout << " - Half map 2: " << fn_half2 << "\n"; std::cout << " - Mask: " << fn_mask << std::endl; } reconFn0 = fn_half1; reconFn1 = fn_half2; maskFn = fn_mask; } else { REPORT_ERROR("could not get filenames for unfiltered half maps from the postprocess STAR file."); } } Image maps[2], powSpec[2]; if (debug) std::cout << "reading: " << reconFn0 << "\n"; maps[0].read(reconFn0); if ( maps[0].data.xdim != maps[0].data.ydim || maps[0].data.ydim != maps[0].data.zdim) { REPORT_ERROR(reconFn0 + " is not cubical.\n"); } if (debug) std::cout << "reading: " << reconFn1 << "\n"; maps[1].read(reconFn1); if ( maps[1].data.xdim != maps[1].data.ydim || maps[1].data.ydim != maps[1].data.zdim) { REPORT_ERROR(reconFn1 + " is not cubical.\n"); } if ( maps[0].data.xdim != maps[1].data.xdim || maps[0].data.ydim != maps[1].data.ydim || maps[0].data.zdim != maps[1].data.zdim) { REPORT_ERROR(reconFn0 + " and " + reconFn1 + " are of unequal size.\n"); } if (angpix < 0) { angpix = maps[0].samplingRateX(); std::cerr << "WARNING: You did not specify --angpix_ref. The pixel size in the image header of " << reconFn0 << ", " << angpix << " A/px, is used." << std::endl; } s = maps[0].data.ydim; sh = s/2 + 1; if (maskFn != "") { if (verb > 0) std::cout << " + Masking references ...\n"; Image maskedRef; mask.read(maskFn); ImageOp::multiply(mask, maps[0], maskedRef); maps[0] = maskedRef; ImageOp::multiply(mask, maps[1], maskedRef); maps[1] = maskedRef; hasMask = true; } if (verb > 0) std::cout << " + Transforming references ...\n"; projectors[0] = Projector(s, TRILINEAR, paddingFactor, 10, 2); projectors[0].computeFourierTransformMap(maps[0].data, powSpec[0].data, maps[0].data.xdim); projectors[1] = Projector(s, TRILINEAR, paddingFactor, 10, 2); projectors[1].computeFourierTransformMap(maps[1].data, powSpec[1].data, maps[1].data.xdim); if (fscFn != "") { MetaDataTable fscMdt; fscMdt.read(fscFn, "fsc"); if (!fscMdt.containsLabel(EMDL_SPECTRAL_IDX)) { REPORT_ERROR(fscFn + " does not contain a value for " + EMDL::label2Str(EMDL_SPECTRAL_IDX)); } if (!fscMdt.containsLabel(EMDL_POSTPROCESS_FSC_TRUE)) { REPORT_ERROR(fscFn + " does not contain a value for " + EMDL::label2Str(EMDL_POSTPROCESS_FSC_TRUE)); } RefinementHelper::drawFSC(&fscMdt, freqWeight1D, freqWeight); } else { freqWeight1D = std::vector(sh,1.0); freqWeight = Image(sh,s); freqWeight.data.initConstant(1.0); } k_out = sh; for (int i = 1; i < sh; i++) { if (freqWeight1D[i] <= 0.0) { k_out = i; break; } } } Image ReferenceMap::getHollowWeight( double kmin_ang, int s_out, double angpix_out) { const int sh_out = s_out/2 + 1; Image out(sh_out, s_out); const double as_out = s_out * angpix_out; const double as_ref = s * angpix; for (int y = 0; y < s_out; y++) for (int x = 0; x < sh_out; x++) { const double x_out = x; const double y_out = y <= sh_out? y : y - s_out; const double x_ang = x_out / as_out; const double y_ang = y_out / as_out; const double x_ref = x_ang * as_ref; const double y_ref = y_ang * as_ref; const int xx_ref = (int)(x_ref + 0.5); const int yy_ref = y_ref >= 0.0? 
(int)(y_ref + 0.5) : (int)(y_ref + s + 0.5); double r = sqrt(x_ang * x_ang + y_ang * y_ang); if (r < 1.0 / kmin_ang || xx_ref >= sh || yy_ref < 0 || yy_ref >= s) { out(y,x) = 0.0; } else { out(y,x) = freqWeight(yy_ref, xx_ref); } } return out; } std::vector> ReferenceMap::predictAll( const MetaDataTable& mdt, ObservationModel& obs, HalfSet hs, int threads, bool applyCtf, bool applyTilt, bool applyShift, bool applyMtf, bool applyCtfPadding) { // declare on first line to prevent copying std::vector> out(mdt.numberOfObjects()); const int pc = mdt.numberOfObjects(); #pragma omp parallel for num_threads(threads) for (int p = 0; p < pc; p++) { out[p] = predict(mdt, p, obs, hs, applyCtf, applyTilt, applyShift, applyMtf, applyCtfPadding); } return out; } Image ReferenceMap::predict( const MetaDataTable& mdt, int p, ObservationModel& obs, HalfSet hs, bool applyCtf, bool applyTilt, bool applyShift, bool applyMtf, bool applyCtfPadding) { Image pred; int randSubset; mdt.getValue(EMDL_PARTICLE_RANDOM_SUBSET, randSubset, p); randSubset -= 1; int pi = (hs == Own)? randSubset : 1 - randSubset; obs.predictObservation(projectors[pi], mdt, p, pred(), angpix, applyCtf, applyTilt, applyShift, applyMtf, applyCtfPadding); return pred; } std::vector>> ReferenceMap::predictAllComplexGradients( const MetaDataTable &mdt, ObservationModel &obs, ReferenceMap::HalfSet hs, int threads, bool applyCtf, bool applyTilt, bool applyShift, bool applyMtf, bool applyCtfPadding) { // declare on first line to prevent copying std::vector>> out(mdt.numberOfObjects()); const int pc = mdt.numberOfObjects(); #pragma omp parallel for num_threads(threads) for (int p = 0; p < pc; p++) { out[p] = predictComplexGradient(mdt, p, obs, hs, applyCtf, applyTilt, applyShift, applyMtf, applyCtfPadding); } return out; } Volume> ReferenceMap::predictComplexGradient( const MetaDataTable &mdt, int p, ObservationModel &obs, ReferenceMap::HalfSet hs, bool applyCtf, bool applyTilt, bool applyShift, bool applyMtf, bool applyCtfPadding) { Volume> pred; int randSubset; mdt.getValue(EMDL_PARTICLE_RANDOM_SUBSET, randSubset, p); randSubset -= 1; int pi = (hs == Own)? randSubset : 1 - randSubset; pred = obs.predictComplexGradient(projectors[pi], mdt, p, angpix, applyCtf, applyTilt, applyShift, applyMtf, applyCtfPadding); return pred; } std::vector> ReferenceMap::predictAll( const MetaDataTable& mdt, const LegacyObservationModel& obs, HalfSet hs, int threads, bool applyCtf, bool applyTilt, bool applyShift) { // declare on first line to prevent copying std::vector> out(mdt.numberOfObjects()); const int pc = mdt.numberOfObjects(); #pragma omp parallel for num_threads(threads) for (int p = 0; p < pc; p++) { out[p] = predict(mdt, p, obs, hs, applyCtf, applyTilt, applyShift); } return out; } Image ReferenceMap::predict( const MetaDataTable& mdt, int p, const LegacyObservationModel& obs, HalfSet hs, bool applyCtf, bool applyTilt, bool applyShift) { Image pred; int randSubset; mdt.getValue(EMDL_PARTICLE_RANDOM_SUBSET, randSubset, p); randSubset -= 1; int pi = (hs == Own)? 
randSubset : 1 - randSubset; pred = obs.predictObservation(projectors[pi], mdt, p, applyCtf, applyTilt, applyShift); return pred; } double ReferenceMap::angToPix(double a) const { return s * angpix / a; } double ReferenceMap::pixToAng(double p) const { return s * angpix / p; } // perhaps some other day: /*void ReferenceMap::predictOccupancy(const MetaDataTable &particles, int threads) { for (int half = 0; half < 1; half++) { occupancies[half] = Projector(s, TRILINEAR, 1.0, 10, 2); occupancies[half].data = MultidimArray(1,s,s,sh); const int pc std::vector ctfs( #pragma omp parallel for num_threads(threads) for (int z = 0; z < s; z++) { for (int y = 0; y < s; y++) for (int x = 0; x < sh; x++) { } } } }*/ relion-3.1.3/src/jaz/reference_map.h000066400000000000000000000063511411340063500173270ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef REFERENCE_MAP_H #define REFERENCE_MAP_H #include #include #include #include #include class ObservationModel; class LegacyObservationModel; class ReferenceMap { public: typedef enum {Own, Opposite} HalfSet; ReferenceMap(); // input parameters: std::string reconFn0, reconFn1, maskFn, fscFn; double paddingFactor; // data: Image freqWeight, mask; std::vector freqWeight1D; Projector projectors[2]; int k_out, s, sh; bool hasMask; double angpix; void read(IOParser& parser, int argc, char *argv[]); void load(int verb, bool debug); Image getHollowWeight(double kmin_ang, int s_out, double angpix_out); std::vector> predictAll( const MetaDataTable& mdt, ObservationModel& obs, HalfSet hs, int threads, bool applyCtf = true, bool applyTilt = true, bool applyShift = true, bool applyMtf = true, bool applyCtfPadding = false); Image predict( const MetaDataTable& mdt, int p, ObservationModel& obs, HalfSet hs, bool applyCtf = true, bool applyTilt = true, bool applyShift = true, bool applyMtf = true, bool applyCtfPadding = false); std::vector > > predictAllComplexGradients( const MetaDataTable& mdt, ObservationModel& obs, HalfSet hs, int threads, bool applyCtf = true, bool applyTilt = true, bool applyShift = true, bool applyMtf = true, bool applyCtfPadding = false); Volume> predictComplexGradient( const MetaDataTable& mdt, int p, ObservationModel& obs, HalfSet hs, bool applyCtf = true, bool applyTilt = true, bool applyShift = true, bool applyMtf = true, bool applyCtfPadding = false); std::vector> predictAll( const MetaDataTable& mdt, const LegacyObservationModel& obs, HalfSet hs, int threads, bool applyCtf = true, bool applyTilt = true, bool applyShift = true); Image predict( const MetaDataTable& mdt, int p, const LegacyObservationModel& obs, HalfSet hs, bool applyCtf = true, bool applyTilt = true, 
bool applyShift = true); double angToPix(double a) const; double pixToAng(double p) const; }; #endif relion-3.1.3/src/jaz/refinement_helper.cpp000066400000000000000000000167561411340063500205740ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include #include #include #include #include #include #include using namespace gravis; void RefinementHelper::drawFSC(const MetaDataTable *mdt, std::vector& dest1D, Image &dest, double thresh) { const int n = mdt->numberOfObjects(); const int w = 2*(n-1); const int h = 2*(n-1); dest1D = std::vector(n); for (int i = 0; i < n; i++) { int idx; mdt->getValue(EMDL_SPECTRAL_IDX, idx, i); mdt->getValue(EMDL_POSTPROCESS_FSC_TRUE, dest1D[i], i); if (dest1D[i] < thresh) dest1D[i] = 0.0; } dest = Image(n,h); for (int y = 0; y < h; y++) for (int x = 0; x < n; x++) { double xx = x; double yy = y > h/2? y - h : y; double r = sqrt(xx*xx + yy*yy); int ri = (int)(r+0.5); if (ri > w/2) ri = w/2; DIRECT_A2D_ELEM(dest.data, y, x) = dest1D[ri]; } } void RefinementHelper::computeSNR(const MetaDataTable *mdt, Image &dest, double eps) { const int n = mdt->numberOfObjects(); const int w = 2*(n-1); const int h = 2*(n-1); std::vector snr(n); for (int i = 0; i < n; i++) { int idx; double fsc; mdt->getValue(EMDL_SPECTRAL_IDX, idx, i); mdt->getValue(EMDL_POSTPROCESS_FSC_TRUE, fsc, i); if (fsc > 1.0 - eps) fsc = 1.0 - eps; //else if (fsc < eps) fsc = 0.0; snr[i] = fsc / (1.0 - fsc); } dest = Image(n,h); for (int y = 0; y < h; y++) for (int x = 0; x < n; x++) { double xx = x; double yy = y > h/2? y - h : y; double r = sqrt(xx*xx + yy*yy); int ri = (int)(r+0.5); if (ri > w/2) ri = w/2; DIRECT_A2D_ELEM(dest.data, y, x) = snr[ri]; } } void RefinementHelper::computeSigInvSq(const MetaDataTable *mdt, const std::vector& signalPow, Image &dest, double eps) { const int n = mdt->numberOfObjects(); const int w = 2*(n-1); const int h = 2*(n-1); std::vector sigInvSq(n); for (int i = 0; i < n; i++) { int idx; double fsc; mdt->getValue(EMDL_SPECTRAL_IDX, idx, i); mdt->getValue(EMDL_POSTPROCESS_FSC_TRUE, fsc, i); if (fsc > 1.0 - eps) fsc = 1.0 - eps; //else if (fsc < eps) fsc = 0.0; double snr = fsc / (1.0 - fsc); double sigPow = signalPow[i]; if (sigPow < eps) sigPow = eps; sigInvSq[i] = snr / sigPow; } dest = Image(n,h); for (int y = 0; y < h; y++) for (int x = 0; x < n; x++) { double xx = x; double yy = y > h/2? 
y - h : y; double r = sqrt(xx*xx + yy*yy); int ri = (int)(r+0.5); if (ri > w/2) ri = w/2; DIRECT_A2D_ELEM(dest.data, y, x) = sigInvSq[ri]; } } Image RefinementHelper::correlation(const Image &prediction, const Image &observation) { const long w = prediction.data.xdim; const long h = prediction.data.ydim; Image out(w,h); for (long y = 0; y < h; y++) for (long x = 0; x < w; x++) { Complex vx = DIRECT_A2D_ELEM(prediction.data, y, x); Complex vy = DIRECT_A2D_ELEM(observation.data, y, x); DIRECT_A2D_ELEM(out.data, y, x) = (vy.real * vx.real + vy.imag * vx.imag); } return out; } Image RefinementHelper::correlation( const std::vector >& predictions, const std::vector >& observations) { const long w = predictions[0].data.xdim; const long h = predictions[0].data.ydim; const long c = predictions.size(); Image out(w,h); out.data.initZeros(); for (long i = 0; i < c; i++) { for (long y = 0; y < h; y++) for (long x = 0; x < w; x++) { Complex vx = DIRECT_A2D_ELEM(predictions[i].data, y, x); Complex vy = DIRECT_A2D_ELEM(observations[i].data, y, x); DIRECT_A2D_ELEM(out.data, y, x) += (vy.real * vx.real + vy.imag * vx.imag); } } return out; } void RefinementHelper::addToQR(const Image& prediction, const Image& observation, Image& q, Image& r) { const long w = prediction.data.xdim; const long h = prediction.data.ydim; for (long y = 0; y < h; y++) for (long x = 0; x < w; x++) { Complex vx = DIRECT_A2D_ELEM(prediction.data, y, x); Complex vy = DIRECT_A2D_ELEM(observation.data, y, x); DIRECT_A2D_ELEM(q.data, y, x) += vy.conj() * vx; DIRECT_A2D_ELEM(r.data, y, x) += vx.norm(); } } void RefinementHelper::addToPQR(const Image& prediction, const Image& observation, Image& p, Image& q, Image& r) { const long w = prediction.data.xdim; const long h = prediction.data.ydim; for (long y = 0; y < h; y++) for (long x = 0; x < w; x++) { Complex vx = DIRECT_A2D_ELEM(prediction.data, y, x); Complex vy = DIRECT_A2D_ELEM(observation.data, y, x); DIRECT_A2D_ELEM(p.data, y, x) += vx.norm(); DIRECT_A2D_ELEM(q.data, y, x) += vy.conj() * vx; DIRECT_A2D_ELEM(r.data, y, x) += vx.norm(); } } double RefinementHelper::squaredDiff( const Image& prediction, const Image& observation, CTF& ctf, RFLOAT angpix, const Image& weight) { const long w = prediction.data.xdim; const long h = prediction.data.ydim; double out = 0.0; Image ctfImg(w,h); ctf.getFftwImage(ctfImg(), h, h, angpix); for (long y = 0; y < h; y++) for (long x = 0; x < w; x++) { Complex vx = DIRECT_A2D_ELEM(prediction.data, y, x); const Complex vy = DIRECT_A2D_ELEM(observation.data, y, x); const RFLOAT vw = DIRECT_A2D_ELEM(weight.data, y, x); RFLOAT vm = ctfImg(y,x); out += vw * (vy - vm * vx).norm(); } return out; } double RefinementHelper::squaredDiff( const std::vector > &predictions, const std::vector > &observations, CTF &ctf, RFLOAT angpix, const Image &weight) { double out = 0.0; for (long i = 0; i < predictions.size(); i++) { out += squaredDiff(predictions[i], observations[i], ctf, angpix, weight); } return out; } relion-3.1.3/src/jaz/refinement_helper.h000066400000000000000000000054151411340063500202270ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef REFINEMENT_HELPER_H #define REFINEMENT_HELPER_H #include #include #include #include #include #include #include class Projector; class RefinementHelper { public: static void drawFSC(const MetaDataTable* mdt, std::vector& dest1D, Image& dest, double thresh = 0.143); static void computeSNR(const MetaDataTable* mdt, Image& dest, double eps = 1e-15); static void computeSigInvSq(const MetaDataTable* mdt, const std::vector& signalPow, Image& dest, double eps = 1e-15); static Image correlation(const Image& prediction, const Image& observation); static Image correlation(const std::vector >& prediction, const std::vector >& observation); static void addToQR( const Image& prediction, const Image& observation, Image& q, Image& r); static void addToPQR( const Image& prediction, const Image& observation, Image& p, Image& q, Image& r); static double squaredDiff( const Image& prediction, const Image& observation, CTF& ctf, RFLOAT angpix, const Image& weight); static double squaredDiff( const std::vector>& predictions, const std::vector>& observations, CTF& ctf, RFLOAT angpix, const Image& weight); }; #endif relion-3.1.3/src/jaz/resampling_helper.cpp000066400000000000000000000017071411340063500205670ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ // this file intentionally left blank relion-3.1.3/src/jaz/resampling_helper.h000066400000000000000000000147131411340063500202350ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. 
Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef RESAMPLING_HELPER_H #define RESAMPLING_HELPER_H #include #include #include class ResamplingHelper { public: // low-pass filter in real-space, then subsample by factor n template static void downsampleGauss2D(const Image& src, double n, Image& dest); template static void downsampleBox2D(const Image& src, double n, Image& dest); template static void subsample2D(const Image& src, double n, Image& dest); template static void subsample2D_cubic(const Image& src, double n, Image& dest, bool wrap = false); template static void subsample3D(const Image& src, int n, Image& dest); template static void upsample2D_linear(const Image& src, int n, Image& dest, bool wrap = false); template static void upsample2D_cubic(const Image& src, int n, Image& dest, bool wrap = false, int w = -1, int h = -1); }; template void ResamplingHelper::downsampleGauss2D(const Image& src, double n, Image& dest) { Image temp(src.data.ydim, src.data.xdim); FilterHelper::separableGaussianXY(src, temp, 0.5*(n-1), n-1); subsample2D(temp, n, dest); } template void ResamplingHelper::downsampleBox2D(const Image& src, double n, Image& dest) { const int w0 = src.data.xdim; const int h0 = src.data.ydim; const int w1 = (int)(w0/n); const int h1 = (int)(h0/n); if (dest.data.xdim != w1 || dest.data.ydim != h1 || dest.data.zdim != 1 || dest.data.ndim != 1) { dest = Image(w1,h1); } for (int y = 0; y < h1; y++) for (int x = 0; x < w1; x++) { T val = 0.0; RFLOAT wgh = 0.0; for (int yy = 0; yy < n; yy++) for (int xx = 0; xx < n; xx++) { int xin = (int)(x*n) + xx; int yin = (int)(y*n) + yy; if (xin < w0 && yin < h0) { val += DIRECT_NZYX_ELEM(src(), 0, 0, yin, xin); wgh += 1.0; } } if (wgh > 0.0) { val /= wgh; } DIRECT_NZYX_ELEM(dest.data, 0, 0, y, x) = val; } } template void ResamplingHelper::subsample2D(const Image& src, double n, Image& dest) { if (dest.data.xdim != (int)(src.data.xdim/n) || dest.data.ydim != (int)(src.data.ydim/n)) { dest = Image((int)(src.data.xdim/n), (int)(src.data.ydim/n)); } for (int y = 0; y < dest.data.ydim; y++) for (int x = 0; x < dest.data.xdim; x++) { DIRECT_NZYX_ELEM(dest.data, 0, 0, y, x) = DIRECT_NZYX_ELEM(src.data, 0, 0, (int)(n*y), (int)(n*x)); } } template void ResamplingHelper::subsample2D_cubic(const Image& src, double n, Image& dest, bool wrap) { dest.data.reshape(src.data.zdim, src.data.ydim/n, src.data.xdim/n); for (size_t z = 0; z < dest.data.zdim; z++) for (size_t y = 0; y < dest.data.ydim; y++) for (size_t x = 0; x < dest.data.xdim; x++) { double xx = x * (double)n; double yy = y * (double)n; DIRECT_NZYX_ELEM(dest.data, 0, z, y, x) = Interpolation::cubicXY(src, xx, yy, z, 0, wrap); } } template void ResamplingHelper::subsample3D(const Image& src, int n, Image& dest) { dest.data.reshape(src.data.zdim/n, src.data.ydim/n, src.data.xdim/n); for (size_t z = 0; z < dest.data.zdim; z++) for (size_t y = 0; y < dest.data.ydim; y++) for (size_t x = 0; x < dest.data.xdim; x++) { DIRECT_NZYX_ELEM(dest.data, 0, z, y, x) = DIRECT_NZYX_ELEM(src.data, 0, n*z, n*y, n*x); } } template void ResamplingHelper::upsample2D_linear(const Image& src, int n, Image& dest, bool wrap) { dest.data.reshape(src.data.zdim, src.data.ydim*n, src.data.xdim*n); for (size_t z = 0; z < dest.data.zdim; z++) for (size_t y = 0; y < dest.data.ydim; y++) for (size_t x = 0; x < dest.data.xdim; x++) { int x0 = x/n; int y0 = y/n; int x1 = x0 + 1; int y1 = y0 + 1; double 
xf = (x / (double)n) - x0; double yf = (y / (double)n) - y0; if (wrap) { x1 = (x1 + src.data.xdim) % src.data.xdim; y1 = (y1 + src.data.ydim) % src.data.ydim; } else { if (x1 >= src.data.xdim) x1 = src.data.xdim - 1; if (y1 >= src.data.ydim) y1 = src.data.ydim - 1; } T v00 = DIRECT_NZYX_ELEM(src.data, 0, z, y0, x0); T v01 = DIRECT_NZYX_ELEM(src.data, 0, z, y0, x1); T v10 = DIRECT_NZYX_ELEM(src.data, 0, z, y1, x0); T v11 = DIRECT_NZYX_ELEM(src.data, 0, z, y1, x1); DIRECT_NZYX_ELEM(dest.data, 0, z, y, x) = xf*(yf*v11 + (1-yf)*v01) + (1-xf)*(yf*v10 + (1-yf)*v00); } } template void ResamplingHelper::upsample2D_cubic(const Image& src, int n, Image& dest, bool wrap, int w, int h) { if (w < 0) w = src.data.xdim*n; if (h < 0) h = src.data.ydim*n; dest.data.reshape(src.data.zdim, h, w); for (size_t z = 0; z < dest.data.zdim; z++) for (size_t y = 0; y < h; y++) for (size_t x = 0; x < w; x++) { double xx = x / (double)n; double yy = y / (double)n; DIRECT_NZYX_ELEM(dest.data, 0, z, y, x) = Interpolation::cubicXY(src, xx, yy, z, 0, wrap); } } #endif relion-3.1.3/src/jaz/slice_helper.cpp000066400000000000000000000500751411340063500175270ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #include #include #include using namespace gravis; void SliceHelper::affineTransform(const Image& img, d4Matrix A, Image& dest) { d4Matrix Ai = A; Ai.invert(); dest.data.resize(1, 1, img.data.ydim, img.data.xdim); for (long int y = 0; y < dest.data.ydim; y++) for (long int x = 0; x < dest.data.xdim; x++) { d4Vector s0(x,y,0,1); d4Vector s1 = Ai * s0; DIRECT_A2D_ELEM(dest.data, y, x) = Interpolation::linearXY(img, s1.x, s1.y, 0); } } void SliceHelper::downsample(Image& img, Image& dest) { double q = dest.data.xdim / (double) img.data.xdim; Image slice0(img.data.xdim, img.data.ydim, 1); FilterHelper::lowPassFilter(img, 0.9*q, q, slice0); subsample(slice0, dest); } void SliceHelper::downsampleSlices(const Image& img, Image& dest) { double q = dest.data.xdim / (double) img.data.xdim; Image slice0(img.data.xdim, img.data.ydim, 1); Image slice1(dest.data.xdim, dest.data.ydim, 1); for (long int n = 0; n < img.data.ndim; n++) { std::cout << n << "/" << img.data.ndim << "\n"; extractStackSlice(img, slice0, n); FilterHelper::lowPassFilter(slice0, 0.9*q, q, slice0); subsample(slice0, slice1); insertStackSlice(slice1, dest, n); } } void SliceHelper::downsampleSlicesReal(const Image& img, Image& dest) { double q = dest.data.xdim / (double) img.data.xdim; Image slice0(img.data.xdim, img.data.ydim, 1); Image sliceT(img.data.xdim, img.data.ydim, 1); Image slice1(dest.data.xdim, dest.data.ydim, 1); for (long int n = 0; n < img.data.ndim; n++) { extractStackSlice(img, slice0, n); FilterHelper::separableGaussianXYZ(slice0, sliceT, 1.5/q); subsample(sliceT, slice1); insertStackSlice(slice1, dest, n); } } void SliceHelper::lowPassFilterSlicewise(Image& img, double maxFreq0, double maxFreq1) { Image slice0(img.data.xdim, img.data.ydim, 1); for (long int n = 0; n < img.data.ndim; n++) { extractStackSlice(img, slice0, n); FilterHelper::lowPassFilter(slice0, maxFreq0, maxFreq1, slice0); insertStackSlice(slice0, img, n); } } void SliceHelper::lowPassFilterSlice(Image& img, long int n, double maxFreq0, double maxFreq1) { Image slice0(img.data.xdim, img.data.ydim, 1); extractStackSlice(img, slice0, n); FilterHelper::lowPassFilter(slice0, maxFreq0, maxFreq1, slice0); insertStackSlice(slice0, img, n); } void SliceHelper::subsample(const Image& img, Image& dest) { double q = img.data.xdim / (double) dest.data.xdim; for (long int y = 0; y < dest.data.ydim; y++) for (long int x = 0; x < dest.data.xdim; x++) { DIRECT_A2D_ELEM(dest.data, y, x) = DIRECT_A2D_ELEM(img.data, (long int)(q*y + 0.5), (long int)(q*x + 0.5)); } } void SliceHelper::avgPad(const Volume& src, Volume& dest, double ratio) { int padX = (int)(ratio * src.dimx); int padY = (int)(ratio * src.dimy); int padZ = (int)(ratio * src.dimz); double avg = 0.0; for (long int z = 0; z < src.dimz; z++) for (long int y = 0; y < src.dimy; y++) for (long int x = 0; x < src.dimx; x++) { avg += src(x,y,z); } avg /= src.dimx * src.dimy * src.dimz; dest.resize(src.dimx + 2*padX, src.dimy + 2*padY, src.dimz + 2*padZ); dest.fill(avg); for (long int z = 0; z < src.dimz; z++) for (long int y = 0; y < src.dimy; y++) for (long int x = 0; x < src.dimx; x++) { dest(x+padX, y+padY, z+padZ) = src(x,y,z); } } void SliceHelper::avgPad2D(const Image& src, Image& dest, double ratio) { int padX = (int)(ratio * src.data.xdim); int padY = (int)(ratio * src.data.ydim); double avg = 0.0; for (long int y = 0; y < src.data.ydim; y++) for (long int x = 0; x < src.data.xdim; x++) { avg += DIRECT_A2D_ELEM(src.data, y, x); } 
avg /= src.data.xdim * src.data.ydim; dest = Image(src.data.xdim + 2*padX, src.data.ydim + 2*padY); for (long int y = 0; y < dest.data.ydim; y++) for (long int x = 0; x < dest.data.xdim; x++) { DIRECT_A2D_ELEM(dest.data, y, x) = avg; } for (long int y = 0; y < src.data.ydim; y++) for (long int x = 0; x < src.data.xdim; x++) { DIRECT_A2D_ELEM(dest.data, y+padY, x+padX) = DIRECT_A2D_ELEM(src.data, y, x); } } void SliceHelper::halveSpectrum2D(Image& src, Image& dest) { dest = Image(src.data.xdim/2 + 1, src.data.ydim); const int xo = src.data.xdim/2 + 1; const int yo = src.data.ydim/2 + 1; const int wd = dest.data.xdim; const int hd = dest.data.ydim; for (long int y = 0; y < hd; y++) for (long int x = 0; x < wd; x++) { /*if (x == 0) { DIRECT_A2D_ELEM(dest.data, y, 0) = DIRECT_A2D_ELEM(src.data, y, xo); } else if (xo + x < src.data.xdim) { DIRECT_A2D_ELEM(dest.data, y, x) = 0.5 * (DIRECT_A2D_ELEM(src.data, y, xo + x) + DIRECT_A2D_ELEM(src.data, yo - (y - yo), xo - x)); } else { DIRECT_A2D_ELEM(dest.data, y, x) = DIRECT_A2D_ELEM(src.data, yo - (y - yo), xo - x); }*/ const int yr = (int)y; const int yw = (yr+yo)%hd; DIRECT_A2D_ELEM(dest.data, y, x) = DIRECT_A2D_ELEM(src.data, yw, xo+x); } } void SliceHelper::extractSpectralSlice(Image& src, Image& dest, d3Matrix proj, d2Vector volCentImg, double oversample) { const int wi = (double)dest.data.xdim; const int hi = (double)dest.data.ydim; const double wv = (double)src.data.xdim; const double hv = (double)src.data.ydim; const double dv = (double)src.data.zdim; const double wios = oversample*wi; const double hios = oversample*hi; const int wiosI = ((int)wios)/2 + 1; const int hiosI = ((int)hios); const int ciosX = ((int)wios)/2; const int ciosY = ((int)hios)/2; Image dest2(wiosI,hiosI); Image weight(wiosI,hiosI); d2Vector shift(volCentImg.x - ciosX, volCentImg.y - ciosY); for (long int y = 0; y < dest2.data.ydim; y++) for (long int x = 0; x < dest2.data.xdim; x++) { d3Vector pi((double)x/(double)(wiosI-1), 2.0*(double)y/(double)hiosI, 0.0); if (pi.y >= 1.0) pi.y = pi.y - 2.0; if (pi.norm2() > 1.0) { DIRECT_A2D_ELEM(dest2.data, y, x) = Complex(0,0); continue; } d3Vector pv = proj * pi; bool conj = false; if (pv.x < 0.0) { pv = -pv; conj = true; } if (pv.norm2() > 1.0) { DIRECT_A2D_ELEM(dest2.data, y, x) = Complex(0,0); continue; } double xxd = (wv-1) * pv.x; double yyd = hv * pv.y / 2.0; double zzd = dv * pv.z / 2.0; double ax = std::abs(xxd); double ay = std::abs(yyd); double az = std::abs(zzd); double phi = - PI * (pi.x * shift.x + pi.y * shift.y); Complex z0(cos(phi), sin(phi)); if (ax < 1.0 && ay < 1.0 && az < 1.0) { DIRECT_A2D_ELEM(weight.data, y, x) = z0 * Complex((1.0 - ax) * (1.0 - ay) * (1.0 - az), 0.0); } else { DIRECT_A2D_ELEM(weight.data, y, x) = Complex(0.0, 0.0); } if (yyd < 0.0) yyd += hv; if (zzd < 0.0) zzd += dv; if (conj) { DIRECT_A2D_ELEM(dest2.data, y, x) = z0 * Interpolation::linearFFTW3D(src, xxd, yyd, zzd).conj(); } else { DIRECT_A2D_ELEM(dest2.data, y, x) = z0 * Interpolation::linearFFTW3D(src, xxd, yyd, zzd); } } Image dest2r = Image(2 * (dest2.data.xdim - 1), dest2.data.ydim); Image weightr = Image(2 * (dest2.data.xdim - 1), dest2.data.ydim); FourierTransformer ft; ft.inverseFourierTransform(dest2.data, dest2r()); CenterFFT(dest2r.data, true); FourierTransformer ftw; ftw.inverseFourierTransform(weight.data, weightr()); CenterFFT(weightr.data, true); for (long int y = 0; y < dest.data.ydim; y++) for (long int x = 0; x < dest.data.xdim; x++) { DIRECT_A2D_ELEM(dest.data, y, x) = DIRECT_A2D_ELEM(dest2r.data, y, x) / 
DIRECT_A2D_ELEM(weightr.data, y, x); } } void SliceHelper::insertSpectralSlices( std::vector >& src, std::vector proj, std::vector volCentImg, Image& dest, double thickness, double thicknessSlope, double imgPad) { const double wv = dest.data.xdim; const double hv = dest.data.ydim; const double dv = dest.data.zdim; const double wir = src[0].data.xdim; const double hir = src[0].data.ydim; const int ic = src.size(); std::vector > srcSpectra(ic); std::vector shifts(ic); std::vector thz(ic); Image img; for (int i = 0; i < ic; i++) { avgPad2D(src[i], img, imgPad); FourierTransformer ft; CenterFFT(img.data, false); ft.FourierTransform(img(), srcSpectra[i].data, true); shifts[i] = d2Vector(volCentImg[i].x - wir/2.0, volCentImg[i].y - hir/2.0); thz[i] = 0.5*d3Vector(wv*proj[i](2,0), hv*proj[i](2,1), dv*proj[i](2,2)).length(); } const double wif = srcSpectra[0].data.xdim; const double hif = srcSpectra[0].data.ydim; for (long int z = 0; z < dest.data.zdim; z++) for (long int y = 0; y < dest.data.ydim; y++) for (long int x = 0; x < dest.data.xdim; x++) { d3Vector pv((double)x/(wv-1), 2.0*(double)y/hv, 2.0*(double)z/dv); if (pv.y > 1.0) pv.y = pv.y - 2.0; if (pv.z > 1.0) pv.z = pv.z - 2.0; if (pv.norm2() >= 1.0) { DIRECT_NZYX_ELEM(dest.data, 0, z, y, x) = Complex(0,0); continue; } const double r = sqrt(pv.x*pv.x + pv.z*pv.z); Complex zs(0.0, 0.0); double wgh = 0.0; for (int i = 0; i < ic; i++) { d3Vector pi3 = proj[i] * pv; if (pi3.x*pi3.x + pi3.y*pi3.y >= 1.0) { continue; } const double za = thz[i] * std::abs(pi3.z); const double th_r = thickness + r * thz[i] * thicknessSlope; if (za > th_r) continue; bool conj = false; if (pi3.x < 0.0) { pi3 = -pi3; conj = true; } double xi = (wif-1) * pi3.x; double yi = hif * pi3.y / 2.0; if (yi < 0.0) yi += hif; double phi = PI * (pi3.x * shifts[i].x + pi3.y * shifts[i].y); Complex z0(cos(phi), sin(phi)); //double wgi = (1.0 - za/th_r) * (thickness / th_r); double wgi = 1.0 - za/th_r; Complex zz = z0 * Interpolation::linearFFTW2D(srcSpectra[i], xi, yi); if (conj) { zz = zz.conj(); } zs += wgi * zz; wgh += wgi; } if (wgh > 1.0) zs /= wgh; DIRECT_NZYX_ELEM(dest.data, 0, z, y, x) = zs; } } void SliceHelper::insertWeightedSpectralSlices( std::vector >& src, std::vector proj, std::vector volCentImg, std::vector imgWeights, Image& dest, double thickness, double imgPad) { const double wv = dest.data.xdim; const double hv = dest.data.ydim; const double dv = dest.data.zdim; const double wir = src[0].data.xdim; const double hir = src[0].data.ydim; const int ic = src.size(); std::vector > srcSpectra(ic); std::vector shifts(ic); std::vector thz(ic); Image img; for (int i = 0; i < ic; i++) { avgPad2D(src[i], img, imgPad); FourierTransformer ft; CenterFFT(img.data, false); ft.FourierTransform(img(), srcSpectra[i].data, true); shifts[i] = d2Vector(volCentImg[i].x - wir/2.0, volCentImg[i].y - hir/2.0); thz[i] = 0.5*d3Vector(wv*proj[i](2,0), hv*proj[i](2,1), dv*proj[i](2,2)).length(); } const double wif = srcSpectra[0].data.xdim; const double hif = srcSpectra[0].data.ydim; for (long int z = 0; z < dest.data.zdim; z++) for (long int y = 0; y < dest.data.ydim; y++) for (long int x = 0; x < dest.data.xdim; x++) { d3Vector pv((double)x/(wv-1), 2.0*(double)y/hv, 2.0*(double)z/dv); if (pv.y > 1.0) pv.y = pv.y - 2.0; if (pv.z > 1.0) pv.z = pv.z - 2.0; if (pv.norm2() >= 1.0) { DIRECT_NZYX_ELEM(dest.data, 0, z, y, x) = Complex(0,0); continue; } Complex zs(0.0, 0.0); double wgh = 0.0; for (int i = 0; i < ic; i++) { d3Vector pi3 = proj[i] * pv; if (pi3.x*pi3.x + pi3.y*pi3.y >= 1.0) { 
continue; } const double za = thz[i] * std::abs(pi3.z); const double th_r = thickness; if (za > th_r) continue; bool conj = false; if (pi3.x < 0.0) { pi3 = -pi3; conj = true; } double xi = (wif-1) * pi3.x; double yi = hif * pi3.y / 2.0; if (yi < 0.0) yi += hif; double phi = PI * (pi3.x * shifts[i].x + pi3.y * shifts[i].y); Complex z0(cos(phi), sin(phi)); double wgi = imgWeights[i] * (1.0 - za/th_r); Complex zz = z0 * Interpolation::linearFFTW2D(srcSpectra[i], xi, yi); if (conj) { zz = zz.conj(); } zs += wgi * zz; wgh += wgi; } if (wgh > 1.0) zs /= wgh; DIRECT_NZYX_ELEM(dest.data, 0, z, y, x) = zs; } } void SliceHelper::extractStackSlice(const Image& src, Image& dest, long int s) { if (src.data.xdim != dest.data.xdim || src.data.ydim != dest.data.ydim) { REPORT_ERROR("SliceHelper::extractSlice: image size mismatch.\n"); } for (long int y = 0; y < src.data.ydim; y++) for (long int x = 0; x < src.data.xdim; x++) { DIRECT_NZYX_ELEM(dest.data, 0, 0, y, x) = DIRECT_NZYX_ELEM(src.data, s, 0, y, x); } } void SliceHelper::extractStackSlices(const Image& src, Image& dest, long int s) { if (src.data.xdim != dest.data.xdim || src.data.ydim != dest.data.ydim) { REPORT_ERROR("SliceHelper::extractSlice: image size mismatch.\n"); } for (long int n = 0; n < dest.data.ndim; n++) for (long int y = 0; y < dest.data.ydim; y++) for (long int x = 0; x < dest.data.xdim; x++) { DIRECT_NZYX_ELEM(dest.data, n, 0, y, x) = DIRECT_NZYX_ELEM(src.data, n+s, 0, y, x); } } void SliceHelper::extractStackSlices(const Image& src, Image& dest, long int s) { if (src.data.xdim != dest.data.xdim || src.data.ydim != dest.data.ydim) { REPORT_ERROR("SliceHelper::extractSlice: image size mismatch.\n"); } for (long int n = 0; n < dest.data.ndim; n++) for (long int y = 0; y < dest.data.ydim; y++) for (long int x = 0; x < dest.data.xdim; x++) { DIRECT_NZYX_ELEM(dest.data, n, 0, y, x) = (RFLOAT) DIRECT_NZYX_ELEM(src.data, n+s, 0, y, x); } } Image SliceHelper::getStackSlice(const Image &src, long n) { const long int w = src().xdim; const long int h = src().ydim; Image out(w,h); for (long int y = 0; y < h; y++) for (long int x = 0; x < w; x++) { DIRECT_NZYX_ELEM(out.data, 0, 0, y, x) = DIRECT_NZYX_ELEM(src.data, n, 0, y, x); } return out; } void SliceHelper::insertStackSlice(const Image& src, Image& dest, long int s) { if (src.data.xdim != dest.data.xdim || src.data.ydim != dest.data.ydim) { REPORT_ERROR("SliceHelper::extractSlice: image size mismatch.\n"); } for (long int y = 0; y < src.data.ydim; y++) for (long int x = 0; x < src.data.xdim; x++) { DIRECT_NZYX_ELEM(dest.data, s, 0, y, x) = DIRECT_NZYX_ELEM(src.data, 0, 0, y, x); } } void SliceHelper::insertStackSlice(const Image& src, Image& dest, long int s) { if (src.data.xdim != dest.data.xdim || src.data.ydim != dest.data.ydim) { REPORT_ERROR("SliceHelper::extractSlice: image size mismatch.\n"); } for (long int y = 0; y < src.data.ydim; y++) for (long int x = 0; x < src.data.xdim; x++) { DIRECT_NZYX_ELEM(dest.data, s, 0, y, x) = DIRECT_NZYX_ELEM(src.data, 0, 0, y, x); } } void SliceHelper::insertZSlice(const Image& src, Image& dest, long int s) { if (src.data.xdim != dest.data.xdim || src.data.ydim != dest.data.ydim) { REPORT_ERROR("SliceHelper::extractSlice: image size mismatch.\n"); } for (long int y = 0; y < src.data.ydim; y++) for (long int x = 0; x < src.data.xdim; x++) { DIRECT_NZYX_ELEM(dest.data, 0, s, y, x) = DIRECT_NZYX_ELEM(src.data, 0, 0, y, x); } } void SliceHelper::insertZSlice(const Image& src, Image& dest, long int s) { if (src.data.xdim != dest.data.xdim || 
src.data.ydim != dest.data.ydim) { REPORT_ERROR("SliceHelper::extractSlice: image size mismatch.\n"); } for (long int y = 0; y < src.data.ydim; y++) for (long int x = 0; x < src.data.xdim; x++) { DIRECT_NZYX_ELEM(dest.data, 0, s, y, x) = DIRECT_NZYX_ELEM(src.data, 0, 0, y, x); } } Image SliceHelper::consolidate(const std::vector >& src, bool toN) { const int w = src[0].data.xdim; const int h = src[0].data.ydim; const int ic = src.size(); const int zc = toN? 1 : ic; const int nc = toN? ic : 1; Image out(w,h,zc,nc); for (int i = 0; i < ic; i++) { if (src[i].data.xdim != w || src[i].data.ydim != h) { REPORT_ERROR("SliceHelper::consolidate(): images are of unequal size.\n"); } if (toN) insertStackSlice(src[i], out, i); else insertZSlice(src[i], out, i); } return out; } Image SliceHelper::consolidate(const std::vector >& src, bool toN) { const int w = src[0].data.xdim; const int h = src[0].data.ydim; const int ic = src.size(); Image out(w,h,1,ic); for (int i = 0; i < ic; i++) { if (src[i].data.xdim != w || src[i].data.ydim != h) { REPORT_ERROR("SliceHelper::consolidate(): images are of unequal size.\n"); } if (toN) insertStackSlice(src[i], out, i); else insertZSlice(src[i], out, i); } return out; } void SliceHelper::stat(const Image& img) { std::cout << "xdim: " << img.data.xdim << "\n"; std::cout << "ydim: " << img.data.ydim << "\n"; std::cout << "zdim: " << img.data.zdim << "\n"; std::cout << "ndim: " << img.data.ndim << "\n"; } relion-3.1.3/src/jaz/slice_helper.h000066400000000000000000000100111411340063500171560ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #ifndef SLICE_HELPER_H #define SLICE_HELPER_H #include #include #include #include #include #include class SliceHelper { public: static void affineTransform(const Image& img, gravis::d4Matrix A, Image& dest); static void downsample(Image& img, Image& dest); static void downsampleSlices(const Image& img, Image& dest); static void downsampleSlicesReal(const Image& img, Image& dest); static void lowPassFilterSlicewise(Image& img, double maxFreq0, double maxFreq1); static void lowPassFilterSlice(Image& img, long int n, double maxFreq0, double maxFreq1); static void subsample(const Image& img, Image& dest); static void avgPad(const Volume& src, Volume& dest, double ratio); static void avgPad2D(const Image& src, Image& dest, double ratio); static void halveSpectrum2D(Image& src, Image& dest); static void extractSpectralSlice(Image& src, Image& dest, gravis::d3Matrix proj, gravis::d2Vector volCentImg, double oversample = 4.0); static void insertSpectralSlices(std::vector >& src, std::vector proj, std::vector volCentImg, Image& dest, double thickness = 1.0, double thicknessSlope = 0.0, double imgPad = 0.5); static void insertWeightedSpectralSlices(std::vector >& src, std::vector proj, std::vector volCentImg, std::vector imgWeights, Image& dest, double thickness = 1.0, double imgPad = 0.5); static void extractStackSlice(const Image& src, Image& dest, long int s); static void extractStackSlices(const Image& src, Image& dest, long int s); static void extractStackSlices(const Image& src, Image& dest, long int s); static Image getStackSlice(const Image& src, long int n); static void insertStackSlice(const Image& src, Image& dest, long int s); static void insertStackSlice(const Image& src, Image& dest, long int s); static void insertZSlice(const Image& src, Image& dest, long int s); static void insertZSlice(const Image& src, Image& dest, long int s); static Image consolidate(const std::vector >& src, bool toN = false); static Image consolidate(const std::vector >& src, bool toN = false); static void stat(const Image& img); }; #endif relion-3.1.3/src/jaz/spectral_helper.cpp000066400000000000000000000032401411340063500202350ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #include void SpectralHelper :: computePhase(const Image& src, Image& dest) { dest = Image(src.data.xdim, src.data.ydim, src.data.zdim, src.data.ndim); FOR_ALL_NZYX_ELEMENTS_IN_MULTIDIMARRAY(src.data) { Complex z = DIRECT_NZYX_ELEM(src.data, l, k, i, j); DIRECT_NZYX_ELEM(dest.data, l, k, i, j) = atan2(z.imag, z.real); } } void SpectralHelper::computeAbs(const Image& src, Image& dest) { dest = Image(src.data.xdim, src.data.ydim, src.data.zdim, src.data.ndim); FOR_ALL_NZYX_ELEMENTS_IN_MULTIDIMARRAY(src.data) { Complex z = DIRECT_NZYX_ELEM(src.data, l, k, i, j); DIRECT_NZYX_ELEM(dest.data, l, k, i, j) = z.abs(); } } relion-3.1.3/src/jaz/spectral_helper.h000066400000000000000000000022771411340063500177130ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef SPECTRAL_HELPER_H #define SPECTRAL_HELPER_H #include class SpectralHelper { public: static void computePhase(const Image& src, Image& dest); static void computeAbs(const Image& src, Image& dest); }; #endif relion-3.1.3/src/jaz/stack_helper.cpp000066400000000000000000000530161411340063500175330ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include using namespace gravis; std::vector StackHelper::splitByMicrographName(const MetaDataTable& mdt) { std::vector out(0); if (!mdt.labelExists(EMDL_MICROGRAPH_NAME)) { REPORT_ERROR("StackHelper::splitByMicrographName: " + EMDL::label2Str(EMDL_MICROGRAPH_NAME) + " missing from MetaDataTable.\n"); } MetaDataTable md2(mdt); md2.newSort(EMDL_MICROGRAPH_NAME); const long lc = md2.numberOfObjects(); std::string lastName = "", curName; long curInd = -1; for (int i = 0; i < lc; i++) { md2.getValue(EMDL_MICROGRAPH_NAME, curName, i); if (curName != lastName) { lastName = curName; curInd++; out.push_back(MetaDataTable()); } out[curInd].addObject(md2.getObject(i)); } for (int i = 0; i <= curInd; i++) { out[i].newSort(EMDL_IMAGE_NAME, false, false, true); } return out; } MetaDataTable StackHelper::merge(const std::vector &mdts) { MetaDataTable out; for (int i = 0; i < mdts.size(); i++) { out.append(mdts[i]); } return out; } std::vector StackHelper::splitByStack(const MetaDataTable* mdt) { std::vector out(0); if (!mdt->labelExists(EMDL_IMAGE_NAME)) { REPORT_ERROR("StackHelper::splitByStack: "+EMDL::label2Str(EMDL_IMAGE_NAME)+" missing in meta_data_table.\n"); } std::string testString; mdt->getValue(EMDL_IMAGE_NAME, testString, 0); if (testString.find("@") < 0) { REPORT_ERROR("StackHelper::splitByStack: "+EMDL::label2Str(EMDL_IMAGE_NAME)+" does not contain an '@'.\n"); } MetaDataTable md2(*mdt); md2.newSort(EMDL_IMAGE_NAME, false, true); const long lc = md2.numberOfObjects(); std::string lastName = "", curName, curFullName; long curInd = -1; for (int i = 0; i < lc; i++) { md2.getValue(EMDL_IMAGE_NAME, curFullName, i); curName = curFullName.substr(curFullName.find("@")+1); if (curName != lastName) { lastName = curName; curInd++; out.push_back(MetaDataTable()); } out[curInd].addObject(md2.getObject(i)); } for (int i = 0; i <= curInd; i++) { out[i].newSort(EMDL_IMAGE_NAME, false, false, true); } return out; } std::vector > StackHelper::loadStack(const MetaDataTable* mdt, std::string path, int threads) { std::vector> out(mdt->numberOfObjects()); const long ic = mdt->numberOfObjects(); std::string name, fullName; mdt->getValue(EMDL_IMAGE_NAME, fullName, 0); name = fullName.substr(fullName.find("@")+1); if (path != "") { name = path + "/" + name.substr(name.find_last_of("/")+1); } #pragma omp parallel for num_threads(threads) for (long i = 0; i < ic; i++) { std::string sliceName; mdt->getValue(EMDL_IMAGE_NAME, sliceName, i); out[i].read(sliceName, true, -1, false, true); } return out; } std::vector > StackHelper::loadStackFS( const MetaDataTable& mdt, std::string path, int threads, bool centerParticle, ObservationModel* obs) { std::vector > out(mdt.numberOfObjects()); if (centerParticle && obs == 0) { REPORT_ERROR("StackHelper::loadStackFS: centering particles requires an observation model."); } const long ic = mdt.numberOfObjects(); std::string name, fullName; mdt.getValue(EMDL_IMAGE_NAME, fullName, 0); name = fullName.substr(fullName.find("@")+1); if (path != "") { name = path + "/" + name.substr(name.find_last_of("/")+1); } Image dummy; dummy.read(name, false); const int s = dummy.data.xdim; NewFFTPlan::type plan(s,s,1); #pragma omp parallel for num_threads(threads) for (long i = 0; i < ic; i++) { int optGroup = obs->getOpticsGroup(mdt, i); double angpix = obs->getPixelSize(optGroup); std::string sliceName; 
mdt.getValue(EMDL_IMAGE_NAME, sliceName, i); Image in; in.read(sliceName, true, -1, false, true); NewFFT::FourierTransform(in(), out[i](), plan); if (centerParticle) { const int s = in.data.ydim; double xoff, yoff; mdt.getValue(EMDL_ORIENT_ORIGIN_X_ANGSTROM, xoff, i); mdt.getValue(EMDL_ORIENT_ORIGIN_Y_ANGSTROM, yoff, i); xoff /= angpix; yoff /= angpix; shiftImageInFourierTransform(out[i](), out[i](), s, xoff - s/2, yoff - s/2); } } return out; } void StackHelper::saveStack(std::vector > &stack, std::string fn) { const int w = stack[0].data.xdim; const int h = stack[0].data.ydim; const int c = stack.size(); Image img(w,h,1,c); for (int i = 0; i < c; i++) { SliceHelper::insertStackSlice(stack[i], img, i); } img.write(fn); } std::vector > > StackHelper::loadMovieStack(const MetaDataTable* mdt, std::string moviePath) { std::vector > > out(mdt->numberOfObjects()); const long pc = mdt->numberOfObjects(); std::string name, fullName, movieName; mdt->getValue(EMDL_IMAGE_NAME, fullName, 0); mdt->getValue(EMDL_MICROGRAPH_NAME, movieName, 0); name = fullName.substr(fullName.find("@")+1); std::string finName; if (moviePath == "") { finName = name; } else { finName = moviePath + "/" + movieName.substr(movieName.find_last_of("/")+1); } std::cout << "loading real: " << finName << "\n"; Image in; in.read(finName); std::cout << "size = " << in.data.xdim << "x" << in.data.ydim << "x" << in.data.zdim << "x" << in.data.ndim << "\n"; std::cout << "pc = " << pc << "\n"; const int fc = in.data.ndim / pc; const int w = in.data.xdim; const int h = in.data.ydim; for (long p = 0; p < pc; p++) { out[p] = std::vector >(fc); for (long f = 0; f < fc; f++) { out[p][f] = Image(w,h); SliceHelper::extractStackSlice(in, out[p][f], f*pc + p); } } return out; } std::vector>> StackHelper::extractMovieStackFS( const MetaDataTable* mdt, Image* gainRef, MultidimArray* defectMask, std::string movieFn, double outPs, double coordsPs, double moviePs, double dataPs, int squareSize, int threads, // squareSize is the output box size in pixels after downsampling to outPs bool loadData, int firstFrame, int lastFrame, RFLOAT hot, bool verbose, bool saveMemory, const std::vector>* offsets_in, std::vector>* offsets_out) { std::vector>> out(mdt->numberOfObjects()); const long pc = mdt->numberOfObjects(); Image mgStack; mgStack.read(movieFn, false); if (verbose) { std::cout << "size: " << mgStack().xdim << "x" << mgStack().ydim << "x" << mgStack().zdim << "x" << mgStack().ndim << "\n"; } const bool dataInZ = mgStack.data.zdim > 1; const int w0 = mgStack.data.xdim; const int h0 = mgStack.data.ydim; const int fcM = dataInZ? mgStack.data.zdim : mgStack.data.ndim; // lastFrame and firstFrame is 0 indexed, while fcM is 1-indexed const int fc = lastFrame > 0? 
lastFrame - firstFrame + 1 : fcM - firstFrame; if (dataPs < 0) dataPs = outPs; if (fcM <= lastFrame) { REPORT_ERROR("StackHelper::extractMovieStackFS: insufficient number of frames in "+movieFn); } const bool useGain = gainRef != 0; if (useGain && (w0 != gainRef->data.xdim || h0 != gainRef->data.ydim)) { REPORT_ERROR("StackHelper::extractMovieStackFS: incompatible gain reference - size is different from "+movieFn); } const bool fixDefect = false; // TAKANORI DEBUG: defectMask != 0; if (fixDefect && (w0 != defectMask->xdim || h0 != defectMask->ydim)) { REPORT_ERROR("StackHelper::extractMovieStackFS: incompatible defect mask - size is different from "+movieFn); } if (verbose) { if (dataInZ) std::cout << "data in Z\n"; else std::cout << "data in N\n"; std::cout << "frame count in movie = " << fcM << "\n"; std::cout << "frame count to load = " << fc << "\n"; std::cout << "pc, fc = " << pc << ", " << fc << "\n"; } for (long p = 0; p < pc; p++) { out[p] = std::vector>(fc); } if (!loadData) return out; const int sqMg = 2*(int)(0.5 * squareSize * outPs / moviePs + 0.5); // This should be equal to s_mov in frame_recombiner if (verbose) { std::cout << "square size in micrograph: " << sqMg << "\n"; } std::vector fts(threads); std::vector> aux0(threads); std::vector> aux1(threads); for (int t = 0; t < threads; t++) { aux0[t] = Image(sqMg, sqMg); if (outPs != moviePs) { aux1[t] = Image(sqMg/2+1,sqMg); } } int threads_f = saveMemory? 1 : threads; int threads_p = saveMemory? threads : 1; #pragma omp parallel for num_threads(threads_f) for (long f = 0; f < fc; f++) { int tf = omp_get_thread_num(); Image muGraph; muGraph.read(movieFn, true, f+firstFrame, false, true); if (verbose) std::cout << (f+1) << "/" << fc << "\n"; #pragma omp parallel for num_threads(threads_p) for (long int y = 0; y < h0; y++) for (long int x = 0; x < w0; x++) { RFLOAT val = DIRECT_NZYX_ELEM(muGraph.data, 0, 0, y, x); RFLOAT gain = 1.0; if (useGain) gain = DIRECT_NZYX_ELEM(gainRef->data, 0, 0, y, x); if (hot > 0.0 && val > hot) val = hot; DIRECT_NZYX_ELEM(muGraph.data, 0, 0, y, x) = -gain * val; } if (fixDefect) { RFLOAT frame_mean = 0, frame_std = 0; long long n_valid = 0; #pragma omp parallel for reduction(+:frame_mean, n_valid) num_threads(threads_p) FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(muGraph.data) { if (!DIRECT_MULTIDIM_ELEM(*defectMask, n)) continue; frame_mean += DIRECT_MULTIDIM_ELEM(muGraph.data, n); n_valid ++; } frame_mean /= n_valid; #pragma omp parallel for reduction(+:frame_std) num_threads(threads_p) FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(muGraph.data) { if (!DIRECT_MULTIDIM_ELEM(*defectMask, n)) continue; RFLOAT d = (DIRECT_MULTIDIM_ELEM(muGraph.data, n) - frame_mean); frame_std += d * d; } frame_std = std::sqrt(frame_std / n_valid); // 25 neighbours; should be enough even for super-resolution images. 
const int NUM_MIN_OK = 6; const int D_MAX = 2; // EER code path does not use this function const int PBUF_SIZE = 100; #pragma omp parallel for num_threads(threads_p) FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY2D(muGraph.data) { if (!DIRECT_A2D_ELEM(*defectMask, i, j)) continue; int n_ok = 0; RFLOAT pbuf[PBUF_SIZE]; for (int dy= -D_MAX; dy <= D_MAX; dy++) { int y = i + dy; if (y < 0 || y >= h0) continue; for (int dx = -D_MAX; dx <= D_MAX; dx++) { int x = j + dx; if (x < 0 || x >= w0) continue; if (DIRECT_A2D_ELEM(*defectMask, y, x)) continue; pbuf[n_ok] = DIRECT_A2D_ELEM(muGraph.data, y, x); n_ok++; } } if (n_ok > NUM_MIN_OK) DIRECT_A2D_ELEM(muGraph.data, i, j) = pbuf[rand() % n_ok]; else DIRECT_A2D_ELEM(muGraph.data, i, j) = rnd_gaus(frame_mean, frame_std); } } // TODO: TAKANORI: Cache muGraph HERE #pragma omp parallel for num_threads(threads_p) for (long p = 0; p < pc; p++) { int tp = omp_get_thread_num(); int t = saveMemory? tp : tf; out[p][f] = Image(sqMg,sqMg); double xpC, ypC; mdt->getValue(EMDL_IMAGE_COORD_X, xpC, p); mdt->getValue(EMDL_IMAGE_COORD_Y, ypC, p); const double xpO = (int)(coordsPs * xpC / dataPs); const double ypO = (int)(coordsPs * ypC / dataPs); int x0 = (int)round(xpO * dataPs / moviePs) - sqMg / 2; int y0 = (int)round(ypO * dataPs / moviePs) - sqMg / 2; if (offsets_in != 0 && offsets_out != 0) { double dxM = (*offsets_in)[p][f].x * outPs / moviePs; double dyM = (*offsets_in)[p][f].y * outPs / moviePs; int dxI = (int)round(dxM); int dyI = (int)round(dyM); x0 += dxI; y0 += dyI; double dxR = (dxM - dxI) * moviePs / outPs; double dyR = (dyM - dyI) * moviePs / outPs; (*offsets_out)[p][f] = d2Vector(dxR, dyR); } for (long int y = 0; y < sqMg; y++) for (long int x = 0; x < sqMg; x++) { int xx = x0 + x; int yy = y0 + y; if (xx < 0) xx = 0; else if (xx >= w0) xx = w0 - 1; if (yy < 0) yy = 0; else if (yy >= h0) yy = h0 - 1; DIRECT_NZYX_ELEM(aux0[t].data, 0, 0, y, x) = DIRECT_NZYX_ELEM(muGraph.data, 0, 0, yy, xx); } if (outPs == moviePs) { fts[t].FourierTransform(aux0[t](), out[p][f]()); } else { fts[t].FourierTransform(aux0[t](), aux1[t]()); out[p][f] = FilterHelper::cropCorner2D(aux1[t], squareSize/2+1, squareSize); } out[p][f](0,0) = Complex(0.0,0.0); } } return out; } // TAKANORI: TODO: Code duplication with above will be sorted out later! 
std::vector>> StackHelper::extractMovieStackFS( const MetaDataTable* mdt, std::vector > &Iframes, double outPs, double coordsPs, double moviePs, double dataPs, int squareSize, int threads, bool loadData, bool verbose, const std::vector>* offsets_in, std::vector>* offsets_out) { std::vector>> out(mdt->numberOfObjects()); const long pc = mdt->numberOfObjects(); const int fc = Iframes.size(); if (fc == 0) REPORT_ERROR("Empty Iframes passed to StackHelper::extractMovieStackFS"); const int w0 = Iframes[0].xdim; const int h0 = Iframes[0].ydim; if (dataPs < 0) dataPs = outPs; if (verbose) { std::cout << "pc, fc = " << pc << ", " << fc << "\n"; std::cout << "size: x = " << w0 << " y = " << h0 << "\n"; } for (long p = 0; p < pc; p++) { out[p] = std::vector>(fc); } if (!loadData) return out; const int sqMg = 2*(int)(0.5 * squareSize * outPs / moviePs + 0.5); if (verbose) { std::cout << "square size in micrograph: " << sqMg << "\n"; } std::vector fts(threads); std::vector> aux0(threads); std::vector> aux1(threads); for (int t = 0; t < threads; t++) { aux0[t] = Image(sqMg, sqMg); if (outPs != moviePs) { aux1[t] = Image(sqMg/2+1,sqMg); } } #pragma omp parallel for num_threads(threads) for (long f = 0; f < fc; f++) { int tf = omp_get_thread_num(); if (verbose) std::cout << (f+1) << "/" << fc << "\n"; for (long p = 0; p < pc; p++) { int t = tf; out[p][f] = Image(sqMg,sqMg); double xpC, ypC; mdt->getValue(EMDL_IMAGE_COORD_X, xpC, p); mdt->getValue(EMDL_IMAGE_COORD_Y, ypC, p); const double xpO = (int)(coordsPs * xpC / dataPs); const double ypO = (int)(coordsPs * ypC / dataPs); int x0 = (int)round(xpO * dataPs / moviePs) - sqMg / 2; int y0 = (int)round(ypO * dataPs / moviePs) - sqMg / 2; if (offsets_in != 0 && offsets_out != 0) { double dxM = (*offsets_in)[p][f].x * outPs / moviePs; double dyM = (*offsets_in)[p][f].y * outPs / moviePs; int dxI = (int)round(dxM); int dyI = (int)round(dyM); x0 += dxI; y0 += dyI; double dxR = (dxM - dxI) * moviePs / outPs; double dyR = (dyM - dyI) * moviePs / outPs; (*offsets_out)[p][f] = d2Vector(dxR, dyR); } for (long int y = 0; y < sqMg; y++) for (long int x = 0; x < sqMg; x++) { int xx = x0 + x; int yy = y0 + y; if (xx < 0) xx = 0; else if (xx >= w0) xx = w0 - 1; if (yy < 0) yy = 0; else if (yy >= h0) yy = h0 - 1; // Note the MINUS here!!! 
DIRECT_NZYX_ELEM(aux0[t].data, 0, 0, y, x) = -DIRECT_A2D_ELEM(Iframes[f], yy, xx); } if (outPs == moviePs) { fts[t].FourierTransform(aux0[t](), out[p][f]()); } else { fts[t].FourierTransform(aux0[t](), aux1[t]()); out[p][f] = FilterHelper::cropCorner2D(aux1[t], squareSize/2+1, squareSize); } out[p][f](0,0) = Complex(0.0,0.0); } } return out; } std::vector > StackHelper::FourierTransform(std::vector >& stack) { std::vector > out(stack.size()); const long ic = stack.size(); for (long i = 0; i < ic; i++) { FourierTransformer ft; ft.FourierTransform(stack[i].data, out[i].data); } return out; } std::vector > StackHelper::inverseFourierTransform(std::vector >& stack) { std::vector > out(stack.size()); const long ic = stack.size(); const int h = stack[0].data.ydim; const int ww = stack[0].data.xdim; const int w = 2*(ww - 1); for (long i = 0; i < ic; i++) { out[i] = Image(w,h); FourierTransformer ft; ft.inverseFourierTransform(stack[i].data, out[i].data); } return out; } Image StackHelper::toSingleImage(const std::vector> stack) { const int s = stack.size(); if (s < 1) return Image(0,0,0); const int w = stack[0].data.xdim; const int h = stack[0].data.ydim; Image out(w,h,1,s); for (int n = 0; n < s; n++) { for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { DIRECT_NZYX_ELEM(out(),n,0,y,x) = stack[n](y,x); } } return out; } void StackHelper::varianceNormalize(std::vector>& movie, bool circleCropped) { const int fc = movie.size(); const int w = movie[0].data.xdim; const int h = movie[0].data.ydim; const int wt = 2*(w-1); double var = 0.0; double cnt = 0.0; const double rr = (w-2)*(w-2); for (int f = 0; f < fc; f++) { for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { if (x == 0 && y == 0) continue; if (circleCropped) { const double yy = y < w? y : y - h; const double xx = x; if (xx*xx + yy*yy > rr) continue; } double scale = x > 0? 2.0 : 1.0; var += scale * movie[f](y,x).norm(); cnt += scale; } } const double scale = sqrt(wt*h*var/(cnt*fc)); //std::cout << "scale: " << scale << "\n"; for (int f = 0; f < fc; f++) { for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { movie[f](y,x) /= scale; } } } std::vector StackHelper::powerSpectrum(const std::vector>> &stack) { const int ic = stack.size(); const int fc = stack[0].size(); const int w = stack[0][0].data.xdim; const int h = stack[0][0].data.ydim; std::vector out(w, 0.0), wgh(w, 0.0); for (int i = 0; i < ic; i++) for (int f = 0; f < fc; f++) { for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { const Complex z = DIRECT_A2D_ELEM(stack[i][f].data, y, x); const double yy = y < w? y : y - h; const double xx = x; const int r = (int) sqrt(xx*xx + yy*yy); if (r >= w) continue; out[r] += z.norm(); wgh[r] += 1.0; } } for (int x = 0; x < w; x++) { if (wgh[x] > 0.0) { out[x] /= wgh[x]; } } return out; } std::vector StackHelper::varSpectrum(const std::vector>> &stack) { const int ic = stack.size(); const int fc = stack[0].size(); const int w = stack[0][0].data.xdim; const int h = stack[0][0].data.ydim; std::vector out(w, 0.0), wgh(w, 0.0); std::vector mean(w, 0.0); for (int i = 0; i < ic; i++) for (int f = 0; f < fc; f++) { for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { const Complex z = DIRECT_A2D_ELEM(stack[i][f].data, y, x); const double yy = y < w? 
y : y - h; const double xx = x; const int r = (int) sqrt(xx*xx + yy*yy); if (r >= w) continue; mean[r] += z; wgh[r] += 1.0; } } for (int x = 0; x < w; x++) { if (wgh[x] > 0.0) { mean[x] /= wgh[x]; } } for (int i = 0; i < ic; i++) for (int f = 0; f < fc; f++) { for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { const Complex z = DIRECT_A2D_ELEM(stack[i][f].data, y, x); const double yy = y < w? y : y - h; const double xx = x; const int r = (int) sqrt(xx*xx + yy*yy); if (r >= w) continue; out[r] += (z - mean[r]).norm(); } } for (int x = 0; x < w; x++) { if (wgh[x] > 1.0) { out[x] /= (wgh[x] - 1.0); } } return out; } std::vector StackHelper::powerSpectrum( const std::vector>>& obs, const std::vector >& signal) { const int ic = obs.size(); const int fc = obs[0].size(); const int w = obs[0][0].data.xdim; const int h = obs[0][0].data.ydim; std::vector out(w, 0.0), wgh(w, 0.0); for (int i = 0; i < ic; i++) for (int f = 0; f < fc; f++) { for (int y = 0; y < h; y++) for (int x = 0; x < w; x++) { const Complex z = DIRECT_A2D_ELEM(obs[i][f].data, y, x) - DIRECT_A2D_ELEM(signal[i].data, y, x); const double yy = y < w? y : y - h; const double xx = x; const int r = (int) sqrt(xx*xx + yy*yy); if (r >= w) continue; out[r] += z.norm(); wgh[r] += 1.0; } } for (int x = 0; x < w; x++) { if (wgh[x] > 0.0) { out[x] /= wgh[x]; } } return out; } relion-3.1.3/src/jaz/stack_helper.h000066400000000000000000000074251411340063500172030ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #ifndef STACK_HELPER_H #define STACK_HELPER_H #include #include #include #include #include #include #include #include class Projector; class ObservationModel; class StackHelper { public: static std::vector splitByMicrographName(const MetaDataTable& mdt); static MetaDataTable merge(const std::vector& mdts); static std::vector splitByStack(const MetaDataTable* mdt); static std::vector > loadStack( const MetaDataTable* mdt, std::string path = "", int threads = 1); static std::vector > loadStackFS( const MetaDataTable& mdt, std::string path = "", int threads = 1, bool centerParticle = false, ObservationModel* obs = 0); static void saveStack(std::vector >& stack, std::string fn); static std::vector>> loadMovieStack( const MetaDataTable* mdt, std::string moviePath); // For movies in file static std::vector>> extractMovieStackFS( const MetaDataTable* mdt, Image* gainRef, MultidimArray* defectMask, std::string movieFn, double outPs, double coordsPs, double moviePs, double dataPs, int squareSize, int threads, bool loadData = true, int firstFrame = 0, int lastFrame = -1, RFLOAT hot = -1.0, bool verbose = false, bool saveMemory = false, const std::vector>* offsets_in = 0, std::vector>* offsets_out = 0); // For movies in memory static std::vector>> extractMovieStackFS( const MetaDataTable* mdt, std::vector > &mgStack, double outPs, double coordsPs, double moviePs, double dataPs, int squareSize, int threads, bool loadData = true, bool verbose = false, const std::vector>* offsets_in = 0, std::vector>* offsets_out = 0); static std::vector> FourierTransform(std::vector >& stack); static std::vector> inverseFourierTransform(std::vector >& stack); static Image toSingleImage(const std::vector> stack); static void varianceNormalize( std::vector>& movie, bool circleCropped = false); static std::vector powerSpectrum( const std::vector>>& stack); static std::vector varSpectrum( const std::vector>>& stack); static std::vector powerSpectrum( const std::vector>>& obs, const std::vector >& signal); }; #endif relion-3.1.3/src/jaz/structure_tensor.cpp000066400000000000000000000065041411340063500205210ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #include #include using namespace gravis; void StructureTensor :: computeEdgeTensor(const Volume& src, Volume >& dest) { dest.resize(src.dimx, src.dimy, src.dimz); FOR_ALL_VOXELS(src) { t3Vector g = FilterHelper::centralGradient(src, x, y, z); dest(x,y,z) = Tensor3x3::autoDyadicProduct(g); } } void StructureTensor :: compute(const Volume& src, Volume >& dest, RFLOAT rho) { Volume > E(src.dimx, src.dimy, src.dimz); computeEdgeTensor(src, E); dest.resize(src.dimx, src.dimy, src.dimz); FilterHelper::separableGaussian(E, dest, rho); } void StructureTensor :: computeEigenvalues(const Volume >& J, Volume >& dest) { dest.resize(J.dimx, J.dimy, J.dimz); gravis::t3Matrix Q; gravis::t3Vector d; FOR_ALL_VOXELS(J) { J(x,y,z).diagonalize(d, Q); dest(x,y,z) = d; } } void StructureTensor :: computeEigenvalues(const Volume& src, Volume >& dest, RFLOAT rho) { Volume > J; compute(src, J, rho); computeEigenvalues(J, dest); } void StructureTensor :: computeSmallestEigenvalue(const Volume& src, Volume& dest, RFLOAT rho) { Volume > J; compute(src, J, rho); dest.resize(J.dimx, J.dimy, J.dimz); gravis::t3Matrix Q; gravis::t3Vector d; FOR_ALL_VOXELS(J) { J(x,y,z).diagonalize(d, Q); dest(x,y,z) = d.z; } } void StructureTensor :: computeMiddleEigenvalue(const Volume& src, Volume& dest, RFLOAT rho) { Volume > J; compute(src, J, rho); dest.resize(J.dimx, J.dimy, J.dimz); gravis::t3Matrix Q; gravis::t3Vector d; FOR_ALL_VOXELS(J) { J(x,y,z).diagonalize(d, Q); dest(x,y,z) = d.y; } } void StructureTensor :: computeEigenvalueLC(const Volume& src, Volume& dest, RFLOAT rho, RFLOAT w0, RFLOAT w1, RFLOAT w2) { Volume > J; compute(src, J, rho); dest.resize(J.dimx, J.dimy, J.dimz); gravis::t3Matrix Q; gravis::t3Vector d; FOR_ALL_VOXELS(J) { J(x,y,z).diagonalize(d, Q); dest(x,y,z) = w0*d[0] + w1*d[1] + w2*d[2]; } } relion-3.1.3/src/jaz/structure_tensor.h000066400000000000000000000036171411340063500201700ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
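// ---------------------------------------------------------------------------
// Usage sketch for the StructureTensor functions defined above (not part of
// the original source). The template arguments are assumptions restoring what
// the flattened text lost; the smoothing radius rho is an arbitrary example
// value.
#include <src/jaz/structure_tensor.h>
#include <src/jaz/tensor3x3.h>
#include <src/jaz/volume.h>
#include <src/jaz/gravis/t3Vector.h>

void structureTensorExample(const Volume<RFLOAT>& tomogram)
{
	const RFLOAT rho = 2.0;   // Gaussian smoothing radius of the tensor field

	// full structure tensor: one symmetric 3x3 tensor per voxel
	Volume<Tensor3x3<RFLOAT>> J;
	StructureTensor::compute(tomogram, J, rho);

	// all three eigenvalues per voxel
	Volume<gravis::t3Vector<RFLOAT>> eigenvalues;
	StructureTensor::computeEigenvalues(J, eigenvalues);

	// convenience wrapper: only the smallest eigenvalue per voxel
	Volume<RFLOAT> smallest;
	StructureTensor::computeSmallestEigenvalue(tomogram, smallest, rho);
}
// ---------------------------------------------------------------------------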
***************************************************************************/ #ifndef STRUCTURE_TENSOR_H #define STRUCTURE_TENSOR_H #include #include #include class StructureTensor { public: static void compute(const Volume& src, Volume >& dest, RFLOAT rho); static void computeEdgeTensor(const Volume& src, Volume >& dest); static void computeEigenvalues(const Volume >& J, Volume >& dest); // helper functions static void computeEigenvalues(const Volume& src, Volume >& dest, RFLOAT rho); static void computeSmallestEigenvalue(const Volume& src, Volume& dest, RFLOAT rho); static void computeMiddleEigenvalue(const Volume& src, Volume& dest, RFLOAT rho); static void computeEigenvalueLC(const Volume& src, Volume& dest, RFLOAT rho, RFLOAT w0, RFLOAT w1, RFLOAT w2); }; #endif relion-3.1.3/src/jaz/svd_helper.cpp000066400000000000000000000034711411340063500172220ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include #include void SvdHelper::decompose( const Matrix2D& A, Matrix2D& U, Matrix1D& S, Matrix2D& Vt) { Matrix2D U0, Vt0; Matrix1D S0; svdcmp(A, U0, S0, Vt0); const int rc = A.mdimy; const int cc = A.mdimx; std::vector Svec(cc); for (int i = 0; i < cc; i++) { Svec[i] = S0(i); } std::vector order = IndexSort::sortIndices(Svec); U = Matrix2D(rc,cc); S = Matrix1D(cc); Vt = Matrix2D(cc,cc); for (int i = 0; i < cc; i++) { const int j = order[cc - i - 1]; for (int c = 0; c < cc; c++) { Vt(c,i) = Vt0(c,j); } S(i) = S0(j); for (int r = 0; r < rc; r++) { U(r,i) = U0(r,j); } } } relion-3.1.3/src/jaz/svd_helper.h000066400000000000000000000022741411340063500166670ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
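// ---------------------------------------------------------------------------
// Usage sketch for SvdHelper::decompose() defined above (not part of the
// original source). The matrix contents are arbitrary example values and the
// <RFLOAT> template arguments are assumptions restoring what the flattened
// declarations lost. As the implementation above shows, decompose() wraps
// svdcmp() and re-orders the result so that the singular values come out in
// descending order, with the columns of U and Vt permuted to match.
#include <src/jaz/svd_helper.h>
#include <src/matrix2d.h>
#include <src/matrix1d.h>

void svdExample()
{
	// a small 3x2 matrix A (3 rows = mdimy, 2 columns = mdimx)
	Matrix2D<RFLOAT> A(3, 2);
	A(0, 0) = 1.0;  A(0, 1) = 0.0;
	A(1, 0) = 0.0;  A(1, 1) = 2.0;
	A(2, 0) = 0.0;  A(2, 1) = 0.0;

	Matrix2D<RFLOAT> U, Vt;
	Matrix1D<RFLOAT> S;

	SvdHelper::decompose(A, U, S, Vt);

	// S(0) >= S(1): the largest singular value (here 2) comes first
}
// ---------------------------------------------------------------------------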
***************************************************************************/ #ifndef SVD_HELPER_H #define SVD_HELPER_H #include class SvdHelper { public: static void decompose( const Matrix2D& A, Matrix2D& U, Matrix1D& S, Matrix2D& Vt); }; #endif relion-3.1.3/src/jaz/t_complex.cpp000066400000000000000000000017071411340063500170610ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ // This file intentionally left blank relion-3.1.3/src/jaz/t_complex.h000066400000000000000000000131731411340063500165260ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ /*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #ifndef T_COMPLEX_H #define T_COMPLEX_H #include #include #include "src/macros.h" template class tComplex { public: tComplex() {} tComplex(T real, T imag = 0) : real(real), imag(imag) {} T real, imag; tComplex& operator += (const tComplex& arg) { real += arg.real; imag += arg.imag; return *this; } tComplex& operator -= (const tComplex& arg) { real -= arg.real; imag -= arg.imag; return *this; } tComplex operator - () const { return tComplex(-real, -imag); } tComplex& operator *= (const tComplex& arg) { T re = real*arg.real - imag*arg.imag; T im = real*arg.imag + imag*arg.real; real = re; imag = im; return *this; } tComplex& operator /= (const tComplex& arg) { T cd = arg.real*arg.real + arg.imag*arg.imag; T re = real*arg.real + imag*arg.imag; T im = imag*arg.real - real*arg.imag; real = re/cd; imag = im/cd; return *this; } bool operator == (const tComplex& arg) const { return (real == arg.real && imag == arg.imag); } bool operator != (const tComplex& arg) const { return !(*this == arg); } operator T() const { return real; } tComplex conj() const { return tComplex(real, -imag); } T abs() const { return sqrt(real*real + imag*imag); } T norm() const { return real*real + imag*imag; } T arg() const { return atan2(imag,real); } }; template inline tComplex conj(const tComplex& op) { return op.conj(); } template inline T abs(const tComplex& op) { return op.abs(); } template inline T norm(const tComplex& op) { return op.norm(); } template inline T arg(const tComplex& op) { return op.arg(); } template inline tComplex operator + (const tComplex& z, const tComplex& w) { return tComplex(z.real + w.real, z.imag + w.imag); } template inline tComplex operator - (const tComplex& z, const tComplex& w) { return tComplex(z.real - w.real, z.imag - w.imag); } template inline tComplex operator - (const tComplex& z) { return tComplex(-z.real, -z.imag); } template inline tComplex operator * (const tComplex& z, const tComplex& w) { return tComplex( z.real * w.real - z.imag * w.imag, z.real * w.imag + z.imag * w.real); } template inline tComplex operator * (const tComplex& z, const T2& x) { return tComplex(x * z.real, x * z.imag); } template inline tComplex operator * (const T2& x, const tComplex& z) { return tComplex(x * z.real, x * z.imag); } template inline tComplex operator / (const tComplex& z, const tComplex& w) { const T1 d = w.real * w.real + w.imag * w.imag; return tComplex( (z.real * w.real + z.imag * w.imag) / d, (z.imag * w.real - z.real * w.imag) / d); } template inline tComplex operator / (const tComplex& z, const T2& x) { return tComplex(z.real / x, z.imag / x); } template inline std::ostream& operator << (std::ostream& os, const tComplex& z) { os << "[" << z.real << ", " << z.imag << "]"; return os; } typedef tComplex fComplex; typedef tComplex dComplex; #endif relion-3.1.3/src/jaz/tensor2x2.h000066400000000000000000000140041411340063500163740ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. 
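// ---------------------------------------------------------------------------
// Usage sketch for the tComplex<T> class declared above (not part of the
// original source). The numeric values are arbitrary; only operators declared
// in t_complex.h are used, via the dComplex (tComplex<double>) typedef.
#include <src/jaz/t_complex.h>
#include <iostream>

int main()
{
	dComplex a(3.0, 4.0);    // 3 + 4i
	dComplex b(1.0, -2.0);   // 1 - 2i

	dComplex sum  = a + b;   // [4, 2]
	dComplex prod = a * b;   // (3 + 4i)(1 - 2i) = [11, -2]
	dComplex quot = a / b;   // (3 + 4i)/(1 - 2i) = [-1, 2]

	std::cout << "a       = " << a        << "\n";   // prints "[3, 4]"
	std::cout << "conj(a) = " << conj(a)  << "\n";   // [3, -4]
	std::cout << "|a|     = " << a.abs()  << "\n";   // 5
	std::cout << "|a|^2   = " << a.norm() << "\n";   // 25
	std::cout << "arg(a)  = " << a.arg()  << "\n";   // atan2(4, 3)
	std::cout << "a + b   = " << sum      << "\n";
	std::cout << "a * b   = " << prod     << "\n";
	std::cout << "a / b   = " << quot     << "\n";

	return 0;
}
// ---------------------------------------------------------------------------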
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef TENSOR_2X2_H #define TENSOR_2X2_H #include #include #include #include #include #include extern "C" { #include } /* Symmetric 2x2 matrix to be used as e.g. a structure tensor of a 2D image */ template class Tensor2x2 { public: enum Component {XX = 0, XY = 1, YY = 2}; Tensor2x2(){} Tensor2x2(T t) : xx(t), xy(t), yy(t) {} Tensor2x2(T xx, T xy, T yy) : xx(xx), xy(xy), yy(yy) {} T xx, xy, yy; static Tensor2x2 autoDyadicProduct(const gravis::t2Vector& v); static Tensor2x2 dyadicProduct(const gravis::t2Vector& v0, const gravis::t2Vector& v1); void diagonalize(gravis::t2Vector& eigenvalues, gravis::t2Matrix& eigenvectors) const; gravis::t2Matrix toMatrix() const; T& operator[] (int idx) { switch (idx) { case XX: return xx; case XY: return xy; case YY: return yy; } REPORT_ERROR("Tensor2x2 operator []: invalid index"); } const T& operator[] (int idx) const { switch (idx) { case XX: return xx; case XY: return xy; case YY: return yy; } REPORT_ERROR("Tensor2x2 operator []: invalid index"); } Tensor2x2& operator += (const Tensor2x2& arg) { xx += arg.xx; xy += arg.xy; yy += arg.yy; return *this; } Tensor2x2& operator -= (const Tensor2x2& arg) { xx -= arg.xx; xy -= arg.xy; yy -= arg.yy; return *this; } Tensor2x2& operator *= (T arg) { xx *= arg; xy *= arg; yy *= arg; return *this; } Tensor2x2 operator * (T arg) const { return Tensor2x2(xx * arg, xy * arg, yy * arg); } Tensor2x2& operator /= (T arg) { xx /= arg; xy /= arg; yy /= arg; return *this; } Tensor2x2 operator / (T arg) const { return Tensor2x2(xx / arg, xy / arg, yy / arg); } }; template Tensor2x2 Tensor2x2::autoDyadicProduct(const gravis::t2Vector& v) { Tensor2x2 out; out.xx = v.x * v.x; out.xy = v.x * v.y; out.yy = v.y * v.y; return out; } template Tensor2x2 Tensor2x2::dyadicProduct(const gravis::t2Vector& v0, const gravis::t2Vector& v1) { Tensor2x2 out; out.xx = v0.x0 * v1.x1; out.xy = v0.x0 * v1.y1; out.xz = v0.x0 * v1.z1; out.yy = v0.y0 * v1.y1; out.yz = v0.y0 * v1.z1; out.zz = v0.z0 * v1.z1; return out; } template void Tensor2x2::diagonalize(gravis::t2Vector& eigenvalues, gravis::t2Matrix& eigenvectors) const { dsyev2(xx, xy, yy, &eigenvalues[0], &eigenvalues[1], &eigenvectors(0,0), &eigenvectors(0,1)); eigenvectors(1,0) = -eigenvectors(0,1); eigenvectors(1,1) = eigenvectors(0,0); } template gravis::t2Matrix Tensor2x2::toMatrix() const { return gravis::t2Matrix(xx,xy,xy,yy); } template inline Tensor2x2 operator + (const Tensor2x2& v1, const Tensor2x2& v2) { return Tensor2x2( v1.xx + v2.xx, v1.xy + v2.xy, v1.yy + v2.yy); } template inline Tensor2x2 operator - (const Tensor2x2& v1) { return Tensor2x2(-v1.xx, -v1.xy, -v1.yy); } template inline Tensor2x2 operator - (const Tensor2x2& v1, const Tensor2x2& v2) { return Tensor2x2( v1.xx - v2.xx, v1.xy - v2.xy, v1.yy - v2.yy); } template inline Tensor2x2 operator * (float f, const Tensor2x2& v) { return Tensor2x2(f * v.xx, f * v.xy, f * v.yy); } template inline Tensor2x2 operator * (const Tensor2x2& v, float f) { return Tensor2x2(v.xx * f, v.xy * f, v.yy * f); } template inline Tensor2x2 
operator * (double f, const Tensor2x2& v) { return Tensor2x2(f * v.xx, f * v.xy, f * v.yy); } template inline Tensor2x2 operator * (const Tensor2x2& v, double f) { return Tensor2x2(v.xx * f, v.xy * f, v.yy * f); } template inline Tensor2x2 operator / (const Tensor2x2& v, T f) { return Tensor2x2(v.xx / f, v.xy / f, v.yy / f); } template inline std::ostream& operator<< (std::ostream& os, const Tensor2x2& arg) { os << std::setprecision(17) << "[" << std::setw(8) << arg.xx << ", " << std::setw(8) << arg.xy << ", " << std::setw(8) << arg.yy << "]"; return os; } #endif relion-3.1.3/src/jaz/tensor3x3.h000066400000000000000000000167571411340063500164170ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef TENSOR_3X3_H #define TENSOR_3X3_H #include #include #include #include #include #include extern "C" { #include #include } /* Symmetric 3x3 matrix to be used as e.g. a structure tensor of a 3D volume */ template class Tensor3x3 { public: enum Component {XX = 0, XY = 1, XZ = 2, YY = 3, YZ = 4, ZZ = 5}; Tensor3x3(){} Tensor3x3(T t) : xx(t), xy(t), xz(t), yy(t), yz(t), zz(t) {} Tensor3x3(T xx, T xy, T xz, T yy, T yz, T zz) : xx(xx), xy(xy), xz(xz), yy(yy), yz(yz), zz(zz) {} T xx, xy, xz, yy, yz, zz; static Tensor3x3 autoDyadicProduct(const gravis::t3Vector& v); static Tensor3x3 dyadicProduct(const gravis::t3Vector& v0, const gravis::t3Vector& v1); void diagonalize(gravis::t3Vector& eigenvalues, gravis::t3Matrix& eigenvectors) const; T& operator[] (int idx) { switch (idx) { case XX: return xx; case XY: return xy; case XZ: return xz; case YY: return yy; case YZ: return yz; case ZZ: return zz; } REPORT_ERROR("Tensor3x3 operator []: invalid index"); } const T& operator[] (int idx) const { switch (idx) { case XX: return xx; case XY: return xy; case XZ: return xz; case YY: return yy; case YZ: return yz; case ZZ: return zz; } REPORT_ERROR("Tensor3x3 operator []: invalid index"); } Tensor3x3& operator += (const Tensor3x3& arg) { xx += arg.xx; xy += arg.xy; xz += arg.xz; yy += arg.yy; yz += arg.yz; zz += arg.zz; return *this; } Tensor3x3& operator -= (const Tensor3x3& arg) { xx -= arg.xx; xy -= arg.xy; xz -= arg.xz; yy -= arg.yy; yz -= arg.yz; zz -= arg.zz; return *this; } Tensor3x3& operator *= (T arg) { xx *= arg; xy *= arg; xz *= arg; yy *= arg; yz *= arg; zz *= arg; return *this; } Tensor3x3 operator * (float arg) const { return Tensor3x3(xx * arg, xy * arg, xz * arg, yy * arg, yz * arg, zz * arg); } Tensor3x3 operator * (double arg) const { return Tensor3x3(xx * arg, xy * arg, xz * arg, yy * arg, yz * arg, zz * arg); } Tensor3x3& operator /= (T arg) { xx /= arg; xy /= arg; xz /= arg; yy /= arg; yz /= arg; zz /= arg; return *this; 
} Tensor3x3 operator / (T arg) const { return Tensor3x3(xx / arg, xy / arg, xz / arg, yy / arg, yz / arg, zz / arg); } }; template Tensor3x3 Tensor3x3::autoDyadicProduct(const gravis::t3Vector& v) { Tensor3x3 out; out.xx = v.x * v.x; out.xy = v.x * v.y; out.xz = v.x * v.z; out.yy = v.y * v.y; out.yz = v.y * v.z; out.zz = v.z * v.z; return out; } template Tensor3x3 Tensor3x3::dyadicProduct(const gravis::t3Vector& v0, const gravis::t3Vector& v1) { Tensor3x3 out; out.xx = v0.x0 * v1.x1; out.xy = v0.x0 * v1.y1; out.xz = v0.x0 * v1.z1; out.yy = v0.y0 * v1.y1; out.yz = v0.y0 * v1.z1; out.zz = v0.z0 * v1.z1; return out; } template void Tensor3x3::diagonalize(gravis::t3Vector& eigenvalues, gravis::t3Matrix& eigenvectors) const { double A[3][3]; A[0][0] = (double)xx; A[0][1] = (double)xy; A[0][2] = (double)xz; A[1][1] = (double)yy; A[1][2] = (double)yz; A[2][2] = (double)zz; double Q[3][3]; std::vector w(3); dsyevh3(A, Q, &w[0]); std::vector inds = IndexSort::sortIndices(w); for (int i = 0; i < 3; i++) { eigenvalues[2-i] = (T) w[inds[i]]; for (int j = 0; j < 3; j++) { eigenvectors(j,2-i) = (T) Q[j][inds[i]]; } } } template inline Tensor3x3 operator + (const Tensor3x3& v1, const Tensor3x3& v2) { return Tensor3x3( v1.xx + v2.xx, v1.xy + v2.xy, v1.xz + v2.xz, v1.yy + v2.yy, v1.yz + v2.yz, v1.zz + v2.zz); } template inline Tensor3x3 operator - (const Tensor3x3& v1) { return Tensor3x3(-v1.xx, -v1.xy, -v1.xz, -v1.yy, -v1.yz, -v1.zz); } template inline Tensor3x3 operator - (const Tensor3x3& v1, const Tensor3x3& v2) { return Tensor3x3( v1.xx - v2.xx, v1.xy - v2.xy, v1.xz - v2.xz, v1.yy - v2.yy, v1.yz - v2.yz, v1.zz - v2.zz); } template inline Tensor3x3 operator * (float f, const Tensor3x3& v) { return Tensor3x3(f * v.xx, f * v.xy, f * v.xz, f * v.yy, f * v.yz, f * v.zz); } template inline Tensor3x3 operator * (const Tensor3x3& v, float f) { return Tensor3x3(v.xx * f, v.xy * f, v.xz * f, v.yy * f, v.yz * f, v.zz * f); } template inline Tensor3x3 operator * (double f, const Tensor3x3& v) { return Tensor3x3(f * v.xx, f * v.xy, f * v.xz, f * v.yy, f * v.yz, f * v.zz); } template inline Tensor3x3 operator * (const Tensor3x3& v, double f) { return Tensor3x3(v.xx * f, v.xy * f, v.xz * f, v.yy * f, v.yz * f, v.zz * f); } template inline Tensor3x3 operator / (const Tensor3x3& v, T f) { return Tensor3x3(v.xx / f, v.xy / f, v.xz / f, v.yy / f, v.yz / f, v.zz / f); } template inline std::ostream& operator<< (std::ostream& os, const Tensor3x3& arg) { os << std::setprecision(17) << "[" << std::setw(8) << arg.xx << ", " << std::setw(8) << arg.xy << ", " << std::setw(8) << arg.xz << ", " << std::setw(8) << arg.yy << ", " << std::setw(8) << arg.yz << ", " << std::setw(8) << arg.zz << "]"; return os; } #endif relion-3.1.3/src/jaz/tomo/000077500000000000000000000000001411340063500153345ustar00rootroot00000000000000relion-3.1.3/src/jaz/tomo/backprojection_helper.cpp000066400000000000000000000441041411340063500223770ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
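// ---------------------------------------------------------------------------
// Usage sketch for the Tensor3x3<T> class declared above (not part of the
// original source). The vector values are arbitrary and the template
// arguments are assumptions restoring what the flattened text lost. Following
// the diagonalize() implementation above, the eigenvalues are returned in
// descending order and the matching eigenvectors are the columns of the
// returned matrix.
#include <src/jaz/tensor3x3.h>
#include <src/jaz/gravis/t3Vector.h>
#include <src/jaz/gravis/t3Matrix.h>

void tensor3x3Example()
{
	// structure-tensor-style accumulation of gradient outer products g * g^T
	gravis::t3Vector<double> g0(1.0, 2.0, 3.0);
	gravis::t3Vector<double> g1(0.0, 1.0, 0.0);

	Tensor3x3<double> T = Tensor3x3<double>::autoDyadicProduct(g0);
	T += Tensor3x3<double>::autoDyadicProduct(g1);

	gravis::t3Vector<double> eigenvalues;
	gravis::t3Matrix<double> eigenvectors;
	T.diagonalize(eigenvalues, eigenvectors);

	// eigenvalues.x >= eigenvalues.y >= eigenvalues.z
}
// ---------------------------------------------------------------------------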
See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include #include "backprojection_helper.h" #include #include #include #include #include #include #include #include using namespace gravis; void BackprojectionHelper::backprojectRaw( const Image& stack, std::string tiltAngles, Volume& dest, Volume& maskDest, d3Vector origin, double spacing, int frames) { const double cix = stack.data.xdim / 2.0; const double ciy = stack.data.ydim / 2.0; std::cout << "center: " << cix << ", " << ciy << "\n"; std::ifstream anglesFile(tiltAngles.c_str()); if (!anglesFile.is_open()) { REPORT_ERROR("BackprojectionHelper::backproject: failed to open "+tiltAngles+"."); } // vol2world * (0,0,0,1) = (xw0, yw0, zw0, 1) // vol2world * (xwc,ywc,zwc,1) = (xw1, yw1, zw1, 1) d4Matrix vol2world; vol2world(0,0) = spacing; vol2world(1,1) = spacing; vol2world(2,2) = spacing; vol2world(0,3) = origin.x; vol2world(1,3) = origin.y; vol2world(2,3) = origin.z; std::vector angles; std::vector vol2img; const double deg2rad = PI/180.0; while (anglesFile.good()) { double a; anglesFile >> a; a *= deg2rad; angles.push_back(a); d4Matrix w2i; w2i(0,0) = cos(a); w2i(0,2) = sin(a); w2i(2,0) = -sin(a); w2i(2,2) = cos(a); w2i(0,3) = cix; w2i(1,3) = ciy; vol2img.push_back(w2i * vol2world); } const int ic = frames > 0? frames : stack.data.ndim; if (vol2img.size() < ic) { REPORT_ERROR("BackprojectionHelper::backproject: not enough angles in "+tiltAngles+"."); } #if JAZ_USE_OPENMP #pragma omp parallel for #endif FOR_ALL_VOXELS(dest) { if (x == 0 && y == 0) { std::cout << z << "/" << dest.dimz << "\n"; } double sum = 0.0; double wgh = 0.0; d4Vector pw(x,y,z,1.0); for (int im = 0; im < ic; im++) { d4Vector pi = vol2img[im] * pw; if (Interpolation::isInSlice(stack, pi.x, pi.y)) { sum += Interpolation::linearXY(stack, pi.x, pi.y, im); wgh += 1.0; } } if (wgh > 0.0) { sum /= wgh; } dest(x,y,z) = sum; maskDest(x,y,z) = wgh > 0.0? 1 : 0; } } void BackprojectionHelper::backprojectRaw( const TomoStack& stack, Volume& dest, Volume& maskDest, gravis::d3Vector origin, double spacing, InterpolationType interpolation, double taperX, double taperY, double wMin, int frame0, int frames) { d4Matrix vol2world; vol2world(0,0) = spacing; vol2world(1,1) = spacing; vol2world(2,2) = spacing; vol2world(0,3) = origin.x; vol2world(1,3) = origin.y; vol2world(2,3) = origin.z; /*std::cout << "vol2world: \n" << vol2world << "\n"; std::cout << "stack.worldToImage[0]: \n" << stack.worldToImage[0] << "\n"; std::cout << "vol2img[0]: \n" << (stack.worldToImage[0] * vol2world) << "\n";*/ const int ic = frames > 0? 
frames + frame0 : stack.images.size(); std::cout << frame0 << " - " << (ic-1) << "\n"; std::vector vol2img(ic); for (int im = 0; im < ic; im++) { vol2img[im] = stack.worldToImage[im] * vol2world; } /*#if JAZ_USE_OPENMP #pragma omp parallel for #endif*/ /*std::vector> debugImgs(ic); for (int im = frame0; im < ic; im++) { debugImgs[im] = stack.images[im]; }*/ FOR_ALL_VOXELS(dest) { double sum = 0.0; double wgh = 0.0; d4Vector pw(x,y,z,1.0); for (int im = frame0; im < ic; im++) { d4Vector pi = vol2img[im] * pw; if (Interpolation::isInSlice(stack.images[im], pi.x, pi.y)) { double wghi = Interpolation::getTaperWeight(stack.images[im], pi.x, pi.y, taperX, taperY); if (interpolation == Linear) { sum += wghi * Interpolation::linearXY(stack.images[im], pi.x, pi.y, 0); } else { sum += wghi * Interpolation::cubicXY(stack.images[im], pi.x, pi.y, 0); } //debugImgs[im]((int)(pi.y+0.5), (int)(pi.x+0.5)) += 1000.0; wgh += wghi; } } if (wgh > 0.0) { sum /= wgh; } dest(x,y,z) = sum; maskDest(x,y,z) = wgh; } /*JazConfig::writeMrc = false; JazConfig::writeVtk = true; ImageLog::write(debugImgs, "debug_imgs");*/ double mean = 0.0, sum = 0.0; FOR_ALL_VOXELS(dest) { mean += maskDest(x,y,z)*dest(x,y,z); sum += maskDest(x,y,z); } if (sum > 0.0) { mean /= sum; } #if JAZ_USE_OPENMP #pragma omp parallel for #endif FOR_ALL_VOXELS(dest) { double t = maskDest(x,y,z) / wMin; if (t < 1.0) { dest(x,y,z) = t * dest(x,y,z) + (1.0 - t) * mean; } } } void BackprojectionHelper::backprojectExactWeights( const TomoStack& stack, Volume& dest, d3Vector origin, double spacing, double taperX, double taperY, double taperZ, double wMin, int frame0, int frames) { const int wv = dest.dimx; const int hv = dest.dimy; const int dv = dest.dimz; Volume vol(wv,hv,dv), volM(wv,hv,dv); std::cout << "performing unweighted backprojection...\n"; backprojectRaw(stack, vol, volM, origin, spacing, Linear, taperX, taperY, wMin, frame0, frames); taperEdges(vol, taperX, taperY, taperZ); Volume weight(wv/2 + 1,hv,dv); std::cout << "backprojecting dots...\n"; backprojectDots(stack, weight, origin, spacing, taperX, taperY, taperZ, frame0, frames); std::cout << "applying weights...\n"; Image volRL; VolumeConverter::convert(vol, volRL); vol.resize(0,0,0); Image dataFreq; FourierTransformer ft; ft.FourierTransform(volRL(), dataFreq.data, false); FilterHelper::divideExcessive(dataFreq, weight, weight(0,0,0)/(double)stack.images.size(), dataFreq); FourierTransformer ft2; ft2.inverseFourierTransform(dataFreq.data, volRL()); VolumeConverter::convert(volRL, dest); } void BackprojectionHelper::backprojectExactWeightsFreq( const TomoStack& stack, Image& dest, Volume& weight, d3Vector origin, double spacing, double taperX, double taperY, double taperZ, double wMin, int frame0, int frames) { const int wv = 2 * (dest.data.xdim - 1); const int hv = dest.data.ydim; const int dv = dest.data.zdim; std::cout << wv << "x" << hv << "x" << dv << "x" << dest.data.ndim << "\n"; Volume vol(wv,hv,dv), volM(wv,hv,dv); std::cout << "performing unweighted backprojection...\n"; backprojectRaw(stack, vol, volM, origin, spacing, Linear, taperX, taperY, wMin, frame0, frames); taperEdges(vol, taperX, taperY, taperZ); weight.resize(wv/2 + 1,hv,dv); std::cout << "backprojecting dots...\n"; backprojectDots(stack, weight, origin, spacing, taperX, taperY, taperZ, frame0, frames); std::cout << "applying weights...\n"; Image volRL; VolumeConverter::convert(vol, volRL); vol.resize(0,0,0); FourierTransformer ft; ft.FourierTransform(volRL(), dest.data, true); const double theta = 
weight(0,0,0)/(double)stack.images.size(); for (long int z = 0; z < dv; z++) for (long int y = 0; y < hv; y++) for (long int x = 0; x < wv/2 + 1; x++) { const double t = weight(x,y,z)/theta; if (t > 1) { DIRECT_NZYX_ELEM(dest.data, 0, z, y, x) = DIRECT_NZYX_ELEM(dest.data, 0, z, y, x) / t; } else { DIRECT_NZYX_ELEM(dest.data, 0, z, y, x) = DIRECT_NZYX_ELEM(dest.data, 0, z, y, x); } weight(x,y,z) = t; } } void BackprojectionHelper::backprojectDots( const TomoStack& stack, Volume& dest, gravis::d3Vector origin, double spacing, double taperX, double taperY, double taperZ, int frame0, int frames) { const int wv = 2*(dest.dimx-1); const int hv = dest.dimy; const int dv = dest.dimz; d4Matrix vol2world; vol2world(0,0) = spacing; vol2world(1,1) = spacing; vol2world(2,2) = spacing; vol2world(0,3) = origin.x; vol2world(1,3) = origin.y; vol2world(2,3) = origin.z; const int ic = frames > 0? frames + frame0 : stack.images.size(); std::cout << frame0 << " - " << (ic-1) << "\n"; d4Vector originVol(wv/2, hv/2, dv/2, 1.0); std::cout << "originVol = " << originVol << "\n"; Volume streakVol(wv, hv, dv); Image volRL; Image spectrum; std::vector vol2img(ic); std::vector volOrigImg(ic); for (int im = 0; im < ic; im++) { //std::cout << " " << im << "/" << (ic-1) << "\n"; vol2img[im] = stack.worldToImage[im] * vol2world; volOrigImg[im] = vol2img[im] * originVol; } dest.fill(0.0); streakVol.fill(0.0); #if JAZ_USE_OPENMP #pragma omp parallel for #endif FOR_ALL_VOXELS(streakVol) { for (int im = 0; im < ic; im++) { d4Vector pw(x,y,z,1.0); d4Vector pi = vol2img[im] * pw; d4Vector d = pi - volOrigImg[im]; double dx, dy; if (d.x < -1 || d.x > 1) dx = 0.0; else dx = 1.0 - std::abs(d.x); if (d.y < -1 || d.y > 1) dy = 0.0; else dy = 1.0 - std::abs(d.y); streakVol(x,y,z) += dx*dy; } } taperEdges(streakVol, taperX, taperY, taperZ); VolumeConverter::convert(streakVol, volRL); CenterFFT(volRL.data, true); FourierTransformer ft; ft.FourierTransform(volRL(), spectrum.data, false); FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY3D(spectrum.data) { Complex z = DIRECT_A3D_ELEM(spectrum.data, k, i, j); dest(j,i,k) = z.abs(); } } void BackprojectionHelper::backprojectDotsFS( const TomoStack& stack, Image& dest, gravis::d3Vector origin, double spacing, double taperX, double taperY, double taperZ, int frame0, int frames) { const int wv = 2*(dest.data.xdim-1); const int hv = dest.data.ydim; const int dv = dest.data.zdim; d4Matrix vol2world; vol2world(0,0) = spacing; vol2world(1,1) = spacing; vol2world(2,2) = spacing; vol2world(0,3) = origin.x; vol2world(1,3) = origin.y; vol2world(2,3) = origin.z; const int ic = frames > 0? 
frames + frame0 : stack.images.size(); std::cout << frame0 << " - " << (ic-1) << "\n"; d4Vector originVol(wv/2, hv/2, dv/2, 1.0); std::cout << "originVol = " << originVol << "\n"; Volume streakVol(wv, hv, dv); Image volRL; std::vector vol2img(ic); std::vector volOrigImg(ic); for (int im = 0; im < ic; im++) { std::cout << " " << im << "/" << (ic-1) << "\n"; vol2img[im] = stack.worldToImage[im] * vol2world; volOrigImg[im] = vol2img[im] * originVol; } streakVol.fill(0.0); #if JAZ_USE_OPENMP #pragma omp parallel for #endif FOR_ALL_VOXELS(streakVol) { for (int im = 0; im < ic; im++) { d4Vector pw(x,y,z,1.0); d4Vector pi = vol2img[im] * pw; d4Vector d = pi - volOrigImg[im]; double dx, dy; if (d.x < -1 || d.x > 1) dx = 0.0; else dx = 1.0 - std::abs(d.x); if (d.y < -1 || d.y > 1) dy = 0.0; else dy = 1.0 - std::abs(d.y); streakVol(x,y,z) += dx*dy; } } taperEdges(streakVol, taperX, taperY, taperZ); VolumeConverter::convert(streakVol, volRL); CenterFFT(volRL.data, true); FourierTransformer ft; ft.FourierTransform(volRL(), dest.data, true); } void BackprojectionHelper::backprojectDotsSeparately( const TomoStack& stack, Volume& dest, gravis::d3Vector origin, double spacing, double taperX, double taperY, double taperZ, int frame0, int frames) { const int wv = 2*(dest.dimx-1); const int hv = dest.dimy; const int dv = dest.dimz; d4Matrix vol2world; vol2world(0,0) = spacing; vol2world(1,1) = spacing; vol2world(2,2) = spacing; vol2world(0,3) = origin.x; vol2world(1,3) = origin.y; vol2world(2,3) = origin.z; const int ic = frames > 0? frames + frame0 : stack.images.size(); std::cout << frame0 << " - " << (ic-1) << "\n"; d4Vector originVol((double)(dest.dimx - 1), (double)dest.dimy/2.0, (double)dest.dimz/2.0, 1.0); Volume streakVol(wv, hv, dv); Image volRL; Image spectrum; dest.fill(0.0); for (int im = 0; im < ic; im++) { std::cout << " " << im << "/" << (ic-1) << "\n"; d4Matrix vol2img = stack.worldToImage[im] * vol2world; d4Vector volOrigImg = vol2img * originVol; #if JAZ_USE_OPENMP #pragma omp parallel for #endif FOR_ALL_VOXELS(streakVol) { double sum = 0.0; d4Vector pw(x,y,z,1.0); d4Vector pi = vol2img * pw; d4Vector d = pi - volOrigImg; double dx, dy; if (d.x < -1 || d.x > 1) dx = 0.0; else dx = 1.0 - std::abs(d.x); if (d.y < -1 || d.y > 1) dy = 0.0; else dy = 1.0 - std::abs(d.y); sum += dx*dy; streakVol(x,y,z) = sum; } taperEdges(streakVol, 0.05*wv, 0.02*hv, 0.05*dv); VolumeConverter::convert(streakVol, volRL); FourierTransformer ft; ft.FourierTransform(volRL(), spectrum.data, false); if (im % 10 == 1) { std::stringstream sts; sts << im; std::string fn; sts >> fn; //VtkHelper::writeVTK(streakVol, "streakVol_"+fn+".vtk"); //VtkHelper::writeVTK_Complex(spectrum.data, "streakVol_"+fn+"_FS.vtk"); } FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY3D(spectrum.data) { Complex z = DIRECT_A3D_ELEM(spectrum.data, k, i, j); dest(j,i,k) += z.abs(); } } } void BackprojectionHelper::backprojectOriginDot( const TomoStack& stack, Volume& dest, double sigma, gravis::d3Vector origin, double spacing, int frame0, int frames) { d4Matrix vol2world; vol2world(0,0) = spacing; vol2world(1,1) = spacing; vol2world(2,2) = spacing; vol2world(0,3) = origin.x; vol2world(1,3) = origin.y; vol2world(2,3) = origin.z; const int ic = frames > 0? 
frames + frame0 : stack.images.size(); std::cout << frame0 << " - " << (ic-1) << "\n"; d4Vector originVol0(0.5/(double)dest.dimx, 0.5/(double)dest.dimy, 0.5/(double)dest.dimz, 1.0); std::vector originVol(8); for (int c = 0; c < 8; c++) { int sx = c%2; int sy = (c/2)%2; int sz = (c/4)%2; originVol[c].x = originVol0.x + sx * (double)dest.dimx; originVol[c].y = originVol0.y + sy * (double)dest.dimy; originVol[c].z = originVol0.z + sz * (double)dest.dimz; } std::vector vol2img(ic); std::vector volOrigImg(8*ic); for (int im = 0; im < ic; im++) { vol2img[im] = stack.worldToImage[im] * vol2world; for (int c = 0; c < 8; c++) { volOrigImg[8*im + c] = vol2img[im] * originVol[c]; } } const double s2 = sigma*sigma; #if JAZ_USE_OPENMP #pragma omp parallel for #endif FOR_ALL_VOXELS(dest) { double sum = 0.0; d4Vector pw(x,y,z,1.0); for (int im = frame0; im < ic; im++) { d4Vector pi = vol2img[im] * pw; for (int c = 0; c < 8; c++) { d4Vector d = pi - volOrigImg[8*im + c]; double dx, dy; if (d.x < -1 || d.x > 1) dx = 0.0; else dx = 1.0 - std::abs(d.x); if (d.y < -1 || d.y > 1) dy = 0.0; else dy = 1.0 - std::abs(d.y); sum += dx*dy; } } dest(x,y,z) = sum; } } void BackprojectionHelper::taperEdges(Volume& vol, double rx, double ry, double rz) { double mean = 0.0; FOR_ALL_VOXELS(vol) { mean += vol(x,y,z); } mean /= ((double)vol.dimx * (double)vol.dimy * (double)vol.dimz); #if JAZ_USE_OPENMP #pragma omp parallel for #endif FOR_ALL_VOXELS(vol) { double wx(1.0), wy(1.0), wz(1.0); if (x < rx) wx *= (1.0 - cos(PI * (x+1) / rx))/2.0; if (x >= vol.dimx - rx) wx *= (1.0 - cos(PI * (vol.dimx - x) / rx))/2.0; if (y < ry) wy *= (1.0 - cos(PI * (y+1) / ry))/2.0; if (y >= vol.dimy - ry) wy *= (1.0 - cos(PI * (vol.dimy - y) / ry))/2.0; if (z < rz) wz *= (1.0 - cos(PI * (z+1) / rz))/2.0; if (z >= vol.dimz - rz) wz *= (1.0 - cos(PI * (vol.dimz - z) / rz))/2.0; const double ww = wx*wy*wz; vol(x,y,z) = ww * vol(x,y,z) + (1.0 - ww) * mean; } } relion-3.1.3/src/jaz/tomo/backprojection_helper.h000066400000000000000000000075421411340063500220510ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #ifndef BACKPROJECTON_HELPER_H #define BACKPROJECTON_HELPER_H #include #include #include "tomo_stack.h" #include #include class BackprojectionHelper { public: enum InterpolationType {Linear, Cubic}; static void backprojectRaw( const Image& stack, std::string tiltAngles, Volume& dest, Volume& maskDest, gravis::d3Vector origin, double spacing = 1.0, int frames = -1); static void backprojectRaw( const TomoStack& stack, Volume& dest, Volume& maskDest, gravis::d3Vector origin, double spacing = 1.0, InterpolationType interpolation = Linear, double taperX = 20, double taperY = 20, double wMin = 3.0, int frame0 = 0, int frames = -1); static void backprojectExactWeights(const TomoStack& stack, Volume& dest, gravis::d3Vector origin, double spacing = 1.0, double taperX = 20, double taperY = 20, double taperZ = 20, double wMin = 3.0, int frame0 = 0, int frames = -1); static void backprojectExactWeightsFreq(const TomoStack& stack, Image& dest, Volume& weight, gravis::d3Vector origin, double spacing = 1.0, double taperX = 20, double taperY = 20, double taperZ = 20, double wMin = 3.0, int frame0 = 0, int frames = -1); static void backprojectDots(const TomoStack& stack, Volume& dest, gravis::d3Vector origin, double spacing = 1.0, double taperX = 20, double taperY = 20, double taperZ = 20, int frame0 = 0, int frames = -1); static void backprojectDotsFS(const TomoStack& stack, Image& dest, gravis::d3Vector origin, double spacing = 1.0, double taperX = 20, double taperY = 20, double taperZ = 20, int frame0 = 0, int frames = -1); static void backprojectDotsSeparately(const TomoStack& stack, Volume& dest, gravis::d3Vector origin, double spacing = 1.0, double taperX = 20, double taperY = 20, double taperZ = 20, int frame0 = 0, int frames = -1); static void backprojectOriginDot(const TomoStack& stack, Volume& dest, double sigma, gravis::d3Vector origin, double spacing = 1.0, int frame0 = 0, int frames = -1); static void taperEdges(Volume& vol, double rx, double ry, double rz); }; #endif relion-3.1.3/src/jaz/tomo/imod_helper.cpp000066400000000000000000000046371411340063500203410ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
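// ---------------------------------------------------------------------------
// Usage sketch for BackprojectionHelper::backprojectRaw() declared above (not
// part of the original source). The volume size, origin and spacing are
// arbitrary example values; the template arguments are assumptions restoring
// what the flattened declarations lost.
#include <src/macros.h>
#include <src/jaz/volume.h>
#include <src/jaz/tomo/tomo_stack.h>
#include <src/jaz/tomo/backprojection_helper.h>

void backprojectExample(const TomoStack& ts)
{
	// reconstruct a 256^3 voxel cube whose voxel (0,0,0) sits at world
	// position (0, 0, -128), with one voxel per world unit
	Volume<RFLOAT> dest(256, 256, 256);
	Volume<RFLOAT> mask(256, 256, 256);

	BackprojectionHelper::backprojectRaw(
		ts, dest, mask,
		gravis::d3Vector(0.0, 0.0, -128.0),   // origin of the volume
		1.0,                                  // spacing (voxel size)
		BackprojectionHelper::Linear);        // taper/wMin/frame arguments keep their defaults
}
// ---------------------------------------------------------------------------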
***************************************************************************/ #include "imod_helper.h" #include #include #include using namespace gravis; std::vector ImodHelper::readTiltTransforms(std::string fn, d4Matrix vol2world, double cix, double ciy) { std::ifstream anglesFile(fn.c_str()); if (!anglesFile.is_open()) { REPORT_ERROR("ImodHelper::readTiltTransforms: failed to open "+fn+"."); } std::vector vol2img; const double deg2rad = PI/180.0; while (anglesFile.good()) { double a; anglesFile >> a; a *= deg2rad; d4Matrix w2i; w2i(0,0) = cos(a); w2i(0,2) = sin(a); w2i(2,0) = -sin(a); w2i(2,2) = cos(a); w2i(0,3) = cix; w2i(1,3) = ciy; vol2img.push_back(w2i * vol2world); } return vol2img; } std::vector ImodHelper::readAffineTransforms(std::string fn) { std::vector vol2img; /*std::ifstream mapFile(fn.c_str()); if (!mapFile.is_open()) { REPORT_ERROR("ImodHelper::readAffineTransforms: failed to open "+fn+"."); } std::vector angles; while (anglesFile.good()) { double a; anglesFile >> a; a *= deg2rad; angles.push_back(a); d4Matrix w2i; w2i(0,0) = cos(a); w2i(0,2) = sin(a); w2i(2,0) = -sin(a); w2i(2,2) = cos(a); w2i(0,3) = cix; w2i(1,3) = ciy; vol2img.push_back(w2i * vol2world); } */ return vol2img; } relion-3.1.3/src/jaz/tomo/imod_helper.h000066400000000000000000000024231411340063500177750ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef IMOD_HELPER_H #define IMOD_HELPER_H #include #include #include class ImodHelper { public: static std::vector readTiltTransforms(std::string fn, gravis::d4Matrix vol2world, double cix, double ciy); static std::vector readAffineTransforms(std::string fn); }; #endif relion-3.1.3/src/jaz/tomo/projection_helper.cpp000066400000000000000000000100401411340063500215460ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #include "projection_helper.h" #include #include #include #include using namespace gravis; std::vector ProjectionHelper::loadTiltProjections( std::string tiltFile, double centerX, double centerY) { std::ifstream anglesFile(tiltFile.c_str()); if (!anglesFile.is_open()) { REPORT_ERROR("failed to open " + tiltFile + '\n'); } std::vector angles; std::vector vol2img; const double deg2rad = 3.14159265358979323846/180.0; d4Matrix w2i0; w2i0(0,3) = -centerX; w2i0(1,3) = -centerY; while (anglesFile.good()) { double a; anglesFile >> a; a *= deg2rad; angles.push_back(a); d4Matrix w2i; w2i(0,0) = cos(a); w2i(0,2) = sin(a); w2i(2,0) = -sin(a); w2i(2,2) = cos(a); w2i(0,3) = centerX; w2i(1,3) = centerY; vol2img.push_back(w2i*w2i0); } return vol2img; } std::vector ProjectionHelper::loadTiltProjectionsVol( std::string tiltFile, double centerX, double centerY, double X0, double Y0, double Z0, double spacing) { std::ifstream anglesFile(tiltFile.c_str()); if (!anglesFile.is_open()) { REPORT_ERROR("failed to open " + tiltFile + '\n'); } // vol2world * (0,0,0,1) = (xw0, yw0, zw0, 1) // vol2world * (xwc,ywc,zwc,1) = (xw1, yw1, zw1, 1) d4Matrix vol2world; vol2world(0,0) = spacing; vol2world(1,1) = spacing; vol2world(2,2) = spacing; vol2world(0,3) = X0; vol2world(1,3) = Y0; vol2world(2,3) = Z0; std::vector angles; std::vector vol2img; const double deg2rad = 3.14159265358979323846/180.0; while (anglesFile.good()) { double a; anglesFile >> a; a *= deg2rad; angles.push_back(a); d4Matrix w2i; w2i(0,0) = cos(a); w2i(0,2) = sin(a); w2i(2,0) = -sin(a); w2i(2,2) = cos(a); w2i(0,3) = centerX; w2i(1,3) = centerY; vol2img.push_back(w2i * vol2world); } return vol2img; } std::vector ProjectionHelper::loadAffineTransforms(std::string xformFile, double cx, double cy, bool square_result) { std::cout << "img. center: " << cx << ", " << cy << '\n'; std::ifstream file(xformFile.c_str()); if (!file.is_open()) { REPORT_ERROR("failed to open " + xformFile + '\n'); } std::vector xforms; d4Matrix P, Q; P.loadIdentity(); Q.loadIdentity(); P(0,3) = -cx; P(1,3) = -cy; Q(0,3) = cx; if (square_result) { Q(1,3) = cx; } else { Q(1,3) = cy; } char text[4096]; while (file.good()) { file.getline(text, 4096); if (strlen(text) < 11) break; std::stringstream line(text); d4Matrix A; A.loadIdentity(); line >> A(0,0); line >> A(0,1); line >> A(1,0); line >> A(1,1); line >> A(0,3); line >> A(1,3); xforms.push_back(Q*A*P); } return xforms; } relion-3.1.3/src/jaz/tomo/projection_helper.h000066400000000000000000000046501411340063500212250ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #ifndef PROJECTION_HELPER_H #define PROJECTION_HELPER_H #include #include class ProjectionHelper { public: /* loads a sequence of tilt angles and returns 4x4 matrices that map *world space* coordinates to image coordinates*/ static std::vector loadTiltProjections( std::string tiltFile, // file containing the tilt angles in ASCII double centerX, double centerY); // world origin projected into the images (usually, the image center) /* loads a sequence of tilt angles and returns 4x4 matrices that map *voxel* coordinates to image coordinates*/ static std::vector loadTiltProjectionsVol( std::string tiltFile, // file containing the tilt angles in ASCII double centerX, double centerY, // world origin projected into the images (usually, the image center) double X0, double Y0, double Z0, // origin of the volume in world coordinates double spacing = 1.0); // volume resolution /* loads a sequence of affine transforms*/ static std::vector loadAffineTransforms( std::string xformFile, // file containing the affine transforms in ASCII (ie. the .xf-file from imod) double cx, double cy, // coordinates of image center bool square_result = true); // coordinate origin in the output image is at (cx,cx), not (cx,cy) }; #endif relion-3.1.3/src/jaz/tomo/tomo_stack.cpp000066400000000000000000000225121411340063500202050ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
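// ---------------------------------------------------------------------------
// Usage sketch for the ProjectionHelper loaders declared above (not part of
// the original source). The file names "tomo01.tlt" / "tomo01.xf" and the
// image-center coordinates are hypothetical example inputs; the template
// arguments are assumptions restoring what the flattened declarations lost.
#include <src/jaz/tomo/projection_helper.h>
#include <vector>

void projectionExample()
{
	const double cx = 1919.0, cy = 1855.0;   // world origin projected into the images

	// one 4x4 world-to-image matrix per tilt, from an IMOD-style .tlt file
	std::vector<gravis::d4Matrix> tiltProjs =
			ProjectionHelper::loadTiltProjections("tomo01.tlt", cx, cy);

	// per-frame 2D alignment transforms from the corresponding .xf file
	std::vector<gravis::d4Matrix> xforms =
			ProjectionHelper::loadAffineTransforms("tomo01.xf", cx, cy);

	// the TomoStack constructor below combines the two as
	// worldToImage[i] = inverse(xforms[i]) * tiltProjs[i]
}
// ---------------------------------------------------------------------------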
***************************************************************************/ #include "tomo_stack.h" #include "projection_helper.h" #include #include #include #include #include #include using namespace gravis; TomoStack :: TomoStack(std::string imagesFn, int imgCount, std::string angles, std::string affineTransforms, std::string ctfPath, double angpix, double scaleFactor, bool loadImgs) : angpix(angpix), scaleFactor(scaleFactor) { size_t ast = imagesFn.find_first_of('*'); if (ast == std::string::npos) { REPORT_ERROR("TomoStack::ctor: asterisk required in image filename.\n"); } std::string fnBase = imagesFn.substr(0, ast); std::string fnEnd = imagesFn.substr(ast+1); images.resize(imgCount); for (int i = 0; i < imgCount; i++) { std::stringstream sts; sts << i; std::string fn; sts >> fn; std::string fnn = fnBase+fn+fnEnd; std::cout << "reading: " << fnn << "\n"; images[i].read(fnn); if (!loadImgs) break; } d2Vector center; center.x = images[0].data.xdim/(2.0 * scaleFactor); center.y = images[0].data.ydim/(2.0 * scaleFactor); tiltProjs = ProjectionHelper::loadTiltProjections(angles, center.x, center.y); affineXforms = ProjectionHelper::loadAffineTransforms(affineTransforms, center.x, center.y); ctfs = CtfHelper::loadCtffind4(ctfPath, imgCount, 300.0, 2.7, 0.07); if (tiltProjs.size() < imgCount) { REPORT_ERROR("BackprojectionHelper::backproject: not enough angles in "+angles+"."); } if (affineXforms.size() < imgCount) { REPORT_ERROR("BackprojectionHelper::backproject: not enough affine transforms in "+affineTransforms+"."); } worldToImage.resize(imgCount); for (int i = 0; i < imgCount; i++) { d4Matrix Ai = affineXforms[i]; Ai.invert(); worldToImage[i] = Ai * tiltProjs[i]; for (int j = 0; j < 3; j++) for (int k = 0; k < 4; k++) { worldToImage[i](j,k) *= scaleFactor; } } } TomoStack TomoStack :: extractSubStack(gravis::d3Vector center, int w, int h) { const int ic = images.size(); TomoStack ts; ts.angpix = angpix; ts.scaleFactor = scaleFactor; ts.images.resize(ic); ts.affineXforms.resize(ic); ts.tiltProjs.resize(ic); ts.worldToImage.resize(ic); ts.ctfs.resize(ic); d4Vector pw(center.x, center.y, center.z, 1.0); for (int i = 0; i < ic; i++) { d4Vector pi = worldToImage[i] * pw; int x0 = (int)(pi.x - w/2.0 + 0.5); int y0 = (int)(pi.y - h/2.0 + 0.5); FilterHelper::extract2D(images[i], ts.images[i], x0, y0, w, h); ts.affineXforms[i] = affineXforms[i]; ts.affineXforms[i](0,3) -= x0; ts.affineXforms[i](1,3) -= y0; ts.tiltProjs[i] = tiltProjs[i]; ts.worldToImage[i] = worldToImage[i]; ts.worldToImage[i](0,3) -= x0; ts.worldToImage[i](1,3) -= y0; ts.ctfs[i] = ctfs[i]; } return ts; } void TomoStack :: downsample(int factor, int f0, int fc) { const int ic = fc < 0? images.size() : fc+f0; Image temp(images[0].data.xdim/factor, images[0].data.ydim/factor); for (int i = f0; i < ic; i++) { SliceHelper::downsample(images[i], temp); images[i] = temp; worldToImage[i] /= factor; worldToImage[i](3,3) = 1.0; } angpix *= factor; scaleFactor /= factor; } void TomoStack :: phaseFlip(int f0, int fc) { const int ic = fc < 0? images.size() : fc+f0; for (int i = f0; i < ic; i++) { FilterHelper::phaseFlip(images[i], ctfs[i], angpix, images[i]); } } void TomoStack :: ctfModulate(int f0, int fc) { const int ic = fc < 0? images.size() : fc+f0; for (int i = f0; i < ic; i++) { FilterHelper::modulate(images[i], ctfs[i], angpix, images[i]); } } void TomoStack :: wienerFilter(RFLOAT eps, RFLOAT Bfac, int f0, int fc) { const int ic = fc < 0? 
images.size() : fc+f0; for (int i = f0; i < ic; i++) { FilterHelper::wienerFilter(images[i], ctfs[i], angpix, eps, Bfac, images[i]); } } void TomoStack :: richardsonLucy(int iterations, RFLOAT eps, int f0, int fc) { const int ic = fc < 0? images.size() : fc+f0; for (int i = f0; i < ic; i++) { FilterHelper::richardsonLucy(images[i], ctfs[i], angpix, eps, iterations, images[i]); } } void TomoStack :: rampFilter(RFLOAT s0, RFLOAT t1, int f0, int fc) { const int ic = fc < 0? images.size() : fc+f0; for (int i = f0; i < ic; i++) { d2Vector u(affineXforms[i](0,0), affineXforms[i](1,0)); u + u / u.length(); FilterHelper::rampFilter(images[i], s0, t1, u.x, u.y, images[i]); } } void TomoStack :: safeLog(RFLOAT eps, int f0, int fc) { const int ic = fc < 0? images.size() : fc+f0; for (int im = f0; im < ic; im++) { FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY2D(images[im].data) { const double v = DIRECT_A2D_ELEM(images[im].data, i, j); DIRECT_A2D_ELEM(images[im].data, i, j) = v > eps ? log(v) : log(eps); } } } void TomoStack :: scaledExp(RFLOAT scale, int f0, int fc) { const int ic = fc < 0? images.size() : fc+f0; for (int im = f0; im < ic; im++) { FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY2D(images[im].data) { DIRECT_A2D_ELEM(images[im].data, i, j) = exp(scale * DIRECT_A2D_ELEM(images[im].data, i, j)); } } } void TomoStack :: defocusStack(int f, double dz0, double dz1, double eps, double Bfac, std::vector >& dest, int x0, int y0, int w, int h) { int zc = dest.size(); CTF ctf0 = ctfs[f]; CTF ctf = ctf0; Image img; if (w < 0 || h < 0) { img = images[f]; } else { img = Image(w,h,1,1); FilterHelper::extract2D(images[f], img, x0, y0, w, h); } for (int z = 0; z < zc; z++) { double dz = dz0 + z * (dz1 - dz0) / (double)(zc - 1); std::cout << z << ": " << dz << " \t "; ctf.DeltafU = ctf0.DeltafU + dz; ctf.DeltafV = ctf0.DeltafV + dz; ctf.initialise(); FilterHelper::wienerFilter(img, ctf, angpix, eps, Bfac, dest[z]); //FilterHelper::lowPassFilter(dest[z], 0.2, 0.1); //FilterHelper::phaseFlip(img, ctf, angpix, dest[z]); std::cout << FilterHelper::totalLogVariation(dest[z]) << "\n"; } } void TomoStack :: saveImages(std::string path, int f0, int fc) { size_t ast = path.find_first_of('*'); if (ast == std::string::npos) { REPORT_ERROR("TomoStack::saveImages: asterisk required in path.\n"); } std::string fnBase = path.substr(0, ast); std::string fnEnd = path.substr(ast+1); const int ic = fc < 0? 
images.size() : fc+f0; for (int i = f0; i < ic; i++) { std::stringstream sts; sts << i; std::string fn; sts >> fn; std::string fnn = fnBase+fn+fnEnd; std::cout << "writing: " << fnn << "\n"; images[i].write(fnn); } } std::vector > TomoStack::loadFiducials(std::string file, double scale) { std::ifstream is(file); if (!is.is_open()) { REPORT_ERROR("failed to open " + file + '\n'); } std::vector batch(0); std::vector > out(0); int lastF = -1; while (is.good()) { double x, y; int f; is >> x; is >> y; is >> f; if (f < 0 || f >= affineXforms.size()) { std::stringstream sts; sts << f; std::string fs; sts >> fs; REPORT_ERROR("illegal fiducial index: "+fs+"\n"); } while (f > lastF + 1) { std::cout << "Warning: fiducial position for frame " << (lastF + 1) << " is missing.\n"; lastF++; batch.push_back(d2Vector(-1000.0,-1000.0)); } if (f == 0) { if (batch.size() > 0) { out.push_back(batch); } batch.clear(); } lastF = f; d4Matrix Ai = affineXforms[f]; Ai.invert(); d4Vector d0(scale*x,scale*y,0,1); d4Vector d = Ai * d0; batch.push_back(d2Vector(d.x,d.y)); } out.push_back(batch); return out; } relion-3.1.3/src/jaz/tomo/tomo_stack.h000066400000000000000000000050231411340063500176500ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #ifndef TOMO_STACK_H #define TOMO_STACK_H #include #include #include #include #include class TomoStack { public: TomoStack(){} TomoStack(std::string imagesFn, int imgCount, std::string angles, std::string affineTransforms, std::string ctfPath, double angpix, double scaleFactor = 1.0, bool loadImgs = true); TomoStack extractSubStack(gravis::d3Vector center, int w, int h); std::vector > images; std::vector affineXforms; std::vector tiltProjs; std::vector worldToImage; std::vector ctfs; double angpix, scaleFactor; void downsample(int factor, int f0 = 0, int fc = -1); void phaseFlip(int f0 = 0, int fc = -1); void ctfModulate(int f0 = 0, int fc = -1); void wienerFilter(RFLOAT eps, RFLOAT Bfac, int f0 = 0, int fc = -1); void richardsonLucy(int iterations, RFLOAT eps, int f0 = 0, int fc = -1); void rampFilter(RFLOAT s0, RFLOAT t1, int f0 = 0, int fc = -1); void safeLog(RFLOAT eps, int f0 = 0, int fc = -1); void scaledExp(RFLOAT scale, int f0 = 0, int fc = -1); void defocusStack(int f, double dz0, double dz1, double eps, double Bfac, std::vector >& dest, int x0 = 0, int y0 = 0, int w = -1, int h = -1); void saveImages(std::string path, int f0 = 0, int fc = -1); std::vector > loadFiducials(std::string file, double scale); }; #endif relion-3.1.3/src/jaz/volume.h000066400000000000000000000070421411340063500160410ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef JAZ_VOLUME_H #define JAZ_VOLUME_H #include #include #include /* class Volume: represents a grid of voxels of arbitrary type. Only provides methods for direct access at integral coordinates. Everything else is handled by external classes. */ #define FOR_ALL_VOXELS(V) \ for (size_t z = 0; z < (V).dimz; z++) \ for (size_t y = 0; y < (V).dimy; y++) \ for (size_t x = 0; x < (V).dimx; x++) template class Volume { public: Volume(){} /* The constructor only allocates memory, it does not initialize the values.*/ Volume(size_t dimx, size_t dimy, size_t dimz); long int dimx, dimy, dimz; std::vector voxels; /* operator (x,y,z): returns a reference to the indicated voxel. 
The correct version (const or non-const) will be chosen by the compiler, depending on whether the instance is declared as const or not.*/ const T& operator() (size_t, size_t, size_t) const; T& operator() (size_t, size_t, size_t); /* data(): returns a pointer to the first data element.*/ const T* data() const; T* data(); void resize(size_t dimx, size_t dimy, size_t dimz); void resize(const Volume& example); void fill(T t); Volume& operator += (const Volume& v) { for (int i = 0; i < voxels.size(); i++) { voxels[i] += v.voxels[i]; } return *this; } }; template Volume::Volume(size_t dimx, size_t dimy, size_t dimz) : dimx(dimx), dimy(dimy), dimz(dimz) { voxels.resize(dimx*dimy*dimz); } template inline const T& Volume::operator() (size_t x, size_t y, size_t z) const { return voxels[(z*dimy + y)*dimx + x]; } template inline T& Volume::operator() (size_t x, size_t y, size_t z) { return voxels[(z*dimy + y)*dimx + x]; } template inline const T* Volume::data() const { return &voxels[0]; } template inline T* Volume::data() { return &voxels[0]; } template inline void Volume::resize(size_t dimx, size_t dimy, size_t dimz) { this->dimx = dimx; this->dimy = dimy; this->dimz = dimz; voxels.resize(dimx*dimy*dimz); } template inline void Volume::resize(const Volume& example) { dimx = example.dimx; dimy = example.dimy; dimz = example.dimz; voxels.resize(dimx*dimy*dimz); } template inline void Volume::fill(T t) { for (long int i = 0; i < voxels.size(); i++) { voxels[i] = t; } } #endif relion-3.1.3/src/jaz/volume_converter.cpp000066400000000000000000000032311411340063500204570ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
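A small usage sketch for the Volume container defined above. It assumes the RFLOAT instantiation used elsewhere in this code and relies on the FOR_ALL_VOXELS macro from the same header; the dimensions and values are illustrative.

#include <src/jaz/volume.h>

void exampleVolumeUsage()
{
    Volume<RFLOAT> vol(64, 64, 64); // allocates 64^3 voxels, values uninitialised
    vol.fill(0.0);                  // set every voxel to zero

    vol(32, 32, 32) = 1.0;          // direct access at integral coordinates

    // Iterate over all voxels; x varies fastest, matching the internal memory layout.
    double sum = 0.0;
    FOR_ALL_VOXELS(vol)
    {
        sum += vol(x, y, z);
    }
}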
***************************************************************************/ #include void VolumeConverter::convert(const Image& src, Volume& dest) { dest.resize(src.data.xdim, src.data.ydim, src.data.zdim); FOR_ALL_VOXELS(dest) { dest(x,y,z) = DIRECT_A3D_ELEM(src.data, z, y, x); } } void VolumeConverter::convertStack(const Image& src, Volume& dest) { dest.resize(src.data.xdim, src.data.ydim, src.data.ndim); FOR_ALL_VOXELS(dest) { dest(x,y,z) = DIRECT_NZYX_ELEM(src.data, z, 1, y, x); } } void VolumeConverter::convert(const Volume& src, Image& dest) { dest.data.resize(src.dimz, src.dimy, src.dimx); FOR_ALL_VOXELS(src) { DIRECT_A3D_ELEM(dest.data, z, y, x) = src(x,y,z); } } relion-3.1.3/src/jaz/volume_converter.h000066400000000000000000000027001411340063500201240ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef VOLUME_CONVERTER_H #define VOLUME_CONVERTER_H #include #include #include /* class VolumeConverter: facilitates conversion between 'Volume' and Relion's 'Image' and 'MultidimArray' classes. */ class VolumeConverter { public: static void convert(const Image& src, Volume& dest); static void convertStack(const Image& src, Volume& dest); static void convert(const Volume& src, Image& dest); }; #endif relion-3.1.3/src/jaz/volume_integration.cpp000066400000000000000000000051721411340063500210010ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
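A sketch of converting between RELION's Image class and the Volume container via the converter above, again assuming the RFLOAT instantiations. Note the axis order: a Volume is addressed as (x, y, z), whereas the MultidimArray inside an Image is addressed as (z, y, x). The file names are illustrative.

#include <src/jaz/volume_converter.h>

void exampleVolumeConversion()
{
    Image<RFLOAT> map;
    map.read("input_map.mrc");           // illustrative file name

    Volume<RFLOAT> vol;
    VolumeConverter::convert(map, vol);  // vol is resized to (xdim, ydim, zdim)

    // ... operate on vol ...

    Image<RFLOAT> out;
    VolumeConverter::convert(vol, out);  // out.data is resized to (dimz, dimy, dimx)
    out.write("output_map.mrc");
}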
***************************************************************************/ #include #include #include using namespace gravis; void VolumeIntegration :: integrateAlongZ(const Volume& vol, gravis::d4Matrix vol2img, Image& dest) { const int xsv = vol.dimx; const int ysv = vol.dimy; const int zsv = vol.dimz; const int xsi = dest.data.xdim; const int ysi = dest.data.ydim; d2Matrix A2; A2(0,0) = vol2img(0,0); A2(0,1) = vol2img(0,1); A2(1,0) = vol2img(1,0); A2(1,1) = vol2img(1,1); A2.invert(); #if JAZ_USE_OPENMP #pragma omp parallel for #endif for (int yi = 0; yi < ysi; yi++) for (int xi = 0; xi < xsi; xi++) { DIRECT_A2D_ELEM(dest.data, yi, xi) = 0.0; } for (int zv = 0; zv < zsv; zv++) { #if JAZ_USE_OPENMP #pragma omp parallel for #endif for (int yi = 0; yi < ysi; yi++) for (int xi = 0; xi < xsi; xi++) { d2Vector b(xi - zv*vol2img(0,2) - vol2img(0,3), yi - zv*vol2img(1,2) - vol2img(1,3)); d2Vector v = A2 * b; int xvi = (int) v.x; int yvi = (int) v.y; double xvf = v.x - xvi; double yvf = v.y - yvi; if (xvi < 0 || yvi < 0 || xvi >= xsv-1 || yvi >= ysv-1) continue; RFLOAT vv00 = vol(xvi,yvi,zv); RFLOAT vv10 = vol(xvi+1,yvi,zv); RFLOAT vv01 = vol(xvi,yvi+1,zv); RFLOAT vv11 = vol(xvi+1,yvi+1,zv); RFLOAT vv0 = yvf * vv01 + (1.0 - yvf) * vv00; RFLOAT vv1 = yvf * vv11 + (1.0 - yvf) * vv10; RFLOAT vv = xvf * vv1 + (1.0 - xvf) * vv0; DIRECT_A2D_ELEM(dest.data, yi, xi) += vv; } } } relion-3.1.3/src/jaz/volume_integration.h000066400000000000000000000023221411340063500204400ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef VOLUME_INTEGRATION_H #define VOLUME_INTEGRATION_H #include "src/jaz/volume.h" #include "src/jaz/gravis/t4Matrix.h" #include "src/image.h" class VolumeIntegration { public: static void integrateAlongZ(const Volume& vol, gravis::d4Matrix vol2img, Image& dest); }; #endif relion-3.1.3/src/jaz/vtk_helper.cpp000066400000000000000000000524131411340063500172320ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. 
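A sketch of projecting a volume along z with integrateAlongZ() above. The destination image is not resized by the routine (its x/y dimensions are read from dest), so it must be allocated beforehand; the RFLOAT instantiation of Volume and the identity default of gravis::d4Matrix are assumptions, and the file name is illustrative.

#include <src/jaz/volume_integration.h>

void exampleProjection(const Volume<RFLOAT>& vol)
{
    // The destination must already have the desired x/y dimensions.
    Image<RFLOAT> proj(vol.dimx, vol.dimy);

    // Identity volume-to-image transform: integrate straight down the z axis.
    gravis::d4Matrix vol2img; // assumed to default-construct to the identity

    VolumeIntegration::integrateAlongZ(vol, vol2img, proj);
    proj.write("projection.mrc");
}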
Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include using namespace gravis; Image VtkHelper::allToZ(const Image &img) { std::cout << img.data.xdim << "x" << img.data.ydim << "x" << img.data.zdim << "x" << img.data.ndim << "\n"; if (img.data.ndim == 1) return img; Image out(img.data.xdim, img.data.ydim, img.data.ndim); for (int n = 0; n < img.data.ndim; n++) for (int y = 0; y < img.data.ydim; y++) for (int x = 0; x < img.data.xdim; x++) { DIRECT_NZYX_ELEM(out(), 0, n, y, x) = DIRECT_NZYX_ELEM(img(), n, 0, y, x); } return out; } void VtkHelper :: writeVTK(Image& img, std::string fn, double originX, double originY, double originZ, double spacingX, double spacingY, double spacingZ, bool binary) { const size_t size = (img.data.xdim * img.data.ydim * img.data.zdim); std::ofstream os(fn.c_str(), std::ios::binary); std::string sizetype = "double"; os << "# vtk DataFile Version 2.0\n"; os << "Volume example\n"; if (binary) { os << "BINARY\n"; } else { os << "ASCII\n"; } os << "DATASET STRUCTURED_POINTS\n"; os << "DIMENSIONS " << img.data.xdim << " " << img.data.ydim << " " << img.data.zdim << "\n"; os << "SPACING " << spacingX << " " << spacingY << " " << spacingZ << "\n"; os << "ORIGIN " << originX << " " << originY << " " << originZ << "\n"; os << "POINT_DATA " << size << "\n"; os << "SCALARS volume_scalars " << sizetype << " 1\n"; os << "LOOKUP_TABLE default\n"; if (binary) { os.write((char*)(img.data.data), sizeof(RFLOAT)*size); } else { FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY3D(img.data) { os << DIRECT_A3D_ELEM(img.data, k, i, j) << "\n"; } } } void VtkHelper :: writeVTK(Image& img, std::string fn, double originX, double originY, double originZ, double spacingX, double spacingY, double spacingZ, bool binary) { const size_t size = (img.data.xdim * img.data.ydim * img.data.zdim); std::ofstream os(fn.c_str(), std::ios::binary); std::string sizetype = "float"; os << "# vtk DataFile Version 2.0\n"; os << "Volume example\n"; if (binary) { os << "BINARY\n"; } else { os << "ASCII\n"; } os << "DATASET STRUCTURED_POINTS\n"; os << "DIMENSIONS " << img.data.xdim << " " << img.data.ydim << " " << img.data.zdim << "\n"; os << "SPACING " << spacingX << " " << spacingY << " " << spacingZ << "\n"; os << "ORIGIN " << originX << " " << originY << " " << originZ << "\n"; os << "POINT_DATA " << size << "\n"; os << "SCALARS volume_scalars " << sizetype << " 1\n"; os << "LOOKUP_TABLE default\n"; if (binary) { os.write((char*)(img.data.data), sizeof(RFLOAT)*size); } else { FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY3D(img.data) { os << DIRECT_A3D_ELEM(img.data, k, i, j) << "\n"; } } } void VtkHelper::writeVTK(Image &img, std::string fn, double originX, double originY, double originZ, double spacingX, double spacingY, double spacingZ, bool binary) { const size_t size = (img.data.xdim * img.data.ydim * img.data.zdim); std::ofstream os(fn.c_str(), std::ios::binary); std::string sizetype = "float"; if (sizeof(RFLOAT) > 4) sizetype = "double"; //std::cout << "size: " << size << "\n"; //std::cout << "sizetype: " << sizetype << "\n"; os << "# vtk DataFile Version 2.0\n"; os << "Volume example\n"; if (binary) { os << "BINARY\n"; } else { os << "ASCII\n"; } os << "DATASET STRUCTURED_POINTS\n"; os << "DIMENSIONS " << img.data.xdim << " " << img.data.ydim << " " << img.data.zdim << "\n"; os << "SPACING " << spacingX << " " << spacingY << " " << spacingZ << "\n"; os << "ORIGIN " << originX << " " << originY << " " << 
originZ << "\n"; os << "POINT_DATA " << size << "\n"; os << "SCALARS volume_scalars " << sizetype << " 2\n"; os << "LOOKUP_TABLE default\n"; if (binary) { os.write((char*)(img.data.data), 2*sizeof(RFLOAT)*size); } else { FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY3D(img.data) { os << DIRECT_A3D_ELEM(img.data, k, i, j).real << "\n"; os << DIRECT_A3D_ELEM(img.data, k, i, j).imag << "\n"; } } } void VtkHelper :: writeVTK(MultidimArray& img, std::string fn, double originX, double originY, double originZ, double spacingX, double spacingY, double spacingZ, bool binary) { const size_t size = (img.xdim * img.ydim * img.zdim); std::ofstream os(fn.c_str(), std::ios::binary); std::string sizetype = "float"; if (sizeof(RFLOAT) > 4) sizetype = "double"; //std::cout << "size: " << size << "\n"; //std::cout << "sizetype: " << sizetype << "\n"; os << "# vtk DataFile Version 2.0\n"; os << "Volume example\n"; if (binary) { os << "BINARY\n"; } else { os << "ASCII\n"; } os << "DATASET STRUCTURED_POINTS\n"; os << "DIMENSIONS " << img.xdim << " " << img.ydim << " " << img.zdim << "\n"; os << "SPACING " << spacingX << " " << spacingY << " " << spacingZ << "\n"; os << "ORIGIN " << originX << " " << originY << " " << originZ << "\n"; os << "POINT_DATA " << size << "\n"; os << "SCALARS volume_scalars " << sizetype << " 1\n"; os << "LOOKUP_TABLE default\n"; if (binary) { os.write((char*)(img.data), sizeof(RFLOAT)*size); } else { FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY3D(img) { os << DIRECT_A3D_ELEM(img, k, i, j) << "\n"; } } } void VtkHelper :: writeVTK_Complex(const MultidimArray& img, std::string fn, bool binary) { const size_t size = (img.xdim * img.ydim * img.zdim); std::ofstream os(fn.c_str(), std::ios::binary); std::string sizetype = "float"; if (sizeof(RFLOAT) > 4) sizetype = "double"; //std::cout << "size: " << size << "\n"; //std::cout << "sizetype: " << sizetype << "\n"; os << "# vtk DataFile Version 2.0\n"; os << "Volume example\n"; if (binary) { os << "BINARY\n"; } else { os << "ASCII\n"; } os << "DATASET STRUCTURED_POINTS\n"; os << "DIMENSIONS " << img.xdim << " " << img.ydim << " " << img.zdim << "\n"; os << "SPACING 1 1 1\n"; os << "ORIGIN 0 0 0\n"; os << "POINT_DATA " << size << "\n"; os << "SCALARS volume_scalars " << sizetype << " 2\n"; os << "LOOKUP_TABLE default\n"; if (binary) { os.write((char*)(img.data), 2*sizeof(RFLOAT)*size); } else { FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY3D(img) { os << DIRECT_A3D_ELEM(img, k, i, j).real << "\n"; os << DIRECT_A3D_ELEM(img, k, i, j).imag << "\n"; } } } void VtkHelper :: writeVTK_d3(MultidimArray >& img, std::string fn, bool binary) { const size_t size = (img.xdim * img.ydim * img.zdim); std::ofstream os(fn.c_str(), std::ios::binary); std::string sizetype = "float"; if (sizeof(RFLOAT) > 4) sizetype = "double"; //std::cout << "size: " << size << "\n"; //std::cout << "sizetype: " << sizetype << "\n"; os << "# vtk DataFile Version 2.0\n"; os << "Volume example\n"; if (binary) { os << "BINARY\n"; } else { os << "ASCII\n"; } os << "DATASET STRUCTURED_POINTS\n"; os << "DIMENSIONS " << img.xdim << " " << img.ydim << " " << img.zdim << "\n"; os << "SPACING 1 1 1\n"; os << "ORIGIN 0 0 0\n"; os << "POINT_DATA " << size << "\n"; os << "SCALARS volume_scalars " << sizetype << " 3\n"; os << "LOOKUP_TABLE default\n"; if (binary) { os.write((char*)(img.data), 2*sizeof(RFLOAT)*size); } else { FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY3D(img) { os << DIRECT_A3D_ELEM(img, k, i, j).x << "\n"; os << DIRECT_A3D_ELEM(img, k, i, j).y << "\n"; os << DIRECT_A3D_ELEM(img, k, i, j).z << "\n"; } } } void VtkHelper :: 
writeTomoVTK(Image& img, std::string fn, bool binary, double pixelSize, d3Vector origin) { const size_t size = (img.data.xdim * img.data.ydim * img.data.ndim); std::ofstream os(fn.c_str(), std::ios::binary); std::string sizetype = "float"; if (sizeof(RFLOAT) > 4) sizetype = "double"; //std::cout << "size: " << size << "\n"; //std::cout << "sizetype: " << sizetype << "\n"; os << "# vtk DataFile Version 2.0\n"; os << "Volume example\n"; if (binary) { os << "BINARY\n"; } else { os << "ASCII\n"; } os << "DATASET STRUCTURED_POINTS\n"; os << "DIMENSIONS " << img.data.xdim << " " << img.data.ydim << " " << img.data.ndim << "\n"; os << "SPACING " << pixelSize << " " << pixelSize << " " << pixelSize << "\n"; os << "ORIGIN " << origin.x << " " << origin.y << " " << origin.z << "\n"; os << "POINT_DATA " << size << "\n"; os << "SCALARS volume_scalars " << sizetype << " 1\n"; os << "LOOKUP_TABLE default\n"; if (binary) { os.write((char*)(img.data.data), sizeof(RFLOAT)*size); } else { FOR_ALL_NZYX_ELEMENTS_IN_MULTIDIMARRAY(img.data) { os << DIRECT_NZYX_ELEM(img.data, l, 0, i, j) << "\n"; } } } void VtkHelper :: write(std::vector >& stack, std::string fn, double originX, double originY, double spacingX, double spacingY, bool binary) { const size_t size = (stack[0].data.xdim * stack[0].data.ydim * stack.size()); std::ofstream os(fn.c_str(), std::ios::binary); std::string sizetype = "float"; if (sizeof(RFLOAT) > 4) sizetype = "double"; os << "# vtk DataFile Version 2.0\n"; os << "Volume example\n"; if (binary) { os << "BINARY\n"; } else { os << "ASCII\n"; } os << "DATASET STRUCTURED_POINTS\n"; os << "DIMENSIONS " << stack[0].data.xdim << " " << stack[0].data.ydim << " " << stack.size() << "\n"; os << "SPACING " << spacingX << " " << spacingY << " 1\n"; os << "ORIGIN " << originX << " " << originY << " 0\n"; os << "POINT_DATA " << size << "\n"; os << "SCALARS volume_scalars " << sizetype << " 1\n"; os << "LOOKUP_TABLE default\n"; if (binary) { os.write((char*)(stack[0].data.data), sizeof(RFLOAT)*size); } else { for (int ind = 0; ind < stack.size(); ind++) { FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY2D(stack[ind].data) { os << DIRECT_A2D_ELEM(stack[ind].data, i, j) << "\n"; } } } } void VtkHelper :: writeCentered(std::vector >& stack, std::string fn, double originX, double originY, double spacingX, double spacingY, bool binary) { const size_t size = (stack[0].data.xdim * stack[0].data.ydim * stack.size()); std::ofstream os(fn.c_str(), std::ios::binary); std::string sizetype = "float"; if (sizeof(RFLOAT) > 4) sizetype = "double"; os << "# vtk DataFile Version 2.0\n"; os << "Volume example\n"; if (binary) { os << "BINARY\n"; } else { os << "ASCII\n"; } os << "DATASET STRUCTURED_POINTS\n"; os << "DIMENSIONS " << stack[0].data.xdim << " " << stack[0].data.ydim << " " << stack.size() << "\n"; os << "SPACING " << spacingX << " " << spacingY << " 1\n"; os << "ORIGIN " << originX << " " << originY << " 0\n"; os << "POINT_DATA " << size << "\n"; os << "SCALARS volume_scalars " << sizetype << " 1\n"; os << "LOOKUP_TABLE default\n"; if (binary) { os.write((char*)(stack[0].data.data), sizeof(RFLOAT)*size); } else { const int w = stack[0].data.xdim; const int h = stack[0].data.ydim; for (int ind = 0; ind < stack.size(); ind++) { FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY2D(stack[ind].data) { int ii = (h + i - h/2)%h; int jj = (w + j - w/2)%w; os << DIRECT_A2D_ELEM(stack[ind].data, ii, jj) << "\n"; } } } } void VtkHelper :: write(std::vector >& stack, std::string fn, double originX, double originY, double spacingX, double spacingY, bool 
binary) { const size_t size = (stack[0].data.xdim * stack[0].data.ydim * stack.size()); std::ofstream os(fn.c_str(), std::ios::binary); std::string sizetype = "float"; os << "# vtk DataFile Version 2.0\n"; os << "Volume example\n"; if (binary) { os << "BINARY\n"; } else { os << "ASCII\n"; } os << "DATASET STRUCTURED_POINTS\n"; os << "DIMENSIONS " << stack[0].data.xdim << " " << stack[0].data.ydim << " " << stack.size() << "\n"; os << "SPACING " << spacingX << " " << spacingY << " 1\n"; os << "ORIGIN " << originX << " " << originY << " 0\n"; os << "POINT_DATA " << size << "\n"; os << "SCALARS volume_scalars " << sizetype << " 1\n"; os << "LOOKUP_TABLE default\n"; if (binary) { os.write((char*)(stack[0].data.data), sizeof(RFLOAT)*size); } else { for (int ind = 0; ind < stack.size(); ind++) { FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY2D(stack[ind].data) { os << DIRECT_A2D_ELEM(stack[ind].data, i, j) << "\n"; } } } } void VtkHelper :: writeVTK(Volume& vol, std::string fn, double originX, double originY, double originZ, double spacingX, double spacingY, double spacingZ, bool binary) { const size_t size = (vol.dimx * vol.dimy * vol.dimz); std::ofstream os(fn.c_str()); std::string sizetype = "float"; if (sizeof(RFLOAT) > 4) sizetype = "double"; os << "# vtk DataFile Version 2.0\n"; os << "Volume example\n"; if (binary) { os << "BINARY\n"; } else { os << "ASCII\n"; } os << "DATASET STRUCTURED_POINTS\n"; os << "DIMENSIONS " << vol.dimx << " " << vol.dimy << " " << vol.dimz << "\n"; os << "SPACING " << spacingX << " " << spacingY << " " << spacingZ << "\n"; os << "ORIGIN " << originX << " " << originY << " " << originZ << "\n"; os << "POINT_DATA " << size << "\n"; os << "SCALARS volume_scalars " << sizetype << " 1\n"; os << "LOOKUP_TABLE default\n"; if (binary) { os.write((char*)(vol.data()), sizeof(RFLOAT)*size); } else { FOR_ALL_VOXELS(vol) { os << vol(x,y,z) << "\n"; } } } void VtkHelper :: writeVTK(Volume >& vol, std::string fn, double originX, double originY, double originZ, double spacingX, double spacingY, double spacingZ, bool binary) { const size_t size = (vol.dimx * vol.dimy * vol.dimz); std::ofstream os(fn.c_str()); std::string sizetype = "float"; if (sizeof(RFLOAT) > 4) sizetype = "double"; os << "# vtk DataFile Version 2.0\n"; os << "Volume example\n"; if (binary) { os << "BINARY\n"; } else { os << "ASCII\n"; } os << "DATASET STRUCTURED_POINTS\n"; os << "DIMENSIONS " << vol.dimx << " " << vol.dimy << " " << vol.dimz << "\n"; os << "SPACING " << spacingX << " " << spacingY << " " << spacingZ << "\n"; os << "ORIGIN " << originX << " " << originY << " " << originZ << "\n"; os << "POINT_DATA " << size << "\n"; os << "SCALARS volume_scalars " << sizetype << " 3\n"; os << "LOOKUP_TABLE default\n"; if (binary) { os.write((char*)(vol.data()), sizeof(RFLOAT)*size); } else { FOR_ALL_VOXELS(vol) { os << vol(x,y,z).x << " " << vol(x,y,z).y << " " << vol(x,y,z).z << "\n"; } } } void VtkHelper :: readVTK(std::string fn, Volume& vol, d3Vector& origin, d3Vector& spacing) { std::ifstream file(fn.c_str()); char text[4096]; if (!file.is_open()) { REPORT_ERROR("failed to open " + fn + '\n'); } file.getline(text, 4096); if (std::string(text) != "# vtk DataFile Version 2.0") { REPORT_ERROR("Unsupported VTK format: " + std::string(text) + '\n'); } file.getline(text, 4096); file.getline(text, 4096); if (std::string(text) != "ASCII") { REPORT_ERROR("Only ASCII VTK files are supported.\n"); } /* os << "DATASET STRUCTURED_POINTS\n"; os << "DIMENSIONS " << vol.dimx << " " << vol.dimy << " " << vol.dimz << "\n"; os << 
"SPACING " << spacingX << " " << spacingY << " " << spacingZ << "\n"; os << "ORIGIN " << originX << " " << originY << " " << originZ << "\n"; os << "POINT_DATA " << size << "\n"; os << "SCALARS volume_scalars " << sizetype << " 6\n"; os << "LOOKUP_TABLE default\n"; */ file.getline(text, 4096); std::string first, dummy; int dimx, dimy, dimz, dims; size_t size; for (int i = 0; i < 6; i++) { file.getline(text, 4096); std::stringstream linestream(text); linestream >> first; if (first == "DIMENSIONS") { linestream >> dimx; linestream >> dimy; linestream >> dimz; } else if (first == "SPACING") { linestream >> spacing.x; linestream >> spacing.y; linestream >> spacing.z; } else if (first == "ORIGIN") { linestream >> origin.x; linestream >> origin.y; linestream >> origin.z; } else if (first == "POINT_DATA") { linestream >> size; } else if (first == "SCALARS") { linestream >> dummy; linestream >> dummy; linestream >> dims; if (dims != 1) { std::stringstream sts; std::string st; sts << fn << " is not a scalar volume (voxeldims = " << dims << ")\n"; sts >> st; REPORT_ERROR(st); } } else if (first == "LOOKUP_TABLE") { linestream >> dummy; } } if (size != ((size_t)dimx)*((size_t)dimy)*((size_t)dimz)) { std::cout << "Bad size info in " << fn << ": " << size << " vs. " << (dimx*dimy*dimz) << "\n"; std::exit(666); std::stringstream sts; std::string st; sts << "Bad size info in " << fn << ": " << size << " vs. " << (dimx*dimy*dimz) << "\n"; sts >> st; REPORT_ERROR(st); } vol.resize(dimx, dimy, dimz); for (size_t i = 0; i < size; i++) { file >> vol.voxels[i]; } } void VtkHelper :: writeVTK(Volume >& vol, std::string fn, double originX, double originY, double originZ, double spacingX, double spacingY, double spacingZ, bool binary) { const size_t size = (vol.dimx * vol.dimy * vol.dimz); std::ofstream os(fn.c_str()); std::string sizetype = "float"; if (sizeof(RFLOAT) > 4) sizetype = "double"; os << "# vtk DataFile Version 2.0\n"; os << "Volume example\n"; if (binary) { os << "BINARY\n"; } else { os << "ASCII\n"; } os << "DATASET STRUCTURED_POINTS\n"; os << "DIMENSIONS " << vol.dimx << " " << vol.dimy << " " << vol.dimz << "\n"; os << "SPACING " << spacingX << " " << spacingY << " " << spacingZ << "\n"; os << "ORIGIN " << originX << " " << originY << " " << originZ << "\n"; os << "POINT_DATA " << size << "\n"; os << "SCALARS volume_scalars " << sizetype << " 6\n"; os << "LOOKUP_TABLE default\n"; if (binary) { os.write((char*)(vol.data()), sizeof(RFLOAT)*size); } else { FOR_ALL_VOXELS(vol) { os << vol(x,y,z).xx << " " << vol(x,y,z).yy << " " << vol(x,y,z).zz << " " << vol(x,y,z).xy << " " << vol(x,y,z).yz << " " << vol(x,y,z).xz << "\n"; } } } relion-3.1.3/src/jaz/vtk_helper.h000066400000000000000000000112431411340063500166730ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. 
Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef VTK_HELPER_H #define VTK_HELPER_H #include #include #include #include #include class VtkHelper { public: static Image allToZ(const Image& img); static void writeVTK(Image& img, std::string fn, double originX = 0.0, double originY = 0.0, double originZ = 0.0, double spacingX = 1.0, double spacingY = 1.0, double spacingZ = 1.0, bool binary = false); static void writeVTK(Image& img, std::string fn, double originX = 0.0, double originY = 0.0, double originZ = 0.0, double spacingX = 1.0, double spacingY = 1.0, double spacingZ = 1.0, bool binary = false); static void writeVTK(Image& img, std::string fn, double originX = 0.0, double originY = 0.0, double originZ = 0.0, double spacingX = 1.0, double spacingY = 1.0, double spacingZ = 1.0, bool binary = false); static void writeVTK(MultidimArray& img, std::string fn, double originX = 0.0, double originY = 0.0, double originZ = 0.0, double spacingX = 1.0, double spacingY = 1.0, double spacingZ = 1.0, bool binary = false); static void writeVTK_Complex(const MultidimArray& img, std::string fn, bool binary = false); static void writeVTK_d3(MultidimArray >& img, std::string fn, bool binary = false); static void writeTomoVTK(Image& img, std::string fn, bool binary = false, double pixelSize = 1.0, gravis::d3Vector origin = gravis::d3Vector(0.0,0.0,0.0)); static void write(std::vector >& img, std::string fn, double originX = 0.0, double originY = 0.0, double spacingX = 1.0, double spacingY = 1.0, bool binary = false); static void writeCentered(std::vector >& img, std::string fn, double originX = 0.0, double originY = 0.0, double spacingX = 1.0, double spacingY = 1.0, bool binary = false); static void write(std::vector >& img, std::string fn, double originX = 0.0, double originY = 0.0, double spacingX = 1.0, double spacingY = 1.0, bool binary = false); static void readVTK(std::string fn, Volume& vol, gravis::d3Vector& origin, gravis::d3Vector& spacing); static void writeVTK(Volume& vol, std::string fn, double originX = 0.0, double originY = 0.0, double originZ = 0.0, double spacingX = 1.0, double spacingY = 1.0, double spacingZ = 1.0, bool binary = false); static void writeVTK(Volume >& vol, std::string fn, double originX = 0.0, double originY = 0.0, double originZ = 0.0, double spacingX = 1.0, double spacingY = 1.0, double spacingZ = 1.0, bool binary = false); static void writeVTK(Volume >& vol, std::string fn, double originX = 0.0, double originY = 0.0, double originZ = 0.0, double spacingX = 1.0, double spacingY = 1.0, double spacingZ = 1.0, bool binary = false); }; #endif relion-3.1.3/src/local_symmetry.cpp000066400000000000000000002760321411340063500173530ustar00rootroot00000000000000/*************************************************************************** * * Author: "Shaoda He" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
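A round-trip usage sketch for the single-volume VTK helpers declared above. The origin, spacing and binary arguments default as shown in the header; readVTK() only accepts ASCII, single-component scalar volumes, as enforced in the implementation. The RFLOAT instantiation of Volume is assumed, and the file name and pixel size are illustrative.

#include <src/jaz/vtk_helper.h>

void exampleVtkRoundTrip(Image<RFLOAT>& map, double angpix)
{
    // Write an ASCII VTK structured-points file with the pixel size as voxel spacing.
    VtkHelper::writeVTK(map, "map.vtk",
                        0.0, 0.0, 0.0,           // origin
                        angpix, angpix, angpix,  // spacing
                        false);                  // ASCII rather than binary

    // Read an ASCII scalar volume back, recovering its origin and spacing.
    Volume<RFLOAT> vol;
    gravis::d3Vector origin, spacing;
    VtkHelper::readVTK("map.vtk", vol, origin, spacing);
}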
* * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include "src/local_symmetry.h" //#define DEBUG #define NEW_APPLY_SYMMETRY_METHOD static std::string str_new_mask = "NEW_MASK_AND_OPERATORS"; static std::string str_mask_filename = "MASKFILENAME"; void sum3DCubicMask( const MultidimArray v, RFLOAT& val_sum, RFLOAT& val_ctr) { RFLOAT val = 0.; val_sum = val_ctr = 0.; FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY3D(v) { val = DIRECT_A3D_ELEM(v, k, i, j); if ( (val < -(XMIPP_EQUAL_ACCURACY)) || ((val - 1.) > (XMIPP_EQUAL_ACCURACY))) REPORT_ERROR("ERROR: mask - values are not in range [0,1]!"); if (val > XMIPP_EQUAL_ACCURACY) { val_sum += val; val_ctr += 1.; } } if ( (val_ctr < 0.9) || (val_sum < 0.01) ) REPORT_ERROR("ERROR: mask is empty!"); } bool similar3DCubicMasks( RFLOAT mask1_sum, RFLOAT mask1_ctr, RFLOAT mask2_sum, RFLOAT mask2_ctr) { RFLOAT q_sum = 1., q_ctr = 1.; if ( (mask1_ctr < 0.9) || (mask1_sum < 0.01) || (mask2_ctr < 0.9) || (mask2_sum < 0.01) ) REPORT_ERROR("ERROR: mask1 and/or mask2 are empty!"); q_sum = (mask1_sum > mask2_sum) ? (mask1_sum / mask2_sum) : (mask2_sum / mask1_sum); q_ctr = (mask1_ctr > mask2_ctr) ? (mask1_ctr / mask2_ctr) : (mask2_ctr / mask1_ctr); if ( (q_sum > 1.1) || (q_ctr > 1.1) ) return false; return true; } void truncateMultidimArray( MultidimArray& v, RFLOAT minval, RFLOAT maxval) { RFLOAT val = 0.; if (minval > maxval) REPORT_ERROR("ERROR: minval should be smaller than maxval!"); /* FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY3D(v) { val = DIRECT_A3D_ELEM(v, k, i, j); if (val < minval) DIRECT_A3D_ELEM(v, k, i, j) = minval; if (val > maxval) DIRECT_A3D_ELEM(v, k, i, j) = maxval; } */ FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(v) { val = DIRECT_MULTIDIM_ELEM(v, n); if (val < minval) DIRECT_MULTIDIM_ELEM(v, n) = minval; if (val > maxval) DIRECT_MULTIDIM_ELEM(v, n) = maxval; } } void Localsym_outputOperator( const Matrix1D& op, std::ostream* o_ptr, RFLOAT scale_angpix) { if (VEC_XSIZE(op) != NR_LOCALSYM_PARAMETERS) REPORT_ERROR("ERROR: op is not a local symmetry operator!"); if (o_ptr == NULL) REPORT_ERROR("ERROR: std::ostream* o_ptr == NULL !"); if (scale_angpix < 0.001) REPORT_ERROR("ERROR: Invalid scale of pixel size!"); // Enable bold fonts in Unix OS #ifdef __unix__ (*o_ptr) << "Angles (rot, tilt, psi) = (" << "\e[1m" << VEC_ELEM(op, AA_POS) << ", " << VEC_ELEM(op, BB_POS) << ", " << VEC_ELEM(op, GG_POS) << "\e[0m" << ") degree(s). Translations (dx, dy, dz) = (" << "\e[1m" << scale_angpix * VEC_ELEM(op, DX_POS) << ", " << scale_angpix * VEC_ELEM(op, DY_POS) << ", " << scale_angpix * VEC_ELEM(op, DZ_POS) << "\e[0m" << ") Angstrom(s)." << std::flush; #else (*o_ptr) << "Angles (rot, tilt, psi) = (" << VEC_ELEM(op, AA_POS) << ", " << VEC_ELEM(op, BB_POS) << ", " << VEC_ELEM(op, GG_POS) << ") degree(s). Translations (dx, dy, dz) = (" << scale_angpix * VEC_ELEM(op, DX_POS) << ", " << scale_angpix * VEC_ELEM(op, DY_POS) << ", " << scale_angpix * VEC_ELEM(op, DZ_POS) << ") Angstrom(s)." 
<< std::flush; #endif } void Localsym_composeOperator( Matrix1D& op, RFLOAT aa, RFLOAT bb, RFLOAT gg, RFLOAT dx, RFLOAT dy, RFLOAT dz, RFLOAT cc) { op.initZeros(NR_LOCALSYM_PARAMETERS); VEC_ELEM(op, AA_POS) = aa; VEC_ELEM(op, BB_POS) = bb; VEC_ELEM(op, GG_POS) = gg; VEC_ELEM(op, DX_POS) = dx; VEC_ELEM(op, DY_POS) = dy; VEC_ELEM(op, DZ_POS) = dz; VEC_ELEM(op, CC_POS) = cc; } void Localsym_decomposeOperator( const Matrix1D& op, RFLOAT& aa, RFLOAT& bb, RFLOAT& gg, RFLOAT& dx, RFLOAT& dy, RFLOAT& dz, RFLOAT& cc) { aa = bb = gg = dx = dy = dz = 0.; cc = (1e10); if (VEC_XSIZE(op) != NR_LOCALSYM_PARAMETERS) REPORT_ERROR("ERROR: op is not a local symmetry operator!"); aa = VEC_ELEM(op, AA_POS); bb = VEC_ELEM(op, BB_POS); gg = VEC_ELEM(op, GG_POS); dx = VEC_ELEM(op, DX_POS); dy = VEC_ELEM(op, DY_POS); dz = VEC_ELEM(op, DZ_POS); cc = VEC_ELEM(op, CC_POS); } void Localsym_scaleTranslations( Matrix1D& op, RFLOAT factor) { if (VEC_XSIZE(op) != NR_LOCALSYM_PARAMETERS) REPORT_ERROR("ERROR: op is not a local symmetry operator!"); VEC_ELEM(op, DX_POS) *= factor; VEC_ELEM(op, DY_POS) *= factor; VEC_ELEM(op, DZ_POS) *= factor; } void Localsym_shiftTranslations( Matrix1D& op, const Matrix1D& voffset) { if (VEC_XSIZE(op) != NR_LOCALSYM_PARAMETERS) REPORT_ERROR("ERROR: op is not a local symmetry operator!"); if (VEC_XSIZE(voffset) != 3) REPORT_ERROR("ERROR: voffset is not a vectorR3!"); VEC_ELEM(op, DX_POS) += XX(voffset); VEC_ELEM(op, DY_POS) += YY(voffset); VEC_ELEM(op, DZ_POS) += ZZ(voffset); } void Localsym_translations2vector( const Matrix1D& vec, Matrix1D& trans_vec, bool invert) { trans_vec.clear(); if (vec.size() != NR_LOCALSYM_PARAMETERS) REPORT_ERROR("ERROR: Syntax error in input vector!"); trans_vec.initZeros(3); XX(trans_vec) = VEC_ELEM(vec, DX_POS); YY(trans_vec) = VEC_ELEM(vec, DY_POS); ZZ(trans_vec) = VEC_ELEM(vec, DZ_POS); if (invert == LOCALSYM_OP_DO_INVERT) { XX(trans_vec) *= -1.; YY(trans_vec) *= -1.; ZZ(trans_vec) *= -1.; } } void Localsym_angles2matrix( const Matrix1D& vec, Matrix2D& mat, bool invert) { RFLOAT aa = 0., bb = 0., gg = 0.; mat.clear(); if (vec.size() != NR_LOCALSYM_PARAMETERS) REPORT_ERROR("ERROR: Syntax error in input vector!"); aa = VEC_ELEM(vec, AA_POS); bb = VEC_ELEM(vec, BB_POS); gg = VEC_ELEM(vec, GG_POS); Euler_angles2matrix(aa, bb, gg, mat); if (invert == LOCALSYM_OP_DO_INVERT) mat = mat.transpose(); mat.resize(4, 4); MAT_ELEM(mat, 3, 3) = 1.; } void Localsym_operator2matrix( const Matrix1D& vec, Matrix2D& mat, bool invert) { RFLOAT aa = 0., bb = 0., gg = 0.; Matrix1D trans_vec; mat.clear(); if (vec.size() != NR_LOCALSYM_PARAMETERS) REPORT_ERROR("ERROR: Syntax error in input vector!"); aa = VEC_ELEM(vec, AA_POS); bb = VEC_ELEM(vec, BB_POS); gg = VEC_ELEM(vec, GG_POS); Euler_angles2matrix(aa, bb, gg, mat); if (invert == LOCALSYM_OP_DO_INVERT) { mat = mat.transpose(); trans_vec.initZeros(3); XX(trans_vec)= (-1.) * VEC_ELEM(vec, DX_POS); YY(trans_vec)= (-1.) * VEC_ELEM(vec, DY_POS); ZZ(trans_vec)= (-1.) 
* VEC_ELEM(vec, DZ_POS); trans_vec = mat * trans_vec; mat.resize(4, 4); MAT_ELEM(mat, 0, 3) = XX(trans_vec); MAT_ELEM(mat, 1, 3) = YY(trans_vec); MAT_ELEM(mat, 2, 3) = ZZ(trans_vec); } else { mat.resize(4, 4); MAT_ELEM(mat, 0, 3) = VEC_ELEM(vec, DX_POS); MAT_ELEM(mat, 1, 3) = VEC_ELEM(vec, DY_POS); MAT_ELEM(mat, 2, 3) = VEC_ELEM(vec, DZ_POS); } MAT_ELEM(mat, 3, 3) = 1.; } void standardiseEulerAngles( RFLOAT aa_old, RFLOAT bb_old, RFLOAT gg_old, RFLOAT& aa_new, RFLOAT& bb_new, RFLOAT& gg_new) { Matrix2D rot_mat; rot_mat.clear(); // Re-calculate angles so that they follow the conventions in RELION! if ( (ABS(aa_old) > 179.) || (bb_old < 1.) || (bb_old > 179.) || (ABS(gg_old) > 179.) ) { Euler_angles2matrix(aa_old, bb_old, gg_old, rot_mat); Euler_matrix2angles(rot_mat, aa_new, bb_new, gg_new); return; } aa_new = aa_old; bb_new = bb_old; gg_new = gg_old; } bool sameLocalsymOperators( const Matrix1D& lhs, const Matrix1D& rhs) { RFLOAT aa1 = 0., bb1 = 0., gg1 = 0., dx1 = 0., dy1 = 0., dz1 = 0., cc1 = 0.; RFLOAT aa2 = 0., bb2 = 0., gg2 = 0., dx2 = 0., dy2 = 0., dz2 = 0., cc2 = 0.; const RFLOAT eps = (XMIPP_EQUAL_ACCURACY); Localsym_decomposeOperator(lhs, aa1, bb1, gg1, dx1, dy1, dz1, cc1); Localsym_decomposeOperator(rhs, aa2, bb2, gg2, dx2, dy2, dz2, cc2); standardiseEulerAngles(aa1, bb1, gg1, aa1, bb1, gg1); standardiseEulerAngles(aa2, bb2, gg2, aa2, bb2, gg2); if ( (ABS(aa1 - aa2) < eps) && (ABS(bb1 - bb2) < eps) && (ABS(gg1 - gg2) < eps) && (ABS(dx1 - dx2) < eps) && (ABS(dy1 - dy2) < eps) && (ABS(dz1 - dz2) < eps) ) { return true; } return false; } // Parsing only. Don't validate data here. void parseDMFormatMasksAndOperators( FileName fn_in, FileName fn_out) { std::ifstream fin; std::ofstream fout; std::string line; std::vector words; fin.open(fn_in.c_str(), std::ios_base::in); if (fin.fail()) REPORT_ERROR("ERROR: Cannot open file: " + (std::string)(fn_in)); //if (exists(fn_out)) // REPORT_ERROR("ERROR: output file: " + (std::string)(fn_out) + " exists! Please use another file name!"); fout.open(fn_out.c_str(), std::ios_base::out); if (fout.fail()) REPORT_ERROR("ERROR: Cannot open file: " + (std::string)(fn_out)); while (getline(fin, line, '\n')) { tokenize(line, words); // Empty line if (words.size() < 1) continue; // Commented line if (words[0][0] == '#') continue; // Line with mask filename if (words[0] == str_mask_filename) fout << str_new_mask << std::endl; for (int i = 0; i < words.size(); i++) fout << words[i] << " " << std::flush; fout << std::endl; } fout.close(); fin.close(); } void readRelionFormatMasksAndOperators( FileName fn_info, std::vector& fn_mask_list, std::vector > >& ops, RFLOAT angpix, bool verb) { MetaDataTable MD; FileName fn_mask; std::vector > dummy; Matrix1D op, op_i; bool is_maskname_found = false; RFLOAT aa = 0., bb = 0., gg = 0., dx = 0., dy = 0., dz = 0.; // Initialisation fn_mask_list.clear(); ops.clear(); MD.clear(); dummy.clear(); op.clear(); op_i.clear(); if (angpix < 0.001) REPORT_ERROR("ERROR: Pixel size is invalid!"); if (fn_info.getExtension() != "star") REPORT_ERROR("ERROR: " + (std::string)(fn_info) + " is not a STAR file!"); if (verb) std::cout << " Reading list of masks from " << fn_info << "..." 
<< std::endl; MD.read(fn_info); if (MD.numberOfObjects() < 1) REPORT_ERROR("ERROR: STAR file " + (std::string)(fn_info) + " is empty!"); if ( (!MD.containsLabel(EMDL_MASK_NAME)) || (!MD.containsLabel(EMDL_ORIENT_ROT)) || (!MD.containsLabel(EMDL_ORIENT_TILT)) || (!MD.containsLabel(EMDL_ORIENT_PSI)) || (!MD.containsLabel(EMDL_ORIENT_ORIGIN_X_ANGSTROM)) || (!MD.containsLabel(EMDL_ORIENT_ORIGIN_Y_ANGSTROM)) || (!MD.containsLabel(EMDL_ORIENT_ORIGIN_Z_ANGSTROM)) ) REPORT_ERROR("ERROR: You need rlnMaskName, rlnAngleRot, rlnAngleTilt, rlnAnglePsi, rlnOriginXAngst, rlnOriginYAngst and rlnOriginZAngst columns. Some of them are missing in your STAR file " + (std::string)(fn_info) + ". Note that rlnOriginX/Y/Z were changed to rlnOriginX/Y/ZAngst in RELION 3.1. Since the values in the symmetry definition file were in Angstrom from the beginning, please only edit the column names, not values."); // Load mask names FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD) { is_maskname_found = false; MD.getValue(EMDL_MASK_NAME, fn_mask); for (int id_mask = 0; id_mask < fn_mask_list.size(); id_mask++) { if (fn_mask_list[id_mask] == fn_mask) { is_maskname_found = true; break; } } if (!is_maskname_found) fn_mask_list.push_back(fn_mask); } if (fn_mask_list.size() < 1) REPORT_ERROR("ERROR: No mask filenames in " + (std::string)(fn_info) + " !"); // Load all operators op.initZeros(NR_LOCALSYM_PARAMETERS); op_i.initZeros(NR_LOCALSYM_PARAMETERS); for (int id_mask = 0; id_mask < fn_mask_list.size(); id_mask++) { dummy.clear(); if (verb) { std::cout << " * Mask #" << (id_mask + 1) << " = " << fn_mask_list[id_mask] << std::endl; std::cout << " --> Operator #" << int(0) << " = " << std::flush; Localsym_outputOperator(op_i, &std::cout); std::cout << " (the original)" << std::endl; } // Find all operators for this mask FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD) { MD.getValue(EMDL_MASK_NAME, fn_mask); if (fn_mask != fn_mask_list[id_mask]) continue; // Get this operator MD.getValue(EMDL_ORIENT_ROT, aa); MD.getValue(EMDL_ORIENT_TILT, bb); MD.getValue(EMDL_ORIENT_PSI, gg); MD.getValue(EMDL_ORIENT_ORIGIN_X_ANGSTROM, dx); MD.getValue(EMDL_ORIENT_ORIGIN_Y_ANGSTROM, dy); MD.getValue(EMDL_ORIENT_ORIGIN_Z_ANGSTROM, dz); // Re-calculate angles so that they follow the conventions in RELION! standardiseEulerAngles(aa, bb, gg, aa, bb, gg); Localsym_composeOperator(op, aa, bb, gg, dx, dy, dz); // Do nothing if it is an identical operator if (sameLocalsymOperators(op, op_i)) continue; if (verb) { std::cout << " --> Operator #" << (dummy.size() + 1) << " = " << std::flush; Localsym_outputOperator(op, &std::cout); std::cout << std::endl; } Localsym_scaleTranslations(op, 1. 
/ angpix); // Push back the operator dummy.push_back(op); } if (dummy.size() < 1) REPORT_ERROR("ERROR: Please provide at least one non-identical operator for mask file " + fn_mask_list[id_mask] + " !"); ops.push_back(dummy); } // Verify mask filenames and operators (detect duplication) if ((fn_mask_list.size() < 1) || (ops.size() < 1)) REPORT_ERROR("ERROR: number of masks and/or operator lists are zero!"); if (fn_mask_list.size() != ops.size()) REPORT_ERROR("ERROR: number of masks and operator lists do not match!"); // Check mask filenames for (int imask = 0; imask < fn_mask_list.size() - 1; imask++) { for (int jmask = imask + 1; jmask < fn_mask_list.size(); jmask++) { if ( (fn_mask_list[imask].afterLastOf("/").length() > 0) && (fn_mask_list[jmask].afterLastOf("/").length() > 0) && (fn_mask_list[imask].afterLastOf("/") == fn_mask_list[jmask].afterLastOf("/")) ) REPORT_ERROR("ERROR: Ambiguous mask filenames: " + fn_mask_list[imask] + " and " + fn_mask_list[jmask] + " !"); } } // Detect duplicated operators // Identical operators have already been removed for (int imask = 0; imask < fn_mask_list.size(); imask++) { for (int iop = 0; iop < ops[imask].size() - 1; iop++) { for (int jop = iop + 1; jop < ops[imask].size(); jop++) { if (sameLocalsymOperators(ops[imask][iop], ops[imask][jop])) REPORT_ERROR("ERROR: mask filename: " + fn_mask_list[imask] + " contain duplicated operators!"); } } } } void readRelionFormatMasksWithoutOperators( FileName fn_info, std::vector& fn_mask_list, std::vector > >& ops, std::vector >& op_masks, bool all_angular_search_ranges_are_global, bool verb) { FileName fn; MetaDataTable MD; std::vector fns, fns_empty; long int id = 0, ide = 0; std::vector ids; std::vector > op_masks_tmp; Matrix1D op_empty; std::vector > ops_empty; // Initialisation fn_mask_list.clear(); ops.clear(); op_masks.clear(); if (fn_info.getExtension() != "star") REPORT_ERROR("ERROR: " + (std::string)(fn_info) + " is not a STAR file!"); if (verb) std::cout << " Reading list of masks from " << fn_info << "..." << std::endl; MD.clear(); MD.read(fn_info); if ( (MD.numberOfObjects() < 2) || (MD.numberOfObjects() > 999) ) REPORT_ERROR("ERROR: STAR file " + (std::string)(fn_info) + " should have 2~999 entries!"); if ( (!MD.containsLabel(EMDL_MASK_NAME)) || (!MD.containsLabel(EMDL_AREA_ID)) ) REPORT_ERROR("ERROR: Label EMDL_MASK_NAME and/or EMDL_AREA_ID are missing in STAR file " + (std::string)(fn_info) + " !"); if (verb) std::cout << " Reading list of masks for all operators from " << fn_info << "..." << std::endl; // Collect all entries fns.clear(); ids.clear(); FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD) { MD.getValue(EMDL_MASK_NAME, fn); MD.getValue(EMDL_AREA_ID, id); fns.push_back(fn); ids.push_back(id); if (id <= 0) REPORT_ERROR("ERROR: EMDL_AREA_ID is not a positive integer: " + (std::string)(fn)); } // Check whether there exist duplicated mask filenames // fns.size() = id_max for (int ii = 0; ii < fns.size() - 1; ii++) { for (int jj = ii + 1; jj < fns.size(); jj++) { if (fns[ii] == fns[jj]) REPORT_ERROR("ERROR: Duplicated mask filenames have been detected: " + fns[ii]); } } // Initialise empty op_masks_tmp[0 ... 
id_max] op_masks_tmp.clear(); fns_empty.clear(); for (int ii = 0; ii <= fns.size(); ii++) op_masks_tmp.push_back(fns_empty); // Collect mask filenames according to their IDs for (int ii = 0; ii < fns.size(); ii++) { // 1 <= ids[ii] <= id_max, op_masks_tmp.size() = id_max + 1 if (ids[ii] >= op_masks_tmp.size()) REPORT_ERROR("ERROR: Mask filename contains invalid ID: " + fns[ii]); op_masks_tmp[ids[ii]].push_back(fns[ii]); } // Find the largest area ID (id_e) with mask filenames ide = 0; // op_masks_tmp.size() = id_max + 1 for (int ii = op_masks_tmp.size() - 1; ii >= 1; ii--) { if (op_masks_tmp[ii].size() > 0) { ide = ii; break; } } if (ide <= 0) REPORT_ERROR("ERROR: No masks (this should not happen)!"); // All area IDs 1 ... id_e should be assigned with >= 2 mask filenames for (int ii = 1; ii <= ide; ii++) { if (op_masks_tmp[ii].size() < 2) REPORT_ERROR("ERROR: There should be multiple (>= 2) masks for each set of regions!"); } // Input files are valid. Now output arrays for the program. fn_mask_list.clear(); ops.clear(); op_masks.clear(); Localsym_composeOperator(op_empty, 0., (all_angular_search_ranges_are_global) ? (90.) : (0.), 0.); fns_empty.clear(); ops_empty.clear(); for (int ii = 1; ii <= ide; ii++) { // For each set of N regions: // There is a same mask filename in the local symmetry description file fn_mask_list.push_back(op_masks_tmp[ii][0]); // There is a list of N-1 mask filenames used for global searches op_masks.push_back(fns_empty); // There is a list of N-1 operators in the local symmetry description file ops.push_back(ops_empty); // Fill in N-1 mask filenames and N-1 operators into the arrays for (int jj = 1; jj < op_masks_tmp[ii].size(); jj++) { op_masks[op_masks.size() - 1].push_back(op_masks_tmp[ii][jj]); ops[ops.size() - 1].push_back(op_empty); } } // Screen output if (verb) { op_empty.initZeros(NR_LOCALSYM_PARAMETERS); for (int imask = 0; imask < fn_mask_list.size(); imask++) { std::cout << " * Mask #" << (imask + 1) << " = " << fn_mask_list[imask] << std::endl; std::cout << " --> Operator #" << int(0) << " = " << std::flush; Localsym_outputOperator(op_empty, &std::cout); std::cout << " (the original)" << std::endl; for (int iop = 0; iop < ops[imask].size(); iop++) { std::cout << " --> Operator #" << (iop + 1) << " = " << std::flush; Localsym_outputOperator(ops[imask][iop], &std::cout); std::cout << " (undefined) - from mask " << op_masks[imask][iop] << std::endl; } } } // TODO: this function needs thorough tests!!! 
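	// A hypothetical example of the STAR layout expected by
	// readRelionFormatMasksAndOperators() (all numeric values below are
	// illustrative). Translations are given in Angstroms and are converted
	// to pixels internally by dividing by the pixel size; the identity
	// operator is implicit and, if listed, is ignored, and every mask needs
	// at least one non-identical operator:
	//
	//   data_
	//   loop_
	//   _rlnMaskName
	//   _rlnAngleRot
	//   _rlnAngleTilt
	//   _rlnAnglePsi
	//   _rlnOriginXAngst
	//   _rlnOriginYAngst
	//   _rlnOriginZAngst
	//   mask_A.mrc  120.0  90.0  0.0  0.0  0.0  42.5
	//   mask_A.mrc  240.0  90.0  0.0  0.0  0.0  85.0
	//
	// readRelionFormatMasksWithoutOperators(), by contrast, only requires the
	// EMDL_MASK_NAME and EMDL_AREA_ID columns: masks sharing the same area ID
	// form one set of related regions, the first mask of each set becomes the
	// reference, and the operators are filled with placeholders to be
	// determined by a subsequent (local or global) search.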
} void writeRelionFormatMasksAndOperators( FileName fn_info, const std::vector& fn_mask_list, const std::vector > >& ops, RFLOAT angpix) { MetaDataTable MD; if (fn_info.getExtension() != "star") REPORT_ERROR("ERROR: Output file should have .star extension!"); if (fn_mask_list.size() != ops.size()) REPORT_ERROR("ERROR: number of masks and operator lists do not match!"); if (fn_mask_list.size() < 1) REPORT_ERROR("No masks!"); if (angpix < 0.001) REPORT_ERROR("ERROR: Invalid pixel size!"); MD.clear(); MD.addLabel(EMDL_MASK_NAME); MD.addLabel(EMDL_ORIENT_ROT); MD.addLabel(EMDL_ORIENT_TILT); MD.addLabel(EMDL_ORIENT_PSI); MD.addLabel(EMDL_ORIENT_ORIGIN_X_ANGSTROM); MD.addLabel(EMDL_ORIENT_ORIGIN_Y_ANGSTROM); MD.addLabel(EMDL_ORIENT_ORIGIN_Z_ANGSTROM); for (int imask = 0; imask < fn_mask_list.size(); imask++) { if (ops[imask].size() < 1) REPORT_ERROR("ERROR: no operators for mask: " + fn_mask_list[imask]); for (int iop = 0; iop < ops[imask].size(); iop++) { MD.addObject(); MD.setValue(EMDL_MASK_NAME, fn_mask_list[imask]); MD.setValue(EMDL_ORIENT_ROT, VEC_ELEM(ops[imask][iop], AA_POS)); MD.setValue(EMDL_ORIENT_TILT, VEC_ELEM(ops[imask][iop], BB_POS)); MD.setValue(EMDL_ORIENT_PSI, VEC_ELEM(ops[imask][iop], GG_POS)); MD.setValue(EMDL_ORIENT_ORIGIN_X_ANGSTROM, angpix * VEC_ELEM(ops[imask][iop], DX_POS)); MD.setValue(EMDL_ORIENT_ORIGIN_Y_ANGSTROM, angpix * VEC_ELEM(ops[imask][iop], DY_POS)); MD.setValue(EMDL_ORIENT_ORIGIN_Z_ANGSTROM, angpix * VEC_ELEM(ops[imask][iop], DZ_POS)); } } MD.write(fn_info); } void writeRelionFormatLocalSearchOperatorResults( FileName fn_out, const std::vector >& op_samplings, RFLOAT angpix) { MetaDataTable MD; if (angpix < 0.001) REPORT_ERROR("ERROR: Invalid pixel size!"); if (fn_out.getExtension() != "star") REPORT_ERROR("ERROR: Output file should have .star extension!"); if (op_samplings.size() < 1) REPORT_ERROR("ERROR: No results!"); MD.clear(); MD.addLabel(EMDL_ORIENT_ROT); MD.addLabel(EMDL_ORIENT_TILT); MD.addLabel(EMDL_ORIENT_PSI); MD.addLabel(EMDL_ORIENT_ORIGIN_X_ANGSTROM); MD.addLabel(EMDL_ORIENT_ORIGIN_Y_ANGSTROM); MD.addLabel(EMDL_ORIENT_ORIGIN_Z_ANGSTROM); MD.addLabel(EMDL_IMAGE_WEIGHT); for (int iop = 0; iop < op_samplings.size(); iop++) { if (VEC_XSIZE(op_samplings[iop]) != NR_LOCALSYM_PARAMETERS) REPORT_ERROR("ERROR: syntax errors in results!"); MD.addObject(); MD.setValue(EMDL_ORIENT_ROT, VEC_ELEM(op_samplings[iop], AA_POS)); MD.setValue(EMDL_ORIENT_TILT, VEC_ELEM(op_samplings[iop], BB_POS)); MD.setValue(EMDL_ORIENT_PSI, VEC_ELEM(op_samplings[iop], GG_POS)); MD.setValue(EMDL_ORIENT_ORIGIN_X_ANGSTROM, angpix * VEC_ELEM(op_samplings[iop], DX_POS)); MD.setValue(EMDL_ORIENT_ORIGIN_Y_ANGSTROM, angpix * VEC_ELEM(op_samplings[iop], DY_POS)); MD.setValue(EMDL_ORIENT_ORIGIN_Z_ANGSTROM, angpix * VEC_ELEM(op_samplings[iop], DZ_POS)); MD.setValue(EMDL_IMAGE_WEIGHT, VEC_ELEM(op_samplings[iop], CC_POS)); } MD.write(fn_out); } void readDMFormatMasksAndOperators(FileName fn_info, std::vector& fn_mask_list, std::vector > >& op_list, RFLOAT angpix, bool verb) { // http://www.ccp4.ac.uk/html/rotationmatrices.html std::ifstream fin; std::string line; FileName fn_mask; std::vector words; const std::string str_mask = str_mask_filename; const std::string str_rota = "ROTA "; const std::string str_euler = "ROTA EULER"; const std::string str_polar = "ROTA POLAR"; const std::string str_matrix = "ROTA MATRIX"; const std::string str_omat = "OMAT"; const std::string str_trans = "TRAN"; Matrix1D op, op_i; std::vector > ops; int id_matrix_type = 0; RFLOAT a11 = 0., a12 = 0., a13 = 0., a21 = 
0., a22 = 0., a23 = 0., a31 = 0., a32 = 0., a33 = 0.; RFLOAT dx = 0., dy = 0., dz = 0., aa = 0., bb = 0., gg = 0.; // Initialisation fn_mask_list.clear(); op_list.clear(); op.clear(); op_i.clear(); ops.clear(); op.initZeros(NR_LOCALSYM_PARAMETERS); op_i.initZeros(NR_LOCALSYM_PARAMETERS); // Open info file fin.open(fn_info.c_str(), std::ios_base::in); if (fin.fail()) REPORT_ERROR("ERROR: Cannot open file: " + (std::string)(fn_info)); if (verb) std::cout << " Reading list of masks from " << fn_info << "..." << std::endl; while (getline(fin, line, '\n')) { if (line.find(str_new_mask) != std::string::npos) continue; // Mask filename is found if (line.find(str_mask) != std::string::npos) { tokenize(line.substr(str_mask.length() + 1), words); fn_mask = words[0]; if (!exists(fn_mask)) REPORT_ERROR("ERROR: Mask file " + fn_mask + " does not exist!"); fn_mask_list.push_back(fn_mask); if (verb) { std::cout << " * Mask #" << fn_mask_list.size() << " = " << fn_mask << std::endl; std::cout << " --> Operator #" << int(0) << " = " << std::flush; Localsym_outputOperator(op_i, &std::cout); std::cout << " (the original)" << std::endl; } // Get all the operators for this mask ops.clear(); while (getline(fin, line, '\n')) { if (line.find(str_new_mask) != std::string::npos) break; id_matrix_type = 0; if (line.find(str_rota) == std::string::npos) { if (line.find(str_omat) == std::string::npos) REPORT_ERROR("ERROR: Syntax error: Operators of mask file " + fn_mask); else id_matrix_type += OMAT_TYPE; } if (line.find(str_euler) != std::string::npos) id_matrix_type += ROTA_EULER_TYPE; if (line.find(str_polar) != std::string::npos) id_matrix_type += ROTA_POLAR_TYPE; if (line.find(str_matrix) != std::string::npos) id_matrix_type += ROTA_MATRIX_TYPE; if ((id_matrix_type != ROTA_EULER_TYPE) && (id_matrix_type != ROTA_POLAR_TYPE) && (id_matrix_type != ROTA_MATRIX_TYPE) && (id_matrix_type != OMAT_TYPE)) REPORT_ERROR("ERROR: Syntax error: Operators of mask file " + fn_mask); if (id_matrix_type == ROTA_EULER_TYPE) tokenize(line.substr(str_euler.length() + 1), words); else if (id_matrix_type == ROTA_POLAR_TYPE) tokenize(line.substr(str_polar.length() + 1), words); else if (id_matrix_type == ROTA_MATRIX_TYPE) tokenize(line.substr(str_matrix.length() + 1), words); else if (id_matrix_type == OMAT_TYPE) { if (!getline(fin, line, '\n')) // Read a new line REPORT_ERROR("ERROR: Syntax error: Operators of mask file " + fn_mask); tokenize(line, words); } if (words.size() < 3) REPORT_ERROR("ERROR: Syntax error: Operators of mask file " + fn_mask); a11 = textToFloat(words[0]); a12 = textToFloat(words[1]); a13 = textToFloat(words[2]); if ((id_matrix_type == ROTA_MATRIX_TYPE) || (id_matrix_type == OMAT_TYPE)) { if (getline(fin, line, '\n')) { tokenize(line, words); if (words.size() < 3) REPORT_ERROR("ERROR: Syntax error: Operators of mask file " + fn_mask); a21 = textToFloat(words[0]); a22 = textToFloat(words[1]); a23 = textToFloat(words[2]); } else REPORT_ERROR("ERROR: Syntax error: Operators of mask file " + fn_mask); if (getline(fin, line, '\n')) { tokenize(line, words); if (words.size() < 3) REPORT_ERROR("ERROR: Syntax error: Operators of mask file " + fn_mask); a31 = textToFloat(words[0]); a32 = textToFloat(words[1]); a33 = textToFloat(words[2]); } else REPORT_ERROR("ERROR: Syntax error: Operators of mask file " + fn_mask); } if (id_matrix_type == OMAT_TYPE) { if (getline(fin, line, '\n')) { tokenize(line, words); if (words.size() < 3) REPORT_ERROR("ERROR: Syntax error: Operators of mask file " + fn_mask); dx = 
textToFloat(words[0]); dy = textToFloat(words[1]); dz = textToFloat(words[2]); } else REPORT_ERROR("ERROR: Syntax error: Operators of mask file " + fn_mask); } if (id_matrix_type == ROTA_EULER_TYPE) { standardiseEulerAngles(a11, a12, a13, aa, bb, gg); } else if (id_matrix_type == ROTA_POLAR_TYPE) { // omega, phi, kappa RFLOAT omega = a11, phi = a12, kappa = a13; RFLOAT ll = sin(DEG2RAD(omega)) * cos(DEG2RAD(phi)); RFLOAT mm = sin(DEG2RAD(omega)) * sin(DEG2RAD(phi)); RFLOAT nn = cos(DEG2RAD(omega)); RFLOAT ck = cos(DEG2RAD(kappa)); RFLOAT sk = sin(DEG2RAD(kappa)); a11 = ll * ll + (mm * mm + nn * nn) * ck; a12 = ll * mm * (1. - ck) - nn * sk; a13 = nn * ll * (1. - ck) + mm * sk; a21 = ll * mm * (1. - ck) + nn * sk; a22 = mm * mm + (ll * ll + nn * nn) * ck; a23 = mm * nn * (1. - ck) - ll * sk; a31 = nn * ll * (1. - ck) - mm * sk; a32 = mm * nn * (1. - ck) + ll * sk; a33 = nn * nn + (ll * ll + mm * mm) * ck; } // These three type of operators contain angular matrices if ((id_matrix_type == ROTA_POLAR_TYPE) || (id_matrix_type == ROTA_MATRIX_TYPE) || (id_matrix_type == OMAT_TYPE)) { Matrix2D A; A.resize(3, 3); MAT_ELEM(A, 0, 0) = a11; MAT_ELEM(A, 0, 1) = a12; MAT_ELEM(A, 0, 2) = a13; MAT_ELEM(A, 1, 0) = a21; MAT_ELEM(A, 1, 1) = a22; MAT_ELEM(A, 1, 2) = a23; MAT_ELEM(A, 2, 0) = a31; MAT_ELEM(A, 2, 1) = a32; MAT_ELEM(A, 2, 2) = a33; Euler_matrix2angles(A.transpose(), aa, bb, gg); // TODO: do we need transpose here? } // Read TRANS if ((id_matrix_type == ROTA_EULER_TYPE) || (id_matrix_type == ROTA_POLAR_TYPE) || (id_matrix_type == ROTA_MATRIX_TYPE)) { if (getline(fin, line, '\n') && (line.find(str_trans) != std::string::npos)) { tokenize(line.substr(str_trans.length() + 1), words); if (words.size() < 3) REPORT_ERROR("ERROR: Syntax error: Operators of mask file " + fn_mask); dx = textToFloat(words[0]); dy = textToFloat(words[1]); dz = textToFloat(words[2]); } else REPORT_ERROR("ERROR: Syntax error: Operators of mask file " + fn_mask); } // New matrix has been processed Localsym_composeOperator(op, aa, bb, gg, dx, dy, dz); // Check whether it is an identical operator, then push back if (sameLocalsymOperators(op, op_i)) continue; if (verb) { std::cout << " --> Operator #" << (ops.size() + 1) << " = " << std::flush; Localsym_outputOperator(op, &std::cout); std::cout << std::endl; } Localsym_scaleTranslations(op, 1. / angpix); ops.push_back(op); } // All the operators for this mask are read if (ops.size() < 1) REPORT_ERROR("ERROR: Please provide at least one non-identical operator for mask file " + fn_mask + " !"); op_list.push_back(ops); } else { // Mask filename is not found REPORT_ERROR("ERROR: Syntax error: mask filename is not found! 
" + fn_info); } } // Verify mask filenames and operators (detect duplication) if ((fn_mask_list.size() < 1) || (op_list.size() < 1)) REPORT_ERROR("ERROR: number of masks and/or operator lists are zero!"); if (fn_mask_list.size() != op_list.size()) REPORT_ERROR("ERROR: number of masks and operator lists do not match!"); // Check mask filenames for (int imask = 0; imask < fn_mask_list.size() - 1; imask++) { for (int jmask = imask + 1; jmask < fn_mask_list.size(); jmask++) { if ( (fn_mask_list[imask].afterLastOf("/").length() > 0) && (fn_mask_list[jmask].afterLastOf("/").length() > 0) && (fn_mask_list[imask].afterLastOf("/") == fn_mask_list[jmask].afterLastOf("/")) ) REPORT_ERROR("ERROR: Ambiguous mask filenames: " + fn_mask_list[imask] + " and " + fn_mask_list[jmask] + " !"); } } // Detect duplicated operators // Identical operators have already been removed for (int imask = 0; imask < fn_mask_list.size(); imask++) { for (int iop = 0; iop < op_list[imask].size() - 1; iop++) { for (int jop = iop + 1; jop < op_list[imask].size(); jop++) { if (sameLocalsymOperators(op_list[imask][iop], op_list[imask][jop])) REPORT_ERROR("ERROR: mask filename: " + fn_mask_list[imask] + " contain duplicated operators!"); } } } #ifdef DEBUG for (int imask = 0; imask < fn_mask_list.size(); imask++) { std::cout << " * Mask #" << (imask + 1) << " = " << fn_mask_list[imask] << std::endl; for (int iop = 0; iop < op_list[imask].size(); iop++) { std::cout << " --> Operator #" << (iop + 1) << " = " << VEC_ELEM(op_list[imask][iop], AA_POS) << ", " << VEC_ELEM(op_list[imask][iop], BB_POS) << ", " << VEC_ELEM(op_list[imask][iop], GG_POS) << "; " << VEC_ELEM(op_list[imask][iop], DX_POS) << ", " << VEC_ELEM(op_list[imask][iop], DY_POS) << ", " << VEC_ELEM(op_list[imask][iop], DZ_POS) << std::endl; } } #endif } void writeDMFormatMasksAndOperators( FileName fn_info, const std::vector& fn_mask_list, const std::vector > >& ops, RFLOAT angpix) { if (fn_info.getExtension() == "star") REPORT_ERROR("ERROR: Output file should not have .star extension!"); if (fn_mask_list.size() != ops.size()) REPORT_ERROR("ERROR: number of masks and operator lists do not match!"); if (fn_mask_list.size() < 1) REPORT_ERROR("No masks!"); if (angpix < 0.001) REPORT_ERROR("ERROR: Invalid pixel size!"); for (int imask = 0; imask < fn_mask_list.size(); imask++) { if (ops[imask].size() < 1) REPORT_ERROR("ERROR: no operators for mask: " + fn_mask_list[imask]); } int str_w = 15; std::ofstream fout; fout.open(fn_info.c_str(), std::ios::out); if (!fout) REPORT_ERROR("ERROR: Cannot write to file: " + fn_info); for (int imask = 0; imask < fn_mask_list.size(); imask++) { fout << std::endl << str_mask_filename << " " << fn_mask_list[imask] << std::endl; for (int iop = 0; iop < ops[imask].size(); iop++) { fout << " ROTA EULER " << std::setiosflags(std::ios::fixed) << std::setw(str_w) << VEC_ELEM(ops[imask][iop], AA_POS) << " " << std::setw(str_w) << VEC_ELEM(ops[imask][iop], BB_POS) << " " << std::setw(str_w) << VEC_ELEM(ops[imask][iop], GG_POS) << std::resetiosflags(std::ios::fixed) << std::endl; fout << " TRAN " << std::setiosflags(std::ios::fixed) << std::setw(str_w) << angpix * VEC_ELEM(ops[imask][iop], DX_POS) << " " << std::setw(str_w) << angpix * VEC_ELEM(ops[imask][iop], DY_POS) << " " << std::setw(str_w) << angpix * VEC_ELEM(ops[imask][iop], DZ_POS) << std::resetiosflags(std::ios::fixed) << std::endl; } } fout.close(); } void duplicateLocalSymmetry( MultidimArray& out_map, const MultidimArray& ori_map, const std::vector fn_masks, const std::vector > > ops, 
bool duplicate_masks_only) { Image mask; MultidimArray vol1, ori_map_masked; Matrix1D trans_vec; Matrix2D op_mat; out_map.clear(); if ((fn_masks.size() < 1) || (ops.size() < 1)) REPORT_ERROR("ERROR: number of masks and/or operator lists are zero!"); if (fn_masks.size() != ops.size()) REPORT_ERROR("ERROR: number of masks and operator lists do not match!"); // Open the first mask header, or copy original map for initialisation of output map if (duplicate_masks_only) { if (!exists(fn_masks[0])) REPORT_ERROR("ERROR: mask " + std::string(fn_masks[0]) + " does not exist!"); mask.read(fn_masks[0], false); if ((NSIZE(mask()) != 1) || (ZSIZE(mask()) <= 1) || (YSIZE(mask()) <= 1) || (XSIZE(mask()) <= 1)) REPORT_ERROR("ERROR: input mask is not 3D!"); out_map.initZeros(mask()); } else out_map.initZeros(ori_map); vol1.clear(); ori_map_masked.clear(); // Loop over all masks for (int imask = 0; imask < fn_masks.size(); imask++) { // Load this mask if (!exists(fn_masks[imask])) REPORT_ERROR("ERROR: mask " + std::string(fn_masks[imask]) + " does not exist!"); mask.clear(); mask.read(fn_masks[imask]); if ((NSIZE(out_map) != NSIZE(mask())) || (ZSIZE(out_map) != ZSIZE(mask())) || (YSIZE(out_map) != YSIZE(mask())) || (XSIZE(out_map) != XSIZE(mask()))) REPORT_ERROR("ERROR: All masks (and input map) should have the same sizes!"); // Masks and the original map may not have the same origin! mask().copyShape(out_map); // VERY IMPORTANT! // Add this mask (or masked original map) to final result if (duplicate_masks_only) out_map += mask(); else { ori_map_masked = ori_map * mask(); out_map += ori_map_masked; } // Loop over all operators for this mask if (ops[imask].size() < 1) REPORT_ERROR("ERROR: number of operators for mask " + std::string(fn_masks[imask]) + " is less than 1!"); for (int iop = 0; iop < ops[imask].size(); iop++) { #ifdef NEW_APPLY_SYMMETRY_METHOD Localsym_operator2matrix(ops[imask][iop], op_mat); if (duplicate_masks_only) applyGeometry(mask(), vol1, op_mat, IS_NOT_INV, DONT_WRAP); else applyGeometry(ori_map_masked, vol1, op_mat, IS_NOT_INV, DONT_WRAP); #else Localsym_angles2matrix(ops[imask][iop], op_mat); Localsym_translations2vector(ops[imask][iop], trans_vec); if (duplicate_masks_only) applyGeometry(mask(), vol1, op_mat, IS_NOT_INV, DONT_WRAP); else applyGeometry(ori_map_masked, vol1, op_mat, IS_NOT_INV, DONT_WRAP); selfTranslate(vol1, trans_vec, DONT_WRAP); #endif out_map += vol1; } } } void applyLocalSymmetry(MultidimArray& sym_map, const MultidimArray& ori_map, const std::vector fn_masks, const std::vector > > ops, RFLOAT radius, RFLOAT cosine_width_pix) { MultidimArray w, vol1, vol2; Image mask; Matrix1D trans_vec; Matrix2D op_mat; RFLOAT mask_val = 0., sym_val = 0., radius2 = 0., radiusw2 = 0., dist2 = 0., xinit = 0., yinit = 0., zinit = 0.; // Initialise the result sym_map.clear(); if ((NSIZE(ori_map) != 1) || (ZSIZE(ori_map) <= 1) || (YSIZE(ori_map) <= 1) || (XSIZE(ori_map) <= 1)) REPORT_ERROR("ERROR: input unsymmetrised map is not 3D!"); // Support 3D maps which are not cubic // Support 3D maps and masks which do not share the same origins if ( (radius > 0.) 
&& (cosine_width_pix < (XMIPP_EQUAL_ACCURACY)) ) REPORT_ERROR("ERROR: Cosine width should be larger than 0!"); if ((fn_masks.size() < 1) || (ops.size() < 1)) REPORT_ERROR("ERROR: number of masks and/or operator lists are zero!"); if (fn_masks.size() != ops.size()) REPORT_ERROR("ERROR: number of masks and operator lists do not match!"); sym_map.initZeros(ori_map); w.initZeros(ori_map); vol1.clear(); vol2.clear(); // Use vol2 only as the output from 'applyGeometry()' // Loop over all the masks for (int imask = 0; imask < fn_masks.size(); imask++) { vol1 = ori_map; // Loop over all operators for this mask RFLOAT nr_ops = RFLOAT(ops[imask].size()); if (nr_ops < 0.9) REPORT_ERROR("ERROR: number of operators for mask " + std::string(fn_masks[imask]) + " is less than 1!"); for (int iop = 0; iop < ops[imask].size(); iop++) { // A0 op(--rot--> --trans-->) A1 // Now we want A1 op'(?) A0 // Transform A1 to A0 (original) position, then superimpose with the original #ifdef NEW_APPLY_SYMMETRY_METHOD Localsym_operator2matrix(ops[imask][iop], op_mat, LOCALSYM_OP_DO_INVERT); applyGeometry(ori_map, vol2, op_mat, IS_NOT_INV, DONT_WRAP); #else Localsym_translations2vector(ops[imask][iop], trans_vec, LOCALSYM_OP_DO_INVERT); Localsym_angles2matrix(ops[imask][iop], op_mat, LOCALSYM_OP_DO_INVERT); translate(ori_map, vol2, trans_vec, DONT_WRAP); selfApplyGeometry(vol2, op_mat, IS_NOT_INV, DONT_WRAP); #endif vol1 += vol2; } // Load this mask if (!exists(fn_masks[imask])) REPORT_ERROR("ERROR: mask " + std::string(fn_masks[imask]) + " does not exist!"); mask.clear(); mask.read(fn_masks[imask]); if ((NSIZE(ori_map) != NSIZE(mask())) || (ZSIZE(ori_map) != ZSIZE(mask())) || (YSIZE(ori_map) != YSIZE(mask())) || (XSIZE(ori_map) != XSIZE(mask()))) REPORT_ERROR("ERROR: sizes of input and masks do not match!"); // Masks and the original map may not have the same origin! mask().copyShape(ori_map); // VERY IMPORTANT! // 'Vol1' contains one symmetrised subunit, make it into perfect "mask-weighted sum" FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY3D(vol1) { // Get values of mask in every voxel mask_val = DIRECT_A3D_ELEM(mask(), k, i, j); // "weights from mask" - w if ((mask_val < -(XMIPP_EQUAL_ACCURACY)) || ((mask_val - 1.) > (XMIPP_EQUAL_ACCURACY))) REPORT_ERROR("ERROR: mask " + std::string(fn_masks[imask]) + " - values are not in range [0,1]!"); // This voxel is inside the mask if (mask_val > (XMIPP_EQUAL_ACCURACY)) { DIRECT_A3D_ELEM(vol1, k, i, j) *= mask_val / (nr_ops + 1.); // "mask-weighted sum" - wsum } else { // This voxel is not inside the mask DIRECT_A3D_ELEM(vol1, k, i, j) = 0.; } } // Make various copies of vol1 and mask to wsum and w sym_map += vol1; w += mask(); for (int iop = 0; iop < ops[imask].size(); iop++) { #ifdef NEW_APPLY_SYMMETRY_METHOD Localsym_operator2matrix(ops[imask][iop], op_mat); applyGeometry(vol1, vol2, op_mat, IS_NOT_INV, DONT_WRAP); sym_map += vol2; applyGeometry(mask(), vol2, op_mat, IS_NOT_INV, DONT_WRAP); w += vol2; #else Localsym_angles2matrix(ops[imask][iop], op_mat); Localsym_translations2vector(ops[imask][iop], trans_vec); applyGeometry(vol1, vol2, op_mat, IS_NOT_INV, DONT_WRAP); selfTranslate(vol2, trans_vec, DONT_WRAP); sym_map += vol2; applyGeometry(mask(), vol2, op_mat, IS_NOT_INV, DONT_WRAP); selfTranslate(vol2, trans_vec, DONT_WRAP); w += vol2; #endif } // Unload this mask mask.clear(); } vol1.clear(); vol2.clear(); mask.clear(); // TODO: check! please always ensure - free memory space in time! 
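	// Descriptive note (added): the blending below combines the mask-weighted,
	// symmetrised average with the original map across the soft mask edges.
	// With w = sum of mask values at a voxel and wsum = mask-weighted sum of
	// the symmetrised copies:
	//   w  > 1    : out = wsum / w              (overlapping masks are re-normalised)
	//   0 < w < 1 : out = wsum + (1 - w) * ori  (i.e. w * average + (1 - w) * original,
	//                                            as stated in the branch comments below)
	//   w <= 0    : out = ori                   (voxel lies outside all masks)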
// sym_map and w contain all symmetised subunits (wsum) and mask coefficients (w) needed FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY3D(sym_map) { mask_val = DIRECT_A3D_ELEM(w, k, i, j); // get weights // TODO: check radius2 here! // This voxel is inside one of the masks if (mask_val > (XMIPP_EQUAL_ACCURACY)) // weight > 0 { if ((mask_val - 1.) > (XMIPP_EQUAL_ACCURACY)) // weight > 1 { // ncs = wsum / w DIRECT_A3D_ELEM(sym_map, k, i, j) /= mask_val; } else if ((mask_val - 1.) < (-(XMIPP_EQUAL_ACCURACY))) // 0 < weight < 1 { // ncs = w * (wsum / w) + (1 - w) * ori_val sym_val = DIRECT_A3D_ELEM(sym_map, k, i, j); DIRECT_A3D_ELEM(sym_map, k, i, j) = sym_val + (1. - mask_val) * DIRECT_A3D_ELEM(ori_map, k, i, j); } // weight = 1, ncs = wsum / w, nothing to do... } else { // weight <= 0, ncs = ori_val DIRECT_A3D_ELEM(sym_map, k, i, j) = DIRECT_A3D_ELEM(ori_map, k, i, j); } } if (radius > 0.) { radius2 = radius * radius; radiusw2 = (radius + cosine_width_pix) * (radius + cosine_width_pix); xinit = FIRST_XMIPP_INDEX(XSIZE(sym_map)); yinit = FIRST_XMIPP_INDEX(YSIZE(sym_map)); zinit = FIRST_XMIPP_INDEX(ZSIZE(sym_map)); FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY3D(sym_map) { dist2 = (k + zinit) * (k + zinit) + (i + yinit) * (i + yinit) + (j + xinit) * (j + xinit); if (dist2 > radiusw2) DIRECT_A3D_ELEM(sym_map, k, i, j) = 0.; else if (dist2 > radius2) DIRECT_A3D_ELEM(sym_map, k, i, j) *= 0.5 + 0.5 * cos(PI * (radius + cosine_width_pix - sqrt(dist2)) / cosine_width_pix); } } // Done! } void applyLocalSymmetry( MultidimArray& map, const std::vector fn_masks, const std::vector > > ops, RFLOAT radius, RFLOAT cosine_width_pix) { MultidimArray vol; applyLocalSymmetry(vol, map, fn_masks, ops, radius, cosine_width_pix); map = vol; } void getMinCropSize( MultidimArray& vol, Matrix1D& center, long int& mindim, RFLOAT edge) { RFLOAT val = 0., dist2 = 0., dist2_max = 0.; RFLOAT xori = 0., yori = 0., zori = 0.; Matrix1D new_center; mindim = -1; center.initZeros(3); new_center.initZeros(3); if ((NSIZE(vol) != 1) || (ZSIZE(vol) <= 1) || (YSIZE(vol) <= 1) || (XSIZE(vol) <= 1)) REPORT_ERROR("ERROR: input mask is not 3D!"); vol.setXmippOrigin(); vol.centerOfMass(center); xori = XX(center); yori = YY(center); zori = ZZ(center); dist2_max = -999.; FOR_ALL_ELEMENTS_IN_ARRAY3D(vol) { val = A3D_ELEM(vol, k, i, j); if (val < -(XMIPP_EQUAL_ACCURACY)) REPORT_ERROR("ERROR: all voxels in the input map should have positive values!"); if (val > (XMIPP_EQUAL_ACCURACY)) { dist2 = (RFLOAT(k) - zori) * (RFLOAT(k) - zori) + (RFLOAT(i) - yori) * (RFLOAT(i) - yori) + (RFLOAT(j) - xori) * (RFLOAT(j) - xori); if (dist2 > dist2_max) dist2_max = dist2; } } if (dist2_max < 0.) REPORT_ERROR("ERROR: the input map is empty!"); if (dist2_max > 99999999. * 99999999.) REPORT_ERROR("ERROR: size of the input map is too large (> 99999999)!"); dist2_max = sqrt(dist2_max); if (edge > 0.) 
dist2_max += edge; mindim = 2 * (long int)(ceil(dist2_max)); // bestdim % 2 = 0 } bool compareOperatorsByCC( const Matrix1D& lhs, const Matrix1D& rhs) { return (VEC_ELEM(lhs, CC_POS) < VEC_ELEM(rhs, CC_POS)); } void getLocalSearchOperatorSamplings( const Matrix1D& op_old, const Matrix1D& op_search_ranges, std::vector >& op_samplings, RFLOAT ang_search_step, RFLOAT trans_search_step, bool use_healpix, bool verb) { RFLOAT aa = 0., bb = 0., gg = 0., dx = 0., dy = 0., dz = 0., cc = 0.; RFLOAT aa_range = 0., bb_range = 0., gg_range = 0., dx_range = 0., dy_range = 0., dz_range = 0.; RFLOAT aa_residue = 0., bb_residue = 0., gg_residue = 0., dx_residue = 0., dy_residue = 0., dz_residue = 0.; RFLOAT aa_init = 0., bb_init = 0., gg_init = 0., dx_init = 0., dy_init = 0., dz_init = 0.;; RFLOAT val = 0., r2 = 0.; long int nr_dir = 0, nr_all_samplings = 0; std::vector aas, bbs, ggs, dxs, dys, dzs; Matrix1D op_tmp; Matrix2D op_mat; HealpixSampling sampling; std::vector pointer_dir_nonzeroprior, pointer_psi_nonzeroprior; op_samplings.clear(); op_tmp.clear(); op_mat.clear(); sampling.clear(); pointer_dir_nonzeroprior.clear(); pointer_psi_nonzeroprior.clear(); if ( (VEC_XSIZE(op_old) != NR_LOCALSYM_PARAMETERS) || (VEC_XSIZE(op_search_ranges) != NR_LOCALSYM_PARAMETERS) ) REPORT_ERROR("ERROR: Input operator contains syntax error!"); if ( (ang_search_step < 0.0001) || (ang_search_step > 30.) ) REPORT_ERROR("ERROR: Angular searching step should be within range (+0.0001, +30.0000) degrees!"); if ( (trans_search_step < 0.0001) || (trans_search_step > 5.) ) REPORT_ERROR("ERROR: Translational searching step should be within range (+0.0001, +5.0000) rescaled / binned pixels!"); Localsym_decomposeOperator(op_old, aa_init, bb_init, gg_init, dx_init, dy_init, dz_init, cc); Localsym_decomposeOperator(op_search_ranges, aa_range, bb_range, gg_range, dx_range, dy_range, dz_range, cc); // Angular searching ranges if (!use_healpix) { //aa_range = ( (aa_range > 180.) || (aa_range < 0.) ) ? (180.) : aa_range; //bb_range = ( (bb_range > 90.) || (bb_range < 0.) ) ? ( 90.) : bb_range; //gg_range = ( (gg_range > 180.) || (gg_range < 0.) ) ? (180.) : gg_range; aa_range = (aa_range > 180.) ? (180.) : aa_range; bb_range = (bb_range > 90.) ? ( 90.) : bb_range; gg_range = (gg_range > 180.) ? (180.) : gg_range; aa_range = (aa_range > 0.) ? aa_range : 0.; bb_range = (bb_range > 0.) ? bb_range : 0.; gg_range = (gg_range > 0.) ? gg_range : 0.; } if ( ( (aa_range < ang_search_step) && (aa_range > XMIPP_EQUAL_ACCURACY) ) || ( (bb_range < ang_search_step) && (bb_range > XMIPP_EQUAL_ACCURACY) ) || ( (gg_range < ang_search_step) && (gg_range > XMIPP_EQUAL_ACCURACY) ) ) REPORT_ERROR("ERROR: Angular searching step should be smaller than its searching range!"); if (!use_healpix) { // aa, bb, gg ranges >= 0, ang_search_step > 0.01 aa_residue = aa_range - ang_search_step * floor(aa_range / ang_search_step); bb_residue = bb_range - ang_search_step * floor(bb_range / ang_search_step); gg_residue = gg_range - ang_search_step * floor(gg_range / ang_search_step); } // Translational searching ranges dx_range = (dx_range > 0.) ? dx_range : 0.; dy_range = (dy_range > 0.) ? dy_range : 0.; dz_range = (dz_range > 0.) ? 
dz_range : 0.; if ( ( (dx_range < trans_search_step) && (dx_range > XMIPP_EQUAL_ACCURACY) ) || ( (dy_range < trans_search_step) && (dy_range > XMIPP_EQUAL_ACCURACY) ) || ( (dz_range < trans_search_step) && (dz_range > XMIPP_EQUAL_ACCURACY) ) ) REPORT_ERROR("ERROR: Translational searching step should be smaller than its searching range!"); // dx, dy, dz ranges >= 0, ang_search_step > 0.01 dx_residue = dx_range - trans_search_step * floor(dx_range / trans_search_step); dy_residue = dy_range - trans_search_step * floor(dy_range / trans_search_step); dz_residue = dz_range - trans_search_step * floor(dz_range / trans_search_step); if (verb) { //std::cout << " + Local searches of local symmetry operator: Angles (rot, tilt, psi) = (" // << aa_init << ", " << bb_init << ", " << gg_init << ") degree(s), center of mass (x, y, z; cropped, rescaled, binned) = (" // << dx_init << ", " << dy_init << ", " << dz_init << ") pixel(s)..." << std::endl; std::cout << " + Generating sampling points with ranges: Angles (rot, tilt, psi) = +/- (" << aa_range << ", " << bb_range << ", " << gg_range << ") degree(s), center of mass (x, y, z; cropped, rescaled, binned) = +/- (" << dx_range << ", " << dy_range << ", " << dz_range << ") pixel(s)." << std::endl; std::cout << " + Generating sampling points with step sizes: " << ang_search_step << " degree(s), " << trans_search_step << " rescaled (binned) pixel(s)." << std::endl; } // Angular samplings if (use_healpix) { std::vector dummy1, dummy2; pointer_dir_nonzeroprior.clear(); pointer_psi_nonzeroprior.clear(); dummy1.clear(); dummy2.clear(); // Get healpix order and mode int healpix_order = 0, prior_mode = 0; for (healpix_order = 0; healpix_order <= 100; healpix_order++) { if (ang_search_step > (360. / (6. * ROUND(std::pow(2., healpix_order)))) ) break; } if (healpix_order >= 100) REPORT_ERROR("ERROR: healpix_order is larger than 100!"); prior_mode = ((aa_range < 0.) && (bb_range < 0) && (gg_range < 0.)) ? (NOPRIOR) : (PRIOR_ROTTILT_PSI); // Initialise healpix sampling sampling.clear(); sampling.healpix_order = healpix_order; sampling.is_3D = sampling.is_3d_trans = true; sampling.limit_tilt = -91.; // Don't limit tilts sampling.psi_step = 360. / (6. * ROUND(std::pow(2., healpix_order))); sampling.offset_range = sampling.offset_step = 1.; // I don't use Healpix translational samplings sampling.random_perturbation = sampling.perturbation_factor = 0.; // Get all orientations sampling.initialise(3, true, false, false, (prior_mode == NOPRIOR) ? 
(false) : (true)); sampling.setOrientations(); // Select orientations if (prior_mode == PRIOR_ROTTILT_PSI) { sampling.selectOrientationsWithNonZeroPriorProbability( aa_init, bb_init, gg_init, aa_range, bb_range, gg_range, pointer_dir_nonzeroprior, dummy1, pointer_psi_nonzeroprior, dummy2, false, 1.); } else { // Just push all directions for (int idir = 0; idir < sampling.rot_angles.size(); idir++) pointer_dir_nonzeroprior.push_back(idir); for (int ipsi = 0; ipsi < sampling.psi_angles.size(); ipsi++) pointer_psi_nonzeroprior.push_back(ipsi); } if ( (sampling.rot_angles.size() < 1) || (sampling.tilt_angles.size() < 1) || (sampling.psi_angles.size() < 1) || (sampling.rot_angles.size() != sampling.tilt_angles.size()) ) REPORT_ERROR("ERROR: sampling.rot, tilt, psi_angles.size() are invalid!"); if ( (pointer_dir_nonzeroprior.size() < 1) || (pointer_dir_nonzeroprior.size() > sampling.rot_angles.size()) || (pointer_psi_nonzeroprior.size() < 1) || (pointer_psi_nonzeroprior.size() > sampling.psi_angles.size()) ) REPORT_ERROR("ERROR: pointer_dir_nonzeroprior.size() and/or pointer_psi_nonzeroprior.size() are invalid!"); nr_dir = pointer_dir_nonzeroprior.size() * pointer_psi_nonzeroprior.size(); } else { aas.clear(); bbs.clear(); ggs.clear(); if (aa_range > XMIPP_EQUAL_ACCURACY) { for (val = aa_init + aa_residue - aa_range; val < aa_init + aa_range + XMIPP_EQUAL_ACCURACY; val += ang_search_step) aas.push_back(val); } else aas.push_back(aa_init); if (bb_range > XMIPP_EQUAL_ACCURACY) { for (val = bb_init + bb_residue - bb_range; val < bb_init + bb_range + XMIPP_EQUAL_ACCURACY; val += ang_search_step) bbs.push_back(val); } else bbs.push_back(bb_init); if (gg_range > XMIPP_EQUAL_ACCURACY) { for (val = gg_init + gg_residue - gg_range; val < gg_init + gg_range + XMIPP_EQUAL_ACCURACY; val += ang_search_step) ggs.push_back(val); } else ggs.push_back(gg_init); nr_dir = aas.size() * bbs.size() * ggs.size(); } // Translational samplings dxs.clear(); dys.clear(); dzs.clear(); if (dx_range > XMIPP_EQUAL_ACCURACY) { for (val = dx_residue - dx_range; val < dx_range + XMIPP_EQUAL_ACCURACY; val += trans_search_step) dxs.push_back(val); } else dxs.push_back(0.); if (dy_range > XMIPP_EQUAL_ACCURACY) { for (val = dy_residue - dy_range; val < dy_range + XMIPP_EQUAL_ACCURACY; val += trans_search_step) dys.push_back(val); } else dys.push_back(0.); if (dz_range > XMIPP_EQUAL_ACCURACY) { for (val = dz_residue - dz_range; val < dz_range + XMIPP_EQUAL_ACCURACY; val += trans_search_step) dzs.push_back(val); } else dzs.push_back(0.); #ifdef DEBUG if (verb) { if (use_healpix) { std::cout << " PSI = " << std::flush; for (int ii = 0; ii < pointer_psi_nonzeroprior.size(); ii++) std::cout << sampling.psi_angles[pointer_psi_nonzeroprior[ii]] << ", " << std::flush; } else { std::cout << " ROT = " << std::flush; for (int ii = 0; ii < aas.size(); ii++) std::cout << aas[ii] << ", " << std::flush; std::cout << std::endl << " TILT = " << std::flush; for (int ii = 0; ii < bbs.size(); ii++) std::cout << bbs[ii] << ", " << std::flush; std::cout << std::endl << " PSI = " << std::flush; for (int ii = 0; ii < ggs.size(); ii++) std::cout << ggs[ii] << ", " << std::flush; } std::cout << std::endl << " DX = " << std::flush; for (int ii = 0; ii < dxs.size(); ii++) std::cout << dxs[ii] << ", " << std::flush; std::cout << std::endl << " DY = " << std::flush; for (int ii = 0; ii < dys.size(); ii++) std::cout << dys[ii] << ", " << std::flush; std::cout << std::endl << " DZ = " << std::flush; for (int ii = 0; ii < dzs.size(); ii++) std::cout << dzs[ii] << 
", " << std::flush; std::cout << std::endl << " NR_TOTAL_DIR = " << nr_dir << ", NR_TOTAL_TRANS <= " << dxs.size() * dys.size() * dzs.size() << std::endl; } #endif // Get all sampling points op_samplings.clear(); op_tmp.initZeros(NR_LOCALSYM_PARAMETERS); nr_all_samplings = 0; // For translations: op_ori = op_int + op_res if (dx_range < XMIPP_EQUAL_ACCURACY) dx_range = (1e+10); if (dy_range < XMIPP_EQUAL_ACCURACY) dy_range = (1e+10); if (dz_range < XMIPP_EQUAL_ACCURACY) dz_range = (1e+10); for (int idz = 0; idz < dzs.size(); idz++) { for (int idy = 0; idy < dys.size(); idy++) { for (int idx = 0; idx < dxs.size(); idx++) { dz = dzs[idz]; dy = dys[idy]; dx = dxs[idx]; r2 = (dz * dz) / (dz_range * dz_range) + (dy * dy) / (dy_range * dy_range) + (dx * dx) / (dx_range * dx_range); if ( (r2 - XMIPP_EQUAL_ACCURACY) > 1.) continue; if (use_healpix) { for (int idir = 0; idir < pointer_dir_nonzeroprior.size(); idir++) { aa = sampling.rot_angles[pointer_dir_nonzeroprior[idir]]; bb = sampling.tilt_angles[pointer_dir_nonzeroprior[idir]]; for (int ipsi = 0; ipsi < pointer_psi_nonzeroprior.size(); ipsi++) { gg = sampling.psi_angles[pointer_psi_nonzeroprior[ipsi]]; // Re-calculate op_old so that they follow the conventions in RELION! standardiseEulerAngles(aa, bb, gg, aa, bb, gg); Localsym_composeOperator(op_tmp, aa, bb, gg, dx + dx_init, dy + dy_init, dz + dz_init, (1e10)); op_samplings.push_back(op_tmp); nr_all_samplings++; } } } else { for (int iaa = 0; iaa < aas.size(); iaa++) { for (int ibb = 0; ibb < bbs.size(); ibb++) { for (int igg = 0; igg < ggs.size(); igg++) { // Re-calculate op_old so that they follow the conventions in RELION! standardiseEulerAngles(aas[iaa], bbs[ibb], ggs[igg], aa, bb, gg); Localsym_composeOperator(op_tmp, aa, bb, gg, dx + dx_init, dy + dy_init, dz + dz_init, (1e10)); op_samplings.push_back(op_tmp); nr_all_samplings++; } } } } } } } if (verb) { #ifdef __unix__ std::cout << " + Total sampling points = " << "\e[1m" << op_samplings.size() << "\e[0m" << std::flush; #else std::cout << " + Total sampling points = " << op_samplings.size() << std::flush; #endif std::cout << ", calculating cross-correlation (CC) values ..." << std::endl; } if (op_samplings.size() < 1) REPORT_ERROR("ERROR: No sampling points!"); } void calculateOperatorCC( const MultidimArray& src, const MultidimArray& dest, const MultidimArray& mask, std::vector >& op_samplings, bool do_sort, bool verb) { RFLOAT val = 0., mask_val = 0., mask_val_sum = 0., mask_val_ctr = 0., cc = 0.; int barstep = 0, updatebar = 0, totalbar = 0; Matrix2D op_mat; MultidimArray vol; if (op_samplings.size() < 1) REPORT_ERROR("ERROR: No sampling points!"); if ( (!isMultidimArray3DCubic(src)) || (!isMultidimArray3DCubic(dest)) || (!isMultidimArray3DCubic(mask)) ) REPORT_ERROR("ERROR: MultidimArray src, dest, mask should all be 3D cubic!"); if ( (!src.sameShape(dest)) || (!src.sameShape(mask)) ) REPORT_ERROR("ERROR: MultidimArray src, dest, mask should have the same sizes!"); // Check the mask, calculate the sum of mask values sum3DCubicMask(mask, mask_val_sum, mask_val_ctr); if (mask_val_sum < 1.) std::cout << " + WARNING: sum of mask values is smaller than 1! Please check whether it is a correct mask!" << std::endl; // Calculate all CCs if (verb) { //std::cout << " + Calculate CCs for all sampling points ..." 
<< std::endl; init_progress_bar(op_samplings.size()); barstep = op_samplings.size() / 100; updatebar = totalbar = 0; } for (int iop = 0; iop < op_samplings.size(); iop++) { Localsym_operator2matrix(op_samplings[iop], op_mat, LOCALSYM_OP_DO_INVERT); applyGeometry(dest, vol, op_mat, IS_NOT_INV, DONT_WRAP); cc = 0.; FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY3D(vol) { mask_val = DIRECT_A3D_ELEM(mask, k, i, j); if (mask_val < XMIPP_EQUAL_ACCURACY) continue; val = DIRECT_A3D_ELEM(vol, k, i, j) - DIRECT_A3D_ELEM(src, k, i, j); //cc += val * val; cc += mask_val * val * val; // weighted by mask value ? } VEC_ELEM(op_samplings[iop], CC_POS) = sqrt(cc / mask_val_sum); if (verb) { if (updatebar > barstep) { updatebar = 0; progress_bar(totalbar); } updatebar++; totalbar++; } } if (verb) progress_bar(op_samplings.size()); // Sort cc, in descending order if (do_sort) std::stable_sort(op_samplings.begin(), op_samplings.end(), compareOperatorsByCC); } void separateMasksBFS( const FileName& fn_in, const int K, RFLOAT val_thres) { MetaDataTable MD; MultidimArray vol_rec; Image img, img_out; FileName fn_out; RFLOAT x_angpix = 0., y_angpix = 0., z_angpix = 0., float_val = 0.; long int pos_val_ctr = 0, xx = 0, yy = 0, zz = 0; int id = 0, int_val = 0; std::queue > q; Matrix1D vec1; const int K_max = 999; // Check K if ( (K < 2) || (K > K_max) ) REPORT_ERROR("ERROR: number of sub-masks should be at 2~999 !"); if (K > 20) std::cerr << " WARNING: K = " << K << " seems too large!" << std::endl; #ifdef DEBUG std::cout << " K = " << K << std::endl; #endif // Read the header of input map img.read(fn_in); //img().setXmippOrigin(); if ((NSIZE(img()) != 1) || (ZSIZE(img()) <= 10) || (YSIZE(img()) <= 10) || (XSIZE(img()) <= 10)) REPORT_ERROR("ERROR: Image file " + fn_in + " is an invalid 3D map! (< 10 X 10 X 10 pixels)"); if ( (XSIZE(img()) != YSIZE(img())) || (XSIZE(img()) != ZSIZE(img())) ) REPORT_ERROR("ERROR: Image file " + fn_in + " is not a 3D cubic map!"); img.MDMainHeader.getValue(EMDL_IMAGE_SAMPLINGRATE_X, x_angpix); img.MDMainHeader.getValue(EMDL_IMAGE_SAMPLINGRATE_Y, y_angpix); img.MDMainHeader.getValue(EMDL_IMAGE_SAMPLINGRATE_Z, z_angpix); // Initialise vol_rec vol_rec.initZeros(img()); //vol_rec.setXmippOrigin(); // Count voxels with positive values pos_val_ctr = 0; FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY3D(img()) { float_val = DIRECT_A3D_ELEM(img(), k, i, j); //if (val < -(XMIPP_EQUAL_ACCURACY)) // REPORT_ERROR("ERROR: Image file " + fn_in + " contains negative values!"); if (float_val > val_thres) pos_val_ctr++; else DIRECT_A3D_ELEM(vol_rec, k, i, j) = -1; // Mark as invalid! 
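		// Descriptive note (added): after this loop, vol_rec encodes the segmentation
		// state per voxel: -1 = below the binarisation threshold (background),
		// 0 = foreground voxel not yet assigned, and >0 = region id assigned by the
		// breadth-first search further down.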
} if (pos_val_ctr <= K) REPORT_ERROR("ERROR: Image file " + fn_in + " has nearly no voxels with positive values!"); #ifdef DEBUG std::cout << " pos_val_ctr = " << pos_val_ctr << std::endl; #endif id = 0; FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY3D(vol_rec) { int_val = DIRECT_A3D_ELEM(vol_rec, k, i, j); if (int_val != 0) continue; id++; #ifdef DEBUG std::cout << " id= " << id << ", kij= " << k << ", " << i << ", " << j << ", " << std::endl; #endif q.push(vectorR3(int(j), int(i), int(k))); DIRECT_A3D_ELEM(vol_rec, k, i, j) = id; while(!q.empty()) { vec1 = q.front(); q.pop(); DIRECT_A3D_ELEM(vol_rec, ZZ(vec1), YY(vec1), XX(vec1)) = id; for (int dz = -1; dz <= 1; dz++) { for (int dy = -1; dy <= 1; dy++) { for (int dx = -1; dx <= 1; dx++) { if ( (dx * dy * dz) != 0) continue; zz = ZZ(vec1) + dz; yy = YY(vec1) + dy; xx = XX(vec1) + dx; if ( (zz < 0) || (zz >= ZSIZE(vol_rec)) || (yy < 0) || (yy >= YSIZE(vol_rec)) || (xx < 0) || (xx >= XSIZE(vol_rec)) ) continue; if (DIRECT_A3D_ELEM(vol_rec, zz, yy, xx) == 0) { q.push(vectorR3(int(xx), int(yy), int(zz))); DIRECT_A3D_ELEM(vol_rec, zz, yy, xx) = id; } } } } } } std::cout << " " << id << " region(s) detected on map " << fn_in << "." << std::endl; if (K != id) { std::cout << " But the number of regions specified is " << K << "! Please check your input map. Exit now with no file output..." << std::endl; #ifndef DEBUG return; #endif } // Write output maps and STAR file MD.clear(); MD.addLabel(EMDL_MASK_NAME); for (int icen = 0; (icen < K) && (icen < id); icen++) { fn_out = fn_in.withoutExtension() + "_sub" + integerToString(icen + 1, 3, '0') + ".mrc"; img_out().initZeros(img()); //img_out().setXmippOrigin(); FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY3D(vol_rec) { if (DIRECT_A3D_ELEM(vol_rec, k, i, j) == (icen + 1) ) DIRECT_A3D_ELEM(img_out(), k, i, j) = 1.; } img_out.setStatisticsInHeader(); img_out.setSamplingRateInHeader(x_angpix, y_angpix, z_angpix); img_out.write(fn_out); MD.addObject(); MD.setValue(EMDL_MASK_NAME, fn_out); } fn_out = fn_in.withoutExtension() + "_masklist.star"; MD.write(fn_out); } /* void separateMasksKMeans( const FileName& fn_in, const int K, int random_seed) { Image img, img_out; std::vector > ocen, ncen; std::vector wcen; std::vector vec_rec; Matrix1D vec; Matrix2D mat; RFLOAT a = 0., b = 0., g = 0., x = 0., y = 0., z = 0., val = 0., dist2 = 0, dist2_min = 0.; RFLOAT x_angpix = 0., y_angpix = 0., z_angpix = 0.; int best_cen = -1, pos_val_ctr = 0; long int cen_ptr = 0; FileName fn_out; MultidimArray vol_rec; const int K_max = 999; int vec_len_max = 1024000, q = 0; // Check K if ( (K < 2) || (K > K_max) ) REPORT_ERROR("ERROR: number of sub-masks should be at 2~999 !"); if (K > 20) std::cerr << " WARNING: K = " << K << " seems too large!" << std::endl; #ifdef DEBUG std::cout << " K = " << K << std::endl; #endif // Initialise arrays for centroids ocen.clear(); ncen.clear(); wcen.clear(); for (int ii = 0; ii < K; ii++) { ocen.push_back(vectorR3(0., 0., 0.)); ncen.push_back(vectorR3(0., 0., 0.)); wcen.push_back(0.); } // Initialise random number generator if (random_seed < 0) random_seed = time(NULL); init_random_generator(random_seed); // Read the header of input map img.read(fn_in); img().setXmippOrigin(); if ((NSIZE(img()) != 1) || (ZSIZE(img()) <= 10) || (YSIZE(img()) <= 10) || (XSIZE(img()) <= 10)) REPORT_ERROR("ERROR: Image file " + fn_in + " is an invalid 3D map! 
(< 10 X 10 X 10 pixels)"); if ( (XSIZE(img()) != YSIZE(img())) || (XSIZE(img()) != ZSIZE(img())) ) REPORT_ERROR("ERROR: Image file " + fn_in + " is not a 3D cubic map!"); img.MDMainHeader.getValue(EMDL_IMAGE_SAMPLINGRATE_X, x_angpix); img.MDMainHeader.getValue(EMDL_IMAGE_SAMPLINGRATE_Y, y_angpix); img.MDMainHeader.getValue(EMDL_IMAGE_SAMPLINGRATE_Z, z_angpix); // Count voxels with positive values pos_val_ctr = 0; FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY3D(img()) { val = DIRECT_A3D_ELEM(img(), k, i, j); //if (val < -(XMIPP_EQUAL_ACCURACY)) // REPORT_ERROR("ERROR: Image file " + fn_in + " contains negative values!"); if (val > XMIPP_EQUAL_ACCURACY) pos_val_ctr++; } if (pos_val_ctr <= K) REPORT_ERROR("ERROR: Image file " + fn_in + " has nearly no voxels with positive values!"); #ifdef DEBUG std::cout << " pos_val_ctr = " << pos_val_ctr << std::endl; #endif // ???? vol_rec.initZeros(img()); vol_rec.setXmippOrigin(); // Randomly select K centroids vec_rec.clear(); vec_len_max = (vec_len_max >= pos_val_ctr) ? (pos_val_ctr) : (vec_len_max); q = pos_val_ctr / vec_len_max; q = (q <= 1) ? (1) : (q); vec.initZeros(vec_len_max + 1); for (int ii = 1; ii <= K; ii++) // Knuth shuffle { cen_ptr = (long int)(rnd_unif(RFLOAT(ii), RFLOAT(vec_len_max))); cen_ptr = (cen_ptr < ii) ? (ii) : (cen_ptr); cen_ptr = (cen_ptr > vec_len_max) ? (vec_len_max) : (cen_ptr); if (VEC_ELEM(vec, cen_ptr) != 0) vec_rec.push_back(q * VEC_ELEM(vec, cen_ptr)); else vec_rec.push_back(q * cen_ptr); VEC_ELEM(vec, cen_ptr) = ii; } #ifdef DEBUG std::cout << " " << vec_rec.size() << " voxel IDs in total: " << std::flush; for (int ii = 0; ii < vec_rec.size(); ii++) std::cout << vec_rec[ii] << ", " << std::flush; std::cout << std::endl; #endif best_cen = pos_val_ctr = 0; FOR_ALL_ELEMENTS_IN_ARRAY3D(img()) { if (best_cen >= K) break; if (A3D_ELEM(img(), k, i, j) > XMIPP_EQUAL_ACCURACY) { pos_val_ctr++; if (vec_rec[best_cen] == pos_val_ctr) { ocen[best_cen] = vectorR3(RFLOAT(k), RFLOAT(i), RFLOAT(j)); best_cen++; } } } for (int ii = 0; ii < K; ii++) { #ifdef DEBUG std::cout << " Centroid #" << ii + 1 << " : XYZ= " << XX(ocen[ii]) << ", " << YY(ocen[ii]) << ", " << ZZ(ocen[ii]) << std::endl; #endif } //a = rnd_unif(-179., 179.); //b = rnd_unif(1., 179.); //g = rnd_unif(-179., 179.); //Euler_angles2matrix(a, b, g, mat); //for (int ii = 0; ii < K; ii++) //{ // z = RFLOAT(ii) * RFLOAT(ZSIZE(img())) / RFLOAT(K) + RFLOAT(STARTINGZ(img())); // y = rnd_unif(0., YSIZE(img())) + RFLOAT(STARTINGY(img())); // x = rnd_unif(0., XSIZE(img())) + RFLOAT(STARTINGX(img())); // z /= sqrt(3.); y /= sqrt(3.); x /= sqrt(3.); // ocen[ii] = mat * vectorR3(x, y, z); //#ifdef DEBUG // std::cout << " Centroid #" << ii + 1 << " : XYZ= " << XX(ocen[ii]) << ", " << YY(ocen[ii]) << ", " << ZZ(ocen[ii]) << std::endl; //#endif //} // K-means for (int iter = 1; iter <= 100; iter++) { #ifdef DEBUG std::cout << std::endl; #endif FOR_ALL_ELEMENTS_IN_ARRAY3D(img()) { // For voxels with positive values val = A3D_ELEM(img(), k, i, j); if (val < XMIPP_EQUAL_ACCURACY) continue; // Find the smallest distance to one of the centroids dist2_min = 1e+30; best_cen = -1; for (int icen = 0; icen < K; icen++) { z = ZZ(ocen[icen]); y = YY(ocen[icen]); x = XX(ocen[icen]); dist2 = (RFLOAT(k) - z) * (RFLOAT(k) - z) + (RFLOAT(i) - y) * (RFLOAT(i) - y) + (RFLOAT(j) - x) * (RFLOAT(j) - x); if (dist2 < dist2_min) { dist2_min = dist2; best_cen = icen; } } if (best_cen < 0) REPORT_ERROR("ERROR: best_cen < 0 !"); ZZ(ncen[best_cen]) += k * val; YY(ncen[best_cen]) += i * val; XX(ncen[best_cen]) += j * val; 
wcen[best_cen] += val; A3D_ELEM(vol_rec, k, i, j) = best_cen + 1; } // Update centroids for (int ii = 0; ii < K; ii++) { if (wcen[ii] < XMIPP_EQUAL_ACCURACY) REPORT_ERROR("ERROR: wcen[ii] <= 0 !"); ocen[ii] = ncen[ii] / wcen[ii]; ncen[ii] = vectorR3(0., 0., 0.); wcen[ii] = 0.; #ifdef DEBUG std::cout << " Centroid #" << ii + 1 << " : XYZ= " << XX(ocen[ii]) << ", " << YY(ocen[ii]) << ", " << ZZ(ocen[ii]) << std::endl; #endif } } // Write output maps for (int icen = 0; icen < K; icen++) { fn_out = fn_in.withoutExtension() + "_sub" + integerToString(icen + 1, 3, '0') + ".mrc"; img_out().initZeros(img()); img_out().setXmippOrigin(); FOR_ALL_ELEMENTS_IN_ARRAY3D(vol_rec) { if (A3D_ELEM(vol_rec, k, i, j) == (icen + 1) ) A3D_ELEM(img_out(), k, i, j) = A3D_ELEM(img(), k, i, j); } img_out.MDMainHeader.setValue(EMDL_IMAGE_SAMPLINGRATE_X, x_angpix); img_out.MDMainHeader.setValue(EMDL_IMAGE_SAMPLINGRATE_Y, y_angpix); img_out.MDMainHeader.setValue(EMDL_IMAGE_SAMPLINGRATE_Z, z_angpix); img_out.write(fn_out); } } */ void local_symmetry_parameters::initBoolOptions() { show_usage_for_an_option = false; do_apply_local_symmetry = false; do_duplicate_local_symmetry = false; do_local_search_local_symmetry_ops = false; do_txt2rln = false; do_transform = false; do_debug = false; } void local_symmetry_parameters::clear() { parser.clear(); initBoolOptions(); } void local_symmetry_parameters::displayEmptyLine() { std::cout << "=========================================================================" << std::endl; } void local_symmetry_parameters::usage() { parser.writeUsage(std::cerr); } void local_symmetry_parameters::read(int argc, char **argv) { parser.setCommandLine(argc, argv); int init_section = parser.addSection("Show usage"); show_usage_for_an_option = parser.checkOption("--function_help", "Show usage for the selected function (JUN 30, 2017)"); int options_section = parser.addSection("Options"); do_apply_local_symmetry = parser.checkOption("--apply", "Apply local symmetry to a 3D cryo-EM density map"); do_duplicate_local_symmetry = parser.checkOption("--duplicate", "Duplicate subunits/masks according to local symmetry operators"); do_local_search_local_symmetry_ops = parser.checkOption("--search", "Local searches of local symmetry operators"); do_transform = parser.checkOption("--transform", "Transform a map according to three Euler angles and XYZ translations"); do_txt2rln = parser.checkOption("--txt2rln", "Convert operators from DM to RELION STAR format"); do_debug = parser.checkOption("--debug", "(DEBUG ONLY)"); int params_section = parser.addSection("Parameters (alphabetically ordered)"); angpix_image = textToFloat(parser.getOption("--angpix", "Pixel size (in Angstroms) of input image", "1.")); ang_range = textToFloat(parser.getOption("--ang_range", "Angular search range of operators (in degrees), overwrite rot-tilt-psi ranges if set to positive", "0.")); ang_rot_range = textToFloat(parser.getOption("--ang_rot_range", "Angular (rot) search range of operators (in degrees)", "0.")); ang_tilt_range = textToFloat(parser.getOption("--ang_tilt_range", "Angular (tilt) search range of operators (in degrees)", "0.")); ang_psi_range = textToFloat(parser.getOption("--ang_psi_range", "Angular (psi) search range of operators (in degrees)", "0.")); ang_step = textToFloat(parser.getOption("--ang_step", "Angular search step of operators (in degrees)", "1.")); binning_factor = textToFloat(parser.getOption("--bin", "Binning factor (<= 1 means no binning)", "-1.")); ini_threshold = 
textToFloat(parser.getOption("--ini_threshold", "Initial threshold for binarization", "0.01")); fn_unsym = parser.getOption("--i_map", "Input 3D unsymmetrised map", ""); fn_info_in = parser.getOption("--i_mask_info", "Input file with mask filenames and rotational / translational operators (for local searches)", "maskinfo.txt"); fn_op_mask_info_in = parser.getOption("--i_op_mask_info", "Input file with mask filenames for all operators (for global searches)", "None"); nr_masks = textToInteger(parser.getOption("--n", "Create this number of masks according to the input density map", "2")); offset_range = textToFloat(parser.getOption("--offset_range", "Translational search range of operators (in Angstroms), overwrite x-y-z ranges if set to positive", "0.")); offset_x_range = textToFloat(parser.getOption("--offset_x_range", "Translational (x) search range of operators (in Angstroms)", "0.")); offset_y_range = textToFloat(parser.getOption("--offset_y_range", "Translational (y) search range of operators (in Angstroms)", "0.")); offset_z_range = textToFloat(parser.getOption("--offset_z_range", "Translational (z) search range of operators (in Angstroms)", "0.")); offset_step = textToFloat(parser.getOption("--offset_step", "Translational search step of operators (in Angstroms)", "1.")); fn_sym = parser.getOption("--o_map", "Output 3D symmetrised map", ""); fn_info_out = parser.getOption("--o_mask_info", "Output file with mask filenames and rotational / translational operators", "maskinfo_refined.txt"); psi = textToFloat(parser.getOption("--psi", "Third Euler angle (psi, in degrees)", "0.")); rot = textToFloat(parser.getOption("--rot", "First Euler angle (rot, in degrees)", "0.")); sphere_percentage = textToFloat(parser.getOption("--sphere_percentage", "Diameter of spherical mask divided by the box size (< 0.99)", "-1.")); tilt = textToFloat(parser.getOption("--tilt", "Second Euler angle (tilt, in degrees)", "0.")); xoff = textToFloat(parser.getOption("--xoff", "X-offset (in Angstroms)", "0.")); yoff = textToFloat(parser.getOption("--yoff", "Y-offset (in Angstroms)", "0.")); zoff = textToFloat(parser.getOption("--zoff", "Z-offset (in Angstroms)", "0.")); verb = parser.checkOption("--verb", "Verbose output?"); int expert_section = parser.addSection("Parameters (expert options - alphabetically ordered)"); fn_mask = parser.getOption("--i_mask", "(DEBUG) Input mask", "mask.mrc"); fn_info_in_parsed_ext = parser.getOption("--i_mask_info_parsed_ext", "Extension of parsed input file with mask filenames and rotational / translational operators", "parsed"); use_healpix_sampling = parser.checkOption("--use_healpix", "Use Healpix for angular samplings?"); width_edge_pix = textToFloat(parser.getOption("--width", "Width of cosine soft edge (in pixels)", "5.")); // Check for errors in the command-line option if (parser.checkForErrors()) REPORT_ERROR("Errors encountered on the command line (see above), exiting..."); }; void local_symmetry_parameters::run() { bool do_sort = true, do_verb = true; FileName fn_parsed, fn_tmp; std::vector fn_mask_list; std::vector > > op_list; fn_mask_list.clear(); op_list.clear(); // Check options int valid_options = 0; valid_options += (do_apply_local_symmetry) ? (1) : (0); valid_options += (do_duplicate_local_symmetry) ? (1) : (0); valid_options += (do_local_search_local_symmetry_ops) ? (1) : (0); valid_options += (do_txt2rln) ? (1) : (0); valid_options += (do_transform) ? (1) : (0); valid_options += (do_debug) ? 
(1) : (0); if (valid_options <= 0) REPORT_ERROR("Please specify one option!"); if (valid_options > 1) REPORT_ERROR("Only one option can be specified at one time! valid_options = " + integerToString(valid_options)); if (do_apply_local_symmetry) { if (show_usage_for_an_option) { displayEmptyLine(); std::cout << " Apply local symmetry to a 3D cryo-EM density map" << std::endl; std::cout << " USAGE: --apply --angpix 1.34 --i_map unsym.mrc --i_mask_info maskinfo.star --o_map sym.mrc (--sphere_percentage 0.9)" << std::endl; displayEmptyLine(); return; } Image unsym_map, sym_map; if (angpix_image < 0.001) REPORT_ERROR("Invalid pixel size!"); if (sphere_percentage > 0.991) REPORT_ERROR("Diameter of spherical mask divided by the box size should be smaller than 0.99!"); // Parse mask info file if (fn_info_in.getExtension() == "star") { readRelionFormatMasksAndOperators(fn_info_in, fn_mask_list, op_list, angpix_image, do_verb); } else { fn_parsed = fn_info_in + std::string(".") + fn_info_in_parsed_ext; parseDMFormatMasksAndOperators(fn_info_in, fn_parsed); readDMFormatMasksAndOperators(fn_parsed, fn_mask_list, op_list, angpix_image, do_verb); } unsym_map.clear(); unsym_map.read(fn_unsym); //sym_map.clear(); int box_size = ((XSIZE(unsym_map())) < (YSIZE(unsym_map()))) ? (XSIZE(unsym_map())) : (YSIZE(unsym_map())); box_size = (box_size < (ZSIZE(unsym_map()))) ? box_size : (ZSIZE(unsym_map())); applyLocalSymmetry(sym_map(), unsym_map(), fn_mask_list, op_list, (RFLOAT(box_size) * sphere_percentage) / 2., width_edge_pix); sym_map().setXmippOrigin(); sym_map.setSamplingRateInHeader(angpix_image, angpix_image, angpix_image); sym_map.setStatisticsInHeader(); sym_map.write(fn_sym); } else if (do_duplicate_local_symmetry) { if (show_usage_for_an_option) { displayEmptyLine(); std::cout << " Duplicate subunits/masks according to local symmetry operators" << std::endl; std::cout << " USAGE: --duplicate (--i_map unsym.mrc) --angpix 1.34 --i_mask_info maskinfo.txt --o_map duplicated.mrc" << std::endl; std::cout << " Leave '--i_map' empty if you want to duplicate masks only." << std::endl; displayEmptyLine(); return; } Image map_in, map_out; bool duplicate_masks_only = true; if (angpix_image < 0.001) REPORT_ERROR("Invalid pixel size!"); // Parse mask info file if (fn_info_in.getExtension() == "star") { readRelionFormatMasksAndOperators(fn_info_in, fn_mask_list, op_list, angpix_image, do_verb); } else { fn_parsed = fn_info_in + std::string(".") + fn_info_in_parsed_ext; parseDMFormatMasksAndOperators(fn_info_in, fn_parsed); readDMFormatMasksAndOperators(fn_parsed, fn_mask_list, op_list, angpix_image, do_verb); } map_in.clear(); //map_out.clear(); if (exists(fn_unsym)) { duplicate_masks_only = false; map_in.read(fn_unsym); } else duplicate_masks_only = true; duplicateLocalSymmetry(map_out(), map_in(), fn_mask_list, op_list, duplicate_masks_only); map_out().setXmippOrigin(); map_out.setSamplingRateInHeader(angpix_image, angpix_image, angpix_image); map_out.setStatisticsInHeader(); map_out.write(fn_sym); } else if (do_local_search_local_symmetry_ops) { if (show_usage_for_an_option) { displayEmptyLine(); std::cout << " Searches of local symmetry operators" << std::endl; std::cout << " MPI: mpirun -n 23 relion_localsym_mpi ..." 
<< std::endl; std::cout << " USAGE FOR GLOBAL SEARCHES:" << std::endl; std::cout << " --search --i_map unsym.mrc --i_op_mask_info mask_list.star --o_mask_info maskinfo_iter000.star --angpix 1.34 (--bin 2)" << std::endl; std::cout << " --ang_step 5 (--offset_range 2 --offset_step 1)" << std::endl; std::cout << " USAGE FOR LOCAL SEARCHES:" << std::endl; std::cout << " --search --i_map unsym.mrc --i_mask_info maskinfo_iter001.star --o_mask_info maskinfo_iter002.star --angpix 1.34 (--bin 2)" << std::endl; std::cout << " --ang_range 2 (--ang_rot_range 2 --ang_tilt_range 2 --ang_psi_range 2) --ang_step 0.5" << std::endl; std::cout << " --offset_range 2 (--offset_x_range 2 --offset_y_range 2 --offset_z_range 2) --offset_step 1" << std::endl; std::cout << " Ranges/steps of angular and translational searches are in degrees and Angstroms respectively." << std::endl; displayEmptyLine(); return; } // Adapted from local_symmetry_mpi.cpp long int newdim = 0, cropdim = 0, z0 = 0, y0 = 0, x0 = 0, zf = 0, yf = 0, xf = 0; RFLOAT aa = 0, bb = 0, gg = 0., dx = 0., dy = 0., dz = 0., cc = 0., tmp_binning_factor = 1.; RFLOAT mask_sum = 0., mask_ctr = 0., mask2_sum = 0., mask2_ctr = 0.; Image map, mask, mask2; Matrix1D op_search_ranges, op, com0_int, com1_int, com1_float, com1_diff, vecR3; //std::vector fn_mask_list; //std::vector > > op_list; std::vector > op_mask_list; std::vector > op_samplings; MultidimArray src_cropped, dest_cropped, mask_cropped; Matrix2D mat1; FileName fn_searched_op_samplings; //FileName fn_parsed, fn_tmp; map.clear(); mask.clear(); mask2.clear(); op_search_ranges.clear(); op.clear(); com0_int.clear(); com1_int.clear(); com1_float.clear(); com1_diff.clear(); vecR3.clear(); fn_mask_list.clear(); op_list.clear(); op_mask_list.clear(); op_samplings.clear(); src_cropped.clear(); dest_cropped.clear(); mask_cropped.clear(); mat1.clear(); fn_parsed.clear(); fn_tmp.clear(); fn_searched_op_samplings.clear(); displayEmptyLine(); // Master gets search ranges (in degrees and pixels), sets offset_step (in pixels). if (angpix_image < 0.001) REPORT_ERROR("Invalid pixel size!"); if (fn_op_mask_info_in != "None") { if ( (ang_range < (XMIPP_EQUAL_ACCURACY) ) && (ang_rot_range < (XMIPP_EQUAL_ACCURACY) ) && (ang_tilt_range < (XMIPP_EQUAL_ACCURACY) ) && (ang_psi_range < (XMIPP_EQUAL_ACCURACY) ) ) { ang_range = 180.; std::cout << " Initial searches: reset searching ranges of all 3 Euler angles to +/-180 degrees." << std::endl; } else { if (ang_range > (XMIPP_EQUAL_ACCURACY) ) std::cout << " User-defined initial searches: searching ranges of all 3 Euler angles are set to +/-" << ang_range << " degree(s)." << std::endl; else std::cout << " User-defined initial searches: (rot, tilt, psi) ranges are +/- (" << ang_rot_range << ", " << ang_tilt_range << ", " << ang_psi_range << ") degree(s)." << std::endl; } } Localsym_composeOperator( op_search_ranges, (ang_range > (XMIPP_EQUAL_ACCURACY)) ? (ang_range) : (ang_rot_range), (ang_range > (XMIPP_EQUAL_ACCURACY)) ? (ang_range) : (ang_tilt_range), (ang_range > (XMIPP_EQUAL_ACCURACY)) ? (ang_range) : (ang_psi_range), (offset_range > (XMIPP_EQUAL_ACCURACY)) ? (offset_range) : (offset_x_range), (offset_range > (XMIPP_EQUAL_ACCURACY)) ? (offset_range) : (offset_y_range), (offset_range > (XMIPP_EQUAL_ACCURACY)) ? (offset_range) : (offset_z_range) ); Localsym_scaleTranslations(op_search_ranges, 1. 
/ angpix_image); offset_step /= angpix_image; // Master parses and reads mask info file // Local searches if (fn_op_mask_info_in == "None") { if (fn_info_in.getExtension() == "star") { readRelionFormatMasksAndOperators(fn_info_in, fn_mask_list, op_list, angpix_image, true); } else { fn_parsed = fn_info_in + std::string(".") + fn_info_in_parsed_ext; parseDMFormatMasksAndOperators(fn_info_in, fn_parsed); readDMFormatMasksAndOperators(fn_parsed, fn_mask_list, op_list, angpix_image, true); } } else { // Global searches std::cout << " Global searches: option --i_mask_info " << fn_info_in << " is ignored." << std::endl; readRelionFormatMasksWithoutOperators(fn_op_mask_info_in, fn_mask_list, op_list, op_mask_list, (ang_range > 179.99), true); } // Master reads input map std::cout << std::endl << " Pixel size = " << angpix_image << " Angstrom(s)" << std::endl; std::cout << " Read input map " << fn_unsym << " ..." << std::endl; map.read(fn_unsym); map().setXmippOrigin(); if (!isMultidimArray3DCubic(map())) REPORT_ERROR("ERROR: Input map " + fn_unsym + " is not 3D cube!"); // All nodes loop over all masks for (int imask = 0; imask < fn_mask_list.size(); imask++) { displayEmptyLine(); // Master reads and checks the mask std::cout << " Read mask #" << imask + 1 << ": " << fn_mask_list[imask] << " ..." << std::endl; mask.read(fn_mask_list[imask]); mask().setXmippOrigin(); if (!isMultidimArray3DCubic(mask())) REPORT_ERROR("ERROR: Input mask " + fn_mask_list[imask] + " is not 3D cube!"); if (!map().sameShape(mask())) REPORT_ERROR("ERROR: Input map " + fn_unsym + " and mask " + fn_mask_list[imask] + " should have the same size!"); sum3DCubicMask(mask(), mask_sum, mask_ctr); // Get com0 of this mask. Assume that com0 has all integer values! getMinCropSize(mask(), com0_int, cropdim, offset_range / angpix_image); if (cropdim < 2) REPORT_ERROR("ERROR: Mask " + fn_mask_list[imask] + " is too small!"); XX(com0_int) = round(XX(com0_int)); YY(com0_int) = round(YY(com0_int)); ZZ(com0_int) = round(ZZ(com0_int)); std::cout << " Mask #" << imask + 1 << " : center of mass XYZ = (" << XX(com0_int) << ", " << YY(com0_int) << ", " << ZZ(com0_int) << ") pixel(s)."<< std::endl; // Crop the mask and the corresponding region of the map z0 = ROUND(ZZ(com0_int)) + FIRST_XMIPP_INDEX(cropdim); zf = ROUND(ZZ(com0_int)) + LAST_XMIPP_INDEX(cropdim); y0 = ROUND(YY(com0_int)) + FIRST_XMIPP_INDEX(cropdim); yf = ROUND(YY(com0_int)) + LAST_XMIPP_INDEX(cropdim); x0 = ROUND(XX(com0_int)) + FIRST_XMIPP_INDEX(cropdim); xf = ROUND(XX(com0_int)) + LAST_XMIPP_INDEX(cropdim); std::cout << " Mask #" << imask + 1 << " : cropped box size = " << cropdim << " pixels." << std::endl; #ifdef DEBUG std::cout << " Window: x0, y0, z0 = " << x0 << ", " << y0 << ", " << z0 << "; xf, yf, zf = " << xf << ", " << yf << ", " << zf << std::endl; #endif mask().window(mask_cropped, z0, y0, x0, zf, yf, xf); mask_cropped.setXmippOrigin(); map().window(src_cropped, z0, y0, x0, zf, yf, xf); src_cropped.setXmippOrigin(); // Rescale the map and the mask (if binning_factor > 1), set 'newdim'. tmp_binning_factor = 1.; newdim = cropdim; if ((binning_factor - 1.) 
> XMIPP_EQUAL_ACCURACY) { newdim = (long int)(ceil(RFLOAT(cropdim) / binning_factor)); if (newdim < 2) REPORT_ERROR("ERROR: Binning factor is too large / Mask is too small!"); if ((newdim + 1) < cropdim) // Need rescaling { // Dimension should always be even if (newdim % 2) newdim++; resizeMap(mask_cropped, newdim); mask_cropped.setXmippOrigin(); resizeMap(src_cropped, newdim); src_cropped.setXmippOrigin(); tmp_binning_factor = RFLOAT(cropdim) / RFLOAT(newdim); std::cout << " + Rescale cropped box size from " << cropdim << " to " << newdim << " pixels. Binning factor = " << tmp_binning_factor << std::endl; // Mask values might go out of range after rescaling. Fix it if it happens truncateMultidimArray(mask_cropped, 0., 1.); } else newdim = cropdim; } #ifdef DEBUG std::cout << " newdim= " << newdim << ", cropdim= " << cropdim << std::endl; #endif // All nodes loop over all operators of this mask for (int iop = 0; iop < op_list[imask].size(); iop++) { std::cout << std::endl; // Master gets sampling points com1_float.initZeros(3); com1_int.initZeros(3); com1_diff.initZeros(3); Localsym_decomposeOperator(op_list[imask][iop], aa, bb, gg, dx, dy, dz, cc); if (fn_op_mask_info_in == "None") { // Local searches // Get com1_float. (floating point numbers) // Com1f = R * Com0 + v Euler_angles2matrix(aa, bb, gg, mat1); com1_float = mat1 * com0_int; com1_float += vectorR3(dx, dy, dz); } else { // Global searches // Master reads and checks the mask std::cout << " Read mask #" << imask + 1 << " operator #" << iop + 1 << " : " << op_mask_list[imask][iop] << " ..." << std::endl; mask2.read(op_mask_list[imask][iop]); mask2().setXmippOrigin(); if (!isMultidimArray3DCubic(mask2())) REPORT_ERROR("ERROR: Input mask " + op_mask_list[imask][iop] + " is not 3D cube!"); if (!map().sameShape(mask2())) REPORT_ERROR("ERROR: Input map " + fn_unsym + " and mask " + op_mask_list[imask][iop] + " should have the same size!"); sum3DCubicMask(mask2(), mask2_sum, mask2_ctr); if (!similar3DCubicMasks(mask_sum, mask_ctr, mask2_sum, mask2_ctr)) std::cerr << " WARNING: masks " << fn_mask_list[imask] << " and " << op_mask_list[imask][iop] << " seem different! Please check whether they are covering regions from the same set!" 
<< std::endl; // Calculate Com1f of this mask mask2().centerOfMass(com1_float); std::cout << " Mask #" << imask + 1 << " operator #" << iop + 1 << " : center of mass XYZ = (" << XX(com1_float) << ", " << YY(com1_float) << ", " << ZZ(com1_float) << ") pixel(s)."<< std::endl; } // Get com1_int and com1_diff // diff = Com1f - Com1i XX(com1_int) = round(XX(com1_float)); YY(com1_int) = round(YY(com1_float)); ZZ(com1_int) = round(ZZ(com1_float)); XX(com1_diff) = XX(com1_float) - XX(com1_int); YY(com1_diff) = YY(com1_float) - YY(com1_int); ZZ(com1_diff) = ZZ(com1_float) - ZZ(com1_int); // Crop this region z0 = ROUND(ZZ(com1_int)) + FIRST_XMIPP_INDEX(cropdim); zf = ROUND(ZZ(com1_int)) + LAST_XMIPP_INDEX(cropdim); y0 = ROUND(YY(com1_int)) + FIRST_XMIPP_INDEX(cropdim); yf = ROUND(YY(com1_int)) + LAST_XMIPP_INDEX(cropdim); x0 = ROUND(XX(com1_int)) + FIRST_XMIPP_INDEX(cropdim); xf = ROUND(XX(com1_int)) + LAST_XMIPP_INDEX(cropdim); #ifdef DEBUG std::cout << " Window: x0, y0, z0 = " << x0 << ", " << y0 << ", " << z0 << "; xf, yf, zf = " << xf << ", " << yf << ", " << zf << std::endl; #endif map().window(dest_cropped, z0, y0, x0, zf, yf, xf); dest_cropped.setXmippOrigin(); // Do the same rescaling if (newdim != cropdim) { resizeMap(dest_cropped, newdim); dest_cropped.setXmippOrigin(); } // Master gets sampling points // Get sampling points - Rescale translational search ranges and steps Localsym_composeOperator(op, aa, bb, gg, XX(com1_diff), YY(com1_diff), ZZ(com1_diff), cc); if (newdim != cropdim) { Localsym_scaleTranslations(op_search_ranges, 1. / tmp_binning_factor); offset_step *= 1. / tmp_binning_factor; Localsym_scaleTranslations(op, 1. / tmp_binning_factor); } #ifdef __unix__ std::cout << " + Refining " << "\e[1m" << "Mask #" << imask + 1 << " Operator #" << iop + 1 << "\e[0m" << ": " << std::flush; #else std::cout << " + Refining Mask #" << imask + 1 << " Operator #" << iop + 1 << ": " << std::flush; #endif Localsym_outputOperator(op_list[imask][iop], &std::cout, angpix_image); std::cout << std::endl; getLocalSearchOperatorSamplings( op, op_search_ranges, op_samplings, ang_step, offset_step, use_healpix_sampling, true); if (newdim != cropdim) { Localsym_scaleTranslations(op_search_ranges, tmp_binning_factor); offset_step *= tmp_binning_factor; Localsym_scaleTranslations(op, tmp_binning_factor); } // TODO: test this!!! if (op_samplings.size() <= 0) REPORT_ERROR("ERROR: No sampling points!"); // Calculate all CCs for the sampling points calculateOperatorCC(src_cropped, dest_cropped, mask_cropped, op_samplings, false, do_verb); // TODO: For rescaled maps if (newdim != cropdim) { for (int isamp = 0; isamp < op_samplings.size(); isamp++) Localsym_scaleTranslations(op_samplings[isamp], tmp_binning_factor); } // Now translations are all unscaled. // TODO: add vectors together!!! 
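				// Descriptive note (added): the sampled offsets are expressed relative to the
				// cropped box centred on the rounded centre of mass com1_int. The loop below
				// therefore shifts every candidate back to full-map coordinates
				// (newCom1f = com1_float + best_trans - com1_diff, i.e. com1_int + best_trans)
				// and then recovers the operator translation as v = newCom1f - R * com0.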
// Update com1_float for (int isamp = 0; isamp < op_samplings.size(); isamp++) { // Get new_com1 // newCom1f = Com1f + best_trans_samp - diff Localsym_shiftTranslations(op_samplings[isamp], com1_float - com1_diff); // equivalently, com1_int // Update v = newCom1f + ( - newR * com0) Localsym_decomposeOperator(op_samplings[isamp], aa, bb, gg, dx, dy, dz, cc); Euler_angles2matrix(aa, bb, gg, mat1); vecR3 = vectorR3(dx, dy, dz) - mat1 * com0_int; Localsym_composeOperator(op_samplings[isamp], aa, bb, gg, XX(vecR3), YY(vecR3), ZZ(vecR3), cc); } // Master sorts the results std::stable_sort(op_samplings.begin(), op_samplings.end(), compareOperatorsByCC); // Master outputs the local searches results fn_tmp.compose(fn_info_out.withoutExtension() + "_cc_mask", imask + 1, "tmp", 3); // "*_cc_mask001.tmp" fn_tmp = fn_tmp.withoutExtension(); // "*_cc_mask001" fn_searched_op_samplings.compose(fn_tmp + "_op", iop + 1, "star", 3); // "*_cc_mask001_op001.star" writeRelionFormatLocalSearchOperatorResults(fn_searched_op_samplings, op_samplings, angpix_image); std::cout << " + List of sampling points for this local symmetry operator: " << fn_searched_op_samplings << std::endl; // Master updates this operator and do screen output op_list[imask][iop] = op_samplings[0]; std::cout << " + Done! Refined operator: " << std::flush; Localsym_outputOperator(op_samplings[0], &std::cout, angpix_image); std::cout << std::endl; } } // Master writes out new mask info file if (fn_info_out.getExtension() == "star") writeRelionFormatMasksAndOperators(fn_info_out, fn_mask_list, op_list, angpix_image); else writeDMFormatMasksAndOperators(fn_info_out, fn_mask_list, op_list, angpix_image); displayEmptyLine(); #ifdef __unix__ std::cout << " Done! New local symmetry description file: " << "\e[1m" << fn_info_out << "\e[0m" << std::endl; #else std::cout << " Done! 
New local symmetry description file: " << fn_info_out << std::endl; #endif } else if (do_txt2rln) { if (show_usage_for_an_option) { displayEmptyLine(); std::cout << " Convert operators from DM to RELION STAR format" << std::endl; std::cout << " USAGE: --txt2rln --i_mask_info in.txt --o_mask_info out.star" << std::endl; displayEmptyLine(); return; } if ( (fn_info_in.getExtension() == "star") || (fn_info_out.getExtension() != "star") ) REPORT_ERROR("ERROR: input and output text files should be in plain-text (not .star) and .star formats respectively!"); fn_parsed = fn_info_in + std::string(".") + fn_info_in_parsed_ext; parseDMFormatMasksAndOperators(fn_info_in, fn_parsed); readDMFormatMasksAndOperators(fn_parsed, fn_mask_list, op_list, 1., do_verb); writeRelionFormatMasksAndOperators(fn_info_out, fn_mask_list, op_list, 1.); } else if (do_transform) { if (show_usage_for_an_option) { displayEmptyLine(); std::cout << " Transform a map according to three Euler angles and XYZ translations" << std::endl; std::cout << " USAGE: --transform --angpix 1.34 --i_map in.mrc --o_map out.mrc --rot 5 --tilt 5 --psi 5 --xoff 5 --yoff 5 --zoff 5" << std::endl; displayEmptyLine(); return; } Image img; Matrix2D op_mat; Matrix1D op; img.read(fn_unsym); standardiseEulerAngles(rot, tilt, psi, rot, tilt, psi); Localsym_composeOperator(op, rot, tilt, psi, xoff / angpix_image, yoff / angpix_image, zoff / angpix_image); std::cout << " Pixel size = " << angpix_image << " Angstrom(s)" << std::endl; std::cout << " Transform input map " << fn_unsym << " : " << std::flush; Localsym_outputOperator(op, &std::cout, angpix_image); std::cout << std::endl; Localsym_operator2matrix(op, op_mat, LOCALSYM_OP_DONT_INVERT); selfApplyGeometry(img(), op_mat, IS_NOT_INV, DONT_WRAP); img.write(fn_sym); std::cout << " Done writing " << fn_sym << std::endl; } else if (do_debug) { //separateMasksKMeans(fn_unsym, 4); separateMasksBFS(fn_unsym, nr_masks, ini_threshold); /* Image img1, img2; std::vector > op_samplings; RFLOAT aa = 0., bb = 0., gg = 0., dx = 0., dy = 0., dz = 0.; Matrix2D op_mat1, op_mat2; Matrix1D op_old, op_search_ranges, op_new, trans_vec1, trans_vec2; img1.read(fn_unsym); img2.read(fn_mask); Localsym_composeOperator(op_old, 36., 130., -110., -6., 4., -5.); Localsym_composeOperator(op_search_ranges, 2., 2., 2., 2., 2., 2.); localRefineOneOperator( img1(), img2(), op_old, op_search_ranges, op_samplings, 0.5, 1.); return; aa = 37.6; bb = 129.3; gg = -111.9; dx = -4.87; dy = 5.22; dz = -3.8; Localsym_composeOperator(op_old, aa, bb, gg, dx, dy, dz); Localsym_operator2matrix(op_old, op_mat1); Localsym_operator2matrix(op_old, op_mat2, LOCALSYM_OP_DO_INVERT); img1.read(fn_unsym); img2 = img1; applyGeometry(img1(), img2(), op_mat1, IS_NOT_INV, DONT_WRAP); img2.write(fn_sym); return; */ } else { REPORT_ERROR("Please specify an option!"); } if ( (!show_usage_for_an_option) && (!do_debug) ) { writeCommand("relion_localsym.log", "`which relion_localsym`"); } } void local_symmetry_parameters::writeCommand(FileName fn_cmd, std::string str_executable_name) { std::ofstream ofs; ofs.open(fn_cmd.c_str(), std::ofstream::out | std::ofstream::app); time_t now = time(0); char nodename[64] = "undefined"; gethostname(nodename,sizeof(nodename)); std::string hostname(nodename); ofs << std::endl << " ++++ Executed the following command at host " << hostname << " on " << ctime(&now); ofs << " " << str_executable_name << " " << std::flush; parser.writeCommandLine(ofs); ofs.close(); }; 
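/*
 * Illustrative command lines for this tool, assembled from the usage strings and error
 * messages above; the authoritative option list is the one parsed by
 * local_symmetry_parameters::read().
 *
 *   # Convert a DM (ccp4) format description file into RELION STAR format:
 *   relion_localsym --txt2rln --i_mask_info in.txt --o_mask_info out.star
 *
 *   # Apply one set of Euler angles plus an XYZ translation to a map:
 *   relion_localsym --transform --angpix 1.34 --i_map in.mrc --o_map out.mrc \
 *       --rot 5 --tilt 5 --psi 5 --xoff 5 --yoff 5 --zoff 5
 *
 *   # Local searches of the operators can also be distributed over MPI; the parallel
 *   # build (relion_localsym_mpi, below) accepts '--search' as its only mode:
 *   mpirun -n 8 `which relion_localsym_mpi` --search ...
 */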
relion-3.1.3/src/local_symmetry.h000066400000000000000000000174261411340063500170200ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #ifndef LOCAL_SYMMETRY_H_ #define LOCAL_SYMMETRY_H_ #include "src/args.h" #include "src/euler.h" #include "src/funcs.h" #include "src/macros.h" #include "src/matrix1d.h" #include "src/matrix2d.h" #include "src/image.h" #include "src/fftw.h" #include "src/transformations.h" #include "src/healpix_sampling.h" #include "src/time.h" #include // DM (ccp4) operator types // http://www.ccp4.ac.uk/html/rotationmatrices.html #define ROTA_EULER_TYPE 1 #define ROTA_POLAR_TYPE 2 #define ROTA_MATRIX_TYPE 4 #define OMAT_TYPE 8 // Positions of parameters in Matrix1D operators #define AA_POS 0 #define BB_POS 1 #define GG_POS 2 #define DX_POS 3 #define DY_POS 4 #define DZ_POS 5 #define CC_POS 6 #define NR_LOCALSYM_PARAMETERS 7 #define LOCALSYM_OP_DO_INVERT (true) #define LOCALSYM_OP_DONT_INVERT (false) template bool isMultidimArray3DCubic(const MultidimArray& v) { if ( (NSIZE(v) != 1) || (ZSIZE(v) <= 1) || (YSIZE(v) <= 1) || (XSIZE(v) <= 1) || (ZSIZE(v) != YSIZE(v)) || (ZSIZE(v) != XSIZE(v)) || (ZSIZE(v) % 2) ) return false; return true; } void sum3DCubicMask( const MultidimArray v, RFLOAT& val_sum, RFLOAT& val_ctr); bool similar3DCubicMasks( RFLOAT mask1_sum, RFLOAT mask1_ctr, RFLOAT mask2_sum, RFLOAT mask2_ctr); void truncateMultidimArray( MultidimArray& v, RFLOAT minval = 0., RFLOAT maxval = 0.); void Localsym_outputOperator( const Matrix1D& op, std::ostream* o_ptr, RFLOAT scale_angpix = 1.); void Localsym_composeOperator( Matrix1D& op, RFLOAT aa = 0., RFLOAT bb = 0., RFLOAT gg = 0., RFLOAT dx = 0., RFLOAT dy = 0., RFLOAT dz = 0., RFLOAT cc = (1e10)); void Localsym_decomposeOperator( const Matrix1D& op, RFLOAT& aa, RFLOAT& bb, RFLOAT& gg, RFLOAT& dx, RFLOAT& dy, RFLOAT& dz, RFLOAT& cc); void Localsym_scaleTranslations( Matrix1D& op, RFLOAT factor = 1.); void Localsym_shiftTranslations( Matrix1D& op, const Matrix1D& voffset); void Localsym_translations2vector( const Matrix1D& vec, Matrix1D& trans_vec, bool invert = LOCALSYM_OP_DONT_INVERT); void Localsym_angles2matrix( const Matrix1D& vec, Matrix2D& mat, bool invert = LOCALSYM_OP_DONT_INVERT); void Localsym_operator2matrix( const Matrix1D& vec, Matrix2D& mat, bool invert = LOCALSYM_OP_DONT_INVERT); void standardiseEulerAngles( RFLOAT aa_old, RFLOAT bb_old, RFLOAT gg_old, RFLOAT& aa_new, RFLOAT& bb_new, RFLOAT& gg_new); bool sameLocalsymOperators( const Matrix1D& lhs, const Matrix1D& rhs); void parseDMFormatMasksAndOperators( FileName fn_in, FileName fn_out); void readRelionFormatMasksAndOperators( FileName fn_info, std::vector& fn_mask_list, 
std::vector > >& ops, RFLOAT angpix = 1., bool verb = false); void readRelionFormatMasksWithoutOperators( FileName fn_info, std::vector& fn_mask_list, std::vector > >& ops, std::vector >& op_masks, bool all_angular_search_ranges_are_global = true, bool verb = false); void writeRelionFormatMasksAndOperators( FileName fn_info, const std::vector& fn_mask_list, const std::vector > >& ops, RFLOAT angpix = 1.); void writeRelionFormatLocalSearchOperatorResults( FileName fn_out, const std::vector >& op_samplings, RFLOAT angpix = 1.); void readDMFormatMasksAndOperators( FileName fn_info, std::vector& fn_mask_list, std::vector > >& op_list, RFLOAT angpix = 1., bool verb = false); void writeDMFormatMasksAndOperators( FileName fn_info, const std::vector& fn_mask_list, const std::vector > >& ops, RFLOAT angpix = 1.); void duplicateLocalSymmetry( MultidimArray& out_map, const MultidimArray& ori_map, const std::vector fn_masks, const std::vector > > ops, bool duplicate_masks_only = false); void applyLocalSymmetry( MultidimArray& sym_map, const MultidimArray& ori_map, const std::vector fn_masks, const std::vector > > ops, RFLOAT radius = -1., RFLOAT cosine_width_pix = 5.); void applyLocalSymmetry( MultidimArray& map, const std::vector fn_masks, const std::vector > > ops, RFLOAT radius = -1., RFLOAT cosine_width_pix = 5.); void getMinCropSize( MultidimArray& vol, Matrix1D& center, long int& mindim, RFLOAT edge = 0.); bool compareOperatorsByCC( const Matrix1D& lhs, const Matrix1D& rhs); void getLocalSearchOperatorSamplings( const Matrix1D& op_old, const Matrix1D& op_search_ranges, std::vector >& op_samplings, RFLOAT ang_search_step = 1., RFLOAT trans_search_step = 1., bool use_healpix = false, bool verb = true); void calculateOperatorCC( const MultidimArray& src, const MultidimArray& dest, const MultidimArray& mask, std::vector >& op_samplings, bool do_sort = true, bool verb = true); void separateMasksBFS( const FileName& fn_in, const int K = 2, RFLOAT val_thres = XMIPP_EQUAL_ACCURACY); /* void separateMasksKMeans( const FileName& fn_in, const int K = 2, int random_seed = -1); */ class local_symmetry_parameters { public: IOParser parser; // Available options // PLEASE MAKE SURE THAT ALL THESE OPTIONS ARE INITIALISED IN THE PARSING STEP! // ---------------------------------------- bool show_usage_for_an_option; bool do_apply_local_symmetry; bool do_duplicate_local_symmetry; bool do_local_search_local_symmetry_ops; bool do_txt2rln; bool do_transform; bool do_debug; FileName fn_unsym, fn_sym, fn_mask; // Input file with mask filenames and rotational / translational operators FileName fn_info_in, fn_op_mask_info_in, fn_info_out, fn_info_in_parsed_ext; // Manually reset pixel size (in Angstroms) RFLOAT angpix_image; // Local searches of local symmetry operators RFLOAT ang_rot_range, ang_tilt_range, ang_psi_range, ang_range, ang_step; RFLOAT offset_x_range, offset_y_range, offset_z_range, offset_range, offset_step; RFLOAT rot, tilt, psi, xoff, yoff, zoff; RFLOAT binning_factor; // Width of soft edge RFLOAT width_edge_pix; // % of box size as the 2D / 3D spherical mask RFLOAT sphere_percentage; int nr_masks; RFLOAT ini_threshold; bool use_healpix_sampling; // Verbose output? 
bool verb; void initBoolOptions(); void clear(); void displayEmptyLine(); void usage(); void read(int argc, char **argv); void run(); void writeCommand(FileName fn_cmd, std::string str_executable_name); local_symmetry_parameters() { clear(); }; ~local_symmetry_parameters() { clear(); }; }; #endif /* LOCAL_SYMMETRY_H_ */ relion-3.1.3/src/local_symmetry_mpi.cpp000066400000000000000000000523351411340063500202160ustar00rootroot00000000000000#include "src/local_symmetry_mpi.h" //#define DEBUG void local_symmetry_parameters_mpi::read(int argc, char **argv) { // Define a new MpiNode node = new MpiNode(argc, argv); // First read in non-parallelisation-dependent variables local_symmetry_parameters::read(argc, argv); // Don't put any output to screen for mpi followers verb = (node->isLeader()) ? 1 : 0; // Possibly also read parallelisation-dependent variables here // Print out MPI info printMpiNodesMachineNames(*node); } void local_symmetry_parameters_mpi::run() { int nr_masks = 0, nr_ops = 0, nr_total_samplings = 0; long int newdim = 0, cropdim = 0, z0 = 0, y0 = 0, x0 = 0, zf = 0, yf = 0, xf = 0, first = 0, last = 0; RFLOAT aa = 0, bb = 0, gg = 0., dx = 0., dy = 0., dz = 0., cc = 0., tmp_binning_factor = 1.; RFLOAT mask_sum = 0., mask_ctr = 0., mask2_sum = 0., mask2_ctr = 0.; Image map, mask, mask2; Matrix1D op_search_ranges, op, com0_int, com1_int, com1_float, com1_diff, vecR3; std::vector fn_mask_list; std::vector > > op_list; std::vector > op_mask_list; std::vector > op_samplings, op_samplings_batch; MultidimArray op_samplings_batch_packed, src_cropped, dest_cropped, mask_cropped; Matrix2D mat1; FileName fn_parsed, fn_tmp, fn_searched_op_samplings; map.clear(); mask.clear(); mask2.clear(); op_search_ranges.clear(); op.clear(); com0_int.clear(); com1_int.clear(); com1_float.clear(); com1_diff.clear(); vecR3.clear(); fn_mask_list.clear(); op_list.clear(); op_mask_list.clear(); op_samplings.clear(); op_samplings_batch.clear(); op_samplings_batch_packed.clear(); src_cropped.clear(); dest_cropped.clear(); mask_cropped.clear(); mat1.clear(); fn_parsed.clear(); fn_tmp.clear(); fn_searched_op_samplings.clear(); // Check options if ( (do_apply_local_symmetry) || (do_duplicate_local_symmetry) || (do_txt2rln) || (do_transform) || (do_debug) || (!do_local_search_local_symmetry_ops) ) REPORT_ERROR("ERROR: Please specify '--search' as the only option! For other options use non-parallel version (without '_mpi') instead!"); // Leader writes out commands if ( (!show_usage_for_an_option) && (!do_debug) && (node->isLeader()) ) { local_symmetry_parameters::writeCommand("relion_localsym.log", "mpirun -n " + integerToString(node->size) + " `which relion_localsym_mpi`"); } MPI_Barrier(MPI_COMM_WORLD); if (node->isLeader()) { displayEmptyLine(); #ifdef DEBUG std::cout << " DEBUG: relion_localsym_mpi is running ..." << std::endl; #endif // Leader gets search ranges (in degrees and pixels), sets offset_step (in pixels). if (angpix_image < 0.001) REPORT_ERROR("Invalid pixel size!"); if (fn_op_mask_info_in != "None") { if ( (ang_range < (XMIPP_EQUAL_ACCURACY) ) && (ang_rot_range < (XMIPP_EQUAL_ACCURACY) ) && (ang_tilt_range < (XMIPP_EQUAL_ACCURACY) ) && (ang_psi_range < (XMIPP_EQUAL_ACCURACY) ) ) { ang_range = 180.; std::cout << " Initial searches: reset searching ranges of all 3 Euler angles to +/-180 degrees." << std::endl; } else { if (ang_range > (XMIPP_EQUAL_ACCURACY) ) std::cout << " User-defined initial searches: searching ranges of all 3 Euler angles are set to +/-" << ang_range << " degree(s)." 
<< std::endl; else std::cout << " User-defined initial searches: (rot, tilt, psi) ranges are +/- (" << ang_rot_range << ", " << ang_tilt_range << ", " << ang_psi_range << ") degree(s)." << std::endl; } } Localsym_composeOperator( op_search_ranges, (ang_range > (XMIPP_EQUAL_ACCURACY)) ? (ang_range) : (ang_rot_range), (ang_range > (XMIPP_EQUAL_ACCURACY)) ? (ang_range) : (ang_tilt_range), (ang_range > (XMIPP_EQUAL_ACCURACY)) ? (ang_range) : (ang_psi_range), (offset_range > (XMIPP_EQUAL_ACCURACY)) ? (offset_range) : (offset_x_range), (offset_range > (XMIPP_EQUAL_ACCURACY)) ? (offset_range) : (offset_y_range), (offset_range > (XMIPP_EQUAL_ACCURACY)) ? (offset_range) : (offset_z_range) ); Localsym_scaleTranslations(op_search_ranges, 1. / angpix_image); offset_step /= angpix_image; // Leader parses and reads mask info file // Local searches if (fn_op_mask_info_in == "None") { if (fn_info_in.getExtension() == "star") { readRelionFormatMasksAndOperators(fn_info_in, fn_mask_list, op_list, angpix_image, true); } else { fn_parsed = fn_info_in + std::string(".") + fn_info_in_parsed_ext; parseDMFormatMasksAndOperators(fn_info_in, fn_parsed); readDMFormatMasksAndOperators(fn_parsed, fn_mask_list, op_list, angpix_image, true); } } else { // Global searches std::cout << " Global searches: option --i_mask_info " << fn_info_in << " is ignored." << std::endl; readRelionFormatMasksWithoutOperators(fn_op_mask_info_in, fn_mask_list, op_list, op_mask_list, (ang_range > 179.99), true); } // Leader set total number of masks nr_masks = fn_mask_list.size(); // Leader reads input map std::cout << std::endl << " Pixel size = " << angpix_image << " Angstrom(s)" << std::endl; std::cout << " Read input map " << fn_unsym << " ..." << std::endl; map.read(fn_unsym); map().setXmippOrigin(); if (!isMultidimArray3DCubic(map())) REPORT_ERROR("ERROR: Input map " + fn_unsym + " is not 3D cube!"); #ifdef DEBUG std::cout << " I am leader. The nxyzdim of map() is " << map().nzyxdim << std::endl; #endif } MPI_Barrier(MPI_COMM_WORLD); // Leader broadcasts total number of masks node->relion_MPI_Bcast(&nr_masks, 1, MPI_INT, 0, MPI_COMM_WORLD); // All nodes loop over all masks for (int imask = 0; imask < nr_masks; imask++) { MPI_Barrier(MPI_COMM_WORLD); if (node->isLeader()) { displayEmptyLine(); // Leader reads and checks the mask std::cout << " Read mask #" << imask + 1 << ": " << fn_mask_list[imask] << " ..." << std::endl; mask.read(fn_mask_list[imask]); mask().setXmippOrigin(); if (!isMultidimArray3DCubic(mask())) REPORT_ERROR("ERROR: Input mask " + fn_mask_list[imask] + " is not 3D cube!"); if (!map().sameShape(mask())) REPORT_ERROR("ERROR: Input map " + fn_unsym + " and mask " + fn_mask_list[imask] + " should have the same size!"); sum3DCubicMask(mask(), mask_sum, mask_ctr); // Get com0 of this mask. Assume that com0 has all integer values! 
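// getMinCropSize() below returns the mask's centre of mass (com0_int) together with the
// smallest cubic box size (cropdim) that holds the mask plus an extra edge of
// offset_range / angpix_image pixels (the translational search range in pixels).
// Only this cropped region of the mask and of the map is windowed out, optionally binned,
// and broadcast to the follower ranks, so the per-operator CC calculations stay small.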
getMinCropSize(mask(), com0_int, cropdim, offset_range / angpix_image); if (cropdim < 2) REPORT_ERROR("ERROR: Mask " + fn_mask_list[imask] + " is too small!"); XX(com0_int) = round(XX(com0_int)); YY(com0_int) = round(YY(com0_int)); ZZ(com0_int) = round(ZZ(com0_int)); std::cout << " Mask #" << imask + 1 << " : center of mass XYZ = (" << XX(com0_int) << ", " << YY(com0_int) << ", " << ZZ(com0_int) << ") pixel(s)."<< std::endl; // Crop the mask and the corresponding region of the map z0 = ROUND(ZZ(com0_int)) + FIRST_XMIPP_INDEX(cropdim); zf = ROUND(ZZ(com0_int)) + LAST_XMIPP_INDEX(cropdim); y0 = ROUND(YY(com0_int)) + FIRST_XMIPP_INDEX(cropdim); yf = ROUND(YY(com0_int)) + LAST_XMIPP_INDEX(cropdim); x0 = ROUND(XX(com0_int)) + FIRST_XMIPP_INDEX(cropdim); xf = ROUND(XX(com0_int)) + LAST_XMIPP_INDEX(cropdim); std::cout << " Mask #" << imask + 1 << " : cropped box size = " << cropdim << " pixels." << std::endl; #ifdef DEBUG std::cout << " Window: x0, y0, z0 = " << x0 << ", " << y0 << ", " << z0 << "; xf, yf, zf = " << xf << ", " << yf << ", " << zf << std::endl; #endif mask().window(mask_cropped, z0, y0, x0, zf, yf, xf); mask_cropped.setXmippOrigin(); map().window(src_cropped, z0, y0, x0, zf, yf, xf); src_cropped.setXmippOrigin(); // Rescale the map and the mask (if binning_factor > 1), set 'newdim'. tmp_binning_factor = 1.; newdim = cropdim; if ((binning_factor - 1.) > XMIPP_EQUAL_ACCURACY) { newdim = (long int)(ceil(RFLOAT(cropdim) / binning_factor)); if (newdim < 2) REPORT_ERROR("ERROR: Binning factor is too large / Mask is too small!"); if ((newdim + 1) < cropdim) // Need rescaling { // Dimension should always be even if (newdim % 2) newdim++; resizeMap(mask_cropped, newdim); mask_cropped.setXmippOrigin(); resizeMap(src_cropped, newdim); src_cropped.setXmippOrigin(); tmp_binning_factor = RFLOAT(cropdim) / RFLOAT(newdim); std::cout << " + Rescale cropped box size from " << cropdim << " to " << newdim << " pixels. Binning factor = " << tmp_binning_factor << std::endl; // Mask values might go out of range after rescaling. Fix it if it happens truncateMultidimArray(mask_cropped, 0., 1.); } else newdim = cropdim; } #ifdef DEBUG std::cout << " newdim= " << newdim << ", cropdim= " << cropdim << std::endl; #endif } MPI_Barrier(MPI_COMM_WORLD); node->relion_MPI_Bcast(&newdim, 1, MPI_LONG, 0, MPI_COMM_WORLD); MPI_Barrier(MPI_COMM_WORLD); // Follower allocate space for MultidimArray // TODO: check whether the space is allocated and the map is read and successfully broadcast!!! if (! node->isLeader()) { mask_cropped.clear(); mask_cropped.initZeros(1, newdim, newdim, newdim); mask_cropped.setXmippOrigin(); src_cropped.clear(); src_cropped.initZeros(1, newdim, newdim, newdim); src_cropped.setXmippOrigin(); dest_cropped.clear(); dest_cropped.initZeros(1, newdim, newdim, newdim); dest_cropped.setXmippOrigin(); #ifdef DEBUG if (node->rank == 1) { std::cout << " I am node rank 1. The nxyzdim of mask_cropped is " << mask_cropped.nzyxdim << std::endl; std::cout << " I am node rank 1. The nxyzdim of src_cropped is " << src_cropped.nzyxdim << std::endl; std::cout << " I am node rank 1. The nxyzdim of dest_cropped is " << dest_cropped.nzyxdim << std::endl; } #endif } MPI_Barrier(MPI_COMM_WORLD); // Leader broadcasts the mask to all nodes #ifdef DEBUG if (node->isLeader()) std::cout << " Leader is broadcasting cropped masked region #" << (imask + 1) << " ..." 
<< std::endl; #endif node->relion_MPI_Bcast(MULTIDIM_ARRAY(mask_cropped), MULTIDIM_SIZE(mask_cropped), MY_MPI_DOUBLE, 0, MPI_COMM_WORLD); node->relion_MPI_Bcast(MULTIDIM_ARRAY(src_cropped), MULTIDIM_SIZE(src_cropped), MY_MPI_DOUBLE, 0, MPI_COMM_WORLD); #ifdef DEBUG if (node->isLeader()) std::cout << " Leader has completed broadcasting cropped masked region #" << (imask + 1) << "." << std::endl; #endif // Leader reads total number of operators for this mask if (node->isLeader()) { nr_ops = op_list[imask].size(); #ifdef DEBUG std::cout << " nr_ops= " << nr_ops << std::endl; #endif } MPI_Barrier(MPI_COMM_WORLD); // Leader broadcasts total number of operators for this mask to all followers node->relion_MPI_Bcast(&nr_ops, 1, MPI_INT, 0, MPI_COMM_WORLD); // All nodes loop over all operators of this mask for (int iop = 0; iop < nr_ops; iop++) { MPI_Barrier(MPI_COMM_WORLD); // Leader gets sampling points if (node->isLeader()) { std::cout << std::endl; com1_float.initZeros(3); com1_int.initZeros(3); com1_diff.initZeros(3); Localsym_decomposeOperator(op_list[imask][iop], aa, bb, gg, dx, dy, dz, cc); if (fn_op_mask_info_in == "None") { // Local searches // Get com1_float. (floating point numbers) // Com1f = R * Com0 + v Euler_angles2matrix(aa, bb, gg, mat1); com1_float = mat1 * com0_int; com1_float += vectorR3(dx, dy, dz); } else { // Global searches // Leader reads and checks the mask std::cout << " Read mask #" << imask + 1 << " operator #" << iop + 1 << " : " << op_mask_list[imask][iop] << " ..." << std::endl; mask2.read(op_mask_list[imask][iop]); mask2().setXmippOrigin(); if (!isMultidimArray3DCubic(mask2())) REPORT_ERROR("ERROR: Input mask " + op_mask_list[imask][iop] + " is not 3D cube!"); if (!map().sameShape(mask2())) REPORT_ERROR("ERROR: Input map " + fn_unsym + " and mask " + op_mask_list[imask][iop] + " should have the same size!"); sum3DCubicMask(mask2(), mask2_sum, mask2_ctr); if (!similar3DCubicMasks(mask_sum, mask_ctr, mask2_sum, mask2_ctr)) std::cerr << " WARNING: masks " << fn_mask_list[imask] << " and " << op_mask_list[imask][iop] << " seem different! Please check whether they are covering regions from the same set!" 
<< std::endl; // Calculate Com1f of this mask mask2().centerOfMass(com1_float); std::cout << " Mask #" << imask + 1 << " operator #" << iop + 1 << " : center of mass XYZ = (" << XX(com1_float) << ", " << YY(com1_float) << ", " << ZZ(com1_float) << ") pixel(s)."<< std::endl; } // Get com1_int and com1_diff // diff = Com1f - Com1i XX(com1_int) = round(XX(com1_float)); YY(com1_int) = round(YY(com1_float)); ZZ(com1_int) = round(ZZ(com1_float)); XX(com1_diff) = XX(com1_float) - XX(com1_int); YY(com1_diff) = YY(com1_float) - YY(com1_int); ZZ(com1_diff) = ZZ(com1_float) - ZZ(com1_int); // Crop this region z0 = ROUND(ZZ(com1_int)) + FIRST_XMIPP_INDEX(cropdim); zf = ROUND(ZZ(com1_int)) + LAST_XMIPP_INDEX(cropdim); y0 = ROUND(YY(com1_int)) + FIRST_XMIPP_INDEX(cropdim); yf = ROUND(YY(com1_int)) + LAST_XMIPP_INDEX(cropdim); x0 = ROUND(XX(com1_int)) + FIRST_XMIPP_INDEX(cropdim); xf = ROUND(XX(com1_int)) + LAST_XMIPP_INDEX(cropdim); #ifdef DEBUG std::cout << " Window: x0, y0, z0 = " << x0 << ", " << y0 << ", " << z0 << "; xf, yf, zf = " << xf << ", " << yf << ", " << zf << std::endl; #endif map().window(dest_cropped, z0, y0, x0, zf, yf, xf); dest_cropped.setXmippOrigin(); // Do the same rescaling if (newdim != cropdim) { resizeMap(dest_cropped, newdim); dest_cropped.setXmippOrigin(); } // Leader gets sampling points // Get sampling points - Rescale translational search ranges and steps Localsym_composeOperator(op, aa, bb, gg, XX(com1_diff), YY(com1_diff), ZZ(com1_diff), cc); if (newdim != cropdim) { Localsym_scaleTranslations(op_search_ranges, 1. / tmp_binning_factor); offset_step *= 1. / tmp_binning_factor; Localsym_scaleTranslations(op, 1. / tmp_binning_factor); } #ifdef __unix__ std::cout << " + Refining " << "\e[1m" << "Mask #" << imask + 1 << " Operator #" << iop + 1 << "\e[0m" << ": " << std::flush; #else std::cout << " + Refining Mask #" << imask + 1 << " Operator #" << iop + 1 << ": " << std::flush; #endif Localsym_outputOperator(op_list[imask][iop], &std::cout, angpix_image); std::cout << std::endl; getLocalSearchOperatorSamplings( op, op_search_ranges, op_samplings, ang_step, offset_step, use_healpix_sampling, true); if (newdim != cropdim) { Localsym_scaleTranslations(op_search_ranges, tmp_binning_factor); offset_step *= tmp_binning_factor; Localsym_scaleTranslations(op, tmp_binning_factor); } if (op_samplings.size() <= (node->size)) REPORT_ERROR("ERROR: Too few sampling points! 
Use non-parallel version (without '_mpi') instead!"); nr_total_samplings = op_samplings.size(); } MPI_Barrier(MPI_COMM_WORLD); // Leader sends this 'dest' cropped region to all followers node->relion_MPI_Bcast(MULTIDIM_ARRAY(dest_cropped), MULTIDIM_SIZE(dest_cropped), MY_MPI_DOUBLE, 0, MPI_COMM_WORLD); // Leader sends the number of total samplings to all followers node->relion_MPI_Bcast(&nr_total_samplings, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Barrier(MPI_COMM_WORLD); // All nodes allocate space for op_samplings_batch_packed // Allow 10 more empty units to prevent segmentation fault op_samplings_batch_packed.initZeros((nr_total_samplings / (node->size)) + 10, NR_LOCALSYM_PARAMETERS); MPI_Barrier(MPI_COMM_WORLD); // Leader distributes sampling points to all followers first = 0; last = 0; if (node->isLeader()) { for (int id_rank = (node->size) - 1; id_rank >= 0; id_rank--) { divide_equally(nr_total_samplings, node->size, id_rank, first, last); // Beware: YSIZE(op_samplings_batch_packed) is larger than (last - first + 1) FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY2D(op_samplings_batch_packed) { if ( (i >= 0) && (i <= (last - first)) ) DIRECT_A2D_ELEM(op_samplings_batch_packed, i, j) = VEC_ELEM(op_samplings[i + first], j); } // Leader distributes sampling points to all followers if (id_rank > 0) node->relion_MPI_Send(MULTIDIM_ARRAY(op_samplings_batch_packed), (last - first + 1) * NR_LOCALSYM_PARAMETERS, MY_MPI_DOUBLE, id_rank, MPITAG_LOCALSYM_SAMPLINGS_PACK, MPI_COMM_WORLD); // If id_rank == 0 (leader), just keep op_samplings_batch_packed to the leader itself } } else { MPI_Status status; // Followers receive sampling points from leader // Important: Followers calculate first and last subscripts! divide_equally(nr_total_samplings, node->size, node->rank, first, last); node->relion_MPI_Recv(MULTIDIM_ARRAY(op_samplings_batch_packed), (last - first + 1) * NR_LOCALSYM_PARAMETERS, MY_MPI_DOUBLE, 0, MPITAG_LOCALSYM_SAMPLINGS_PACK, MPI_COMM_WORLD, status); } MPI_Barrier(MPI_COMM_WORLD); // All nodes unpack sampling points op_samplings_batch.clear(); for (long int i=0; iisLeader()); for (int op_id = 0; op_id < op_samplings_batch.size(); op_id++) { DIRECT_A2D_ELEM(op_samplings_batch_packed, op_id, CC_POS) = VEC_ELEM(op_samplings_batch[op_id], CC_POS); } MPI_Barrier(MPI_COMM_WORLD); // Followers send their results back to leader if (! 
node->isLeader()) { node->relion_MPI_Send(MULTIDIM_ARRAY(op_samplings_batch_packed), (last - first + 1) * NR_LOCALSYM_PARAMETERS, MY_MPI_DOUBLE, 0, MPITAG_LOCALSYM_SAMPLINGS_PACK, MPI_COMM_WORLD); } else { MPI_Status status; for (int id_rank = 0; id_rank < (node->size); id_rank++) { divide_equally(op_samplings.size(), node->size, id_rank, first, last); // Leader receives op_samplings_batch_packed from followers if (id_rank > 0) node->relion_MPI_Recv(MULTIDIM_ARRAY(op_samplings_batch_packed), (last - first + 1) * NR_LOCALSYM_PARAMETERS, MY_MPI_DOUBLE, id_rank, MPITAG_LOCALSYM_SAMPLINGS_PACK, MPI_COMM_WORLD, status); // Leader does something for itself if id_rank == 0 FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY2D(op_samplings_batch_packed) { // Beware: YSIZE(op_samplings_batch_packed) is larger than (last - first + 1) if ( (i >= 0) && (i <= (last - first)) ) VEC_ELEM(op_samplings[i + first], CC_POS) = DIRECT_A2D_ELEM(op_samplings_batch_packed, i, CC_POS); } } } MPI_Barrier(MPI_COMM_WORLD); if (node->isLeader()) { // TODO: For rescaled maps if (newdim != cropdim) { for (int isamp = 0; isamp < op_samplings.size(); isamp++) Localsym_scaleTranslations(op_samplings[isamp], tmp_binning_factor); } // Now translations are all unscaled. // TODO: add vectors together!!! // Update com1_float for (int isamp = 0; isamp < op_samplings.size(); isamp++) { // Get new_com1 // newCom1f = Com1f + best_trans_samp - diff Localsym_shiftTranslations(op_samplings[isamp], com1_float - com1_diff); // equivalently, com1_int // Update v = newCom1f + ( - newR * com0) Localsym_decomposeOperator(op_samplings[isamp], aa, bb, gg, dx, dy, dz, cc); Euler_angles2matrix(aa, bb, gg, mat1); vecR3 = vectorR3(dx, dy, dz) - mat1 * com0_int; Localsym_composeOperator(op_samplings[isamp], aa, bb, gg, XX(vecR3), YY(vecR3), ZZ(vecR3), cc); } // Leader sorts the results std::stable_sort(op_samplings.begin(), op_samplings.end(), compareOperatorsByCC); // Leader outputs the local searches results fn_tmp.compose(fn_info_out.withoutExtension() + "_cc_mask", imask + 1, "tmp", 3); // "*_cc_mask001.tmp" fn_tmp = fn_tmp.withoutExtension(); // "*_cc_mask001" fn_searched_op_samplings.compose(fn_tmp + "_op", iop + 1, "star", 3); // "*_cc_mask001_op001.star" writeRelionFormatLocalSearchOperatorResults(fn_searched_op_samplings, op_samplings, angpix_image); std::cout << " + List of sampling points for this local symmetry operator: " << fn_searched_op_samplings << std::endl; // Leader updates this operator and do screen output op_list[imask][iop] = op_samplings[0]; std::cout << " + Done! Refined operator: " << std::flush; Localsym_outputOperator(op_samplings[0], &std::cout, angpix_image); std::cout << std::endl; } MPI_Barrier(MPI_COMM_WORLD); } MPI_Barrier(MPI_COMM_WORLD); } MPI_Barrier(MPI_COMM_WORLD); if (node->isLeader()) { // Leader writes out new mask info file if (fn_info_out.getExtension() == "star") writeRelionFormatMasksAndOperators(fn_info_out, fn_mask_list, op_list, angpix_image); else writeDMFormatMasksAndOperators(fn_info_out, fn_mask_list, op_list, angpix_image); displayEmptyLine(); #ifdef __unix__ std::cout << " Done! New local symmetry description file: " << "\e[1m" << fn_info_out << "\e[0m" << std::endl; #else std::cout << " Done! 
New local symmetry description file: " << fn_info_out << std::endl; #endif } MPI_Barrier(MPI_COMM_WORLD); } relion-3.1.3/src/local_symmetry_mpi.h000066400000000000000000000011741411340063500176560ustar00rootroot00000000000000#ifndef LOCAL_SYMMETRY_MPI_H_ #define LOCAL_SYMMETRY_MPI_H_ #include "src/mpi.h" #include "src/local_symmetry.h" #include "src/parallel.h" //#define DEBUG #define MPITAG_LOCALSYM_SAMPLINGS_PACK 1 class local_symmetry_parameters_mpi: public local_symmetry_parameters { private: MpiNode *node; public: /** Destructor, calls MPI_Finalize */ ~local_symmetry_parameters_mpi() { delete node; } /** Read * This could take care of mpi-parallelisation-dependent variables */ void read(int argc, char **argv); // Parallelized run function void run(); }; #endif /* LOCAL_SYMMETRY_MPI_H_ */ relion-3.1.3/src/macros.cpp.in000066400000000000000000000004631411340063500161720ustar00rootroot00000000000000#include "src/macros.h" // This is necessary because we cannot directly concatenate string // literals in the preprocessor #define RELION_VERSION_SUFFIX "@RELION_VERSION_SUFFIX@" #define RELION_LONG_VERSION (RELION_SHORT_VERSION RELION_VERSION_SUFFIX) const char *g_RELION_VERSION = RELION_LONG_VERSION; relion-3.1.3/src/macros.h000066400000000000000000000257241411340063500152410ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ /*************************************************************************** * * Authors: Carlos Oscar S. Sorzano (coss@cnb.csic.es) * * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA * 02111-1307 USA * * All comments concerning this program package may be sent to the * e-mail address 'xmipp@cnb.csic.es' ***************************************************************************/ #ifndef MACROS_H #define MACROS_H #define RELION_SHORT_VERSION "3.1.3" extern const char *g_RELION_VERSION; #include #include #include "src/pipeline_control.h" #include "src/error.h" #ifndef _CYGWIN #ifdef __APPLE__ #include #else #include #endif #endif #ifndef MINFLOAT #define MINFLOAT -1e30 #endif #ifndef MAXFLOAT #define MAXFLOAT 1e30 #endif #ifdef RELION_SINGLE_PRECISION #define RFLOAT float #define LARGE_NUMBER 99e36 #define MY_MPI_DOUBLE MPI_FLOAT #define MY_MPI_COMPLEX MPI_C_COMPLEX #else #define RFLOAT double #define LARGE_NUMBER 99e99 #define MY_MPI_DOUBLE MPI_DOUBLE #define MY_MPI_COMPLEX MPI_C_DOUBLE_COMPLEX #endif #if defined CUDA and DEBUG_CUDA #define CRITICAL(string) raise(SIGSEGV); #else #define CRITICAL(string) REPORT_ERROR(string); #endif //#define DEBUG //#define DEBUG_CHECKSIZES /// @defgroup Macros Macros /// @ingroup DataLibrary //@{ /// @name Constants //@{ /** Pi * @ingroup MacrosConstants */ #ifndef PI #define PI 3.14159265358979323846 #endif /** Equal accuracy * * In a comparison if two values are closer than this epsilon they are said to * be the same. Actually For double precision calculations set to 1e-6, for single-precision set to 1e-4 (finding symmetry subgroups will go wrong otherwise) */ #ifdef RELION_SINGLE_PRECISION #define XMIPP_EQUAL_ACCURACY 1e-4 #else #define XMIPP_EQUAL_ACCURACY 1e-6 #endif //@} /// @name Numerical functions //@{ /** Absolute value * * Valid for any kind of number (int, short, float, etc) * * @code * x = ABS(x); * @endcode */ #ifndef ABS #define ABS(x) (((x) >= 0) ? (x) : (-(x))) #endif /** Sign of * * Valid for any kind of number (int, short, float, etc). It returns +1 or -1 * * @code * if (SGN(x) == -1) * std::cout << "x is negative" << std::endl; * @endcode */ #ifndef SGN #define SGN(x) (((x) >= 0) ? 1 : -1) #endif /** Sign of, considering 0 as 0 * * Valid for any kind of number (int, short, float, etc). It returns +1 if the * number is positive, -1 if the number is negative, and 0 if the number is 0. * * @code * if (SGN0(x) == -1) * std::cout << "x is negative" << std::endl; * @endcode */ #ifndef SGN0 #define SGN0(x) (((x) >= 0) ? (((x) == 0) ? 0:1) : -1) #endif /** Minimum * * Valid for any kind of numbers (int, short, float, etc). * * @code * min_val = XMIPP_MIN(x, y); * @endcode */ #ifndef XMIPP_MIN #define XMIPP_MIN(x, y) (((x) >= (y)) ? (y) : (x)) #endif /** Maximum * * Valid for any kind of numbers (int, short, float, etc). * * @code * max_val = XMIPP_MAX(x, y); * @endcode */ #ifndef XMIPP_MAX #define XMIPP_MAX(x,y) (((x)>=(y))?(x):(y)) #endif /** Round to next integer * * Valid for any kind of numbers (int, short, float, etc). The result is of type * integer. * * @code * a = ROUND(-0.8); // a = -1 * a = ROUND(-0.2); // a = 0 * a = ROUND(0.2); // a = 0 * a = ROUND(0.8); // a = 1 * @endcode */ #ifndef ROUND #define ROUND(x) (((x) > 0) ? (int)((x) + 0.5) : (int)((x) - 0.5)) #endif /** Round to next larger integer * * Valid for any kind of numbers (int, short, float, etc). The result is of type * integer. 
* * @code * a = CEIL(-0.8); // a = 0 * a = CEIL(-0.2); // a = 0 * a = CEIL(0.2); // a = 1 * a = CEIL(0.8); // a = 1 * @endcode */ #define CEIL(x) (((x) == (int)(x)) ? (int)(x):(((x) > 0) ? (int)((x) + 1) : (int)(x))) /** Round to next smaller integer * * Valid for any kind of numbers (int, short, float, etc). The result is of type * integer. * * @code * a = FLOOR(-0.8); // a = -1 * a = FLOOR(-0.2); // a = -1 * a = FLOOR(0.2); // a = 0 * a = FLOOR(0.8); // a = 0 * @endcode */ #define FLOOR(x) (((x) == (int)(x)) ? (int)(x):(((x) > 0) ? (int)(x) : (int)((x) - 1))) /** Return the fractional part of a value * * The fractional part of 3.7 is 0.7 and of -3.7 is -0.7. */ #define FRACTION(x) ((x) - (int)(x)) /** Clip in a saturation fashion * * CLIP is a macro which acts like a saturation curve, a value x is "clipped" to * a range defined by x0 and xF, for example the output values for the following * x and CLIP(x,-2,2) would be * * @code * x = ... -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 ... * output = ... -2 -2 -2 -2 -2 -2 -2 -1 0 1 2 2 2 2 2 2 2 ... * @endcode */ #define CLIP(x, x0, xF) (((x) < (x0)) ? (x0) : (((x) > (xF)) ? (xF) : (x))) /** Wrapping for integers * * intWRAP performs a wrapping in the integer set, when the cycle is finsihed it * begins again. For example, for intWRAP(x,-2,2) would be * * @code * x = ... -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 ... * output = ... 2 -2 -1 0 1 2 -2 -1 0 1 2 -2 -1 0 1 2 -2 ... * @endcode */ #define intWRAP(x, x0, xF) (((x) >= (x0) && (x) <= (xF)) ? (x) : ((x) < (x0)) ? ((x) - (int)(((x) - (x0) + 1) / ((xF) - (x0) + 1) - 1) * ((xF) - (x0) + 1)) : ((x) - (int)(((x) - (xF) - 1) / ((xF) - (x0) + 1) + 1) * ((xF) - (x0) + 1))) /** Wrapping for real numbers * * realWRAP is used to keep a floating number between a range with a wrapping * fashion. For instance, it is used in trigonometry to say that an angle of * 5*PI is the same as PI, ie, to keep an angle in the range 0...2*PI * * @code * Corrected_angle = realWRAP(angle, 0, 2*PI); * @endcode */ #define realWRAP(x, x0, xF) (((x) >= (x0) && (x) <= (xF)) ? (x) : ((x) < (x0)) ? ((x) - (int)(((x) - (x0)) / ((xF) - (x0)) - 1) * ((xF) - (x0))) : ((x) - (int)(((x) - (xF)) / ((xF) - (x0)) + 1) * ((xF) - (x0)))) /** Degrees to radians * * @code * angle_in_radians = DEG2RAD(ang_in_degrees); * @endcode */ #define DEG2RAD(d) ((d) * PI / 180) /** Radians to degrees * * @code * angle_in_degrees = RAD2DEG(ang_in_radians); * @endcode */ #define RAD2DEG(r) ((r) * 180 / PI) /** Cosine in degrees * * @code * if (COSD(90) == 0) * std::cout << "This is in degrees!\n"; * @endcode */ #define COSD(x) cos(PI * (x) / 180.) /** ArcCosine in degrees * * @code * if (ACOSD(0.5) == 60) * std::cout << "This is in degrees!\n"; * @endcode */ #define ACOSD(x) acos((x)) * 180. / PI /** Sine in degrees * * @code * if (SIND(90) == 1) * std::cout << "This is in degrees!\n"; * @endcode */ #define SIND(x) sin(PI * (x) / 180.) /** ArcSine in degrees * * @code * if (ASIND(0.5) == 30.) * std::cout << "This is in degrees!\n"; * @endcode */ #define ASIND(x) asin((x)) * 180. / PI /** SINC function * * The sinc function is defined as sin(PI*x)/(PI*x). */ #define SINC(x) (((x) < 0.0001 && (x) > -0.0001) ? 1 : sin(PI * (x)) / (PI * (x))) #if defined HAVE_SINCOS || defined DOXGEN /** Sincos function * * Wrappper to make sincos(x,&sinval,&cosval) work for all compilers. */ #define SINCOS(x,s,c) sincos(x,s,c) /** Sincosf function * * Wrappper to make sincosf(x,&sinval,&cosval) work for all compilers. 
*/ #define SINCOSF(x,s,c) sincosf(x,s,c) #elif defined HAVE___SINCOS // Use __sincos and __sincosf instead (primarily clang) #define SINCOS(x,s,c) __sincos(x,s,c) #define SINCOSF(x,s,c) __sincosf(x,s,c) #else // Neither sincos or __sincos available, use raw functions. static void SINCOS(double x, double *s, double *c) { *s = sin(x); *c = cos(x); } static void SINCOSF(float x, float *s, float *c) { *s = sinf(x); *c = cosf(x); } #endif /** Returns next positive power_class of 2 * * It is supposed that the given number is positive although it's not needed to * be an integer * * @code * next_power = NEXT_POWER_OF_2(1000); // next_power = 1024 * @endcode */ #define NEXT_POWER_OF_2(x) pow(2, ceil(log((RFLOAT) x) / log(2.0)-XMIPP_EQUAL_ACCURACY) ) /** Linear interpolation * * From low (when a=0) to high (when a=1). The following value is returned * (equal to (a*h)+((1-a)*l) */ #define LIN_INTERP(a, l, h) ((l) + ((h) - (l)) * (a)) /** XOR * * Logical Xor */ #define XOR(a, b) (((a) && !(b)) || (!(a) && (b))) //@} /// @name Miscellaneous //@{ /** Swap two values * * It uses a temporal variable which must be of the same type as the two * parameters */ #define SWAP(a, b, tmp) { tmp = a; a = b; b = tmp; } /** Starting point for Xmipp volume/image * * Given a size (in some direction), this function returns the first index for * a volume/image/array with this size. The formula is -(int) ((float) (size) * / 2.0) */ #define FIRST_XMIPP_INDEX(size) -(long int)((float) (size) / 2.0) /** Starting point for Xmipp volume/image * @ingroup MacrosMisc * * Given a size (in some direction), this function returns the first index for a * volume/image/array with this size. The formula is FIRST_XMIPP_INDEX(size) + * (size) - 1 */ #define LAST_XMIPP_INDEX(size) FIRST_XMIPP_INDEX(size) + (size) - 1 static void PRINT_VERSION_INFO() { std::cout << "RELION version: " << g_RELION_VERSION << " " #if defined(DEBUG) || defined(DEBUG_CUDA) << "(debug-build) " #endif << std::endl << "Precision: " #ifdef RELION_SINGLE_PRECISION << "BASE=single" #else << "BASE=double" #endif #if defined(CUDA) || defined(ALTCPU) #ifdef CUDA << ", CUDA-ACC=" #endif #ifdef ALTCPU << ", VECTOR-ACC=" #endif #ifdef ACC_DOUBLE_PRECISION << "double " #else << "single " #endif #endif << std::endl << std::endl; } //@} //@} #endif relion-3.1.3/src/manualpicker.cpp000066400000000000000000000503031411340063500167520ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #include "src/manualpicker.h" std::vector imics; std::vector global_fn_mics; std::vector global_fn_ctfs; std::vector selected; std::vector number_picked; std::vector viewmic_buttons; std::vector viewctf_buttons; std::vector text_displays; std::vector count_displays; std::vector defocus_displays; std::vector check_buttons; int first_pick_viewed, last_pick_viewed; int last_ctf_viewed; bool global_has_ctf; bool global_pick_startend; RFLOAT global_angpix; RFLOAT global_coord_scale; RFLOAT global_lowpass; RFLOAT global_highpass; RFLOAT global_particle_diameter; RFLOAT global_sigma_contrast; RFLOAT global_black_val; RFLOAT global_white_val; RFLOAT global_micscale; RFLOAT global_ctfscale; RFLOAT global_ctfsigma; RFLOAT global_blue_value; RFLOAT global_red_value; int global_total_count; int global_nr_simultaneous; FileName global_fn_odir; FileName global_pickname; FileName global_fn_color; FileName global_color_label; bool global_do_color; void cb_viewmic(Fl_Widget* w, void* data) { // Get my own number back int *iptr = (int*)data; int imic = *iptr; const bool with_control = (Fl::event_ctrl() != 0); int nr_simultaneous = (with_control) ? global_nr_simultaneous : 1; // Update the count of the last one we picked... for (int mymic = first_pick_viewed; mymic <= last_pick_viewed; mymic++) { if (mymic >= 0 && mymic < count_displays.size()) { MetaDataTable MDcoord; FileName fn_pre, fn_jobnr, fn_post; decomposePipelineSymlinkName(global_fn_mics[mymic], fn_pre, fn_jobnr, fn_post); FileName fn_coord = global_fn_odir + fn_post.withoutExtension() + "_" + global_pickname + ".star"; int my_nr_picked; if (exists(fn_coord)) { MDcoord.read(fn_coord); my_nr_picked = MDcoord.numberOfObjects(); } else { my_nr_picked = 0; } Fl_Text_Buffer *textbuff2 = new Fl_Text_Buffer(); textbuff2->text(floatToString(my_nr_picked).c_str()); count_displays[mymic]->buffer(textbuff2); count_displays[mymic]->redraw(); // Also reset the color of the button to light viewmic_buttons[mymic]->color(GUI_BUTTON_COLOR, GUI_BUTTON_COLOR); viewmic_buttons[mymic]->redraw(); } } // Launch the picking window first_pick_viewed = imic; last_pick_viewed = XMIPP_MIN(global_fn_mics.size() - 1, imic + nr_simultaneous - 1); for (int mymic = first_pick_viewed; mymic <= last_pick_viewed; mymic++) { FileName fn_pre, fn_jobnr, fn_post; decomposePipelineSymlinkName(global_fn_mics[mymic], fn_pre, fn_jobnr, fn_post); FileName fn_coord = global_fn_odir + fn_post.withoutExtension() + "_" + global_pickname + ".star"; int rad = ROUND(global_particle_diameter/(2. 
* global_angpix)); std::string command; command = "relion_display --pick --i " + global_fn_mics[mymic]; command += " --coords " + fn_coord; command += " --scale " + floatToString(global_micscale); command += " --coord_scale " + floatToString(global_coord_scale); command += " --black " + floatToString(global_black_val); command += " --white " + floatToString(global_white_val); command += " --sigma_contrast " + floatToString(global_sigma_contrast); command += " --particle_radius " + floatToString(rad); command += " --lowpass " + floatToString(global_lowpass); command += " --highpass " + floatToString(global_highpass); command += " --angpix " + floatToString(global_angpix); if (global_pick_startend) command += " --pick_start_end "; if (global_color_label != "") { command += " --color_label " + global_color_label; command += " --blue " + floatToString(global_blue_value); command += " --red " + floatToString(global_red_value); if (global_fn_color != "") command += " --color_star " + global_fn_color; } command += " &"; int res = system(command.c_str()); } for (int i = 0; i < viewmic_buttons.size(); i++) { if (i >= first_pick_viewed && i <= last_pick_viewed) { viewmic_buttons[i]->color(GUI_BUTTON_DARK_COLOR, GUI_BUTTON_DARK_COLOR); } else { viewmic_buttons[i]->color(GUI_BUTTON_COLOR, GUI_BUTTON_COLOR); } viewmic_buttons[i]->redraw(); } } void cb_viewctf(Fl_Widget* w, void* data) { // Get my own number back int *iptr = (int*)data; int imic = *iptr; std::string command; command = "relion_display --i " + global_fn_ctfs[imic]; command += " --scale " + floatToString(global_ctfscale); command += " --sigma_contrast " + floatToString(global_ctfsigma); command += " &"; int res = system(command.c_str()); last_ctf_viewed = imic; for (int i = 0; i < viewctf_buttons.size(); i++) { if (i == last_ctf_viewed) { viewctf_buttons[i]->color(GUI_BUTTON_DARK_COLOR, GUI_BUTTON_DARK_COLOR); } else { viewctf_buttons[i]->color(GUI_BUTTON_COLOR, GUI_BUTTON_COLOR); } } } void cb_selectmic(Fl_Widget* w, void* data) { // Get my own number back int *iptr = (int*)data; int imic = *iptr; Fl_Text_Buffer *textbuff2 = new Fl_Text_Buffer(); selected[imic] = !selected[imic]; if (selected[imic]) { text_displays[imic]->color(GUI_INPUT_COLOR, GUI_INPUT_COLOR); text_displays[imic]->activate(); viewmic_buttons[imic]->activate(); count_displays[imic]->color(GUI_INPUT_COLOR, GUI_INPUT_COLOR); textbuff2->text(floatToString(number_picked[imic]).c_str()); count_displays[imic]->buffer(textbuff2); count_displays[imic]->activate(); if (global_has_ctf) { viewctf_buttons[imic]->activate(); defocus_displays[imic]->color(GUI_INPUT_COLOR, GUI_INPUT_COLOR); defocus_displays[imic]->activate(); } } else { text_displays[imic]->color(GUI_BACKGROUND_COLOR, GUI_BACKGROUND_COLOR); text_displays[imic]->deactivate(); viewmic_buttons[imic]->deactivate(); count_displays[imic]->color(GUI_BACKGROUND_COLOR, GUI_BACKGROUND_COLOR); textbuff2->text(""); count_displays[imic]->buffer(textbuff2); count_displays[imic]->deactivate(); if (global_has_ctf) { viewctf_buttons[imic]->deactivate(); defocus_displays[imic]->color(GUI_BACKGROUND_COLOR, GUI_BACKGROUND_COLOR); defocus_displays[imic]->deactivate(); } } } int manualpickerGuiWindow::fill() { color(GUI_BACKGROUND_COLOR); Fl_Menu_Bar *menubar = new Fl_Menu_Bar(0, 0, w(), 25); if (do_allow_save) { menubar->add("File/Save selection", FL_ALT+'s', cb_menubar_save, this); menubar->add("File/Invert selection", FL_ALT+'i', cb_menubar_invert_selection, this); } menubar->add("File/Recount picked particles", FL_ALT+'c', 
cb_menubar_recount, this); menubar->add("File/Quit", FL_ALT+'q', cb_menubar_quit, this); int current_y = 25; // Scroll bars Fl_Scroll scroll(0, current_y, w(), h()-current_y); scroll.type(Fl_Scroll::VERTICAL); selected.clear(); number_picked.clear(); global_has_ctf = MDin.containsLabel(EMDL_CTF_IMAGE); FileName fn_mic, fn_ctf; int ystep = 35; imics.clear(); for (int ii =0; ii < MDin.numberOfObjects(); ii++) { imics.push_back(ii); } int imic =0; global_fn_mics.clear(); global_fn_ctfs.clear(); text_displays.clear(); viewmic_buttons.clear(); viewctf_buttons.clear(); number_picked.clear(); FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDin) { MDin.getValue(EMDL_MICROGRAPH_NAME, fn_mic); // Display the name of the micrograph global_fn_mics.push_back(fn_mic); Fl_Check_Button *mycheck = new Fl_Check_Button(4, current_y, ystep-8, ystep-8, ""); mycheck->callback(cb_selectmic, &(imics[imic])); mycheck->value(1); if (!do_allow_save) mycheck->deactivate(); selected.push_back(true); check_buttons.push_back(mycheck); Fl_Text_Buffer *textbuff = new Fl_Text_Buffer(); textbuff->text(fn_mic.c_str()); int ystep2 = (fn_mic.length() > MWCOL1/12) ? ystep - 5 : ystep - 10; Fl_Text_Display* mydisp = new Fl_Text_Display(MXCOL0, current_y, MWCOL1, ystep2); mydisp->scrollbar_width(5); mydisp->buffer(textbuff); mydisp->scroll(0, 9999); mydisp->color(GUI_INPUT_COLOR, GUI_INPUT_COLOR); text_displays.push_back(mydisp); // Button to display the micrographimage Fl_Button *myviewmic = new Fl_Button(MXCOL1, current_y, MWCOL2, ystep-5, "pick"); myviewmic->color(GUI_BUTTON_COLOR); myviewmic->callback(cb_viewmic, &(imics[imic])); viewmic_buttons.push_back(myviewmic); // Count how many particles have been picked Fl_Text_Buffer *textbuff2 = new Fl_Text_Buffer(); textbuff2->text(""); Fl_Text_Display* mycount = new Fl_Text_Display(MXCOL2, current_y, MWCOL3, ystep-5); mycount->color(GUI_INPUT_COLOR, GUI_INPUT_COLOR); mycount->buffer(textbuff2); count_displays.push_back(mycount); number_picked.push_back(10); // Button to display the CTF image if (global_has_ctf) { MDin.getValue(EMDL_CTF_IMAGE, fn_ctf); global_fn_ctfs.push_back(fn_ctf); // Button to display the CTF image Fl_Button *myviewctf = new Fl_Button(MXCOL3, current_y, MWCOL4, ystep-5, "CTF"); myviewctf->color(GUI_BUTTON_COLOR); myviewctf->callback(cb_viewctf, &(imics[imic])); viewctf_buttons.push_back(myviewctf); Fl_Text_Buffer *textbuffDF = new Fl_Text_Buffer(); RFLOAT defocus; MDin.getValue(EMDL_CTF_DEFOCUSU, defocus); std::ostringstream os; os << defocus; std::string str = os.str(); textbuffDF->text(str.c_str()); Fl_Text_Display* myDF = new Fl_Text_Display(MXCOL4, current_y, MWCOL4, ystep-5); myDF->color(GUI_INPUT_COLOR, GUI_INPUT_COLOR); myDF->buffer(textbuffDF); defocus_displays.push_back(myDF); } imic++; current_y += ystep; } // See if the output STAR file already exists, if so apply that selection if (do_allow_save) readOutputStarfile(); if (do_fast_save) cb_menubar_save_i(); // Also count the number of particles that were already picked cb_menubar_recount_i(); resizable(*this); show(); return Fl::run(); } void manualpickerGuiWindow::readOutputStarfile() { if (exists(fn_sel)) { for (int imic = 0; imic < selected.size(); imic++) selected[imic] = false; MetaDataTable MDout; ObservationModel::loadSafely(fn_sel, obsModel, MDout, "micrographs"); FileName fn_mic, fn_mic_in; for (int imic = 0; imic < selected.size(); imic++) { MDin.getValue(EMDL_MICROGRAPH_NAME, fn_mic_in, imic); bool has_found = false; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDout) { MDout.getValue(EMDL_MICROGRAPH_NAME, 
fn_mic); // Which one in the input metadatatable was this one? if (fn_mic == fn_mic_in) { has_found = true; break; } } selected[imic] = has_found; if (has_found) { check_buttons[imic]->value(1); text_displays[imic]->color(GUI_INPUT_COLOR, GUI_INPUT_COLOR); text_displays[imic]->activate(); viewmic_buttons[imic]->activate(); count_displays[imic]->color(GUI_INPUT_COLOR, GUI_INPUT_COLOR); count_displays[imic]->activate(); if (global_has_ctf) viewctf_buttons[imic]->activate(); } else { check_buttons[imic]->value(0); text_displays[imic]->color(GUI_BACKGROUND_COLOR, GUI_BACKGROUND_COLOR); text_displays[imic]->deactivate(); viewmic_buttons[imic]->deactivate(); count_displays[imic]->color(GUI_BACKGROUND_COLOR, GUI_BACKGROUND_COLOR); count_displays[imic]->deactivate(); if (global_has_ctf) viewctf_buttons[imic]->deactivate(); } } } } void manualpickerGuiWindow::writeOutputStarfile() { MetaDataTable MDout; for (int imic = 0; imic < selected.size(); imic++) { if (selected[imic]) { MDout.addObject(MDin.getObject(imic)); } } if (obsModel.opticsMdt.numberOfObjects() > 0) { obsModel.save(MDout, fn_sel, "micrographs"); } else { MDout.write(fn_sel); } } void manualpickerGuiWindow::cb_menubar_save(Fl_Widget* w, void* v) { manualpickerGuiWindow* T=(manualpickerGuiWindow*)v; T->cb_menubar_save_i(); } void manualpickerGuiWindow::cb_menubar_save_i() { writeOutputStarfile(); std::cout << " Saved " << fn_sel << std::endl; RELION_EXIT_SUCCESS; } void manualpickerGuiWindow::cb_menubar_invert_selection(Fl_Widget* w, void* v) { manualpickerGuiWindow* T=(manualpickerGuiWindow*)v; T->cb_menubar_invert_selection_i(); } void manualpickerGuiWindow::cb_menubar_invert_selection_i() { for (int imic = 0; imic < selected.size(); imic++) { selected[imic] = !selected[imic]; if (selected[imic]) { check_buttons[imic]->value(1); text_displays[imic]->color(GUI_INPUT_COLOR, GUI_INPUT_COLOR); text_displays[imic]->activate(); viewmic_buttons[imic]->activate(); count_displays[imic]->color(GUI_INPUT_COLOR, GUI_INPUT_COLOR); count_displays[imic]->activate(); if (global_has_ctf) viewctf_buttons[imic]->activate(); } else { check_buttons[imic]->value(0); text_displays[imic]->color(GUI_BACKGROUND_COLOR, GUI_BACKGROUND_COLOR); text_displays[imic]->deactivate(); viewmic_buttons[imic]->deactivate(); count_displays[imic]->color(GUI_BACKGROUND_COLOR, GUI_BACKGROUND_COLOR); count_displays[imic]->deactivate(); if (global_has_ctf) viewctf_buttons[imic]->deactivate(); } } } void manualpickerGuiWindow::cb_menubar_quit(Fl_Widget* w, void* v) { manualpickerGuiWindow* T=(manualpickerGuiWindow*)v; T->cb_menubar_quit_i(); } void manualpickerGuiWindow::cb_menubar_quit_i() { cb_menubar_save_i(); exit(0); } void manualpickerGuiWindow::cb_menubar_recount(Fl_Widget* w, void* v) { manualpickerGuiWindow* T=(manualpickerGuiWindow*)v; T->cb_menubar_recount_i(); } void manualpickerGuiWindow::cb_menubar_recount_i() { global_total_count = 0; int nr_sel_mic = 0; for (int imic = 0; imic < global_fn_mics.size(); imic++) { MetaDataTable MDcoord; FileName fn_pre, fn_jobnr, fn_post; decomposePipelineSymlinkName(global_fn_mics[imic], fn_pre, fn_jobnr, fn_post); FileName fn_coord = global_fn_odir + fn_post.withoutExtension() + "_" + global_pickname + ".star"; int my_nr_picked; if (exists(fn_coord)) { MDcoord.read(fn_coord); my_nr_picked = MDcoord.numberOfObjects(); } else { my_nr_picked = 0; } Fl_Text_Buffer *textbuff2 = new Fl_Text_Buffer(); if (selected[imic]) { global_total_count += my_nr_picked; textbuff2->text(floatToString(my_nr_picked).c_str()); 
count_displays[imic]->buffer(textbuff2); count_displays[imic]->redraw(); nr_sel_mic++; } else { textbuff2->text(""); count_displays[imic]->buffer(textbuff2); } number_picked[imic] = my_nr_picked; } std::cout << " Total number of picked particles: " << global_total_count << " from " << nr_sel_mic << " selected micrographs." << std::endl; } void ManualPicker::read(int argc, char **argv) { parser.setCommandLine(argc, argv); int gen_section = parser.addSection("General options"); fn_in = parser.getOption("--i", "Micrograph STAR file OR filenames from which to pick particles, e.g. \"Micrographs/*.mrc\""); global_fn_odir = parser.getOption("--odir", "Output directory for coordinate files (default is to store next to micrographs)", "ManualPick/"); fn_sel = parser.getOption("--selection", "STAR file with selected micrographs", "micrographs_selected.star"); global_pickname = parser.getOption("--pickname", "Rootname for the picked coordinate files", "manualpick"); global_angpix = textToFloat(parser.getOption("--angpix", "Pixel size in Angstroms", "-1.")); global_coord_scale = textToFloat(parser.getOption("--coord_scale", "Scale coordinates before display", "1.0")); global_particle_diameter = textToFloat(parser.getOption("--particle_diameter", "Diameter of the circles that will be drawn around each picked particle (in Angstroms)")); global_pick_startend = parser.checkOption("--pick_start_end", "Pick start-end coordinates of helices"); do_allow_save = parser.checkOption("--allow_save", "Allow saving of the selected micrographs"); do_fast_save = parser.checkOption("--fast_save", "Save a default selection of all micrographs immediately"); global_nr_simultaneous = textToInteger(parser.getOption("--open_simultaneous", "Open this many of the next micrographs simultaneously when pressing CTRL and a Pick button", "10")); int mic_section = parser.addSection("Displaying options"); global_micscale = textToFloat(parser.getOption("--scale", "Relative scale for the micrograph display", "1")); global_black_val = textToFloat(parser.getOption("--black", "Pixel value for black (default is auto-contrast)", "0")); global_white_val = textToFloat(parser.getOption("--white", "Pixel value for white (default is auto-contrast)", "0")); global_sigma_contrast = textToFloat(parser.getOption("--sigma_contrast", "Set white and black pixel values this many times the image stddev from the mean (default is auto-contrast)", "0")); global_lowpass = textToFloat(parser.getOption("--lowpass", "Lowpass filter in Angstroms for the micrograph (0 for no filtering)","0")); global_highpass = textToFloat(parser.getOption("--highpass", "Highpass filter in Angstroms for the micrograph (0 for no filtering)","0")); global_ctfscale = textToFloat(parser.getOption("--ctf_scale", "Relative scale for the CTF-image display", "1")); global_ctfsigma = textToFloat(parser.getOption("--ctf_sigma_contrast", "Sigma-contrast for the CTF-image display", "3")); // coloring global_fn_color = parser.getOption("--color_star", "STAR file with a column for red-blue coloring (a subset of) the particles", ""); global_color_label = parser.getOption("--color_label", "MetaDataLabel to color particles on (e.g. 
rlnParticleSelectZScore)", ""); global_blue_value = textToFloat(parser.getOption("--blue", "Value of the blue color", "1.")); global_red_value = textToFloat(parser.getOption("--red", "Value of the red color", "0.")); // Check for errors in the command-line option if (parser.checkForErrors()) REPORT_ERROR("Errors encountered on the command line (see above), exiting..."); } void ManualPicker::usage() { parser.writeUsage(std::cout); } void ManualPicker::initialise() { if (fn_in.isStarFile()) { ObservationModel::loadSafely(fn_in, obsModel, MDin, "micrographs"); if (obsModel.opticsMdt.containsLabel(EMDL_MICROGRAPH_PIXEL_SIZE)) { obsModel.opticsMdt.getValue(EMDL_MICROGRAPH_PIXEL_SIZE, global_angpix, 0); std::cout << " Setting angpix to " << global_angpix << " based on the input STAR file... " << std::endl; } else { if (global_angpix < 0.) { REPORT_ERROR("ERROR: the input STAR file does not contain the micrograph pixel size, and it is not given through --angpix."); } std::cout << " Setting angpix to " << global_angpix << " based on command-line input... " << std::endl; FOR_ALL_OBJECTS_IN_METADATA_TABLE(obsModel.opticsMdt) { obsModel.opticsMdt.setValue(EMDL_MICROGRAPH_PIXEL_SIZE, global_angpix); } } } else { std::vector glob_fn_mics; fn_in.globFiles(glob_fn_mics); for (int imic = 0; imic < glob_fn_mics.size(); imic++) { MDin.addObject(); MDin.setValue(EMDL_MICROGRAPH_NAME, glob_fn_mics[imic]); } if (global_angpix < 0.) { std::cerr << " WARNING: no --angpix provided and no information about pixel size in input STAR file. Setting angpix to 1..." << std::endl; global_angpix = 1.; } } // If we down-scale the micrograph: always low-pass filter to get better displays if (global_micscale < 1.) { RFLOAT new_nyquist = global_angpix * 2. / global_micscale; if (new_nyquist > global_lowpass) global_lowpass = new_nyquist; std::cout << " Set low-pass filter to " << global_lowpass << " due to downscaling of " << global_micscale << std::endl; } } void ManualPicker::run() { Fl::scheme("gtk+"); manualpickerGuiWindow win(TOTALWIDTH, TOTALHEIGHT, "RELION manual-picking GUI"); // Transfer all parameters to the gui win.MDin = MDin; win.obsModel = obsModel; win.fn_sel = fn_sel; win.do_allow_save = do_allow_save; win.do_fast_save = do_fast_save; win.fill(); } relion-3.1.3/src/manualpicker.h000066400000000000000000000077361411340063500164330ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #ifndef MANUALPICKER_H_ #define MANUALPICKER_H_ // this define, and the undef below the FL includes, protects against another Complex definition in fltk #define Complex tmpComplex #include #include #include #include #include #include #include #include #include #include #include #include #undef Complex #include "src/metadata_table.h" #include "src/args.h" #include "src/funcs.h" #include "src/filename.h" #include "src/gui_entries.h" #include "src/jaz/obs_model.h" #define MWCOL1 300 #define MWCOL2 60 #define MWCOL3 60 #define MWCOL4 80 #define MXCOL0 30 #define MXCOL1 (MXCOL0 + MWCOL1 + 10) #define MXCOL2 (MXCOL1 + MWCOL2 + 10) #define MXCOL3 (MXCOL2 + MWCOL3 + 10) #define MXCOL4 (MXCOL3 + MWCOL4 + 10) #define TOTALWIDTH (MWCOL1 + MWCOL2 + MWCOL3 + MWCOL4 + MWCOL4 + 100) #define TOTALHEIGHT 500 // The button for picking particles void cb_viewmic(Fl_Widget* w, void* data); // The button for viewing the CTF void cb_viewctf(Fl_Widget* w, void* data); // The selection button void cb_selectmic(Fl_Widget* w, void* data); // This class only puts scrollbars around the resizable canvas class manualpickerGuiWindow : public Fl_Window { public: // Input, picking & output names FileName fn_in, fn_sel; // Allow saving selected micrographs? bool do_allow_save; // Save default selection immediately? (useful for always generating output files in pipeline) bool do_fast_save; // MetaDataTable of input micrographs MetaDataTable MDin; // Observation model of input micrographs ObservationModel obsModel; // Constructor with w x h size of the window and a title manualpickerGuiWindow(int W, int H, const char* title=0): Fl_Window(W, H, title){} // Fill the window with all entries int fill(); private: static void cb_menubar_save(Fl_Widget*, void*); inline void cb_menubar_save_i(); static void cb_menubar_invert_selection(Fl_Widget*, void*); inline void cb_menubar_invert_selection_i(); static void cb_menubar_quit(Fl_Widget*, void*); inline void cb_menubar_quit_i(); static void cb_menubar_recount(Fl_Widget*, void*); inline void cb_menubar_recount_i(); void readOutputStarfile(); void writeOutputStarfile(); }; class ManualPicker { public: // I/O Parser IOParser parser; // The input micrographs MetaDataTable MDin; // Observation model for the input mirographs ObservationModel obsModel; // Input, picking & output names FileName fn_in, fn_sel; // Allow save selected micrographs? bool do_allow_save; // Save an output selection file immediately (with all micrographs selected) bool do_fast_save; public: // Read command line arguments void read(int argc, char **argv); // Print usage instructions void usage(); // Initialise some general stuff after reading void initialise(); // General function to decide what to do void run(); private: void writeOutput(); }; #endif /* MANUALPICKER_H_ */ relion-3.1.3/src/mask.cpp000066400000000000000000000400721411340063500152340ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include #include "src/mask.h" // https://stackoverflow.com/questions/48273190/undefined-symbol-error-for-stdstringempty-c-standard-method-linking-error/48273604#48273604 #if defined(__APPLE__) // explicit instantiation of std::string needed, otherwise we get a linker error on osx // thats a bug in libc++, because of interaction with __attribute__ ((__visibility__("hidden"), __always_inline__)) in std::string template class std::basic_string; #endif // Workaround for compiler versions before 2018 update 2 #ifdef __INTEL_COMPILER # if (__INTEL_COMPILER<1800) # pragma optimize ("", off) # endif # if (__INTEL_COMPILER==1800) # if (__INTEL_COMPILER_UPDATE<2) # pragma optimize ("", off) # endif # endif #endif // Mask out corners outside sphere (replace by average value) // Apply a soft mask (raised cosine with cosine_width pixels width) void softMaskOutsideMap(MultidimArray &vol, RFLOAT radius, RFLOAT cosine_width, MultidimArray *Mnoise) { vol.setXmippOrigin(); RFLOAT r, radius_p, raisedcos, sum_bg = 0., sum = 0.; if (radius < 0) radius = (RFLOAT)XSIZE(vol)/2.; radius_p = radius + cosine_width; if (Mnoise == NULL) { // Calculate average background value FOR_ALL_ELEMENTS_IN_ARRAY3D(vol) { r = sqrt((RFLOAT)(k*k + i*i + j*j)); if (r < radius) continue; else if (r > radius_p) { sum += 1.; sum_bg += A3D_ELEM(vol, k, i, j); } else { raisedcos = 0.5 + 0.5 * cos(PI * (radius_p - r) / cosine_width ); sum += raisedcos; sum_bg += raisedcos * A3D_ELEM(vol, k, i, j); } } sum_bg /= sum; } // Apply noisy or average background value FOR_ALL_ELEMENTS_IN_ARRAY3D(vol) { r = sqrt((RFLOAT)(k*k + i*i + j*j)); if (r < radius) { continue; } else if (r > radius_p) { A3D_ELEM(vol, k, i, j) = (Mnoise == NULL) ? sum_bg : A3D_ELEM(*Mnoise, k, i, j); } else { raisedcos = 0.5 + 0.5 * cos(PI * (radius_p - r) / cosine_width ); RFLOAT add = (Mnoise == NULL) ? sum_bg : A3D_ELEM(*Mnoise, k, i, j); A3D_ELEM(vol, k, i, j) = (1 - raisedcos) * A3D_ELEM(vol, k, i, j) + raisedcos * add; } } } // May27,2015 - Shaoda, Helical refinement void softMaskOutsideMapForHelix( MultidimArray &vol, RFLOAT psi_deg, RFLOAT tilt_deg, RFLOAT mask_sphere_radius_pix, RFLOAT mask_cyl_radius_pix, RFLOAT cosine_width, MultidimArray *Mnoise) { Matrix1D coords; Matrix2D A; RFLOAT sum_bg, sum, R1, R2, D1, D2, r, d, noise_w, noise_w1, noise_w2, noise_val; int dim = vol.getDim(); int boxsize = -1; // Center the box vol.setXmippOrigin(); // Dimension of a particle (box) should be 2 or 3 if ( (dim != 2) && (dim != 3) ) { REPORT_ERROR("mask.cpp::softMaskOutsideMapForHelix(): Dimension of particles should be 2 or 3!"); return; } // Check the shape of Mnoise if ( (Mnoise != NULL) && ((*Mnoise).sameShape(vol) == false) ) { REPORT_ERROR("mask.cpp::softMaskOutsideMapForHelix(): Input particle and Mnoise should have same shape!"); return; } // Box size is the minimum value of the 2 or 3 dimensions boxsize = (XSIZE(vol) < YSIZE(vol)) ? 
XSIZE(vol) : YSIZE(vol); // If it is a 2D particle, tilt angle does not apply if (dim == 2) tilt_deg = 0.; else boxsize = (boxsize < ZSIZE(vol)) ? boxsize : ZSIZE(vol); boxsize = boxsize / 2 - ((boxsize + 1) % 2); // Diameter of the cylindrical mask around the helix should not exceed the box size, otherwise noise cannot be estimated if ( (cosine_width < 0.) || (mask_sphere_radius_pix < 1.) || (mask_sphere_radius_pix > boxsize) || (mask_cyl_radius_pix < 1.) || (mask_cyl_radius_pix > boxsize) || (mask_sphere_radius_pix < mask_cyl_radius_pix) ) REPORT_ERROR("mask.cpp::softMaskOutsideMapForHelix(): Invalid radii of spherical and cylindrical masks or soft cosine widths!"); // Spherical mask: 0 < R1 < R2 R1 = mask_sphere_radius_pix; R2 = R1 + cosine_width; // Cylindrical mask: 0 < D1 < D2 D1 = mask_cyl_radius_pix; D2 = D1 + cosine_width; // Init coords coords.clear(); coords.resize(3); coords.initZeros(); // Init rotational matrix A A.clear(); A.resize(3, 3); // Rotate the particle (helical axes are X and Z for 2D and 3D segments respectively) Euler_angles2matrix(0., tilt_deg, psi_deg, A, false); // Don't put negative signs before tilt and psi values, use 'transpose' instead A = A.transpose(); // Calculate noise weights for all voxels sum_bg = sum = 0.; if (Mnoise == NULL) { FOR_ALL_ELEMENTS_IN_ARRAY3D(vol) { // X, Y, Z coordinates if (dim == 3) ZZ(coords) = ((RFLOAT)(k)); else ZZ(coords) = 0.; YY(coords) = ((RFLOAT)(i)); XX(coords) = ((RFLOAT)(j)); // Rotate coords = A * coords; // Distance from the point to helical axis (perpendicular to X axis) if (dim == 3) d = sqrt(YY(coords) * YY(coords) + XX(coords) * XX(coords)); else d = ABS(YY(coords)); if (d > D2) // Noise areas (get values for noise estimations) { sum_bg += A3D_ELEM(vol, k, i, j); sum += 1.; } else if (d > D1) // Edges of noise areas (get values and weights for noise estimations) { noise_w = 0.5 + 0.5 * cos(PI * (D2 - d) / cosine_width ); sum_bg += noise_w * A3D_ELEM(vol, k, i, j); sum += noise_w; } } // Test (this should not happen) if (sum < 0.00001) REPORT_ERROR("mask.cpp::softMaskOutsideMapForHelix(): No background (noise) areas found in this particle!"); sum_bg /= sum; } // Apply noisy or average background value noise_val = sum_bg; FOR_ALL_ELEMENTS_IN_ARRAY3D(vol) { // X, Y, Z coordinates if (dim == 3) ZZ(coords) = ((RFLOAT)(k)); else ZZ(coords) = 0.; YY(coords) = ((RFLOAT)(i)); XX(coords) = ((RFLOAT)(j)); // Rotate coords = A * coords; // Distance from the point to helical axis (perpendicular to X axis) if (dim == 3) d = sqrt(YY(coords) * YY(coords) + XX(coords) * XX(coords)); else d = ABS(YY(coords)); // Distance from the origin r = (RFLOAT)(i * i + j * j); if (dim == 3) r += (RFLOAT)(k * k); r = sqrt(r); // Info areas if ( (r < R1) && (d < D1) ) continue; if (Mnoise != NULL) noise_val = A3D_ELEM(*Mnoise, k, i, j); if ( (r > R2) || (d > D2) ) // Noise areas, fill in background values A3D_ELEM(vol, k, i, j) = noise_val; else // Edges of info areas { noise_w1 = noise_w2 = 0.; if (r > R1) noise_w1 = 0.5 + 0.5 * cos(PI * (R2 - r) / cosine_width ); if (d > D1) noise_w2 = 0.5 + 0.5 * cos(PI * (D2 - d) / cosine_width ); noise_w = (noise_w1 > noise_w2) ? (noise_w1) : (noise_w2); A3D_ELEM(vol, k, i, j) = (1. 
- noise_w) * A3D_ELEM(vol, k, i, j) + noise_w * noise_val; } } return; } // Workaround for compiler versions before 2018 update 2 #ifdef __INTEL_COMPILER # if (__INTEL_COMPILER<1800) # pragma optimize ("", on) # endif # if (__INTEL_COMPILER==1800) # if (__INTEL_COMPILER_UPDATE<2) # pragma optimize ("", on) # endif # endif #endif void softMaskOutsideMap(MultidimArray &vol, MultidimArray &msk, bool invert_mask) { if (msk.computeMax() > 1. || msk.computeMin() < 0.) { std::cerr << " msk.computeMax()= " << msk.computeMax() << " msk.computeMin()= " << msk.computeMin() << std::endl; REPORT_ERROR("ERROR: Values in the solvent mask should be between zero and one."); } if (!(msk.sameShape(vol))) REPORT_ERROR("ERROR: Solvent mask does not have the same size as the reference vol."); // Replace solvent by the average value in the solvent region RFLOAT sum = 0.; RFLOAT sum_bg = 0.; RFLOAT solv; FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY3D(msk) { solv = (invert_mask) ? DIRECT_A3D_ELEM(msk, k, i, j) : 1. - DIRECT_A3D_ELEM(msk, k, i, j); sum += solv; sum_bg += solv * DIRECT_A3D_ELEM(vol, k, i, j); } sum_bg /= sum; FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY3D(msk) { solv = (invert_mask) ? DIRECT_A3D_ELEM(msk, k, i, j) : 1. - DIRECT_A3D_ELEM(msk, k, i, j); DIRECT_A3D_ELEM(vol, k, i, j) = ( 1. - solv) * DIRECT_A3D_ELEM(vol, k, i, j) + solv * sum_bg; } } void autoMask(MultidimArray &img_in, MultidimArray &msk_out, RFLOAT ini_mask_density_threshold, RFLOAT extend_ini_mask, RFLOAT width_soft_mask_edge, bool verb, int n_threads) { MultidimArray msk_cp; int barstep, update_bar, totalbar; // Resize output mask img_in.setXmippOrigin(); msk_out.clear(); msk_out.resize(img_in); // A. Calculate initial binary mask based on density threshold FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(img_in) { if (DIRECT_MULTIDIM_ELEM(img_in, n) >= ini_mask_density_threshold) DIRECT_MULTIDIM_ELEM(msk_out, n) = 1.; else DIRECT_MULTIDIM_ELEM(msk_out, n) = 0.; } // B. extend/shrink initial binary mask. To save memory store a temporary copy of Im in I1 if (extend_ini_mask > 0. || extend_ini_mask < 0.) { if (verb) { if (extend_ini_mask > 0.) std::cout << "== Extending initial binary mask ..." << std::endl; else std::cout << "== Shrinking initial binary mask ..." << std::endl; init_progress_bar(MULTIDIM_SIZE(img_in) / n_threads); barstep = MULTIDIM_SIZE(img_in) / 120 / n_threads; update_bar = 0; totalbar =0; } int extend_size = ABS(CEIL(extend_ini_mask)); RFLOAT extend_ini_mask2 = extend_ini_mask * extend_ini_mask; msk_cp = msk_out; if (extend_ini_mask > 0.) { #pragma omp parallel for num_threads(n_threads) FOR_ALL_ELEMENTS_IN_ARRAY3D(msk_cp) { // only extend zero values to 1. 
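// For every background voxel (value ~0), scan a cube of half-width extend_size around it;
// as soon as one neighbouring voxel of the original binary mask (value ~1) lies within a
// Euclidean distance of extend_ini_mask, the voxel is switched on in msk_out and the search stops.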
if (A3D_ELEM(msk_cp, k, i, j) < 0.001) { bool already_done = false; for (long int kp = k - extend_size; kp <= k + extend_size; kp++) { for (long int ip = i - extend_size; ip <= i + extend_size; ip++) { for (long int jp = j - extend_size; jp <= j + extend_size; jp++) { if ((kp >= STARTINGZ(msk_cp) && kp <= FINISHINGZ(msk_cp)) && (ip >= STARTINGY(msk_cp) && ip <= FINISHINGY(msk_cp)) && (jp >= STARTINGX(msk_cp) && jp <= FINISHINGX(msk_cp))) { // only check distance if neighbouring Im() is one if (A3D_ELEM(msk_cp, kp, ip, jp) > 0.999) { RFLOAT r2 = (RFLOAT)( (kp-k)*(kp-k) + (ip-i)*(ip-i)+ (jp-j)*(jp-j) ); // Set original voxel to 1 if a neghouring with Im()=1 is within distance extend_ini_mask if (r2 < extend_ini_mask2) { A3D_ELEM(msk_out, k, i, j) = 1.; already_done = true; } } } if (already_done) break; } if (already_done) break; } if (already_done) break; } } if (verb && omp_get_thread_num() == 0) { if (update_bar > barstep) { update_bar = 0; progress_bar(totalbar); } update_bar++; totalbar++; } } } else { #pragma omp parallel for num_threads(n_threads) FOR_ALL_ELEMENTS_IN_ARRAY3D(msk_cp) { // only extend one values to zero. if (A3D_ELEM(msk_cp, k, i, j) > 0.999) { bool already_done = false; for (long int kp = k - extend_size; kp <= k + extend_size; kp++) { for (long int ip = i - extend_size; ip <= i + extend_size; ip++) { for (long int jp = j - extend_size; jp <= j + extend_size; jp++) { if ((kp >= STARTINGZ(msk_cp) && kp <= FINISHINGZ(msk_cp)) && (ip >= STARTINGY(msk_cp) && ip <= FINISHINGY(msk_cp)) && (jp >= STARTINGX(msk_cp) && jp <= FINISHINGX(msk_cp))) { // only check distance if neighbouring Im() is one if (A3D_ELEM(msk_cp, kp, ip, jp) < 0.001) { RFLOAT r2 = (RFLOAT)( (kp-k)*(kp-k) + (ip-i)*(ip-i)+ (jp-j)*(jp-j) ); // Set original voxel to 1 if a neghouring with Im()=1 is within distance extend_ini_mask if (r2 < extend_ini_mask2) { A3D_ELEM(msk_out, k, i, j) = 0.; already_done = true; } } } if (already_done) break; } if (already_done) break; } if (already_done) break; } } if (verb && omp_get_thread_num() == 0) { if (update_bar > barstep) { update_bar = 0; progress_bar(totalbar); } update_bar++; totalbar++; } } } if (verb) progress_bar(MULTIDIM_SIZE(msk_out) / n_threads); } if (width_soft_mask_edge > 0.) { if (verb) { std::cout << "== Making a soft edge on the extended mask ..." << std::endl; init_progress_bar(MULTIDIM_SIZE(msk_out) / n_threads); barstep = MULTIDIM_SIZE(msk_out) / 120 / n_threads; update_bar = 0; totalbar =0; } // C. Make a soft edge to the mask // Note that the extended mask is now in I1, and we'll put the soft-edge mask again into Im msk_cp = msk_out; int extend_size = CEIL(width_soft_mask_edge); RFLOAT width_soft_mask_edge2 = width_soft_mask_edge * width_soft_mask_edge; #pragma omp parallel for num_threads(n_threads) FOR_ALL_ELEMENTS_IN_ARRAY3D(msk_cp) { // only extend zero values to values between 0 and 1. 
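// For every voxel outside the (extended) binary mask, find the squared distance min_r2 to the
// nearest mask voxel within a cube of half-width extend_size; voxels closer than width_soft_mask_edge
// are given a raised-cosine value 0.5 + 0.5*cos(PI*sqrt(min_r2)/width_soft_mask_edge).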
if (A3D_ELEM(msk_cp, k, i, j) < 0.001) { RFLOAT min_r2 = 9999.; for (long int kp = k - extend_size; kp <= k + extend_size; kp++) { for (long int ip = i - extend_size; ip <= i + extend_size; ip++) { for (long int jp = j - extend_size; jp <= j + extend_size; jp++) { if ((kp >= STARTINGZ(msk_cp) && kp <= FINISHINGZ(msk_cp)) && (ip >= STARTINGY(msk_cp) && ip <= FINISHINGY(msk_cp)) && (jp >= STARTINGX(msk_cp) && jp <= FINISHINGX(msk_cp))) { // only update distance to a neighbouring msk_cp is one if (A3D_ELEM(msk_cp, kp, ip, jp) > 0.999) { RFLOAT r2 = (RFLOAT)( (kp-k)*(kp-k) + (ip-i)*(ip-i)+ (jp-j)*(jp-j) ); // Set original voxel to 1 if a neghouring with Im()=1 is within distance extend_ini_mask if (r2 < min_r2) min_r2 = r2; } } } } } if (min_r2 < width_soft_mask_edge2) { A3D_ELEM(msk_out, k, i, j) = 0.5 + 0.5 * cos( PI * sqrt(min_r2) / width_soft_mask_edge); } } if (verb && omp_get_thread_num() == 0) { if (update_bar > barstep) { update_bar = 0; progress_bar(totalbar); } update_bar++; totalbar++; } } if (verb) progress_bar(MULTIDIM_SIZE(msk_cp) / n_threads); } } void raisedCosineMask(MultidimArray &mask, RFLOAT radius, RFLOAT radius_p, int x, int y, int z) { mask.setXmippOrigin(); FOR_ALL_ELEMENTS_IN_ARRAY3D(mask) { // calculate distance from the origin RFLOAT d = sqrt((RFLOAT)((z-k)*(z-k) + (y-i)*(y-i) + (x-j)*(x-j))); if (d > radius_p) A3D_ELEM(mask, k, i, j) = 0.; else if (d < radius) A3D_ELEM(mask, k, i, j) = 1.; else A3D_ELEM(mask, k, i, j) = 0.5 - 0.5 * cos(PI * (radius_p - d) / (radius_p - radius)); } } void raisedCrownMask(MultidimArray &mask, RFLOAT inner_radius, RFLOAT outer_radius, RFLOAT width, RFLOAT x, RFLOAT y, RFLOAT z) { RFLOAT inner_border = inner_radius - width; RFLOAT outer_border = outer_radius + width; mask.setXmippOrigin(); FOR_ALL_ELEMENTS_IN_ARRAY3D(mask) { RFLOAT d = sqrt((RFLOAT)((z-k)*(z-k) + (y-i)*(y-i) + (x-j)*(x-j))); if (d < inner_border) A3D_ELEM(mask, k, i, j) = 0.; else if (d < inner_radius) A3D_ELEM(mask, k, i, j) = 0.5 - 0.5 * cos(PI * (d - inner_border) / width); else if (d < outer_radius) A3D_ELEM(mask, k, i, j) = 1.; else if (d < outer_border) A3D_ELEM(mask, k, i, j) = 0.5 - 0.5 * cos(PI * (outer_border - d) / width); else A3D_ELEM(mask, k, i, j) = 0.; } } relion-3.1.3/src/mask.h000066400000000000000000000053201411340063500146760ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #ifndef MASK_H_ #define MASK_H_ #include "src/multidim_array.h" #include "src/fftw.h" #include "src/time.h" #include "src/euler.h" #include "src/macros.h" // Mask out corners outside sphere (replace by average value) // Apply a soft mask (raised cosine with cosine_width pixels width) void softMaskOutsideMap( MultidimArray &vol, RFLOAT radius = -1., RFLOAT cosine_width = 3, MultidimArray *Mnoise = NULL); // May27,2015 - Shaoda, Helical refinement void softMaskOutsideMapForHelix( MultidimArray &vol, RFLOAT psi_deg, RFLOAT tilt_deg, RFLOAT mask_sphere_radius_pix, RFLOAT mask_cyl_radius_pix, RFLOAT cosine_width = 3, MultidimArray *Mnoise = NULL); // Apply a soft mask and set density outside the mask at the average value of those pixels in the original map void softMaskOutsideMap(MultidimArray &vol, MultidimArray &msk, bool invert_mask = false); // Make an automated mask, based on: // 1. initial binarization (based on ini_mask_density_threshold) // 2. Growing extend_ini_mask in all directions // 3. Putting a raised-cosine edge on the mask with width width_soft_mask_edge // If verb, then output description of steps and progress bars void autoMask(MultidimArray &img_in, MultidimArray &msk_out, RFLOAT ini_mask_density_threshold, RFLOAT extend_ini_mask, RFLOAT width_soft_mask_edge, bool verb = false, int n_threads = 1); // Fills mask with a soft-edge circular mask (soft-edge in between radius and radius_p), centred at (x, y, z) void raisedCosineMask(MultidimArray &mask, RFLOAT radius, RFLOAT radius_p, int x, int y, int z = 0); // Make a raised crown mask. void raisedCrownMask(MultidimArray &mask, RFLOAT inner_radius, RFLOAT outer_radius, RFLOAT width, RFLOAT x, RFLOAT y, RFLOAT z); #endif /* MASK_H_ */ relion-3.1.3/src/matrix1d.cpp000066400000000000000000000053721411340063500160360ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ /*************************************************************************** * * Authors: Carlos Oscar S. Sorzano (coss@cnb.csic.es) * * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA * 02111-1307 USA * * All comments concerning this program package may be sent to the * e-mail address 'xmipp@cnb.csic.es' ***************************************************************************/ #include "src/matrix1d.h" Matrix1D vectorR2(RFLOAT x, RFLOAT y) { Matrix1D result(2); result( 0) = x; result( 1) = y; return result; } Matrix1D vectorR3(RFLOAT x, RFLOAT y, RFLOAT z) { Matrix1D result(3); result( 0) = x; result( 1) = y; result( 2) = z; return result; } // This function only makes sense after all code has been modified with 'sed' to allow single-precision runs #ifdef RELION_SINGLE_PRECISION Matrix1D vectorR3(double xx, double yy, double zz) { return vectorR3((float)xx, (float)yy, (float)zz); } #endif Matrix1D vectorR3(int x, int y, int z) { Matrix1D result(3); result( 0) = x; result( 1) = y; result( 2) = z; return result; } relion-3.1.3/src/matrix1d.h000066400000000000000000000757351411340063500155150ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ /*************************************************************************** * * Authors: Carlos Oscar S. Sorzano (coss@cnb.csic.es) * * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA * 02111-1307 USA * * All comments concerning this program package may be sent to the * e-mail address 'xmipp@cnb.csic.es' ***************************************************************************/ #ifndef MATRIX1D_H_ #define MATRIX1D_H_ #include "src/funcs.h" #include "src/filename.h" extern int bestPrecision(float F, int _width); extern std::string floatToString(float F, int _width, int _prec); template class Matrix2D; /** @defgroup Vectors Matrix1D Vectors * @ingroup DataLibrary */ //@{ /** @name Vectors speed up macros * * This macros are defined to allow high speed in critical parts of your * program. They shouldn't be used systematically as usually there is no * checking on the correctness of the operation you are performing. Speed comes * from three facts: first, they are macros and no function call is performed * (although most of the critical functions are inline functions), there is no * checking on the correctness of the operation (it could be wrong and you are * not warned of it), and destination vectors are not returned saving time in * the copy constructor and in the creation/destruction of temporary vectors. */ //@{ /** Array access. * This macro gives you access to the array (T) */ #define MATRIX1D_ARRAY(v) ((v).vdata) /** For all elements in the array * This macro is used to generate loops for the vector in an easy manner. It * defines an internal index 'i' which ranges the vector using its mathematical * definition (ie, logical access). * * @code * FOR_ALL_ELEMENTS_IN_MATRIX1D(v) * { * std::cout << v(i) << " "; * } * @endcode */ #define FOR_ALL_ELEMENTS_IN_MATRIX1D(v) \ for (int i=0; i v(2); * VECTOR_R2(v, 1, 2); * @endcode */ #define VECTOR_R2(v, x, y) { \ XX(v) = x; YY(v) = y; } /** Creates vector in R3 * The vector must be created beforehand to the correct size. After this macro * the vector is (x, y, z) in R3. * * @code * MultidimArray< RFLOAT > v(3); * VECTOR_R2(v, 1, 2, 1); * @endcode */ #define VECTOR_R3(v, x, y, z) { \ XX(v) = x; YY(v) = y; ZZ(v) = z;} /** Adding two R2 vectors (a=b+c) * @code * MultidimArray< RFLOAT > a(2), b(2), c(2); * ...; * V2_PLUS_V2(a, b, c); * @endcode */ #define V2_PLUS_V2(a, b, c) { \ XX(a) = XX(b) + XX(c); \ YY(a) = YY(b) + YY(c); } /** Substracting two R2 vectors (a=b-c) * @code * MultidimArray< RFLOAT > a(2), b(2), c(2); * ...; * V2_MINUS_V2(a, b, c); * @endcode */ #define V2_MINUS_V2(a, b, c) { \ XX(a) = XX(b) - XX(c); \ YY(a) = YY(b) - YY(c); } /** Adding/substracting a constant to a R2 vector (a=b-k). 
* @code * MultidimArray< RFLOAT > a(2), b(2); * RFLOAT k; * ...; * V2_PLUS_CT(a, b, k); * * MultidimArray< RFLOAT > a(2), b(2); * RFLOAT k; * ...; * V2_PLUS_CT(a, b, -k); * @endcode */ #define V2_PLUS_CT(a, b, k) { \ XX(a) = XX(b) + (k); \ YY(a) = YY(b) + (k); } /** Multiplying/dividing by a constant a R2 vector (a=b*k) * @code * MultidimArray< RFLOAT > a(2), b(2); * RFLOAT k; * ...; * V2_BY_CT(a, b, k); * * MultidimArray< RFLOAT > a(2), b(2); * RFLOAT k; * ...; * V2_BY_CT(a, b, 1/k); * @endcode */ #define V2_BY_CT(a, b, k) { \ XX(a) = XX(b) * (k); \ YY(a) = YY(b) * (k); } /** Adding two R3 vectors (a=b+c) * @code * MultidimArray< RFLOAT > a(3), b(3), c(3); * ...; * V3_PLUS_V3(a, b, c); * @endcode */ #define V3_PLUS_V3(a, b, c) { \ XX(a) = XX(b) + XX(c); \ YY(a) = YY(b) + YY(c); \ ZZ(a) = ZZ(b) + ZZ(c); } /** Substracting two R3 vectors (a=b-c) * @code * MultidimArray< RFLOAT > a(3), b(3), c(3); * ...; * V3_MINUS_V3(a, b, c); * @endcode */ #define V3_MINUS_V3(a, b, c) { \ XX(a) = XX(b) - XX(c); \ YY(a) = YY(b) - YY(c); \ ZZ(a) = ZZ(b) - ZZ(c); } /** Adding/substracting a constant to a R3 vector (a=b-k) * @code * MultidimArray< RFLOAT > a(3), b(3); * RFLOAT k; * ...; * V3_PLUS_CT(a, b, k); * * MultidimArray< RFLOAT > a(3), b(3); * RFLOAT k; * ...; * V3_PLUS_CT(a, b, -k); * @endcode */ #define V3_PLUS_CT(a, b, c) { \ XX(a) = XX(b) + (c); \ YY(a) = YY(b) + (c); \ ZZ(a) = ZZ(b) + (c); } /** Multiplying/dividing by a constant a R3 vector (a=b*k) * @code * MultidimArray< RFLOAT > a(3), b(3); * RFLOAT k; * ...; * V3_BY_CT(a, b, k); * * MultidimArray< RFLOAT > a(3), b(3); * RFLOAT k; * ...; * V3_BY_CT(a, b, 1/k); * @endcode */ #define V3_BY_CT(a, b, c) { \ XX(a) = XX(b) * (c); \ YY(a) = YY(b) * (c); \ ZZ(a) = ZZ(b) * (c); } /** Direct access to vector element */ #define VEC_ELEM(v,i) ((v).vdata[(i)]) //@} /** Matrix1D class.*/ template class Matrix1D { public: /// The array itself T* vdata; /// Destroy data bool destroyData; /// Number of elements int vdim; /// <0=column vector (default), 1=row vector bool row; /// @name Constructors //@{ /** Empty constructor * * The empty constructor creates a vector with no memory associated, * origin=0, size=0, no statistics, ... You can choose between a column * vector (by default), or a row one. * * @code * Matrix1D< RFLOAT > v1; * Matrix1D< RFLOAT > v1(true); * // both are examples of empty column vectors * * Matrix1D< int > v1(false); * // empty row vector * @endcode */ Matrix1D(bool column = true) { coreInit(); row = ! column; } /** Dimension constructor * * The dimension constructor creates a vector with memory associated (but * not assigned to anything, could be full of garbage) origin=0, size=the * given one. You can choose between a column vector (by default), or a row * one. * * @code * Matrix1D< RFLOAT > v1(6); * Matrix1D< RFLOAT > v1(6, 'y'); * // both are examples of column vectors of dimensions 6 * * Matrix1D< int > v1('n'); * // empty row vector * @endcode */ Matrix1D(int dim, bool column = true) { coreInit(); row = ! column; resize(dim); } /** Copy constructor * * The created vector is a perfect copy of the input vector but with a * different memory assignment. * * @code * Matrix1D< RFLOAT > v2(v1); * @endcode */ Matrix1D(const Matrix1D& v) { coreInit(); *this = v; } /** Destructor. */ ~Matrix1D() { coreDeallocate(); } /** Assignment. * * You can build as complex assignment expressions as you like. Multiple * assignment is allowed. 
* * @code * v1 = v2 + v3; * v1 = v2 = v3; * @endcode */ Matrix1D& operator=(const Matrix1D& op1) { if (&op1 != this) { resize(op1); for (int i = 0; i < vdim; i++) vdata[i] = op1.vdata[i]; row=op1.row; } return *this; } //@} /// @name Core memory operations for Matrix1D //@{ /** Clear. */ void clear() { coreDeallocate(); coreInit(); } /** Core init. * Initialize everything to 0 */ void coreInit() { vdim=0; row=false; vdata=NULL; destroyData=true; } /** Core allocate. */ inline void coreAllocate(int _vdim) { if (_vdim<=0) { clear(); return; } vdim=_vdim; vdata = new T [vdim]; if (vdata == NULL) REPORT_ERROR("Allocate: No space left"); } /** Core deallocate. * Free all vdata. */ inline void coreDeallocate() { if (vdata != NULL && destroyData) delete[] vdata; vdata=NULL; } //@} ///@name Size and shape of Matrix1D //@{ /** Resize to a given size * * This function resize the actual array to the given size. The origin is * not modified. If the actual array is larger than the pattern then the * values outside the new size are lost, if it is smaller then 0's are * added. An exception is thrown if there is no memory. * * @code * V1.resize(3, 3, 2); * @endcode */ inline void resize(int Xdim) { if (Xdim == vdim) return; if (Xdim <= 0) { clear(); return; } T * new_vdata; try { new_vdata = new T [Xdim]; } catch (std::bad_alloc &) { REPORT_ERROR("Allocate: No space left"); } // Copy needed elements, fill with 0 if necessary for (int j = 0; j < Xdim; j++) { T val; if (j >= vdim) val = 0; else val = vdata[j]; new_vdata[j] = val; } // deallocate old vector coreDeallocate(); // assign *this vector to the newly created vdata = new_vdata; vdim = Xdim; } /** Resize according to a pattern. * * This function resize the actual array to the same size * as the input pattern. If the actual array is larger than the pattern * then the trailing values are lost, if it is smaller then 0's are * added at the end * * @code * v2.resize(v1); * // v2 has got now the same structure as v1 * @endcode */ template void resize(const Matrix1D &v) { if (vdim != v.vdim) resize(v.vdim); } /** Same shape. * * Returns true if this object has got the same shape (origin and size) * than the argument */ template bool sameShape(const Matrix1D& op) const { return (vdim == op.vdim); } /** Returns the size of this vector * * @code * int nn = a.size(); * @endcode */ inline int size() const { return vdim; } /** True if vector is a row. * * @code * if (v.isRow()) * std::cout << "v is a row vector\n"; * @endcode */ int isRow() const { return row; } /** True if vector is a column * * @code * if (v.isCol()) * std::cout << "v is a column vector\n"; * @endcode */ int isCol() const { return !row; } /** Forces the vector to be a row vector * * @code * v.setRow(); * @endcode */ void setRow() { row = true; } /** Forces the vector to be a column vector * * @code * v.setCol(); * @endcode */ void setCol() { row = false; } //@} /// @name Initialization of Matrix1D values //@{ /** Same value in all components. * * The constant must be of a type compatible with the array type, ie, * you cannot assign a RFLOAT to an integer array without a casting. * It is not an error if the array is empty, then nothing is done. * * @code * v.initConstant(3.14); * @endcode */ void initConstant(T val) { for (int j = 0; j < vdim; j++) { vdata[j] = val; } } /** Initialize to zeros with current size. * * All values are set to 0. The current size and origin are kept. It is not * an error if the array is empty, then nothing is done. 
* * @code * v.initZeros(); * @endcode */ void initZeros() { memset(vdata,0,vdim*sizeof(T)); } /** Initialize to zeros with a given size. */ void initZeros(int Xdim) { if (vdim!=Xdim) resize(Xdim); memset(vdata,0,vdim*sizeof(T)); } /** Initialize to zeros following a pattern. * * All values are set to 0, and the origin and size of the pattern are * adopted. * * @code * v2.initZeros(v1); * @endcode */ template void initZeros(const Matrix1D& op) { if (vdim!=op.vdim) resize(op); memset(vdata,0,vdim*sizeof(T)); } //@} /// @name Matrix1D operators //@{ /** v3 = v1 * k. */ Matrix1D operator*(T op1) const { Matrix1D tmp(*this); for (int i=0; i < vdim; i++) tmp.vdata[i] = vdata[i] * op1; return tmp; } /** v3 = v1 / k. */ Matrix1D operator/(T op1) const { Matrix1D tmp(*this); for (int i=0; i < vdim; i++) tmp.vdata[i] = vdata[i] / op1; return tmp; } /** v3 = v1 + k. */ Matrix1D operator+(T op1) const { Matrix1D tmp(*this); for (int i=0; i < vdim; i++) tmp.vdata[i] = vdata[i] + op1; return tmp; } /** v3 = v1 - k. */ Matrix1D operator-(T op1) const { Matrix1D tmp(*this); for (int i=0; i < vdim; i++) tmp.vdata[i] = vdata[i] - op1; return tmp; } /** v3 = k * v2. */ friend Matrix1D operator*(T op1, const Matrix1D& op2) { Matrix1D tmp(op2); for (int i=0; i < op2.vdim; i++) tmp.vdata[i] = op1 * op2.vdata[i]; return tmp; } /** v3 = k / v2. */ friend Matrix1D operator/(T op1, const Matrix1D& op2) { Matrix1D tmp(op2); for (int i=0; i < op2.vdim; i++) tmp.vdata[i] = op1 / op2.vdata[i]; return tmp; } /** v3 = k + v2. */ friend Matrix1D operator+(T op1, const Matrix1D& op2) { Matrix1D tmp(op2); for (int i=0; i < op2.vdim; i++) tmp.vdata[i] = op1 + op2.vdata[i]; return tmp; } /** Vector summation * * @code * A += B; * @endcode */ void operator+=(const Matrix1D& op1) const { if (vdim != op1.vdim) REPORT_ERROR("Not same sizes in vector summation"); for (int i = 0; i < vdim; i++) vdata[i] += op1.vdata[i]; } /** v3 = k - v2. */ friend Matrix1D operator-(T op1, const Matrix1D& op2) { Matrix1D tmp(op2); for (int i=0; i < op2.vdim; i++) tmp.vdata[i] = op1 - op2.vdata[i]; return tmp; } /** Vector substraction * * @code * A -= B; * @endcode */ void operator-=(const Matrix1D& op1) const { if (vdim != op1.vdim) REPORT_ERROR("Not same sizes in vector summation"); for (int i = 0; i < vdim; i++) vdata[i] -= op1.vdata[i]; } /** v3 *= k. */ void operator*=(T op1) { for (int i=0; i < vdim; i++) vdata[i] *= op1; } /** v3 /= k. */ void operator/=(T op1) { for (int i=0; i < vdim; i++) vdata[i] /= op1; } /** v3 += k. */ void operator+=(T op1) { for (int i=0; i < vdim; i++) vdata[i] += op1; } /** v3 -= k. */ void operator-=(T op1) { for (int i=0; i < vdim; i++) vdata[i] -= op1; } /** v3 = v1 * v2. */ Matrix1D operator*(const Matrix1D& op1) const { if (vdim != op1.vdim) REPORT_ERROR("Not same sizes in vector multiplication"); Matrix1D tmp(op1); for (int i=0; i < vdim; i++) tmp.vdata[i] = vdata[i] * op1.vdata[i]; return tmp; } /** v3 = v1 / v2. */ Matrix1D operator/(const Matrix1D& op1) const { if (vdim != op1.vdim) REPORT_ERROR("Not same sizes in vector division"); Matrix1D tmp(op1); for (int i=0; i < vdim; i++) tmp.vdata[i] = vdata[i] / op1.vdata[i]; return tmp; } /** v3 = v1 + v2. */ Matrix1D operator+(const Matrix1D& op1) const { if (vdim != op1.vdim) REPORT_ERROR("Not same sizes in vector summation"); Matrix1D tmp(op1); for (int i=0; i < vdim; i++) tmp.vdata[i] = vdata[i] + op1.vdata[i]; return tmp; } /** v3 = v1 - v2. 
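 * Element-wise subtraction; both vectors must have the same size, otherwise an error is reported.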
*/ Matrix1D operator-(const Matrix1D& op1) const { if (vdim != op1.vdim) REPORT_ERROR("Not same sizes in vector subtraction"); Matrix1D tmp(op1); for (int i=0; i < vdim; i++) tmp.vdata[i] = vdata[i] - op1.vdata[i]; return tmp; } /** v3 *= v2. */ void operator*=(const Matrix1D& op1) { if (vdim != op1.vdim) REPORT_ERROR("Not same sizes in vector multiplication"); for (int i=0; i < vdim; i++) vdata[i] *= op1.vdata[i]; } /** v3 /= v2. */ void operator/=(const Matrix1D& op1) { if (vdim != op1.vdim) REPORT_ERROR("Not same sizes in vector division"); for (int i=0; i < vdim; i++) vdata[i] /= op1.vdata[i]; } /** v3 += v2. */ void operator+=(const Matrix1D& op1) { if (vdim != op1.vdim) REPORT_ERROR("Not same sizes in vector summation"); for (int i=0; i < vdim; i++) vdata[i] += op1.vdata[i]; } /** v3 -= v2. */ void operator-=(const Matrix1D& op1) { if (vdim != op1.vdim) REPORT_ERROR("Not same sizes in vector subtraction"); for (int i=0; i < vdim; i++) vdata[i] -= op1.vdata[i]; } /** Unary minus. * * It is used to build arithmetic expressions. You can make a minus * of anything as long as it is correct semantically. * * @code * v1 = -v2; * v1 = -v2.transpose(); * @endcode */ Matrix1D operator-() const { Matrix1D tmp(*this); for (int i=0; i < vdim; i++) tmp.vdata[i] = - vdata[i]; return tmp; } /** Vector by matrix * * Algebraic vector by matrix multiplication. This function is actually * implemented in xmippMatrices2D */ Matrix1D operator*(const Matrix2D& M); /** Vector element access * * Returns the value of a vector logical position. In our example we could * access from v(-2) to v(2). The elements can be used either by value or by * reference. * * @code * v(-2) = 1; * val = v(-2); * @endcode */ T& operator()(int i) const { return vdata[i]; } //@} /// @name Utilities for Matrix1D //@{ /** Produce a vector suitable for working with Numerical Recipes * * This function must be used only as a preparation for routines which need * that the first physical index is 1 and not 0 as it usually is in C. In * fact the vector provided for Numerical recipes is exactly this same one * but with the indexes changed. * * This function is not ported to Python. */ T* adaptForNumericalRecipes() const { return MATRIX1D_ARRAY(*this) - 1; } /** Kill an array produced for Numerical Recipes. * * Nothing needs to be done in fact. * * This function is not ported to Python. */ void killAdaptationForNumericalRecipes(T* m) const {} /** CEILING * * Applies a CEILING (look for the nearest larger integer) to each * array element. */ void selfCEIL() { for (int i=0; i < vdim; i++) vdata[i] = CEIL(vdata[i]); } /** FLOOR * * Applies a FLOOR (look for the nearest larger integer) to each * array element. */ void selfFLOOR() { for (int i=0; i < vdim; i++) vdata[i] = FLOOR(vdata[i]); } /** ROUND * * Applies a ROUND (look for the nearest larger integer) to each * array element. */ void selfROUND() { for (int i=0; i < vdim; i++) vdata[i] = ROUND(vdata[i]); } /** Index for the maximum element. * * This function returns the index of the maximum element of an matrix1d. * Returns -1 if the array is empty */ void maxIndex(int& jmax) const { if (vdim == 0) { jmax = -1; return; } jmax = 0; T maxval = (*this)(0); for (int j = 0; j < vdim; j++) if ( (*this)(j) > maxval ) jmax =j; } /** Index for the minimum element. * * This function returns the index of the minimum element of an matrix1d. 
* Returns -1 if the array is empty */ void minIndex(int& jmin) const { if (vdim == 0) { jmin = -1; return; } jmin = 0; T minval = (*this)(0); for (int j = 0; j < vdim; j++) if ( (*this)(j) < minval ) jmin =j; } /** Algebraic transpose of vector * * You can use the transpose in as complex expressions as you like. The * origin of the vector is not changed. * * @code * v2 = v1.transpose(); * @endcode */ Matrix1D transpose() const { Matrix1D temp(*this); temp.selfTranspose(); return temp; } /** Algebraic transpose of vector * * The same as before but the result is stored in this same object. */ void selfTranspose() { row = !row; } /** Sum of vector values. * * This function returns the sum of all internal values. * * @code * RFLOAT sum = m.sum(); * @endcode */ RFLOAT sum(bool average=false) const { RFLOAT sum = 0; for (int j = 0; j < vdim; j++) { sum += vdata[j]; } if (average) return sum/(RFLOAT)vdim; else return sum; } /** Sum of squared vector values. * * This function returns the sum of all internal values to the second * power_class. * * @code * RFLOAT sum2 = m.sum2(); * @endcode */ RFLOAT sum2() const { RFLOAT sum = 0; for (int j = 0; j < vdim; j++) { sum += vdata[j] * vdata[j]; } return sum; } /** Module of the vector * * This module is defined as the square root of the sum of the squared * components. Euclidean norm of the vector. * * @code * RFLOAT mod = v.module(); * @endcode */ RFLOAT module() const { return sqrt(sum2()); } /** Angle of the vector * * Supposing this vector is in R2 this function returns the angle of this * vector with X axis, ie, atan2(YY(v), XX(v)) */ RFLOAT angle() { return atan2((RFLOAT) YY(*this), (RFLOAT) XX(*this)); } /** Normalize this vector, store the result here */ void selfNormalize() { RFLOAT m = module(); if (ABS(m) > XMIPP_EQUAL_ACCURACY) { T im=(T) (1.0/m); *this *= im; } else initZeros(); } /** Reverse vector values, keep in this object. */ void selfReverse() { for (int j = 0; j <= (int)(vdim - 1) / 2; j++) { T aux; SWAP(vdata[j], vdata[vdim-1-j], aux); } } /** Compute numerical derivative * * The numerical derivative is of the same size as the input vector. * However, the first two and the last two samples are set to 0, * because the numerical method is not able to correctly estimate the * derivative there. */ void numericalDerivative(Matrix1D &result) { const RFLOAT i12=1.0/12.0; result.initZeros(*this); for (int i=STARTINGX(*this)+2; i<=FINISHINGX(*this)-2; i++) result(i)=i12*(-(*this)(i+2)+8*(*this)(i+1) -8*(*this)(i-1)+(*this)(i+2)); } /** Output to output stream.*/ friend std::ostream& operator<<(std::ostream& ostrm, const Matrix1D& v) { if (v.vdim == 0) ostrm << "NULL Array\n"; else ostrm << std::endl; RFLOAT max_val = ABS(v.vdata[0]); for (int j = 0; j < v.vdim; j++) { max_val = XMIPP_MAX(max_val, v.vdata[j]); } int prec = bestPrecision(max_val, 10); for (int j = 0; j < v.vdim; j++) { ostrm << floatToString((RFLOAT) v.vdata[j], 10, prec) << std::endl; } return ostrm; } //@} }; /**@name Vector Related functions * These functions are not methods of Matrix1D */ /** Creates vector in R2. * After this function the vector is (x,y) in R2. * * @code * Matrix1D< RFLOAT > v = vectorR2(1, 2); * @endcode */ Matrix1D< RFLOAT > vectorR2(RFLOAT x, RFLOAT y); /** Creates vector in R3. * After this function the vector is (x,y,z) in R3. 
* * @code * Matrix1D< RFLOAT > v = vectorR2(1, 2, 1); * @endcode */ Matrix1D< RFLOAT > vectorR3(RFLOAT x, RFLOAT y, RFLOAT z); // This function is only needed for single-precision compilation #ifdef RELION_SINGLE_PRECISION Matrix1D< float > vectorR3(double xx, double yy, double zz); #endif /** Creates an integer vector in Z3. */ Matrix1D< int > vectorR3(int x, int y, int z); /** Dot product. * Given any two vectors in Rn (n-dimensional vector), this function returns the * dot product of both. If the vectors are not of the same size or shape then an * exception is thrown. The dot product is defined as the sum of the component * by component multiplication. * * For the R3 vectors (V1x,V1y,V1z), (V2x, V2y, V2z) the result is V1x*V2x + * V1y*V2y + V1z*V2z. * * @code * Matrix1D< RFLOAT > v1(1000); * v1.init_random(0, 10, "gaussian"); * std::cout << "The power_class of this vector should be 100 and is " << * dotProduct(v1, v1) << std::endl; * @endcode */ template T dotProduct(const Matrix1D< T >& v1, const Matrix1D< T >& v2) { if (!v1.sameShape(v2)) REPORT_ERROR("Dot product: vectors of different size or shape"); T accumulate = 0; for (int j = 0; j < v1.vdim; j++) { accumulate += v1.vdata[j] * v2.vdata[j]; } return accumulate; } /** Vector product in R3. * This function takes two R3 vectors and compute their vectorial product. For * two vectors (V1x,V1y,V1z), (V2x, V2y, V2z) the result is (V1y*V2z-V1z*v2y, * V1z*V2x-V1x*V2z, V1x*V2y-V1y*V2x). Pay attention that this operator is not * conmutative. An exception is thrown if the vectors are not of the same shape * or they don't belong to R3. * * @code * Matrix1D< T > X = vectorR3(1, 0, 0), Y = vector_R3(0, 1, 0); * std::cout << "X*Y=Z=" << vectorProduct(X,Y).transpose() << std::endl; * @endcode */ template Matrix1D< T > vectorProduct(const Matrix1D< T >& v1, const Matrix1D< T >& v2) { if (v1.vdim != 3 || v2.vdim != 3) REPORT_ERROR("Vector_product: vectors are not in R3"); if (v1.isRow() != v2.isRow()) REPORT_ERROR("Vector_product: vectors are of different shape"); Matrix1D< T > result(3); XX(result) = YY(v1) * ZZ(v2) - ZZ(v1) * YY(v2); YY(result) = ZZ(v1) * XX(v2) - XX(v1) * ZZ(v2); ZZ(result) = XX(v1) * YY(v2) - YY(v1) * XX(v2); return result; } /** Vector product in R3. * This function computes the vector product of two R3 vectors. * No check is performed, it is assumed that the output vector * is already resized * */ template void vectorProduct(const Matrix1D< T >& v1, const Matrix1D< T >& v2, Matrix1D &result) { XX(result) = YY(v1) * ZZ(v2) - ZZ(v1) * YY(v2); YY(result) = ZZ(v1) * XX(v2) - XX(v1) * ZZ(v2); ZZ(result) = XX(v1) * YY(v2) - YY(v1) * XX(v2); } /** Sort two vectors. * v1 and v2 must be of the same shape, if not an exception is thrown. After * calling this function all components in v1 are the minimum between the * corresponding components in v1 and v2, and all components in v2 are the * maximum. * * For instance, XX(v1)=MIN(XX(v1), XX(v2)), XX(v2)=MAX(XX(v1), XX(v2)). Notice * that both vectors are modified. This function is very useful for sorting two * corners. After calling it you can certainly perform a non-empty for (from * corner1 to corner2) loop. */ template void sortTwoVectors(Matrix1D& v1, Matrix1D& v2) { T temp; if (!v1.sameShape(v2)) REPORT_ERROR("sortTwoVectors: vectors are not of the same shape"); for (int j = 0; j < v1.vdim; j++) { temp = XMIPP_MIN(v1.vdata[j], v2.vdata[j]); v2.vdata[j] = XMIPP_MAX(v1.vdata[j], v2.vdata[j]); v1.vdata[j] = temp; } } /** Conversion from one type to another. 
* If we have an integer array and we need a RFLOAT one, we can use this * function. The conversion is done through a type casting of each element * If n >= 0, only the nth volumes will be converted, otherwise all NSIZE volumes */ template void typeCast(const Matrix1D& v1, Matrix1D& v2) { if (v1.vdim == 0) { v2.clear(); return; } v2.resize(v1.vdim); for (int j = 0; j < v1.vdim; j++) { v2.vdata[j] = static_cast< T2 > (v1.vdata[j]); } } //@} #endif /* MATRIX1D_H_ */ relion-3.1.3/src/matrix2d.cpp000066400000000000000000000030341411340063500160300ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ /* Is diagonal ------------------------------------------------------------- */ #include "src/matrix2d.h" /* Interface to numerical recipes: svbksb ---------------------------------- */ void svbksb(Matrix2D &u, Matrix1D &w, Matrix2D &v, Matrix1D &b, Matrix1D &x) { // Call to the numerical recipes routine. Results will be stored in X svbksb(u.adaptForNumericalRecipes2(), w.adaptForNumericalRecipes(), v.adaptForNumericalRecipes2(), u.mdimy, u.mdimx, b.adaptForNumericalRecipes(), x.adaptForNumericalRecipes()); } relion-3.1.3/src/matrix2d.h000066400000000000000000001121371411340063500155020ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ /*************************************************************************** * * Authors: Carlos Oscar S. Sorzano (coss@cnb.csic.es) * * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA * 02111-1307 USA * * All comments concerning this program package may be sent to the * e-mail address 'xmipp@cnb.csic.es' ***************************************************************************/ #ifndef MATRIX2D_H_ #define MATRIX2D_H_ #include #include #include "src/matrix1d.h" /** @defgroup Matrices Matrix2D Matrices * @ingroup DataLibrary */ //@{ /** @name Matrices speed up macros */ //@{ /** Array access. * * This macro gives you access to the array (T) */ #define MATRIX2D_ARRAY(m) ((m).mdata) /** For all elements in the array * * This macro is used to generate loops for the matrix in an easy way. It * defines internal indexes 'i' and 'j' which ranges the matrix using its * mathematical definition (ie, logical access). * * @code * FOR_ALL_ELEMENTS_IN_MATRIX2D(m) * { * std::cout << m(i, j) << " "; * } * @endcode */ #define FOR_ALL_ELEMENTS_IN_MATRIX2D(m) \ for (int i=0; i<(m).mdimy; i++) \ for (int j=0; j<(m).mdimx; j++) /** Access to a matrix element * v is the array, i and j define the element v_ij. * * @code * MAT_ELEM(m, 0, 0) = 1; * val = MAT_ELEM(m, 0, 0); * @endcode */ #define MAT_ELEM(m,i,j) ((m).mdata[(i)*(m).mdimx+(j)]) /** X dimension of the matrix */ #define MAT_XSIZE(m) ((m).mdimx) /** Y dimension of the matrix */ #define MAT_YSIZE(m) ((m).mdimy) // Forward declarations template class Matrix1D; template class Matrix2D; template void ludcmp(const Matrix2D& A, Matrix2D& LU, Matrix1D< int >& indx, T& d); template void lubksb(const Matrix2D& LU, Matrix1D< int >& indx, Matrix1D& b); template void svdcmp(const Matrix2D< T >& a, Matrix2D< RFLOAT >& u, Matrix1D< RFLOAT >& w, Matrix2D< RFLOAT >& v); void svbksb(Matrix2D< RFLOAT >& u, Matrix1D< RFLOAT >& w, Matrix2D< RFLOAT >& v, Matrix1D< RFLOAT >& b, Matrix1D< RFLOAT >& x); template void solve(const Matrix2D& A, const Matrix1D& b, Matrix1D< RFLOAT >& result, RFLOAT tolerance); /** Matrix2D class */ template class Matrix2D { public: // The array itself T* mdata; // Destroy data bool destroyData; // Number of elements in X int mdimx; // Number of elements in Y int mdimy; // Total number of elements int mdim; /// @name Constructors /// @{ /** Empty constructor */ Matrix2D() { coreInit(); } /** Dimension constructor */ Matrix2D(int Ydim, int Xdim) { coreInit(); resize(Ydim, Xdim); } /** Copy constructor */ Matrix2D(const Matrix2D& v) { coreInit(); *this = v; } /** Destructor. */ ~Matrix2D() { coreDeallocate(); } /** Assignment. * * You can build as complex assignment expressions as you like. Multiple * assignment is allowed. * * @code * v1 = v2 + v3; * v1 = v2 = v3; * @endcode */ Matrix2D& operator=(const Matrix2D& op1) { if (&op1 != this) { if (MAT_XSIZE(*this)!=MAT_XSIZE(op1) || MAT_YSIZE(*this)!=MAT_YSIZE(op1)) resize(op1); memcpy(mdata,op1.mdata,op1.mdim*sizeof(T)); } return *this; } //@} /// @name Core memory operations for Matrix2D //@{ /** Clear. */ void clear() { coreDeallocate(); coreInit(); } /** Core init. * Initialize everything to 0 */ void coreInit() { mdimx=mdimy=mdim=0; mdata=NULL; destroyData=true; } /** Core allocate. 
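     * Reserves _mdimy x _mdimx elements with new[]; if either dimension is
     * non-positive the matrix is cleared instead of being allocated.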
*/ void coreAllocate(int _mdimy, int _mdimx) { if (_mdimy <= 0 ||_mdimx<=0) { clear(); return; } mdimx=_mdimx; mdimy=_mdimy; mdim=_mdimx*_mdimy; mdata = new T [mdim]; if (mdata == NULL) REPORT_ERROR("coreAllocate: No space left"); } /** Core deallocate. * Free all mdata. */ void coreDeallocate() { if (mdata != NULL && destroyData) delete[] mdata; mdata=NULL; } //@} /// @name Size and shape of Matrix2D //@{ /** Resize to a given size */ void resize(int Ydim, int Xdim) { if (Xdim == mdimx && Ydim == mdimy) return; if (Xdim <= 0 || Ydim <= 0) { clear(); return; } T * new_mdata; size_t YXdim=Ydim*Xdim; try { new_mdata = new T [YXdim]; } catch (std::bad_alloc &) { REPORT_ERROR("Allocate: No space left"); } // Copy needed elements, fill with 0 if necessary for (int i = 0; i < Ydim; i++) for (int j = 0; j < Xdim; j++) { T val; if (i >= mdimy) val = 0; else if (j >= mdimx) val = 0; else val = mdata[i*mdimx + j]; new_mdata[i*Xdim+j] = val; } // deallocate old vector coreDeallocate(); // assign *this vector to the newly created mdata = new_mdata; mdimx = Xdim; mdimy = Ydim; mdim = Xdim * Ydim; } /** Resize according to a pattern. * * This function resize the actual array to the same size and origin * as the input pattern. If the actual array is larger than the pattern * then the trailing values are lost, if it is smaller then 0's are * added at the end * * @code * v2.resize(v1); * // v2 has got now the same structure as v1 * @endcode */ template void resize(const Matrix2D &v) { if (mdimx != v.mdimx || mdimy != v.mdimy) resize(v.mdimy, v.mdimx); } /** Extract submatrix and assign to this object. */ void submatrix(int i0, int j0, int iF, int jF) { if (i0 < 0 || j0 < 0 || iF >= MAT_YSIZE(*this) || jF >= MAT_XSIZE(*this)) REPORT_ERROR("Submatrix indexes out of bounds"); Matrix2D result(iF - i0 + 1, jF - j0 + 1); FOR_ALL_ELEMENTS_IN_MATRIX2D(result) MAT_ELEM(result, i, j) = MAT_ELEM(*this, i+i0, j+j0); *this = result; } /** Same shape. * * Returns true if this object has got the same shape (origin and size) * than the argument */ template bool sameShape(const Matrix2D& op) const { return ((mdimx == op.mdimx) && (mdimy == op.mdimy)); } /** X dimension * * Returns X dimension */ inline int Xdim() const { return mdimx; } /** Y dimension * * Returns Y dimension */ inline int Ydim() const { return mdimy; } //@} /// @name Initialization of Matrix2D values //@{ /** Same value in all components. * * The constant must be of a type compatible with the array type, ie, * you cannot assign a RFLOAT to an integer array without a casting. * It is not an error if the array is empty, then nothing is done. * * @code * v.initConstant(3.14); * @endcode */ void initConstant(T val) { for (int j = 0; j < mdim; j++) mdata[j] = val; } /** Initialize to zeros with current size. * * All values are set to 0. The current size and origin are kept. It is not * an error if the array is empty, then nothing is done. * * @code * v.initZeros(); * @endcode */ void initZeros() { memset(mdata,0,mdimx*mdimy*sizeof(T)); } /** Initialize to zeros with a given size. */ void initZeros(int Ydim, int Xdim) { if (mdimx!=Xdim || mdimy!=Ydim) resize(Ydim, Xdim); memset(mdata,0,mdimx*mdimy*sizeof(T)); } /** Initialize to zeros following a pattern. * * All values are set to 0, and the origin and size of the pattern are * adopted. 
* * @code * v2.initZeros(v1); * @endcode */ template void initZeros(const Matrix2D& op) { if (mdimx!=op.mdimx || mdimy!=op.mdimy) resize(op); memset(mdata,0,mdimx*mdimy*sizeof(T)); } /** 2D Identity matrix of current size * * If actually the matrix is not squared then an identity matrix is * generated of size (Xdim x Xdim). * * @code * m.initIdentity(); * @endcode */ void initIdentity() { initIdentity(MAT_XSIZE(*this)); } /** 2D Identity matrix of a given size * * A (dim x dim) identity matrix is generated. * * @code * m.initIdentity(3); * @endcode */ void initIdentity(int dim) { initZeros(dim, dim); for (int i = 0; i < dim; i++) MAT_ELEM(*this,i,i) = 1; } //@} /// @name Operators for Matrix2D //@{ /** Matrix element access */ T& operator()(int i, int j) { return MAT_ELEM((*this),i,j); } // for constant matrices (the compiler will pick the right version) const T& operator()(int i, int j) const { return MAT_ELEM((*this),i,j); } /** Parenthesis operator for phyton */ void setVal(T val,int y, int x) { MAT_ELEM((*this),y,x)=val; } /** Parenthesis operator for phyton */ T getVal( int y, int x) const { return MAT_ELEM((*this),y,x); } /** v3 = v1 * k. */ Matrix2D operator*(T op1) const { Matrix2D tmp(*this); for (int i=0; i < mdim; i++) tmp.mdata[i] = mdata[i] * op1; return tmp; } /** v3 = v1 / k. */ Matrix2D operator/(T op1) const { Matrix2D tmp(*this); for (int i=0; i < mdim; i++) tmp.mdata[i] = mdata[i] / op1; return tmp; } /** v3 = k * v2. */ friend Matrix2D operator*(T op1, const Matrix2D& op2) { Matrix2D tmp(op2); for (int i=0; i < op2.mdim; i++) tmp.mdata[i] = op1 * op2.mdata[i]; return tmp; } /** v3 *= k. */ void operator*=(T op1) { for (int i=0; i < mdim; i++) mdata[i] *= op1; } /** v3 /= k. */ void operator/=(T op1) { for (int i=0; i < mdim; i++) mdata[i] /= op1; } /** Matrix by vector multiplication * * @code * v2 = A*v1; * @endcode */ Matrix1D operator*(const Matrix1D& op1) const { Matrix1D result; if (mdimx != op1.size()) { std::cerr << " mdimx= " << mdimx << " opp1.size()= " << op1.size() << std::endl; REPORT_ERROR("Not compatible sizes in matrix by vector"); } if (!op1.isCol()) REPORT_ERROR("Vector is not a column"); result.initZeros(mdimy); for (int i = 0; i < mdimy; i++) for (int j = 0; j < op1.size(); j++) result(i) += (*this)(i, j) * op1(j); result.setCol(); return result; } /** Matrix by Matrix multiplication * * @code * C = A*B; * @endcode */ Matrix2D operator*(const Matrix2D& op1) const { Matrix2D result; if (mdimx != op1.mdimy) REPORT_ERROR("Not compatible sizes in matrix multiplication"); result.initZeros(mdimy, op1.mdimx); for (int i = 0; i < mdimy; i++) for (int j = 0; j < op1.mdimx; j++) for (int k = 0; k < mdimx; k++) result(i, j) += (*this)(i, k) * op1(k, j); return result; } /** Matrix summation * * @code * C = A + B; * @endcode */ Matrix2D operator+(const Matrix2D& op1) const { Matrix2D result; if (mdimx != op1.mdimx || mdimy != op1.mdimy) REPORT_ERROR("operator+: Not same sizes in matrix summation"); result.initZeros(mdimy, mdimx); for (int i = 0; i < mdimy; i++) for (int j = 0; j < mdimx; j++) result(i, j) = (*this)(i, j) + op1(i, j); return result; } /** Matrix summation * * @code * A += B; * @endcode */ void operator+=(const Matrix2D& op1) const { if (mdimx != op1.mdimx || mdimy != op1.mdimy) REPORT_ERROR("operator+=: Not same sizes in matrix summation"); for (int i = 0; i < mdimy; i++) for (int j = 0; j < mdimx; j++) MAT_ELEM(*this,i, j) += MAT_ELEM(op1, i, j); } /** Matrix subtraction * * @code * C = A - B; * @endcode */ Matrix2D operator-(const Matrix2D& op1) 
const { Matrix2D result; if (mdimx != op1.mdimx || mdimy != op1.mdimy) REPORT_ERROR("operator-: Not same sizes in matrix summation"); result.initZeros(mdimy, mdimx); for (int i = 0; i < mdimy; i++) for (int j = 0; j < mdimx; j++) result(i, j) = (*this)(i, j) - op1(i, j); return result; } /** Matrix substraction * * @code * A -= B; * @endcode */ void operator-=(const Matrix2D& op1) const { if (mdimx != op1.mdimx || mdimy != op1.mdimy) REPORT_ERROR("operator-=: Not same sizes in matrix summation"); for (int i = 0; i < mdimy; i++) for (int j = 0; j < mdimx; j++) MAT_ELEM(*this,i, j) -= MAT_ELEM(op1, i, j); } /** Equality. * * Returns true if this object has got the same shape (origin and size) * than the argument and the same values (within accuracy). */ bool equal(const Matrix2D& op, RFLOAT accuracy = XMIPP_EQUAL_ACCURACY) const { if (!sameShape(op)) return false; for (int i = 0; i < mdimy; i++) for (int j = 0; j < mdimx; j++) if (ABS( (*this)(i,j) - op(i,j) ) > accuracy) return false; return true; } //@} /// @name Utilities for Matrix2D //@{ /** Set very small values (ABS(val)< accuracy) equal to zero * */ void setSmallValuesToZero(RFLOAT accuracy = XMIPP_EQUAL_ACCURACY) { for (int i = 0; i < mdimy; i++) for (int j = 0; j < mdimx; j++) if (ABS( (*this)(i,j) ) < accuracy) (*this)(i,j) = 0.; } /// @name Utilities for Matrix2D //@{ /** Maximum of the values in the array. * * The returned value is of the same type as the type of the array. */ T computeMax() const { if (mdim <= 0) return static_cast< T >(0); T maxval = mdata[0]; for (int n = 0; n < mdim; n++) if (mdata[n] > maxval) maxval = mdata[n]; return maxval; } /** Minimum of the values in the array. * * The returned value is of the same type as the type of the array. */ T computeMin() const { if (mdim <= 0) return static_cast< T >(0); T minval = mdata[0]; for (int n = 0; n < mdim; n++) if (mdata[n] < minval) minval = mdata[n]; return minval; } /** Produce a 2D array suitable for working with Numerical Recipes * * This function must be used only as a preparation for routines which need * that the first physical index is 1 and not 0 as it usually is in C. New * memory is needed to hold the new RFLOAT pointer array. */ T** adaptForNumericalRecipes() const { T** m = NULL; ask_Tmatrix(m, 1, mdimy, 1, mdimx); for (int i = 0; i < mdimy; i++) for (int j = 0; j < mdimx; j++) m[i+1][j+1] = mdata[i*mdimx + j]; return m; } /** Produce a 1D pointer suitable for working with Numerical Recipes (2) * * This function meets the same goal as the one before, however this one * work with 2D arrays as a single pointer. The first element of the array * is pointed by result[1*Xdim+1], and in general result[i*Xdim+j] */ T* adaptForNumericalRecipes2() const { return mdata - 1 - mdimx; } /** Load 2D array from numerical recipes result. */ void loadFromNumericalRecipes(T** m, int Ydim, int Xdim) { if (mdimx!=Xdim || mdimy!=Ydim) resize(Ydim, Xdim); for (int i = 1; i <= Ydim; i++) for (int j = 1; j <= Xdim; j++) (*this)(i - 1, j - 1) = m[i][j]; } /** Kill a 2D array produced for numerical recipes * * The allocated memory is freed. */ void killAdaptationForNumericalRecipes(T** m) const { free_Tmatrix(m, 1, mdimy, 1, mdimx); } /** Kill a 2D array produced for numerical recipes, 2. * * Nothing needs to be done. 
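     * The pointer handed out by adaptForNumericalRecipes2() is just mdata
     * shifted so that indexing starts at [1*Xdim+1]; no memory was allocated,
     * so nothing has to be freed here.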
*/ void killAdaptationForNumericalRecipes2(T** m) const {} /** Write this matrix to file */ void write(const FileName &fn) const { std::ofstream fhOut; fhOut.open(fn.c_str()); if (!fhOut) REPORT_ERROR((std::string)"write: Cannot open "+fn+" for output"); fhOut << *this; fhOut.close(); } /** Show matrix */ friend std::ostream& operator<<(std::ostream& ostrm, const Matrix2D& v) { if (v.Xdim() == 0 || v.Ydim() == 0) ostrm << "NULL matrix\n"; else { ostrm << std::endl; RFLOAT max_val = v.computeMax(); int prec = bestPrecision(max_val, 10); for (int i = 0; i < v.Ydim(); i++) { for (int j = 0; j < v.Xdim(); j++) { ostrm << std::setw(13) << floatToString((RFLOAT) v(i, j), 10, prec) << ' '; } ostrm << std::endl; } } return ostrm; } /** Makes a matrix from a vector * * The origin of the matrix is set such that it has one of the index origins * (X or Y) to the same value as the vector, and the other set to 0 * according to the shape. * * @code * Matrix2D< RFLOAT > m = fromVector(v); * @endcode */ void fromVector(const Matrix1D& op1) { // Null vector => Null matrix if (op1.size() == 0) { clear(); return; } // Look at shape and copy values if (op1.isRow()) { if (mdimy!=1 || mdimx!=VEC_XSIZE(op1)) resize(1, VEC_XSIZE(op1)); for (int j = 0; j < VEC_XSIZE(op1); j++) MAT_ELEM(*this,0, j) = VEC_ELEM(op1,j); } else { if (mdimy!=1 || mdimx!=VEC_XSIZE(op1)) resize(VEC_XSIZE(op1), 1); for (int i = 0; i < VEC_XSIZE(op1); i++) MAT_ELEM(*this,i, 0) = VEC_ELEM(op1,i); } } /** Makes a vector from a matrix * * An exception is thrown if the matrix is not a single row or a single * column. The origin of the vector is set according to the one of the * matrix. * * @code * Matrix1D< RFLOAT > v; * m.toVector(v); * @endcode */ void toVector(Matrix1D& op1) const { // Null matrix => Null vector if (mdimx == 0 || mdimy == 0) { op1.clear(); return; } // If matrix is not a vector, produce an error if (!(mdimx == 1 || mdimy == 1)) REPORT_ERROR("toVector: Matrix cannot be converted to vector"); // Look at shape and copy values if (mdimy == 1) { // Row vector if (VEC_XSIZE(op1)!=mdimx) op1.resize(mdimx); for (int j = 0; j < mdimx; j++) VEC_ELEM(op1,j) = MAT_ELEM(*this,0, j); op1.setRow(); } else { // Column vector if (VEC_XSIZE(op1)!=mdimy) op1.resize(mdimy); for (int i = 0; i < mdimy; i++) VEC_ELEM(op1,i) = MAT_ELEM(*this,i, 0); op1.setCol(); } } /**Copy matrix to stl::vector */ void copyToVector(std::vector &v) { v.assign(mdata, mdata+mdim); } /**Copy stl::vector to matrix */ void copyFromVector(std::vector &v,int Xdim, int Ydim) { if (mdimx!=Xdim || mdimy!=Ydim) resize(Ydim, Xdim); copy( v.begin(), v.begin()+v.size(), mdata); } /** Get row * * This function returns a row vector corresponding to the choosen * row inside the nth 2D matrix, the numbering of the rows is also * logical not physical. * * @code * std::vector< RFLOAT > v; * m.getRow(-2, v); * @endcode */ void getRow(int i, Matrix1D& v) const { if (mdimx == 0 || mdimy == 0) { v.clear(); return; } if (i < 0 || i >= mdimy) REPORT_ERROR("getRow: Matrix subscript (i) greater than matrix dimension"); if (VEC_XSIZE(v)!=mdimx) v.resize(mdimx); for (int j = 0; j < mdimx; j++) VEC_ELEM(v,j) = MAT_ELEM(*this,i, j); v.setRow(); } /** Get Column * * This function returns a column vector corresponding to the * choosen column. 
* * @code * std::vector< RFLOAT > v; * m.getCol(-1, v); * @endcode */ void getCol(int j, Matrix1D& v) const { if (mdimx == 0 || mdimy == 0) { v.clear(); return; } if (j < 0 || j >= mdimx) REPORT_ERROR("getCol: Matrix subscript (j) greater than matrix dimension"); if (VEC_XSIZE(v)!=mdimy) v.resize(mdimy); for (int i = 0; i < mdimy; i++) VEC_ELEM(v,i) = MAT_ELEM(*this,i, j); v.setCol(); } /** Set Row * * This function sets a row vector corresponding to the choosen row in the 2D Matrix * * @code * m.setRow(-2, m.row(1)); // Copies row 1 in row -2 * @endcode */ void setRow(int i, const Matrix1D& v) { if (mdimx == 0 || mdimy == 0) REPORT_ERROR("setRow: Target matrix is empty"); if (i < 0 || i >= mdimy) REPORT_ERROR("setRow: Matrix subscript (i) out of range"); if (VEC_XSIZE(v) != mdimx) REPORT_ERROR("setRow: Vector dimension different from matrix one"); if (!v.isRow()) REPORT_ERROR("setRow: Not a row vector in assignment"); for (int j = 0; j < mdimx; j++) MAT_ELEM(*this,i, j) = VEC_ELEM(v,j); } /** Set Column * * This function sets a column vector corresponding to the choosen column * inside matrix. * * @code * m.setCol(0, (m.row(1)).transpose()); // Copies row 1 in column 0 * @endcode */ void setCol(int j, const Matrix1D& v) { if (mdimx == 0 || mdimy == 0) REPORT_ERROR("setCol: Target matrix is empty"); if (j < 0 || j>= mdimx) REPORT_ERROR("setCol: Matrix subscript (j) out of range"); if (VEC_XSIZE(v) != mdimy) REPORT_ERROR("setCol: Vector dimension different from matrix one"); if (!v.isCol()) REPORT_ERROR("setCol: Not a column vector in assignment"); for (int i = 0; i < mdimy; i++) MAT_ELEM(*this,i, j) = VEC_ELEM(v,i); } /** Determinant of a matrix * * An exception is thrown if the matrix is not squared or it is empty. * * @code * RFLOAT det = m.det(); * @endcode */ T det() const { // (see Numerical Recipes, Chapter 2 Section 5) if (mdimx == 0 || mdimy == 0) REPORT_ERROR("determinant: Matrix is empty"); if (mdimx != mdimy) REPORT_ERROR("determinant: Matrix is not squared"); for (int i = 0; i < mdimy; i++) { bool all_zeros = true; for (int j = 0; j < mdimx; j++) if (ABS(MAT_ELEM((*this),i, j)) > XMIPP_EQUAL_ACCURACY) { all_zeros = false; break; } if (all_zeros) return 0; } // Perform decomposition Matrix1D< int > indx; T d; Matrix2D LU; ludcmp(*this, LU, indx, d); // Calculate determinant for (int i = 0; i < mdimx; i++) d *= (T) MAT_ELEM(LU,i , i); return d; } /** Algebraic transpose of a Matrix * * You can use the transpose in as complex expressions as you like. The * origin of the vector is not changed. * * @code * v2 = v1.transpose(); * @endcode */ Matrix2D transpose() const { Matrix2D result(mdimx, mdimy); FOR_ALL_ELEMENTS_IN_MATRIX2D(result) MAT_ELEM(result,i,j) = MAT_ELEM((*this),j,i); return result; } /** Inverse of a matrix * * The matrix is inverted using a SVD decomposition. In fact the * pseudoinverse is returned. 
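     * For 2x2 and 3x3 matrices a closed-form (adjugate/determinant) inverse is
     * used; any other size goes through svdcmp, where singular values below a
     * relative tolerance are treated as zero before forming the pseudoinverse.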
* * @code * Matrix2D< RFLOAT > m1_inv; * m1.inv(m1_inv); * @endcode */ void inv(Matrix2D& result) const { if (mdimx == 0 || mdimy == 0) { REPORT_ERROR("Inverse: Matrix is empty"); } // Initialise output result.initZeros(mdimx, mdimy); if (mdimx == 3 && mdimy == 3) { MAT_ELEM(result, 0, 0) = MAT_ELEM((*this), 2, 2)*MAT_ELEM((*this), 1, 1)-MAT_ELEM((*this), 2, 1)*MAT_ELEM((*this), 1, 2); MAT_ELEM(result, 0, 1) = -(MAT_ELEM((*this), 2, 2)*MAT_ELEM((*this), 0, 1)-MAT_ELEM((*this), 2, 1)*MAT_ELEM((*this), 0, 2)); MAT_ELEM(result, 0, 2) = MAT_ELEM((*this), 1, 2)*MAT_ELEM((*this), 0, 1)-MAT_ELEM((*this), 1, 1)*MAT_ELEM((*this), 0, 2); MAT_ELEM(result, 1, 0) = -(MAT_ELEM((*this), 2, 2)*MAT_ELEM((*this), 1, 0)-MAT_ELEM((*this), 2, 0)*MAT_ELEM((*this), 1, 2)); MAT_ELEM(result, 1, 1) = MAT_ELEM((*this), 2, 2)*MAT_ELEM((*this), 0, 0)-MAT_ELEM((*this), 2, 0)*MAT_ELEM((*this), 0, 2); MAT_ELEM(result, 1, 2) = -(MAT_ELEM((*this), 1, 2)*MAT_ELEM((*this), 0, 0)-MAT_ELEM((*this), 1, 0)*MAT_ELEM((*this), 0, 2)); MAT_ELEM(result, 2, 0) = MAT_ELEM((*this), 2, 1)*MAT_ELEM((*this), 1, 0)-MAT_ELEM((*this), 2, 0)*MAT_ELEM((*this), 1, 1); MAT_ELEM(result, 2, 1) = -(MAT_ELEM((*this), 2, 1)*MAT_ELEM((*this), 0, 0)-MAT_ELEM((*this), 2, 0)*MAT_ELEM((*this), 0, 1)); MAT_ELEM(result, 2, 2) = MAT_ELEM((*this), 1, 1)*MAT_ELEM((*this), 0, 0)-MAT_ELEM((*this), 1, 0)*MAT_ELEM((*this), 0, 1); RFLOAT tmp = MAT_ELEM((*this), 0, 0) * MAT_ELEM(result, 0, 0) + MAT_ELEM((*this), 1, 0) * MAT_ELEM(result, 0, 1) + MAT_ELEM((*this), 2, 0) * MAT_ELEM(result, 0, 2); result /= tmp; } else if (mdimx == 2 && mdimy == 2) { MAT_ELEM(result, 0, 0) = MAT_ELEM((*this), 1, 1); MAT_ELEM(result, 0, 1) = -MAT_ELEM((*this), 0, 1); MAT_ELEM(result, 1, 0) = -MAT_ELEM((*this), 1, 0); MAT_ELEM(result, 1, 1) = MAT_ELEM((*this), 0, 0); RFLOAT tmp = MAT_ELEM((*this), 0, 0) * MAT_ELEM((*this), 1, 1) - MAT_ELEM((*this), 0, 1) * MAT_ELEM((*this), 1, 0); result /= tmp; } else { // Perform SVD decomposition Matrix2D< RFLOAT > u, v; Matrix1D< RFLOAT > w; svdcmp(*this, u, w, v); // *this = U * W * V^t RFLOAT tol = computeMax() * XMIPP_MAX(mdimx, mdimy) * 1e-14; // Compute W^-1 bool invertible = false; FOR_ALL_ELEMENTS_IN_MATRIX1D(w) { if (ABS(VEC_ELEM(w,i)) > tol) { VEC_ELEM(w,i) = 1.0 / VEC_ELEM(w,i); invertible = true; } else VEC_ELEM(w,i) = 0.0; } if (!invertible) return; // Compute V*W^-1 FOR_ALL_ELEMENTS_IN_MATRIX2D(v) MAT_ELEM(v,i,j) *= VEC_ELEM(w,j); // Compute Inverse for (int i = 0; i < mdimx; i++) for (int j = 0; j < mdimy; j++) for (int k = 0; k < mdimx; k++) MAT_ELEM(result,i,j) += (T) MAT_ELEM(v,i,k) * MAT_ELEM(u,j,k); } } /** Inverse of a matrix */ Matrix2D inv() const { Matrix2D result; inv(result); return result; } /** True if the matrix is identity * * @code * if (m.isIdentity()) * std::cout << "The matrix is identity\n"; * @endcode */ bool isIdentity() const { for (int i = 0; i < mdimy; i++) for (int j = 0; j < mdimx; j++) if (i != j) { if (ABS(MAT_ELEM(*this,i,j)) > XMIPP_EQUAL_ACCURACY) return false; } else { if (ABS(MAT_ELEM(*this,i,j) - 1.) 
> XMIPP_EQUAL_ACCURACY ) return false; } return true; } //@} }; // Implementation of the vector*matrix // Documented in matrix1D.h template Matrix1D Matrix1D::operator*(const Matrix2D& M) { Matrix1D result; if (VEC_XSIZE(*this) != MAT_YSIZE(M)) REPORT_ERROR("Not compatible sizes in matrix by vector"); if (!isRow()) REPORT_ERROR("Vector is not a row"); result.initZeros(MAT_XSIZE(M)); for (int j = 0; j < MAT_XSIZE(M); j++) for (int i = 0; i < MAT_YSIZE(M); i++) VEC_ELEM(result,j) += VEC_ELEM(*this,i) * MAT_ELEM(M,i, j); result.setRow(); return result; } /**@name Matrix Related functions * These functions are not methods of Matrix2D */ //@{ /** LU Decomposition */ template void ludcmp(const Matrix2D& A, Matrix2D& LU, Matrix1D< int >& indx, T& d) { LU = A; if (VEC_XSIZE(indx)!=A.mdimx) indx.resize(A.mdimx); ludcmp(LU.adaptForNumericalRecipes2(), A.mdimx, indx.adaptForNumericalRecipes(), &d); } /** LU Backsubstitution */ template void lubksb(const Matrix2D& LU, Matrix1D< int >& indx, Matrix1D& b) { lubksb(LU.adaptForNumericalRecipes2(), indx.size(), indx.adaptForNumericalRecipes(), b.adaptForNumericalRecipes()); } /** SVD Backsubstitution */ void svbksb(Matrix2D< RFLOAT >& u, Matrix1D< RFLOAT >& w, Matrix2D< RFLOAT >& v, Matrix1D< RFLOAT >& b, Matrix1D< RFLOAT >& x); /** SVD Decomposition (through numerical recipes) */ template void svdcmp(const Matrix2D< T >& a, Matrix2D< RFLOAT >& u, Matrix1D< RFLOAT >& w, Matrix2D< RFLOAT >& v) { // svdcmp only works with RFLOAT typeCast(a, u); // Set size of matrices w.initZeros(u.mdimx); v.initZeros(u.mdimx, u.mdimx); // Call to the numerical recipes routine svdcmp(u.mdata, u.mdimy, u.mdimx, w.vdata, v.mdata); } /** Solve system of linear equations (Ax=b) through SVD Decomposition (through numerical recipes) */ template void solve(const Matrix2D< T >& A, const Matrix1D< T >& b, Matrix1D< RFLOAT >& result, RFLOAT tolerance) { if (A.mdimx == 0) REPORT_ERROR("Solve: Matrix is empty"); /*if (A.mdimx != A.mdimy) REPORT_ERROR("Solve: Matrix is not squared");*/ if (A.mdimy != b.vdim) REPORT_ERROR("Solve: Different sizes of Matrix and Vector"); /*if (b.isRow()) REPORT_ERROR("Solve: Not correct vector shape");*/ // First perform de single value decomposition // Xmipp interface that calls to svdcmp of numerical recipes Matrix2D< RFLOAT > u, v; Matrix1D< RFLOAT > w; svdcmp(A, u, w, v); // Here is checked if eigenvalues of the svd decomposition are acceptable // If a value is lower than tolerance, the it's zeroed, as this increases // the precision of the routine. 
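    // (The entries of w are the singular values returned by svdcmp; zeroing the
    // ones below the caller-supplied tolerance regularises the pseudo-inverse
    // used by the svbksb back-substitution further down.)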
FOR_ALL_ELEMENTS_IN_MATRIX1D(w) if (w(i) < tolerance) w(i) = 0; // Set size of matrices result.resize(b.vdim); // Xmipp interface that calls to svdksb of numerical recipes Matrix1D< RFLOAT > bd; typeCast(b, bd); svbksb(u, w, v, bd, result); } /** Solve system of linear equations (Ax=b), x and b being matrices through SVD Decomposition (through Gauss-Jordan numerical recipes) */ template void solve(const Matrix2D& A, const Matrix2D& b, Matrix2D& result) { if (A.mdimx == 0) REPORT_ERROR("Solve: Matrix is empty"); if (A.mdimx != A.mdimy) REPORT_ERROR("Solve: Matrix is not squared"); if (A.mdimy != b.mdimy) REPORT_ERROR("Solve: Different sizes of A and b"); // Solve result = b; Matrix2D Aux = A; gaussj(Aux.adaptForNumericalRecipes2(), Aux.mdimy, result.adaptForNumericalRecipes2(), b.mdimx); } /** Least-squares rigid transformation between two sets of 3D coordinates * RFLOAT lsq_rigid_body_transformation(std::vector > &set1, std::vector > &set2, Matrix2D &Rot, Matrix1D &trans) { Matrix2D A; Matrix1D avg1, avg2; if (set1.size() != set2.size()) REPORT_ERROR("lsq_rigid_body_transformation ERROR: unequal set size"); // Calculate average of set1 and set2 avg1 = vectorR3(0., 0., 0.); avg2 = vectorR3(0., 0., 0.); for (int i = 0; i < set1.size(); i++) { if (set1[i].vdim != 3) REPORT_ERROR("lsq_rigid_body_transformation ERROR: not a 3-point set1"); if (set2[i].vdim != 3) REPORT_ERROR("lsq_rigid_body_transformation ERROR: not a 3-point set2"); avg1 += set1[i]; avg2 += set2[i]; } avg1 /= (RFLOAT)set1.size(); avg2 /= (RFLOAT)set1.size(); A.initZeros(3, 3); Rot.initZeros(4,4); for (int i = 0; i < set1.size(); i++) { // fill A A(0, 0) += (XX(set1[i]) - XX(avg1)) * (XX(set2[i]) - XX(avg2)); A(0, 1) += (XX(set1[i]) - XX(avg1)) * (YY(set2[i]) - YY(avg2)); A(0, 2) += (XX(set1[i]) - XX(avg1)) * (ZZ(set2[i]) - ZZ(avg2)); A(1, 0) += (YY(set1[i]) - YY(avg1)) * (XX(set2[i]) - XX(avg2)); A(1, 1) += (YY(set1[i]) - YY(avg1)) * (YY(set2[i]) - YY(avg2)); A(1, 2) += (YY(set1[i]) - YY(avg1)) * (ZZ(set2[i]) - ZZ(avg2)); A(2, 0) += (ZZ(set1[i]) - ZZ(avg1)) * (XX(set2[i]) - XX(avg2)); A(2, 1) += (ZZ(set1[i]) - ZZ(avg1)) * (YY(set2[i]) - YY(avg2)); A(2, 2) += (ZZ(set1[i]) - ZZ(avg1)) * (ZZ(set2[i]) - ZZ(avg2)); } Matrix2D< RFLOAT > U, V; Matrix1D< RFLOAT > w; // TODO: check inverse, transpose etc etc!!! // Optimal rotation svdcmp(A, U, w, V); Rot = V.transpose() * U; // Optimal translation trans = avg1 - Rot * avg2; // return the squared difference term RFLOAT error = 0.; for (int i = 0; i < set1.size(); i++) { error += (Rot * set2[i] + trans - set1[i]).sum2(); } return error; } */ /** Conversion from one type to another. * * If we have an integer array and we need a RFLOAT one, we can use this * function. The conversion is done through a type casting of each element * If n >= 0, only the nth volumes will be converted, otherwise all NSIZE volumes */ template void typeCast(const Matrix2D& v1, Matrix2D& v2) { if (v1.mdim == 0) { v2.clear(); return; } if (v1.mdimx!=v2.mdimx || v1.mdimy!=v2.mdimy) v2.resize(v1); for (unsigned long int n = 0; n < v1.mdim; n++) v2.mdata[n] = static_cast< T2 > (v1.mdata[n]); } //@} //@} #endif /* MATRIX2D_H_ */ relion-3.1.3/src/memory.cpp000066400000000000000000000032041411340063500156050ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. 
Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include "src/memory.h" char* askMemory(unsigned long memsize) { char* ptr = NULL; if ( memsize == 0 ) { REPORT_ERROR("Error in askMemory: Memory allocation size requested is zero!"); return(NULL); } if ( ( ptr = (char *) calloc(1,memsize*sizeof(char)) ) == NULL ) { std::cerr<<"Memory allocation of %ld bytes failed, memsize= "<< memsize< void ask_Tvector(T* &v, int nl, int nh) { if (nh - nl + 1 > 1) { v = (T *)malloc((unsigned)(nh - nl + 1) * sizeof(T)); if (!v) REPORT_ERROR("allocation failure in vector()"); v -= nl; } else v = NULL; } /** Free memory associated to any type vector. After freeing v=NULL*/ template void free_Tvector(T* &v, int nl, int nh) { if (v != NULL) { free((char*)(v + nl)); v = NULL; } } /** Ask memory for any type matrix. The valid values range from v[nrl][ncl] to v[nrh][nch]. If no memory is available an exception is thrown. NULL is returned if any nh is not greater than its nl*/ template void ask_Tmatrix(T ** &m, int nrl, int nrh, int ncl, int nch) { if (nrh - nrl + 1 > 1 && nch - ncl + 1 > 1) { m = (T **) malloc((unsigned)(nrh - nrl + 1) * sizeof(T*)); if (!m) REPORT_ERROR( "allocation failure 1 in matrix()"); m -= nrl; for (int i = nrl;i <= nrh;i++) { m[i] = (T *) malloc((unsigned)(nch - ncl + 1) * sizeof(T)); if (!m[i]) REPORT_ERROR( "allocation failure 2 in matrix()"); m[i] -= ncl; } } else m = NULL; } /** Free memory associated to any type matrix. After freeing v=NULL*/ template void free_Tmatrix(T ** &m, int nrl, int nrh, int ncl, int nch) { if (m != NULL) { for (int i = nrh;i >= nrl;i--) free((char*)(m[i] + ncl)); free((char*)(m + nrl)); m = NULL; } } /** Ask memory for any type voliume. The valid values range from v[nsl][nrl][ncl] to v[nsh][nrh][nch]. If no memory is available an exception is thrown. NULL is returned if any nh is not greater than its nl. */ template void ask_Tvolume(T *** &m, int nsl, int nsh, int nrl, int nrh, int ncl, int nch) { if (nsh - nsl + 1 > 1 && nrh - nrl + 1 > 1 && nch - ncl + 1 > 1) { m = (T ***) malloc((unsigned)(nsh - nsl + 1) * sizeof(T**)); if (!m) REPORT_ERROR( "allocation failure 1 in matrix()"); m -= nsl; for (int k = nsl;k <= nsh;k++) { m[k] = (T **) malloc((unsigned)(nrh - nrl + 1) * sizeof(T*)); if (!m[k]) REPORT_ERROR( "allocation failure 2 in matrix()"); m[k] -= nrl; for (int i = nrl;i <= nrh;i++) { m[k][i] = (T *) malloc((unsigned)(nch - ncl + 1) * sizeof(T)); if (!m[k][i]) REPORT_ERROR( "allocation failure 2 in matrix()"); m[k][i] -= ncl; } } } else m = NULL; } /** Free memory associated to any type volume. 
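    The inner element rows are released first, then each slice's row-pointer
    array, and finally the slice-pointer array itself, mirroring the layout
    built by ask_Tvolume.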
After freeing v=NULL*/ template void free_Tvolume(T *** &m, int nsl, int nsh, int nrl, int nrh, int ncl, int nch) { if (m != NULL) { for (int k = nsh;k >= nsl;k--) { for (int i = nrh;i >= nrl;i--) free((char*)(m[k][i] + ncl)); free((char*)(m[k] + nrl)); } free((char*)(m + nsl)); m = NULL; } } /** Allocates memory. * Adapted from Bsofts bfree * * It is called exactly like malloc, with the following enhancements: * * - If allocation of zero bytes are requested it notifies the user. * - NO LONGER TRUE: Successfully allocated memory is zeroed * - Allocation is attempted and an error message is printed on failure. * - All failures return a NULL pointer to allow error handling from * calling functions. * * returns char* : a pointer to the memory (NULL on failure) */ char* askMemory(unsigned long size); /** Frees allocated memory. * Adapted from Bsofts bfree * * It is called exactly like free, with the following enhancements: * - If freeing fails an error message is printed. * - the pointer is reset to NULL * * returns int: 0 = success, -1 = failure. */ int freeMemory(void* ptr, unsigned long memsize); //@} #endif relion-3.1.3/src/metadata_container.cpp000066400000000000000000000070001411340063500201150ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #include "src/metadata_container.h" MetaDataContainer::MetaDataContainer() : doubles(0), ints(0), bools(0), strings(0), doubleVectors(0), unknowns(0) {} MetaDataContainer::MetaDataContainer( MetaDataTable *table, long doubleCount, long intCount, long boolCount, long stringCount, long doubleVectorCount, long unknownCount) : table(table), doubles(doubleCount, 0), ints(intCount, 0), bools(boolCount, false), strings(stringCount, ""), doubleVectors(doubleVectorCount), unknowns(unknownCount) {} MetaDataContainer::MetaDataContainer( MetaDataTable *table, MetaDataContainer* mdc) : table(table), doubles(mdc->doubles), ints(mdc->ints), bools(mdc->bools), strings(mdc->strings), doubleVectors(mdc->doubleVectors), unknowns(mdc->unknowns) {} void MetaDataContainer::getValue(long offset, double& dest) const { dest = doubles[offset]; } void MetaDataContainer::getValue(long offset, float& dest) const { dest = (float)doubles[offset]; } void MetaDataContainer::getValue(long offset, int& dest) const { dest = (int)ints[offset]; } void MetaDataContainer::getValue(long offset, long& dest) const { dest = ints[offset]; } void MetaDataContainer::getValue(long offset, bool& dest) const { dest = bools[offset]; } void MetaDataContainer::getValue(long offset, std::vector& dest) const { dest = doubleVectors[offset]; } void MetaDataContainer::getValue(long offset, std::vector& dest) const { dest.resize(doubleVectors[offset].size()); std::copy(doubleVectors[offset].begin(), doubleVectors[offset].end(), dest.begin()); } void MetaDataContainer::getValue(long offset, std::string& dest) const { dest = (strings[offset] == "\"\"") ? "" : strings[offset]; } void MetaDataContainer::setValue(long offset, const double& src) { doubles[offset] = src; } void MetaDataContainer::setValue(long offset, const float& src) { doubles[offset] = src; } void MetaDataContainer::setValue(long offset, const int& src) { ints[offset] = src; } void MetaDataContainer::setValue(long offset, const long& src) { ints[offset] = src; } void MetaDataContainer::setValue(long offset, const bool& src) { bools[offset] = src; } void MetaDataContainer::setValue(long offset, const std::string& src) { strings[offset] = (src.length() == 0) ? "\"\"" : src; } void MetaDataContainer::setValue(long offset, const std::vector& src) { doubleVectors[offset] = src; } void MetaDataContainer::setValue(long offset, const std::vector& src) { doubleVectors[offset].resize(src.size()); std::copy(src.begin(), src.end(), doubleVectors[offset].begin()); } relion-3.1.3/src/metadata_container.h000066400000000000000000000051221411340063500175650ustar00rootroot00000000000000/*************************************************************************** * * Author: "Jasenko Zivanov" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #ifndef METADATA_CONTAINER_H #define METADATA_CONTAINER_H #include #include #include #include #include #include "src/funcs.h" #include "src/metadata_label.h" class MetaDataTable; class MetaDataContainer { public: MetaDataTable* table; std::vector doubles; std::vector ints; std::vector bools; std::vector strings; std::vector > doubleVectors; std::vector unknowns; MetaDataContainer(); MetaDataContainer(MetaDataTable* table, long doubleCount, long intCount, long boolCount, long stringCount, long doubleVectorCount, long unknownCount); MetaDataContainer(MetaDataTable* table, MetaDataContainer* mdc); void getValue(long offset, double& dest) const; void getValue(long offset, float& dest) const; void getValue(long offset, int& dest) const; void getValue(long offset, long& dest) const; void getValue(long offset, bool& dest) const; void getValue(long offset, std::string& dest) const; void getValue(long offset, std::vector& dest) const; void getValue(long offset, std::vector& dest) const; void setValue(long offset, const double& src); void setValue(long offset, const float& src); void setValue(long offset, const int& src); void setValue(long offset, const long& src); void setValue(long offset, const bool& src); void setValue(long offset, const std::string& src); void setValue(long offset, const std::vector& src); void setValue(long offset, const std::vector& src); }; #endif relion-3.1.3/src/metadata_label.cpp000066400000000000000000000115471411340063500172250ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ /*************************************************************************** * * Authors: J.M. De la Rosa Trevin (jmdelarosa@cnb.csic.es) * * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA * 02111-1307 USA * * All comments concerning this program package may be sent to the * e-mail address 'xmipp@cnb.csic.es' ***************************************************************************/ #include "src/metadata_label.h" //This is needed for static memory allocation std::map EMDL::data; std::map EMDL::names; std::map EMDL::definitions; StaticInitialization EMDL::initialization; //Just for initialization void EMDL::addLabel(EMDLabel label, EMDLabelType type, std::string name, std::string definition) { data[label] = EMDLabelData(type, name); names[name] = label; definitions[name] = definition; } void EMDL::addAltLabel(EMDLabel label, std::string name) { names[name] = label; } void EMDL::printDefinitions(std::ostream& out) { out << "+++ RELION MetaDataLabel (EMDL) definitions: +++" << std::endl; std::map::const_iterator strIt; for (strIt = definitions.begin(); strIt != definitions.end(); strIt++) { out << std::setw(30) <first; if (EMDL::isInt(names[strIt->first])) { out << " (int) "; } else if (EMDL::isBool(names[strIt->first])) { out << " (bool) "; } else if (EMDL::isDouble(names[strIt->first])) { out << " (double) "; } else if (EMDL::isString(names[strIt->first])) { out << " (string) "; } else if (EMDL::isDoubleVector(names[strIt->first])) { out << " (vector) "; } else if (EMDL::isUnknown(names[strIt->first])) { out << " (string) "; } else { REPORT_ERROR("EMDL::printDefinitions: unrecognised type"); } out << ": " << strIt->second < EMDL_UNDEFINED && label < EMDL_LAST_LABEL); } bool EMDL::isValidLabel(const std::string &labelName) { EMDLabel label = EMDL::str2Label(labelName); return EMDL::isValidLabel(label); } relion-3.1.3/src/metadata_label.h000066400000000000000000002436761411340063500167040ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ /*************************************************************************** * * Authors: J.M. De la Rosa Trevin (jmdelarosa@cnb.csic.es) * * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA * 02111-1307 USA * * All comments concerning this program package may be sent to the * e-mail address 'xmipp@cnb.csic.es' ***************************************************************************/ #ifndef METADATA_LABEL_H #define METADATA_LABEL_H #include #include #include #include #include #include "src/funcs.h" class EMDLabelData; class StaticInitialization; enum EMDLabel { EMDL_UNDEFINED = -1, // Keep the order the same as in StaticInitialization below!! EMDL_FIRST_LABEL, EMDL_OBJID = EMDL_FIRST_LABEL, ///< object id (int), NOTE: This label is special and shouldn't be used EMDL_AREA_ID, ///< ID for the area (or field of view). If one does not use (tilt) series, area would be the same as micrograph... EMDL_AREA_NAME, ///< Name for the area (or field of view). If one does not use (tilt) series, area would be the same as micrograph... EMDL_COMMENT, // The EMDL_COMMENT is handled specially as well EMDL_BODY_MASK_NAME, ///< For multi-body refinements EMDL_BODY_KEEP_FIXED, ///< For multi-body refinements EMDL_BODY_REFERENCE_NAME, EMDL_BODY_ROTATE_DIRECTION_X, EMDL_BODY_ROTATE_DIRECTION_Y, EMDL_BODY_ROTATE_DIRECTION_Z, EMDL_BODY_ROTATE_RELATIVE_TO, EMDL_BODY_SIGMA_ANG, EMDL_BODY_SIGMA_OFFSET, // deprecated EMDL_BODY_SIGMA_OFFSET_ANGSTROM, EMDL_BODY_SIGMA_ROT, EMDL_BODY_SIGMA_TILT, EMDL_BODY_SIGMA_PSI, EMDL_BODY_STAR_FILE, EMDL_CTF_ASTIGMATISM, EMDL_CTF_BFACTOR, ///< B-factor EMDL_CTF_MAXRES, ///< Maximum resolution with Thon rings EMDL_CTF_VALIDATIONSCORE, ///< Gctf-based validation score for CTF fit EMDL_CTF_SCALEFACTOR, ///< linear scale-factor EMDL_CTF_SAMPLING_RATE, ///< Sampling rate EMDL_CTF_VOLTAGE, ///< Microscope voltage (kV) EMDL_CTF_DEFOCUSU, ///< Defocus U (Angstroms) EMDL_CTF_DEFOCUSV, ///< Defocus V (Angstroms) EMDL_CTF_DEFOCUS_ANGLE, ///< Defocus angle (degrees) EMDL_CTF_CS, ///< Spherical aberration EMDL_CTF_CA, ///< Chromatic aberration EMDL_CTF_DETECTOR_PIXEL_SIZE, ///< Pixel size for detector as used in CTF-determination (deprecated) EMDL_CTF_POWER_SPECTRUM, EMDL_CTF_ENERGY_LOSS, ///< Energy loss EMDL_CTF_FOM, ///< ctffind FOM (CC) for quality of CTF-fit EMDL_CTF_IMAGE, ///< name of an image describing the CTF model EMDL_CTF_LENS_STABILITY, ///< Lens stability EMDL_CTF_MAGNIFICATION, ///< Magnification used for CTF-determination (deprecated) EMDL_CTF_PHASESHIFT, ///< Phase-shift from a phase plate EMDL_CTF_CONVERGENCE_CONE, ///< Convergence cone EMDL_CTF_LONGITUDINAL_DISPLACEMENT, ///< Longitudinal displacement EMDL_CTF_TRANSVERSAL_DISPLACEMENT, ///< Transversal displacemente EMDL_CTF_Q0, ///< Amplitude contrast EMDL_CTF_K, ///< CTF gain EMDL_CTF_VALUE, ///< CTF value EMDL_IMAGE_NAME, EMDL_IMAGE_ORI_NAME, EMDL_IMAGE_RECONSTRUCT_NAME, EMDL_IMAGE_ID, EMDL_IMAGE_ENABLED, EMDL_IMAGE_DATATYPE, EMDL_IMAGE_DIMENSIONALITY, EMDL_IMAGE_BEAMTILT_X, EMDL_IMAGE_BEAMTILT_Y, EMDL_IMAGE_MTF_FILENAME, EMDL_IMAGE_OPTICS_GROUP, EMDL_IMAGE_OPTICS_GROUP_NAME, EMDL_IMAGE_ODD_ZERNIKE_COEFFS, EMDL_IMAGE_EVEN_ZERNIKE_COEFFS, EMDL_IMAGE_PIXEL_SIZE, EMDL_IMAGE_MAG_MATRIX_00, EMDL_IMAGE_MAG_MATRIX_01, EMDL_IMAGE_MAG_MATRIX_10, EMDL_IMAGE_MAG_MATRIX_11, EMDL_IMAGE_COORD_X, EMDL_IMAGE_COORD_Y, EMDL_IMAGE_COORD_Z, EMDL_IMAGE_FRAME_NR, EMDL_IMAGE_MAGNIFICATION_CORRECTION, EMDL_IMAGE_NORM_CORRECTION, EMDL_IMAGE_SAMPLINGRATE, EMDL_IMAGE_SAMPLINGRATE_X, 
EMDL_IMAGE_SAMPLINGRATE_Y, EMDL_IMAGE_SAMPLINGRATE_Z, EMDL_IMAGE_SIZE, EMDL_IMAGE_SIZE_X, EMDL_IMAGE_SIZE_Y, EMDL_IMAGE_SIZE_Z, EMDL_IMAGE_STATS_MIN, EMDL_IMAGE_STATS_MAX, EMDL_IMAGE_STATS_AVG, EMDL_IMAGE_STATS_STDDEV, EMDL_IMAGE_STATS_SKEW, EMDL_IMAGE_STATS_KURT, EMDL_IMAGE_WEIGHT, EMDL_JOB_IS_CONTINUE, EMDL_JOB_TYPE, EMDL_JOB_TYPE_NAME, EMDL_JOBOPTION_TYPE, EMDL_JOBOPTION_VARIABLE, EMDL_JOBOPTION_VALUE, EMDL_JOBOPTION_LABEL, EMDL_JOBOPTION_DEFAULT_VALUE, EMDL_JOBOPTION_MINVAL, EMDL_JOBOPTION_MAXVAL, EMDL_JOBOPTION_STEPVAL, EMDL_JOBOPTION_HELPTEXT, EMDL_JOBOPTION_PATTERN, EMDL_JOBOPTION_DIRECTORY, EMDL_JOBOPTION_MENUOPTIONS, EMDL_MATRIX_1_1, EMDL_MATRIX_1_2, EMDL_MATRIX_1_3, EMDL_MATRIX_2_1, EMDL_MATRIX_2_2, EMDL_MATRIX_2_3, EMDL_MATRIX_3_1, EMDL_MATRIX_3_2, EMDL_MATRIX_3_3, EMDL_MICROGRAPH_ACCUM_MOTION_TOTAL, EMDL_MICROGRAPH_ACCUM_MOTION_EARLY, EMDL_MICROGRAPH_ACCUM_MOTION_LATE, EMDL_MICROGRAPH_ID, EMDL_MICROGRAPH_NAME, EMDL_MICROGRAPH_GAIN_NAME, EMDL_MICROGRAPH_DEFECT_FILE, EMDL_MICROGRAPH_NAME_WODOSE, EMDL_MICROGRAPH_MOVIE_NAME, EMDL_MICROGRAPH_METADATA_NAME, EMDL_MICROGRAPH_TILT_ANGLE, EMDL_MICROGRAPH_TILT_AXIS_DIRECTION, EMDL_MICROGRAPH_TILT_AXIS_OUTOFPLANE, EMDL_MICROGRAPH_ORIGINAL_PIXEL_SIZE, EMDL_MICROGRAPH_PIXEL_SIZE, EMDL_MICROGRAPH_PRE_EXPOSURE, EMDL_MICROGRAPH_DOSE_RATE, EMDL_MICROGRAPH_BINNING, EMDL_MICROGRAPH_FRAME_NUMBER, EMDL_MICROGRAPH_MOTION_MODEL_VERSION, EMDL_MICROGRAPH_START_FRAME, EMDL_MICROGRAPH_END_FRAME, EMDL_MICROGRAPH_SHIFT_X, EMDL_MICROGRAPH_SHIFT_Y, EMDL_MICROGRAPH_MOTION_COEFFS_IDX, EMDL_MICROGRAPH_MOTION_COEFF, EMDL_MICROGRAPH_EER_UPSAMPLING, EMDL_MICROGRAPH_EER_GROUPING, EMDL_MASK_NAME, EMDL_MLMODEL_ACCURACY_ROT, EMDL_MLMODEL_ACCURACY_TRANS, // deprecated EMDL_MLMODEL_ACCURACY_TRANS_ANGSTROM, EMDL_MLMODEL_AVE_PMAX, EMDL_MLMODEL_CURRENT_RESOLUTION, EMDL_MLMODEL_CURRENT_SIZE, EMDL_MLMODEL_DATA_VS_PRIOR_REF, EMDL_MLMODEL_DIMENSIONALITY, EMDL_MLMODEL_DIMENSIONALITY_DATA, EMDL_MLMODEL_DIFF2_HALVES_REF, EMDL_MLMODEL_ESTIM_RESOL_REF, EMDL_MLMODEL_FOURIER_COVERAGE_REF, EMDL_MLMODEL_FOURIER_COVERAGE_TOTAL_REF, EMDL_MLMODEL_FSC_HALVES_REF, EMDL_MLMODEL_GROUP_NAME, EMDL_MLMODEL_GROUP_NO, EMDL_MLMODEL_GROUP_NR_PARTICLES, EMDL_MLMODEL_GROUP_SCALE_CORRECTION, EMDL_MLMODEL_HELICAL_NR_ASU, EMDL_MLMODEL_HELICAL_TWIST, EMDL_MLMODEL_HELICAL_TWIST_MIN, EMDL_MLMODEL_HELICAL_TWIST_MAX, EMDL_MLMODEL_HELICAL_TWIST_INITIAL_STEP, EMDL_MLMODEL_HELICAL_RISE, EMDL_MLMODEL_HELICAL_RISE_MIN, EMDL_MLMODEL_HELICAL_RISE_MAX, EMDL_MLMODEL_HELICAL_RISE_INITIAL_STEP, EMDL_MLMODEL_IS_HELIX, EMDL_MLMODEL_INTERPOLATOR, EMDL_MLMODEL_LL, EMDL_MLMODEL_MINIMUM_RADIUS_NN_INTERPOLATION, EMDL_MLMODEL_NORM_CORRECTION_AVG, EMDL_MLMODEL_NR_BODIES, EMDL_MLMODEL_NR_CLASSES, EMDL_MLMODEL_NR_GROUPS, EMDL_MLMODEL_ORIGINAL_SIZE, EMDL_MLMODEL_ORIENTABILITY_CONTRIBUTION, EMDL_MLMODEL_PADDING_FACTOR, EMDL_MLMODEL_PDF_CLASS, EMDL_MLMODEL_PRIOR_OFFX_CLASS, EMDL_MLMODEL_PRIOR_OFFY_CLASS, EMDL_MLMODEL_PDF_ORIENT, EMDL_MLMODEL_PIXEL_SIZE, EMDL_MLMODEL_POWER_REF, EMDL_MLMODEL_PRIOR_MODE, EMDL_MLMODEL_SIGMA_OFFSET, // deprecated EMDL_MLMODEL_SIGMA_OFFSET_ANGSTROM, EMDL_MLMODEL_SIGMA_ROT, EMDL_MLMODEL_SIGMA_TILT, EMDL_MLMODEL_SIGMA_PSI, EMDL_MLMODEL_REF_IMAGE, EMDL_MLMODEL_SGD_GRADIENT_IMAGE, EMDL_MLMODEL_SIGMA2_NOISE, EMDL_MLMODEL_SIGMA2_REF, EMDL_MLMODEL_SSNR_REF, EMDL_MLMODEL_TAU2_FUDGE_FACTOR, EMDL_MLMODEL_TAU2_REF, EMDL_OPTIMISER_ACCURACY_ROT, EMDL_OPTIMISER_ACCURACY_TRANS, // deprecated EMDL_OPTIMISER_ACCURACY_TRANS_ANGSTROM, EMDL_OPTIMISER_ADAPTIVE_FRACTION, EMDL_OPTIMISER_ADAPTIVE_OVERSAMPLING, 
EMDL_OPTIMISER_AUTO_LOCAL_HP_ORDER, EMDL_OPTIMISER_AVAILABLE_MEMORY, EMDL_OPTIMISER_BEST_RESOL_THUS_FAR, EMDL_OPTIMISER_CHANGES_OPTIMAL_OFFSETS, EMDL_OPTIMISER_CHANGES_OPTIMAL_ORIENTS, EMDL_OPTIMISER_CHANGES_OPTIMAL_CLASSES, EMDL_OPTIMISER_COARSE_SIZE, EMDL_OPTIMISER_DATA_ARE_CTF_PHASE_FLIPPED, EMDL_OPTIMISER_DATA_ARE_CTF_PREMULTIPLIED, EMDL_OPTIMISER_DATA_STARFILE, EMDL_OPTIMISER_DO_AUTO_REFINE, EMDL_OPTIMISER_DO_ONLY_FLIP_CTF_PHASES, EMDL_OPTIMISER_DO_CORRECT_CTF, EMDL_OPTIMISER_DO_CORRECT_MAGNIFICATION, EMDL_OPTIMISER_DO_CORRECT_NORM, EMDL_OPTIMISER_DO_CORRECT_SCALE, EMDL_OPTIMISER_DO_EXTERNAL_RECONSTRUCT, EMDL_OPTIMISER_DO_REALIGN_MOVIES, EMDL_OPTIMISER_DO_MAP, EMDL_OPTIMISER_DO_SGD, EMDL_OPTIMISER_DO_STOCHASTIC_EM, EMDL_OPTIMISER_EXTERNAL_RECONS_DATA_REAL, EMDL_OPTIMISER_EXTERNAL_RECONS_DATA_IMAG, EMDL_OPTIMISER_EXTERNAL_RECONS_WEIGHT, EMDL_OPTIMISER_EXTERNAL_RECONS_RESULT, EMDL_OPTIMISER_EXTERNAL_RECONS_NEWSTAR, EMDL_OPTIMISER_FAST_SUBSETS, EMDL_OPTIMISER_SGD_INI_ITER, EMDL_OPTIMISER_SGD_FIN_ITER, EMDL_OPTIMISER_SGD_INBETWEEN_ITER, EMDL_OPTIMISER_SGD_INI_RESOL, EMDL_OPTIMISER_SGD_FIN_RESOL, EMDL_OPTIMISER_SGD_INI_SUBSET_SIZE, EMDL_OPTIMISER_SGD_FIN_SUBSET_SIZE, EMDL_OPTIMISER_SGD_MU, EMDL_OPTIMISER_SGD_SIGMA2FUDGE_INI, EMDL_OPTIMISER_SGD_SIGMA2FUDGE_HALFLIFE, EMDL_OPTIMISER_SGD_SKIP_ANNNEAL, EMDL_OPTIMISER_SGD_SUBSET_SIZE, EMDL_OPTIMISER_SGD_WRITE_EVERY_SUBSET, EMDL_OPTIMISER_SGD_MAX_SUBSETS, EMDL_OPTIMISER_SGD_STEPSIZE, EMDL_OPTIMISER_DO_SOLVENT_FLATTEN, EMDL_OPTIMISER_DO_SOLVENT_FSC, EMDL_OPTIMISER_DO_SKIP_ALIGN, EMDL_OPTIMISER_DO_SKIP_ROTATE, EMDL_OPTIMISER_DO_SPLIT_RANDOM_HALVES, EMDL_OPTIMISER_DO_ZERO_MASK, EMDL_OPTIMISER_FIX_SIGMA_NOISE, EMDL_OPTIMISER_FIX_SIGMA_OFFSET, EMDL_OPTIMISER_FIX_TAU, EMDL_OPTIMISER_HAS_CONVERGED, EMDL_OPTIMISER_HAS_HIGH_FSC_AT_LIMIT, EMDL_OPTIMISER_HAS_LARGE_INCR_SIZE_ITER_AGO, EMDL_OPTIMISER_DO_HELICAL_REFINE, EMDL_OPTIMISER_IGNORE_HELICAL_SYMMETRY, EMDL_OPTIMISER_FOURIER_MASK, EMDL_OPTIMISER_HELICAL_TWIST_INITIAL, EMDL_OPTIMISER_HELICAL_RISE_INITIAL, EMDL_OPTIMISER_HELICAL_Z_PERCENTAGE, EMDL_OPTIMISER_HELICAL_NSTART, EMDL_OPTIMISER_HELICAL_TUBE_INNER_DIAMETER, EMDL_OPTIMISER_HELICAL_TUBE_OUTER_DIAMETER, EMDL_OPTIMISER_HELICAL_SYMMETRY_LOCAL_REFINEMENT, EMDL_OPTIMISER_HELICAL_SIGMA_DISTANCE, EMDL_OPTIMISER_HELICAL_KEEP_TILT_PRIOR_FIXED, EMDL_OPTIMISER_LOWRES_LIMIT_EXP, EMDL_OPTIMISER_HIGHRES_LIMIT_EXP, EMDL_OPTIMISER_HIGHRES_LIMIT_SGD, EMDL_OPTIMISER_IGNORE_CTF_UNTIL_FIRST_PEAK, EMDL_OPTIMISER_INCR_SIZE, EMDL_OPTIMISER_ITERATION_NO, EMDL_OPTIMISER_LOCAL_SYMMETRY_FILENAME, EMDL_OPTIMISER_LOWRES_JOIN_RANDOM_HALVES, EMDL_OPTIMISER_MAGNIFICATION_RANGE, EMDL_OPTIMISER_MAGNIFICATION_STEP, EMDL_OPTIMISER_MAX_COARSE_SIZE, EMDL_OPTIMISER_MAX_NR_POOL, EMDL_OPTIMISER_MODEL_STARFILE, EMDL_OPTIMISER_MODEL_STARFILE2, EMDL_OPTIMISER_NR_ITERATIONS, EMDL_OPTIMISER_NR_ITER_WO_RESOL_GAIN, EMDL_OPTIMISER_NR_ITER_WO_HIDDEN_VAR_CHANGES, EMDL_OPTIMISER_OPTICS_STARFILE, EMDL_OPTIMISER_OUTPUT_ROOTNAME, EMDL_OPTIMISER_PARTICLE_DIAMETER, EMDL_OPTIMISER_RADIUS_MASK_3D_MAP, EMDL_OPTIMISER_RADIUS_MASK_EXP_PARTICLES, EMDL_OPTIMISER_RANDOM_SEED, EMDL_OPTIMISER_REFS_ARE_CTF_CORRECTED, EMDL_OPTIMISER_SAMPLING_STARFILE, EMDL_OPTIMISER_SMALLEST_CHANGES_OPT_CLASSES, EMDL_OPTIMISER_SMALLEST_CHANGES_OPT_OFFSETS, EMDL_OPTIMISER_SMALLEST_CHANGES_OPT_ORIENTS, EMDL_OPTIMISER_SOLVENT_MASK_NAME, EMDL_OPTIMISER_SOLVENT_MASK2_NAME, EMDL_OPTIMISER_TAU_SPECTRUM_NAME, EMDL_OPTIMISER_USE_TOO_COARSE_SAMPLING, EMDL_OPTIMISER_WIDTH_MASK_EDGE, EMDL_ORIENT_FLIP, EMDL_ORIENT_ID, EMDL_ORIENT_ORIGIN_X, // 
(deprecated) EMDL_ORIENT_ORIGIN_Y, // (deprecated) EMDL_ORIENT_ORIGIN_Z, // (deprecated) EMDL_ORIENT_ORIGIN_X_PRIOR, // (deprecated) EMDL_ORIENT_ORIGIN_Y_PRIOR, // (deprecated) EMDL_ORIENT_ORIGIN_Z_PRIOR, // (deprecated) EMDL_ORIENT_ORIGIN_X_ANGSTROM, EMDL_ORIENT_ORIGIN_Y_ANGSTROM, EMDL_ORIENT_ORIGIN_Z_ANGSTROM, EMDL_ORIENT_ORIGIN_X_PRIOR_ANGSTROM, EMDL_ORIENT_ORIGIN_Y_PRIOR_ANGSTROM, EMDL_ORIENT_ORIGIN_Z_PRIOR_ANGSTROM, EMDL_ORIENT_ROT, EMDL_ORIENT_ROT_PRIOR, EMDL_ORIENT_ROT_PRIOR_FLIP_RATIO, // KThurber EMDL_ORIENT_TILT, EMDL_ORIENT_TILT_PRIOR, EMDL_ORIENT_PSI, EMDL_ORIENT_PSI_PRIOR, EMDL_ORIENT_PSI_PRIOR_FLIP_RATIO, EMDL_ORIENT_PSI_PRIOR_FLIP, // KThurber EMDL_PARTICLE_AUTOPICK_FOM, EMDL_PARTICLE_HELICAL_TUBE_ID, EMDL_PARTICLE_HELICAL_TUBE_PITCH, EMDL_PARTICLE_HELICAL_TRACK_LENGTH, //deprecated EMDL_PARTICLE_HELICAL_TRACK_LENGTH_ANGSTROM, EMDL_PARTICLE_CLASS, EMDL_PARTICLE_DLL, EMDL_PARTICLE_ID, EMDL_PARTICLE_FOM, EMDL_PARTICLE_KL_DIVERGENCE, EMDL_PARTICLE_RANDOM_SUBSET, EMDL_PARTICLE_BEAM_TILT_CLASS, EMDL_PARTICLE_NAME, EMDL_PARTICLE_ORI_NAME, EMDL_PARTICLE_NR_SIGNIFICANT_SAMPLES, EMDL_PARTICLE_NR_FRAMES, EMDL_PARTICLE_NR_FRAMES_AVG, EMDL_PARTICLE_MOVIE_RUNNING_AVG, EMDL_PARTICLE_PMAX, EMDL_PARTICLE_NUMBER, EMDL_PIPELINE_JOB_COUNTER, EMDL_PIPELINE_NODE_NAME, EMDL_PIPELINE_NODE_TYPE, EMDL_PIPELINE_PROCESS_ALIAS, EMDL_PIPELINE_PROCESS_NAME, EMDL_PIPELINE_PROCESS_TYPE, EMDL_PIPELINE_PROCESS_STATUS, EMDL_PIPELINE_EDGE_FROM, EMDL_PIPELINE_EDGE_TO, EMDL_PIPELINE_EDGE_PROCESS, EMDL_POSTPROCESS_BFACTOR, EMDL_POSTPROCESS_FINAL_RESOLUTION, EMDL_POSTPROCESS_FRACTION_MOLWEIGHT, EMDL_POSTPROCESS_FRACTION_SOLVENT_MASK, EMDL_POSTPROCESS_FSC_GENERAL, EMDL_POSTPROCESS_FSC_TRUE, EMDL_POSTPROCESS_FSC_PART_MOLWEIGHT, EMDL_POSTPROCESS_FSC_PART_FRACMASK, EMDL_POSTPROCESS_FSC_MASKED, EMDL_POSTPROCESS_FSC_UNMASKED, EMDL_POSTPROCESS_FSC_RANDOM_MASKED, EMDL_POSTPROCESS_AMPLCORR_MASKED, EMDL_POSTPROCESS_AMPLCORR_UNMASKED, EMDL_POSTPROCESS_DPR_MASKED, EMDL_POSTPROCESS_DPR_UNMASKED, EMDL_POSTPROCESS_GUINIER_FIT_CORRELATION, EMDL_POSTPROCESS_GUINIER_FIT_INTERCEPT, EMDL_POSTPROCESS_GUINIER_FIT_SLOPE, EMDL_POSTPROCESS_GUINIER_VALUE_IN, EMDL_POSTPROCESS_GUINIER_VALUE_INVMTF, EMDL_POSTPROCESS_GUINIER_VALUE_WEIGHTED, EMDL_POSTPROCESS_GUINIER_VALUE_SHARPENED, EMDL_POSTPROCESS_GUINIER_VALUE_INTERCEPT, EMDL_POSTPROCESS_GUINIER_RESOL_SQUARED, EMDL_POSTPROCESS_MOLWEIGHT, EMDL_POSTPROCESS_MTF_VALUE, ///< Detector MTF value EMDL_POSTPROCESS_RANDOMISE_FROM, EMDL_POSTPROCESS_UNFIL_HALFMAP1, EMDL_POSTPROCESS_UNFIL_HALFMAP2, EMDL_SAMPLING_IS_3D, EMDL_SAMPLING_IS_3D_TRANS, EMDL_SAMPLING_HEALPIX_ORDER, EMDL_SAMPLING_HEALPIX_ORDER_ORI, EMDL_SAMPLING_LIMIT_TILT, EMDL_SAMPLING_OFFSET_RANGE, EMDL_SAMPLING_OFFSET_STEP, EMDL_SAMPLING_OFFSET_RANGE_ORI, EMDL_SAMPLING_OFFSET_STEP_ORI, EMDL_SAMPLING_HELICAL_OFFSET_STEP, EMDL_SAMPLING_PERTURB, EMDL_SAMPLING_PERTURBATION_FACTOR, EMDL_SAMPLING_PRIOR_MODE, EMDL_SAMPLING_PSI_STEP, EMDL_SAMPLING_PSI_STEP_ORI, EMDL_SAMPLING_SIGMA_ROT, EMDL_SAMPLING_SIGMA_TILT, EMDL_SAMPLING_SIGMA_PSI, EMDL_SAMPLING_SYMMETRY, EMDL_SCHEDULE_EDGE_NUMBER, EMDL_SCHEDULE_EDGE_INPUT, EMDL_SCHEDULE_EDGE_OUTPUT, EMDL_SCHEDULE_EDGE_IS_FORK, EMDL_SCHEDULE_EDGE_OUTPUT_TRUE, EMDL_SCHEDULE_EDGE_BOOLEAN, EMDL_SCHEDULE_GENERAL_CURRENT_NODE, EMDL_SCHEDULE_GENERAL_ORIGINAL_START_NODE, EMDL_SCHEDULE_GENERAL_EMAIL, EMDL_SCHEDULE_GENERAL_NAME, EMDL_SCHEDULE_JOB_NAME, EMDL_SCHEDULE_JOB_ORI_NAME, EMDL_SCHEDULE_JOB_MODE, EMDL_SCHEDULE_JOB_HAS_STARTED, EMDL_SCHEDULE_OPERATOR_NAME, EMDL_SCHEDULE_OPERATOR_TYPE, EMDL_SCHEDULE_OPERATOR_INPUT1, 
EMDL_SCHEDULE_OPERATOR_INPUT2, EMDL_SCHEDULE_OPERATOR_OUTPUT, EMDL_SCHEDULE_VAR_BOOL_NAME, EMDL_SCHEDULE_VAR_BOOL_VALUE, EMDL_SCHEDULE_VAR_BOOL_ORI_VALUE, EMDL_SCHEDULE_VAR_FLOAT_NAME, EMDL_SCHEDULE_VAR_FLOAT_VALUE, EMDL_SCHEDULE_VAR_FLOAT_ORI_VALUE, EMDL_SCHEDULE_VAR_STRING_NAME, EMDL_SCHEDULE_VAR_STRING_VALUE, EMDL_SCHEDULE_VAR_STRING_ORI_VALUE, EMDL_SELECTED, EMDL_SELECT_PARTICLES_ZSCORE, EMDL_SORTED_IDX, EMDL_STARFILE_MOVIE_PARTICLES, EMDL_PERFRAME_CUMULATIVE_WEIGHT, EMDL_PERFRAME_RELATIVE_WEIGHT, EMDL_RESOLUTION, EMDL_RESOLUTION_ANGSTROM, EMDL_RESOLUTION_INVPIXEL, EMDL_SPECTRAL_IDX, EMDL_UNKNOWN_LABEL, EMDL_LAST_LABEL // **** NOTE ****: Do keep this label always at the end // it is here for looping purposes };//close enum Label enum EMDLabelType { EMDL_INT, EMDL_BOOL, EMDL_DOUBLE, EMDL_STRING, EMDL_DOUBLE_VECTOR, EMDL_UNKNOWN }; class EMDL { public: // This enum defines which MetaDataLabels this class can manage; if // you need a new one, add it to the enum above and register it in the // StaticInitialization class below, e.g. // EMDL::addLabel(EMDL_OPTIMISER_RANDOM_SEED, EMDL_INT, "rlnRandomSeed"); // // Keep this special structure (using EMDL_FIRST_LABEL and EMDL_LAST_LABEL) so the // programmer can iterate through it like this: // // for( EMDLabel mdl = EMDL_FIRST_LABEL ; mdl < EMDL_LAST_LABEL ; mdl = EMDLabel( mdl+1 ) ) // static EMDLabel str2Label(const std::string &labelName); static std::string label2Str(const EMDLabel &label); static bool isInt(const EMDLabel &label); static bool isBool(const EMDLabel &label); static bool isString(const EMDLabel &label); static bool isDouble(const EMDLabel &label); static bool isNumber(const EMDLabel &label); static bool isDoubleVector(const EMDLabel &label); static bool isVector(const EMDLabel &label); static bool isUnknown(const EMDLabel &label); static bool isValidLabel(const EMDLabel &label); static bool isValidLabel(const std::string &labelName); static void printDefinitions(std::ostream& out); private: static std::map<EMDLabel, EMDLabelData> data; static std::map<std::string, EMDLabel> names; static std::map<std::string, std::string> definitions; static StaticInitialization initialization; //Just for initialization static void addLabel(EMDLabel label, EMDLabelType type, std::string name, std::string definition = "undocumented"); static void addAltLabel(EMDLabel label, std::string name); friend class StaticInitialization; };//close class EMDL definition //Just a struct to store type and string alias class EMDLabelData { public: EMDLabelType type; std::string str; //Default constructor EMDLabelData() { } EMDLabelData(EMDLabelType t, std::string s) { type = t; str = s; } };//close class EMDLabelData //Just a class for static initialization class StaticInitialization { private: StaticInitialization() { ///==== Add labels entries from here in the SAME ORDER as declared in ENUM ========== EMDL::addLabel(EMDL_COMMENT, EMDL_STRING, "rlnComment", "A metadata comment (This is treated in a special way)"); EMDL::addLabel(EMDL_AREA_ID, EMDL_INT, "rlnAreaId", "ID (i.e. a unique number) of an area (i.e. field-of-view)"); EMDL::addLabel(EMDL_AREA_NAME, EMDL_STRING, "rlnAreaName", "Name of an area (i.e. 
field-of-view)"); EMDL::addLabel(EMDL_BODY_MASK_NAME, EMDL_STRING, "rlnBodyMaskName", "Name of an image that contains a [0,1] body mask for multi-body refinement"); EMDL::addLabel(EMDL_BODY_KEEP_FIXED, EMDL_INT, "rlnBodyKeepFixed", "Flag to indicate whether to keep a body fixed (value 1) or keep on refining it (0)"); EMDL::addLabel(EMDL_BODY_REFERENCE_NAME, EMDL_STRING, "rlnBodyReferenceName", "Name of an image that contains the initial reference for one body of a multi-body refinement"); EMDL::addLabel(EMDL_BODY_ROTATE_DIRECTION_X, EMDL_DOUBLE, "rlnBodyRotateDirectionX", "X-component of axis around which to rotate this body"); EMDL::addLabel(EMDL_BODY_ROTATE_DIRECTION_Y, EMDL_DOUBLE, "rlnBodyRotateDirectionY", "Y-component of axis around which to rotate this body"); EMDL::addLabel(EMDL_BODY_ROTATE_DIRECTION_Z, EMDL_DOUBLE, "rlnBodyRotateDirectionZ", "Z-component of axis around which to rotate this body"); EMDL::addLabel(EMDL_BODY_ROTATE_RELATIVE_TO, EMDL_INT, "rlnBodyRotateRelativeTo", "Number of the body relative to which this body rotates (if negative, use rlnBodyRotateDirectionXYZ)"); EMDL::addLabel(EMDL_BODY_SIGMA_ANG, EMDL_DOUBLE, "rlnBodySigmaAngles", "Width of prior on all three Euler angles of a body in multibody refinement (in degrees)"); EMDL::addLabel(EMDL_BODY_SIGMA_OFFSET, EMDL_DOUBLE, "rlnBodySigmaOffset", "Width of prior on origin offsets of a body in multibody refinement (in pixels)"); EMDL::addLabel(EMDL_BODY_SIGMA_OFFSET_ANGSTROM, EMDL_DOUBLE, "rlnBodySigmaOffsetAngst", "Width of prior on origin offsets of a body in multibody refinement (in Angstroms)"); EMDL::addLabel(EMDL_BODY_SIGMA_ROT, EMDL_DOUBLE, "rlnBodySigmaRot", "Width of prior on rot angles of a body in multibody refinement (in degrees)"); EMDL::addLabel(EMDL_BODY_SIGMA_TILT, EMDL_DOUBLE, "rlnBodySigmaTilt", "Width of prior on tilt angles of a body in multibody refinement (in degrees)"); EMDL::addLabel(EMDL_BODY_SIGMA_PSI, EMDL_DOUBLE, "rlnBodySigmaPsi", "Width of prior on psi angles of a body in multibody refinement (in degrees)"); EMDL::addLabel(EMDL_BODY_STAR_FILE, EMDL_STRING, "rlnBodyStarFile", "Name of STAR file with body masks and metadata"); EMDL::addLabel(EMDL_CTF_ASTIGMATISM, EMDL_DOUBLE, "rlnCtfAstigmatism", "Absolute value of the difference between defocus in U- and V-direction (in A)"); EMDL::addLabel(EMDL_CTF_BFACTOR, EMDL_DOUBLE, "rlnCtfBfactor", "B-factor (in A^2) that describes CTF power spectrum fall-off"); EMDL::addLabel(EMDL_CTF_MAXRES, EMDL_DOUBLE, "rlnCtfMaxResolution", "Estimated maximum resolution (in A) of significant CTF Thon rings"); EMDL::addLabel(EMDL_CTF_VALIDATIONSCORE, EMDL_DOUBLE, "rlnCtfValidationScore", "Gctf-based validation score for the quality of the CTF fit"); EMDL::addLabel(EMDL_CTF_SCALEFACTOR, EMDL_DOUBLE, "rlnCtfScalefactor", "Linear scale-factor on the CTF (values between 0 and 1)"); EMDL::addLabel(EMDL_CTF_VOLTAGE, EMDL_DOUBLE, "rlnVoltage", "Voltage of the microscope (in kV)"); EMDL::addLabel(EMDL_CTF_DEFOCUSU, EMDL_DOUBLE, "rlnDefocusU", "Defocus in U-direction (in Angstroms, positive values for underfocus)"); EMDL::addLabel(EMDL_CTF_DEFOCUSV, EMDL_DOUBLE, "rlnDefocusV", "Defocus in V-direction (in Angstroms, positive values for underfocus)"); EMDL::addLabel(EMDL_CTF_DEFOCUS_ANGLE, EMDL_DOUBLE, "rlnDefocusAngle", "Angle between X and defocus U direction (in degrees)"); EMDL::addLabel(EMDL_CTF_CS, EMDL_DOUBLE, "rlnSphericalAberration", "Spherical aberration (in millimeters)"); EMDL::addLabel(EMDL_CTF_CA, EMDL_DOUBLE, "rlnChromaticAberration", "Chromatic aberration 
(in millimeters)"); EMDL::addLabel(EMDL_CTF_DETECTOR_PIXEL_SIZE, EMDL_DOUBLE, "rlnDetectorPixelSize", "Pixel size of the detector (in micrometers)"); EMDL::addLabel(EMDL_CTF_POWER_SPECTRUM, EMDL_STRING, "rlnCtfPowerSpectrum", "Power spectrum for CTF estimation"); EMDL::addLabel(EMDL_CTF_ENERGY_LOSS, EMDL_DOUBLE, "rlnEnergyLoss", "Energy loss (in eV)"); EMDL::addLabel(EMDL_CTF_FOM, EMDL_DOUBLE, "rlnCtfFigureOfMerit", "Figure of merit for the fit of the CTF (not used inside relion_refine)"); EMDL::addLabel(EMDL_CTF_IMAGE, EMDL_STRING, "rlnCtfImage", "Name of an image with all CTF values"); EMDL::addLabel(EMDL_CTF_LENS_STABILITY, EMDL_DOUBLE, "rlnLensStability", "Lens stability (in ppm)"); EMDL::addLabel(EMDL_CTF_MAGNIFICATION, EMDL_DOUBLE, "rlnMagnification", "Magnification at the detector (in times)"); EMDL::addLabel(EMDL_CTF_PHASESHIFT, EMDL_DOUBLE, "rlnPhaseShift", "Phase-shift from a phase-plate (in degrees)"); EMDL::addLabel(EMDL_CTF_CONVERGENCE_CONE, EMDL_DOUBLE, "rlnConvergenceCone", "Convergence cone (in mrad)"); EMDL::addLabel(EMDL_CTF_LONGITUDINAL_DISPLACEMENT, EMDL_DOUBLE, "rlnLongitudinalDisplacement", "Longitudinal displacement (in Angstroms)"); EMDL::addLabel(EMDL_CTF_TRANSVERSAL_DISPLACEMENT, EMDL_DOUBLE, "rlnTransversalDisplacement", "Transversal displacement (in Angstroms)"); EMDL::addLabel(EMDL_CTF_Q0, EMDL_DOUBLE, "rlnAmplitudeContrast", "Amplitude contrast (as a fraction, i.e. 10% = 0.1)"); EMDL::addLabel(EMDL_CTF_VALUE, EMDL_DOUBLE, "rlnCtfValue", "Value of the Contrast Transfer Function"); EMDL::addLabel(EMDL_IMAGE_NAME, EMDL_STRING, "rlnImageName", "Name of an image"); EMDL::addLabel(EMDL_IMAGE_ORI_NAME, EMDL_STRING, "rlnImageOriginalName", "Original name of an image"); EMDL::addLabel(EMDL_IMAGE_RECONSTRUCT_NAME, EMDL_STRING, "rlnReconstructImageName", "Name of an image to be used for reconstruction only"); EMDL::addLabel(EMDL_IMAGE_ID, EMDL_INT, "rlnImageId", "ID (i.e. a unique number) of an image"); EMDL::addLabel(EMDL_IMAGE_ENABLED, EMDL_BOOL, "rlnEnabled", "Not used in RELION, only included for backward compatibility with XMIPP selfiles"); EMDL::addLabel(EMDL_IMAGE_DATATYPE, EMDL_INT, "rlnDataType", "Type of data stored in an image (e.g. int, RFLOAT etc)"); EMDL::addLabel(EMDL_IMAGE_DIMENSIONALITY, EMDL_INT, "rlnImageDimensionality", "Dimensionality of data stored in an image (i.e. 
2 or 3)"); EMDL::addLabel(EMDL_IMAGE_BEAMTILT_X, EMDL_DOUBLE, "rlnBeamTiltX", "Beam tilt in the X-direction (in mrad)"); EMDL::addLabel(EMDL_IMAGE_BEAMTILT_Y, EMDL_DOUBLE, "rlnBeamTiltY", "Beam tilt in the Y-direction (in mrad)"); EMDL::addLabel(EMDL_IMAGE_MTF_FILENAME, EMDL_STRING, "rlnMtfFileName", "The filename of a STAR file with the MTF for this optics group or image"); EMDL::addLabel(EMDL_IMAGE_OPTICS_GROUP, EMDL_INT, "rlnOpticsGroup", "Group of particles with identical optical properties"); EMDL::addLabel(EMDL_IMAGE_OPTICS_GROUP_NAME, EMDL_STRING, "rlnOpticsGroupName", "The name of a group of particles with identical optical properties"); EMDL::addLabel(EMDL_IMAGE_ODD_ZERNIKE_COEFFS, EMDL_DOUBLE_VECTOR, "rlnOddZernike", "Coefficients for the antisymmetrical Zernike polynomials"); EMDL::addLabel(EMDL_IMAGE_EVEN_ZERNIKE_COEFFS, EMDL_DOUBLE_VECTOR, "rlnEvenZernike", "Coefficients for the symmetrical Zernike polynomials"); EMDL::addLabel(EMDL_IMAGE_PIXEL_SIZE, EMDL_DOUBLE, "rlnImagePixelSize", "Pixel size (in Angstrom)"); EMDL::addLabel(EMDL_IMAGE_MAG_MATRIX_00, EMDL_DOUBLE, "rlnMagMat00", "Anisotropic magnification matrix, element 1,1"); EMDL::addLabel(EMDL_IMAGE_MAG_MATRIX_01, EMDL_DOUBLE, "rlnMagMat01", "Anisotropic magnification matrix, element 1,2"); EMDL::addLabel(EMDL_IMAGE_MAG_MATRIX_10, EMDL_DOUBLE, "rlnMagMat10", "Anisotropic magnification matrix, element 2,1"); EMDL::addLabel(EMDL_IMAGE_MAG_MATRIX_11, EMDL_DOUBLE, "rlnMagMat11", "Anisotropic magnification matrix, element 2,2"); EMDL::addLabel(EMDL_IMAGE_COORD_X, EMDL_DOUBLE, "rlnCoordinateX", "X-Position of an image in a micrograph (in pixels)"); EMDL::addLabel(EMDL_IMAGE_COORD_Y, EMDL_DOUBLE, "rlnCoordinateY", "Y-Position of an image in a micrograph (in pixels)"); EMDL::addLabel(EMDL_IMAGE_COORD_Z, EMDL_DOUBLE, "rlnCoordinateZ", "Z-Position of an image in a 3D micrograph, i.e. 
tomogram (in pixels)"); EMDL::addLabel(EMDL_IMAGE_FRAME_NR, EMDL_INT, "rlnMovieFrameNumber", "Number of a movie frame"); EMDL::addLabel(EMDL_IMAGE_NORM_CORRECTION, EMDL_DOUBLE, "rlnNormCorrection", "Normalisation correction value for an image"); EMDL::addLabel(EMDL_IMAGE_MAGNIFICATION_CORRECTION, EMDL_DOUBLE, "rlnMagnificationCorrection", "Magnification correction value for an image"); EMDL::addLabel(EMDL_IMAGE_SAMPLINGRATE, EMDL_DOUBLE, "rlnSamplingRate", "Sampling rate of an image (in Angstrom/pixel)"); EMDL::addLabel(EMDL_IMAGE_SAMPLINGRATE_X, EMDL_DOUBLE, "rlnSamplingRateX", "Sampling rate in X-direction of an image (in Angstrom/pixel)"); EMDL::addLabel(EMDL_IMAGE_SAMPLINGRATE_Y, EMDL_DOUBLE, "rlnSamplingRateY", "Sampling rate in Y-direction of an image (in Angstrom/pixel)"); EMDL::addLabel(EMDL_IMAGE_SAMPLINGRATE_Z, EMDL_DOUBLE, "rlnSamplingRateZ", "Sampling rate in Z-direction of an image (in Angstrom/pixel)"); EMDL::addLabel(EMDL_IMAGE_SIZE, EMDL_INT, "rlnImageSize", "Size of an image (in pixels)"); EMDL::addLabel(EMDL_IMAGE_SIZE_X, EMDL_INT, "rlnImageSizeX", "Size of an image in the X-direction (in pixels)"); EMDL::addLabel(EMDL_IMAGE_SIZE_Y, EMDL_INT, "rlnImageSizeY", "Size of an image in the Y-direction (in pixels)"); EMDL::addLabel(EMDL_IMAGE_SIZE_Z, EMDL_INT, "rlnImageSizeZ", "Size of an image in the Z-direction (in pixels)"); EMDL::addLabel(EMDL_IMAGE_STATS_MIN, EMDL_DOUBLE, "rlnMinimumValue", "Minimum value for the pixels in an image"); EMDL::addLabel(EMDL_IMAGE_STATS_MAX, EMDL_DOUBLE, "rlnMaximumValue", "Maximum value for the pixels in an image"); EMDL::addLabel(EMDL_IMAGE_STATS_AVG, EMDL_DOUBLE, "rlnAverageValue", "Average value for the pixels in an image"); EMDL::addLabel(EMDL_IMAGE_STATS_STDDEV, EMDL_DOUBLE, "rlnStandardDeviationValue", "Standard deviation for the pixel values in an image"); EMDL::addLabel(EMDL_IMAGE_STATS_SKEW, EMDL_DOUBLE, "rlnSkewnessValue", "Skewness (3rd moment) for the pixel values in an image"); EMDL::addLabel(EMDL_IMAGE_STATS_KURT, EMDL_DOUBLE, "rlnKurtosisExcessValue", "Kurtosis excess (4th moment - 3) for the pixel values in an image"); EMDL::addLabel(EMDL_IMAGE_WEIGHT, EMDL_DOUBLE, "rlnImageWeight", "Relative weight of an image"); EMDL::addLabel(EMDL_MASK_NAME, EMDL_STRING, "rlnMaskName", "Name of an image that contains a [0,1] mask"); EMDL::addLabel(EMDL_JOB_IS_CONTINUE, EMDL_BOOL, "rlnJobIsContinue", "Is tthis a continuation job?"); EMDL::addLabel(EMDL_JOB_TYPE, EMDL_INT, "rlnJobType", "Which type of job is this?"); EMDL::addLabel(EMDL_JOB_TYPE_NAME, EMDL_STRING, "rlnJobTypeName", "The name for this type of job (also name of main directory for output jobs)"); EMDL::addLabel(EMDL_JOBOPTION_TYPE, EMDL_INT, "rlnJoboptionType", "Which type of joboption is this?"); EMDL::addLabel(EMDL_JOBOPTION_VARIABLE, EMDL_STRING, "rlnJobOptionVariable", "Name of the joboption variable"); EMDL::addLabel(EMDL_JOBOPTION_VALUE, EMDL_STRING, "rlnJobOptionValue", "Value of a joboption"); EMDL::addLabel(EMDL_JOBOPTION_LABEL, EMDL_STRING, "rlnJobOptionGUILabel", "GUI label of a joboption"); EMDL::addLabel(EMDL_JOBOPTION_DEFAULT_VALUE, EMDL_STRING, "rlnJobOptionDefaultValue", "Default value of a joboption"); EMDL::addLabel(EMDL_JOBOPTION_MINVAL, EMDL_DOUBLE, "rlnJobOptionSliderMin", "Minimum value for slider of a joboption"); EMDL::addLabel(EMDL_JOBOPTION_MAXVAL, EMDL_DOUBLE, "rlnJobOptionSliderMax", "Maximum value for slider of a joboption"); EMDL::addLabel(EMDL_JOBOPTION_STEPVAL, EMDL_DOUBLE, "rlnJobOptionSliderStep", "Step value for slider of a joboption"); 
EMDL::addLabel(EMDL_JOBOPTION_HELPTEXT, EMDL_STRING, "rlnJobOptionHelpText", "Extra helptext of a joboption"); EMDL::addLabel(EMDL_JOBOPTION_PATTERN, EMDL_STRING, "rlnJobOptionFilePattern", "Pattern for file browser of a joboption"); EMDL::addLabel(EMDL_JOBOPTION_DIRECTORY, EMDL_STRING, "rlnJobOptionDirectoryDefault", "Default directory for file browser of a joboption"); EMDL::addLabel(EMDL_JOBOPTION_MENUOPTIONS, EMDL_STRING, "rlnJobOptionMenuOptions", "Options for pull-down menu"); EMDL::addLabel(EMDL_MATRIX_1_1, EMDL_DOUBLE, "rlnMatrix_1_1", "Matrix element (1,1) of a 3x3 matrix"); EMDL::addLabel(EMDL_MATRIX_1_2, EMDL_DOUBLE, "rlnMatrix_1_2", "Matrix element (1,2) of a 3x3 matrix"); EMDL::addLabel(EMDL_MATRIX_1_3, EMDL_DOUBLE, "rlnMatrix_1_3", "Matrix element (1,3) of a 3x3 matrix"); EMDL::addLabel(EMDL_MATRIX_2_1, EMDL_DOUBLE, "rlnMatrix_2_1", "Matrix element (2,1) of a 3x3 matrix"); EMDL::addLabel(EMDL_MATRIX_2_2, EMDL_DOUBLE, "rlnMatrix_2_2", "Matrix element (2,2) of a 3x3 matrix"); EMDL::addLabel(EMDL_MATRIX_2_3, EMDL_DOUBLE, "rlnMatrix_2_3", "Matrix element (2,3) of a 3x3 matrix"); EMDL::addLabel(EMDL_MATRIX_3_1, EMDL_DOUBLE, "rlnMatrix_3_1", "Matrix element (3,1) of a 3x3 matrix"); EMDL::addLabel(EMDL_MATRIX_3_2, EMDL_DOUBLE, "rlnMatrix_3_2", "Matrix element (3,2) of a 3x3 matrix"); EMDL::addLabel(EMDL_MATRIX_3_3, EMDL_DOUBLE, "rlnMatrix_3_3", "Matrix element (3,3) of a 3x3 matrix"); EMDL::addLabel(EMDL_MICROGRAPH_ACCUM_MOTION_TOTAL, EMDL_DOUBLE, "rlnAccumMotionTotal","Accumulated global motion during the entire movie (in A)"); EMDL::addLabel(EMDL_MICROGRAPH_ACCUM_MOTION_EARLY, EMDL_DOUBLE, "rlnAccumMotionEarly","Accumulated global motion during the first frames of the movie (in A)"); EMDL::addLabel(EMDL_MICROGRAPH_ACCUM_MOTION_LATE, EMDL_DOUBLE, "rlnAccumMotionLate","Accumulated global motion during the last frames of the movie (in A)"); EMDL::addLabel(EMDL_MICROGRAPH_ID, EMDL_INT, "rlnMicrographId", "ID (i.e. 
a unique number) of a micrograph"); EMDL::addLabel(EMDL_MICROGRAPH_NAME, EMDL_STRING, "rlnMicrographName", "Name of a micrograph"); EMDL::addLabel(EMDL_MICROGRAPH_GAIN_NAME, EMDL_STRING, "rlnMicrographGainName", "Name of a gain reference"); EMDL::addLabel(EMDL_MICROGRAPH_DEFECT_FILE, EMDL_STRING, "rlnMicrographDefectFile", "Name of a defect list file"); EMDL::addLabel(EMDL_MICROGRAPH_NAME_WODOSE, EMDL_STRING, "rlnMicrographNameNoDW", "Name of a micrograph without dose weighting"); EMDL::addLabel(EMDL_MICROGRAPH_MOVIE_NAME, EMDL_STRING, "rlnMicrographMovieName", "Name of a micrograph movie stack"); EMDL::addLabel(EMDL_MICROGRAPH_METADATA_NAME, EMDL_STRING, "rlnMicrographMetadata", "Name of a micrograph metadata file"); EMDL::addLabel(EMDL_MICROGRAPH_TILT_ANGLE, EMDL_DOUBLE, "rlnMicrographTiltAngle", "Tilt angle (in degrees) used to collect a micrograph"); EMDL::addLabel(EMDL_MICROGRAPH_TILT_AXIS_DIRECTION, EMDL_DOUBLE, "rlnMicrographTiltAxisDirection", "Direction of the tilt-axis (in degrees) used to collect a micrograph"); EMDL::addLabel(EMDL_MICROGRAPH_TILT_AXIS_OUTOFPLANE, EMDL_DOUBLE, "rlnMicrographTiltAxisOutOfPlane", "Out-of-plane angle (in degrees) of the tilt-axis used to collect a micrograph (90=in-plane)"); EMDL::addLabel(EMDL_MICROGRAPH_ORIGINAL_PIXEL_SIZE, EMDL_DOUBLE, "rlnMicrographOriginalPixelSize", "Pixel size of original movie before binning in Angstrom/pixel."); EMDL::addLabel(EMDL_MICROGRAPH_PIXEL_SIZE, EMDL_DOUBLE, "rlnMicrographPixelSize", "Pixel size of (averaged) micrographs after binning in Angstrom/pixel."); EMDL::addLabel(EMDL_MICROGRAPH_PRE_EXPOSURE, EMDL_DOUBLE, "rlnMicrographPreExposure", "Pre-exposure dose in electrons per square Angstrom"); EMDL::addLabel(EMDL_MICROGRAPH_DOSE_RATE, EMDL_DOUBLE, "rlnMicrographDoseRate", "Dose rate in electrons per square Angstrom per frame"); EMDL::addLabel(EMDL_MICROGRAPH_BINNING, EMDL_DOUBLE, "rlnMicrographBinning", "Micrograph binning factor"); EMDL::addLabel(EMDL_MICROGRAPH_FRAME_NUMBER, EMDL_INT, "rlnMicrographFrameNumber", "Micrograph frame number"); EMDL::addLabel(EMDL_MICROGRAPH_MOTION_MODEL_VERSION, EMDL_INT, "rlnMotionModelVersion", "Version of micrograph motion model"); EMDL::addLabel(EMDL_MICROGRAPH_START_FRAME, EMDL_INT, "rlnMicrographStartFrame", "Start frame of a motion model"); EMDL::addLabel(EMDL_MICROGRAPH_END_FRAME, EMDL_INT, "rlnMicrographEndFrame", "End frame of a motion model"); EMDL::addLabel(EMDL_MICROGRAPH_SHIFT_X, EMDL_DOUBLE, "rlnMicrographShiftX", "X shift of a (patch of) micrograph"); EMDL::addLabel(EMDL_MICROGRAPH_SHIFT_Y, EMDL_DOUBLE, "rlnMicrographShiftY", "Y shift of a (patch of) micrograph"); EMDL::addLabel(EMDL_MICROGRAPH_MOTION_COEFFS_IDX, EMDL_INT, "rlnMotionModelCoeffsIdx", "Index of a coefficient of a motion model"); EMDL::addLabel(EMDL_MICROGRAPH_MOTION_COEFF, EMDL_DOUBLE, "rlnMotionModelCoeff", "A coefficient of a motion model"); EMDL::addLabel(EMDL_MICROGRAPH_EER_UPSAMPLING, EMDL_INT, "rlnEERUpsampling", "EER upsampling ratio (1 = 4K, 2 = 8K)"); EMDL::addLabel(EMDL_MICROGRAPH_EER_GROUPING, EMDL_INT, "rlnEERGrouping", "The number of hardware frames to group"); EMDL::addLabel(EMDL_MLMODEL_ACCURACY_ROT, EMDL_DOUBLE, "rlnAccuracyRotations", "Estimated accuracy (in degrees) with which rotations can be assigned"); EMDL::addLabel(EMDL_MLMODEL_ACCURACY_TRANS, EMDL_DOUBLE, "rlnAccuracyTranslations", "Estimated accuracy (in pixels) with which translations can be assigned"); EMDL::addLabel(EMDL_MLMODEL_ACCURACY_TRANS_ANGSTROM, EMDL_DOUBLE, "rlnAccuracyTranslationsAngst", "Estimated accuracy (in 
Angstroms) with which translations can be assigned"); EMDL::addLabel(EMDL_MLMODEL_AVE_PMAX, EMDL_DOUBLE, "rlnAveragePmax", "Average value (over all images) of the maxima of the probability distributions"); EMDL::addLabel(EMDL_MLMODEL_CURRENT_RESOLUTION, EMDL_DOUBLE, "rlnCurrentResolution", "Current resolution where SSNR^MAP drops below 1 (in 1/Angstroms)"); EMDL::addLabel(EMDL_MLMODEL_CURRENT_SIZE, EMDL_INT, "rlnCurrentImageSize", "Current size of the images used in the refinement"); EMDL::addLabel(EMDL_MLMODEL_DATA_VS_PRIOR_REF, EMDL_DOUBLE, "rlnSsnrMap", "Spectral signal-to-noise ratio as defined for MAP estimation (SSNR^MAP)"); EMDL::addLabel(EMDL_MLMODEL_DIMENSIONALITY, EMDL_INT, "rlnReferenceDimensionality", "Dimensionality of the references (2D/3D)"); EMDL::addLabel(EMDL_MLMODEL_DIMENSIONALITY_DATA, EMDL_INT, "rlnDataDimensionality", "Dimensionality of the data (2D/3D)"); EMDL::addLabel(EMDL_MLMODEL_DIFF2_HALVES_REF, EMDL_DOUBLE, "rlnDiff2RandomHalves", "Power of the differences between two independent reconstructions from random halves of the data"); EMDL::addLabel(EMDL_MLMODEL_ESTIM_RESOL_REF, EMDL_DOUBLE, "rlnEstimatedResolution", "Estimated resolution (in A) for a reference"); EMDL::addLabel(EMDL_MLMODEL_FOURIER_COVERAGE_REF, EMDL_DOUBLE, "rlnFourierCompleteness", "Fraction of Fourier components (per resolution shell) with SNR>1"); EMDL::addLabel(EMDL_MLMODEL_FOURIER_COVERAGE_TOTAL_REF, EMDL_DOUBLE, "rlnOverallFourierCompleteness", "Fraction of all Fourier components up to the current resolution with SNR>1"); EMDL::addLabel(EMDL_MLMODEL_FSC_HALVES_REF, EMDL_DOUBLE, "rlnGoldStandardFsc", "Fourier shell correlation between two independent reconstructions from random halves of the data"); EMDL::addLabel(EMDL_MLMODEL_GROUP_NAME, EMDL_STRING, "rlnGroupName", "The name of a group of images (e.g. 
all images from a micrograph)"); EMDL::addLabel(EMDL_MLMODEL_GROUP_NO, EMDL_INT, "rlnGroupNumber", "The number of a group of images"); EMDL::addLabel(EMDL_MLMODEL_GROUP_NR_PARTICLES, EMDL_INT, "rlnGroupNrParticles", "Number particles in a group of images"); EMDL::addLabel(EMDL_MLMODEL_GROUP_SCALE_CORRECTION, EMDL_DOUBLE, "rlnGroupScaleCorrection", "Intensity-scale correction for a group of images"); EMDL::addLabel(EMDL_MLMODEL_HELICAL_NR_ASU, EMDL_INT, "rlnNrHelicalAsymUnits", "How many new helical asymmetric units are there in each box"); EMDL::addLabel(EMDL_MLMODEL_HELICAL_TWIST, EMDL_DOUBLE, "rlnHelicalTwist", "The helical twist (rotation per subunit) in degrees"); EMDL::addLabel(EMDL_MLMODEL_HELICAL_TWIST_MIN, EMDL_DOUBLE, "rlnHelicalTwistMin", "Minimum helical twist (in degrees, + for right-handedness)"); EMDL::addLabel(EMDL_MLMODEL_HELICAL_TWIST_MAX, EMDL_DOUBLE, "rlnHelicalTwistMax", "Maximum helical twist (in degrees, + for right-handedness)"); EMDL::addLabel(EMDL_MLMODEL_HELICAL_TWIST_INITIAL_STEP, EMDL_DOUBLE, "rlnHelicalTwistInitialStep", "Initial step of helical twist search (in degrees)"); EMDL::addLabel(EMDL_MLMODEL_HELICAL_RISE, EMDL_DOUBLE, "rlnHelicalRise", "The helical rise (translation per subunit) in Angstroms"); EMDL::addLabel(EMDL_MLMODEL_HELICAL_RISE_MIN, EMDL_DOUBLE, "rlnHelicalRiseMin", "Minimum helical rise (in Angstroms)"); EMDL::addLabel(EMDL_MLMODEL_HELICAL_RISE_MAX, EMDL_DOUBLE, "rlnHelicalRiseMax", "Maximum helical rise (in Angstroms)"); EMDL::addLabel(EMDL_MLMODEL_HELICAL_RISE_INITIAL_STEP, EMDL_DOUBLE, "rlnHelicalRiseInitialStep", "Initial step of helical rise search (in Angstroms)"); EMDL::addLabel(EMDL_MLMODEL_IS_HELIX, EMDL_BOOL, "rlnIsHelix", "Flag to indicate that helical refinement should be performed"); EMDL::addLabel(EMDL_MLMODEL_INTERPOLATOR, EMDL_INT, "rlnFourierSpaceInterpolator", "The kernel used for Fourier-space interpolation (NN=0, linear=1)"); EMDL::addLabel(EMDL_MLMODEL_LL, EMDL_DOUBLE, "rlnLogLikelihood", "Value of the log-likelihood target function"); EMDL::addLabel(EMDL_MLMODEL_MINIMUM_RADIUS_NN_INTERPOLATION, EMDL_INT, "rlnMinRadiusNnInterpolation","Minimum radius for NN-interpolation (in Fourier pixels), for smaller radii linear int. is used"); EMDL::addLabel(EMDL_MLMODEL_NORM_CORRECTION_AVG, EMDL_DOUBLE, "rlnNormCorrectionAverage", "Average value (over all images) of the normalisation correction values"); EMDL::addLabel(EMDL_MLMODEL_NR_CLASSES, EMDL_INT, "rlnNrClasses", "The number of references (i.e. classes) to be used in refinement"); EMDL::addLabel(EMDL_MLMODEL_NR_BODIES, EMDL_INT, "rlnNrBodies", "The number of independent rigid bodies to be refined in multi-body refinement"); EMDL::addLabel(EMDL_MLMODEL_NR_GROUPS, EMDL_INT, "rlnNrGroups", "The number of different groups of images (each group has its own noise spectrum, and intensity-scale correction)"); EMDL::addLabel(EMDL_MLMODEL_ORIENTABILITY_CONTRIBUTION, EMDL_DOUBLE, "rlnSpectralOrientabilityContribution", "Spectral SNR contribution to the orientability of individual particles"); EMDL::addLabel(EMDL_MLMODEL_ORIGINAL_SIZE, EMDL_INT, "rlnOriginalImageSize", "Original size of the images (in pixels)"); EMDL::addLabel(EMDL_MLMODEL_PADDING_FACTOR, EMDL_DOUBLE, "rlnPaddingFactor", "Oversampling factor for Fourier transforms of the references"); EMDL::addLabel(EMDL_MLMODEL_PDF_CLASS, EMDL_DOUBLE, "rlnClassDistribution", "Probability Density Function of the different classes (i.e. 
fraction of images assigned to each class)"); EMDL::addLabel(EMDL_MLMODEL_PRIOR_OFFX_CLASS, EMDL_DOUBLE, "rlnClassPriorOffsetX", "Prior in the X-offset for a class (in pixels)"); EMDL::addLabel(EMDL_MLMODEL_PRIOR_OFFY_CLASS, EMDL_DOUBLE, "rlnClassPriorOffsetY", "Prior in the Y-offset for a class (in pixels)"); EMDL::addLabel(EMDL_MLMODEL_PDF_ORIENT, EMDL_DOUBLE, "rlnOrientationDistribution", "Probability Density Function of the orientations (i.e. fraction of images assigned to each orient)"); EMDL::addLabel(EMDL_MLMODEL_PIXEL_SIZE, EMDL_DOUBLE, "rlnPixelSize", "Size of the pixels in the references and images (in Angstroms)"); EMDL::addLabel(EMDL_MLMODEL_POWER_REF, EMDL_DOUBLE, "rlnReferenceSpectralPower", "Spherical average of the power of the reference"); EMDL::addLabel(EMDL_MLMODEL_PRIOR_MODE, EMDL_INT, "rlnOrientationalPriorMode", "Mode for prior distributions on the orientations (0=no prior; 1=(rot,tilt,psi); 2=(rot,tilt); 3=rot; 4=tilt; 5=psi) "); EMDL::addLabel(EMDL_MLMODEL_REF_IMAGE, EMDL_STRING, "rlnReferenceImage", "Name of a reference image"); EMDL::addLabel(EMDL_MLMODEL_SGD_GRADIENT_IMAGE, EMDL_STRING, "rlnSGDGradientImage", "Name of image containing the SGD gradient"); EMDL::addLabel(EMDL_MLMODEL_SIGMA_OFFSET, EMDL_DOUBLE, "rlnSigmaOffsets","Standard deviation in the origin offsets (in pixels)"); EMDL::addLabel(EMDL_MLMODEL_SIGMA_OFFSET_ANGSTROM, EMDL_DOUBLE, "rlnSigmaOffsetsAngst","Standard deviation in the origin offsets (in Angstroms)"); EMDL::addLabel(EMDL_MLMODEL_SIGMA2_NOISE, EMDL_DOUBLE, "rlnSigma2Noise", "Spherical average of the standard deviation in the noise (sigma)"); EMDL::addLabel(EMDL_MLMODEL_SIGMA2_REF, EMDL_DOUBLE, "rlnReferenceSigma2", "Spherical average of the estimated power in the noise of a reference"); EMDL::addLabel(EMDL_MLMODEL_SIGMA_ROT, EMDL_DOUBLE, "rlnSigmaPriorRotAngle", "Standard deviation of the prior on the rot (i.e. first Euler) angle"); EMDL::addLabel(EMDL_MLMODEL_SIGMA_TILT, EMDL_DOUBLE, "rlnSigmaPriorTiltAngle", "Standard deviation of the prior on the tilt (i.e. second Euler) angle"); EMDL::addLabel(EMDL_MLMODEL_SIGMA_PSI, EMDL_DOUBLE, "rlnSigmaPriorPsiAngle", "Standard deviation of the prior on the psi (i.e. 
third Euler) angle"); EMDL::addLabel(EMDL_MLMODEL_SSNR_REF, EMDL_DOUBLE, "rlnSignalToNoiseRatio", "Spectral signal-to-noise ratio for a reference"); EMDL::addLabel(EMDL_MLMODEL_TAU2_FUDGE_FACTOR, EMDL_DOUBLE, "rlnTau2FudgeFactor", "Regularisation parameter with which estimates for the power in the references will be multiplied (T in original paper)"); EMDL::addLabel(EMDL_MLMODEL_TAU2_REF, EMDL_DOUBLE, "rlnReferenceTau2", "Spherical average of the estimated power in the signal of a reference"); EMDL::addLabel(EMDL_OPTIMISER_ACCURACY_ROT, EMDL_DOUBLE, "rlnOverallAccuracyRotations", "Overall accuracy of the rotational assignments (in degrees)"); EMDL::addLabel(EMDL_OPTIMISER_ACCURACY_TRANS, EMDL_DOUBLE, "rlnOverallAccuracyTranslations", "Overall accuracy of the translational assignments (in pixels)"); EMDL::addLabel(EMDL_OPTIMISER_ACCURACY_TRANS_ANGSTROM, EMDL_DOUBLE, "rlnOverallAccuracyTranslationsAngst", "Overall accuracy of the translational assignments (in Angstroms)"); EMDL::addLabel(EMDL_OPTIMISER_ADAPTIVE_FRACTION, EMDL_DOUBLE, "rlnAdaptiveOversampleFraction", "Fraction of the weights that will be oversampled in a second pass of the adaptive oversampling strategy"); EMDL::addLabel(EMDL_OPTIMISER_ADAPTIVE_OVERSAMPLING, EMDL_INT, "rlnAdaptiveOversampleOrder", "Order of the adaptive oversampling (0=no oversampling, 1= 2x oversampling; 2= 4x oversampling, etc)"); EMDL::addLabel(EMDL_OPTIMISER_AUTO_LOCAL_HP_ORDER, EMDL_INT, "rlnAutoLocalSearchesHealpixOrder", "Healpix order (before oversampling) from which autosampling procedure will use local angular searches"); EMDL::addLabel(EMDL_OPTIMISER_AVAILABLE_MEMORY, EMDL_DOUBLE, "rlnAvailableMemory", "Available memory per computing node (i.e. per MPI-process)"); EMDL::addLabel(EMDL_OPTIMISER_BEST_RESOL_THUS_FAR, EMDL_DOUBLE, "rlnBestResolutionThusFar", "The highest resolution that has been obtained in this optimization thus far"); EMDL::addLabel(EMDL_OPTIMISER_COARSE_SIZE, EMDL_INT, "rlnCoarseImageSize", "Current size of the images to be used in the first pass of the adaptive oversampling strategy (may be smaller than the original image size)"); EMDL::addLabel(EMDL_OPTIMISER_CHANGES_OPTIMAL_OFFSETS, EMDL_DOUBLE, "rlnChangesOptimalOffsets", "The average change in optimal translation in the last iteration (in pixels) "); EMDL::addLabel(EMDL_OPTIMISER_CHANGES_OPTIMAL_ORIENTS, EMDL_DOUBLE, "rlnChangesOptimalOrientations", "The average change in optimal orientation in the last iteration (in degrees) "); EMDL::addLabel(EMDL_OPTIMISER_CHANGES_OPTIMAL_CLASSES, EMDL_DOUBLE, "rlnChangesOptimalClasses", "The number of particles that changed their optimal clsas assignment in the last iteration"); EMDL::addLabel(EMDL_OPTIMISER_DATA_ARE_CTF_PHASE_FLIPPED, EMDL_BOOL, "rlnCtfDataArePhaseFlipped", "Flag to indicate that the input images have been phase-flipped"); EMDL::addLabel(EMDL_OPTIMISER_DATA_ARE_CTF_PREMULTIPLIED, EMDL_BOOL, "rlnCtfDataAreCtfPremultiplied", "Flag to indicate that the input images have been premultiplied with their CTF"); EMDL::addLabel(EMDL_OPTIMISER_DATA_STARFILE, EMDL_STRING, "rlnExperimentalDataStarFile", "STAR file with metadata for the experimental images"); EMDL::addLabel(EMDL_OPTIMISER_DO_CORRECT_CTF, EMDL_BOOL, "rlnDoCorrectCtf", "Flag to indicate that CTF-correction should be performed"); EMDL::addLabel(EMDL_OPTIMISER_DO_CORRECT_MAGNIFICATION, EMDL_BOOL, "rlnDoCorrectMagnification", "Flag to indicate that (per-group) magnification correction should be performed"); EMDL::addLabel(EMDL_OPTIMISER_DO_CORRECT_NORM, EMDL_BOOL, 
"rlnDoCorrectNorm", "Flag to indicate that (per-image) normalisation-error correction should be performed"); EMDL::addLabel(EMDL_OPTIMISER_DO_CORRECT_SCALE, EMDL_BOOL, "rlnDoCorrectScale", "Flag to indicate that internal (per-group) intensity-scale correction should be performed"); EMDL::addLabel(EMDL_OPTIMISER_DO_EXTERNAL_RECONSTRUCT, EMDL_BOOL, "rlnDoExternalReconstruct", "Flag to indicate that the reconstruction will be performed outside relion_refine, e.g. for learned priors"); EMDL::addLabel(EMDL_OPTIMISER_DO_REALIGN_MOVIES, EMDL_BOOL, "rlnDoRealignMovies", "Flag to indicate that individual frames of movies are being re-aligned"); EMDL::addLabel(EMDL_OPTIMISER_DO_MAP, EMDL_BOOL, "rlnDoMapEstimation", "Flag to indicate that MAP estimation should be performed (otherwise ML estimation)"); EMDL::addLabel(EMDL_OPTIMISER_DO_SGD, EMDL_BOOL, "rlnDoStochasticGradientDescent", "Flag to indicate that SGD-optimisation should be performed (otherwise expectation maximisation)"); EMDL::addLabel(EMDL_OPTIMISER_DO_STOCHASTIC_EM,EMDL_BOOL, "rlnDoStochasticEM", "Flag to indicate that stochastic EM-optimisation should be performed (an alternative to SGD)"); EMDL::addLabel(EMDL_OPTIMISER_EXTERNAL_RECONS_DATA_REAL, EMDL_STRING, "rlnExtReconsDataReal", "Name of the map with the real components of the input data array for the external reconstruction program"); EMDL::addLabel(EMDL_OPTIMISER_EXTERNAL_RECONS_DATA_IMAG, EMDL_STRING, "rlnExtReconsDataImag", "Name of the map with the imaginary components of the input data array for the external reconstruction program"); EMDL::addLabel(EMDL_OPTIMISER_EXTERNAL_RECONS_WEIGHT, EMDL_STRING, "rlnExtReconsWeight", "Name of the map with the input weight array for the external reconstruction program"); EMDL::addLabel(EMDL_OPTIMISER_EXTERNAL_RECONS_RESULT, EMDL_STRING, "rlnExtReconsResult", "Name of the output reconstruction from the external reconstruction program"); EMDL::addLabel(EMDL_OPTIMISER_EXTERNAL_RECONS_NEWSTAR, EMDL_STRING, "rlnExtReconsResultStarfile", "Name of the output STAR file with updated FSC or tau curves"); EMDL::addLabel(EMDL_OPTIMISER_FAST_SUBSETS, EMDL_BOOL, "rlnDoFastSubsetOptimisation", "Use subsets of the data in the earlier iterations to speed up convergence"); EMDL::addLabel(EMDL_OPTIMISER_SGD_INI_ITER, EMDL_INT, "rlnSgdInitialIterations", "Number of initial SGD iterations (at rlnSgdInitialResolution and with rlnSgdInitialSubsetSize)"); EMDL::addLabel(EMDL_OPTIMISER_SGD_FIN_ITER, EMDL_INT, "rlnSgdFinalIterations", "Number of final SGD iterations (at rlnSgdFinalResolution and with rlnSgdFinalSubsetSize)"); EMDL::addLabel(EMDL_OPTIMISER_SGD_INBETWEEN_ITER, EMDL_INT, "rlnSgdInBetweenIterations", "Number of SGD iteration in between the initial ones to the final ones (with linear interpolation of resolution and subset size)"); EMDL::addLabel(EMDL_OPTIMISER_SGD_INI_RESOL, EMDL_DOUBLE, "rlnSgdInitialResolution", "Resolution (in A) to use during the initial SGD iterations"); EMDL::addLabel(EMDL_OPTIMISER_SGD_FIN_RESOL, EMDL_DOUBLE, "rlnSgdFinalResolution", "Resolution (in A) to use during the final SGD iterations"); EMDL::addLabel(EMDL_OPTIMISER_SGD_INI_SUBSET_SIZE, EMDL_INT, "rlnSgdInitialSubsetSize", "Number of particles in a mini-batch (subset) during the initial SGD iterations"); EMDL::addLabel(EMDL_OPTIMISER_SGD_FIN_SUBSET_SIZE, EMDL_INT, "rlnSgdFinalSubsetSize", "Number of particles in a mini-batch (subset) during the final SGD iteration"); EMDL::addLabel(EMDL_OPTIMISER_SGD_MU, EMDL_DOUBLE, "rlnSgdMuFactor", "The mu-parameter that controls the 
momentum of the SGD gradients"); EMDL::addLabel(EMDL_OPTIMISER_SGD_SIGMA2FUDGE_INI, EMDL_DOUBLE, "rlnSgdSigma2FudgeInitial", "The variance of the noise will initially be multiplied with this value (larger than 1)"); EMDL::addLabel(EMDL_OPTIMISER_SGD_SIGMA2FUDGE_HALFLIFE, EMDL_INT, "rlnSgdSigma2FudgeHalflife", "After processing this many particles the multiplicative factor for the noise variance will have halved"); EMDL::addLabel(EMDL_OPTIMISER_SGD_SKIP_ANNNEAL, EMDL_BOOL, "rlnSgdSkipAnneal", "Option to switch off annealing of multiple references in SGD"); EMDL::addLabel(EMDL_OPTIMISER_SGD_SUBSET_SIZE, EMDL_INT, "rlnSgdSubsetSize", "The number of particles in the random subsets for SGD"); EMDL::addLabel(EMDL_OPTIMISER_SGD_WRITE_EVERY_SUBSET, EMDL_INT, "rlnSgdWriteEverySubset", "Every this many iterations the model is written to disk in SGD"); EMDL::addLabel(EMDL_OPTIMISER_SGD_MAX_SUBSETS, EMDL_INT, "rlnSgdMaxSubsets", "Stop SGD after doing this many subsets (possibly spanning more than 1 iteration)"); EMDL::addLabel(EMDL_OPTIMISER_SGD_STEPSIZE, EMDL_DOUBLE, "rlnSgdStepsize", "Stepsize in SGD updates)"); EMDL::addLabel(EMDL_OPTIMISER_DO_AUTO_REFINE, EMDL_BOOL, "rlnDoAutoRefine", "Flag to indicate that 3D auto-refine procedure is being used"); EMDL::addLabel(EMDL_OPTIMISER_DO_ONLY_FLIP_CTF_PHASES, EMDL_BOOL, "rlnDoOnlyFlipCtfPhases", "Flag to indicate that CTF-correction should only comprise phase-flipping"); EMDL::addLabel(EMDL_OPTIMISER_DO_SOLVENT_FLATTEN, EMDL_BOOL, "rlnDoSolventFlattening", "Flag to indicate that the references should be masked to set their solvent areas to a constant density"); EMDL::addLabel(EMDL_OPTIMISER_DO_SOLVENT_FSC, EMDL_BOOL, "rlnDoSolventFscCorrection", "Flag to indicate that the FSCs should be solvent-corrected during refinement"); EMDL::addLabel(EMDL_OPTIMISER_DO_SKIP_ALIGN, EMDL_BOOL, "rlnDoSkipAlign", "Flag to indicate that orientational (i.e. rotational and translational) searches will be omitted from the refinement, only marginalisation over classes will take place"); EMDL::addLabel(EMDL_OPTIMISER_DO_SKIP_ROTATE, EMDL_BOOL, "rlnDoSkipRotate", "Flag to indicate that rotational searches will be omitted from the refinement, only marginalisation over classes and translations will take place"); EMDL::addLabel(EMDL_OPTIMISER_DO_SPLIT_RANDOM_HALVES, EMDL_BOOL, "rlnDoSplitRandomHalves", "Flag to indicate that the data should be split into two completely separate, random halves"); EMDL::addLabel(EMDL_OPTIMISER_DO_ZERO_MASK, EMDL_BOOL, "rlnDoZeroMask", "Flag to indicate that the surrounding solvent area in the experimental particles will be masked to zeros (by default random noise will be used"); EMDL::addLabel(EMDL_OPTIMISER_FIX_SIGMA_NOISE, EMDL_BOOL, "rlnFixSigmaNoiseEstimates", "Flag to indicate that the estimates for the power spectra of the noise should be kept constant"); EMDL::addLabel(EMDL_OPTIMISER_FIX_SIGMA_OFFSET ,EMDL_BOOL, "rlnFixSigmaOffsetEstimates", "Flag to indicate that the estimates for the stddev in the origin offsets should be kept constant"); EMDL::addLabel(EMDL_OPTIMISER_FIX_TAU, EMDL_BOOL, "rlnFixTauEstimates", "Flag to indicate that the estimates for the power spectra of the signal (i.e. 
the references) should be kept constant"); EMDL::addLabel(EMDL_OPTIMISER_HAS_CONVERGED, EMDL_BOOL, "rlnHasConverged", "Flag to indicate that the optimization has converged"); EMDL::addLabel(EMDL_OPTIMISER_HAS_HIGH_FSC_AT_LIMIT, EMDL_BOOL, "rlnHasHighFscAtResolLimit", "Flag to indicate that the FSC at the resolution limit is significant"); EMDL::addLabel(EMDL_OPTIMISER_HAS_LARGE_INCR_SIZE_ITER_AGO, EMDL_INT, "rlnHasLargeSizeIncreaseIterationsAgo", "How many iterations have passed since the last large increase in image size"); EMDL::addLabel(EMDL_OPTIMISER_DO_HELICAL_REFINE, EMDL_BOOL, "rlnDoHelicalRefine", "Flag to indicate that helical refinement should be performed"); EMDL::addLabel(EMDL_OPTIMISER_IGNORE_HELICAL_SYMMETRY, EMDL_BOOL, "rlnIgnoreHelicalSymmetry", "Flag to indicate that helical symmetry is ignored in 3D reconstruction"); EMDL::addLabel(EMDL_OPTIMISER_FOURIER_MASK, EMDL_STRING, "rlnFourierMask", "Name of an FFTW-centred Fourier mask to be applied to the Projector for refinement."); EMDL::addLabel(EMDL_OPTIMISER_HELICAL_TWIST_INITIAL, EMDL_DOUBLE, "rlnHelicalTwistInitial", "The intial helical twist (rotation per subunit) in degrees before refinement"); EMDL::addLabel(EMDL_OPTIMISER_HELICAL_RISE_INITIAL, EMDL_DOUBLE, "rlnHelicalRiseInitial", "The initial helical rise (translation per subunit) in Angstroms before refinement"); EMDL::addLabel(EMDL_OPTIMISER_HELICAL_Z_PERCENTAGE, EMDL_DOUBLE, "rlnHelicalCentralProportion", "Only expand this central fraction of the Z axis when imposing real-space helical symmetry"); EMDL::addLabel(EMDL_OPTIMISER_HELICAL_NSTART, EMDL_INT, "rlnNrHelicalNStart", "The N-number for an N-start helix"); EMDL::addLabel(EMDL_OPTIMISER_HELICAL_TUBE_INNER_DIAMETER, EMDL_DOUBLE, "rlnHelicalMaskTubeInnerDiameter", "Inner diameter of helical tubes in Angstroms (for masks of helical references and particles)"); EMDL::addLabel(EMDL_OPTIMISER_HELICAL_TUBE_OUTER_DIAMETER, EMDL_DOUBLE, "rlnHelicalMaskTubeOuterDiameter", "Outer diameter of helical tubes in Angstroms (for masks of helical references and particles)"); EMDL::addLabel(EMDL_OPTIMISER_HELICAL_SYMMETRY_LOCAL_REFINEMENT, EMDL_BOOL, "rlnHelicalSymmetryLocalRefinement", "Flag to indicate that local refinement of helical parameters should be performed"); EMDL::addLabel(EMDL_OPTIMISER_HELICAL_SIGMA_DISTANCE, EMDL_DOUBLE, "rlnHelicalSigmaDistance", "Sigma of distance along the helical tracks"); EMDL::addLabel(EMDL_OPTIMISER_HELICAL_KEEP_TILT_PRIOR_FIXED, EMDL_BOOL, "rlnHelicalKeepTiltPriorFixed", "Flag to indicate that helical tilt priors are kept fixed (at 90 degrees) in global angular searches"); EMDL::addLabel(EMDL_OPTIMISER_LOWRES_LIMIT_EXP, EMDL_DOUBLE, "rlnLowresLimitExpectation", "Low-resolution-limit (in Angstrom) for the expectation step"); EMDL::addLabel(EMDL_OPTIMISER_HIGHRES_LIMIT_EXP, EMDL_DOUBLE, "rlnHighresLimitExpectation", "High-resolution-limit (in Angstrom) for the expectation step"); EMDL::addLabel(EMDL_OPTIMISER_HIGHRES_LIMIT_SGD, EMDL_DOUBLE, "rlnHighresLimitSGD", "High-resolution-limit (in Angstrom) for Stochastic Gradient Descent"); EMDL::addLabel(EMDL_OPTIMISER_IGNORE_CTF_UNTIL_FIRST_PEAK, EMDL_BOOL, "rlnDoIgnoreCtfUntilFirstPeak", "Flag to indicate that the CTFs should be ignored until their first peak"); EMDL::addLabel(EMDL_OPTIMISER_INCR_SIZE, EMDL_INT, "rlnIncrementImageSize", "Number of Fourier shells to be included beyond the resolution where SSNR^MAP drops below 1"); EMDL::addLabel(EMDL_OPTIMISER_ITERATION_NO, EMDL_INT, "rlnCurrentIteration", "The number of the current iteration"); 
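// Added note (illustrative sketch, not part of the original source): the definition strings passed to addLabel()
// are stored in the static 'definitions' map, and EMDL::printDefinitions() declared above can dump that whole
// label dictionary, e.g. for generating documentation of all rln* names:
//     EMDL::printDefinitions(std::cout);   // writes every registered rln* name together with its description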
EMDL::addLabel(EMDL_OPTIMISER_LOCAL_SYMMETRY_FILENAME, EMDL_STRING, "rlnLocalSymmetryFile", "Local symmetry description file containing list of masks and their operators"); EMDL::addLabel(EMDL_OPTIMISER_LOWRES_JOIN_RANDOM_HALVES, EMDL_DOUBLE, "rlnJoinHalvesUntilThisResolution", "Resolution (in Angstrom) to join the two random half-reconstructions to prevent their diverging orientations (for C-symmetries)"); EMDL::addLabel(EMDL_OPTIMISER_MAGNIFICATION_RANGE, EMDL_DOUBLE, "rlnMagnificationSearchRange", "Search range for magnification correction"); EMDL::addLabel(EMDL_OPTIMISER_MAGNIFICATION_STEP, EMDL_DOUBLE, "rlnMagnificationSearchStep", "Step size for magnification correction"); EMDL::addLabel(EMDL_OPTIMISER_MAX_COARSE_SIZE, EMDL_INT, "rlnMaximumCoarseImageSize", "Maximum size of the images to be used in the first pass of the adaptive oversampling strategy (may be smaller than the original image size)"); EMDL::addLabel(EMDL_OPTIMISER_MAX_NR_POOL, EMDL_INT, "rlnMaxNumberOfPooledParticles", "Maximum number particles that are processed together to speed up calculations"); EMDL::addLabel(EMDL_OPTIMISER_MODEL_STARFILE, EMDL_STRING, "rlnModelStarFile", "STAR file with metadata for the model that is being refined"); EMDL::addLabel(EMDL_OPTIMISER_MODEL_STARFILE2, EMDL_STRING, "rlnModelStarFile2", "STAR file with metadata for the second model that is being refined (from random halves of the data)"); EMDL::addLabel(EMDL_OPTIMISER_NR_ITERATIONS, EMDL_INT, "rlnNumberOfIterations", "Maximum number of iterations to be performed"); EMDL::addLabel(EMDL_OPTIMISER_NR_ITER_WO_RESOL_GAIN, EMDL_INT, "rlnNumberOfIterWithoutResolutionGain", "Number of iterations that have passed without a gain in resolution"); EMDL::addLabel(EMDL_OPTIMISER_NR_ITER_WO_HIDDEN_VAR_CHANGES, EMDL_INT, "rlnNumberOfIterWithoutChangingAssignments", "Number of iterations that have passed without large changes in orientation and class assignments"); EMDL::addLabel(EMDL_OPTIMISER_OPTICS_STARFILE, EMDL_STRING, "rlnOpticsStarFile", "STAR file with metadata for the optical groups (new as of version 3.1)"); EMDL::addLabel(EMDL_OPTIMISER_OUTPUT_ROOTNAME, EMDL_STRING, "rlnOutputRootName", "Rootname for all output files (this may include a directory structure, which should then exist)"); EMDL::addLabel(EMDL_OPTIMISER_PARTICLE_DIAMETER, EMDL_DOUBLE, "rlnParticleDiameter", "Diameter of the circular mask to be applied to all experimental images (in Angstroms)"); EMDL::addLabel(EMDL_OPTIMISER_RADIUS_MASK_3D_MAP, EMDL_INT, "rlnRadiusMaskMap", "Radius of the spherical mask to be applied to all references (in Angstroms)"); EMDL::addLabel(EMDL_OPTIMISER_RADIUS_MASK_EXP_PARTICLES, EMDL_INT, "rlnRadiusMaskExpImages", "Radius of the circular mask to be applied to all experimental images (in Angstroms)"); EMDL::addLabel(EMDL_OPTIMISER_RANDOM_SEED, EMDL_INT, "rlnRandomSeed", "Seed (i.e. 
a number) for the random number generator"); EMDL::addLabel(EMDL_OPTIMISER_REFS_ARE_CTF_CORRECTED, EMDL_BOOL, "rlnRefsAreCtfCorrected", "Flag to indicate that the input references have been CTF-amplitude corrected"); EMDL::addLabel(EMDL_OPTIMISER_SMALLEST_CHANGES_OPT_CLASSES, EMDL_INT, "rlnSmallestChangesClasses", "Smallest changes thus far in the optimal class assignments (in numer of particles)."); EMDL::addLabel(EMDL_OPTIMISER_SMALLEST_CHANGES_OPT_OFFSETS, EMDL_DOUBLE, "rlnSmallestChangesOffsets", "Smallest changes thus far in the optimal offset assignments (in pixels)."); EMDL::addLabel(EMDL_OPTIMISER_SMALLEST_CHANGES_OPT_ORIENTS, EMDL_DOUBLE, "rlnSmallestChangesOrientations", "Smallest changes thus far in the optimal orientation assignments (in degrees)."); EMDL::addLabel(EMDL_OPTIMISER_SAMPLING_STARFILE, EMDL_STRING, "rlnOrientSamplingStarFile", "STAR file with metadata for the orientational sampling"); EMDL::addLabel(EMDL_OPTIMISER_SOLVENT_MASK_NAME, EMDL_STRING, "rlnSolventMaskName", "Name of an image that contains a (possibly soft) mask for the solvent area (values=0 for solvent, values =1 for protein)"); EMDL::addLabel(EMDL_OPTIMISER_SOLVENT_MASK2_NAME, EMDL_STRING, "rlnSolventMask2Name", "Name of a secondary solvent mask (e.g. to flatten density inside an icosahedral virus)"); EMDL::addLabel(EMDL_OPTIMISER_TAU_SPECTRUM_NAME, EMDL_STRING, "rlnTauSpectrumName", "Name of a STAR file that holds a tau2-spectrum"); EMDL::addLabel(EMDL_OPTIMISER_USE_TOO_COARSE_SAMPLING, EMDL_BOOL, "rlnUseTooCoarseSampling", "Flag to indicate that the angular sampling on the sphere will be one step coarser than needed to speed up calculations"); EMDL::addLabel(EMDL_OPTIMISER_WIDTH_MASK_EDGE, EMDL_INT, "rlnWidthMaskEdge", "Width (in pixels) of the soft edge for spherical/circular masks to be used for solvent flattening"); EMDL::addLabel(EMDL_ORIENT_FLIP, EMDL_BOOL, "rlnIsFlip", "Flag to indicate that an image should be mirrored"); EMDL::addLabel(EMDL_ORIENT_ID, EMDL_INT, "rlnOrientationsID", "ID (i.e. 
a unique number) for an orientation"); EMDL::addLabel(EMDL_ORIENT_ORIGIN_X, EMDL_DOUBLE, "rlnOriginX", "X-coordinate (in pixels) for the origin of rotation"); EMDL::addLabel(EMDL_ORIENT_ORIGIN_Y, EMDL_DOUBLE, "rlnOriginY", "Y-coordinate (in pixels) for the origin of rotation"); EMDL::addLabel(EMDL_ORIENT_ORIGIN_Z, EMDL_DOUBLE, "rlnOriginZ", "Z-coordinate (in pixels) for the origin of rotation"); EMDL::addLabel(EMDL_ORIENT_ORIGIN_X_PRIOR, EMDL_DOUBLE, "rlnOriginXPrior", "Center of the prior on the X-coordinate (in pixels) for the origin of rotation"); EMDL::addLabel(EMDL_ORIENT_ORIGIN_Y_PRIOR, EMDL_DOUBLE, "rlnOriginYPrior", "Center of the prior on the Y-coordinate (in pixels) for the origin of rotation"); EMDL::addLabel(EMDL_ORIENT_ORIGIN_Z_PRIOR, EMDL_DOUBLE, "rlnOriginZPrior", "Center of the prior on the Z-coordinate (in pixels) for the origin of rotation"); EMDL::addLabel(EMDL_ORIENT_ORIGIN_X_ANGSTROM, EMDL_DOUBLE, "rlnOriginXAngst", "X-coordinate (in Angstrom) for the origin of rotation"); EMDL::addLabel(EMDL_ORIENT_ORIGIN_Y_ANGSTROM, EMDL_DOUBLE, "rlnOriginYAngst", "Y-coordinate (in Angstrom) for the origin of rotation"); EMDL::addLabel(EMDL_ORIENT_ORIGIN_Z_ANGSTROM, EMDL_DOUBLE, "rlnOriginZAngst", "Z-coordinate (in Angstrom) for the origin of rotation"); EMDL::addLabel(EMDL_ORIENT_ORIGIN_X_PRIOR_ANGSTROM, EMDL_DOUBLE, "rlnOriginXPriorAngst", "Center of the prior on the X-coordinate (in Angstrom) for the origin of rotation"); EMDL::addLabel(EMDL_ORIENT_ORIGIN_Y_PRIOR_ANGSTROM, EMDL_DOUBLE, "rlnOriginYPriorAngst", "Center of the prior on the Y-coordinate (in Angstrom) for the origin of rotation"); EMDL::addLabel(EMDL_ORIENT_ORIGIN_Z_PRIOR_ANGSTROM, EMDL_DOUBLE, "rlnOriginZPriorAngst", "Center of the prior on the Z-coordinate (in Angstrom) for the origin of rotation"); EMDL::addLabel(EMDL_ORIENT_ROT, EMDL_DOUBLE, "rlnAngleRot", "First Euler angle (rot, in degrees)"); EMDL::addLabel(EMDL_ORIENT_ROT_PRIOR, EMDL_DOUBLE, "rlnAngleRotPrior", "Center of the prior (in degrees) on the first Euler angle (rot)"); EMDL::addLabel(EMDL_ORIENT_ROT_PRIOR_FLIP_RATIO, EMDL_DOUBLE, "rlnAngleRotFlipRatio", "Flip ratio of bimodal rot prior (0~0.5, 0 means an ordinary prior, 0.5 means a perfect bimodal prior)"); // KThurber EMDL::addLabel(EMDL_ORIENT_TILT, EMDL_DOUBLE, "rlnAngleTilt", "Second Euler angle (tilt, in degrees)"); EMDL::addLabel(EMDL_ORIENT_TILT_PRIOR, EMDL_DOUBLE, "rlnAngleTiltPrior", "Center of the prior (in degrees) on the second Euler angle (tilt)"); EMDL::addLabel(EMDL_ORIENT_PSI, EMDL_DOUBLE, "rlnAnglePsi", "Third Euler, or in-plane angle (psi, in degrees)"); EMDL::addLabel(EMDL_ORIENT_PSI_PRIOR, EMDL_DOUBLE, "rlnAnglePsiPrior", "Center of the prior (in degrees) on the third Euler angle (psi)"); EMDL::addLabel(EMDL_ORIENT_PSI_PRIOR_FLIP_RATIO, EMDL_DOUBLE, "rlnAnglePsiFlipRatio", "Flip ratio of bimodal psi prior (0~0.5, 0 means an ordinary prior, 0.5 means a perfect bimodal prior)"); EMDL::addLabel(EMDL_ORIENT_PSI_PRIOR_FLIP, EMDL_BOOL, "rlnAnglePsiFlip", "Flag to indicate that psi prior angle has been flipped"); // KThurber EMDL::addLabel(EMDL_PARTICLE_AUTOPICK_FOM, EMDL_DOUBLE, "rlnAutopickFigureOfMerit", "Autopicking FOM for a particle"); EMDL::addLabel(EMDL_PARTICLE_HELICAL_TUBE_ID, EMDL_INT, "rlnHelicalTubeID", "Helical tube ID for a helical segment"); EMDL::addLabel(EMDL_PARTICLE_HELICAL_TUBE_PITCH, EMDL_DOUBLE, "rlnHelicalTubePitch", "Cross-over distance for a helical segment (A)"); EMDL::addLabel(EMDL_PARTICLE_HELICAL_TRACK_LENGTH, EMDL_DOUBLE, "rlnHelicalTrackLength", "Distance 
(in pix) from the position of this helical segment to the starting point of the tube"); EMDL::addLabel(EMDL_PARTICLE_HELICAL_TRACK_LENGTH_ANGSTROM, EMDL_DOUBLE, "rlnHelicalTrackLengthAngst", "Distance (in A) from the position of this helical segment to the starting point of the tube"); EMDL::addLabel(EMDL_PARTICLE_CLASS, EMDL_INT, "rlnClassNumber", "Class number for which a particle has its highest probability"); EMDL::addLabel(EMDL_PARTICLE_DLL, EMDL_DOUBLE, "rlnLogLikeliContribution", "Contribution of a particle to the log-likelihood target function"); EMDL::addLabel(EMDL_PARTICLE_ID, EMDL_INT, "rlnParticleId", "ID (i.e. a unique number) for a particle"); EMDL::addLabel(EMDL_PARTICLE_FOM, EMDL_DOUBLE, "rlnParticleFigureOfMerit", "Developmental FOM for a particle"); EMDL::addLabel(EMDL_PARTICLE_KL_DIVERGENCE, EMDL_DOUBLE, "rlnKullbackLeiblerDivergence", "Kullback-Leibler divergence for a particle"); EMDL::addAltLabel(EMDL_PARTICLE_KL_DIVERGENCE, "rlnKullbackLeibnerDivergence"); // wrong spelling for backwards compatibility EMDL::addLabel(EMDL_PARTICLE_RANDOM_SUBSET, EMDL_INT, "rlnRandomSubset", "Random subset to which this particle belongs"); EMDL::addLabel(EMDL_PARTICLE_BEAM_TILT_CLASS, EMDL_INT, "rlnBeamTiltClass", "Beam-tilt class of a particle"); EMDL::addLabel(EMDL_PARTICLE_NAME, EMDL_STRING, "rlnParticleName", "Name for a particle"); EMDL::addLabel(EMDL_PARTICLE_ORI_NAME, EMDL_STRING, "rlnOriginalParticleName", "Original name for a particles"); EMDL::addLabel(EMDL_PARTICLE_NR_SIGNIFICANT_SAMPLES, EMDL_INT, "rlnNrOfSignificantSamples", "Number of orientational/class assignments (for a particle) with sign.probabilities in the 1st pass of adaptive oversampling"); /**< particle, Number of orientations contributing to weights*/ EMDL::addLabel(EMDL_PARTICLE_NR_FRAMES, EMDL_INT, "rlnNrOfFrames", "Number of movie frames that were collected for this particle"); EMDL::addLabel(EMDL_PARTICLE_NR_FRAMES_AVG, EMDL_INT, "rlnAverageNrOfFrames", "Number of movie frames that one averages over upon extraction of movie-particles"); EMDL::addLabel(EMDL_PARTICLE_MOVIE_RUNNING_AVG, EMDL_INT, "rlnMovieFramesRunningAverage", "Number of movie frames inside the running average that will be used for movie-refinement"); EMDL::addLabel(EMDL_PARTICLE_PMAX, EMDL_DOUBLE, "rlnMaxValueProbDistribution", "Maximum value of the (normalised) probability function for a particle"); /**< particle, Maximum value of probability distribution */ EMDL::addLabel(EMDL_PARTICLE_NUMBER, EMDL_INT, "rlnParticleNumber", "Number of particles"); EMDL::addLabel(EMDL_PIPELINE_JOB_COUNTER, EMDL_INT, "rlnPipeLineJobCounter", "Number of the last job in the pipeline"); EMDL::addLabel(EMDL_PIPELINE_NODE_NAME, EMDL_STRING , "rlnPipeLineNodeName", "Name of a Node in the pipeline"); EMDL::addLabel(EMDL_PIPELINE_NODE_TYPE, EMDL_INT, "rlnPipeLineNodeType", "Type of a Node in the pipeline"); EMDL::addLabel(EMDL_PIPELINE_PROCESS_ALIAS, EMDL_STRING , "rlnPipeLineProcessAlias", "Alias of a Process in the pipeline"); EMDL::addLabel(EMDL_PIPELINE_PROCESS_NAME, EMDL_STRING , "rlnPipeLineProcessName", "Name of a Process in the pipeline"); EMDL::addLabel(EMDL_PIPELINE_PROCESS_TYPE, EMDL_INT, "rlnPipeLineProcessType", "Type of a Process in the pipeline"); EMDL::addLabel(EMDL_PIPELINE_PROCESS_STATUS, EMDL_INT, "rlnPipeLineProcessStatus", "Status of a Process in the pipeline (running, scheduled, finished or cancelled)"); EMDL::addLabel(EMDL_PIPELINE_EDGE_FROM, EMDL_STRING , "rlnPipeLineEdgeFromNode", "Name of the origin of an edge"); 
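// Added note (descriptive, behaviour hedged): addAltLabel(), used above for the misspelled
// "rlnKullbackLeibnerDivergence", registers a second STAR name for an already-defined enum value so that files
// written with the legacy spelling keep reading correctly. Expected behaviour, as a sketch:
//     EMDL::str2Label("rlnKullbackLeibnerDivergence");   // should resolve to EMDL_PARTICLE_KL_DIVERGENCE
//     EMDL::label2Str(EMDL_PARTICLE_KL_DIVERGENCE);      // should still return the primary "rlnKullbackLeiblerDivergence"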
EMDL::addLabel(EMDL_PIPELINE_EDGE_TO, EMDL_STRING ,"rlnPipeLineEdgeToNode", "Name of the to-Node in an edge"); EMDL::addLabel(EMDL_PIPELINE_EDGE_PROCESS, EMDL_STRING ,"rlnPipeLineEdgeProcess", "Name of the destination of an edge"); EMDL::addLabel(EMDL_POSTPROCESS_FINAL_RESOLUTION, EMDL_DOUBLE, "rlnFinalResolution", "Final estimated resolution after postprocessing (in Angstroms)"); EMDL::addLabel(EMDL_POSTPROCESS_BFACTOR, EMDL_DOUBLE, "rlnBfactorUsedForSharpening", "Applied B-factor in the sharpening of the map"); EMDL::addLabel(EMDL_POSTPROCESS_FRACTION_MOLWEIGHT, EMDL_DOUBLE, "rlnParticleBoxFractionMolecularWeight", "Fraction of protein voxels in the box, based on ordered molecular weight estimate, for calculating cisTEM-like part_FSC"); EMDL::addLabel(EMDL_POSTPROCESS_FRACTION_SOLVENT_MASK, EMDL_DOUBLE, "rlnParticleBoxFractionSolventMask", "Fraction of protein voxels in the box, based on the solvent mask, for calculating cisTEM-like part_FSC"); EMDL::addLabel(EMDL_POSTPROCESS_FSC_GENERAL, EMDL_DOUBLE, "rlnFourierShellCorrelation", "FSC value (of unspecified type, e.g. masked or unmasked)"); EMDL::addLabel(EMDL_POSTPROCESS_FSC_TRUE, EMDL_DOUBLE, "rlnFourierShellCorrelationCorrected", "Final FSC value: i.e. after correction based on masking of randomized-phases maps"); EMDL::addLabel(EMDL_POSTPROCESS_FSC_PART_MOLWEIGHT, EMDL_DOUBLE, "rlnFourierShellCorrelationParticleMolWeight", "CisTEM-like correction of unmasked FSCs, based on ordered molecular weight estimate"); EMDL::addLabel(EMDL_POSTPROCESS_FSC_PART_FRACMASK, EMDL_DOUBLE, "rlnFourierShellCorrelationParticleMaskFraction", "CisTEM-like correction of unmasked FSCs, based on fraction of white pixels in solvent mask"); EMDL::addLabel(EMDL_POSTPROCESS_FSC_MASKED, EMDL_DOUBLE, "rlnFourierShellCorrelationMaskedMaps", "FSC value after masking of the original maps"); EMDL::addLabel(EMDL_POSTPROCESS_FSC_UNMASKED, EMDL_DOUBLE, "rlnFourierShellCorrelationUnmaskedMaps", "FSC value before masking of the original maps"); EMDL::addLabel(EMDL_POSTPROCESS_FSC_RANDOM_MASKED, EMDL_DOUBLE, "rlnCorrectedFourierShellCorrelationPhaseRandomizedMaskedMaps", "FSC value after masking of the randomized-phases maps"); EMDL::addLabel(EMDL_POSTPROCESS_AMPLCORR_MASKED, EMDL_DOUBLE, "rlnAmplitudeCorrelationMaskedMaps", "Correlation coefficient between amplitudes in Fourier shells of masked maps"); EMDL::addLabel(EMDL_POSTPROCESS_AMPLCORR_UNMASKED, EMDL_DOUBLE, "rlnAmplitudeCorrelationUnmaskedMaps", "Correlation coefficient between amplitudes in Fourier shells of unmasked maps"); EMDL::addLabel(EMDL_POSTPROCESS_DPR_MASKED, EMDL_DOUBLE, "rlnDifferentialPhaseResidualMaskedMaps", "Differential Phase Residual in Fourier shells of masked maps"); EMDL::addLabel(EMDL_POSTPROCESS_DPR_UNMASKED, EMDL_DOUBLE, "rlnDifferentialPhaseResidualUnmaskedMaps", "Differential Phase Residual in Fourier shells of unmasked maps"); EMDL::addLabel(EMDL_POSTPROCESS_GUINIER_FIT_INTERCEPT, EMDL_DOUBLE, "rlnFittedInterceptGuinierPlot", "The fitted intercept of the Guinier-plot"); EMDL::addLabel(EMDL_POSTPROCESS_GUINIER_FIT_SLOPE, EMDL_DOUBLE, "rlnFittedSlopeGuinierPlot", "The fitted slope of the Guinier-plot"); EMDL::addLabel(EMDL_POSTPROCESS_GUINIER_FIT_CORRELATION, EMDL_DOUBLE, "rlnCorrelationFitGuinierPlot", "The correlation coefficient of the fitted line through the Guinier-plot"); EMDL::addLabel(EMDL_POSTPROCESS_GUINIER_VALUE_IN, EMDL_DOUBLE, "rlnLogAmplitudesOriginal", "Y-value for Guinier plot: the logarithm of the radially averaged amplitudes of the input map"); 
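// Added note (usage sketch, not in the original header): an unrecognised column name in a STAR file need not be
// fatal; a tolerant reader can test it first and fall back to the EMDL_UNKNOWN_LABEL entry declared in the enum
// above, keeping the raw string for round-tripping:
//     if (!EMDL::isValidLabel(columnName)) { /* treat the column as EMDL_UNKNOWN_LABEL and keep its text */ }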
EMDL::addLabel(EMDL_POSTPROCESS_GUINIER_VALUE_INVMTF, EMDL_DOUBLE, "rlnLogAmplitudesMTFCorrected", "Y-value for Guinier plot: the logarithm of the radially averaged amplitudes after MTF correction"); EMDL::addLabel(EMDL_POSTPROCESS_GUINIER_VALUE_WEIGHTED, EMDL_DOUBLE, "rlnLogAmplitudesWeighted", "Y-value for Guinier plot: the logarithm of the radially averaged amplitudes after FSC-weighting"); EMDL::addLabel(EMDL_POSTPROCESS_GUINIER_VALUE_SHARPENED, EMDL_DOUBLE, "rlnLogAmplitudesSharpened", "Y-value for Guinier plot: the logarithm of the radially averaged amplitudes after sharpening"); EMDL::addLabel(EMDL_POSTPROCESS_GUINIER_VALUE_INTERCEPT, EMDL_DOUBLE, "rlnLogAmplitudesIntercept", "Y-value for Guinier plot: the fitted plateau of the logarithm of the radially averaged amplitudes"); EMDL::addLabel(EMDL_POSTPROCESS_GUINIER_RESOL_SQUARED, EMDL_DOUBLE, "rlnResolutionSquared", "X-value for Guinier plot: squared resolution in 1/Angstrom^2"); EMDL::addLabel(EMDL_POSTPROCESS_MOLWEIGHT, EMDL_DOUBLE, "rlnMolecularWeight", "Molecular weight of the ordered mass inside the box for calculating cisTEM-like part.FSC (in kDa)"); EMDL::addLabel(EMDL_POSTPROCESS_MTF_VALUE, EMDL_DOUBLE, "rlnMtfValue", "Value of the detectors modulation transfer function (between 0 and 1)"); EMDL::addLabel(EMDL_POSTPROCESS_RANDOMISE_FROM, EMDL_DOUBLE, "rlnRandomiseFrom", "Resolution (in A) from which the phases are randomised in the postprocessing step"); EMDL::addLabel(EMDL_POSTPROCESS_UNFIL_HALFMAP1, EMDL_STRING, "rlnUnfilteredMapHalf1", "Name of the unfiltered map from halfset 1"); EMDL::addLabel(EMDL_POSTPROCESS_UNFIL_HALFMAP2, EMDL_STRING, "rlnUnfilteredMapHalf2", "Name of the unfiltered map from halfset 2"); EMDL::addLabel(EMDL_SAMPLING_IS_3D, EMDL_BOOL, "rlnIs3DSampling", "Flag to indicate this concerns a 3D sampling "); EMDL::addLabel(EMDL_SAMPLING_IS_3D_TRANS, EMDL_BOOL, "rlnIs3DTranslationalSampling", "Flag to indicate this concerns a x,y,z-translational sampling "); EMDL::addLabel(EMDL_SAMPLING_HEALPIX_ORDER, EMDL_INT, "rlnHealpixOrder", "Healpix order for the sampling of the first two Euler angles (rot, tilt) on the 3D sphere"); EMDL::addLabel(EMDL_SAMPLING_HEALPIX_ORDER_ORI, EMDL_INT, "rlnHealpixOrderOriginal", "Original healpix order for the sampling of the first two Euler angles (rot, tilt) on the 3D sphere"); EMDL::addLabel(EMDL_SAMPLING_LIMIT_TILT, EMDL_DOUBLE, "rlnTiltAngleLimit", "Values to which to limit the tilt angles (positive for keeping side views, negative for keeping top views)"); EMDL::addLabel(EMDL_SAMPLING_OFFSET_RANGE, EMDL_DOUBLE, "rlnOffsetRange", "Search range for the origin offsets (in Angstroms)"); EMDL::addLabel(EMDL_SAMPLING_OFFSET_STEP, EMDL_DOUBLE, "rlnOffsetStep", "Step size for the searches in the origin offsets (in Angstroms)"); EMDL::addLabel(EMDL_SAMPLING_OFFSET_RANGE_ORI, EMDL_DOUBLE, "rlnOffsetRangeOriginal", "Original search range for the origin offsets (in Angstroms)"); EMDL::addLabel(EMDL_SAMPLING_OFFSET_STEP_ORI, EMDL_DOUBLE, "rlnOffsetStepOriginal", "Original step size for the searches in the origin offsets (in Angstroms)"); EMDL::addLabel(EMDL_SAMPLING_HELICAL_OFFSET_STEP, EMDL_DOUBLE, "rlnHelicalOffsetStep", "Step size for the searches of offsets along helical axis (in Angstroms)"); EMDL::addLabel(EMDL_SAMPLING_PERTURB, EMDL_DOUBLE, "rlnSamplingPerturbInstance", "Random instance of the random perturbation on the orientational sampling"); EMDL::addLabel(EMDL_SAMPLING_PERTURBATION_FACTOR, EMDL_DOUBLE, "rlnSamplingPerturbFactor", "Factor for random perturbation on the 
orientational sampling (between 0 no perturbation and 1 very strong perturbation)"); EMDL::addLabel(EMDL_SAMPLING_PSI_STEP, EMDL_DOUBLE, "rlnPsiStep", "Step size (in degrees) for the sampling of the in-plane rotation angle (psi)"); EMDL::addLabel(EMDL_SAMPLING_PSI_STEP_ORI, EMDL_DOUBLE, "rlnPsiStepOriginal", "Original step size (in degrees) for the sampling of the in-plane rotation angle (psi)"); EMDL::addLabel(EMDL_SAMPLING_SYMMETRY, EMDL_STRING, "rlnSymmetryGroup", "Symmetry group (e.g., C1, D7, I2, I5, etc.)"); EMDL::addLabel(EMDL_SCHEDULE_EDGE_NUMBER, EMDL_INT, "rlnScheduleEdgeNumber", "Numbered index of an edge inside a Schedule"); EMDL::addLabel(EMDL_SCHEDULE_EDGE_INPUT, EMDL_STRING, "rlnScheduleEdgeInputNodeName" , "Name of the input Node for a schedule Edge"); EMDL::addLabel(EMDL_SCHEDULE_EDGE_OUTPUT, EMDL_STRING, "rlnScheduleEdgeOutputNodeName", "Name of the output Node for a schedule Edge"); EMDL::addLabel(EMDL_SCHEDULE_EDGE_IS_FORK, EMDL_BOOL, "rlnScheduleEdgeIsFork", "Flag to indicate that this Edge is a Fork, dependent on a Boolean Schedule variable"); EMDL::addLabel(EMDL_SCHEDULE_EDGE_OUTPUT_TRUE, EMDL_STRING, "rlnScheduleEdgeOutputNodeNameIfTrue", "Name of the output Node for a schedule Fork if the associated Boolean is True"); EMDL::addLabel(EMDL_SCHEDULE_EDGE_BOOLEAN, EMDL_STRING, "rlnScheduleEdgeBooleanVariable", "Name of the associated Boolean variable if this Edge is a Fork"); EMDL::addLabel(EMDL_SCHEDULE_GENERAL_CURRENT_NODE, EMDL_STRING, "rlnScheduleCurrentNodeName", "Name of the current Node for this Schedule"); EMDL::addLabel(EMDL_SCHEDULE_GENERAL_ORIGINAL_START_NODE, EMDL_STRING, "rlnScheduleOriginalStartNodeName", "Name of the original starting Node for this Schedule"); EMDL::addLabel(EMDL_SCHEDULE_GENERAL_EMAIL, EMDL_STRING, "rlnScheduleEmailAddress", "Email address to send message when Schedule finishes"); EMDL::addLabel(EMDL_SCHEDULE_GENERAL_NAME, EMDL_STRING, "rlnScheduleName", "Name for this Schedule"); EMDL::addLabel(EMDL_SCHEDULE_JOB_NAME, EMDL_STRING, "rlnScheduleJobName", "Name of a Job in a Schedule"); EMDL::addLabel(EMDL_SCHEDULE_JOB_ORI_NAME, EMDL_STRING, "rlnScheduleJobNameOriginal", "Original name of a Job in a Schedule"); EMDL::addLabel(EMDL_SCHEDULE_JOB_MODE, EMDL_STRING, "rlnScheduleJobMode", "Mode on how to execute a Job"); EMDL::addLabel(EMDL_SCHEDULE_JOB_HAS_STARTED, EMDL_BOOL, "rlnScheduleJobHasStarted", "Flag to indicate whether a Job has started already in the execution of the Schedule"); EMDL::addLabel(EMDL_SCHEDULE_OPERATOR_NAME, EMDL_STRING, "rlnScheduleOperatorName", "Name of a Boolean operator in the Schedule"); EMDL::addLabel(EMDL_SCHEDULE_OPERATOR_TYPE, EMDL_STRING, "rlnScheduleOperatorType", "Type of an operator in the Schedule"); EMDL::addLabel(EMDL_SCHEDULE_OPERATOR_INPUT1, EMDL_STRING, "rlnScheduleOperatorInput1", "Name of the 1st input to the operator"); EMDL::addLabel(EMDL_SCHEDULE_OPERATOR_INPUT2, EMDL_STRING, "rlnScheduleOperatorInput2", "Name of the 2nd input to the operator"); EMDL::addLabel(EMDL_SCHEDULE_OPERATOR_OUTPUT, EMDL_STRING, "rlnScheduleOperatorOutput", "Name of the output variable on which this operator acts"); EMDL::addLabel(EMDL_SCHEDULE_VAR_BOOL_NAME, EMDL_STRING, "rlnScheduleBooleanVariableName", "Name of a Boolean variable in the Schedule"); EMDL::addLabel(EMDL_SCHEDULE_VAR_BOOL_VALUE, EMDL_BOOL, "rlnScheduleBooleanVariableValue", "Value of a Boolean variable in the Schedule"); EMDL::addLabel(EMDL_SCHEDULE_VAR_BOOL_ORI_VALUE, EMDL_BOOL, "rlnScheduleBooleanVariableResetValue", "Value which a Boolean variable 
will take upon a reset"); EMDL::addLabel(EMDL_SCHEDULE_VAR_FLOAT_NAME, EMDL_STRING, "rlnScheduleFloatVariableName", "Name of a Float variable in the Schedule"); EMDL::addLabel(EMDL_SCHEDULE_VAR_FLOAT_VALUE, EMDL_DOUBLE, "rlnScheduleFloatVariableValue", "Value of a Float variable in the Schedule"); EMDL::addLabel(EMDL_SCHEDULE_VAR_FLOAT_ORI_VALUE, EMDL_DOUBLE, "rlnScheduleFloatVariableResetValue", "Value which a Float variable will take upon a reset"); EMDL::addLabel(EMDL_SCHEDULE_VAR_STRING_NAME, EMDL_STRING, "rlnScheduleStringVariableName", "Name of a String variable in the Schedule"); EMDL::addLabel(EMDL_SCHEDULE_VAR_STRING_VALUE, EMDL_STRING, "rlnScheduleStringVariableValue", "Value of a String variable in the Schedule"); EMDL::addLabel(EMDL_SCHEDULE_VAR_STRING_ORI_VALUE, EMDL_STRING, "rlnScheduleStringVariableResetValue", "Value which a String variable will take upon a reset"); EMDL::addLabel(EMDL_SELECTED, EMDL_INT, "rlnSelected", "Flag whether an entry in a metadatatable is selected (1) in the viewer or not (0)"); EMDL::addLabel(EMDL_SELECT_PARTICLES_ZSCORE, EMDL_DOUBLE, "rlnParticleSelectZScore", "Sum of Z-scores from particle_select. High Z-scores are likely to be outliers."); EMDL::addLabel(EMDL_SORTED_IDX, EMDL_INT, "rlnSortedIndex", "Index of a metadata entry after sorting (first sorted index is 0)."); EMDL::addLabel(EMDL_STARFILE_MOVIE_PARTICLES, EMDL_STRING, "rlnStarFileMovieParticles", "Filename of a STAR file with movie-particles in it"); EMDL::addLabel(EMDL_PERFRAME_CUMULATIVE_WEIGHT, EMDL_DOUBLE, "rlnPerFrameCumulativeWeight", "Sum of the resolution-dependent relative weights from the first frame until the given frame"); EMDL::addLabel(EMDL_PERFRAME_RELATIVE_WEIGHT, EMDL_DOUBLE, "rlnPerFrameRelativeWeight", "The resolution-dependent relative weights for a given frame"); EMDL::addLabel(EMDL_RESOLUTION, EMDL_DOUBLE, "rlnResolution", "Resolution (in 1/Angstroms)"); EMDL::addLabel(EMDL_RESOLUTION_ANGSTROM, EMDL_DOUBLE, "rlnAngstromResolution", "Resolution (in Angstroms)"); EMDL::addLabel(EMDL_RESOLUTION_INVPIXEL, EMDL_DOUBLE, "rlnResolutionInversePixel", "Resolution (in 1/pixel, Nyquist = 0.5)"); EMDL::addLabel(EMDL_SPECTRAL_IDX, EMDL_INT, "rlnSpectralIndex", "Spectral index (i.e. distance in pixels to the origin in Fourier space) "); EMDL::addLabel(EMDL_UNKNOWN_LABEL, EMDL_UNKNOWN, "rlnUnknownLabel", "NON-RELION label: values will be ignored, yet maintained in the STAR file."); } ~StaticInitialization() { } friend class EMDL; }; #endif relion-3.1.3/src/metadata_table.cpp000066400000000000000000001402461411340063500172340ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ /*************************************************************************** * * Authors: J.R. Bilbao-Castro (jrbcast@ace.ual.es) * * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA * 02111-1307 USA * * All comments concerning this program package may be sent to the * e-mail address 'xmipp@cnb.csic.es' ***************************************************************************/ #include "src/metadata_table.h" #include "src/metadata_label.h" MetaDataTable::MetaDataTable() : objects(0), label2offset(EMDL_LAST_LABEL, -1), current_objectID(0), doubleLabels(0), intLabels(0), boolLabels(0), stringLabels(0), doubleVectorLabels(0), unknownLabels(0), isList(false), name(""), comment(""), version(CURRENT_MDT_VERSION), activeLabels(0) { } MetaDataTable::MetaDataTable(const MetaDataTable &MD) : objects(MD.objects.size()), label2offset(MD.label2offset), unknownLabelPosition2Offset(MD.unknownLabelPosition2Offset), unknownLabelNames(MD.unknownLabelNames), current_objectID(0), doubleLabels(MD.doubleLabels), intLabels(MD.intLabels), boolLabels(MD.boolLabels), stringLabels(MD.stringLabels), doubleVectorLabels(MD.doubleVectorLabels), unknownLabels(MD.unknownLabels), isList(MD.isList), name(MD.name), comment(MD.comment), version(MD.version), activeLabels(MD.activeLabels) { for (size_t idx = 0; idx < MD.objects.size(); idx++) { objects[idx] = new MetaDataContainer(*(MD.objects[idx])); objects[idx]->table = this; } } MetaDataTable& MetaDataTable::operator = (const MetaDataTable &MD) { if (this != &MD) { clear(); objects.resize(MD.objects.size()); label2offset = MD.label2offset; unknownLabelPosition2Offset = MD.unknownLabelPosition2Offset; unknownLabelNames = MD.unknownLabelNames; current_objectID = 0; doubleLabels = MD.doubleLabels; intLabels = MD.intLabels; boolLabels = MD.boolLabels; stringLabels = MD.stringLabels; doubleVectorLabels = MD.doubleVectorLabels; unknownLabels = MD.unknownLabels; isList = MD.isList; name = MD.name; comment = MD.comment; version = MD.version; activeLabels = MD.activeLabels; for (long int idx = 0; idx < MD.objects.size(); idx++) { objects[idx] = new MetaDataContainer(this, MD.objects[idx]); objects[idx]->table = this; } } return *this; } void MetaDataTable::setIsList(bool is_list) { isList = is_list; } MetaDataTable::~MetaDataTable() { for (long i = 0; i < objects.size(); i++) { delete objects[i]; } } bool MetaDataTable::isEmpty() const { return (objects.size()==0); } size_t MetaDataTable::numberOfObjects() const { return objects.size(); } void MetaDataTable::clear() { for (long i = 0; i < objects.size(); i++) { delete objects[i]; } objects.clear(); label2offset = std::vector(EMDL_LAST_LABEL, -1); current_objectID = 0; unknownLabelPosition2Offset.clear(); unknownLabelNames.clear(); doubleLabels = 0; intLabels = 0; 
boolLabels = 0; stringLabels = 0; unknownLabels = 0; isList = false; name = ""; comment = ""; version = CURRENT_MDT_VERSION; activeLabels.clear(); } void MetaDataTable::setComment(const std::string newComment) { comment = newComment; } std::string MetaDataTable::getComment() const { return comment; } bool MetaDataTable::containsComment() const { return (comment != std::string("")); } void MetaDataTable::setName(const std::string newName) { name = newName; } std::string MetaDataTable::getName() const { return name; } void MetaDataTable::setVersion(int v) { version = v; } int MetaDataTable::getVersion() const { return version; } int MetaDataTable::getCurrentVersion() { return CURRENT_MDT_VERSION; } std::string MetaDataTable::getUnknownLabelNameAt(int i) const { if (activeLabels[i] != EMDL_UNKNOWN_LABEL) REPORT_ERROR("MetaDataTable::getUnknownLabelNameAt(): the requested column is not an unknown label."); return unknownLabelNames[unknownLabelPosition2Offset[i]]; } bool MetaDataTable::getValueToString(EMDLabel label, std::string &value, long objectID, bool escape) const { // SHWS 18jul2018: this function previously had a stringstream, but it greatly slowed down // writing of large STAR files in some strange circumstances (with large data.star // and model.star files in refinement) // Therefore replaced the strstream with faster snprintf // // JZ 9aug2018: still using a stringstream for vector fields // => Avoid vector-valued columns in particle star-files. char buffer[14]; if (EMDL::isString(label)) { if (!getValue(label, value, objectID)) return false; if (escape) escapeStringForSTAR(value); return true; } else { if (EMDL::isDouble(label)) { double v; if(!getValue(label, v, objectID)) return false; if ((ABS(v) > 0. && ABS(v) < 0.001) || ABS(v) > 100000.) { if (v < 0.) { snprintf(buffer,13, "%12.5e", v); } else { snprintf(buffer,13, "%12.6e", v); } } else { if (v < 0.) 
{ snprintf(buffer,13, "%12.5f", v); } else { snprintf(buffer,13, "%12.6f", v); } } } else if (EMDL::isInt(label)) { long v; if (!getValue(label, v, objectID)) return false; snprintf(buffer,13, "%12ld", v); } else if (EMDL::isBool(label)) { bool v; if (!getValue(label, v, objectID)) return false; snprintf(buffer,13, "%12d", (int)v); } else if (EMDL::isDoubleVector(label)) { std::vector v; getValue(label, v, objectID); if (v.size() == 0) { value = "[]"; } else { std::stringstream sts; sts << std::setprecision(12); sts << '['; for (int i = 0; i < v.size()-1; i++) { sts << v[i] << ','; } sts << v[v.size()-1] << ']'; value = sts.str(); } return true; } std::string tt(buffer); value = tt; return true; } } bool MetaDataTable::setUnknownValue(int labelPosition, const std::string &value) { long offset = unknownLabelPosition2Offset[labelPosition]; if (offset < 0) REPORT_ERROR("MetaDataTable::setValueFromString BUG: offset should not be negative here...."); if (offset > -1) { objects[current_objectID]->unknowns[offset] = value; return true; } else { return false; } } bool MetaDataTable::setValueFromString( EMDLabel label, const std::string &value, long int objectID) { if (EMDL::isString(label)) { return setValue(label, value, objectID); } else { std::istringstream i(value); if (EMDL::isDouble(label)) { double v; i >> v; return setValue(label, v, objectID); } else if (EMDL::isInt(label)) { long v; i >> v; return setValue(label, v, objectID); } else if (EMDL::isBool(label)) { bool v; i >> v; return setValue(label, v, objectID); } else if (EMDL::isDoubleVector(label)) { std::vector v; v.reserve(32); char* temp = new char[value.size()+1]; strcpy(temp, value.c_str()); char* token; char* rest = temp; while ((token = strtok_r(rest, "[,]", &rest)) != 0) { double d; std::stringstream sts(token); sts >> d; v.push_back(d); } delete[] temp; return setValue(label, v, objectID); } } REPORT_ERROR("Logic error: should not happen"); return false; } // comparators used for sorting struct MdDoubleComparator { MdDoubleComparator(long index) : index(index) {} bool operator()(MetaDataContainer *lh, MetaDataContainer *rh) const { return lh->doubles[index] < rh->doubles[index]; } long index; }; struct MdIntComparator { MdIntComparator(long index) : index(index) {} bool operator()(MetaDataContainer *lh, MetaDataContainer *rh) const { return lh->ints[index] < rh->ints[index]; } long index; }; struct MdStringComparator { MdStringComparator(long index) : index(index) {} bool operator()(MetaDataContainer *lh, MetaDataContainer *rh) const { return lh->strings[index] < rh->strings[index]; } long index; }; struct MdStringAfterAtComparator { MdStringAfterAtComparator(long index) : index(index) {} bool operator()(MetaDataContainer *lh, MetaDataContainer *rh) const { std::string slh = lh->strings[index]; std::string srh = rh->strings[index]; slh = slh.substr(slh.find("@")+1); srh = srh.substr(srh.find("@")+1); return slh < srh; } long index; }; struct MdStringBeforeAtComparator { MdStringBeforeAtComparator(long index) : index(index) {} bool operator()(MetaDataContainer *lh, MetaDataContainer *rh) const { std::string slh = lh->strings[index]; std::string srh = rh->strings[index]; slh = slh.substr(0, slh.find("@")); srh = srh.substr(0, srh.find("@")); std::stringstream stslh, stsrh; stslh << slh; stsrh << srh; long ilh, irh; stslh >> ilh; stsrh >> irh; return ilh < irh; } long index; }; void MetaDataTable::sort(EMDLabel name, bool do_reverse, bool only_set_index, bool do_random) { if (do_random) { srand (time(NULL)); /* initialize 
random seed: */ } else if (!EMDL::isNumber(name)) { REPORT_ERROR("MetadataTable::sort%% ERROR: can only sorted numbers"); } std::vector > vp; vp.reserve(objects.size()); long int i = 0; FOR_ALL_OBJECTS_IN_METADATA_TABLE(*this) { double dval; if (do_random) { dval = (double)rand(); } else if (EMDL::isInt(name)) { long val; getValue(name, val); dval = (double) val; } else // EMDL::isDouble(name) { getValue(name, dval); } vp.push_back(std::make_pair(dval, i)); i++; } std::sort(vp.begin(), vp.end()); if (do_reverse && !do_random) std::reverse(vp.begin(), vp.end()); if (only_set_index) { // Add an extra column with the sorted position of each entry for (long j = 0; j < vp.size(); j++) { (*this).setValue(EMDL_SORTED_IDX, j, vp[j].second); } } else { // Change the actual order in the MetaDataTable std::vector objs(objects.size()); for (long j = 0; j < vp.size(); j++) { objs[j] = objects[vp[j].second]; } objects = objs; } // reset pointer to the beginning of the table firstObject(); } void MetaDataTable::newSort(const EMDLabel label, bool do_reverse, bool do_sort_after_at, bool do_sort_before_at) { if (EMDL::isString(label)) { if (do_sort_after_at) { std::stable_sort(objects.begin(), objects.end(), MdStringAfterAtComparator(label2offset[label])); } else if (do_sort_before_at) { std::stable_sort(objects.begin(), objects.end(), MdStringBeforeAtComparator(label2offset[label])); } else { std::stable_sort(objects.begin(), objects.end(), MdStringComparator(label2offset[label])); } } else if (EMDL::isDouble(label)) { std::stable_sort(objects.begin(), objects.end(), MdDoubleComparator(label2offset[label])); } else if (EMDL::isInt(label)) { std::stable_sort(objects.begin(), objects.end(), MdIntComparator(label2offset[label])); } else { REPORT_ERROR("Cannot sort this label: " + EMDL::label2Str(label)); } if (do_reverse) { std::reverse(objects.begin(), objects.end()); } } // Will be removed in 3.2 bool MetaDataTable::labelExists(EMDLabel name) const { return containsLabel(name); } bool MetaDataTable::containsLabel(const EMDLabel label, std::string unknownLabel) const { for (int i = 0; i < activeLabels.size(); i++) { if (activeLabels[i] == label && (label != EMDL_UNKNOWN_LABEL || getUnknownLabelNameAt(i) == unknownLabel)) return true; } return false; } std::vector MetaDataTable::getActiveLabels() const { return activeLabels; } void MetaDataTable::deactivateLabel(EMDLabel label, std::string unknownLabel) { for (int i = 0; i < activeLabels.size(); i++) { if (activeLabels[i] == label && (label != EMDL_UNKNOWN_LABEL || unknownLabelNames[unknownLabelPosition2Offset[i]] == unknownLabel)) { activeLabels.erase(activeLabels.begin() + i); unknownLabelPosition2Offset.erase(unknownLabelPosition2Offset.begin() + i); if (label != EMDL_UNKNOWN_LABEL) label2offset[label] = -1; } } } void MetaDataTable::addLabel(EMDLabel label, std::string unknownLabel) { if (label >= EMDL_LAST_LABEL) REPORT_ERROR(std::string("MetaDataTable::addLabel: unrecognised label: ") + EMDL::label2Str(label)); if (label == EMDL_UNKNOWN_LABEL && unknownLabel == "") REPORT_ERROR("MetaDataTable::addLabel: unknownLabel is empty"); if (label2offset[label] < 0 || label == EMDL_UNKNOWN_LABEL) // keep pushing the same unknown label... 
{ long id; if (EMDL::isDouble(label)) { id = doubleLabels; for (long i = 0; i < objects.size(); i++) { objects[i]->doubles.push_back(0); } doubleLabels++; } else if (EMDL::isInt(label)) { id = intLabels; for (long i = 0; i < objects.size(); i++) { objects[i]->ints.push_back(0); } intLabels++; } else if (EMDL::isBool(label)) { id = boolLabels; for (long i = 0; i < objects.size(); i++) { objects[i]->bools.push_back(false); } boolLabels++; } else if (EMDL::isString(label)) { id = stringLabels; for (long i = 0; i < objects.size(); i++) { objects[i]->strings.push_back("empty"); } stringLabels++; } else if (EMDL::isDoubleVector(label)) { id = doubleVectorLabels; for (long i = 0; i < objects.size(); i++) { objects[i]->doubleVectors.push_back(std::vector()); } doubleVectorLabels++; } else if (EMDL::isUnknown(label)) { id = unknownLabels; for (long i = 0; i < objects.size(); i++) { objects[i]->unknowns.push_back("empty"); } unknownLabelNames.push_back(unknownLabel); unknownLabels++; } activeLabels.push_back(label); unknownLabelPosition2Offset.push_back(EMDL::isUnknown(label) ? id : -1); label2offset[label] = id; } } void MetaDataTable::addMissingLabels(const MetaDataTable* mdt) { for (long i = 0; i < mdt->activeLabels.size(); i++) { EMDLabel l = mdt->activeLabels[i]; if (l == EMDL_UNKNOWN_LABEL) { std::string unknownLabel = mdt->getUnknownLabelNameAt(i); if (!containsLabel(l, unknownLabel)) addLabel(l, unknownLabel); } else if (label2offset[l] < 0) { addLabel(l); } } } void MetaDataTable::append(const MetaDataTable& mdt) { if (activeLabels.size() == 0) { // If the current one is empty, add missing labels and append the new one: addMissingLabels(&mdt); } else { // If the current one is not-empty, check all labels are the same before appending. Otherwise, raise error if (!compareLabels(*this, mdt)) REPORT_ERROR("ERROR in appending metadata tables with not the same columns!"); } // Now append objects.reserve(objects.size() + mdt.numberOfObjects()); for (long i = 0; i < mdt.objects.size(); i++) { objects.push_back(new MetaDataContainer( this, doubleLabels, intLabels, boolLabels, stringLabels, doubleVectorLabels, unknownLabels)); setObjectUnsafe(mdt.getObject(i), objects.size() - 1); } // reset pointer to the beginning of the table firstObject(); } MetaDataContainer* MetaDataTable::getObject(long objectID) const { if (objectID < 0) objectID = current_objectID; checkObjectID(objectID, "MetaDataTable::getObject"); return objects[objectID]; } void MetaDataTable::setObject(MetaDataContainer* data, long objectID) { if (objectID < 0) objectID = current_objectID; checkObjectID(objectID, "MetaDataTable::setObject"); addMissingLabels(data->table); setObjectUnsafe(data, objectID); } void MetaDataTable::setValuesOfDefinedLabels(MetaDataContainer* data, long objectID) { if (objectID < 0) objectID = current_objectID; checkObjectID(objectID, "MetaDataTable::setValuesOfDefinedLabels"); setObjectUnsafe(data, objectID); } void MetaDataTable::reserve(size_t capacity) { objects.reserve(capacity); } void MetaDataTable::setObjectUnsafe(MetaDataContainer* data, long objectID) { MetaDataContainer* obj = objects[objectID]; for (long i = 0; i < data->table->activeLabels.size(); i++) { EMDLabel label = data->table->activeLabels[i]; if (label != EMDL_UNKNOWN_LABEL) { long myOff = label2offset[label]; long srcOff = data->table->label2offset[label]; if (myOff < 0) continue; if (EMDL::isDouble(label)) { obj->doubles[myOff] = data->doubles[srcOff]; } else if (EMDL::isInt(label)) { obj->ints[myOff] = data->ints[srcOff]; } else if 
(EMDL::isBool(label)) { obj->bools[myOff] = data->bools[srcOff]; } else if (EMDL::isString(label)) { obj->strings[myOff] = data->strings[srcOff]; } else if (EMDL::isDoubleVector(label)) { obj->doubleVectors[myOff] = data->doubleVectors[srcOff]; } } else { std::string unknownLabel = data->table->getUnknownLabelNameAt(i); long srcOff = data->table->unknownLabelPosition2Offset[i]; long myOff = -1; for (int j = 0; j < unknownLabelNames.size(); j++) { if (unknownLabelNames[j] == unknownLabel) { myOff = j; break; } } if (myOff < 0) REPORT_ERROR("MetaDataTable::setObjectUnsafe: logic error. cannot find srcOff."); obj->unknowns[myOff] = data->unknowns[srcOff]; } } } void MetaDataTable::addObject() { objects.push_back(new MetaDataContainer( this, doubleLabels, intLabels, boolLabels, stringLabels, doubleVectorLabels, unknownLabels)); current_objectID = objects.size()-1; } void MetaDataTable::addObject(MetaDataContainer* data) { objects.push_back(new MetaDataContainer( this, doubleLabels, intLabels, boolLabels, stringLabels, doubleVectorLabels, unknownLabels)); setObject(data, objects.size()-1); current_objectID = objects.size()-1; } void MetaDataTable::addValuesOfDefinedLabels(MetaDataContainer* data) { objects.push_back(new MetaDataContainer( this, doubleLabels, intLabels, boolLabels, stringLabels, doubleVectorLabels, unknownLabels)); setValuesOfDefinedLabels(data, objects.size()-1); current_objectID = objects.size()-1; } void MetaDataTable::removeObject(long objectID) { long i = (objectID < 0) ? current_objectID : objectID; checkObjectID(i, "MetaDataTable::removeObject"); delete objects[i]; objects.erase(objects.begin() + i); current_objectID = objects.size() - 1; } long int MetaDataTable::firstObject() { current_objectID = 0; return 0; } long int MetaDataTable::nextObject() { current_objectID++; if (current_objectID >= objects.size()) { return NO_MORE_OBJECTS; } else { return current_objectID; } } long int MetaDataTable::goToObject(long int objectID) { checkObjectID(objectID, "MetaDataTable::goToObject"); current_objectID = objectID; return current_objectID; } long int MetaDataTable::readStarLoop(std::ifstream& in, bool do_only_count) { setIsList(false); //Read column labels int labelPosition = 0; std::string line, token; // First read all the column labels while (getline(in, line, '\n')) { line = simplify(line); // TODO: handle comments... 
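		// Informational sketch of the input this loop expects (label names and values are
		// hypothetical):
		//
		//     loop_
		//     _rlnMicrographName #1
		//     _rlnDefocusU #2
		//     mic001.mrc   10234.5
		//
		// Lines starting with '_' are column definitions and are handled below; the first line
		// that does not start with '#', ';' or '_' is taken as the first data row.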
if (line[0] == '#' || line[0] == '\0' || line[0] == ';') continue; if (line[0] == '_') // label definition line { //Only take string from "_" until "#" size_t pos0 = line.find("_"); size_t pos1 = line.find("#"); token = line.substr(pos0 + 1, pos1 - pos0 - 2); EMDLabel label = EMDL::str2Label(token); if (label == EMDL_UNDEFINED) { std::cerr << " + WARNING: will ignore (but maintain) values for the unknown label: " << token << std::endl; label = EMDL_UNKNOWN_LABEL; } addLabel(label, token); labelPosition++; } else // found first data line { break; } } // Then fill the table (dont read another line until the one from above has been handled) bool is_first = true; long int nr_objects = 0; const int num_labels = activeLabels.size(); while (is_first || getline(in, line, '\n')) { is_first = false; line = simplify(line); // Stop at empty line if (line[0] == '\0') break; nr_objects++; if (!do_only_count) { // Add a new line to the table addObject(); // Parse data values int pos = 0; std::string value; labelPosition = 0; while (nextTokenInSTAR(line, pos, value)) { if (labelPosition >= num_labels) { std::cerr << "Error in line: " << line << std::endl; REPORT_ERROR("A line in the STAR file contains more columns than the number of labels."); } // Check whether this is an unknown label if (activeLabels[labelPosition] == EMDL_UNKNOWN_LABEL) { setUnknownValue(labelPosition, value); } else { setValueFromString(activeLabels[labelPosition], value); } labelPosition++; } if (labelPosition < num_labels && num_labels > 2) { // For backward-compatibility for cases like "fn_mtf ", don't die if num_labels == 2. std::cerr << "Error in line: " << line << std::endl; REPORT_ERROR("A line in the STAR file contains fewer columns than the number of labels. Expected = " + integerToString(num_labels) + " Found = " + integerToString(labelPosition)); } } } return nr_objects; } bool MetaDataTable::readStarList(std::ifstream& in) { setIsList(true); addObject(); long int objectID = objects.size() - 1; std::string line, firstword, value; bool also_has_loop = false; // Read data and fill structures accordingly int labelPosition = 0; while (getline(in, line, '\n')) { int pos = 0; // Ignore empty lines if (!nextTokenInSTAR(line, pos, firstword)) continue; // Get label-value pairs if (firstword[0] == '_') { std::string token = firstword.substr(1); // get rid of leading underscore EMDLabel label = EMDL::str2Label(token); if (!nextTokenInSTAR(line, pos, value)) REPORT_ERROR("MetaDataTable::readStarList: did not encounter a single word after "+firstword); if (label == EMDL_UNDEFINED) { label = EMDL_UNKNOWN_LABEL; addLabel(label, token); setUnknownValue(labelPosition, value); std::cerr << " + WARNING: will ignore (but maintain) values for the unknown label: " << token << std::endl; } else { addLabel(label); setValueFromString(label, value, objectID); } labelPosition++; } // Check whether there is a comment or an empty line else if (firstword[0] == '#' || firstword[0] == ';') { // TODO: handle comments? continue; } // Check whether a loop structure comes after this list else if (firstword.find("loop_") == 0) { also_has_loop = true; return also_has_loop; } // Check whether this data blocks ends (because a next one is there) else if (firstword.find("data_") == 0) { // Should I reverse the pointer one line? 
return also_has_loop; } } // Reached the end of the file return also_has_loop; } long int MetaDataTable::readStar(std::ifstream& in, const std::string &name, bool do_only_count) { std::string line, token, value; clear(); bool also_has_loop; // Start reading the ifstream at the top in.seekg(0); // Set the version to 30000 by default, in case there is no version tag // (version tags were introduced in version 31000) version = 30000; // Proceed until the next data_ or _loop statement // The loop statement may be necessary for data blocks that have a list AND a table inside them while (getline(in, line, '\n')) { trim(line); if (line.find("# version ") != std::string::npos) { token = line.substr(line.find("# version ") + std::string("# version ").length()); std::istringstream sts(token); sts >> version; } // Find data_ lines if (line.find("data_") != std::string::npos) { token = line.substr(line.find("data_") + 5); // If a name has been given, only read data_thatname // Otherwise, just read the first data_ block if (name == "" || name == token) { setName(token); // Get the next item that starts with "_somelabel" or with "loop_" int current_pos = in.tellg(); while (getline(in, line, '\n')) { if (line.find("loop_") != std::string::npos) { return readStarLoop(in, do_only_count); } else if (line[0] == '_') { // go back one line in the ifstream in.seekg(current_pos); also_has_loop = readStarList(in); return (also_has_loop) ? 0 : 1; } } } } } // Clear the eofbit so we can perform more actions on the stream. in.clear(); return 0; } long int MetaDataTable::read(const FileName &filename, const std::string &name, bool do_only_count) { // Clear current table clear(); // Check for an :star extension FileName fn_read = filename.removeFileFormat(); std::ifstream in(fn_read.data(), std::ios_base::in); if (in.fail()) { REPORT_ERROR( (std::string) "MetaDataTable::read: File " + fn_read + " does not exist" ); } return readStar(in, name, do_only_count); in.close(); // Go to the first object firstObject(); } void MetaDataTable::write(std::ostream& out) { // Only write tables that have something in them if (isEmpty()) { return; } if (version >= 30000) { out << "\n"; out << "# version " << getCurrentVersion() <<"\n"; } out << "\n"; out << "data_" << getName() <<"\n"; if (containsComment()) { out << "# "<< comment << "\n"; } out << "\n"; if (!isList) { // Write loop header structure out << "loop_ \n"; for (long i = 0, n_printed = 1; i < activeLabels.size(); i++) { EMDLabel l = activeLabels[i]; if (l == EMDL_UNKNOWN_LABEL) { const long offset = unknownLabelPosition2Offset[i]; out << "_" << unknownLabelNames[offset]<< " #" << (n_printed++) << " \n"; } else if (l != EMDL_COMMENT && l != EMDL_SORTED_IDX) // EMDL_SORTED_IDX is only for internal use, never write it out! 
{ out << "_" << EMDL::label2Str(l) << " #" << (n_printed++) << " \n"; } } // Write actual data block for (long int idx = 0; idx < objects.size(); idx++) { std::string entryComment = ""; for (long i = 0; i < activeLabels.size(); i++) { EMDLabel l = activeLabels[i]; if (l == EMDL_UNKNOWN_LABEL) { out.width(10); std::string token, val; long offset = unknownLabelPosition2Offset[i]; val = objects[idx]->unknowns[offset]; escapeStringForSTAR(val); out << val << " "; } else if (l != EMDL_COMMENT && l != EMDL_SORTED_IDX) { out.width(10); std::string val; getValueToString(l, val, idx, true); // escape=true out << val << " "; } if (l == EMDL_COMMENT) { getValue(EMDL_COMMENT, entryComment, idx); } } if (entryComment != std::string("")) { out << "# " << entryComment; } out << "\n"; } // Finish table with a white-line out << " \n"; } else // isList { // Get first object. In this case (row format) there is a single object std::string entryComment = ""; int maxWidth=10; for (long i = 0; i < activeLabels.size(); i++) { EMDLabel l = activeLabels[i]; if (l != EMDL_COMMENT && l != EMDL_UNKNOWN_LABEL) { int w = EMDL::label2Str(l).length(); if (w > maxWidth) maxWidth = w; } else if (l == EMDL_UNKNOWN_LABEL) { long offset = unknownLabelPosition2Offset[i]; int w = unknownLabelNames[offset].length(); if (w > maxWidth) maxWidth = w; } else { getValue(EMDL_COMMENT, entryComment, 0); } } for (long i = 0; i < activeLabels.size(); i++) { EMDLabel l = activeLabels[i]; if (l == EMDL_UNKNOWN_LABEL) { long offset = unknownLabelPosition2Offset[i]; int w = unknownLabelNames[offset].length(); out << "_" << unknownLabelNames[offset] << std::setw(12 + maxWidth - w) << " " << objects[0]->unknowns[offset] << "\n"; } else if (l != EMDL_COMMENT) { int w = EMDL::label2Str(l).length(); out << "_" << EMDL::label2Str(l) << std::setw(12 + maxWidth - w) << " "; std::string val; getValueToString(l, val, 0, true); // escape=true out << val << "\n"; } } if (entryComment != std::string("")) { out << "# " << entryComment << "\n"; } // End a data block with a white line out << " \n"; } } void MetaDataTable::write(const FileName &fn_out) { std::ofstream fh; FileName fn_tmp = fn_out + ".tmp"; fh.open((fn_tmp).c_str(), std::ios::out); if (!fh) REPORT_ERROR( (std::string)"MetaDataTable::write: cannot write to file: " + fn_out); // fh << "# RELION; version " << g_RELION_VERSION << std::endl; write(fh); fh.close(); // Rename to prevent errors with programs in pipeliner reading in incomplete STAR files std::rename(fn_tmp.c_str(), fn_out.c_str()); } void MetaDataTable::columnHistogram(EMDLabel label, std::vector &histX, std::vector &histY, int verb, CPlot2D *plot2D, long int nr_bin, RFLOAT hist_min, RFLOAT hist_max, bool do_fractional_instead, bool do_cumulative_instead) { if (!containsLabel(label)) REPORT_ERROR("ERROR: The column specified is not present in the MetaDataTable."); std::vector values; FOR_ALL_OBJECTS_IN_METADATA_TABLE(*this) { RFLOAT val; if (EMDL::isDouble(label)) { getValue(label, val); } else if (EMDL::isInt(label)) { long aux; getValue(label, aux); val = aux; } else if (EMDL::isBool(label)) { bool aux; getValue(label, aux); val = aux ? 
1 : 0; } else { REPORT_ERROR("Cannot use --stat_column for this type of column"); } values.push_back(val); } std::string title = EMDL::label2Str(label); histogram(values, histX, histY, verb, title, plot2D, nr_bin, hist_min, hist_max, do_fractional_instead, do_cumulative_instead); } void MetaDataTable::histogram(std::vector &values, std::vector &histX, std::vector &histY, int verb, std::string title, CPlot2D *plot2D, long int nr_bin, RFLOAT hist_min, RFLOAT hist_max, bool do_fractional_instead, bool do_cumulative_instead) { double sum = 0, sumsq = 0; for (size_t i = 0, ilim = values.size(); i < ilim; i++) { RFLOAT value = values[i]; sum += value; sumsq += value * value; } long long n_row = values.size(); std::sort(values.begin(), values.end()); sum /= n_row; sumsq /= n_row; if (verb > 0) { std::cout << "Number of items: " << n_row << std::endl; std::cout << "Min: " << values[0] << " Q1: " << values[n_row / 4]; std::cout << " Median: " << values[n_row / 2] << " Q3: " << values[n_row * 3 / 4] << " Max: " << values[n_row - 1] << std::endl; std::cout << "Mean: " << sum << " Std: " << std::sqrt(sumsq - sum * sum) << std::endl; } RFLOAT iqr = values[n_row * 3 / 4] - values[n_row / 2]; RFLOAT bin_width = 1; unsigned int bin_size = 1; // change bin parameters only when there are many values if (iqr != 0) { if (nr_bin <= 0) { hist_min = values[0]; hist_max = values[n_row - 1]; bin_width = 2 * iqr / std::pow(n_row, 1.0 / 3); // Freedman-Diaconis rule bin_size = (unsigned int)(std::ceil((hist_max - hist_min) / bin_width)); if (bin_size > 5000) bin_size = 5000; // FIXME: Ad hoc upper limit to avoid using too much memory } else { if (!std::isfinite(hist_min) || hist_min == -LARGE_NUMBER) hist_min = values[0]; if (!std::isfinite(hist_max) || hist_max == LARGE_NUMBER) hist_max = values[n_row - 1]; bin_size = nr_bin; } bin_width = (hist_max - hist_min) / bin_size; } else { if (!std::isfinite(hist_min) || hist_min == -LARGE_NUMBER) hist_min = values[0]; if (!std::isfinite(hist_max) || hist_max == LARGE_NUMBER) hist_max = values[n_row - 1]; } bin_size += 2; // for -inf and +inf if (verb > 0) std::cout << "Bin size: " << bin_size << " width: " << bin_width << std::endl; std::vector hist(bin_size); histY.resize(4*bin_size, 0.); histX.resize(4*bin_size, 0.); for (int i = 0; i < n_row; i++) { int ibin = (int)((values[i] - hist_min) / bin_width) + 1; if (ibin < 0) ibin = 0; if (ibin >= bin_size) ibin = bin_size - 1; hist[ibin]++; } long cum = 0; for (int i = 0; i < bin_size; i++) { if (i == 0) { if (verb > 0) std::cout << "[-INF, " << hist_min << "): "; histX[4*i] = hist_min - bin_width; histX[4*i+1] = hist_min - bin_width; histX[4*i+2] = hist_min; histX[4*i+3] = hist_min; } else if (i == bin_size - 1) { if (verb > 0) std::cout << "[" << hist_max << ", +INF]: "; histX[4*i] = hist_max; histX[4*i+1] = hist_max; histX[4*i+2] = hist_max + bin_width; histX[4*i+3] = hist_max + bin_width; } else { if (verb > 0) std::cout << "[" << (hist_min + bin_width * (i - 1)) << ", " << (hist_min + bin_width * i) << "): "; histX[4*i] = hist_min + bin_width * (i - 1); histX[4*i+1] = hist_min + bin_width * (i - 1); histX[4*i+2] = hist_min + bin_width * i; histX[4*i+3] = hist_min + bin_width * i; } cum += hist[i]; if (do_fractional_instead) hist[i] = (100. 
* hist[i] / (float)n_row); else if (do_cumulative_instead) hist[i] = (100 * cum / (float)n_row); if (verb > 0) std::cout << hist[i] << std::endl; histY[4*i+1] = histY[4*i+2] = hist[i]; histY[4*i] = histY[4*i+3] = 0.; } histX[histX.size()-1] = histX[histX.size()-2]; if (plot2D != NULL) { plot2D->SetTitle(" Histogram of " + title); plot2D->SetDrawLegend(false); plot2D->AddDataSet(histX, histY); plot2D->SetXAxisTitle(title); plot2D->SetYAxisTitle("# entries"); } } void MetaDataTable::addToCPlot2D(CPlot2D *plot2D, EMDLabel xaxis, EMDLabel yaxis, double red, double green, double blue, double linewidth, std::string marker) { CDataSet dataSet; if (marker=="") { dataSet.SetDrawMarker(false); } else { dataSet.SetDrawMarker(true); dataSet.SetMarkerSymbol(marker); } dataSet.SetLineWidth(linewidth); dataSet.SetDatasetColor(red, green, blue); dataSet.SetDatasetTitle(EMDL::label2Str(yaxis)); double mydbl; long int myint; double xval, yval; for (long int idx = 0; idx < objects.size(); idx++) { const long offx = label2offset[xaxis]; if (offx < 0) REPORT_ERROR("MetaDataTable::addToCPlot2D ERROR: cannot find x-axis label"); if (xaxis == EMDL_UNDEFINED) { xval = idx+1; } else if (EMDL::isDouble(xaxis)) { objects[idx]->getValue(offx, mydbl); xval = mydbl; } else if (EMDL::isInt(xaxis)) { objects[idx]->getValue(offx, myint); xval = myint; } else REPORT_ERROR("MetaDataTable::addToCPlot2D ERROR: can only plot x-axis double, int or long int"); const long offy = label2offset[yaxis]; if (offy < 0) REPORT_ERROR("MetaDataTable::addToCPlot2D ERROR: cannot find y-axis label"); if (EMDL::isDouble(yaxis)) { objects[idx]->getValue(offy, mydbl); yval = mydbl; } else if (EMDL::isInt(yaxis)) { objects[idx]->getValue(offy, myint); yval = myint; } else REPORT_ERROR("MetaDataTable::addToCPlot2D ERROR: can only plot y-axis double, int or long int"); CDataPoint point(xval, yval); dataSet.AddDataPoint(point); } plot2D->AddDataSet(dataSet); if (xaxis != EMDL_UNDEFINED) plot2D->SetXAxisTitle(EMDL::label2Str(xaxis)); plot2D->SetYAxisTitle(EMDL::label2Str(yaxis)); } void MetaDataTable::printLabels(std::ostream &ost) { for (int i = 0; i < activeLabels.size(); i++) { ost << EMDL::label2Str(activeLabels[i]) << "\n"; } } void MetaDataTable::randomiseOrder() { std::random_shuffle(objects.begin(), objects.end()); } void MetaDataTable::checkObjectID(long id, std::string caller) const { if (id >= objects.size() || id < 0) { std::stringstream sts0, sts1; sts0 << id; sts1 << objects.size(); REPORT_ERROR(caller+": object " + sts0.str() + " out of bounds! (" + sts1.str() + " objects present)"); } } //FIXME: does not support unknownLabels but this function is only used by relion_star_handler // so I will leave this for future... 
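// Usage sketch for the comparison function below (file names are hypothetical; the chosen
// labels must exist in both tables). Matching on a string label uses exact equality, while
// matching on double labels uses a 1D/2D/3D distance that must be <= eps:
//
//     MetaDataTable MD1, MD2, MDboth, MDonly1, MDonly2;
//     MD1.read("picks_run1.star");
//     MD2.read("picks_run2.star");
//     compareMetaDataTable(MD1, MD2, MDboth, MDonly1, MDonly2,
//                          EMDL_IMAGE_COORD_X, 5., EMDL_IMAGE_COORD_Y, EMDL_UNDEFINED);
//
// MDboth then holds the MD1 rows that have a counterpart in MD2 within eps (here 5, in the
// units of the coordinate labels); MDonly1 and MDonly2 hold the unmatched rows of either table.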
void compareMetaDataTable(MetaDataTable &MD1, MetaDataTable &MD2, MetaDataTable &MDboth, MetaDataTable &MDonly1, MetaDataTable &MDonly2, EMDLabel label1, double eps, EMDLabel label2, EMDLabel label3) { if (!MD1.containsLabel(label1)) REPORT_ERROR("compareMetaDataTableEqualLabel::ERROR MD1 does not contain the specified label1."); if (!MD2.containsLabel(label1)) REPORT_ERROR("compareMetaDataTableEqualLabel::ERROR MD2 does not contain the specified label1."); if (label2 != EMDL_UNDEFINED) { if (!EMDL::isDouble(label1) || !EMDL::isDouble(label2)) REPORT_ERROR("compareMetaDataTableEqualLabel::ERROR 2D or 3D distances are only allowed for doubles."); if (!MD1.containsLabel(label2)) REPORT_ERROR("compareMetaDataTableEqualLabel::ERROR MD1 does not contain the specified label2."); if (!MD2.containsLabel(label2)) REPORT_ERROR("compareMetaDataTableEqualLabel::ERROR MD2 does not contain the specified label2."); } if (label3 != EMDL_UNDEFINED) { if (!EMDL::isDouble(label3)) REPORT_ERROR("compareMetaDataTableEqualLabel::ERROR 3D distances are only allowed for doubles."); if (!MD1.containsLabel(label3)) REPORT_ERROR("compareMetaDataTableEqualLabel::ERROR MD1 does not contain the specified label3."); if (!MD2.containsLabel(label3)) REPORT_ERROR("compareMetaDataTableEqualLabel::ERROR MD2 does not contain the specified label3."); } MDboth.clear(); MDonly1.clear(); MDonly2.clear(); std::string mystr1, mystr2; long int myint1, myint2; double myd1, myd2, mydy1 = 0., mydy2 = 0., mydz1 = 0., mydz2 = 0.; // loop over MD1 std::vector to_remove_from_only2; for (long int current_object1 = MD1.firstObject(); current_object1 != MetaDataTable::NO_MORE_OBJECTS && current_object1 != MetaDataTable::NO_OBJECTS_STORED; current_object1 = MD1.nextObject()) { if (EMDL::isString(label1)) MD1.getValue(label1, mystr1); else if (EMDL::isInt(label1)) MD1.getValue(label1, myint1); else if (EMDL::isDouble(label1)) { MD1.getValue(label1, myd1); if (label2 != EMDL_UNDEFINED) MD1.getValue(label2, mydy1); if (label3 != EMDL_UNDEFINED) MD1.getValue(label3, mydz1); } else REPORT_ERROR("compareMetaDataTableEqualLabel ERROR: only implemented for strings, integers or doubles"); // loop over MD2 bool have_in_2 = false; for (long int current_object2 = MD2.firstObject(); current_object2 != MetaDataTable::NO_MORE_OBJECTS && current_object2 != MetaDataTable::NO_OBJECTS_STORED; current_object2 = MD2.nextObject()) { if (EMDL::isString(label1)) { MD2.getValue(label1, mystr2); if (strcmp(mystr1.c_str(), mystr2.c_str()) == 0) { have_in_2 = true; to_remove_from_only2.push_back(current_object2); MDboth.addObject(MD1.getObject()); break; } } else if (EMDL::isInt(label1)) { MD2.getValue(label1, myint2); if ( ABS(myint2 - myint1) <= ROUND(eps) ) { have_in_2 = true; to_remove_from_only2.push_back(current_object2); MDboth.addObject(MD1.getObject()); break; } } else if (EMDL::isDouble(label1)) { MD2.getValue(label1, myd2); if (label2 != EMDL_UNDEFINED) MD2.getValue(label2, mydy2); if (label3 != EMDL_UNDEFINED) MD2.getValue(label3, mydz2); double dist = sqrt( (myd1 - myd2) * (myd1 - myd2) + (mydy1 - mydy2) * (mydy1 - mydy2) + (mydz1 - mydz2) * (mydz1 - mydz2) ); if ( ABS(dist) <= eps ) { have_in_2 = true; to_remove_from_only2.push_back(current_object2); //std::cerr << " current_object1= " << current_object1 << std::endl; //std::cerr << " myd1= " << myd1 << " myd2= " << myd2 << " mydy1= " << mydy1 << " mydy2= " << mydy2 << " dist= "<= min_value); } else { RFLOAT val; MDin.getValue(label, val); do_include = ((RFLOAT)val <= max_value && (RFLOAT)val >= min_value); 
} if (do_include) { MDout.addObject(MDin.getObject(current_object)); } } return MDout; } MetaDataTable subsetMetaDataTable(MetaDataTable &MDin, EMDLabel label, std::string search_str, bool exclude) { if (!EMDL::isString(label)) REPORT_ERROR("subsetMetadataTable ERROR: can only make a subset selection based on strings"); if (!MDin.containsLabel(label)) REPORT_ERROR("subsetMetadataTable ERROR: input MetaDataTable does not contain label: " + EMDL::label2Str(label)); MetaDataTable MDout; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDin) { std::string val; MDin.getValue(label, val); bool found = (val.find(search_str) != std::string::npos); if ((!exclude && found) || (exclude && !found)) { MDout.addObject(MDin.getObject(current_object)); } } return MDout; } MetaDataTable removeDuplicatedParticles(MetaDataTable &MDin, EMDLabel mic_label, RFLOAT threshold, RFLOAT origin_scale, FileName fn_removed, bool verb) { // Sanity check if (!MDin.containsLabel(EMDL_ORIENT_ORIGIN_X_ANGSTROM) || !MDin.containsLabel(EMDL_ORIENT_ORIGIN_Y_ANGSTROM)) REPORT_ERROR("You need rlnOriginXAngst and rlnOriginYAngst to remove duplicated particles"); if (!MDin.containsLabel(EMDL_IMAGE_COORD_X) && !MDin.containsLabel(EMDL_IMAGE_COORD_Y)) REPORT_ERROR("You need rlnCoordinateX, rlnCoordinateY to remove duplicated particles"); if (!MDin.containsLabel(mic_label)) REPORT_ERROR("STAR file does not contain " + EMDL::label2Str(mic_label)); std::vector valid(MDin.numberOfObjects(), true); std::vector xs(MDin.numberOfObjects(), 0.0); std::vector ys(MDin.numberOfObjects(), 0.0); std::vector zs; bool dataIs3D = false; if (MDin.containsLabel(EMDL_IMAGE_COORD_Z)) { if (!MDin.containsLabel(EMDL_ORIENT_ORIGIN_Z_ANGSTROM)) REPORT_ERROR("You need rlnOriginZAngst to remove duplicated 3D particles"); dataIs3D = true; zs.resize(MDin.numberOfObjects(), 0.0); } RFLOAT threshold_sq = threshold * threshold; // group by micrograph std::map > grouped; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDin) { std::string mic_name; MDin.getValue(mic_label, mic_name); RFLOAT val1, val2; MDin.getValue(EMDL_ORIENT_ORIGIN_X_ANGSTROM, val1); MDin.getValue(EMDL_IMAGE_COORD_X, val2); xs[current_object] = -val1 * origin_scale + val2; MDin.getValue(EMDL_ORIENT_ORIGIN_Y_ANGSTROM, val1); MDin.getValue(EMDL_IMAGE_COORD_Y, val2); ys[current_object] = -val1 * origin_scale + val2; if (dataIs3D) { MDin.getValue(EMDL_ORIENT_ORIGIN_Z_ANGSTROM, val1); MDin.getValue(EMDL_IMAGE_COORD_Z, val2); zs[current_object] = -val1 * origin_scale + val2; } grouped[mic_name].push_back(current_object); } // find duplicate for (std::map >::iterator it = grouped.begin(); it != grouped.end(); ++it) { long n_particles = it->second.size(); for (long i = 0; i < n_particles; i++) { long part_id1 = it->second[i]; for (long j = i + 1; j < n_particles; j++) { long part_id2 = it->second[j]; RFLOAT dist_sq = (xs[part_id1] - xs[part_id2]) * (xs[part_id1] - xs[part_id2]) + (ys[part_id1] - ys[part_id2]) * (ys[part_id1] - ys[part_id2]); if (dataIs3D) dist_sq += (zs[part_id1] - zs[part_id2]) * (zs[part_id1] - zs[part_id2]); if (dist_sq <= threshold_sq) { // std::cout << it->first << " " << part_id1 << " " << part_id2 << " " << dist_sq << std::endl; valid[part_id1] = false; break; } } } } MetaDataTable MDout, MDremoved; long n_removed = 0; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDin) { if (valid[current_object]) { MDout.addObject(MDin.getObject(current_object)); } else { MDremoved.addObject(MDin.getObject(current_object)); n_removed++; } } if (fn_removed != "") MDremoved.write(fn_removed); std::cout << "Removed " << n_removed << " 
duplicated objects from " << MDin.numberOfObjects() << " objects." << std::endl; return MDout; } relion-3.1.3/src/metadata_table.h000066400000000000000000000414111411340063500166730ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ /*************************************************************************** * * Redesigned by: J. Zivanov in June 2017 * MRC Laboratory of Molecular Biology * * Original author: J.R. Bilbao-Castro (jrbcast@ace.ual.es) * Unidad de Bioinformatica of Centro Nacional de Biotecnologia, CSIC ***************************************************************************/ #ifndef METADATA_TABLE_H #define METADATA_TABLE_H #include #include #include #include #include #include #include #include #if !defined(__APPLE__) #include #endif #include "src/funcs.h" #include "src/args.h" #include "src/CPlot2D.h" #include "src/metadata_container.h" #include "src/metadata_label.h" #define CURRENT_MDT_VERSION 30001 /** For all objects. @code FOR_ALL_OBJECTS_IN_METADATA(metadata) { RFLOAT rot; DF.getValue( EMDL_ANGLEROT, rot); } @endcode This is not thread-safe because current_objectID is updated! @TODO: remove "&& current_object >= 0" and make "nextObject()" return "current_object++" after "enum errors" has been removed (see below) */ #define FOR_ALL_OBJECTS_IN_METADATA_TABLE(mdt_arg) \ for(long int current_object = (mdt_arg).firstObject(); \ current_object < (mdt_arg).numberOfObjects() \ && current_object >= 0; \ current_object = (mdt_arg).nextObject()) /* class MetaDataTable: * * - stores a table of values for an arbitrary subset of predefined EMDLabels * - each column corresponds to a label * - each row represents a data point * - the rows are stored in per-type contiguous blocks of memory * * 2020/Nov/12: * This class is organized as an array (`objects`) of structures (`MetaDataContainer`). * * `activeLabels` contains all valid labels. * Even when a label is `deactivateLabel`-ed, the values remain in `MetaDataContainer`s. * The label is only removed from `activeLabels`. * * Each data type (int, double, etc) contains its own storage array inside `MetaDataContainer`. * Thus, values in `label2offsets` are NOT unique. Accessing columns via a wrong type is * very DANGEROUS. Use `cmake -DMDT_TYPE_CHECK=ON` to enable runtime checks. * * Handling of labels unknown to RELION needs care. * They all share the same label, EMD_UNKNOWN_LABEL. Thus, `addLabel`, `containsLabel`, * `compareLabels` etc must check not only EMDLabel in `activeLabels` but also the * real labels stored in `unknownLabelNames`. This should be done via `getUnknownLabelNameAt`. 
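 *
 * A minimal sketch of the unknown-label behaviour (the custom column name is hypothetical):
 * reading a STAR file that contains a non-RELION column "_myCustomScore" keeps that column
 * under EMDL_UNKNOWN_LABEL; afterwards containsLabel(EMDL_UNKNOWN_LABEL, "myCustomScore")
 * returns true, and getUnknownLabelNameAt(i) recovers the original column name for
 * position i in `activeLabels`.
 *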
* Note that two STAR files might contain the same set of unknown labels, but in different orders. * * Whenever `activeLabels` is modified, `unknownLabelPosition2Offset` MUST be updated accordingly. * When the label for a column is EMD_UNKNOWN_LABEL, the corresponding element in * `unknownLabelPosition2Offset` must store the offset in `unknownLabelNames` and * `MetaDataContainer->unknowns`. Otherwise, the value does not matter. */ class MetaDataTable { // Effectively stores all metadata std::vector objects; // Maps labels to corresponding indices in the vectors in MetaDataContainer. // The length of label2offset is always equal to the number of defined labels (~320) // e.g.: // the value of "defocus-U" for row r is stored in: // objects[r]->doubles[label2offset[EMDL_CTF_DEFOCUSU]] // the value of "image name" is stored in: // objects[r]->strings[label2offset[EMDL_IMAGE_NAME]] std::vector label2offset; /** What labels have been read from a docfile/metadata file * and/or will be stored on a new metadata file when "save" is * called **/ std::vector activeLabels; std::vector unknownLabelNames; std::vector unknownLabelPosition2Offset; // Current object id long current_objectID; // Number of labels of each type long doubleLabels, intLabels, boolLabels, stringLabels, doubleVectorLabels, unknownLabels; // Is this a 2D table or a 1D list? bool isList; // Name of the metadata table std::string name; // A comment for the metadata table std::string comment; // The version number of the file format (multiplied by 10,000) int version; public: MetaDataTable(); // Copy constructor and assignment operator: // Fill the new table with *copies* of all objects MetaDataTable(const MetaDataTable & c); MetaDataTable& operator = (const MetaDataTable &MD); ~MetaDataTable(); bool isAList() { return isList; } void setIsList(bool is_list); bool isEmpty() const; size_t numberOfObjects() const; void clear(); void setComment(const std::string Comment); std::string getComment() const; bool containsComment() const; void setName(const std::string Name); std::string getName() const; void setVersion(int v); int getVersion() const; static int getCurrentVersion(); // getValue: returns true if the label exists // objectID is 0-indexed. template bool getValue(EMDLabel label, T& value, long objectID = -1) const; bool getValueToString(EMDLabel label, std::string &value, long int objectID = -1, bool escape=false) const; std::string getUnknownLabelNameAt(int i) const; // Set the value of label for a specified object. // If no objectID is given, the internal iterator 'current_objectID' is used // objectID is 0-indexed. template bool setValue(EMDLabel name, const T &value, long int objectID = -1); bool setUnknownValue(int labelPosition, const std::string &value); bool setValueFromString(EMDLabel label, const std::string &value, long int objectID = -1); // Sort the order of the elements based on the values in the input label // (only numbers, no strings/bools) void sort(EMDLabel name, bool do_reverse = false, bool only_set_index = false, bool do_random = false); void newSort(const EMDLabel name, bool do_reverse = false, bool do_sort_after_at = false, bool do_sort_before_at = false); // Check whether a label is defined in the table. // This is redundant and will be removed in 3.2. bool labelExists(EMDLabel name) const; // Check whether a label is contained in activeLabels. 
bool containsLabel(const EMDLabel label, const std::string unknownLabel="") const; std::vector getActiveLabels() const; // Deactivate a column from a table, so that it is no longer written out void deactivateLabel(EMDLabel label, std::string unknownLabel=""); // add a new label and update all objects void addLabel(EMDLabel label, std::string unknownLabel=""); // add missing labels that are present in 'app' void addMissingLabels(const MetaDataTable* app); // add all rows from app to the end of the table and // insert all missing labels void append(const MetaDataTable& app); // Get metadatacontainer for objectID (current_objectID if objectID < 0) MetaDataContainer* getObject(long objectID = -1) const; /* setObject(data, objectID) * copies values from 'data' to object 'objectID'. * The target object is assumed to exist. * If objectID < 0, then current_objectID is set. * Undefined labels are inserted. * * Use addObject() to set an object that does not yet exist */ void setObject(MetaDataContainer* data, long objectID = -1); /* setValuesOfDefinedLabels(data, objectID) * copies values from 'data' to object 'objectID'. * The target object is assumed to exist. * If objectID < 0, then current_objectID is set. * Only already defined labels are considered. * * Use addValuesOfDefinedLabels() to add an object that does not yet exist */ void setValuesOfDefinedLabels(MetaDataContainer* data, long objectID = -1); // reserve memory for this many lines void reserve(size_t capacity); /* addObject() * Adds a new object and initializes the defined labels with default values. * Afterwards, 'current_objectID' points to the newly added object.*/ void addObject(); /* addObject(data) * Adds a new object and sets its values to those from 'data'. * The set of labels for the table is extended as necessary. * Afterwards, 'current_objectID' points to the newly added object.*/ void addObject(MetaDataContainer* data); /* addValuesOfDefinedLabels(data) * Adds a new object and sets the already defined values to those from 'data'. * Labels from 'data' that are not already defined are ignored. * Afterwards, 'current_objectID' points to the newly added object.*/ void addValuesOfDefinedLabels(MetaDataContainer* data); /* removeObject(objectID) * If objectID is not given, 'current_objectID' will be removed. * 'current_objectID' is set to the last object in the list. */ void removeObject(long objectID = -1); long firstObject(); long nextObject(); // @TODO: remove nextObject() after removing calls in: // - "particle_reposition.cpp" // - "helix.cpp" // - "preprocessing.cpp" long goToObject(long objectID); // Read a STAR loop structure long int readStarLoop(std::ifstream& in, bool do_only_count = false); /* Read a STAR list * The function returns true if the list is followed by a loop, false otherwise */ bool readStarList(std::ifstream& in); /* Read a MetaDataTable from a STAR-format data block * * If the data block contains a list and a table, the function will return 2, * the first time it is called and the list is read into the MetaDataTable * in that case the function needs to be called another time. 
The second time * it will read the _loop structure into the MetaDataTable and 1 will be returned * * If the data block contains only a list or a table, it is read in the MetaDataTable and the function will return 1 * * If no data block is found the function will return 0 and the MetaDataTable remains empty */ long int readStar(std::ifstream& in, const std::string &name = "", bool do_only_count = false); // Read a MetaDataTable (get file format from extension) long int read(const FileName &filename, const std::string &name = "", bool do_only_count = false); // Write a MetaDataTable in STAR format void write(std::ostream& out = std::cout); // Write to a single file void write(const FileName & fn_out); // Make a histogram of a column void columnHistogram(EMDLabel label, std::vector &histX, std::vector &histY, int verb = 0, CPlot2D *plot2D = NULL, long int nr_bin = -1, RFLOAT hist_min = -LARGE_NUMBER, RFLOAT hist_max = LARGE_NUMBER, bool do_fractional_instead = false, bool do_cumulative_instead = false); static void histogram(std::vector &values, std::vector &histX, std::vector &histY, int verb = 0, std::string title="Histogram", CPlot2D *plot2D = NULL, long int nr_bin = -1, RFLOAT hist_min = -LARGE_NUMBER, RFLOAT hist_max = LARGE_NUMBER, bool do_fractional_instead = false, bool do_cumulative_instead = false); void addToCPlot2D(CPlot2D *plot2D, EMDLabel xaxis, EMDLabel yaxis, double red=0., double green=0., double blue=0., double linewidth = 1.0, std::string marker=""); void printLabels(std::ostream& ost); // Randomise the order inside the STAR file void randomiseOrder(); // Feb14,2017 - Shaoda, Check whether the two MetaDataTables contain the same set of activeLabels static bool compareLabels(const MetaDataTable &MD1, const MetaDataTable &MD2); // Join 2 metadata tables. Only include labels that are present in both of them. static MetaDataTable combineMetaDataTables(std::vector &MDin); // legacy error codes: // @TODO: remove after changing: // - particle_reposition.cpp, line ~127 // - preprocessing.cpp, line ~299 enum errors { NO_OBJECTS_STORED = -1, NO_MORE_OBJECTS = -2, NO_OBJECT_FOUND = -3 }; template bool isTypeCompatible(EMDLabel label, T& value) const; private: // Check if 'id' corresponds to an actual object. // Crash if it does not. void checkObjectID(long id, std::string caller) const; /* setObjectUnsafe(data) * Same as setObject, but assumes that all labels are present. 
*/ void setObjectUnsafe(MetaDataContainer* data, long objId); }; void compareMetaDataTable(MetaDataTable &MD1, MetaDataTable &MD2, MetaDataTable &MDboth, MetaDataTable &MDonly1, MetaDataTable &MDonly2, EMDLabel label1, double eps = 0., EMDLabel label2 = EMDL_UNDEFINED, EMDLabel label3 = EMDL_UNDEFINED); // find a subset of the input metadata table that has corresponding entries between the specified min and max values MetaDataTable subsetMetaDataTable(MetaDataTable &MDin, EMDLabel label, RFLOAT min_value, RFLOAT max_value); // find a subset of the input metadata table that has corresponding entries with or without a given substring MetaDataTable subsetMetaDataTable(MetaDataTable &MDin, EMDLabel label, std::string search_str, bool exclude=false); // remove duplicated particles that are in the same micrograph (mic_label) and within a given threshold [px] // OriginX/Y are multiplied by origin_scale before added to CoordinateX/Y to compensate for down-sampling MetaDataTable removeDuplicatedParticles(MetaDataTable &MDin, EMDLabel mic_label, RFLOAT threshold, RFLOAT origin_scale=1.0, FileName fn_removed="", bool verb=true); #ifdef METADATA_TABLE_TYPE_CHECK //#pragma message("typecheck enabled") template bool MetaDataTable::isTypeCompatible(EMDLabel label, T& value) const { // remove const appended by setValue() typedef typename std::remove_const::type U; // In C++11, this repeat can be avoided by using "if constexpr(...) else static_assert" static_assert(std::is_same::value || std::is_same::value || std::is_same::value || std::is_same::value || std::is_same::value || std::is_same::value || std::is_same::value || std::is_same, U>::value || std::is_same, U>::value, "Compile error: wrong type given to MetaDataTable::getValur or setValue"); if (std::is_same::value) return EMDL::isBool(label); else if (std::is_same::value || std::is_same::value) return EMDL::isString(label); else if (std::is_same::value || std::is_same::value) return EMDL::isDouble(label); else if (std::is_same::value || std::is_same::value) return EMDL::isInt(label); else if (std::is_same, U>::value || std::is_same, U>::value) return EMDL::isVector(label); else return false; } #endif template bool MetaDataTable::getValue(EMDLabel label, T& value, long objectID) const { if (label < 0 || label >= EMDL_LAST_LABEL) return false; if (label == EMDL_UNKNOWN_LABEL) REPORT_ERROR("MetaDataTable::setValue does not support unknown label."); #ifdef METADATA_TABLE_TYPE_CHECK if (!isTypeCompatible(label, value)) REPORT_ERROR("Runtime error: wrong type given to MetaDataTable::getValue for label " + EMDL::label2Str(label)); #endif const long off = label2offset[label]; if (off > -1) { if (objectID < 0) { objectID = current_objectID; } else checkObjectID(objectID, "MetaDataTable::getValue"); objects[objectID]->getValue(off, value); return true; } else { return false; } } template bool MetaDataTable::setValue(EMDLabel label, const T &value, long int objectID) { if (label < 0 || label >= EMDL_LAST_LABEL) return false; if (label == EMDL_UNKNOWN_LABEL) REPORT_ERROR("MetaDataTable::setValue does not support unknown label."); #ifdef METADATA_TABLE_TYPE_CHECK if (!isTypeCompatible(label, value)) REPORT_ERROR("Runtime error: wrong type given to MetaDataTable::setValue for label " + EMDL::label2Str(label)); #endif long off = label2offset[label]; if (off < 0) { addLabel(label); off = label2offset[label]; } if (objectID < 0) { objectID = current_objectID; } else checkObjectID(objectID, "MetaDataTable::setValue"); if (off > -1) { objects[objectID]->setValue(off, 
value); return true; } else { return false; } } #endif relion-3.1.3/src/micrograph_model.cpp000066400000000000000000000402211411340063500176100ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres", "Takanori Nakane" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include "src/micrograph_model.h" #include "src/metadata_table.h" #include "src/image.h" #include "src/motioncorr_runner.h" #include "src/renderEER.h" // TODO: Think about first frame for local model const RFLOAT Micrograph::NOT_OBSERVED = -9999; const int ThirdOrderPolynomialModel::NUM_COEFFS_PER_DIM = 18; int ThirdOrderPolynomialModel::getShiftAt(RFLOAT z, RFLOAT x, RFLOAT y, RFLOAT &shiftx, RFLOAT &shifty) const { const RFLOAT x2 = x * x, y2 = y * y, xy = x * y, z2 = z * z; const RFLOAT z3 = z2 * z; shiftx = (coeffX(0) * z + coeffX(1) * z2 + coeffX(2) * z3) + (coeffX(3) * z + coeffX(4) * z2 + coeffX(5) * z3) * x + (coeffX(6) * z + coeffX(7) * z2 + coeffX(8) * z3) * x2 + (coeffX(9) * z + coeffX(10) * z2 + coeffX(11) * z3) * y + (coeffX(12) * z + coeffX(13) * z2 + coeffX(14) * z3) * y2 + (coeffX(15) * z + coeffX(16) * z2 + coeffX(17) * z3) * xy; shifty = (coeffY(0) * z + coeffY(1) * z2 + coeffY(2) * z3) + (coeffY(3) * z + coeffY(4) * z2 + coeffY(5) * z3) * x + (coeffY(6) * z + coeffY(7) * z2 + coeffY(8) * z3) * x2 + (coeffY(9) * z + coeffY(10) * z2 + coeffY(11) * z3) * y + (coeffY(12) * z + coeffY(13) * z2 + coeffY(14) * z3) * y2 + (coeffY(15) * z + coeffY(16) * z2 + coeffY(17) * z3) * xy; return 0; } MotionModel* ThirdOrderPolynomialModel::clone() const { return (MotionModel*) new ThirdOrderPolynomialModel(*this); } void ThirdOrderPolynomialModel::write(std::ostream &fh, std::string block_name) { MetaDataTable MD; MD.setName(block_name); int coeff_idx = 0; // Write coeffX for (int i = 0; i < NUM_COEFFS_PER_DIM; i++) { MD.addObject(); MD.setValue(EMDL_MICROGRAPH_MOTION_COEFFS_IDX, coeff_idx); MD.setValue(EMDL_MICROGRAPH_MOTION_COEFF, coeffX(i)); coeff_idx++; } // Write coeffY for (int i = 0; i < NUM_COEFFS_PER_DIM; i++) { MD.addObject(); MD.setValue(EMDL_MICROGRAPH_MOTION_COEFFS_IDX, coeff_idx); MD.setValue(EMDL_MICROGRAPH_MOTION_COEFF, coeffY(i)); coeff_idx++; } MD.write(fh); } void ThirdOrderPolynomialModel::read(std::ifstream &fh, std::string block_name) { MetaDataTable MD; MD.readStar(fh, block_name); const int NUM_COEFFS = NUM_COEFFS_PER_DIM * 2; int num_read = 0; coeffX.resize(NUM_COEFFS_PER_DIM); coeffX.initZeros(); coeffY.resize(NUM_COEFFS_PER_DIM); coeffY.initZeros(); FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD) { int idx; RFLOAT val; if (!MD.getValue(EMDL_MICROGRAPH_MOTION_COEFFS_IDX, idx) || !MD.getValue(EMDL_MICROGRAPH_MOTION_COEFF, val)) { REPORT_ERROR("ThirdOrderPolynomialModel coefficients table: 
missing index or coefficients"); } if (idx >= 0 && idx < NUM_COEFFS_PER_DIM) { coeffX(idx) = val; } else if (idx >= NUM_COEFFS_PER_DIM && idx < NUM_COEFFS) { coeffY(idx - NUM_COEFFS_PER_DIM) = val; } else { REPORT_ERROR("ThirdOrderPolynomialModel coefficients table: wrong index"); } num_read++; } if (num_read != NUM_COEFFS) { REPORT_ERROR("ThirdOrderPolynomialModel coefficients table: incomplete values"); } } Micrograph::Micrograph() : ready(false), model(NULL) { clearFields(); } Micrograph::Micrograph(const Micrograph& m) : ready(m.ready), model((m.model != NULL)? m.model->clone() : NULL) { copyFieldsFrom(m); } Micrograph::Micrograph(FileName filename, FileName fnGain, RFLOAT binning, int eer_upsampling, int eer_grouping) : ready(false), model(NULL) { clearFields(); if (filename.getExtension() == "star" && fnGain == "") { if (eer_upsampling > 0 || eer_grouping > 0) REPORT_ERROR("Micrograph::Micrograph: When reading STAR file, you shouldn't specify eer_upsampling and eer_grouping. They are read from the STAR file."); read(filename); } else { this->eer_upsampling = eer_upsampling; this->eer_grouping = eer_grouping; setMovie(filename, fnGain, binning); } ready = true; } Micrograph::~Micrograph() { if (model != NULL) delete model; } Micrograph& Micrograph::operator = (const Micrograph& m) { ready = m.ready; if (model != NULL) delete model; model = (m.model != NULL)? m.model->clone() : NULL; copyFieldsFrom(m); return *this; } void Micrograph::write(FileName filename) { checkReadyFlag("write"); std::ofstream fh; MetaDataTable MD; fh.open(filename.c_str()); if (!fh) { REPORT_ERROR((std::string)"Micrograph::write: Cannot write file: " + filename); } MD.setName("general"); MD.setIsList(true); MD.addObject(); MD.setValue(EMDL_IMAGE_SIZE_X, width); MD.setValue(EMDL_IMAGE_SIZE_Y, height); MD.setValue(EMDL_IMAGE_SIZE_Z, n_frames); MD.setValue(EMDL_MICROGRAPH_MOVIE_NAME, fnMovie); if (fnGain != "") MD.setValue(EMDL_MICROGRAPH_GAIN_NAME, fnGain); if (fnDefect != "") MD.setValue(EMDL_MICROGRAPH_DEFECT_FILE, fnDefect); MD.setValue(EMDL_MICROGRAPH_BINNING, binning); if (angpix != -1) MD.setValue(EMDL_MICROGRAPH_ORIGINAL_PIXEL_SIZE, angpix); if (dose_per_frame != -1) MD.setValue(EMDL_MICROGRAPH_DOSE_RATE, dose_per_frame); if (pre_exposure != -1) MD.setValue(EMDL_MICROGRAPH_PRE_EXPOSURE, pre_exposure); if (voltage != -1) MD.setValue(EMDL_CTF_VOLTAGE, voltage); MD.setValue(EMDL_MICROGRAPH_START_FRAME, first_frame); // 1-indexed if (EERRenderer::isEER(fnMovie)) { if (eer_upsampling > 0) MD.setValue(EMDL_MICROGRAPH_EER_UPSAMPLING, this->eer_upsampling); if (eer_grouping > 0) MD.setValue(EMDL_MICROGRAPH_EER_GROUPING, this->eer_grouping); } if (model != NULL) MD.setValue(EMDL_MICROGRAPH_MOTION_MODEL_VERSION, model->getModelVersion()); else MD.setValue(EMDL_MICROGRAPH_MOTION_MODEL_VERSION, (int)MOTION_MODEL_NULL); MD.write(fh); MD.clear(); MD.setName("global_shift"); for (int frame = 0; frame < n_frames; frame++) { MD.addObject(); MD.setValue(EMDL_MICROGRAPH_FRAME_NUMBER, frame + 1); // make 1-indexed MD.setValue(EMDL_MICROGRAPH_SHIFT_X, globalShiftX[frame]); MD.setValue(EMDL_MICROGRAPH_SHIFT_Y, globalShiftY[frame]); } MD.write(fh); if (model != NULL) { std::string block_name = "local_motion_model"; model->write(fh, block_name); } MD.clear(); MD.setName("hot_pixels"); if (hotpixelX.size() != hotpixelY.size()) REPORT_ERROR("Logic error: hotpixelX.size() != hotpixelY.size()"); for (int i = 0, ilim = hotpixelX.size(); i < ilim; i++) { MD.addObject(); MD.setValue(EMDL_IMAGE_COORD_X, (RFLOAT)hotpixelX[i]); 
MD.setValue(EMDL_IMAGE_COORD_Y, (RFLOAT)hotpixelY[i]); } MD.write(fh); MD.clear(); MD.setName("local_shift"); int n_local_trajectory = localShiftX.size(); if (n_local_trajectory != localShiftY.size() || n_local_trajectory != patchX.size() || n_local_trajectory != patchY.size() || n_local_trajectory != patchZ.size()) REPORT_ERROR("Logic error: inconsistent local trajectory"); for (int i = 0; i < n_local_trajectory; i++) { MD.addObject(); MD.setValue(EMDL_MICROGRAPH_FRAME_NUMBER, (int)patchZ[i]); MD.setValue(EMDL_IMAGE_COORD_X, patchX[i]); MD.setValue(EMDL_IMAGE_COORD_Y, patchY[i]); MD.setValue(EMDL_MICROGRAPH_SHIFT_X, localShiftX[i]); MD.setValue(EMDL_MICROGRAPH_SHIFT_Y, localShiftY[i]); } MD.write(fh); fh.close(); } FileName Micrograph::getGainFilename() const { return fnGain; } RFLOAT Micrograph::getBinningFactor() const { return binning; } FileName Micrograph::getMovieFilename() const { return fnMovie; } int Micrograph::getWidth() const { return width; } int Micrograph::getHeight() const { return height; } int Micrograph::getNframes() const { return n_frames; } int Micrograph::getEERUpsampling() const { return eer_upsampling; } int Micrograph::getEERGrouping() const { return eer_grouping; } void Micrograph::fillDefectAndHotpixels(MultidimArray &mask) const { checkReadyFlag("fillDefectAndHotpixels"); mask.initZeros(height, width); bool fix_defect = (fnDefect != ""); if (fnDefect.getExtension() == "txt" && MotioncorrRunner::detectSerialEMDefectText(fnDefect)) { std::cerr << "WARNING: The defect file specified in the micrograph metadata STAR file seems to be a SerialEM's defect file and not in the MotionCor2's format (x y w h). The defect file is ignored." << std::endl; fix_defect = false; } if (fix_defect) MotioncorrRunner::fillDefectMask(mask, fnDefect); if (hotpixelX.size() != hotpixelY.size()) REPORT_ERROR("Logic error: hotpixelX.size() != hotpixelY.size()"); for (int i = 0, ilim = hotpixelX.size(); i < ilim; i++) { DIRECT_A2D_ELEM(mask, hotpixelY[i], hotpixelX[i]) = true; } } int Micrograph::getShiftAt(RFLOAT frame, RFLOAT x, RFLOAT y, RFLOAT &shiftx, RFLOAT &shifty, bool use_local, bool normalise) const { checkReadyFlag("getShiftAt"); if (normalise) { x = x / width - 0.5; y = y / height - 0.5; } if (globalShiftX[frame - 1] == NOT_OBSERVED || globalShiftY[frame - 1] == NOT_OBSERVED) { // Find the shift of the closest observed frame. // If the given 'frame' is unobserved due to initial frame truncation (--first_frame), // the output becomes zero. This is OK because the shift of the first observed frame // is zero by definition. So we don't have to search after the 'frame'. shiftx = shifty = 0; for (int i = frame - 1; i >= 0; i--) { if (globalShiftX[i] != NOT_OBSERVED && globalShiftY[i] != NOT_OBSERVED) { shiftx = globalShiftX[i]; shifty = globalShiftY[i]; break; } } return -1; } if (model != NULL && use_local) { // both frame and first_frame are 1-indexed model->getShiftAt(frame - first_frame, x, y, shiftx, shifty); } else { shiftx = 0; shifty = 0; } // frame is 1-indexed!
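	// The returned shift is thus the whole-frame (global) shift plus the local-model
	// contribution evaluated above; both are in unbinned pixels of the original movie.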
shiftx += globalShiftX[frame - 1]; shifty += globalShiftY[frame - 1]; return 0; } void Micrograph::setGlobalShift(int frame, RFLOAT shiftx, RFLOAT shifty) { checkReadyFlag("setGlobalShift"); if (frame <= 0 || frame > n_frames) { std::cout << "Frame: " << frame << " n_frames: " << n_frames << std::endl; REPORT_ERROR("Micrograph::setGlobalShift() frame out of range"); } frame--; // frame is 1-indexed globalShiftX[frame] = shiftx; globalShiftY[frame] = shifty; } void Micrograph::read(FileName fn_in, bool read_hotpixels) { if (model != NULL) { delete model; model = NULL; } // Clear current model clearFields(); // Open input file std::ifstream in(fn_in.data(), std::ios_base::in); if (in.fail()) { REPORT_ERROR("MicrographModel::read: File " + fn_in + " cannot be read."); } MetaDataTable MDglobal, MDhot; // Read Image metadata MDglobal.readStar(in, "general"); if (!MDglobal.getValue(EMDL_IMAGE_SIZE_X, width) || !MDglobal.getValue(EMDL_IMAGE_SIZE_Y, height) || !MDglobal.getValue(EMDL_IMAGE_SIZE_Z, n_frames) || !MDglobal.getValue(EMDL_MICROGRAPH_MOVIE_NAME, fnMovie)) { REPORT_ERROR("MicrographModel::read: insufficient general information in " + fn_in); } globalShiftX.resize(n_frames, NOT_OBSERVED); globalShiftY.resize(n_frames, NOT_OBSERVED); if (!MDglobal.getValue(EMDL_MICROGRAPH_GAIN_NAME, fnGain)) fnGain = ""; if (!MDglobal.getValue(EMDL_MICROGRAPH_DEFECT_FILE, fnDefect)) fnDefect = ""; if (!MDglobal.getValue(EMDL_MICROGRAPH_BINNING, binning)) binning = 1.0; if (!MDglobal.getValue(EMDL_MICROGRAPH_ORIGINAL_PIXEL_SIZE, angpix)) angpix = -1; if (!MDglobal.getValue(EMDL_MICROGRAPH_PRE_EXPOSURE, pre_exposure)) pre_exposure = -1; if (!MDglobal.getValue(EMDL_MICROGRAPH_DOSE_RATE, dose_per_frame)) dose_per_frame = -1; if (!MDglobal.getValue(EMDL_CTF_VOLTAGE, voltage)) voltage = -1; if (!MDglobal.getValue(EMDL_MICROGRAPH_START_FRAME, first_frame)) first_frame = 1; // 1-indexed if (EERRenderer::isEER(fnMovie)) { if (!MDglobal.getValue(EMDL_MICROGRAPH_EER_UPSAMPLING, eer_upsampling)) eer_upsampling = -1; if (!MDglobal.getValue(EMDL_MICROGRAPH_EER_GROUPING, eer_grouping)) eer_grouping = -1; } int model_version; model = NULL; if (MDglobal.getValue(EMDL_MICROGRAPH_MOTION_MODEL_VERSION, model_version)) { if (model_version == MOTION_MODEL_THIRD_ORDER_POLYNOMIAL) { model = new ThirdOrderPolynomialModel(); } else if (model_version == (int)MOTION_MODEL_NULL) { model = NULL; } else { std::cerr << "Warning: Ignoring unknown motion model " << model_version << std::endl; } } else { std::cerr << "Warning: local motion model is absent in the micrograph star file." << std::endl; } if (model != NULL) { model->read(in, "local_motion_model"); } // Read global shifts int frame; RFLOAT shiftX, shiftY; MDglobal.readStar(in, "global_shift"); FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDglobal) { if (!MDglobal.getValue(EMDL_MICROGRAPH_FRAME_NUMBER, frame) || !MDglobal.getValue(EMDL_MICROGRAPH_SHIFT_X, shiftX) || !MDglobal.getValue(EMDL_MICROGRAPH_SHIFT_Y, shiftY)) { REPORT_ERROR("MicrographModel::read: incorrect global_shift table in " + fn_in); } // frame is 1-indexed! 
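		// Frames that are absent from the global_shift table keep the NOT_OBSERVED
		// sentinel assigned when the vectors were resized above.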
globalShiftX[frame - 1] = shiftX; globalShiftY[frame - 1] = shiftY; } if (read_hotpixels) { MDhot.readStar(in, "hot_pixels"); RFLOAT x, y; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDhot) { if (!MDhot.getValue(EMDL_IMAGE_COORD_X, x) || !MDhot.getValue(EMDL_IMAGE_COORD_Y, y)) REPORT_ERROR("MicrographModel::read: incorrect hot_pixels table in " + fn_in); hotpixelX.push_back((int)x); hotpixelY.push_back((int)y); } } } void Micrograph::setMovie(FileName fnMovie, FileName fnGain, RFLOAT binning) { if (EERRenderer::isEER(fnMovie)) { EERRenderer renderer; renderer.read(fnMovie, eer_upsampling); width = renderer.getWidth(); height = renderer.getHeight(); n_frames = renderer.getNFrames() / eer_grouping; } else { Image Ihead; Ihead.read(fnMovie, false, -1, false, true); // select_img -1, mmap false, is_2D true width = XSIZE(Ihead()); height = YSIZE(Ihead()); n_frames = NSIZE(Ihead()); } this->binning = binning; globalShiftX.resize(n_frames, NOT_OBSERVED); globalShiftY.resize(n_frames, NOT_OBSERVED); this->fnMovie = fnMovie; this->fnGain = fnGain; } void Micrograph::clearFields() { width = 0; height = 0; n_frames = 0; first_frame = 0; binning = 1; angpix = -1; voltage = -1; dose_per_frame = -1; pre_exposure = -1; eer_upsampling = -1; eer_grouping = -1; fnMovie = ""; fnGain = ""; fnDefect = ""; hotpixelX.resize(0); hotpixelY.resize(0); globalShiftX.resize(0); globalShiftY.resize(0); localShiftX.resize(0); localShiftY.resize(0); localFitX.resize(0); localFitY.resize(0); patchX.resize(0); patchY.resize(0); patchZ.resize(0); patchW.resize(0); patchH.resize(0); } void Micrograph::copyFieldsFrom(const Micrograph& m) { width = m.width; height = m.height; n_frames = m.n_frames; first_frame = m.first_frame; binning = m.binning; angpix = m.angpix; voltage = m.voltage; dose_per_frame = m.dose_per_frame; pre_exposure = m.pre_exposure; eer_upsampling = m.eer_upsampling; eer_grouping = m.eer_grouping; fnMovie = m.fnMovie; fnGain = m.fnGain; fnDefect = m.fnDefect; hotpixelX = m.hotpixelX; hotpixelY = m.hotpixelY; globalShiftX = m.globalShiftX; globalShiftY = m.globalShiftY; localShiftX = m.localShiftX; localShiftY = m.localShiftY; localFitX = m.localFitX; localFitY = m.localFitY; patchX = m.patchX; patchY = m.patchY; patchZ = m.patchZ; patchW = m.patchW; patchH = m.patchH; } void Micrograph::checkReadyFlag(std::string origin) const { if (!ready) { REPORT_ERROR("Micrograph::"+origin+": instance not initialized.\n"); } } relion-3.1.3/src/micrograph_model.h000066400000000000000000000114761411340063500172670ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres", "Takanori Nakane" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #ifndef MICROGRAPH_MODEL_H_ #define MICROGRAPH_MODEL_H_ #include #include "src/filename.h" #include "src/matrix1d.h" #include "src/multidim_array.h" enum MotionModelVersion { MOTION_MODEL_NULL = 0, MOTION_MODEL_THIRD_ORDER_POLYNOMIAL = 1, }; class MotionModel { public: virtual ~MotionModel(){} // Fit model based on observations virtual void fit() = 0; virtual void read(std::ifstream &fh, std::string block_name) = 0; virtual void write(std::ostream &fh, std::string block_name) = 0; virtual int getModelVersion() const = 0; // Get motion at frame and (x, y); // NB: differences from Micrograph::getShiftAt! // - frame is 0-indexed // - (x, y) are normalised pixels (i.e. unbinned_pixel_x / width - 0.5) virtual int getShiftAt(RFLOAT frame, RFLOAT x, RFLOAT y, RFLOAT &shiftx, RFLOAT &shifty) const = 0; virtual MotionModel* clone() const = 0; }; class ThirdOrderPolynomialModel: public MotionModel { public: static const int NUM_COEFFS_PER_DIM; Matrix1D coeffX, coeffY; void fit() { REPORT_ERROR("Not implemented yet."); } void read(std::ifstream &fh, std::string block_name); void write(std::ostream &fh, std::string block_name); int getModelVersion() const { return MOTION_MODEL_THIRD_ORDER_POLYNOMIAL; } int getShiftAt(RFLOAT frame, RFLOAT x, RFLOAT y, RFLOAT &shiftx, RFLOAT &shifty) const; MotionModel* clone() const; }; class Micrograph { public: // When you add a new field, don't forget to update the copy constructor! bool ready; static const RFLOAT NOT_OBSERVED; RFLOAT angpix, voltage, dose_per_frame, pre_exposure; FileName fnDefect; int first_frame; // First frame for local motion model. 1-indexed. MotionModel *model; // Local trajectories (not read from STAR files) std::vector localShiftX, localShiftY, localFitX, localFitY, patchX, patchY, patchZ, patchW, patchH; std::vector hotpixelX, hotpixelY; // Default constructor Micrograph(); // Copy-constructor Micrograph(const Micrograph& m); // Create from a movie or a STAR file Micrograph(FileName filename, FileName fnGain="", RFLOAT binning=1.0, int eer_upsampling=-1, int eer_grouping=-1); ~Micrograph(); Micrograph& operator = (const Micrograph& m); // Write micrograph model to a STAR file void write(FileName filename); // Get gain reference file name FileName getGainFilename() const; // Get binning factor RFLOAT getBinningFactor() const; // Get original movie name FileName getMovieFilename() const; int getWidth() const; int getHeight() const; int getNframes() const; // Get shift vector at (x, y, frame) // frame is 1-indexed // (x, y) are normalised coordinates (i.e. unbinned_pixel_x / width - 0.5) when normalise=false (default), // or unbinned pixels in the original movie when normalise=true // (shiftx, shifty) are always UNBINNED pixels in the original movie. // Returns non-zero if failed (e.g. not observed).
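	// A minimal usage sketch (illustrative only; the file name below is hypothetical):
	//
	//   Micrograph mic("MotionCorr/job002/Movies/mic001.star");
	//   RFLOAT sx, sy;
	//   if (mic.getShiftAt(1, 1000., 1000., sx, sy, true, true) == 0)
	//       std::cout << sx << " " << sy << std::endl; // shift of frame 1 at pixel (1000, 1000)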
int getShiftAt(RFLOAT frame, RFLOAT x, RFLOAT y, RFLOAT &shiftx, RFLOAT &shifty, bool use_local=true, bool normalise=false) const; // Set global shift for frame; frame is 1-indexed // (shiftx, shifty) is UNBINNED pixels in the original movie void setGlobalShift(int frame, RFLOAT shiftx, RFLOAT shifty); // Fills a pixel mask where defect and hot pixels are true void fillDefectAndHotpixels(MultidimArray &mask) const; int getEERUpsampling() const; int getEERGrouping() const; private: int width, height, n_frames; RFLOAT binning; FileName fnGain; FileName fnMovie; int eer_upsampling, eer_grouping; std::vector globalShiftX, globalShiftY; // Read micrograph model from a STAR file void read(FileName filename, bool read_hotpixels=true); // Set target movie file void setMovie(FileName fnMovie, FileName fnGain="", RFLOAT binning=1.0); void clear(); void clearFields(); void copyFieldsFrom(const Micrograph& m); void checkReadyFlag(std::string origin) const; }; #endif /* MICROGRAPH_MODEL_H_ */ relion-3.1.3/src/ml_model.cpp000066400000000000000000002074241411340063500160770ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ #include "src/ml_model.h" #ifdef MDL_TIMING Timer mdl_timer; int TIMING_MDL_1 = proj_timer.setNew("MDL_1"); #define TIMING_TOC(id) mdl_timer.toc(id) #else #define TIMING_TIC(id) #define TIMING_TOC(id) #endif void MlModel::initialise(bool _do_sgd) { // Auxiliary vector with relevant size in Fourier space MultidimArray aux; aux.initZeros(ori_size / 2 + 1); // Now resize all relevant vectors Iref.resize(nr_classes * nr_bodies); masks_bodies.resize(nr_bodies); com_bodies.resize(nr_bodies); rotate_direction_bodies.resize(nr_bodies); orient_bodies.resize(nr_bodies); sigma_tilt_bodies.resize(nr_bodies, 0.); sigma_psi_bodies.resize(nr_bodies, 0.); sigma_offset_bodies.resize(nr_bodies, 0.); keep_fixed_bodies.resize(nr_bodies, 0); pointer_body_overlap.resize(nr_bodies, nr_bodies); max_radius_mask_bodies.resize(nr_bodies, -1); pdf_class.resize(nr_classes, 1./(RFLOAT)nr_classes); pdf_direction.resize(nr_classes * nr_bodies); group_names.resize(nr_groups, ""); sigma2_noise.resize(nr_groups); nr_particles_per_group.resize(nr_groups); tau2_class.resize(nr_classes * nr_bodies, aux); fsc_halves_class.resize(nr_classes * nr_bodies, aux); sigma2_class.resize(nr_classes * nr_bodies, aux); data_vs_prior_class.resize(nr_classes * nr_bodies, aux); fourier_coverage_class.resize(nr_classes * nr_bodies, aux); // TODO handle these two correctly. 
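	// Per-group B-factor and intensity-scale corrections, initialised to the
	// neutral values of 0 and 1 respectively.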
bfactor_correction.resize(nr_groups, 0.); scale_correction.resize(nr_groups, 1.); acc_rot.resize(nr_classes * nr_bodies, 0); acc_trans.resize(nr_classes * nr_bodies, 0); estimated_resolution.resize(nr_classes * nr_bodies, 0); total_fourier_coverage.resize(nr_classes * nr_bodies, 0); helical_twist.resize(nr_classes, 0); helical_rise.resize(nr_classes, 0); if (ref_dim==2) { Matrix1D empty(2); prior_offset_class.resize(nr_classes * nr_bodies, empty); } // These arrays will be resized when they are filled orientability_contrib.resize(nr_classes * nr_bodies); Projector ref(ori_size, interpolator, padding_factor, r_min_nn, data_dim); PPref.clear(); PPrefRank.clear(); // Now fill the entire vector with instances of "ref" if(nr_classes != 1 && nr_bodies !=1) REPORT_ERROR("MlModel::initialise() - nr_bodies or nr_classes must be 1"); PPref.resize(nr_classes * nr_bodies, ref); do_sgd = _do_sgd; if (do_sgd) Igrad.resize(nr_classes); } // Reading from a file void MlModel::read(FileName fn_in) { // Clear current model clear(); // Open input file std::ifstream in(fn_in.data(), std::ios_base::in); if (in.fail()) REPORT_ERROR( (std::string) "MlModel::readStar: File " + fn_in + " cannot be read." ); MetaDataTable MDclass, MDgroup, MDlog, MDsigma, MDbodies; // Read general stuff MDlog.readStar(in, "model_general"); if (!MDlog.getValue(EMDL_MLMODEL_DIMENSIONALITY, ref_dim) || !MDlog.getValue(EMDL_MLMODEL_ORIGINAL_SIZE, ori_size) || !MDlog.getValue(EMDL_MLMODEL_CURRENT_RESOLUTION, current_resolution) || !MDlog.getValue(EMDL_MLMODEL_CURRENT_SIZE, current_size) || !MDlog.getValue(EMDL_MLMODEL_PADDING_FACTOR, padding_factor) || !MDlog.getValue(EMDL_MLMODEL_INTERPOLATOR, interpolator) || !MDlog.getValue(EMDL_MLMODEL_MINIMUM_RADIUS_NN_INTERPOLATION, r_min_nn) || !MDlog.getValue(EMDL_MLMODEL_PIXEL_SIZE, pixel_size) || !MDlog.getValue(EMDL_MLMODEL_NR_CLASSES, nr_classes) || !MDlog.getValue(EMDL_MLMODEL_NR_GROUPS, nr_groups) || !MDlog.getValue(EMDL_MLMODEL_TAU2_FUDGE_FACTOR, tau2_fudge_factor) || !MDlog.getValue(EMDL_MLMODEL_NORM_CORRECTION_AVG, avg_norm_correction) || !MDlog.getValue(EMDL_MLMODEL_PRIOR_MODE, orientational_prior_mode) || !MDlog.getValue(EMDL_MLMODEL_SIGMA_ROT, sigma2_rot) || !MDlog.getValue(EMDL_MLMODEL_SIGMA_TILT, sigma2_tilt) || !MDlog.getValue(EMDL_MLMODEL_SIGMA_PSI, sigma2_psi) || !MDlog.getValue(EMDL_MLMODEL_LL, LL) || !MDlog.getValue(EMDL_MLMODEL_AVE_PMAX, ave_Pmax) ) REPORT_ERROR("MlModel::readStar: incorrect model_general table"); if (!MDlog.getValue(EMDL_MLMODEL_SIGMA_OFFSET_ANGSTROM, sigma2_offset)) { if (MDlog.getValue(EMDL_MLMODEL_SIGMA_OFFSET, sigma2_offset)) { sigma2_offset *= pixel_size; } else { REPORT_ERROR("MlModel::readStar: incorrect model_general table: cannot find sigma_offset"); } } // Retain compability with model files written by Relion prior to 1.4 if (!MDlog.getValue(EMDL_MLMODEL_DIMENSIONALITY_DATA, data_dim)) data_dim = 2; if (!MDlog.getValue(EMDL_MLMODEL_NR_BODIES, nr_bodies)) nr_bodies = 1; if (!MDlog.getValue(EMDL_MLMODEL_IS_HELIX, is_helix)) is_helix = false; if (is_helix) { if (nr_bodies != 1) REPORT_ERROR("MlModel::readStar: incorrect nr_bodies for helix"); } if (!MDlog.getValue(EMDL_MLMODEL_HELICAL_NR_ASU, helical_nr_asu)) helical_nr_asu = 1; if (!MDlog.getValue(EMDL_MLMODEL_HELICAL_TWIST_MIN, helical_twist_min)) helical_twist_min = 0.; if (!MDlog.getValue(EMDL_MLMODEL_HELICAL_TWIST_MAX, helical_twist_max)) helical_twist_max = 0.; if (!MDlog.getValue(EMDL_MLMODEL_HELICAL_TWIST_INITIAL_STEP, helical_twist_inistep)) helical_twist_inistep = 0.; if 
(!MDlog.getValue(EMDL_MLMODEL_HELICAL_RISE_MIN, helical_rise_min)) helical_rise_min = 0.; if (!MDlog.getValue(EMDL_MLMODEL_HELICAL_RISE_MAX, helical_rise_max)) helical_rise_max = 0.; if (!MDlog.getValue(EMDL_MLMODEL_HELICAL_RISE_INITIAL_STEP, helical_rise_inistep)) helical_rise_inistep = 0.; // Treat classes or bodies (for multi-body refinement) in the same way... int nr_classes_bodies = (nr_bodies > 1) ? nr_bodies : nr_classes; if (nr_classes > 1 && nr_bodies > 1) REPORT_ERROR("MlModel::readStar: nr_classes and nr_bodies cannot be both larger than one."); // Take inverse again of current resolution: current_resolution = 1. / current_resolution; sigma2_offset *= sigma2_offset; sigma2_rot *= sigma2_rot; sigma2_tilt *= sigma2_tilt; sigma2_psi *= sigma2_psi; // Resize vectors initialise(); // Read classes FileName fn_tmp, fn_tmp2; Image img; if (nr_bodies > 1) MDclass.readStar(in, "model_bodies"); else MDclass.readStar(in, "model_classes"); int iclass = 0; do_sgd = false; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDclass) { if (!MDclass.getValue(EMDL_MLMODEL_ACCURACY_TRANS_ANGSTROM, acc_trans[iclass])) { if (MDclass.getValue(EMDL_MLMODEL_ACCURACY_TRANS, acc_trans[iclass])) { acc_trans[iclass] *= pixel_size; } else { REPORT_ERROR("MlModel::readStar: incorrect model_classes/bodies table: no acc_trans"); } } if (!MDclass.getValue(EMDL_MLMODEL_REF_IMAGE, fn_tmp) || !MDclass.getValue(EMDL_MLMODEL_ACCURACY_ROT, acc_rot[iclass]) ) REPORT_ERROR("MlModel::readStar: incorrect model_classes/bodies table: no ref_image or acc_rot"); // backwards compatible if (!MDclass.getValue(EMDL_MLMODEL_ESTIM_RESOL_REF, estimated_resolution[iclass])) estimated_resolution[iclass] = 0.; if (!MDclass.getValue(EMDL_MLMODEL_FOURIER_COVERAGE_TOTAL_REF, total_fourier_coverage[iclass])) total_fourier_coverage[iclass] = 0.; if (ref_dim==2) if (!MDclass.getValue(EMDL_MLMODEL_PRIOR_OFFX_CLASS, XX(prior_offset_class[iclass])) || !MDclass.getValue(EMDL_MLMODEL_PRIOR_OFFY_CLASS, YY(prior_offset_class[iclass])) ) REPORT_ERROR("MlModel::readStar: incorrect model_classes/bodies table: no offset priors for 2D classes"); if (iclass == 0 || nr_bodies == 1) // there is only one pdf_class for multibody, but multiple for classification! if (!MDclass.getValue(EMDL_MLMODEL_PDF_CLASS, pdf_class[iclass]) ) REPORT_ERROR("MlModel::readStar: incorrect model_classes table: no pdf_class"); if (is_helix) { if (!MDclass.getValue(EMDL_MLMODEL_HELICAL_RISE, helical_rise[iclass]) || !MDclass.getValue(EMDL_MLMODEL_HELICAL_TWIST, helical_twist[iclass]) ) REPORT_ERROR("MlModel::readStar: incorrect helical parameters"); } if (nr_bodies > 1) { if (MDclass.containsLabel(EMDL_BODY_KEEP_FIXED)) MDclass.getValue(EMDL_BODY_KEEP_FIXED, keep_fixed_bodies[iclass]); else keep_fixed_bodies[iclass] = 0; } // Read in actual reference image img.read(fn_tmp); img().setXmippOrigin(); Iref[iclass] = img(); // Check to see whether there is a SGD-gradient entry as well if (MDclass.getValue(EMDL_MLMODEL_SGD_GRADIENT_IMAGE, fn_tmp)) { do_sgd=true; if (iclass == 0) Igrad.resize(nr_classes); img.read(fn_tmp); Igrad[iclass] = img(); } iclass++; } // Read group stuff MDgroup.readStar(in, "model_groups"); long int igroup, optics_group; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDgroup) { if (!MDgroup.getValue(EMDL_MLMODEL_GROUP_NO, igroup)) { REPORT_ERROR("MlModel::readStar: incorrect model_groups table"); } //Start counting of groups at 1, not at 0.... 
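		// The group number in the STAR file is therefore converted to the 0-based
		// index 'igroup - 1' when filling the per-group vectors below.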
if (!MDgroup.getValue(EMDL_MLMODEL_GROUP_SCALE_CORRECTION, scale_correction[igroup - 1]) || !MDgroup.getValue(EMDL_MLMODEL_GROUP_NR_PARTICLES, nr_particles_per_group[igroup - 1]) || !MDgroup.getValue(EMDL_MLMODEL_GROUP_NAME, group_names[igroup-1])) REPORT_ERROR("MlModel::readStar: incorrect model_groups table"); } // Read SSNR, noise reduction, tau2_class spectra for each class for (int iclass = 0; iclass < nr_classes_bodies; iclass++) { if (nr_bodies > 1) MDsigma.readStar(in, "model_body_" + integerToString(iclass + 1)); else MDsigma.readStar(in, "model_class_" + integerToString(iclass + 1)); int idx; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDsigma) { if (!MDsigma.getValue(EMDL_SPECTRAL_IDX, idx)) REPORT_ERROR("MlModel::readStar: incorrect table model_class/body_"+integerToString(iclass + 1)); if (!MDsigma.getValue(EMDL_MLMODEL_DATA_VS_PRIOR_REF, data_vs_prior_class[iclass](idx)) || !MDsigma.getValue(EMDL_MLMODEL_TAU2_REF, tau2_class[iclass](idx)) || !MDsigma.getValue(EMDL_MLMODEL_FSC_HALVES_REF, fsc_halves_class[iclass](idx)) || !MDsigma.getValue(EMDL_MLMODEL_SIGMA2_REF, sigma2_class[iclass](idx))) REPORT_ERROR("MlModel::readStar: incorrect table model_class/body_"+integerToString(iclass + 1)); // backwards compatible with STAR files without Fourier coverage if (!MDsigma.getValue(EMDL_MLMODEL_FOURIER_COVERAGE_REF, fourier_coverage_class[iclass](idx))) fourier_coverage_class[iclass](idx) = 0.; } } // Read sigma models for each group for (int igroup = 0; igroup < nr_groups; igroup++) { // Allow sigma2_noise with different sizes! sigma2_noise[igroup].resize(ori_size/2 + 1); if (nr_particles_per_group[igroup] > 0) { MDsigma.readStar(in, "model_group_" + integerToString(igroup + 1)); int idx; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDsigma) { if (!MDsigma.getValue(EMDL_SPECTRAL_IDX, idx)) REPORT_ERROR("MlModel::readStar: incorrect table model_group_" + integerToString(igroup + 1)); if (!MDsigma.getValue(EMDL_MLMODEL_SIGMA2_NOISE, sigma2_noise[igroup](idx))) REPORT_ERROR("MlModel::readStar: incorrect table model_group_" + integerToString(igroup + 1)); } } else { FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(sigma2_noise[igroup]) { DIRECT_MULTIDIM_ELEM(sigma2_noise[igroup], n) = 0.; } } } // Read pdf_direction models for each class if (ref_dim == 3) { for (int iclass = 0; iclass < nr_classes_bodies; iclass++) { if (nr_bodies > 1) MDclass.readStar(in, "model_pdf_orient_body_" + integerToString(iclass + 1)); else MDclass.readStar(in, "model_pdf_orient_class_" + integerToString(iclass + 1)); pdf_direction[iclass].clear(); RFLOAT aux; std::vector vaux; vaux.clear(); FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDclass) { if (!MDclass.getValue(EMDL_MLMODEL_PDF_ORIENT, aux)) REPORT_ERROR("MlModel::readStar: incorrect table model_pdf_orient_class_" + integerToString(iclass + 1)); vaux.push_back(aux); } pdf_direction[iclass].resize(vaux.size()); FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY1D(pdf_direction[iclass]) { DIRECT_A1D_ELEM(pdf_direction[iclass], i) = vaux[i]; } nr_directions = vaux.size(); } } else { // For 2D case, just fill pdf_direction with ones. 
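		// (there are no out-of-plane directions in 2D, so a single entry of 1 suffices)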
for (int iclass = 0; iclass < nr_classes_bodies; iclass++) { pdf_direction[iclass].clear(); pdf_direction[iclass].resize(1); DIRECT_A1D_ELEM(pdf_direction[iclass], 0) = 1.; } nr_directions = 1; } // Close file handler in.close(); } void MlModel::write(FileName fn_out, HealpixSampling &sampling, bool do_write_bild, bool only_write_images) { MetaDataTable MDclass, MDgroup, MDlog, MDsigma, MDbodies; FileName fn_tmp, fn_tmp2; RFLOAT aux; std::ofstream fh; // Treat classes or bodies (for multi-body refinement) in the same way... int nr_classes_bodies = (nr_bodies > 1) ? nr_bodies : nr_classes; // A. Write images if (ref_dim == 2) { Image img(XSIZE(Iref[0]), YSIZE(Iref[0]), 1, nr_classes_bodies); for (int iclass = 0; iclass < nr_classes_bodies; iclass++) { FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY2D(Iref[iclass]) { DIRECT_NZYX_ELEM(img(), iclass, 0, i, j) = DIRECT_A2D_ELEM(Iref[iclass], i, j); } } img.setSamplingRateInHeader(pixel_size); if (nr_bodies > 1) img.write(fn_out + "_bodies.mrcs"); else img.write(fn_out + "_classes.mrcs"); if (do_sgd) { for (int iclass = 0; iclass < nr_classes; iclass++) { FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY2D(Igrad[iclass]) { DIRECT_NZYX_ELEM(img(), iclass, 0, i, j) = DIRECT_A2D_ELEM(Igrad[iclass], i, j); } } img.write(fn_out + "_gradients.mrcs"); } } else { Image img; // Set correct voxel size in the header for (int iclass = 0; iclass < nr_classes_bodies; iclass++) { img() = Iref[iclass]; img.setSamplingRateInHeader(pixel_size); if (nr_bodies > 1) { fn_tmp.compose(fn_out+"_body", iclass+1, "mrc", 3); // apply the body mask for output to the user // No! That interferes with a clean continuation of multibody refinement, as ref will be masked 2x then! // img() *= masks_bodies[iclass]; } else fn_tmp.compose(fn_out+"_class", iclass+1, "mrc", 3); img.write(fn_tmp); } if (do_sgd) { for (int iclass = 0; iclass < nr_classes; iclass++) { fn_tmp.compose(fn_out+"_grad", iclass+1, "mrc", 3); img() = Igrad[iclass]; img.write(fn_tmp); } } if (do_write_bild) { // Also write out bild files with the orientational distribution of each class // Also write out angular distributions // Don't do this for bodies, only for classes! for (int iclass = 0; iclass < nr_classes_bodies; iclass++) { FileName fn_bild; if (nr_bodies > 1) fn_bild.compose(fn_out+"_body",iclass+1,"", 3); else fn_bild.compose(fn_out+"_class",iclass+1,"", 3); fn_bild += "_angdist.bild"; RFLOAT offset = ori_size * pixel_size / 2.; if (nr_bodies > 1) { // 14jul2017: rotations are all relative to (rot,tilt)=(0,90) to prevent problems with psi-prior around tilt=0! sampling.writeBildFileOrientationalDistribution(pdf_direction[iclass], fn_bild, offset, offset, &orient_bodies[iclass], &com_bodies[iclass]); } else { sampling.writeBildFileOrientationalDistribution(pdf_direction[iclass], fn_bild, offset, offset); } } } } if (only_write_images) return; // B. 
Write STAR file with metadata fn_tmp = fn_out + "_model.star"; fh.open((fn_tmp).c_str(), std::ios::out); if (!fh) REPORT_ERROR( (std::string)"MlModel::write: Cannot write file: " + fn_tmp); // Write the output STAR file MDlog.setIsList(true); MDlog.addObject(); MDlog.setName("model_general"); MDlog.setValue(EMDL_MLMODEL_DIMENSIONALITY, ref_dim); MDlog.setValue(EMDL_MLMODEL_DIMENSIONALITY_DATA, data_dim); MDlog.setValue(EMDL_MLMODEL_ORIGINAL_SIZE, ori_size); MDlog.setValue(EMDL_MLMODEL_CURRENT_RESOLUTION, 1./current_resolution); MDlog.setValue(EMDL_MLMODEL_CURRENT_SIZE, current_size); MDlog.setValue(EMDL_MLMODEL_PADDING_FACTOR, padding_factor); MDlog.setValue(EMDL_MLMODEL_IS_HELIX, is_helix); if (is_helix) { MDlog.setValue(EMDL_MLMODEL_HELICAL_NR_ASU, helical_nr_asu); MDlog.setValue(EMDL_MLMODEL_HELICAL_TWIST_MIN, helical_twist_min); MDlog.setValue(EMDL_MLMODEL_HELICAL_TWIST_MAX, helical_twist_max); MDlog.setValue(EMDL_MLMODEL_HELICAL_TWIST_INITIAL_STEP, helical_twist_inistep); MDlog.setValue(EMDL_MLMODEL_HELICAL_RISE_MIN, helical_rise_min); MDlog.setValue(EMDL_MLMODEL_HELICAL_RISE_MAX, helical_rise_max); MDlog.setValue(EMDL_MLMODEL_HELICAL_RISE_INITIAL_STEP, helical_rise_inistep); } MDlog.setValue(EMDL_MLMODEL_INTERPOLATOR, interpolator); MDlog.setValue(EMDL_MLMODEL_MINIMUM_RADIUS_NN_INTERPOLATION, r_min_nn); MDlog.setValue(EMDL_MLMODEL_PIXEL_SIZE, pixel_size); MDlog.setValue(EMDL_MLMODEL_NR_CLASSES, nr_classes); MDlog.setValue(EMDL_MLMODEL_NR_BODIES, nr_bodies); MDlog.setValue(EMDL_MLMODEL_NR_GROUPS, nr_groups); MDlog.setValue(EMDL_MLMODEL_TAU2_FUDGE_FACTOR, tau2_fudge_factor); MDlog.setValue(EMDL_MLMODEL_NORM_CORRECTION_AVG, avg_norm_correction); MDlog.setValue(EMDL_MLMODEL_SIGMA_OFFSET_ANGSTROM, sqrt(sigma2_offset)); MDlog.setValue(EMDL_MLMODEL_PRIOR_MODE, orientational_prior_mode); MDlog.setValue(EMDL_MLMODEL_SIGMA_ROT, sqrt(sigma2_rot)); MDlog.setValue(EMDL_MLMODEL_SIGMA_TILT, sqrt(sigma2_tilt)); MDlog.setValue(EMDL_MLMODEL_SIGMA_PSI, sqrt(sigma2_psi)); MDlog.setValue(EMDL_MLMODEL_LL, LL); MDlog.setValue(EMDL_MLMODEL_AVE_PMAX, ave_Pmax); MDlog.write(fh); // Calculate resolutions and total Fourier coverages for each class calculateTotalFourierCoverage(); // Write metadata and images for all classes FileName fn_root; fn_root = fn_out.beforeFirstOf("_it"); if (nr_bodies > 1) MDclass.setName("model_bodies"); else MDclass.setName("model_classes"); for (int iclass = 0; iclass < nr_classes_bodies; iclass++) { MDclass.addObject(); Image Itmp; if (ref_dim==2) { if (nr_bodies > 1) { fn_tmp = fn_out + "_bodies.mrcs"; fn_tmp2.compose(fn_root+"_body",iclass+1,"", 3); // class number from 1 to K! fn_tmp2 += "_mask.mrc"; } else { fn_tmp = fn_out + "_classes.mrcs"; } fn_tmp.compose(iclass+1, fn_tmp); // fn_tmp = integerToString(iclass) + "@" + fn_tmp; } else { if (nr_bodies > 1) { fn_tmp.compose(fn_out+"_body",iclass+1,"mrc", 3); // class number from 1 to K! fn_tmp2.compose(fn_root+"_body",iclass+1,"", 3); // class number from 1 to K! fn_tmp2 += "_mask.mrc"; } else fn_tmp.compose(fn_out+"_class",iclass+1,"mrc", 3); // class number from 1 to K! } MDclass.setValue(EMDL_MLMODEL_REF_IMAGE, fn_tmp); if (do_sgd) { if (ref_dim==2) fn_tmp.compose(iclass+1, fn_out + "_gradients.mrcs"); else fn_tmp.compose(fn_out+"_grad",iclass+1,"mrc", 3); MDclass.setValue(EMDL_MLMODEL_SGD_GRADIENT_IMAGE, fn_tmp); } // For multiple bodies: only star PDF_CLASS in the first one! int myclass = (nr_bodies > 1) ? 
0 : iclass; // for multi-body: just set iclass=0 MDclass.setValue(EMDL_MLMODEL_PDF_CLASS, pdf_class[myclass]); MDclass.setValue(EMDL_MLMODEL_ACCURACY_ROT, acc_rot[iclass]); MDclass.setValue(EMDL_MLMODEL_ACCURACY_TRANS_ANGSTROM, acc_trans[iclass]); MDclass.setValue(EMDL_MLMODEL_ESTIM_RESOL_REF, estimated_resolution[iclass]); MDclass.setValue(EMDL_MLMODEL_FOURIER_COVERAGE_TOTAL_REF, total_fourier_coverage[iclass]); if (nr_bodies > 1) { MDclass.setValue(EMDL_BODY_ROTATE_DIRECTION_X, XX(rotate_direction_bodies[iclass])); MDclass.setValue(EMDL_BODY_ROTATE_DIRECTION_Y, YY(rotate_direction_bodies[iclass])); MDclass.setValue(EMDL_BODY_ROTATE_DIRECTION_Z, ZZ(rotate_direction_bodies[iclass])); MDclass.setValue(EMDL_BODY_KEEP_FIXED, keep_fixed_bodies[iclass]); } if (ref_dim==2) { MDclass.setValue(EMDL_MLMODEL_PRIOR_OFFX_CLASS, XX(prior_offset_class[iclass])); MDclass.setValue(EMDL_MLMODEL_PRIOR_OFFY_CLASS, YY(prior_offset_class[iclass])); } if (is_helix) { MDclass.setValue(EMDL_MLMODEL_HELICAL_RISE, helical_rise[iclass]); MDclass.setValue(EMDL_MLMODEL_HELICAL_TWIST, helical_twist[iclass]); } } MDclass.write(fh); // Write radial_average of tau2_class and data_vs_prior_class for each reference for (int iclass = 0; iclass < nr_classes_bodies; iclass++) { MDsigma.clear(); if (nr_bodies > 1) MDsigma.setName("model_body_"+integerToString(iclass+1)); else MDsigma.setName("model_class_"+integerToString(iclass+1)); for (int ii = 0; ii < XSIZE(tau2_class[iclass]); ii++) { MDsigma.addObject(); MDsigma.setValue(EMDL_SPECTRAL_IDX, ii); MDsigma.setValue(EMDL_RESOLUTION, getResolution(ii)); MDsigma.setValue(EMDL_RESOLUTION_ANGSTROM, getResolutionAngstrom(ii)); MDsigma.setValue(EMDL_MLMODEL_DATA_VS_PRIOR_REF, data_vs_prior_class[iclass](ii)); MDsigma.setValue(EMDL_MLMODEL_FSC_HALVES_REF, fsc_halves_class[iclass](ii)); MDsigma.setValue(EMDL_MLMODEL_FOURIER_COVERAGE_REF, fourier_coverage_class[iclass](ii)); MDsigma.setValue(EMDL_MLMODEL_SIGMA2_REF, sigma2_class[iclass](ii)); MDsigma.setValue(EMDL_MLMODEL_TAU2_REF, tau2_class[iclass](ii)); // Only write orientabilities if they have been determined if (XSIZE(orientability_contrib[iclass]) == XSIZE(tau2_class[iclass])) MDsigma.setValue(EMDL_MLMODEL_ORIENTABILITY_CONTRIBUTION, orientability_contrib[iclass](ii)); } MDsigma.write(fh); } // Write scale-correction for all groups MDgroup.setName("model_groups"); for (long int igroup = 0; igroup < nr_groups; igroup++) { MDgroup.addObject(); //Start counting of groups at 1, not at 0.... MDgroup.setValue(EMDL_MLMODEL_GROUP_NO, igroup+1); MDgroup.setValue(EMDL_MLMODEL_GROUP_NAME, group_names[igroup]); MDgroup.setValue(EMDL_MLMODEL_GROUP_NR_PARTICLES, nr_particles_per_group[igroup]); MDgroup.setValue(EMDL_MLMODEL_GROUP_SCALE_CORRECTION, scale_correction[igroup]); } MDgroup.write(fh); // Write sigma models for each group for (int igroup = 0; igroup < nr_groups; igroup++) { if (nr_particles_per_group[igroup] > 0) { MDsigma.clear(); MDsigma.setName("model_group_"+integerToString(igroup+1)); for (int ii = 0; ii < XSIZE(sigma2_noise[igroup]); ii++) { MDsigma.addObject(); // Some points in sigma2_noise arrays are never used... aux = sigma2_noise[igroup](ii); if (aux > 0.) 
{ MDsigma.setValue(EMDL_SPECTRAL_IDX, ii); MDsigma.setValue(EMDL_RESOLUTION, getResolution(ii)); MDsigma.setValue(EMDL_MLMODEL_SIGMA2_NOISE, aux); } } MDsigma.write(fh); } } // Write pdf_direction models for each class if (ref_dim == 3) { for (int iclass = 0; iclass < nr_classes_bodies; iclass++) { MDclass.clear(); if (nr_bodies > 1) MDclass.setName("model_pdf_orient_body_"+integerToString(iclass+1)); else MDclass.setName("model_pdf_orient_class_"+integerToString(iclass+1)); for (int ii=0; ii < XSIZE(pdf_direction[iclass]); ii++) { MDclass.addObject(); MDclass.setValue(EMDL_MLMODEL_PDF_ORIENT, pdf_direction[iclass](ii)); } MDclass.write(fh); } } } void MlModel::readTauSpectrum(FileName fn_tau, int verb) { MetaDataTable MDtau; RFLOAT val; int idx; MDtau.read(fn_tau); FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDtau) { MDtau.getValue(EMDL_SPECTRAL_IDX, idx); MDtau.getValue(EMDL_MLMODEL_TAU2_REF, val); if (idx < XSIZE(tau2_class[0])) tau2_class[0](idx) = tau2_fudge_factor * val; } if (idx < XSIZE(tau2_class[0]) - 1) { if (verb > 0) std::cerr<< " Warning: provided tau2-spectrum has fewer entries ("< 0) std::cerr << " WARNING: input particles STAR file does not have a column for image dimensionality, assuming 2D images ..." << std::endl; data_dim = 2; } else { _mydata.obsModel.opticsMdt.getValue(EMDL_IMAGE_DIMENSIONALITY, data_dim, 0); } // Read references into memory Image img; FileName fn_tmp; if (fn_ref != "None") { // Read the references into memory do_average_unaligned = false; // If this is a STAR file, ignore nr_classes and read all references from this file if (fn_ref.isStarFile()) { MetaDataTable MDref; MDref.read(fn_ref,"model_classes"); if(!MDref.getValue(EMDL_MLMODEL_REF_IMAGE, fn_tmp)) // if we did not find the meta-data label _rlnReferenceImage in a directed search, try more generally MDref.read(fn_ref); if(!MDref.getValue(EMDL_MLMODEL_REF_IMAGE, fn_tmp)) // if we still did not find the meta-data label _rlnReferenceImage, report an error REPORT_ERROR("When specifying a .star-file as --ref input, you need to have the _rlnReferenceImage field"); do_generate_seeds = false; // ignore nr_classes from the command line, use number of entries in STAR file nr_classes = 0; Iref.clear(); Igrad.clear(); FOR_ALL_OBJECTS_IN_METADATA_TABLE(MDref) { MDref.getValue(EMDL_MLMODEL_REF_IMAGE, fn_tmp); img.read(fn_tmp); img().setXmippOrigin(); if (_ref_angpix > 0.) { pixel_size = _ref_angpix; } else { RFLOAT header_pixel_size; if (nr_classes == 0) { img.MDMainHeader.getValue(EMDL_IMAGE_SAMPLINGRATE_X, header_pixel_size); pixel_size = header_pixel_size; } else { img.MDMainHeader.getValue(EMDL_IMAGE_SAMPLINGRATE_X, header_pixel_size); if (fabs(header_pixel_size - pixel_size) > 0.001) { REPORT_ERROR("MlModel::readImages ERROR: different models have different pixel sizes in their headers!"); } } } ori_size = XSIZE(img()); ref_dim = img().getDim(); Iref.push_back(img()); if (_do_sgd) { img() *= 0.; Igrad.push_back(img()); } nr_classes++; } } // For a single image, read this image as reference and set it in all nr_classes Irefs else { img.read(fn_ref); img().setXmippOrigin(); if (_ref_angpix > 0.) 
{ pixel_size = _ref_angpix; } else { RFLOAT header_pixel_size; img.MDMainHeader.getValue(EMDL_IMAGE_SAMPLINGRATE_X, header_pixel_size); if (header_pixel_size <= 0) { std::cerr << " header_pixel_size = " << header_pixel_size << std::endl; REPORT_ERROR("MlModel::initialiseFromImages: Pixel size of reference image is not set!"); } pixel_size = header_pixel_size; } ori_size = XSIZE(img()); ref_dim = img().getDim(); if (ori_size != XSIZE(img()) || ori_size != YSIZE(img())) { std::cerr << " ori_size= " << ori_size << " XSIZE(img())= " << XSIZE(img()) << std::endl; REPORT_ERROR("MlOptimiser::read: size of reference image is not the same as the experimental images!"); } Iref.clear(); Igrad.clear(); if (nr_bodies > 1) { for (int ibody = 0; ibody < nr_bodies; ibody++) { Iref.push_back(img()); if (masks_bodies.size() <= ibody) REPORT_ERROR("BUG: masks_bodies.size() < ibody. Did you initialise the body masks before reading the references?"); } } else { for (int iclass = 0; iclass < nr_classes; iclass++) { Iref.push_back(img()); if (_do_sgd) { img() *= 0.; Igrad.push_back(img()); } } } if (nr_classes > 1) do_generate_seeds = true; else do_generate_seeds = false; } } // Make sure that the model has the same box and pixel size as (the first optics group of) the data RFLOAT pixel_size_first_optics_group = _mydata.getOpticsPixelSize(0); int box_size_first_optics_group = _mydata.getOpticsImageSize(0); if (fn_ref != "None") { if (fabs(pixel_size - pixel_size_first_optics_group) > 0.001 || ori_size != box_size_first_optics_group) { std::string mesg = ""; if (fabs(pixel_size - pixel_size_first_optics_group) > 0.001) { mesg = " The reference pixel size is " + floatToString(pixel_size) + " A/px, but the pixel size of the first optics group of the data is " + floatToString(pixel_size_first_optics_group) + " A/px! \n"; } if (ori_size != box_size_first_optics_group) { mesg += " The reference box size is " + integerToString(ori_size) + " px, but the box size of the first optics group of the data is " + integerToString(box_size_first_optics_group) + " px!\n"; } if (!_do_trust_ref_size) REPORT_ERROR("ERROR " + mesg + "\nIf you want to re-scale and/or re-box input particles into the pixel size and the box size of the reference, re-run the program with the --trust_ref_size option."); else if (verb) std::cerr << " WARNING " << mesg; } } else { pixel_size = pixel_size_first_optics_group; ori_size = box_size_first_optics_group; // Calculate average of all unaligned images later on. do_average_unaligned = true; do_generate_seeds = false; // after SGD introduction, this is now done in the estimation of initial sigma2 step! 
refs_are_ctf_corrected = true; if (_is_3d_model || data_dim == 3) { ref_dim = 3; img().initZeros(ori_size, ori_size, ori_size); } else { ref_dim = 2; img().initZeros(ori_size, ori_size); } img().setXmippOrigin(); Iref.clear(); Igrad.clear(); for (int iclass = 0; iclass < nr_classes; iclass++) { Iref.push_back(img()); if (_do_sgd) Igrad.push_back(img()); } } // Set some group stuff nr_groups = _mydata.groups.size(); MultidimArray aux; aux.initZeros(ori_size/2 + 1); sigma2_noise.resize(nr_groups, aux); initialise(_do_sgd); // Now set the group names from the Experiment groups list for (int i=0; i< nr_groups; i++) group_names[i] = _mydata.groups[i].name; } void MlModel::initialisePdfDirection(long long int newsize) { // If the pdf_direction were already filled (size!=0), and newsize=oldsize then leave them as they were // If they were still empty, or if the size changes, then initialise them with an even distribution for (int iclass = 0; iclass < nr_classes * nr_bodies; iclass++) { long long int oldsize = MULTIDIM_SIZE(pdf_direction[iclass]); if (oldsize == 0 || oldsize != newsize) { pdf_direction[iclass].resize(newsize); pdf_direction[iclass].initConstant(1./((RFLOAT) nr_classes * newsize)); } } nr_directions = newsize; } void MlModel::initialiseBodies(FileName fn_masks, FileName fn_root_out, bool also_initialise_rest, int rank) { MetaDataTable MD; MD.read(fn_masks); if (!MD.containsLabel(EMDL_BODY_MASK_NAME)) REPORT_ERROR("ERROR MlModel::initialiseBodyMasks: body-mask STAR file does not contain rlnBodyMaskName label."); nr_bodies = 0; masks_bodies.resize(MD.numberOfObjects()); com_bodies.resize(MD.numberOfObjects()); rotate_direction_bodies.resize(MD.numberOfObjects()); orient_bodies.resize(MD.numberOfObjects()); sigma_tilt_bodies.resize(MD.numberOfObjects()); sigma_psi_bodies.resize(MD.numberOfObjects()); sigma_offset_bodies.resize(MD.numberOfObjects()); keep_fixed_bodies.resize(MD.numberOfObjects()); max_radius_mask_bodies.resize(MD.numberOfObjects()); FileName fn_mask; Image Imask; std::vector relatives_to; Matrix1D one_direction(3); bool has_rotate_directions = false; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD) { MD.getValue(EMDL_BODY_MASK_NAME, fn_mask); Imask.read(fn_mask); RFLOAT minval, maxval; Imask().computeDoubleMinMax(minval, maxval); if (minval < 0. || maxval > 1.) 
REPORT_ERROR("ERROR: the mask " + fn_mask + " has values outside the range [0,1]"); Imask().setXmippOrigin(); masks_bodies[nr_bodies] = Imask(); Imask.setSamplingRateInHeader(pixel_size); // find center-of-mass for rotations around it int mydim = Imask().getDim(); Matrix1D com(mydim); Imask().centerOfMass(com); com_bodies[nr_bodies].resize(3); XX(com_bodies[nr_bodies]) = ROUND(XX(com)); // ROUND so no interpolation artifacts in selfTranslate(Iref) YY(com_bodies[nr_bodies]) = ROUND(YY(com)); if (mydim == 3) ZZ(com_bodies[nr_bodies]) = ROUND(ZZ(com)); else ZZ(com_bodies[nr_bodies]) = 0.; // find maximum radius of mask around it's COM int max_d2 = 0.; FOR_ALL_ELEMENTS_IN_ARRAY3D(Imask()) { if (A3D_ELEM(Imask(), k, i, j) > 0.05) { int d2 = (k - ZZ(com)) * (k - ZZ(com)) + (i - YY(com)) * (i - YY(com)) + (j - XX(com)) * (j - XX(com)); max_d2 = XMIPP_MAX(max_d2, d2); } } max_radius_mask_bodies[nr_bodies] = CEIL(pixel_size * sqrt((RFLOAT)max_d2)); // Get which body to rotate relative to int relative_to = -1; if (MD.containsLabel(EMDL_BODY_ROTATE_RELATIVE_TO)) { MD.getValue(EMDL_BODY_ROTATE_RELATIVE_TO, relative_to); relative_to--;// numbering in STAR file starts with 1 } relatives_to.push_back(relative_to); if (MD.containsLabel(EMDL_BODY_ROTATE_DIRECTION_X) && MD.containsLabel(EMDL_BODY_ROTATE_DIRECTION_Y) && MD.containsLabel(EMDL_BODY_ROTATE_DIRECTION_Z)) { has_rotate_directions = true; MD.getValue(EMDL_BODY_ROTATE_DIRECTION_X, XX(one_direction)); MD.getValue(EMDL_BODY_ROTATE_DIRECTION_Y, YY(one_direction)); MD.getValue(EMDL_BODY_ROTATE_DIRECTION_Z, ZZ(one_direction)); rotate_direction_bodies.push_back(one_direction); } RFLOAT val; if (MD.containsLabel(EMDL_BODY_SIGMA_ANG)) { MD.getValue(EMDL_BODY_SIGMA_ANG, val); sigma_tilt_bodies[nr_bodies] = val; sigma_psi_bodies[nr_bodies] = val; } else { if (!(MD.containsLabel(EMDL_BODY_SIGMA_TILT) && MD.containsLabel(EMDL_BODY_SIGMA_PSI)) ) REPORT_ERROR("ERROR: either provide rlnBodySigmaAngles OR provide rlnBodySigmaTilt and rlnBodySigmaPsi in the body STAR file."); MD.getValue(EMDL_BODY_SIGMA_TILT, val); sigma_tilt_bodies[nr_bodies] = val; MD.getValue(EMDL_BODY_SIGMA_PSI, val); sigma_psi_bodies[nr_bodies] = val; } if (MD.getValue(EMDL_BODY_SIGMA_OFFSET_ANGSTROM, val)) { sigma_offset_bodies[nr_bodies] = val; } else if (MD.getValue(EMDL_BODY_SIGMA_OFFSET, val)) { val *= pixel_size; } else { REPORT_ERROR("ERROR: the body STAR file should contain a rlnBodySigmaOffsetAngst column for the prior on the offsets for each body"); } // Also write the mask with the standard name to disk fn_mask.compose(fn_root_out + "_body", nr_bodies + 1, "", 3); // body number from 1 to K! fn_mask += "_mask.mrc"; if (rank == 0) Imask.write(fn_mask); // update counter at the end! 
nr_bodies++; } // Now that we have the COMs, also get the orientation matrix and the direction of rotation for each body for (int ibody = 0; ibody < nr_bodies; ibody++) { if (relatives_to[ibody] >= 0) { // If another body was given in the input STAR file, rotate this body wrt the COM of the other body rotate_direction_bodies[ibody] = com_bodies[relatives_to[ibody]]; rotate_direction_bodies[ibody] -= com_bodies[ibody]; } else if (has_rotate_directions) { // If the rotation vector is specified directly, just use this one } else { // if no relative-bodies, nor explicit rotation directions are specified in the STAR file, then rotate relative to (0,0,0) rotate_direction_bodies[ibody].initZeros(); rotate_direction_bodies[ibody] -= com_bodies[ibody]; } rotate_direction_bodies[ibody].selfNormalize(); alignWithZ(-rotate_direction_bodies[ibody], orient_bodies[ibody], false); } if (also_initialise_rest) { if (Iref.size() != 1) REPORT_ERROR("BUG: at this point, there should only be a single reference!"); for (int ibody = 1; ibody < nr_bodies; ibody++) { Iref.push_back(Iref[0]); tau2_class.push_back(tau2_class[0]); fsc_halves_class.push_back(fsc_halves_class[0]); sigma2_class.push_back(sigma2_class[0]); data_vs_prior_class.push_back(data_vs_prior_class[0]); fourier_coverage_class.push_back(fourier_coverage_class[0]); acc_rot.push_back(acc_rot[0]); acc_trans.push_back(acc_trans[0]); estimated_resolution.push_back(estimated_resolution[0]); total_fourier_coverage.push_back(total_fourier_coverage[0]); if (ref_dim==2) prior_offset_class.push_back(prior_offset_class[0]); orientability_contrib.push_back(orientability_contrib[0]); PPref.push_back(PPref[0]); pdf_direction.push_back(pdf_direction[0]); // If all sigmas are zero, ignore this body in the refinement if (sigma_tilt_bodies[ibody] < 0.001 && sigma_psi_bodies[ibody] < 0.001 && sigma_offset_bodies[ibody] < 0.001) keep_fixed_bodies[ibody] = 1; else keep_fixed_bodies[ibody] = 0; } // If provided a specific reference, re-set the corresponding Iref entry if (MD.containsLabel(EMDL_BODY_REFERENCE_NAME)) { int ibody = 0; FOR_ALL_OBJECTS_IN_METADATA_TABLE(MD) { FileName fn_ref; MD.getValue(EMDL_BODY_REFERENCE_NAME, fn_ref); if (fn_ref != "None") { Image img; img.read(fn_ref); img().setXmippOrigin(); Iref[ibody] = img(); } ibody++; } } } // Find the overlap of the bodies, and extend the Iref, PPref and masks_bodies vectors pointer_body_overlap.resize(nr_bodies, nr_bodies); pointer_body_overlap_inv.resize(nr_bodies); //#define DEBUG_OVERLAP if (norm_body_mask_overlap) { MultidimArray sum_mask = masks_bodies[0]; for (int ibody = 1; ibody < nr_bodies; ibody++) sum_mask += masks_bodies[ibody]; FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY1D(sum_mask) if (DIRECT_A1D_ELEM(sum_mask, i) > 1.) 
for (int ibody = 0; ibody < nr_bodies; ibody++) DIRECT_A1D_ELEM(masks_bodies[ibody], i) /= DIRECT_A1D_ELEM(sum_mask, i); for (int ibody = 0; ibody < nr_bodies; ibody++) { for (int obody = 0; obody < nr_bodies; obody++) DIRECT_A2D_ELEM(pointer_body_overlap, ibody, obody) = obody; pointer_body_overlap_inv[ibody] = ibody; } #ifdef DEBUG_OVERLAP for (int ibody = 0; ibody < nr_bodies; ibody++) { Image It; It()= masks_bodies[ibody]; fnt = "mask_ibody"+integerToString(ibody)+".spi"; It.write(fnt); std::cerr << " PPref.size()= " << PPref.size() << std::endl; } #endif } else { for (int ibody = 0; ibody < nr_bodies; ibody++) { #ifdef DEBUG_OVERLAP Image It; FileName fnt; It()= masks_bodies[ibody]; fnt = "mask_ibody"+integerToString(ibody)+".spi"; It.write(fnt); #endif for (int obody = 0; obody < nr_bodies; obody++) { if (ibody == obody) { DIRECT_A2D_ELEM(pointer_body_overlap, ibody, obody) = obody; pointer_body_overlap_inv[obody] = obody; } else { // Sum all the previously done obody masks to see whether there is also overlap with any of them MultidimArray overlap_mask = masks_bodies[ibody]; for (int oldobody = 0; oldobody < obody; oldobody++) { if (oldobody != ibody) { int ii = DIRECT_A2D_ELEM(pointer_body_overlap, ibody, oldobody); overlap_mask += masks_bodies[ii]; } } // Calculate the overlap between the sum of ibody and all the old obodies until now overlap_mask *= masks_bodies[obody]; // element-wise multiplication // If there is overlap, generate another PPref if (overlap_mask.sum() > 0.) { // Calculate the mask that has the overlap subtracted from the obody mask overlap_mask = masks_bodies[obody] - overlap_mask; // set the right pointer in the 2D matrix DIRECT_A2D_ELEM(pointer_body_overlap, ibody, obody) = PPref.size(); //std::cerr << " ibody= " << ibody << " obody= " << obody << " overlap= " << overlap_mask.sum() << " icc= " << PPref.size() << std::endl; // Extend the two vectors here! 
PPref.push_back(PPref[obody]); masks_bodies.push_back(overlap_mask); // And keep track of which ibody this entry belonged to pointer_body_overlap_inv.push_back(obody); #ifdef DEBUG_OVERLAP It()= overlap_mask; fnt = "mask_ibody"+integerToString(ibody)+"_obody"+integerToString(obody)+"_overlap.spi"; It.write(fnt); std::cerr << " PPref.size()= " << PPref.size() << std::endl; #endif } else // if there is no overlap: just point to the original obody DIRECT_A2D_ELEM(pointer_body_overlap, ibody, obody) = obody; } } } } } void MlModel::writeBildFileBodies(FileName fn_bild) { std::ofstream fh_bild; fh_bild.open(fn_bild.c_str(), std::ios::out); if (!fh_bild) REPORT_ERROR("HealpixSampling::writeBildFileOrientationalDistribution: cannot open " + fn_bild); RFLOAT xcen = -STARTINGX(Iref[0]) * pixel_size; RFLOAT ycen = -STARTINGY(Iref[0]) * pixel_size; RFLOAT zcen = -STARTINGZ(Iref[0]) * pixel_size; // Place a black sphere in the centre of the box fh_bild << ".color 0 0 0 " << std::endl; fh_bild << ".sphere " << xcen << " " << ycen << " " << zcen << " 3 " << std::endl; for (int ibody = 0; ibody < nr_bodies; ibody++) { // Sample evenly colors from the rainbow RFLOAT r, g, b; HSL2RGB((RFLOAT)ibody/(RFLOAT)nr_bodies, 1.0, 0.5, r, g, b); fh_bild << ".color " << r << " " << g << " " << b << std::endl; // Place a sphere at the centre-of-mass RFLOAT x = XX(com_bodies[ibody]) * pixel_size; RFLOAT y = YY(com_bodies[ibody]) * pixel_size; RFLOAT z = ZZ(com_bodies[ibody]) * pixel_size; // Add the center of the box to the coordinates x += pixel_size + xcen; y += pixel_size + ycen; z += pixel_size + zcen; fh_bild << ".sphere " << x << " " << y << " " << z << " 3 " << std::endl; // Add a label fh_bild << ".cmov " << x+5 << " " << y+5 << " " << z+5 << std::endl; fh_bild << "body " << ibody+1 << std::endl; // Add an arrow for the direction of the rotation RFLOAT length = 10.; fh_bild << ".arrow " << x << " " << y << " " << z << " " << x + length*XX(rotate_direction_bodies[ibody]) * pixel_size << " " << y + length*YY(rotate_direction_bodies[ibody]) * pixel_size << " " << z + length*ZZ(rotate_direction_bodies[ibody]) * pixel_size << " 1 " << std::endl; } // Close and write file to disc fh_bild.close(); } void MlModel::setFourierTransformMaps(bool update_tau2_spectra, int nr_threads, RFLOAT strict_lowres_exp, const MultidimArray *fourier_mask) { bool do_heavy(true); int min_ires = -1; if (strict_lowres_exp > 0) { min_ires = ROUND(pixel_size * ori_size / strict_lowres_exp); // std::cout << "MlModel::setFourierTransformMaps: strict_lowres_exp = " << strict_lowres_exp // << " pixel_size = " << pixel_size << " ori_size = " << ori_size << " min_ires = " << min_ires << std::endl;; } // Note that PPref.size() can be bigger than nr_bodies in multi-body refinement, due to extra PPrefs needed for overlapping bodies // These only exist in PPref form, they are not needed for reconstructions, only for subtractions in getFourierTransformsAndCtfs for (int iclass = 0; iclass < PPref.size(); iclass++) { MultidimArray Irefp; if (nr_bodies > 1) { // ibody deals with overlapping bodies here, as iclass can be larger than nr_bodies when bodies overlap, // but there are only nr_bodies Iref; ibody is the number of the original body (max nr_bodies) int ibody = pointer_body_overlap_inv[iclass]; Irefp = Iref[ibody] * masks_bodies[iclass]; // Place each body with its center-of-mass in the center of the box selfTranslate(Irefp, -com_bodies[ibody], DONT_WRAP); } else { Irefp = Iref[iclass]; } if(PPrefRank.size() > 1) do_heavy = PPrefRank[iclass]; if 
(update_tau2_spectra && iclass < nr_classes * nr_bodies) { PPref[iclass].computeFourierTransformMap(Irefp, tau2_class[iclass], current_size, nr_threads, true, do_heavy, min_ires, fourier_mask, do_gpu); } else { MultidimArray dummy; PPref[iclass].computeFourierTransformMap(Irefp, dummy, current_size, nr_threads, true, do_heavy, min_ires, fourier_mask, do_gpu); } } } void MlModel::initialiseDataVersusPrior(bool fix_tau) { // Get total number of particles RFLOAT nr_particles = 0.; for (int igroup = 0; igroup < nr_particles_per_group.size(); igroup++) { nr_particles += (RFLOAT)nr_particles_per_group[igroup]; } // Calculate average sigma2_noise over all image groups MultidimArray avg_sigma2_noise, sum_parts; avg_sigma2_noise.initZeros(ori_size /2 + 1); sum_parts.initZeros(ori_size /2 + 1); for (int igroup = 0; igroup < nr_particles_per_group.size(); igroup++) { avg_sigma2_noise += (RFLOAT)(nr_particles_per_group[igroup]) * sigma2_noise[igroup]; } avg_sigma2_noise /= nr_particles; // Get the FT of all reference structures // The Fourier Transforms are all "normalised" for 2D transforms of size = ori_size x ori_size // And spectrum is squared, so ori_size*ori_size in the 3D case! RFLOAT normfft = (ref_dim == 3 && data_dim == 2) ? (RFLOAT)(ori_size * ori_size) : 1.; int nr_classes_bodies = nr_classes * nr_bodies; // also set multiple bodies! for (int iclass = 0; iclass < nr_classes_bodies; iclass++) { // Initialise output arrays to correct size tau2_class[iclass].resize(ori_size /2 + 1); // Get the power spectrum of the reference MultidimArray spectrum(ori_size /2 + 1); getSpectrum(Iref[iclass], spectrum, POWER_SPECTRUM); // Factor two because of two-dimensionality of the complex plane // (just like sigma2_noise estimates, the power spectra should be divided by 2) spectrum *= normfft / 2.; // Update the tau2_class spectrum for this reference // This is only for writing out in the it000000_model.star file if (!fix_tau) { FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY1D(tau2_class[iclass]) { DIRECT_A1D_ELEM(tau2_class[iclass], i) = tau2_fudge_factor * DIRECT_A1D_ELEM(spectrum, i); } } // Calculate data_vs_prior_class as spectral_nr_observations_per_class/sigma2_noise vs 1/tau2_class data_vs_prior_class[iclass].resize(ori_size /2 + 1); if (nr_bodies > 1) { fsc_halves_class[iclass].initZeros(ori_size /2 + 1); } FOR_ALL_DIRECT_ELEMENTS_IN_ARRAY1D(tau2_class[iclass]) { RFLOAT evidence = nr_particles * pdf_class[iclass] / DIRECT_A1D_ELEM(avg_sigma2_noise, i); // empirical accounting for ratio of pixels in 3D shells compared to 2D shells if (ref_dim == 3 && i > 0) evidence /= (2. * (RFLOAT)i); RFLOAT prior = 1. / DIRECT_A1D_ELEM(tau2_class[iclass], i); RFLOAT myssnr = evidence / prior; DIRECT_A1D_ELEM(data_vs_prior_class[iclass], i ) = myssnr; // Also initialise FSC-halves here (...) 
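/* Illustrative aside, not part of the RELION sources: the value stored in data_vs_prior_class
 * above is a spectral signal-to-noise ratio (the code calls it myssnr), and the commented-out
 * line immediately below would convert it to an FSC-like value as myssnr / (myssnr + 1).
 * A minimal stand-alone sketch of that conversion and its algebraic inverse, with hypothetical
 * helper names:
 *
 *   // FSC-like value from a spectral SSNR, as in the commented-out line below
 *   double ssnrToFsc(double ssnr) { return ssnr / (ssnr + 1.0); }
 *   // inverse mapping (only meaningful for fsc < 1)
 *   double fscToSsnr(double fsc)  { return fsc / (1.0 - fsc); }
 *
 * e.g. an SSNR of 1 maps to 0.5, and an SSNR of 3 maps to 0.75.
 */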
//DIRECT_A1D_ELEM(fsc_halves_class[iclass], i ) = myssnr / (myssnr + 1); } } // end loop iclass } void MlModel::initialiseHelicalParametersLists(RFLOAT _helical_twist, RFLOAT _helical_rise) { if (nr_classes < 1) REPORT_ERROR("MlModel.cpp::initialiseHelicalParametersLists nr_classes is smaller than 1"); helical_twist.resize(nr_classes); helical_rise.resize(nr_classes); for (int iclass = 0; iclass < nr_classes; iclass++) { helical_twist[iclass] = _helical_twist; helical_rise[iclass] = _helical_rise; } } void MlModel::calculateTotalFourierCoverage() { for (int iclass = 0; iclass < nr_classes * nr_bodies; iclass++) { int maxres = 0; for (int ires = 0; ires < XSIZE(data_vs_prior_class[iclass]); ires++) { if (DIRECT_A1D_ELEM(data_vs_prior_class[iclass], ires) < 1.) break; maxres = ires; } int coverwindow = maxres*2 - 1; estimated_resolution[iclass] = 1./getResolution(maxres); total_fourier_coverage[iclass] = 0.; RFLOAT count = 0; for (long int k=FIRST_XMIPP_INDEX(coverwindow); k<=LAST_XMIPP_INDEX(coverwindow); k++) \ for (long int i=FIRST_XMIPP_INDEX(coverwindow); i<=LAST_XMIPP_INDEX(coverwindow); i++) \ for (long int j=FIRST_XMIPP_INDEX(coverwindow); j<=LAST_XMIPP_INDEX(coverwindow); j++) \ { int r = sqrt(RFLOAT(k*k+i*i+j*j)); if (r <= maxres) { total_fourier_coverage[iclass] += DIRECT_A1D_ELEM(fourier_coverage_class[iclass], r); count += 1.; } } total_fourier_coverage[iclass] /= count; } } /////////// MlWsumModel void MlWsumModel::initialise(MlModel &_model, FileName fn_sym, bool asymmetric_padding, bool _skip_gridding) { pixel_size = _model.pixel_size; nr_classes = _model.nr_classes; nr_bodies = _model.nr_bodies; nr_groups = _model.nr_groups; nr_directions = _model.nr_directions; ref_dim = _model.ref_dim; data_dim = _model.data_dim; ori_size = _model.ori_size; pdf_class = _model.pdf_class; if (ref_dim == 2) prior_offset_class = _model.prior_offset_class; pdf_direction = _model.pdf_direction; sigma2_offset = _model.sigma2_offset; sigma2_noise = _model.sigma2_noise; sigma2_rot = _model.sigma2_rot; sigma2_tilt = _model.sigma2_tilt; sigma2_psi = _model.sigma2_psi; interpolator = _model.interpolator; r_min_nn = _model.r_min_nn; is_helix = _model.is_helix; helical_nr_asu = _model.helical_nr_asu; helical_twist_min = _model.helical_twist_min; helical_twist_max = _model.helical_twist_max; helical_twist_inistep = _model.helical_twist_inistep; helical_rise_min = _model.helical_rise_min; helical_rise_max = _model.helical_rise_max; helical_rise_inistep = _model.helical_rise_inistep; padding_factor = _model.padding_factor; if (asymmetric_padding) padding_factor ++; // Don't need forward projectors in MlWsumModel! 
PPref.clear(); // Don't need scale_correction and bfactor_correction, keep wsum_signal_product and wsum_reference_power instead scale_correction.clear(); bfactor_correction.clear(); tau2_class.clear(); data_vs_prior_class.clear(); acc_rot.clear(); acc_trans.clear(); estimated_resolution.clear(); total_fourier_coverage.clear(); orientability_contrib.clear(); helical_twist.resize(nr_classes); helical_rise.resize(nr_classes); for (int iclass = 0; iclass < nr_classes; iclass++) { helical_twist[iclass] = _model.helical_twist[iclass]; helical_rise[iclass] = _model.helical_rise[iclass]; } wsum_signal_product.resize(nr_groups); wsum_reference_power.resize(nr_groups); for (long int igroup = 0; igroup < nr_groups; igroup++) { wsum_signal_product[igroup] = 0.; wsum_reference_power[igroup] = 0.; } // Resize MlWsumModel-specific vectors BackProjector BP(ori_size, ref_dim, fn_sym, interpolator, padding_factor, r_min_nn, ML_BLOB_ORDER, ML_BLOB_RADIUS, ML_BLOB_ALPHA, data_dim, _skip_gridding); BPref.clear(); BPref.resize(nr_classes * nr_bodies, BP); // also set multiple bodies sumw_group.resize(nr_groups); } void MlWsumModel::initZeros() { LL = 0.; ave_Pmax = 0.; sigma2_offset = 0.; avg_norm_correction = 0.; sigma2_rot = 0.; sigma2_tilt = 0.; sigma2_psi = 0.; // Set all weighted sums to zero for (int iclass = 0; iclass < nr_classes * nr_bodies; iclass++) { BPref[iclass].initZeros(current_size); // Assume pdf_direction is already of the right size... pdf_direction[iclass].initZeros(); } for (int iclass = 0; iclass < nr_classes; iclass++) { pdf_class[iclass] = 0.; if (ref_dim == 2) prior_offset_class[iclass].initZeros(); } // Initialise sigma2_noise spectra and sumw_group for (int igroup = 0; igroup < nr_groups; igroup++) { sumw_group[igroup] = 0.; sigma2_noise[igroup].initZeros(); wsum_signal_product[igroup] = 0.; wsum_reference_power[igroup] = 0.; } } //#define DEBUG_PACK #ifdef DEBUG_PACK #define MAX_PACK_SIZE 100000 #else // Approximately 1024*1024*1024/8/2 ~ 0.5 Gb #define MAX_PACK_SIZE 671010000 #endif void MlWsumModel::pack(MultidimArray &packed) { unsigned long long packed_size = 0; int spectral_size = (ori_size / 2) + 1; // for LL & avePmax & sigma2_offset & avg_norm_correction & sigma2_rot & sigma2_tilt & sigma2_psi packed_size += 7 ; // for all group-related stuff packed_size += nr_groups * spectral_size; // for sumw_group packed_size += 3 * nr_groups; // for all class-related stuff // data is complex: multiply by two! 
packed_size += nr_classes * nr_bodies * 2 * (unsigned long long)BPref[0].getSize(); packed_size += nr_classes * nr_bodies * (unsigned long long)BPref[0].getSize(); packed_size += nr_classes * nr_bodies * (unsigned long long)nr_directions; // for pdf_class packed_size += nr_classes; // for priors for each class if (ref_dim==2) packed_size += nr_classes*2; // Get memory for the packed array packed.clear(); packed.resize(packed_size); // Start packing unsigned long long idx = 0; DIRECT_MULTIDIM_ELEM(packed, idx++) = LL; DIRECT_MULTIDIM_ELEM(packed, idx++) = ave_Pmax; DIRECT_MULTIDIM_ELEM(packed, idx++) = sigma2_offset; DIRECT_MULTIDIM_ELEM(packed, idx++) = avg_norm_correction; DIRECT_MULTIDIM_ELEM(packed, idx++) = sigma2_rot; DIRECT_MULTIDIM_ELEM(packed, idx++) = sigma2_tilt; DIRECT_MULTIDIM_ELEM(packed, idx++) = sigma2_psi; for (int igroup = 0; igroup < nr_groups; igroup++) { FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(sigma2_noise[igroup]) { DIRECT_MULTIDIM_ELEM(packed, idx++) =DIRECT_MULTIDIM_ELEM(sigma2_noise[igroup], n); } sigma2_noise[igroup].clear(); DIRECT_MULTIDIM_ELEM(packed, idx++) = wsum_signal_product[igroup]; DIRECT_MULTIDIM_ELEM(packed, idx++) = wsum_reference_power[igroup]; DIRECT_MULTIDIM_ELEM(packed, idx++) = sumw_group[igroup]; } for (int iclass = 0; iclass < nr_classes * nr_bodies; iclass++) { FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(BPref[iclass].data) { DIRECT_MULTIDIM_ELEM(packed, idx++) = (DIRECT_MULTIDIM_ELEM(BPref[iclass].data, n)).real; DIRECT_MULTIDIM_ELEM(packed, idx++) = (DIRECT_MULTIDIM_ELEM(BPref[iclass].data, n)).imag; } BPref[iclass].data.clear(); FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(BPref[iclass].weight) { DIRECT_MULTIDIM_ELEM(packed, idx++) = DIRECT_MULTIDIM_ELEM(BPref[iclass].weight, n); } BPref[iclass].weight.clear(); FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(pdf_direction[iclass]) { DIRECT_MULTIDIM_ELEM(packed, idx++) = DIRECT_MULTIDIM_ELEM(pdf_direction[iclass], n); } } for (int iclass = 0; iclass < nr_classes; iclass++) { pdf_direction[iclass].clear(); DIRECT_MULTIDIM_ELEM(packed, idx++) = pdf_class[iclass]; if (ref_dim==2) { DIRECT_MULTIDIM_ELEM(packed, idx++) = XX(prior_offset_class[iclass]); DIRECT_MULTIDIM_ELEM(packed, idx++) = YY(prior_offset_class[iclass]); } } #ifdef DEBUG_PACK std::cerr << " idx= " << idx << " packed_size= " << packed_size << std::endl; #endif // Just to check whether we went outside our memory... 
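/* Illustrative aside, not part of the RELION sources: the consistency check just below only
 * passes if the packed_size bookkeeping at the top of pack() matches the number of elements
 * actually written. A stand-alone sketch of that size computation, with hypothetical names,
 * following the same terms accumulated above (nr_classes_bodies standing for
 * nr_classes * nr_bodies, bpref_size for BPref[0].getSize()):
 *
 *   unsigned long long packedSize(unsigned long long nr_groups,
 *                                 unsigned long long nr_classes,
 *                                 unsigned long long nr_classes_bodies,
 *                                 unsigned long long spectral_size,
 *                                 unsigned long long bpref_size,
 *                                 unsigned long long nr_directions,
 *                                 bool ref_is_2d)
 *   {
 *       unsigned long long n = 7;                  // LL, ave_Pmax, sigma2_offset, avg_norm_correction, sigma2_rot/tilt/psi
 *       n += nr_groups * spectral_size;            // sigma2_noise spectrum per group
 *       n += 3 * nr_groups;                        // wsum_signal_product, wsum_reference_power, sumw_group
 *       n += nr_classes_bodies * 2 * bpref_size;   // complex BPref.data (real + imaginary)
 *       n += nr_classes_bodies * bpref_size;       // BPref.weight
 *       n += nr_classes_bodies * nr_directions;    // pdf_direction
 *       n += nr_classes;                           // pdf_class
 *       if (ref_is_2d) n += 2 * nr_classes;        // prior_offset_class (x, y)
 *       return n;
 *   }
 */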
if (idx != packed_size) { std::cerr << "idx= " << idx << "packed_size= " << packed_size << std::endl; REPORT_ERROR("MlWsumModel::pack: idx != packed_size"); } } void MlWsumModel::unpack(MultidimArray &packed) { unsigned long long idx = 0; int spectral_size = (ori_size / 2) + 1; LL = DIRECT_MULTIDIM_ELEM(packed, idx++); ave_Pmax = DIRECT_MULTIDIM_ELEM(packed, idx++); sigma2_offset = DIRECT_MULTIDIM_ELEM(packed, idx++); avg_norm_correction = DIRECT_MULTIDIM_ELEM(packed, idx++); sigma2_rot = DIRECT_MULTIDIM_ELEM(packed, idx++); sigma2_tilt = DIRECT_MULTIDIM_ELEM(packed, idx++); sigma2_psi = DIRECT_MULTIDIM_ELEM(packed, idx++); for (int igroup = 0; igroup < nr_groups; igroup++) { sigma2_noise[igroup].resize(spectral_size); FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(sigma2_noise[igroup]) { DIRECT_MULTIDIM_ELEM(sigma2_noise[igroup], n) = DIRECT_MULTIDIM_ELEM(packed, idx++); } wsum_signal_product[igroup] = DIRECT_MULTIDIM_ELEM(packed, idx++); wsum_reference_power[igroup] = DIRECT_MULTIDIM_ELEM(packed, idx++); sumw_group[igroup] = DIRECT_MULTIDIM_ELEM(packed, idx++); } for (int iclass = 0; iclass < nr_classes * nr_bodies; iclass++) { BPref[iclass].initialiseDataAndWeight(current_size); FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(BPref[iclass].data) { (DIRECT_MULTIDIM_ELEM(BPref[iclass].data, n)).real = DIRECT_MULTIDIM_ELEM(packed, idx++); (DIRECT_MULTIDIM_ELEM(BPref[iclass].data, n)).imag = DIRECT_MULTIDIM_ELEM(packed, idx++); } FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(BPref[iclass].weight) { DIRECT_MULTIDIM_ELEM(BPref[iclass].weight, n) = DIRECT_MULTIDIM_ELEM(packed, idx++); } pdf_direction[iclass].resize(nr_directions); FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(pdf_direction[iclass]) { DIRECT_MULTIDIM_ELEM(pdf_direction[iclass], n) = DIRECT_MULTIDIM_ELEM(packed, idx++); } } for (int iclass = 0; iclass < nr_classes; iclass++) { pdf_class[iclass] = DIRECT_MULTIDIM_ELEM(packed, idx++); if (ref_dim==2) { XX(prior_offset_class[iclass]) = DIRECT_MULTIDIM_ELEM(packed, idx++); YY(prior_offset_class[iclass]) = DIRECT_MULTIDIM_ELEM(packed, idx++); } } unsigned long long packed_size = MULTIDIM_SIZE(packed); packed.clear(); // Just to check whether we went outside our memory... if (idx != packed_size) { std::cerr << "idx= " << idx << " packed_size= " << packed_size << std::endl; REPORT_ERROR("MlWsumModel::unpack: idx != idx_stop-idx_start"); } } void MlWsumModel::pack(MultidimArray &packed, int &piece, int &nr_pieces, bool do_clear) { // Determine size of the packed array unsigned long long nr_groups = sigma2_noise.size(); unsigned long long nr_classes_bodies = BPref.size(); unsigned long long nr_classes = pdf_class.size(); unsigned long long spectral_size = (ori_size / 2) + 1; unsigned long long packed_size = 0; unsigned long long idx_start, idx_stop; // for LL & avePmax & sigma2_offset & avg_norm_correction & sigma2_rot & sigma2_tilt & sigma2_psi packed_size += 7 ; // for group-related spectra packed_size += nr_groups * spectral_size; // sigma2_noise[spectral_size] // for sumw_group packed_size += 3 * nr_groups; // wsum_signal_product, wsum_reference_power, sumw_group // for all class-related stuff // data is complex: multiply by two! 
packed_size += nr_classes_bodies * 2 * (unsigned long long) BPref[0].getSize(); // BPref.data packed_size += nr_classes_bodies * (unsigned long long) BPref[0].getSize(); // BPref.weight packed_size += nr_classes_bodies * (unsigned long long) nr_directions; // pdf_directions // for pdf_class packed_size += nr_classes; // for priors for each class if (ref_dim==2) packed_size += nr_classes*2; if (piece < 0 && nr_pieces < 0) { // Special case: prevent making multiple pieces if input piece and nr_pieces are both negative idx_start = 0; idx_stop = packed_size; } else if (packed_size > MAX_PACK_SIZE) { idx_start = (unsigned long long)piece * MAX_PACK_SIZE; idx_stop = XMIPP_MIN(idx_start + MAX_PACK_SIZE, packed_size); nr_pieces = CEIL((RFLOAT)packed_size/(RFLOAT)MAX_PACK_SIZE); } else { idx_start = 0; idx_stop = packed_size; nr_pieces = 1; } // increment piece so that pack will be called again piece++; //#define DEBUG_PACK #ifdef DEBUG_PACK std::cerr << " PACK: idx_start= " << idx_start << " idx_stop= " << idx_stop << " piece= " << piece << " nr_pieces= " << nr_pieces <<" packed_size= "< &packed, int piece, bool do_clear) { int nr_groups = sigma2_noise.size(); int nr_classes_bodies = BPref.size(); int nr_classes = pdf_class.size(); int spectral_size = (ori_size / 2) + 1; unsigned long long idx_start; unsigned long long idx_stop; if (piece < 0) { // Special case: prevent making multiple pieces if input piece is negative idx_start = 0; idx_stop = MULTIDIM_SIZE(packed); } else { idx_start = (unsigned long long)piece * MAX_PACK_SIZE; idx_stop = idx_start + (unsigned long long)MULTIDIM_SIZE(packed); } unsigned long long ori_idx = 0; unsigned long long idx = 0; #ifdef DEBUG_PACK std::cerr << " UNPACK piece= " << piece << " idx_start= " << idx_start << " idx_stop= " << idx_stop << std::endl; #endif if (ori_idx >= idx_start && ori_idx < idx_stop) LL = DIRECT_MULTIDIM_ELEM(packed, idx++); ori_idx++; if (ori_idx >= idx_start && ori_idx < idx_stop) ave_Pmax = DIRECT_MULTIDIM_ELEM(packed, idx++); ori_idx++; if (ori_idx >= idx_start && ori_idx < idx_stop) sigma2_offset = DIRECT_MULTIDIM_ELEM(packed, idx++); ori_idx++; if (ori_idx >= idx_start && ori_idx < idx_stop) avg_norm_correction = DIRECT_MULTIDIM_ELEM(packed, idx++); ori_idx++; if (ori_idx >= idx_start && ori_idx < idx_stop) sigma2_rot = DIRECT_MULTIDIM_ELEM(packed, idx++); ori_idx++; if (ori_idx >= idx_start && ori_idx < idx_stop) sigma2_tilt = DIRECT_MULTIDIM_ELEM(packed, idx++); ori_idx++; if (ori_idx >= idx_start && ori_idx < idx_stop) sigma2_psi = DIRECT_MULTIDIM_ELEM(packed, idx++); ori_idx++; for (int igroup = 0; igroup < nr_groups; igroup++) { if (idx == ori_idx) sigma2_noise[igroup].resize(spectral_size); FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(sigma2_noise[igroup]) { if (ori_idx >= idx_start && ori_idx < idx_stop) DIRECT_MULTIDIM_ELEM(sigma2_noise[igroup], n) = DIRECT_MULTIDIM_ELEM(packed, idx++); ori_idx++; } if (ori_idx >= idx_start && ori_idx < idx_stop) wsum_signal_product[igroup] = DIRECT_MULTIDIM_ELEM(packed, idx++); ori_idx++; if (ori_idx >= idx_start && ori_idx < idx_stop) wsum_reference_power[igroup] = DIRECT_MULTIDIM_ELEM(packed, idx++); ori_idx++; if (ori_idx >= idx_start && ori_idx < idx_stop) sumw_group[igroup] = DIRECT_MULTIDIM_ELEM(packed, idx++); ori_idx++; } for (int iclass = 0; iclass < nr_classes_bodies; iclass++) { if (idx == ori_idx) BPref[iclass].initialiseDataAndWeight(current_size); FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(BPref[iclass].data) { if (ori_idx >= idx_start && ori_idx < idx_stop) 
(DIRECT_MULTIDIM_ELEM(BPref[iclass].data, n)).real = DIRECT_MULTIDIM_ELEM(packed, idx++); ori_idx++; if (ori_idx >= idx_start && ori_idx < idx_stop) (DIRECT_MULTIDIM_ELEM(BPref[iclass].data, n)).imag = DIRECT_MULTIDIM_ELEM(packed, idx++); ori_idx++; //DIRECT_MULTIDIM_ELEM(BPref[iclass].data, n) = Complex(re, im); } FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(BPref[iclass].weight) { if (ori_idx >= idx_start && ori_idx < idx_stop) DIRECT_MULTIDIM_ELEM(BPref[iclass].weight, n) = DIRECT_MULTIDIM_ELEM(packed, idx++); ori_idx++; } if (idx == ori_idx) pdf_direction[iclass].resize(nr_directions); FOR_ALL_DIRECT_ELEMENTS_IN_MULTIDIMARRAY(pdf_direction[iclass]) { if (ori_idx >= idx_start && ori_idx < idx_stop) DIRECT_MULTIDIM_ELEM(pdf_direction[iclass], n) = DIRECT_MULTIDIM_ELEM(packed, idx++); ori_idx++; } } for (int iclass = 0; iclass < nr_classes; iclass++) { if (ori_idx >= idx_start && ori_idx < idx_stop) pdf_class[iclass] = DIRECT_MULTIDIM_ELEM(packed, idx++); ori_idx++; if (ref_dim == 2) { if (ori_idx >= idx_start && ori_idx < idx_stop) XX(prior_offset_class[iclass]) = DIRECT_MULTIDIM_ELEM(packed, idx++); ori_idx++; if (ori_idx >= idx_start && ori_idx < idx_stop) YY(prior_offset_class[iclass]) = DIRECT_MULTIDIM_ELEM(packed, idx++); ori_idx++; } } unsigned long long packed_size = MULTIDIM_SIZE(packed); // Free memory if (do_clear) packed.clear(); // Just to check whether we went outside our memory... //std::cerr << " UNPACK piece= " << piece << " idx= " << idx << " idx_stop-idx_start= " << idx_stop-idx_start << " idx_start= " << idx_start << " idx_stop= " << idx_stop << std::endl; if (idx != idx_stop-idx_start) { std::cerr << "idx= " << idx << "ori_idx= " << ori_idx << " idx_start= " << idx_start << " idx_stop= " << idx_stop << " packed_size= " << packed_size << std::endl; REPORT_ERROR("MlWsumModel::unpack: idx != idx_stop-idx_start"); } } relion-3.1.3/src/ml_model.h000066400000000000000000000372021411340063500155370ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. 
***************************************************************************/ #ifndef ML_MODEL_H_ #define ML_MODEL_H_ #include "src/projector.h" #include "src/backprojector.h" #include "src/metadata_table.h" #include "src/exp_model.h" #include "src/healpix_sampling.h" #define ML_BLOB_ORDER 0 #define ML_BLOB_RADIUS 1.9 #define ML_BLOB_ALPHA 15 class MlModel { public: // Dimension of the references (2D or 3D) int ref_dim; // Dimension of the data (2D or 3D) int data_dim; // Original size of the images int ori_size; // Pixel size (in Angstrom) RFLOAT pixel_size; // Current size of the images to be used in the expectation int current_size; // Current resolution (in 1/Ang) RFLOAT current_resolution; // Number of classes int nr_classes; // Number of independent bodies for multi-body refinement int nr_bodies; // Number of image groups with separate sigma2_noise spectra int nr_groups; // Perform SGD instead of expectation maximization? bool do_sgd; // Number of particles in each group std::vector nr_particles_per_group; // Number of directions (size of pdf_direction); long long int nr_directions; // Log-likelihood target value RFLOAT LL; // Padding factor RFLOAT padding_factor; // Fourier space interpolator int interpolator; // Minimum number of shells to perform linear interpolation int r_min_nn; // Average Pmax of the normalised probability distributions RFLOAT ave_Pmax; // Average normalisation correction factor RFLOAT avg_norm_correction; // Variance in the origin offsets RFLOAT sigma2_offset; // Fudge factor to adjust estimated tau2_class spectra RFLOAT tau2_fudge_factor; // Vector with all reference images std::vector > Iref; // Vector with all SGD gradients std::vector > Igrad; // Vector with masks for all bodies in multi-body refinement std::vector > masks_bodies; // Vector with center-of-mass coordinates for all bodies in multi-body refinement std::vector > com_bodies; // Vector with 2D matrices that pre-orient all bodies in multi-body refinement std::vector > orient_bodies; // Vector with directions around which to rotate each body in multi-body refinement std::vector > rotate_direction_bodies; // One projector for each class; std::vector PPref; std::vector PPrefRank; // One name for each group std::vector group_names; // One noise spectrum for each group std::vector > sigma2_noise; // One intensity scale for each group std::vector scale_correction; // One intensity B-factor for each group std::vector bfactor_correction; // Prior information: one restrained power_class spectrum for each class (inverse of right-hand side in Wiener-filter-like update formula) std::vector > tau2_class; // Radial average of the estimated variance in the reconstruction (inverse of left-hand side in Wiener-filter-like update formula) std::vector > sigma2_class; // FSC spectra between random halves of the data (multiple ones for each body in multibody-refinement) std::vector< MultidimArray > fsc_halves_class; // One likelihood vs prior ratio spectrum for each class std::vector > data_vs_prior_class; // One Fourier-coverage spectrum for each class std::vector > fourier_coverage_class; // One value for each class std::vector pdf_class; // One array for each class std::vector > pdf_direction; // Priors for offsets for each class (only in 2D) std::vector > prior_offset_class; // Mode for orientational prior distributions int orientational_prior_mode; // Variance in rot angle for the orientational pdf RFLOAT sigma2_rot; // Variance in tilt angle for the orientational pdf RFLOAT sigma2_tilt; // Variance in psi angle 
for the orientational pdf RFLOAT sigma2_psi; // Stddev in tilt angle for the orientational pdf of each body std::vector sigma_tilt_bodies; // Stddev in psi angle for the orientational pdf of each body std::vector sigma_psi_bodies; // Stddev in offsets for the orientational pdf of each body std::vector sigma_offset_bodies; // Is this body kept fixed in refinement? std::vector keep_fixed_bodies; // Maximum radius of mask (in Angstrom!) std::vector max_radius_mask_bodies; // 2D Matrix with pointers to the PPrefs for overlapping bodies MultidimArray pointer_body_overlap; std::vector pointer_body_overlap_inv; // Estimated accuracy at which rotations can be assigned, one for each class std::vector acc_rot; // Estimated accuracy at which translations can be assigned, one for each class std::vector acc_trans; // The estimate resolution, one for each class std::vector estimated_resolution; // Fourier coverage up to the estimate resolution, one for each class std::vector total_fourier_coverage; // Spectral contribution to orientability of individual particles, one for each class std::vector > orientability_contrib; // Nov20,2015 - Shaoda, Helical refinement bool is_helix; // Number of helical asymmetrical units int helical_nr_asu; // Helical twist (in degrees) std::vector helical_twist; // Helical rise (in Angstroms) std::vector helical_rise; // Search range of helical twist (in degrees) RFLOAT helical_twist_min, helical_twist_max, helical_twist_inistep; // Search range of helical rise (in Angstroms) RFLOAT helical_rise_min, helical_rise_max, helical_rise_inistep; // Normalize overlapping regions in multibody masks bool norm_body_mask_overlap; // Process data on GPU bool do_gpu; public: // Constructor MlModel(): ref_dim(0), data_dim(0), ori_size(0), pixel_size (0), current_size(0), current_resolution(0), nr_classes(0), nr_bodies(0), nr_groups(0), nr_directions(0), LL(0), padding_factor(0.), interpolator(0), r_min_nn(0), ave_Pmax(0), avg_norm_correction(0), sigma2_offset(0), tau2_fudge_factor(0), orientational_prior_mode(0), sigma2_rot(0), sigma2_tilt(0), sigma2_psi(0), is_helix(0), helical_nr_asu(1), helical_twist_min(0), helical_twist_max(0), helical_twist_inistep(0), helical_rise_min(0), helical_rise_max(0), helical_rise_inistep(0), norm_body_mask_overlap(false), do_gpu(false) { clear(); } // Destructor ~MlModel() { clear(); } /** Assignment operator */ MlModel& operator =(const MlModel &MD) { if (this != &MD) { clear(); ref_dim = MD.ref_dim; data_dim = MD.data_dim; ori_size = MD.ori_size; pixel_size = MD.pixel_size; current_size = MD.current_size; current_resolution = MD.current_resolution; nr_classes = MD.nr_classes; nr_bodies = MD.nr_bodies; nr_groups = MD.nr_groups; do_sgd = MD.do_sgd; nr_directions = MD.nr_directions; LL = MD.LL; padding_factor = MD.padding_factor; interpolator = MD.interpolator; r_min_nn = MD.r_min_nn; ave_Pmax = MD.ave_Pmax; avg_norm_correction = MD.avg_norm_correction; sigma2_offset = MD.sigma2_offset; tau2_fudge_factor = MD.tau2_fudge_factor; orientational_prior_mode = MD.orientational_prior_mode; sigma2_rot = MD.sigma2_rot; sigma2_tilt = MD.sigma2_tilt; sigma2_psi = MD.sigma2_psi; is_helix = MD.is_helix; helical_nr_asu = MD.helical_nr_asu; helical_twist_min = MD.helical_twist_min; helical_twist_max = MD.helical_twist_max; helical_twist_inistep = MD.helical_twist_inistep; helical_rise_min = MD.helical_rise_min; helical_rise_max = MD.helical_rise_max; helical_rise_inistep= MD.helical_rise_inistep; Iref = MD.Iref; Igrad = MD.Igrad; masks_bodies = MD.masks_bodies; com_bodies 
= MD.com_bodies; orient_bodies = MD.orient_bodies; sigma_tilt_bodies = MD.sigma_tilt_bodies; sigma_psi_bodies = MD.sigma_psi_bodies; sigma_offset_bodies = MD.sigma_offset_bodies; keep_fixed_bodies = MD.keep_fixed_bodies; max_radius_mask_bodies = MD.max_radius_mask_bodies; PPref = MD.PPref; PPrefRank = MD.PPrefRank; group_names = MD.group_names; sigma2_noise = MD.sigma2_noise; scale_correction = MD.scale_correction; bfactor_correction = MD.bfactor_correction; tau2_class = MD.tau2_class; sigma2_class = MD.sigma2_class; fsc_halves_class = MD.fsc_halves_class; data_vs_prior_class = MD.data_vs_prior_class; fourier_coverage_class = MD.fourier_coverage_class; pdf_class = MD.pdf_class; pdf_direction = MD.pdf_direction; prior_offset_class = MD.prior_offset_class; nr_particles_per_group = MD.nr_particles_per_group; acc_rot = MD.acc_rot; acc_trans = MD.acc_trans; estimated_resolution = MD.estimated_resolution; total_fourier_coverage = MD.total_fourier_coverage; orientability_contrib = MD.orientability_contrib; helical_twist = MD.helical_twist; helical_rise = MD.helical_rise; do_gpu = MD.do_gpu; } return *this; } // Clear everything void clear() { Iref.clear(); Igrad.clear(); masks_bodies.clear(); com_bodies.clear(); orient_bodies.clear(); sigma_tilt_bodies.clear(); sigma_psi_bodies.clear(); sigma_offset_bodies.clear(); keep_fixed_bodies.clear(); max_radius_mask_bodies.clear(); PPref.clear(); PPrefRank.clear(); group_names.clear(); sigma2_noise.clear(); scale_correction.clear(); bfactor_correction.clear(); tau2_class.clear(); fsc_halves_class.clear(); sigma2_class.clear(); data_vs_prior_class.clear(); fourier_coverage_class.clear(); prior_offset_class.clear(); pdf_class.clear(); pdf_direction.clear(); nr_particles_per_group.clear(); ref_dim = data_dim = ori_size = nr_classes = nr_bodies = nr_groups = nr_directions = interpolator = r_min_nn; padding_factor = 0.; ave_Pmax = avg_norm_correction = LL = sigma2_offset = tau2_fudge_factor = 0.; sigma2_rot = sigma2_tilt = sigma2_psi = 0.; acc_rot.clear(); acc_trans.clear(); estimated_resolution.clear(); total_fourier_coverage.clear(); orientability_contrib.clear(); helical_twist.clear(); helical_rise.clear(); do_sgd=false; } // Initialise vectors with the right size void initialise(bool _do_sgd = false); //Read a model from a file void read(FileName fn_in); // Write a model to disc void write(FileName fn_out, HealpixSampling &sampling, bool do_write_bild = true, bool do_only_write_images = false); //Read a tau-spectrum from a STAR file void readTauSpectrum(FileName fn_tau, int verb); // Read images from disc and initialise // Also set do_average_unaligned and do_generate_seeds flags void initialiseFromImages(FileName fn_ref, bool _is_3d_model, Experiment &_mydata, bool &do_average_unaligned, bool &do_generate_seeds, bool &refs_are_ctf_corrected, RFLOAT ref_angpix = -1., bool _do_sgd = false, bool do_trust_ref = false, bool verb = false); RFLOAT getResolution(int ipix) { return (RFLOAT)ipix/(pixel_size * ori_size); } RFLOAT getResolutionAngstrom(int ipix) { return (ipix==0) ? 999. 
: (pixel_size * ori_size)/(RFLOAT)ipix; } int getPixelFromResolution(RFLOAT resol) { return (int)ROUND(resol * pixel_size * ori_size); } /** Initialise pdf_orient arrays to the given size * If the pdf_orient vectors were empty, resize them to the given size and initialise with an even distribution * If they were not empty, check that the new size is equal to the old one, and otherwise throw an exception * because one cannot use an old pdf_orient with size unequal to the new one */ void initialisePdfDirection(long long int newsize); /** Read in the binary masks provided by the user and then make a soft edge on those */ void initialiseBodies(FileName fn_masks, FileName fn_root_out, bool also_initialise_rest = false, int rank = 0); /** Write out a Bild file with the COMs and directions or rotation for each body */ void writeBildFileBodies(FileName fn_bild); // Set FourierTransforms in Projector of each class // current_size will determine the size of the transform (in number of Fourier shells) to be held in the projector ( thisClass == -1 => do all classes this call) void setFourierTransformMaps(bool update_tau2_spectra, int nr_threads = 1, RFLOAT strict_lowres_exp = -1, const MultidimArray *fourier_mask = NULL ); // current_size will determine the size of the transform (in number of Fourier shells) to be held in the projector ( thisClass == -1 => do all classes this call) void setFourierTransformMaps(bool update_tau2_spectra, std::vector ListCheapSetup, int nr_threads = 1, RFLOAT strict_lowres_exp = -1); /* Initialises the radial average of the data-versus-prior ratio */ void initialiseDataVersusPrior(bool fix_tau); void initialiseHelicalParametersLists(RFLOAT _helical_twist, RFLOAT _helical_rise); void calculateTotalFourierCoverage(); }; class MlWsumModel: public MlModel { public: // One backprojector for CTF-corrected estimate of each class; std::vector BPref; // Store the sum of the weights inside each group // That is the number of particles inside each group std::vector sumw_group; // For the refinement of group intensity scales and bfactors // For each group store weighted sums of experimental image times reference image as a function of resolution std::vector wsum_signal_product; // For each group store weighted sums of squared reference as a function of resolution std::vector wsum_reference_power; // Constructor MlWsumModel() { clear(); } // Destructor ~MlWsumModel() { clear(); } // Clear everything void clear() { BPref.clear(); sumw_group.clear(); MlModel::clear(); } // Initialise all weighted sums (according to size of corresponding model void initialise(MlModel &_model, FileName fn_sym = "c1", bool asymmetric_padding = false, bool _skip_gridding = false); // Initialize all weighted sums to zero (with resizing the BPrefs to current_size) void initZeros(); // Pack entire structure into one large MultidimArray for reading/writing to disc // To save memory, the model itself will be cleared after packing. void pack(MultidimArray &packed); // Fill the model again using unpack (this is the inverse operation from pack) void unpack(MultidimArray &packed); // Pack entire structure into one large MultidimArray for shipping over with MPI // To save memory, the model itself will be cleared after packing. 
// If the whole thing becomes bigger than 1Gb (see MAX_PACK_SIZE in ml_model.cpp), then break it up into pieces because MPI cannot handle very large messages // When broken up: nr_pieces > 1 void pack(MultidimArray &packed, int &piece, int &nr_pieces, bool do_clear=true); // Fill the model again using unpack (this is the inverse operation from pack) void unpack(MultidimArray &packed, int piece, bool do_clear=true); }; #endif /* ML_MODEL_H_ */ relion-3.1.3/src/ml_optimiser.cpp000066400000000000000000014253201411340063500170100ustar00rootroot00000000000000/*************************************************************************** * * Author: "Sjors H.W. Scheres" * MRC Laboratory of Molecular Biology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This complete copyright notice must be included in any revised version of the * source code. Additional authorship citations may be added, but existing * author citations must be preserved. ***************************************************************************/ //#define DEBUG_CHECKSIZES //#define DEBUG_HELICAL_ORIENTATIONAL_SEARCH //#define PRINT_GPU_MEM_INFO //#define DEBUG_BODIES #ifdef TIMING #define RCTIC(timer,label) (timer.tic(label)) #define RCTOC(timer,label) (timer.toc(label)) #else #define RCTIC(timer,label) #define RCTOC(timer,label) #endif #include #include #include #include #include #include #include #include #include "src/macros.h" #include "src/error.h" #include "src/ml_optimiser.h" #ifdef CUDA #include "src/acc/cuda/cuda_ml_optimiser.h" #include #include #endif #ifdef ALTCPU #include #include #include #define TBB_PREVIEW_GLOBAL_CONTROL 1 #include #include "src/acc/cpu/cpu_ml_optimiser.h" #endif #define NR_CLASS_MUTEXES 5 //Some global threads management variables static pthread_mutex_t global_mutex2[NR_CLASS_MUTEXES] = { PTHREAD_MUTEX_INITIALIZER }; static pthread_mutex_t global_mutex = PTHREAD_MUTEX_INITIALIZER; Barrier * global_barrier; ThreadManager * global_ThreadManager; /** ========================== Threaded parallelization of expectation === */ void globalThreadExpectationSomeParticles(ThreadArgument &thArg) { MlOptimiser *MLO = (MlOptimiser*) thArg.workClass; try { #ifdef CUDA if (MLO->do_gpu) ((MlOptimiserCuda*) MLO->cudaOptimisers[thArg.thread_id])->doThreadExpectationSomeParticles(thArg.thread_id); else #endif MLO->doThreadExpectationSomeParticles(thArg.thread_id); } catch (RelionError XE) { RelionError *gE = new RelionError(XE.msg, XE.file, XE.line); gE->msg = XE.msg; MLO->threadException = gE; } } /** ========================== I/O operations =========================== */ void MlOptimiser::usage() { parser.writeUsage(std::cout); } void MlOptimiser::read(int argc, char **argv, int rank) { //#define DEBUG_READ parser.setCommandLine(argc, argv); if (checkParameter(argc, argv, "--continue")) { // Do this before reading in the data.star file below! 
do_preread_images = checkParameter(argc, argv, "--preread_images"); do_parallel_disc_io = !checkParameter(argc, argv, "--no_parallel_disc_io"); parser.addSection("Continue options"); FileName fn_in = parser.getOption("--continue", "_optimiser.star file of the iteration after which to continue"); // Read in previously calculated parameters if (fn_in != "") read(fn_in, rank); // And look for additional command-line options... parseContinue(argc, argv); } else { // Start a new run from scratch parseInitial(argc, argv); } } void MlOptimiser::parseContinue(int argc, char **argv) { #ifdef DEBUG std::cerr << "Entering parseContinue" << std::endl; #endif int general_section = parser.addSection("General options"); // Not all parameters are accessible here... FileName fn_out_new = parser.getOption("--o", "Output rootname", "OLD_ctX"); if (fn_out_new == "OLD_ctX" || fn_out_new == fn_out ) fn_out += "_ct" + integerToString(iter); else fn_out = fn_out_new; do_force_converge = parser.checkOption("--force_converge", "Force an auto-refinement run to converge immediately upon continuation."); // For multi-body refinement bool fn_body_masks_was_empty = (fn_body_masks == "None"); std::string fnt; fnt = parser.getOption("--multibody_masks", "STAR file with masks and metadata for multi-body refinement", "OLD"); if (fnt != "OLD") fn_body_masks = fnt; // Don't use _ctXX at start of a multibody refinement if (fn_body_masks_was_empty && fn_body_masks != "") fn_out = parser.getOption("--o", "Output rootname", "run"); // Also allow change of padding... fnt = parser.getOption("--pad", "Oversampling factor for the Fourier transforms of the references", "OLD"); if (fnt != "OLD") { if (textToInteger(fnt) != mymodel.padding_factor) { if (mymodel.nr_bodies > 1) REPORT_ERROR("ERROR: cannot change padding factor in a continuation of a multi-body refinement..."); mymodel.padding_factor = textToInteger(fnt); // Re-initialise the model to get the right padding factors in the PPref vectors mymodel.initialise(); } } // Is this a new multi-body refinement? if (fn_body_masks_was_empty && fn_body_masks != "None") do_initialise_bodies = true; else do_initialise_bodies = false; if (do_initialise_bodies) { ini_high = textToFloat(parser.getOption("--ini_high", "Resolution (in Angstroms) to which to limit refinement in the first iteration ", "-1")); mymodel.norm_body_mask_overlap = parser.checkOption("--multibody_norm_overlap", "Overlapping regions between bodies are normalized. This reduces memory requirements."); } do_reconstruct_subtracted_bodies = parser.checkOption("--reconstruct_subtracted_bodies", "Use this flag to perform reconstructions with the subtracted images in multi-body refinement"); fnt = parser.getOption("--iter", "Maximum number of iterations to perform", "OLD"); if (fnt != "OLD") nr_iter = textToInteger(fnt); fnt = parser.getOption("--tau2_fudge", "Regularisation parameter (values higher than 1 give more weight to the data)", "OLD"); if (fnt != "OLD") mymodel.tau2_fudge_factor = textToFloat(fnt); auto_ignore_angle_changes = parser.checkOption("--auto_ignore_angles", "In auto-refinement, update angular sampling regardless of changes in orientations for convergence. This makes convergence faster."); auto_resolution_based_angles= parser.checkOption("--auto_resol_angles", "In auto-refinement, update angular sampling based on resolution-based required sampling. 
This makes convergence faster."); allow_coarser_samplings = parser.checkOption("--allow_coarser_sampling", "In 2D/3D classification, allow coarser angular and translational samplings if accuracies are bad (typically in earlier iterations."); // Solvent flattening if (parser.checkOption("--flatten_solvent", "Switch on masking on the references?", "OLD")) do_solvent = true; // Check whether the mask has changed fnt = parser.getOption("--solvent_mask", "User-provided mask for the references", "OLD"); if (fnt != "OLD") fn_mask = fnt; // Check whether the secondary mask has changed fnt = parser.getOption("--solvent_mask2", "User-provided secondary mask", "OLD"); if (fnt != "OLD") fn_mask2 = fnt; // These are still experimental; so not in the optimiser.star yet. fn_lowpass_mask = parser.getOption("--lowpass_mask", "User-provided mask for low-pass filtering", "None"); lowpass = textToFloat(parser.getOption("--lowpass", "User-provided cutoff for region specified above", "0")); // Check whether tau2-spectrum has changed fnt = parser.getOption("--tau", "STAR file with input tau2-spectrum (to be kept constant)", "OLD"); if (fnt != "OLD") fn_tau = fnt; // Check whether particle diameter has changed fnt = parser.getOption("--particle_diameter", "Diameter of the circular mask that will be applied to the experimental images (in Angstroms)", "OLD"); if (fnt != "OLD") particle_diameter = textToFloat(fnt); // SGD stuff fnt = parser.getOption("--sgd_ini_iter", "Number of initial SGD iterations", "OLD"); if (fnt != "OLD") sgd_ini_iter = textToInteger(fnt); fnt = parser.getOption("--sgd_fin_iter", "Number of final SGD iterations", "OLD"); if (fnt != "OLD") sgd_fin_iter = textToInteger(fnt); fnt = parser.getOption("--sgd_inbetween_iter", "Number of SGD iterations between the initial and final ones", "OLD"); if (fnt != "OLD") sgd_inbetween_iter = textToInteger(fnt); fnt = parser.getOption("--sgd_ini_resol", "Resolution cutoff during the initial SGD iterations (A)", "OLD"); if (fnt != "OLD") sgd_ini_resol = textToFloat(fnt); fnt = parser.getOption("--sgd_fin_resol", "Resolution cutoff during the final SGD iterations (A)", "OLD"); if (fnt != "OLD") sgd_fin_resol = textToFloat(fnt); fnt = parser.getOption("--sgd_ini_subset", "Mini-batch size during the initial SGD iterations", "OLD"); if (fnt != "OLD") sgd_ini_subset_size = textToInteger(fnt); fnt = parser.getOption("--sgd_fin_subset", "Mini-batch size during the final SGD iterations", "OLD"); if (fnt != "OLD") sgd_fin_subset_size = textToInteger(fnt); fnt = parser.getOption("--sgd_stepsize", "Step size parameter for SGD updates", "OLD"); if (fnt != "OLD") sgd_stepsize = textToInteger(fnt); fnt = parser.getOption("--mu", "Momentum parameter for SGD updates", "OLD"); if (fnt != "OLD") mu = textToFloat(fnt); fnt = parser.getOption("--sgd_write_iter", "Write out model every so many iterations in SGD", "OLD"); if (fnt != "OLD") write_every_sgd_iter = textToInteger(fnt); fnt = parser.getOption("--relax_sym", "The symmetry to be relaxed", "OLD"); if (fnt != "OLD") { sampling.fn_sym_relax = fnt; } do_join_random_halves = parser.checkOption("--join_random_halves", "Join previously split random halves again (typically to perform a final reconstruction)."); // ORIENTATIONS int orientations_section = parser.addSection("Orientations"); fnt = parser.getOption("--oversampling", "Adaptive oversampling order to speed-up calculations (0=no oversampling, 1=2x, 2=4x, etc)", "OLD"); if (fnt != "OLD") adaptive_oversampling = textToInteger(fnt); // Check whether angular sampling has 
changed // Do not do this for auto_refine, but make sure to do this when initialising multi-body refinement! if (!do_auto_refine || do_initialise_bodies) { directions_have_changed = false; fnt = parser.getOption("--healpix_order", "Healpix order for the angular sampling rate on the sphere (before oversampling): hp2=15deg, hp3=7.5deg, etc", "OLD"); if (fnt != "OLD") { int _order = textToInteger(fnt); if (_order != sampling.healpix_order) { directions_have_changed = true; sampling.healpix_order = _order; } } fnt = parser.getOption("--psi_step", "Angular sampling (before oversampling) for the in-plane angle (default=10deg for 2D, hp sampling for 3D)", "OLD"); if (fnt != "OLD") sampling.psi_step = textToFloat(fnt); fnt = parser.getOption("--offset_range", "Search range for origin offsets (in pixels)", "OLD"); if (fnt != "OLD") { sampling.offset_range = textToFloat(fnt); sampling.offset_range *= mymodel.pixel_size; // sampling.offset_range is in Angstroms, but command line in pixels! } fnt = parser.getOption("--offset_step", "Sampling rate for origin offsets (in pixels)", "OLD"); if (fnt != "OLD") { sampling.offset_step = textToFloat(fnt); sampling.offset_step *= mymodel.pixel_size; // sampling.offset_step is in Angstroms, but command line in pixels! } } fnt = parser.getOption("--auto_local_healpix_order", "Minimum healpix order (before oversampling) from which auto-refine procedure will use local searches", "OLD"); if (fnt != "OLD") autosampling_hporder_local_searches = textToInteger(fnt); // Check whether the prior mode changes RFLOAT _sigma_rot, _sigma_tilt, _sigma_psi, _sigma_off; int _mode; fnt = parser.getOption("--sigma_ang", "Stddev on all three Euler angles for local angular searches (of +/- 3 stddev)", "OLD"); if (fnt != "OLD") { mymodel.orientational_prior_mode = PRIOR_ROTTILT_PSI; mymodel.sigma2_rot = mymodel.sigma2_tilt = mymodel.sigma2_psi = textToFloat(fnt) * textToFloat(fnt); } fnt = parser.getOption("--sigma_rot", "Stddev on the first Euler angle for local angular searches (of +/- 3 stddev)", "OLD"); if (fnt != "OLD") { mymodel.orientational_prior_mode = PRIOR_ROTTILT_PSI; mymodel.sigma2_rot = textToFloat(fnt) * textToFloat(fnt); } fnt = parser.getOption("--sigma_tilt", "Stddev on the first Euler angle for local angular searches (of +/- 3 stddev)", "OLD"); if (fnt != "OLD") { mymodel.orientational_prior_mode = PRIOR_ROTTILT_PSI; mymodel.sigma2_tilt = textToFloat(fnt) * textToFloat(fnt); } fnt = parser.getOption("--sigma_psi", "Stddev on the in-plane angle for local angular searches (of +/- 3 stddev)", "OLD"); if (fnt != "OLD") { mymodel.orientational_prior_mode = PRIOR_ROTTILT_PSI; mymodel.sigma2_psi = textToFloat(fnt) * textToFloat(fnt); } fnt = parser.getOption("--sigma_off", "Stddev. 
on the translations", "OLD"); if (fnt != "OLD") { mymodel.sigma2_offset = textToFloat(fnt) * textToFloat(fnt); } fnt = parser.getOption("--helical_inner_diameter", "Inner diameter of helical tubes in Angstroms (for masks of helical references and particles)", "OLD"); if (fnt != "OLD") { helical_tube_inner_diameter = textToFloat(fnt); } fnt = parser.getOption("--helical_outer_diameter", "Outer diameter of helical tubes in Angstroms (for masks of helical references and particles)", "OLD"); if (fnt != "OLD") { helical_tube_outer_diameter = textToFloat(fnt); } fnt = parser.getOption("--perturb", "Perturbation factor for the angular sampling (0=no perturb; 0.5=perturb)", "OLD"); if (fnt != "OLD") { sampling.perturbation_factor = textToFloat(fnt); } if (parser.checkOption("--skip_align", "Skip orientational assignment (only classify)?")) do_skip_align = true; else do_skip_align = false; // do_skip_align should normally be false... if (parser.checkOption("--skip_rotate", "Skip rotational assignment (only translate and classify)?")) do_skip_rotate = true; else do_skip_rotate = false; // do_skip_rotate should normally be false... if (parser.checkOption("--bimodal_psi", "Do bimodal searches of psi angle?")) // Oct07,2015 - Shaoda, bimodal psi do_bimodal_psi = true; else do_bimodal_psi = false; do_skip_maximization = parser.checkOption("--skip_maximize", "Skip maximization step (only write out data.star file)?"); int corrections_section = parser.addSection("Corrections"); do_ctf_padding = parser.checkOption("--pad_ctf", "Perform CTF padding to treat CTF aliaising better?"); if (do_ctf_padding) REPORT_ERROR("--pad_ctf currently disabled."); // Can also switch the following option OFF if (parser.checkOption("--scale", "Switch on intensity-scale corrections on image groups", "OLD")) do_scale_correction = true; if (parser.checkOption("--no_scale", "Switch off intensity-scale corrections on image groups", "OLD")) do_scale_correction = false; // Can also switch the following option OFF if (parser.checkOption("--norm", "Switch on normalisation-error correction","OLD")) do_norm_correction = true; if (parser.checkOption("--no_norm", "Switch off normalisation-error correction","OLD")) do_norm_correction = false; int computation_section = parser.addSection("Computation"); x_pool = textToInteger(parser.getOption("--pool", "Number of images to pool for each thread task", "1")); nr_threads = textToInteger(parser.getOption("--j", "Number of threads to run in parallel (only useful on multi-core machines)", "1")); do_parallel_disc_io = !parser.checkOption("--no_parallel_disc_io", "Do NOT let parallel (MPI) processes access the disc simultaneously (use this option with NFS)"); combine_weights_thru_disc = !parser.checkOption("--dont_combine_weights_via_disc", "Send the large arrays of summed weights through the MPI network, instead of writing large files to disc"); do_shifts_onthefly = parser.checkOption("--onthefly_shifts", "Calculate shifted images on-the-fly, do not store precalculated ones in memory"); do_preread_images = parser.checkOption("--preread_images", "Use this to let the leader process read all particles into memory. 
Be careful you have enough RAM for large data sets!"); fn_scratch = parser.getOption("--scratch_dir", "If provided, particle stacks will be copied to this local scratch disk prior to refinement.", ""); keep_free_scratch_Gb = textToFloat(parser.getOption("--keep_free_scratch", "Space available for copying particle stacks (in Gb)", "10")); do_reuse_scratch = parser.checkOption("--reuse_scratch", "Re-use data on scratchdir, instead of wiping it and re-copying all data. This works only when ALL particles have already been cached."); keep_scratch = parser.checkOption("--keep_scratch", "Don't remove scratch after convergence. Following jobs that use EXACTLY the same particles should use --reuse_scratch."); #ifdef ALTCPU do_cpu = parser.checkOption("--cpu", "Use intel vectorisation implementation for CPU"); #else do_cpu = false; #endif failsafe_threshold = textToInteger(parser.getOption("--failsafe_threshold", "Maximum number of particles permitted to be drop, due to zero sum of weights, before exiting with an error (GPU only).", "40")); do_gpu = parser.checkOption("--gpu", "Use available gpu resources for some calculations"); gpu_ids = parser.getOption("--gpu", "Device ids for each MPI-thread","default"); #ifndef CUDA if(do_gpu) { std::cerr << "+ WARNING : Relion was compiled without CUDA of at least version 7.0 - you do NOT have support for GPUs" << std::endl; do_gpu = false; } #endif double temp_reqSize = textToDouble(parser.getOption("--free_gpu_memory", "GPU device memory (in Mb) to leave free after allocation.", "0")); if(!do_zero_mask) temp_reqSize += 100; temp_reqSize *= 1000*1000; if(temp_reqSize<0) REPORT_ERROR("Invalid free_gpu_memory value."); else requested_free_gpu_memory = temp_reqSize; // only allow switching ON solvent_fsc, not off if (parser.checkOption("--solvent_correct_fsc", "Correct FSC curve for the effects of the solvent mask?")) do_phase_random_fsc = true; verb = textToInteger(parser.getOption("--verb", "Verbosity (1=normal, 0=silent)", "1")); int expert_section = parser.addSection("Expert options"); fnt = parser.getOption("--strict_highres_exp", "Resolution limit (in Angstrom) to restrict probability calculations in the expectation step", "OLD"); if (fnt != "OLD") strict_highres_exp = textToFloat(fnt); do_trust_ref_size = parser.checkOption("--trust_ref_size", "Trust the pixel and box size of the input reference; by default the program will die if these are different from the first optics group of the data"); // Debugging/analysis/hidden stuff do_map = !checkParameter(argc, argv, "--no_map"); minres_map = textToInteger(getParameter(argc, argv, "--minres_map", "5")); gridding_nr_iter = textToInteger(getParameter(argc, argv, "--gridding_iter", "10")); debug1 = textToFloat(getParameter(argc, argv, "--debug1", "0.")); debug2 = textToFloat(getParameter(argc, argv, "--debug2", "0.")); debug3 = textToFloat(getParameter(argc, argv, "--debug3", "0.")); do_bfactor = checkParameter(argc, argv, "--bfactor"); // Read in initial sigmaNoise spectrum fn_sigma = getParameter(argc, argv, "--sigma",""); sigma2_fudge = textToFloat(getParameter(argc, argv, "--sigma2_fudge", "1.")); do_acc_currentsize_despite_highres_exp = checkParameter(argc, argv, "--accuracy_current_size"); do_sequential_halves_recons = checkParameter(argc, argv, "--sequential_halves_recons"); do_always_join_random_halves = checkParameter(argc, argv, "--always_join_random_halves"); do_use_all_data = checkParameter(argc, argv, "--use_all_data"); do_always_cc = checkParameter(argc, argv, "--always_cc"); do_only_sample_tilt = 
checkParameter(argc, argv, "--only_sample_tilt"); minimum_angular_sampling = textToFloat(getParameter(argc, argv, "--minimum_angular_sampling", "0")); maximum_angular_sampling = textToFloat(getParameter(argc, argv, "--maximum_angular_sampling", "0")); asymmetric_padding = parser.checkOption("--asymmetric_padding", "", "false", true); maximum_significants = textToInteger(parser.getOption("--maxsig", "Maximum number of poses & translations to consider", "-1")); skip_gridding = parser.checkOption("--skip_gridding", "Skip gridding in the M step"); nr_iter_max = textToInteger(parser.getOption("--auto_iter_max", "In auto-refinement, stop at this iteration.", "999")); debug_split_random_half = textToInteger(getParameter(argc, argv, "--debug_split_random_half", "0")); do_print_metadata_labels = false; do_print_symmetry_ops = false; #ifdef DEBUG std::cerr << "Leaving parseContinue" << std::endl; #endif } void MlOptimiser::parseInitial(int argc, char **argv) { #ifdef DEBUG_READ std::cerr<<"MlOptimiser::parseInitial Entering "< 0.) { mymodel.orientational_prior_mode = PRIOR_ROTTILT_PSI; // the sigma-values for the orientational prior are in model (and not in sampling) because one might like to estimate them // from the data by calculating weighted sums of all angular differences: therefore it needs to be in wsum_model and thus in mymodel. mymodel.sigma2_rot = mymodel.sigma2_tilt = mymodel.sigma2_psi = _sigma_ang * _sigma_ang; } else if (_sigma_rot > 0. || _sigma_tilt > 0. || _sigma_psi > 0.) { mymodel.orientational_prior_mode = PRIOR_ROTTILT_PSI; mymodel.sigma2_rot = (_sigma_rot > 0. ) ? _sigma_rot * _sigma_rot : 0.; mymodel.sigma2_tilt = (_sigma_tilt > 0.) ? _sigma_tilt * _sigma_tilt : 0.; mymodel.sigma2_psi = (_sigma_psi > 0. ) ? _sigma_psi * _sigma_psi : 0.; } else { //default // Very small to force the algorithm to take the current orientation if (sym_relax_ != "") { mymodel.orientational_prior_mode = PRIOR_ROTTILT_PSI; _sigma_ang = 0.0033; mymodel.sigma2_rot = mymodel.sigma2_tilt = mymodel.sigma2_psi = _sigma_ang * _sigma_ang; } else { mymodel.orientational_prior_mode = NOPRIOR; mymodel.sigma2_rot = mymodel.sigma2_tilt = mymodel.sigma2_psi = 0.; } } do_skip_align = parser.checkOption("--skip_align", "Skip orientational assignment (only classify)?"); do_skip_rotate = parser.checkOption("--skip_rotate", "Skip rotational assignment (only translate and classify)?"); do_bimodal_psi = parser.checkOption("--bimodal_psi", "Do bimodal searches of psi angle?"); // Oct07,2015 - Shaoda, bimodal psi do_skip_maximization = false; // Helical reconstruction int helical_section = parser.addSection("Helical reconstruction (in development...)"); do_helical_refine = parser.checkOption("--helix", "Perform 3D classification or refinement for helices?"); ignore_helical_symmetry = parser.checkOption("--ignore_helical_symmetry", "Ignore helical symmetry?"); mymodel.helical_nr_asu = textToInteger(parser.getOption("--helical_nr_asu", "Number of new helical asymmetric units (asu) per box (1 means no helical symmetry is present)", "1")); helical_twist_initial = textToFloat(parser.getOption("--helical_twist_initial", "Helical twist (in degrees, positive values for right-handedness)", "0.")); mymodel.helical_twist_min = textToFloat(parser.getOption("--helical_twist_min", "Minimum helical twist (in degrees, positive values for right-handedness)", "0.")); mymodel.helical_twist_max = textToFloat(parser.getOption("--helical_twist_max", "Maximum helical twist (in degrees, positive values for right-handedness)", "0.")); 
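    // Illustrative example only (values are placeholders, not recommendations): the twist search
    // is bracketed by --helical_twist_min/--helical_twist_max and stepped by --helical_twist_inistep;
    // if min > max the two are swapped further below. A hypothetical local symmetry search might
    // combine these options as:
    //   --helix --helical_symmetry_search --helical_twist_initial 22 \
    //   --helical_twist_min 21 --helical_twist_max 23 --helical_twist_inistep 0.5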
mymodel.helical_twist_inistep = textToFloat(parser.getOption("--helical_twist_inistep", "Initial step of helical twist search (in degrees)", "0.")); helical_rise_initial = textToFloat(parser.getOption("--helical_rise_initial", "Helical rise (in Angstroms)", "0.")); mymodel.helical_rise_min = textToFloat(parser.getOption("--helical_rise_min", "Minimum helical rise (in Angstroms)", "0.")); mymodel.helical_rise_max = textToFloat(parser.getOption("--helical_rise_max", "Maximum helical rise (in Angstroms)", "0.")); mymodel.helical_rise_inistep = textToFloat(parser.getOption("--helical_rise_inistep", "Initial step of helical rise search (in Angstroms)", "0.")); helical_nstart = textToInteger(parser.getOption("--helical_nstart", "N-number for the N-start helix (only useful for rotational priors)", "1")); helical_z_percentage = textToFloat(parser.getOption("--helical_z_percentage", "This box length along the center of Z axis contains good information of the helix. Important in imposing and refining symmetry", "0.3")); helical_tube_inner_diameter = textToFloat(parser.getOption("--helical_inner_diameter", "Inner diameter of helical tubes in Angstroms (for masks of helical references and particles)", "-1.")); helical_tube_outer_diameter = textToFloat(parser.getOption("--helical_outer_diameter", "Outer diameter of helical tubes in Angstroms (for masks of helical references and particles)", "-1.")); do_helical_symmetry_local_refinement = parser.checkOption("--helical_symmetry_search", "Perform local refinement of helical symmetry?"); helical_sigma_distance = textToFloat(parser.getOption("--helical_sigma_distance", "Sigma of distance along the helical tracks", "-1.")); helical_keep_tilt_prior_fixed = parser.checkOption("--helical_keep_tilt_prior_fixed", "Keep helical tilt priors fixed (at 90 degrees) in global angular searches?"); if (ignore_helical_symmetry) { mymodel.helical_nr_asu = 1; // IMPORTANT ! do_helical_symmetry_local_refinement = false; helical_twist_initial = mymodel.helical_twist_min = mymodel.helical_twist_max = mymodel.helical_twist_inistep = 0.; helical_rise_initial = mymodel.helical_rise_min = mymodel.helical_rise_max = mymodel.helical_rise_inistep = 0.; helical_z_percentage = 0.; } mymodel.initialiseHelicalParametersLists(helical_twist_initial, helical_rise_initial); mymodel.is_helix = do_helical_refine; RFLOAT tmp_RFLOAT = 0.; if (mymodel.helical_rise_min > mymodel.helical_rise_max) SWAP(mymodel.helical_rise_min, mymodel.helical_rise_max, tmp_RFLOAT); if (mymodel.helical_twist_min > mymodel.helical_twist_max) SWAP(mymodel.helical_twist_min, mymodel.helical_twist_max, tmp_RFLOAT); helical_fourier_mask_resols = parser.getOption("--helical_exclude_resols", "Resolutions (in A) along helical axis to exclude from refinement (comma-separated pairs, e.g. 50-5)", ""); fn_fourier_mask = parser.getOption("--fourier_mask", "Originally-sized, FFTW-centred image with Fourier mask for Projector", "None"); // CTF, norm, scale, bfactor correction etc. 
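    // The corrections block below first reads --norm/--scale and only afterwards --no_norm/--no_scale,
    // so when a switch and its "no_" counterpart are both given, the "no_" option wins. A minimal
    // sketch of that pattern (hypothetical --foo flag, not part of RELION's interface):
    //   bool flag = parser.checkOption("--foo", "Switch on foo?");
    //   if (parser.checkOption("--no_foo", "Switch off foo?")) flag = false;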
int corrections_section = parser.addSection("Corrections"); do_ctf_correction = parser.checkOption("--ctf", "Perform CTF correction?"); do_ctf_padding = parser.checkOption("--pad_ctf", "Perform CTF padding to treat CTF aliaising better?"); if (do_ctf_padding) REPORT_ERROR("--pad_ctf currently disabled."); intact_ctf_first_peak = parser.checkOption("--ctf_intact_first_peak", "Ignore CTFs until their first peak?"); refs_are_ctf_corrected = parser.checkOption("--ctf_corrected_ref", "Have the input references been CTF-amplitude corrected?"); ctf_phase_flipped = parser.checkOption("--ctf_phase_flipped", "Have the data been CTF phase-flipped?"); only_flip_phases = parser.checkOption("--only_flip_phases", "Only perform CTF phase-flipping? (default is full amplitude-correction)"); do_norm_correction = parser.checkOption("--norm", "Perform normalisation-error correction?"); do_scale_correction = parser.checkOption("--scale", "Perform intensity-scale corrections on image groups?"); // Allow switching off norm and scale (which is on by default in the GUI) if (parser.checkOption("--no_norm", "Switch off normalisation-error correction?")) do_norm_correction = false; if (parser.checkOption("--no_scale", "Switch off intensity-scale corrections on image groups?")) do_scale_correction = false; // SGD stuff int sgd_section = parser.addSection("Stochastic Gradient Descent"); do_sgd = parser.checkOption("--sgd", "Perform stochastic gradient descent instead of default expectation-maximization"); do_avoid_sgd = parser.checkOption("--stochastic_em", "Perform stochastic EM instead of SGD to avoid patent problems for initial model generation by commercial users"); // Stochastic EM is implemented as a variant of SGD, though it is really a different algorithm! if (do_avoid_sgd) do_sgd = true; sgd_ini_iter = textToInteger(parser.getOption("--sgd_ini_iter", "Number of initial SGD iterations", "50")); sgd_fin_iter = textToInteger(parser.getOption("--sgd_fin_iter", "Number of final SGD iterations", "50")); sgd_inbetween_iter = textToInteger(parser.getOption("--sgd_inbetween_iter", "Number of SGD iterations between the initial and final ones", "200")); sgd_ini_resol = textToInteger(parser.getOption("--sgd_ini_resol", "Resolution cutoff during the initial SGD iterations (A)", "35")); sgd_fin_resol = textToInteger(parser.getOption("--sgd_fin_resol", "Resolution cutoff during the final SGD iterations (A)", "15")); sgd_ini_subset_size = textToInteger(parser.getOption("--sgd_ini_subset", "Mini-batch size during the initial SGD iterations", "100")); sgd_fin_subset_size = textToInteger(parser.getOption("--sgd_fin_subset", "Mini-batch size during the final SGD iterations", "500")); mu = textToFloat(parser.getOption("--mu", "Momentum parameter for SGD updates", "0.9")); sgd_stepsize = textToFloat(parser.getOption("--sgd_stepsize", "Step size parameter for SGD updates", "0.5")); sgd_sigma2fudge_ini = textToFloat(parser.getOption("--sgd_sigma2fudge_initial", "Initial factor by which the noise variance will be multiplied for SGD (not used if halftime is negative)", "8")); sgd_sigma2fudge_halflife = textToInteger(parser.getOption("--sgd_sigma2fudge_halflife", "Initialise SGD with 8x higher noise-variance, and reduce with this half-life in # of particles (default is keep normal variance)", "-1")); do_sgd_skip_anneal = parser.checkOption("--sgd_skip_anneal", "By default, multiple references are annealed during the in_between iterations. 
Use this option to switch annealing off"); write_every_sgd_iter = textToInteger(parser.getOption("--sgd_write_iter", "Write out model every so many iterations in SGD (default is writing out all iters)", "1")); // Computation stuff // The number of threads is always read from the command line int computation_section = parser.addSection("Computation"); x_pool = textToInteger(parser.getOption("--pool", "Number of images to pool for each thread task", "1")); nr_threads = textToInteger(parser.getOption("--j", "Number of threads to run in parallel (only useful on multi-core machines)", "1")); combine_weights_thru_disc = !parser.checkOption("--dont_combine_weights_via_disc", "Send the large arrays of summed weights through the MPI network, instead of writing large files to disc"); do_shifts_onthefly = parser.checkOption("--onthefly_shifts", "Calculate shifted images on-the-fly, do not store precalculated ones in memory"); do_parallel_disc_io = !parser.checkOption("--no_parallel_disc_io", "Do NOT let parallel (MPI) processes access the disc simultaneously (use this option with NFS)"); do_preread_images = parser.checkOption("--preread_images", "Use this to let the leader process read all particles into memory. Be careful you have enough RAM for large data sets!"); fn_scratch = parser.getOption("--scratch_dir", "If provided, particle stacks will be copied to this local scratch disk prior to refinement.", ""); keep_free_scratch_Gb = textToFloat(parser.getOption("--keep_free_scratch", "Space available for copying particle stacks (in Gb)", "10")); do_reuse_scratch = parser.checkOption("--reuse_scratch", "Re-use data on scratchdir, instead of wiping it and re-copying all data."); keep_scratch = parser.checkOption("--keep_scratch", "Don't remove scratch after convergence. Following jobs that use EXACTLY the same particles should use --reuse_scratch."); do_fast_subsets = parser.checkOption("--fast_subsets", "Use faster optimisation by using subsets of the data in the first 15 iterations"); #ifdef ALTCPU do_cpu = parser.checkOption("--cpu", "Use intel vectorisation implementation for CPU"); #else do_cpu = false; #endif do_gpu = parser.checkOption("--gpu", "Use available gpu resources for some calculations"); gpu_ids = parser.getOption("--gpu", "Device ids for each MPI-thread","default"); #ifndef CUDA if(do_gpu) { std::cerr << "+ WARNING : Relion was compiled without CUDA of at least version 7.0 - you do NOT have support for GPUs" << std::endl; do_gpu = false; } #endif double temp_reqSize = textToDouble(parser.getOption("--free_gpu_memory", "GPU device memory (in Mb) to leave free after allocation.", "0")); if(!do_zero_mask) temp_reqSize += 100; temp_reqSize *= 1000*1000; if(temp_reqSize<0) REPORT_ERROR("Invalid free_gpu_memory value."); else requested_free_gpu_memory = temp_reqSize; // Expert options int expert_section = parser.addSection("Expert options"); mymodel.padding_factor = textToFloat(parser.getOption("--pad", "Oversampling factor for the Fourier transforms of the references", "2")); ref_angpix = textToFloat(parser.getOption("--ref_angpix", "Pixel size (in A) for the input reference (default is to read from header)", "-1.")); mymodel.interpolator = (parser.checkOption("--NN", "Perform nearest-neighbour instead of linear Fourier-space interpolation?")) ? 
NEAREST_NEIGHBOUR : TRILINEAR; mymodel.r_min_nn = textToInteger(parser.getOption("--r_min_nn", "Minimum number of Fourier shells to perform linear Fourier-space interpolation", "10")); verb = textToInteger(parser.getOption("--verb", "Verbosity (1=normal, 0=silent)", "1")); random_seed = textToInteger(parser.getOption("--random_seed", "Number for the random seed generator", "-1")); max_coarse_size = textToInteger(parser.getOption("--coarse_size", "Maximum image size for the first pass of the adaptive sampling approach", "-1")); adaptive_fraction = textToFloat(parser.getOption("--adaptive_fraction", "Fraction of the weights to be considered in the first pass of adaptive oversampling ", "0.999")); width_mask_edge = textToInteger(parser.getOption("--maskedge", "Width of the soft edge of the spherical mask (in pixels)", "5")); fix_sigma_noise = parser.checkOption("--fix_sigma_noise", "Fix the experimental noise spectra?"); fix_sigma_offset = parser.checkOption("--fix_sigma_offset", "Fix the stddev in the origin offsets?"); incr_size = textToInteger(parser.getOption("--incr_size", "Number of Fourier shells beyond the current resolution to be included in refinement", "10")); do_print_metadata_labels = parser.checkOption("--print_metadata_labels", "Print a table with definitions of all metadata labels, and exit"); do_print_symmetry_ops = parser.checkOption("--print_symmetry_ops", "Print all symmetry transformation matrices, and exit"); strict_highres_exp = textToFloat(parser.getOption("--strict_highres_exp", "High resolution limit (in Angstrom) to restrict probability calculations in the expectation step", "-1")); strict_lowres_exp = textToFloat(parser.getOption("--strict_lowres_exp", "Low resolution limit (in Angstrom) to restrict probability calculations in the expectation step", "-1")); dont_raise_norm_error = parser.checkOption("--dont_check_norm", "Skip the check whether the images are normalised correctly"); do_always_cc = parser.checkOption("--always_cc", "Perform CC-calculation in all iterations (useful for faster denovo model generation?)"); do_phase_random_fsc = parser.checkOption("--solvent_correct_fsc", "Correct FSC curve for the effects of the solvent mask?"); do_skip_maximization = parser.checkOption("--skip_maximize", "Skip maximization step (only write out data.star file)?"); failsafe_threshold = textToInteger(parser.getOption("--failsafe_threshold", "Maximum number of particles permitted to be handled by fail-safe mode, due to zero sum of weights, before exiting with an error (GPU only).", "40")); do_external_reconstruct = parser.checkOption("--external_reconstruct", "Perform the reconstruction step outside relion_refine, e.g. for learned priors?)"); nr_iter_max = textToInteger(parser.getOption("--auto_iter_max", "In auto-refinement, stop at this iteration.", "999")); auto_ignore_angle_changes = parser.checkOption("--auto_ignore_angles", "In auto-refinement, update angular sampling regardless of changes in orientations for convergence. This makes convergence faster."); auto_resolution_based_angles= parser.checkOption("--auto_resol_angles", "In auto-refinement, update angular sampling based on resolution-based required sampling. 
This makes convergence faster."); allow_coarser_samplings = parser.checkOption("--allow_coarser_sampling", "In 2D/3D classification, allow coarser angular and translational samplings if accuracies are bad (typically in earlier iterations."); do_trust_ref_size = parser.checkOption("--trust_ref_size", "Trust the pixel and box size of the input reference; by default the program will die if these are different from the first optics group of the data"); ///////////////// Special stuff for first iteration (only accessible via CL, not through readSTAR //////////////////// // When reading from the CL: always start at iteration 1 and subset 1 iter = 0; // When starting from CL: always calculate initial sigma_noise do_calculate_initial_sigma_noise = true; // Start average norm correction at 1! mymodel.avg_norm_correction = 1.; // Always initialise the PDF of the directions directions_have_changed = true; // Only reconstruct and join random halves are only available when continuing an old run do_join_random_halves = false; // For auto-sampling and convergence check nr_iter_wo_resol_gain = 0; nr_iter_wo_large_hidden_variable_changes = 0; current_changes_optimal_classes = 9999999; current_changes_optimal_offsets = 999.; current_changes_optimal_orientations = 999.; smallest_changes_optimal_classes = 9999999; smallest_changes_optimal_offsets = 999.; smallest_changes_optimal_orientations = 999.; acc_rot = acc_trans = 999.; best_resol_thus_far = 1./999.; has_converged = false; has_high_fsc_at_limit = false; has_large_incr_size_iter_ago = 0; do_initialise_bodies = false; // By default, start with nr_bodies to 1 mymodel.nr_bodies = 1; fn_body_masks = "None"; // Debugging/analysis/hidden stuff do_map = !checkParameter(argc, argv, "--no_map"); minres_map = textToInteger(getParameter(argc, argv, "--minres_map", "5")); do_bfactor = checkParameter(argc, argv, "--bfactor"); gridding_nr_iter = textToInteger(getParameter(argc, argv, "--gridding_iter", "10")); debug1 = textToFloat(getParameter(argc, argv, "--debug1", "0")); debug2 = textToFloat(getParameter(argc, argv, "--debug2", "0")); debug3 = textToFloat(getParameter(argc, argv, "--debug3", "0")); // Read in initial sigmaNoise spectrum fn_sigma = getParameter(argc, argv, "--sigma",""); do_calculate_initial_sigma_noise = (fn_sigma == "") ? true : false; sigma2_fudge = textToFloat(getParameter(argc, argv, "--sigma2_fudge", "1")); do_acc_currentsize_despite_highres_exp = checkParameter(argc, argv, "--accuracy_current_size"); do_sequential_halves_recons = checkParameter(argc, argv, "--sequential_halves_recons"); do_always_join_random_halves = checkParameter(argc, argv, "--always_join_random_halves"); do_use_all_data = checkParameter(argc, argv, "--use_all_data"); do_only_sample_tilt = checkParameter(argc, argv, "--only_sample_tilt"); minimum_angular_sampling = textToFloat(getParameter(argc, argv, "--minimum_angular_sampling", "0")); maximum_angular_sampling = textToFloat(getParameter(argc, argv, "--maximum_angular_sampling", "0")); asymmetric_padding = parser.checkOption("--asymmetric_padding", "", "false", true); maximum_significants = textToInteger(parser.getOption("--maxsig", "Maximum number of poses & translations to consider", "-1")); skip_gridding = parser.checkOption("--skip_gridding", "Skip gridding in the M step"); debug_split_random_half = textToInteger(getParameter(argc, argv, "--debug_split_random_half", "0")); #ifdef DEBUG_READ std::cerr<<"MlOptimiser::parseInitial Done"<